From c4a450ee1dfa26977b148e085016c803d942c241 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 11:09:07 +0530 Subject: [PATCH 01/84] wip: broken --- server/src/handlers/http.rs | 6 +- server/src/main.rs | 1 + server/src/modal/ingest_server.rs | 203 +++++++++++++++ server/src/modal/mod.rs | 4 + server/src/modal/query_server.rs | 1 + server/src/modal/server.rs | 316 ++++++++++++++++++++++++ server/src/utils/arrow/merged_reader.rs | 1 - 7 files changed, 528 insertions(+), 4 deletions(-) create mode 100644 server/src/modal/ingest_server.rs create mode 100644 server/src/modal/mod.rs create mode 100644 server/src/modal/query_server.rs create mode 100644 server/src/modal/server.rs diff --git a/server/src/handlers/http.rs b/server/src/handlers/http.rs index e30e3d77a..5b72b6115 100644 --- a/server/src/handlers/http.rs +++ b/server/src/handlers/http.rs @@ -52,9 +52,9 @@ mod role; include!(concat!(env!("OUT_DIR"), "/generated.rs")); -const MAX_EVENT_PAYLOAD_SIZE: usize = 10485760; -const API_BASE_PATH: &str = "/api"; -const API_VERSION: &str = "v1"; +pub const MAX_EVENT_PAYLOAD_SIZE: usize = 10485760; +pub const API_BASE_PATH: &str = "/api"; +pub const API_VERSION: &str = "v1"; pub async fn run_http( prometheus: PrometheusMetrics, diff --git a/server/src/main.rs b/server/src/main.rs index ef0cb2cc6..bf90b4e19 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -46,6 +46,7 @@ mod stats; mod storage; mod utils; mod validator; +mod modal; use option::CONFIG; diff --git a/server/src/modal/ingest_server.rs b/server/src/modal/ingest_server.rs new file mode 100644 index 000000000..8406a05bb --- /dev/null +++ b/server/src/modal/ingest_server.rs @@ -0,0 +1,203 @@ +use std::{fs::File, io::BufReader, path::PathBuf}; + +use super::parseable_server::ParseableServer; +use actix_web::{web, App, HttpServer, Scope}; +use actix_web_prometheus::PrometheusMetrics; +use async_trait::async_trait; +use itertools::Itertools; +use rustls::{Certificate, PrivateKey, 
ServerConfig}; +use rustls_pemfile::{certs, pkcs8_private_keys}; + +use crate::{ + handlers::http::{ + base_path, cross_origin_config, health_check, ingest, logstream, middleware::RouteExt, + MAX_EVENT_PAYLOAD_SIZE, + }, + option::CONFIG, + rbac::role::Action, +}; + +pub struct IngestServer; + +impl IngestServer { + fn get_logstream_factory() -> Scope { + web::scope("/{logstream}") + .service( + web::resource("") + // PUT "/logstream/{logstream}" ==> Create log stream + .route( + web::put() + .to(logstream::put_stream) + .authorize_for_stream(Action::CreateStream), + ) + // POST "/logstream/{logstream}" ==> Post logs to given log stream + .route( + web::post() + .to(ingest::post_event) + .authorize_for_stream(Action::Ingest), + ) + // DELETE "/logstream/{logstream}" ==> Delete log stream + .route( + web::delete() + .to(logstream::delete) + .authorize_for_stream(Action::DeleteStream), + ) + .app_data(web::PayloadConfig::default().limit(MAX_EVENT_PAYLOAD_SIZE)), + ) + .service( + web::resource("/alert") + // PUT "/logstream/{logstream}/alert" ==> Set alert for given log stream + .route( + web::put() + .to(logstream::put_alert) + .authorize_for_stream(Action::PutAlert), + ) + // GET "/logstream/{logstream}/alert" ==> Get alert for given log stream + .route( + web::get() + .to(logstream::get_alert) + .authorize_for_stream(Action::GetAlert), + ), + ) + .service( + // GET "/logstream/{logstream}/schema" ==> Get schema for given log stream + web::resource("/schema").route( + web::get() + .to(logstream::schema) + .authorize_for_stream(Action::GetSchema), + ), + ) + .service( + // GET "/logstream/{logstream}/stats" ==> Get stats for given log stream + web::resource("/stats").route( + web::get() + .to(logstream::get_stats) + .authorize_for_stream(Action::GetStats), + ), + ) + .service( + web::resource("/retention") + // PUT "/logstream/{logstream}/retention" ==> Set retention for given logstream + .route( + web::put() + .to(logstream::put_retention) + 
.authorize_for_stream(Action::PutRetention), + ) + // GET "/logstream/{logstream}/retention" ==> Get retention for given logstream + .route( + web::get() + .to(logstream::get_retention) + .authorize_for_stream(Action::GetRetention), + ), + ) + .service( + web::resource("/cache") + // PUT "/logstream/{logstream}/cache" ==> Set retention for given logstream + .route( + web::put() + .to(logstream::put_enable_cache) + .authorize_for_stream(Action::PutCacheEnabled), + ) + // GET "/logstream/{logstream}/cache" ==> Get retention for given logstream + .route( + web::get() + .to(logstream::get_cache_enabled) + .authorize_for_stream(Action::GetCacheEnabled), + ), + ) + } + + fn get_ssl_acceptor( + tls_cert: &Option, + tls_key: &Option, + ) -> anyhow::Result> { + match (tls_cert, tls_key) { + (Some(cert), Some(key)) => { + let server_config = ServerConfig::builder() + .with_safe_defaults() + .with_no_client_auth(); + + let cert_file = &mut BufReader::new(File::open(cert)?); + let key_file = &mut BufReader::new(File::open(key)?); + let cert_chain = certs(cert_file)?.into_iter().map(Certificate).collect_vec(); + + let mut keys = pkcs8_private_keys(key_file)? 
+ .into_iter() + .map(PrivateKey) + .collect_vec(); + + Ok(Some( + server_config.with_single_cert(cert_chain, keys.remove(0))?, + )) + } + (_, _) => Ok(None), + } + } +} + +#[async_trait] +impl ParseableServer for IngestServer { + async fn start(&self, prometheus: PrometheusMetrics) -> anyhow::Result<()> { + let server = HttpServer::new(move || { + App::new() + .wrap(prometheus.clone()) + .configure(|config| { + config.service( + web::scope(&base_path()) + // POST "/ingest" ==> Post logs to given log stream based on header + .service( + web::resource("/ingest") + .route( + web::post() + .to(ingest::ingest) + .authorize_for_stream(Action::Ingest), + ) + .app_data( + web::PayloadConfig::default().limit(MAX_EVENT_PAYLOAD_SIZE), + ), + ) + // GET "/liveness" ==> Liveness check as per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command + .service( + web::resource("/liveness") + .route(web::get().to(health_check::liveness)), + ) + // GET "/readiness" ==> Readiness check as per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes + .service( + web::resource("/readiness") + .route(web::get().to(health_check::readiness)), + ) + .service( + web::scope("/logstream") + .service( + web::resource("").route( + web::get() + .to(logstream::list) + .authorize(Action::ListStream), + ), + ) + .service(IngestServer::get_logstream_factory()), + ), + ); + }) + .wrap(actix_web::middleware::Logger::default()) + .wrap(actix_web::middleware::Compress::default()) + .wrap(cross_origin_config()) + }) + .workers(num_cpus::get()); + + let (tls_cert, tls_key) = ( + &CONFIG.parseable.tls_cert_path, + &CONFIG.parseable.tls_key_path, + ); + if let Some(server_config) = IngestServer::get_ssl_acceptor(tls_cert, tls_key)? { + server + .bind_rustls(&CONFIG.parseable.address, server_config)? 
+ .run() + .await?; + } else { + server.bind(&CONFIG.parseable.address)?.run().await?; + } + + Ok(()) + } +} diff --git a/server/src/modal/mod.rs b/server/src/modal/mod.rs new file mode 100644 index 000000000..3bc75963d --- /dev/null +++ b/server/src/modal/mod.rs @@ -0,0 +1,4 @@ +pub mod ingest_server; +pub mod parseable_server; +pub mod query_server; +pub mod server; diff --git a/server/src/modal/query_server.rs b/server/src/modal/query_server.rs new file mode 100644 index 000000000..ea23887e9 --- /dev/null +++ b/server/src/modal/query_server.rs @@ -0,0 +1 @@ +pub struct QueryServer; diff --git a/server/src/modal/server.rs b/server/src/modal/server.rs new file mode 100644 index 000000000..a9e934ad6 --- /dev/null +++ b/server/src/modal/server.rs @@ -0,0 +1,316 @@ +use std::{fs::File, io::BufReader, sync::Arc}; + +use actix_web::{web, App, HttpServer}; +use actix_web_prometheus::PrometheusMetrics; +use async_trait::async_trait; +use openid::Discovered; +use rustls::{Certificate, PrivateKey, ServerConfig}; +use rustls_pemfile::{certs, pkcs8_private_keys}; + +use crate::{handlers::http::{ingest, llm, logstream, middleware::{DisAllowRootUser, RouteExt}, rbac, role, MAX_EVENT_PAYLOAD_SIZE}, oidc, option::CONFIG, rbac::role::Action}; + +use super::parseable_server::{cross_origin_config, ParseableServer, API_BASE_PATH, API_VERSION}; + +include!(concat!(env!("OUT_DIR"), "/generated.rs")); +pub struct SuperServer; + + + +#[async_trait] +impl ParseableServer for SuperServer { + async fn start( + &self, + prometheus: PrometheusMetrics, + oidc_client: Option, + ) -> anyhow::Result<()> { + let oidc_client = match oidc_client { + Some(config) => { + let client = config + .connect(&format!("{API_BASE_PATH}/{API_VERSION}/o/code")) + .await?; + Some(Arc::new(client)) + } + None => None, + }; + + // use app here + // app.configure(|config| ) + let create_app = move || { + App::new() + .wrap(prometheus.clone()) + .configure(|cfg| configure_routes(cfg, oidc_client.clone())) + 
.wrap(actix_web::middleware::Logger::default()) + .wrap(actix_web::middleware::Compress::default()) + .wrap(cross_origin_config()) + }; + + let ssl_acceptor = match ( + &CONFIG.parseable.tls_cert_path, + &CONFIG.parseable.tls_key_path, + ) { + (Some(cert), Some(key)) => { + // init server config builder with safe defaults + let config = ServerConfig::builder() + .with_safe_defaults() + .with_no_client_auth(); + + // load TLS key/cert files + let cert_file = &mut BufReader::new(File::open(cert)?); + let key_file = &mut BufReader::new(File::open(key)?); + + // convert files to key/cert objects + let cert_chain = certs(cert_file)?.into_iter().map(Certificate).collect(); + + let mut keys: Vec = pkcs8_private_keys(key_file)? + .into_iter() + .map(PrivateKey) + .collect(); + + // exit if no keys could be parsed + if keys.is_empty() { + anyhow::bail!("Could not locate PKCS 8 private keys."); + } + + let server_config = config.with_single_cert(cert_chain, keys.remove(0))?; + + Some(server_config) + } + (_, _) => None, + }; + + // concurrent workers equal to number of cores on the cpu + let http_server = HttpServer::new(create_app).workers(num_cpus::get()); + if let Some(config) = ssl_acceptor { + http_server + .bind_rustls(&CONFIG.parseable.address, config)? 
+ .run() + .await?; + } else { + http_server.bind(&CONFIG.parseable.address)?.run().await?; + } + + Ok(()) + } +} + +pub fn configure_routes( + cfg: &mut web::ServiceConfig, + oidc_client: Option>>, +) { + let generated = generate(); + + //log stream API + let logstream_api = web::scope("/{logstream}") + .service( + web::resource("") + // PUT "/logstream/{logstream}" ==> Create log stream + .route( + web::put() + .to(logstream::put_stream) + .authorize_for_stream(Action::CreateStream), + ) + // POST "/logstream/{logstream}" ==> Post logs to given log stream + .route( + web::post() + .to(ingest::post_event) + .authorize_for_stream(Action::Ingest), + ) + // DELETE "/logstream/{logstream}" ==> Delete log stream + .route( + web::delete() + .to(logstream::delete) + .authorize_for_stream(Action::DeleteStream), + ) + .app_data(web::PayloadConfig::default().limit(MAX_EVENT_PAYLOAD_SIZE)), + ) + .service( + web::resource("/alert") + // PUT "/logstream/{logstream}/alert" ==> Set alert for given log stream + .route( + web::put() + .to(logstream::put_alert) + .authorize_for_stream(Action::PutAlert), + ) + // GET "/logstream/{logstream}/alert" ==> Get alert for given log stream + .route( + web::get() + .to(logstream::get_alert) + .authorize_for_stream(Action::GetAlert), + ), + ) + .service( + // GET "/logstream/{logstream}/schema" ==> Get schema for given log stream + web::resource("/schema").route( + web::get() + .to(logstream::schema) + .authorize_for_stream(Action::GetSchema), + ), + ) + .service( + // GET "/logstream/{logstream}/stats" ==> Get stats for given log stream + web::resource("/stats").route( + web::get() + .to(logstream::get_stats) + .authorize_for_stream(Action::GetStats), + ), + ) + .service( + web::resource("/retention") + // PUT "/logstream/{logstream}/retention" ==> Set retention for given logstream + .route( + web::put() + .to(logstream::put_retention) + .authorize_for_stream(Action::PutRetention), + ) + // GET "/logstream/{logstream}/retention" ==> Get 
retention for given logstream + .route( + web::get() + .to(logstream::get_retention) + .authorize_for_stream(Action::GetRetention), + ), + ) + .service( + web::resource("/cache") + // PUT "/logstream/{logstream}/cache" ==> Set retention for given logstream + .route( + web::put() + .to(logstream::put_enable_cache) + .authorize_for_stream(Action::PutCacheEnabled), + ) + // GET "/logstream/{logstream}/cache" ==> Get retention for given logstream + .route( + web::get() + .to(logstream::get_cache_enabled) + .authorize_for_stream(Action::GetCacheEnabled), + ), + ); + + // User API + let user_api = web::scope("/user") + .service( + web::resource("") + // GET /user => List all users + .route(web::get().to(rbac::list_users).authorize(Action::ListUser)), + ) + .service( + web::resource("/{username}") + // PUT /user/{username} => Create a new user + .route(web::post().to(rbac::post_user).authorize(Action::PutUser)) + // DELETE /user/{username} => Delete a user + .route( + web::delete() + .to(rbac::delete_user) + .authorize(Action::DeleteUser), + ) + .wrap(DisAllowRootUser), + ) + .service( + web::resource("/{username}/role") + // PUT /user/{username}/roles => Put roles for user + .route( + web::put() + .to(rbac::put_role) + .authorize(Action::PutUserRoles) + .wrap(DisAllowRootUser), + ) + .route( + web::get() + .to(rbac::get_role) + .authorize_for_user(Action::GetUserRoles), + ), + ) + .service( + web::resource("/{username}/generate-new-password") + // POST /user/{username}/generate-new-password => reset password for this user + .route( + web::post() + .to(rbac::post_gen_password) + .authorize(Action::PutUser) + .wrap(DisAllowRootUser), + ), + ); + + let llm_query_api = web::scope("/llm").service( + web::resource("").route( + web::post() + .to(llm::make_llm_request) + .authorize(Action::QueryLLM), + ), + ); + + let role_api = web::scope("/role") + // GET Role List + .service(web::resource("").route(web::get().to(role::list).authorize(Action::ListRole))) + .service( + // PUT 
and GET Default Role + web::resource("/default") + .route(web::put().to(role::put_default).authorize(Action::PutRole)) + .route(web::get().to(role::get_default).authorize(Action::GetRole)), + ) + .service( + // PUT, GET, DELETE Roles + web::resource("/{name}") + .route(web::put().to(role::put).authorize(Action::PutRole)) + .route(web::delete().to(role::delete).authorize(Action::DeleteRole)) + .route(web::get().to(role::get).authorize(Action::GetRole)), + ); + + let mut oauth_api = web::scope("/o") + .service(web::resource("/login").route(web::get().to(crate::handlers::http::oidc::login))) + .service(web::resource("/logout").route(web::get().to(crate::handlers::http::oidc::logout))) + .service(web::resource("/code").route(web::get().to(crate::handlers::http::oidc::reply_login))); + + if let Some(client) = oidc_client { + info!("Registered oidc client"); + oauth_api = oauth_api.app_data(web::Data::from(client)) + } + + // Deny request if username is same as the env variable P_USERNAME. + cfg.service( + // Base path "{url}/api/v1" + web::scope(&base_path()) + // .wrap(PathFilter) + // POST "/query" ==> Get results of the SQL query passed in request body + .service( + web::resource("/query") + .route(web::post().to(query::query).authorize(Action::Query)), + ) + // POST "/ingest" ==> Post logs to given log stream based on header + .service( + web::resource("/ingest") + .route( + web::post() + .to(ingest::ingest) + .authorize_for_stream(Action::Ingest), + ) + .app_data(web::PayloadConfig::default().limit(MAX_EVENT_PAYLOAD_SIZE)), + ) + // GET "/liveness" ==> Liveness check as per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command + .service(web::resource("/liveness").route(web::get().to(health_check::liveness))) + // GET "/readiness" ==> Readiness check as per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes + 
.service(web::resource("/readiness").route(web::get().to(health_check::readiness))) + // GET "/about" ==> Returns information about instance + .service( + web::resource("/about") + .route(web::get().to(about::about).authorize(Action::GetAbout)), + ) + .service( + web::scope("/logstream") + .service( + // GET "/logstream" ==> Get list of all Log Streams on the server + web::resource("") + .route(web::get().to(logstream::list).authorize(Action::ListStream)), + ) + .service( + // logstream API + logstream_api, + ), + ) + .service(user_api) + .service(llm_query_api) + .service(oauth_api) + .service(role_api), + ) + // GET "/" ==> Serve the static frontend directory + .service(ResourceFiles::new("/", generated).resolve_not_found_to_root()); +} \ No newline at end of file diff --git a/server/src/utils/arrow/merged_reader.rs b/server/src/utils/arrow/merged_reader.rs index 8a31ae200..3ac29f4b7 100644 --- a/server/src/utils/arrow/merged_reader.rs +++ b/server/src/utils/arrow/merged_reader.rs @@ -18,7 +18,6 @@ */ use std::{fs::File, io::BufReader, path::PathBuf, sync::Arc}; - use arrow_array::{RecordBatch, TimestampMillisecondArray}; use arrow_ipc::reader::StreamReader; use arrow_schema::Schema; From 392d704705ef03c241c908cc59d00bb9499c36a3 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 14:00:24 +0530 Subject: [PATCH 02/84] ran cargo fmt --- server/src/main.rs | 2 +- server/src/utils/arrow/merged_reader.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main.rs b/server/src/main.rs index bf90b4e19..4b16256da 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -37,6 +37,7 @@ mod localcache; mod metadata; mod metrics; mod migration; +mod modal; mod oidc; mod option; mod query; @@ -46,7 +47,6 @@ mod stats; mod storage; mod utils; mod validator; -mod modal; use option::CONFIG; diff --git a/server/src/utils/arrow/merged_reader.rs b/server/src/utils/arrow/merged_reader.rs index 3ac29f4b7..ef76ddf3f 100644 --- 
a/server/src/utils/arrow/merged_reader.rs +++ b/server/src/utils/arrow/merged_reader.rs @@ -17,11 +17,11 @@ * */ -use std::{fs::File, io::BufReader, path::PathBuf, sync::Arc}; use arrow_array::{RecordBatch, TimestampMillisecondArray}; use arrow_ipc::reader::StreamReader; use arrow_schema::Schema; use itertools::kmerge_by; +use std::{fs::File, io::BufReader, path::PathBuf, sync::Arc}; use super::{ adapt_batch, From daa701b9b780c94debca49658a5f3686ccc218e9 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 14:02:06 +0530 Subject: [PATCH 03/84] made stuff public on crate level Might need to change this later --- server/src/handlers/http.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/server/src/handlers/http.rs b/server/src/handlers/http.rs index 5b72b6115..a3c290915 100644 --- a/server/src/handlers/http.rs +++ b/server/src/handlers/http.rs @@ -37,18 +37,18 @@ use crate::rbac::role::Action; use self::middleware::{DisAllowRootUser, ModeFilter, RouteExt}; -mod about; -mod health_check; -mod ingest; +pub(crate) mod about; +pub(crate) mod health_check; +pub(crate) mod ingest; mod kinesis; -mod llm; -mod logstream; -mod middleware; -mod oidc; +pub(crate) mod llm; +pub(crate) mod logstream; +pub(crate) mod middleware; +pub(crate) mod oidc; mod otel; -mod query; -mod rbac; -mod role; +pub(crate) mod query; +pub(crate) mod rbac; +pub(crate) mod role; include!(concat!(env!("OUT_DIR"), "/generated.rs")); @@ -350,7 +350,7 @@ pub fn configure_routes( .service(ResourceFiles::new("/", generated).resolve_not_found_to_root()); } -fn base_path() -> String { +pub(crate) fn base_path() -> String { format!("{API_BASE_PATH}/{API_VERSION}") } @@ -358,7 +358,7 @@ pub fn metrics_path() -> String { format!("{}/metrics", base_path()) } -fn cross_origin_config() -> Cors { +pub(crate) fn cross_origin_config() -> Cors { if cfg!(feature = "debug") { Cors::permissive().block_on_origin_mismatch(false) } else { From 
d5977cb4611538d7ae8c6a67dabaef377de8c6cc Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 14:08:22 +0530 Subject: [PATCH 04/84] Staring with a Somewhat clean slate --- server/src/modal/ingest_server.rs | 175 ------------------------- server/src/modal/server.rs | 205 +----------------------------- 2 files changed, 3 insertions(+), 377 deletions(-) diff --git a/server/src/modal/ingest_server.rs b/server/src/modal/ingest_server.rs index 8406a05bb..17bc0c231 100644 --- a/server/src/modal/ingest_server.rs +++ b/server/src/modal/ingest_server.rs @@ -1,201 +1,26 @@ -use std::{fs::File, io::BufReader, path::PathBuf}; use super::parseable_server::ParseableServer; -use actix_web::{web, App, HttpServer, Scope}; use actix_web_prometheus::PrometheusMetrics; use async_trait::async_trait; -use itertools::Itertools; -use rustls::{Certificate, PrivateKey, ServerConfig}; -use rustls_pemfile::{certs, pkcs8_private_keys}; use crate::{ - handlers::http::{ - base_path, cross_origin_config, health_check, ingest, logstream, middleware::RouteExt, - MAX_EVENT_PAYLOAD_SIZE, - }, option::CONFIG, - rbac::role::Action, }; pub struct IngestServer; -impl IngestServer { - fn get_logstream_factory() -> Scope { - web::scope("/{logstream}") - .service( - web::resource("") - // PUT "/logstream/{logstream}" ==> Create log stream - .route( - web::put() - .to(logstream::put_stream) - .authorize_for_stream(Action::CreateStream), - ) - // POST "/logstream/{logstream}" ==> Post logs to given log stream - .route( - web::post() - .to(ingest::post_event) - .authorize_for_stream(Action::Ingest), - ) - // DELETE "/logstream/{logstream}" ==> Delete log stream - .route( - web::delete() - .to(logstream::delete) - .authorize_for_stream(Action::DeleteStream), - ) - .app_data(web::PayloadConfig::default().limit(MAX_EVENT_PAYLOAD_SIZE)), - ) - .service( - web::resource("/alert") - // PUT "/logstream/{logstream}/alert" ==> Set alert for given log stream - .route( - web::put() - 
.to(logstream::put_alert) - .authorize_for_stream(Action::PutAlert), - ) - // GET "/logstream/{logstream}/alert" ==> Get alert for given log stream - .route( - web::get() - .to(logstream::get_alert) - .authorize_for_stream(Action::GetAlert), - ), - ) - .service( - // GET "/logstream/{logstream}/schema" ==> Get schema for given log stream - web::resource("/schema").route( - web::get() - .to(logstream::schema) - .authorize_for_stream(Action::GetSchema), - ), - ) - .service( - // GET "/logstream/{logstream}/stats" ==> Get stats for given log stream - web::resource("/stats").route( - web::get() - .to(logstream::get_stats) - .authorize_for_stream(Action::GetStats), - ), - ) - .service( - web::resource("/retention") - // PUT "/logstream/{logstream}/retention" ==> Set retention for given logstream - .route( - web::put() - .to(logstream::put_retention) - .authorize_for_stream(Action::PutRetention), - ) - // GET "/logstream/{logstream}/retention" ==> Get retention for given logstream - .route( - web::get() - .to(logstream::get_retention) - .authorize_for_stream(Action::GetRetention), - ), - ) - .service( - web::resource("/cache") - // PUT "/logstream/{logstream}/cache" ==> Set retention for given logstream - .route( - web::put() - .to(logstream::put_enable_cache) - .authorize_for_stream(Action::PutCacheEnabled), - ) - // GET "/logstream/{logstream}/cache" ==> Get retention for given logstream - .route( - web::get() - .to(logstream::get_cache_enabled) - .authorize_for_stream(Action::GetCacheEnabled), - ), - ) - } - - fn get_ssl_acceptor( - tls_cert: &Option, - tls_key: &Option, - ) -> anyhow::Result> { - match (tls_cert, tls_key) { - (Some(cert), Some(key)) => { - let server_config = ServerConfig::builder() - .with_safe_defaults() - .with_no_client_auth(); - - let cert_file = &mut BufReader::new(File::open(cert)?); - let key_file = &mut BufReader::new(File::open(key)?); - let cert_chain = certs(cert_file)?.into_iter().map(Certificate).collect_vec(); - let mut keys = 
pkcs8_private_keys(key_file)? - .into_iter() - .map(PrivateKey) - .collect_vec(); - Ok(Some( - server_config.with_single_cert(cert_chain, keys.remove(0))?, - )) - } - (_, _) => Ok(None), - } - } -} -#[async_trait] -impl ParseableServer for IngestServer { - async fn start(&self, prometheus: PrometheusMetrics) -> anyhow::Result<()> { - let server = HttpServer::new(move || { App::new() .wrap(prometheus.clone()) - .configure(|config| { - config.service( - web::scope(&base_path()) - // POST "/ingest" ==> Post logs to given log stream based on header - .service( - web::resource("/ingest") - .route( - web::post() - .to(ingest::ingest) - .authorize_for_stream(Action::Ingest), - ) - .app_data( - web::PayloadConfig::default().limit(MAX_EVENT_PAYLOAD_SIZE), - ), - ) - // GET "/liveness" ==> Liveness check as per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command - .service( - web::resource("/liveness") - .route(web::get().to(health_check::liveness)), - ) - // GET "/readiness" ==> Readiness check as per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes - .service( - web::resource("/readiness") - .route(web::get().to(health_check::readiness)), - ) - .service( - web::scope("/logstream") - .service( - web::resource("").route( - web::get() - .to(logstream::list) - .authorize(Action::ListStream), - ), - ) - .service(IngestServer::get_logstream_factory()), - ), - ); - }) .wrap(actix_web::middleware::Logger::default()) .wrap(actix_web::middleware::Compress::default()) .wrap(cross_origin_config()) - }) - .workers(num_cpus::get()); - let (tls_cert, tls_key) = ( - &CONFIG.parseable.tls_cert_path, - &CONFIG.parseable.tls_key_path, - ); - if let Some(server_config) = IngestServer::get_ssl_acceptor(tls_cert, tls_key)? { - server - .bind_rustls(&CONFIG.parseable.address, server_config)? 
.run() .await?; } else { - server.bind(&CONFIG.parseable.address)?.run().await?; } Ok(()) diff --git a/server/src/modal/server.rs b/server/src/modal/server.rs index a9e934ad6..5ad58411c 100644 --- a/server/src/modal/server.rs +++ b/server/src/modal/server.rs @@ -93,224 +93,25 @@ impl ParseableServer for SuperServer { } } -pub fn configure_routes( - cfg: &mut web::ServiceConfig, - oidc_client: Option>>, -) { - let generated = generate(); +impl SuperServer { + fn configure_routes(config: &mut web::ServiceConfig, oidc_client: Option) { + let generated = generate(); - //log stream API - let logstream_api = web::scope("/{logstream}") - .service( - web::resource("") - // PUT "/logstream/{logstream}" ==> Create log stream - .route( - web::put() - .to(logstream::put_stream) - .authorize_for_stream(Action::CreateStream), - ) - // POST "/logstream/{logstream}" ==> Post logs to given log stream - .route( - web::post() - .to(ingest::post_event) - .authorize_for_stream(Action::Ingest), - ) - // DELETE "/logstream/{logstream}" ==> Delete log stream - .route( - web::delete() - .to(logstream::delete) - .authorize_for_stream(Action::DeleteStream), - ) - .app_data(web::PayloadConfig::default().limit(MAX_EVENT_PAYLOAD_SIZE)), - ) - .service( - web::resource("/alert") - // PUT "/logstream/{logstream}/alert" ==> Set alert for given log stream - .route( - web::put() - .to(logstream::put_alert) - .authorize_for_stream(Action::PutAlert), - ) - // GET "/logstream/{logstream}/alert" ==> Get alert for given log stream - .route( - web::get() - .to(logstream::get_alert) - .authorize_for_stream(Action::GetAlert), - ), - ) - .service( - // GET "/logstream/{logstream}/schema" ==> Get schema for given log stream - web::resource("/schema").route( - web::get() - .to(logstream::schema) - .authorize_for_stream(Action::GetSchema), - ), - ) - .service( - // GET "/logstream/{logstream}/stats" ==> Get stats for given log stream - web::resource("/stats").route( - web::get() - .to(logstream::get_stats) - 
.authorize_for_stream(Action::GetStats), - ), - ) - .service( - web::resource("/retention") - // PUT "/logstream/{logstream}/retention" ==> Set retention for given logstream - .route( - web::put() - .to(logstream::put_retention) - .authorize_for_stream(Action::PutRetention), - ) - // GET "/logstream/{logstream}/retention" ==> Get retention for given logstream - .route( - web::get() - .to(logstream::get_retention) - .authorize_for_stream(Action::GetRetention), - ), - ) - .service( - web::resource("/cache") - // PUT "/logstream/{logstream}/cache" ==> Set retention for given logstream - .route( - web::put() - .to(logstream::put_enable_cache) - .authorize_for_stream(Action::PutCacheEnabled), - ) - // GET "/logstream/{logstream}/cache" ==> Get retention for given logstream - .route( - web::get() - .to(logstream::get_cache_enabled) - .authorize_for_stream(Action::GetCacheEnabled), - ), - ); - // User API - let user_api = web::scope("/user") - .service( - web::resource("") - // GET /user => List all users - .route(web::get().to(rbac::list_users).authorize(Action::ListUser)), - ) - .service( - web::resource("/{username}") - // PUT /user/{username} => Create a new user - .route(web::post().to(rbac::post_user).authorize(Action::PutUser)) - // DELETE /user/{username} => Delete a user - .route( - web::delete() - .to(rbac::delete_user) - .authorize(Action::DeleteUser), - ) - .wrap(DisAllowRootUser), - ) - .service( - web::resource("/{username}/role") - // PUT /user/{username}/roles => Put roles for user - .route( - web::put() - .to(rbac::put_role) - .authorize(Action::PutUserRoles) - .wrap(DisAllowRootUser), - ) - .route( web::get() - .to(rbac::get_role) - .authorize_for_user(Action::GetUserRoles), ), - ) - .service( - web::resource("/{username}/generate-new-password") - // POST /user/{username}/generate-new-password => reset password for this user - .route( - web::post() - .to(rbac::post_gen_password) - .authorize(Action::PutUser) - .wrap(DisAllowRootUser), ), - ); - - let 
llm_query_api = web::scope("/llm").service( - web::resource("").route( - web::post() - .to(llm::make_llm_request) - .authorize(Action::QueryLLM), - ), - ); - - let role_api = web::scope("/role") - // GET Role List - .service(web::resource("").route(web::get().to(role::list).authorize(Action::ListRole))) - .service( - // PUT and GET Default Role - web::resource("/default") - .route(web::put().to(role::put_default).authorize(Action::PutRole)) - .route(web::get().to(role::get_default).authorize(Action::GetRole)), - ) - .service( - // PUT, GET, DELETE Roles - web::resource("/{name}") - .route(web::put().to(role::put).authorize(Action::PutRole)) - .route(web::delete().to(role::delete).authorize(Action::DeleteRole)) - .route(web::get().to(role::get).authorize(Action::GetRole)), - ); - let mut oauth_api = web::scope("/o") - .service(web::resource("/login").route(web::get().to(crate::handlers::http::oidc::login))) - .service(web::resource("/logout").route(web::get().to(crate::handlers::http::oidc::logout))) - .service(web::resource("/code").route(web::get().to(crate::handlers::http::oidc::reply_login))); - - if let Some(client) = oidc_client { - info!("Registered oidc client"); - oauth_api = oauth_api.app_data(web::Data::from(client)) } - // Deny request if username is same as the env variable P_USERNAME. 
- cfg.service( - // Base path "{url}/api/v1" - web::scope(&base_path()) - // .wrap(PathFilter) - // POST "/query" ==> Get results of the SQL query passed in request body .service( - web::resource("/query") - .route(web::post().to(query::query).authorize(Action::Query)), ) - // POST "/ingest" ==> Post logs to given log stream based on header .service( - web::resource("/ingest") .route( - web::post() - .to(ingest::ingest) - .authorize_for_stream(Action::Ingest), ) - .app_data(web::PayloadConfig::default().limit(MAX_EVENT_PAYLOAD_SIZE)), ) - // GET "/liveness" ==> Liveness check as per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command - .service(web::resource("/liveness").route(web::get().to(health_check::liveness))) - // GET "/readiness" ==> Readiness check as per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes - .service(web::resource("/readiness").route(web::get().to(health_check::readiness))) - // GET "/about" ==> Returns information about instance .service( - web::resource("/about") - .route(web::get().to(about::about).authorize(Action::GetAbout)), ) .service( - web::scope("/logstream") - .service( - // GET "/logstream" ==> Get list of all Log Streams on the server - web::resource("") - .route(web::get().to(logstream::list).authorize(Action::ListStream)), - ) - .service( - // logstream API - logstream_api, ), ) - .service(user_api) - .service(llm_query_api) - .service(oauth_api) - .service(role_api), - ) - // GET "/" ==> Serve the static frontend directory - .service(ResourceFiles::new("/", generated).resolve_not_found_to_root()); -} \ No newline at end of file From f5d859c6392ddae48b73884b9e6d087cbdcb686b Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 14:09:56 +0530 Subject: [PATCH 05/84] create server.rs + impl ParseableServer Trait --- server/src/modal/server.rs | 260 
+++++++++++++++++++++++++++++++++++-- 1 file changed, 248 insertions(+), 12 deletions(-) diff --git a/server/src/modal/server.rs b/server/src/modal/server.rs index 5ad58411c..517a130ee 100644 --- a/server/src/modal/server.rs +++ b/server/src/modal/server.rs @@ -1,27 +1,44 @@ +use crate::handlers::http::about; +use crate::handlers::http::base_path; +use crate::handlers::http::health_check; +use crate::handlers::http::query; +use crate::handlers::http::API_BASE_PATH; +use crate::handlers::http::API_VERSION; use std::{fs::File, io::BufReader, sync::Arc}; +use actix_web::web::resource; +use actix_web::Resource; +use actix_web::Scope; use actix_web::{web, App, HttpServer}; use actix_web_prometheus::PrometheusMetrics; +use actix_web_static_files::ResourceFiles; use async_trait::async_trait; -use openid::Discovered; + use rustls::{Certificate, PrivateKey, ServerConfig}; use rustls_pemfile::{certs, pkcs8_private_keys}; -use crate::{handlers::http::{ingest, llm, logstream, middleware::{DisAllowRootUser, RouteExt}, rbac, role, MAX_EVENT_PAYLOAD_SIZE}, oidc, option::CONFIG, rbac::role::Action}; +use crate::{ + handlers::http::{ + cross_origin_config, ingest, llm, logstream, + middleware::{DisAllowRootUser, RouteExt}, + oidc, rbac, role, MAX_EVENT_PAYLOAD_SIZE, + }, + option::CONFIG, + rbac::role::Action, +}; -use super::parseable_server::{cross_origin_config, ParseableServer, API_BASE_PATH, API_VERSION}; +use super::parseable_server::OpenIdClient; +use super::parseable_server::ParseableServer; include!(concat!(env!("OUT_DIR"), "/generated.rs")); pub struct SuperServer; - - -#[async_trait] +#[async_trait(?Send)] impl ParseableServer for SuperServer { async fn start( &self, prometheus: PrometheusMetrics, - oidc_client: Option, + oidc_client: Option, ) -> anyhow::Result<()> { let oidc_client = match oidc_client { Some(config) => { @@ -33,12 +50,10 @@ impl ParseableServer for SuperServer { None => None, }; - // use app here - // app.configure(|config| ) - let create_app = move || 
{ + let create_app_fn = move || { App::new() .wrap(prometheus.clone()) - .configure(|cfg| configure_routes(cfg, oidc_client.clone())) + .configure(|cfg| SuperServer::configure_routes(cfg, oidc_client.clone())) .wrap(actix_web::middleware::Logger::default()) .wrap(actix_web::middleware::Compress::default()) .wrap(cross_origin_config()) @@ -79,7 +94,7 @@ impl ParseableServer for SuperServer { }; // concurrent workers equal to number of cores on the cpu - let http_server = HttpServer::new(create_app).workers(num_cpus::get()); + let http_server = HttpServer::new(create_app_fn).workers(num_cpus::get()); if let Some(config) = ssl_acceptor { http_server .bind_rustls(&CONFIG.parseable.address, config)? @@ -97,21 +112,242 @@ impl SuperServer { fn configure_routes(config: &mut web::ServiceConfig, oidc_client: Option) { let generated = generate(); + config + .service( + web::scope(&base_path()) + // POST "/query" ==> Get results of the SQL query passed in request body + .service(Self::get_query_factory()) + .service(Self::get_ingest_factory()) + .service(Self::get_liveness_factory()) + .service(Self::get_readiness_factory()) + .service(Self::get_about_factory()) + .service( + web::scope("/logstream") + .service( + // GET "/logstream" ==> Get list of all Log Streams on the server + web::resource("").route( + web::get().to(logstream::list).authorize(Action::ListStream), + ), + ) + .service(Self::get_logstream_webscope()), + ) + .service(Self::get_user_webscope()) + .service(Self::get_llm_webscope()) + .service(Self::get_user_role_webscope()) + .service(Self::get_oauth_webscope(oidc_client)), + ) + // GET "/" ==> Serve the static frontend directory + .service(ResourceFiles::new("/", generated).resolve_not_found_to_root()); + } + + // get the query factory + pub fn get_query_factory() -> Resource { + web::resource("/query").route(web::post().to(query::query).authorize(Action::Query)) + } + // get the logstream web scope + pub fn get_logstream_webscope() -> Scope { + 
web::scope("/{logstream}") + .service( + web::resource("") + // PUT "/logstream/{logstream}" ==> Create log stream + .route( + web::put() + .to(logstream::put_stream) + .authorize_for_stream(Action::CreateStream), + ) + // POST "/logstream/{logstream}" ==> Post logs to given log stream + .route( + web::post() + .to(ingest::post_event) + .authorize_for_stream(Action::Ingest), + ) + // DELETE "/logstream/{logstream}" ==> Delete log stream + .route( + web::delete() + .to(logstream::delete) + .authorize_for_stream(Action::DeleteStream), + ) + .app_data(web::PayloadConfig::default().limit(MAX_EVENT_PAYLOAD_SIZE)), + ) + .service( + web::resource("/alert") + // PUT "/logstream/{logstream}/alert" ==> Set alert for given log stream + .route( + web::put() + .to(logstream::put_alert) + .authorize_for_stream(Action::PutAlert), + ) + // GET "/logstream/{logstream}/alert" ==> Get alert for given log stream + .route( + web::get() + .to(logstream::get_alert) + .authorize_for_stream(Action::GetAlert), + ), + ) + .service( + // GET "/logstream/{logstream}/schema" ==> Get schema for given log stream + web::resource("/schema").route( web::get() + .to(logstream::schema) + .authorize_for_stream(Action::GetSchema), ), + ) + .service( + // GET "/logstream/{logstream}/stats" ==> Get stats for given log stream + web::resource("/stats").route( + web::get() + .to(logstream::get_stats) + .authorize_for_stream(Action::GetStats), ), + ) + .service( + web::resource("/retention") + // PUT "/logstream/{logstream}/retention" ==> Set retention for given logstream + .route( + web::put() + .to(logstream::put_retention) + .authorize_for_stream(Action::PutRetention), + ) + // GET "/logstream/{logstream}/retention" ==> Get retention for given logstream + .route( + web::get() + .to(logstream::get_retention) + .authorize_for_stream(Action::GetRetention), + ), + ) + .service( + web::resource("/cache") + // PUT "/logstream/{logstream}/cache" ==> Set retention for given logstream + .route( + web::put() + 
.to(logstream::put_enable_cache) + .authorize_for_stream(Action::PutCacheEnabled), + ) + // GET "/logstream/{logstream}/cache" ==> Get retention for given logstream + .route( + web::get() + .to(logstream::get_cache_enabled) + .authorize_for_stream(Action::GetCacheEnabled), + ), + ) + } + // get the factory for the ingest route + pub fn get_ingest_factory() -> Resource { + web::resource("/ingest") + .route( + web::post() + .to(ingest::ingest) + .authorize_for_stream(Action::Ingest), + ) + .app_data(web::PayloadConfig::default().limit(MAX_EVENT_PAYLOAD_SIZE)) + } + + // get the oauth webscope + pub fn get_oauth_webscope(oidc_client: Option) -> Scope { + let oauth = web::scope("/o") + .service(resource("/login").route(web::get().to(oidc::login))) + .service(resource("/logout").route(web::get().to(oidc::logout))) + .service(resource("/code").route(web::get().to(oidc::reply_login))); + + if let Some(client) = oidc_client { + oauth.app_data(web::Data::from(client)) + } else { + oauth + } + } + + // get the role webscope + pub fn get_user_role_webscope() -> Scope { + web::scope("/role") + // GET Role List + .service(resource("").route(web::get().to(role::list).authorize(Action::ListRole))) + .service( + // PUT and GET Default Role + resource("/default") + .route(web::put().to(role::put_default).authorize(Action::PutRole)) + .route(web::get().to(role::get_default).authorize(Action::GetRole)), + ) + .service( + // PUT, GET, DELETE Roles + resource("/{name}") + .route(web::put().to(role::put).authorize(Action::PutRole)) + .route(web::delete().to(role::delete).authorize(Action::DeleteRole)) + .route(web::get().to(role::get).authorize(Action::GetRole)), + ) } + // get the user webscope + pub fn get_user_webscope() -> Scope { + web::scope("/user") .service( + web::resource("") + // GET /user => List all users + .route(web::get().to(rbac::list_users).authorize(Action::ListUser)), ) .service( + web::resource("/{username}") + // PUT /user/{username} => Create a new user + 
.route(web::post().to(rbac::post_user).authorize(Action::PutUser)) + // DELETE /user/{username} => Delete a user .route( + web::delete() + .to(rbac::delete_user) + .authorize(Action::DeleteUser), ) + .wrap(DisAllowRootUser), ) .service( + web::resource("/{username}/role") + // PUT /user/{username}/roles => Put roles for user + .route( + web::put() + .to(rbac::put_role) + .authorize(Action::PutUserRoles) + .wrap(DisAllowRootUser), + ) + .route( + web::get() + .to(rbac::get_role) + .authorize_for_user(Action::GetUserRoles), + ), ) .service( + web::resource("/{username}/generate-new-password") + // POST /user/{username}/generate-new-password => reset password for this user + .route( + web::post() + .to(rbac::post_gen_password) + .authorize(Action::PutUser) + .wrap(DisAllowRootUser), ), ) + } + + // get the llm webscope + pub fn get_llm_webscope() -> Scope { + web::scope("/llm").service( + web::resource("").route( + web::post() + .to(llm::make_llm_request) + .authorize(Action::QueryLLM), + ), + ) + } + + // get the live check + pub fn get_liveness_factory() -> Resource { + web::resource("/liveness").route(web::get().to(health_check::liveness)) + } + + // get the readiness check + pub fn get_readiness_factory() -> Resource { + web::resource("/readiness").route(web::get().to(health_check::readiness)) + } + + // get the about factory + pub fn get_about_factory() -> Resource { + web::resource("/about").route(web::get().to(about::about).authorize(Action::GetAbout)) + } +} From 15b1909655d6a04e9a45a286a55cae10477473b1 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 14:10:15 +0530 Subject: [PATCH 06/84] create ssl_acceptor.rs --- server/src/modal/ssl_acceptor.rs | 36 ++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 server/src/modal/ssl_acceptor.rs diff --git a/server/src/modal/ssl_acceptor.rs b/server/src/modal/ssl_acceptor.rs new file mode 100644 index 000000000..1b168582a --- /dev/null +++ 
b/server/src/modal/ssl_acceptor.rs @@ -0,0 +1,36 @@ +use std::{fs::File, io::BufReader, path::PathBuf}; + +use itertools::Itertools; +use rustls::{Certificate, PrivateKey, ServerConfig}; +use rustls_pemfile::{certs, pkcs8_private_keys}; + +pub fn get_ssl_acceptor( + tls_cert: &Option, + tls_key: &Option, +) -> anyhow::Result> { + match (tls_cert, tls_key) { + (Some(cert), Some(key)) => { + let server_config = ServerConfig::builder() + .with_safe_defaults() + .with_no_client_auth(); + + let cert_file = &mut BufReader::new(File::open(cert)?); + let key_file = &mut BufReader::new(File::open(key)?); + let cert_chain = certs(cert_file)?.into_iter().map(Certificate).collect_vec(); + + let mut keys = pkcs8_private_keys(key_file)? + .into_iter() + .map(PrivateKey) + .collect_vec(); + + if keys.is_empty() { + anyhow::bail!("Could not locate PKCS 8 private keys."); + } + + Ok(Some( + server_config.with_single_cert(cert_chain, keys.remove(0))?, + )) + } + (_, _) => Ok(None), + } +} From b32c53997f364745c30bfbc6a51d926f49a412c5 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 14:10:39 +0530 Subject: [PATCH 07/84] General Implementation --- server/src/modal/ingest_server.rs | 59 +++++++++++++++++ server/src/modal/mod.rs | 1 + server/src/modal/query_server.rs | 106 ++++++++++++++++++++++++++++++ 3 files changed, 166 insertions(+) diff --git a/server/src/modal/ingest_server.rs b/server/src/modal/ingest_server.rs index 17bc0c231..028e7f148 100644 --- a/server/src/modal/ingest_server.rs +++ b/server/src/modal/ingest_server.rs @@ -1,28 +1,87 @@ +use crate::handlers::http::API_BASE_PATH; +use crate::handlers::http::API_VERSION; +use std::sync::Arc; + +use super::parseable_server::OpenIdClient; use super::parseable_server::ParseableServer; +use super::server::SuperServer; +use super::ssl_acceptor::get_ssl_acceptor; + +use actix_web::{web, App, HttpServer}; use actix_web_prometheus::PrometheusMetrics; use async_trait::async_trait; use crate::{ + 
handlers::http::{base_path, cross_origin_config}, option::CONFIG, }; pub struct IngestServer; +#[async_trait(?Send)] +impl ParseableServer for IngestServer { + async fn start( + &self, + prometheus: PrometheusMetrics, + oidc_client: Option, + ) -> anyhow::Result<()> { + let oidc_client = match oidc_client { + Some(config) => { + let client = config + .connect(&format!("{API_BASE_PATH}/{API_VERSION}/o/code")) + .await?; + Some(Arc::new(client)) + } + None => None, + }; + let ssl = get_ssl_acceptor( + &CONFIG.parseable.tls_cert_path, + &CONFIG.parseable.tls_key_path, + )?; + let create_app_fn = move || { App::new() .wrap(prometheus.clone()) + .configure(|config| IngestServer::configure_routes(config, oidc_client.clone())) .wrap(actix_web::middleware::Logger::default()) .wrap(actix_web::middleware::Compress::default()) .wrap(cross_origin_config()) + }; + // concurrent workers equal to number of logical cores + let http_server = HttpServer::new(create_app_fn).workers(num_cpus::get()); + + if let Some(config) = ssl { + http_server + .bind_rustls(&CONFIG.parseable.address, config)? 
.run() .await?; } else { + http_server.bind(&CONFIG.parseable.address)?.run().await?; } Ok(()) } } + +impl IngestServer { + // configure the api routes + fn configure_routes(config: &mut web::ServiceConfig, _odic_client: Option) { + let logstream_scope = SuperServer::get_logstream_webscope(); + let ingest_factory = SuperServer::get_ingest_factory(); + + config + .service( + // Base path "{url}/api/v1" + web::scope(&base_path()).service(ingest_factory), + ) + // GET "/liveness" ==> Liveness check as per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command + .service(SuperServer::get_liveness_factory()) + // GET "/readiness" ==> Readiness check as per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes + .service(SuperServer::get_readiness_factory()) + .service(logstream_scope); + } +} diff --git a/server/src/modal/mod.rs b/server/src/modal/mod.rs index 3bc75963d..28113e13f 100644 --- a/server/src/modal/mod.rs +++ b/server/src/modal/mod.rs @@ -2,3 +2,4 @@ pub mod ingest_server; pub mod parseable_server; pub mod query_server; pub mod server; +pub mod ssl_acceptor; diff --git a/server/src/modal/query_server.rs b/server/src/modal/query_server.rs index ea23887e9..b7083636f 100644 --- a/server/src/modal/query_server.rs +++ b/server/src/modal/query_server.rs @@ -1 +1,107 @@ +use actix_web::web; +use std::sync::Arc; + +use crate::handlers::http::middleware::RouteExt; +use crate::handlers::http::{ + about, base_path, cross_origin_config, logstream, query, API_BASE_PATH, API_VERSION, +}; +use crate::rbac::role::Action; +use actix_web::web::ServiceConfig; +use actix_web::{App, HttpServer}; +use actix_web_static_files::ResourceFiles; +use async_trait::async_trait; + +use crate::option::CONFIG; + +use super::parseable_server::{OpenIdClient, ParseableServer}; +use super::server::SuperServer; +use 
super::ssl_acceptor::get_ssl_acceptor; + +include!(concat!(env!("OUT_DIR"), "/generated.rs")); + pub struct QueryServer; + +#[async_trait(?Send)] +impl ParseableServer for QueryServer { + async fn start( + &self, + prometheus: actix_web_prometheus::PrometheusMetrics, + oidc_client: Option, + ) -> anyhow::Result<()> { + let oidc_client = match oidc_client { + Some(config) => { + let client = config + .connect(&format!("{API_BASE_PATH}/{API_VERSION}/o/code")) + .await?; + Some(Arc::new(client)) + } + + None => None, + }; + + let ssl = get_ssl_acceptor( + &CONFIG.parseable.tls_cert_path, + &CONFIG.parseable.tls_key_path, + )?; + + let create_app_fn = move || { + App::new() + .wrap(prometheus.clone()) + .configure(|config| QueryServer::configure_routes(config, oidc_client.clone())) + .wrap(actix_web::middleware::Logger::default()) + .wrap(actix_web::middleware::Compress::default()) + .wrap(cross_origin_config()) + }; + + // concurrent workers equal to number of cores on the cpu + let http_server = HttpServer::new(create_app_fn).workers(num_cpus::get()); + if let Some(config) = ssl { + http_server + .bind_rustls(&CONFIG.parseable.address, config)? 
+ .run() + .await?; + } else { + http_server.bind(&CONFIG.parseable.address)?.run().await?; + } + + Ok(()) + } +} + +impl QueryServer { + // configure the api routes + pub fn configure_routes(config: &mut ServiceConfig, oidc_client: Option) { + let generated = generate(); + + let user_scope = SuperServer::get_user_webscope(); + let llm_scope = SuperServer::get_llm_webscope(); + let role_scope = SuperServer::get_user_role_webscope(); + let oauth_scope = SuperServer::get_oauth_webscope(oidc_client); + + config + .service( + web::scope(&base_path()) + // POST "/query" ==> Get results of the SQL query passed in request body + .service(SuperServer::get_query_factory()) + // GET "/liveness" ==> Liveness check as per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command + .service(SuperServer::get_liveness_factory()) + // GET "/readiness" ==> Readiness check as per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes + .service(SuperServer::get_readiness_factory()) + // GET "/about" ==> Returns information about instance + .service(SuperServer::get_about_factory()) + .service( + web::scope("/logstream").service( + // GET "/logstream" ==> Get list of all Log Streams on the server + web::resource("").route( + web::get().to(logstream::list).authorize(Action::ListStream), + ), + ), + ) + .service(user_scope) + .service(llm_scope) + .service(oauth_scope) + .service(role_scope), + ) + .service(ResourceFiles::new("/", generated).resolve_not_found_to_root()); + } +} From d601538ef452463037f0975aece05bd6e5db3336 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 14:32:15 +0530 Subject: [PATCH 08/84] chore: move cli handler to a seperate file --- server/src/cli.rs | 466 +++++++++++++++++++++++++++++++++++++++++++ server/src/option.rs | 422 --------------------------------------- 2 files changed, 466 insertions(+), 422 
deletions(-) create mode 100644 server/src/cli.rs diff --git a/server/src/cli.rs b/server/src/cli.rs new file mode 100644 index 000000000..691a7a3ca --- /dev/null +++ b/server/src/cli.rs @@ -0,0 +1,466 @@ +/* + * Parseable Server (C) 2022 - 2024 Parseable, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + */ + + +use clap::{value_parser, Arg, ArgGroup, Command, FromArgMatches}; +use std::path::PathBuf; + +use url::Url; + +use crate::{oidc::{self, OpenidConfig}, option::{validation, Compression, Mode}}; + +#[derive(Debug, Default)] +pub struct Cli { + /// The location of TLS Cert file + pub tls_cert_path: Option, + + /// The location of TLS Private Key file + pub tls_key_path: Option, + + /// The address on which the http server will listen. + pub address: String, + + /// Base domain under which server is hosted. + /// This information is used by OIDC to refer redirects + pub domain_address: Option, + + /// The local staging path is used as a temporary landing point + /// for incoming events and local cache + pub local_staging_path: PathBuf, + + /// The local cache path is used for speeding up query on latest data + pub local_cache_path: Option, + + /// Size for local cache + pub local_cache_size: u64, + + /// Interval in seconds after which uncommited data would be + /// uploaded to the storage platform. 
+ pub upload_interval: u64, + + /// Username for the basic authentication on the server + pub username: String, + + /// Password for the basic authentication on the server + pub password: String, + + /// OpenId configuration + pub openid: Option, + + /// Server should check for update or not + pub check_update: bool, + + /// Server should send anonymous analytics or not + pub send_analytics: bool, + + /// Open AI access key + pub open_ai_key: Option, + + /// Livetail port + pub grpc_port: u16, + + /// Livetail channel capacity + pub livetail_channel_capacity: usize, + + /// Rows in Parquet Rowgroup + pub row_group_size: usize, + + /// Query memory limit in bytes + pub query_memory_pool_size: Option, + + /// Parquet compression algorithm + pub parquet_compression: Compression, + + /// Mode of operation + pub mode: Mode, +} + +impl Cli { + // identifiers for arguments + pub const TLS_CERT: &'static str = "tls-cert-path"; + pub const TLS_KEY: &'static str = "tls-key-path"; + pub const ADDRESS: &'static str = "address"; + pub const DOMAIN_URI: &'static str = "origin"; + pub const STAGING: &'static str = "local-staging-path"; + pub const CACHE: &'static str = "cache-path"; + pub const CACHE_SIZE: &'static str = "cache-size"; + pub const UPLOAD_INTERVAL: &'static str = "upload-interval"; + pub const USERNAME: &'static str = "username"; + pub const PASSWORD: &'static str = "password"; + pub const CHECK_UPDATE: &'static str = "check-update"; + pub const SEND_ANALYTICS: &'static str = "send-analytics"; + pub const OPEN_AI_KEY: &'static str = "open-ai-key"; + pub const OPENID_CLIENT_ID: &'static str = "oidc-client"; + pub const OPENID_CLIENT_SECRET: &'static str = "oidc-client-secret"; + pub const OPENID_ISSUER: &'static str = "oidc-issuer"; + pub const GRPC_PORT: &'static str = "grpc-port"; + pub const LIVETAIL_CAPACITY: &'static str = "livetail-capacity"; + // todo : what should this flag be + pub const QUERY_MEM_POOL_SIZE: &'static str = "query-mempool-size"; + pub const 
ROW_GROUP_SIZE: &'static str = "row-group-size"; + pub const PARQUET_COMPRESSION_ALGO: &'static str = "compression-algo"; + pub const MODE: &'static str = "mode"; + pub const DEFAULT_USERNAME: &'static str = "admin"; + pub const DEFAULT_PASSWORD: &'static str = "admin"; + + pub fn local_stream_data_path(&self, stream_name: &str) -> PathBuf { + self.local_staging_path.join(stream_name) + } + + pub fn get_scheme(&self) -> String { + if self.tls_cert_path.is_some() && self.tls_key_path.is_some() { + return "https".to_string(); + } + "http".to_string() + } + + pub fn create_cli_command_with_clap(name: &'static str) -> Command { + Command::new(name).next_line_help(false) + .arg( + Arg::new(Self::TLS_CERT) + .long(Self::TLS_CERT) + .env("P_TLS_CERT_PATH") + .value_name("PATH") + .value_parser(validation::file_path) + .help("Local path on this device where certificate file is located. Required to enable TLS"), + ) + .arg( + Arg::new(Self::TLS_KEY) + .long(Self::TLS_KEY) + .env("P_TLS_KEY_PATH") + .value_name("PATH") + .value_parser(validation::file_path) + .help("Local path on this device where private key file is located. 
Required to enable TLS"), + ) + .arg( + Arg::new(Self::ADDRESS) + .long(Self::ADDRESS) + .env("P_ADDR") + .value_name("ADDR:PORT") + .default_value("0.0.0.0:8000") + .value_parser(validation::socket_addr) + .help("Address and port for Parseable HTTP(s) server"), + ) + .arg( + Arg::new(Self::STAGING) + .long(Self::STAGING) + .env("P_STAGING_DIR") + .value_name("DIR") + .default_value("./staging") + .value_parser(validation::canonicalize_path) + .help("Local path on this device to be used as landing point for incoming events") + .next_line_help(true), + ) + .arg( + Arg::new(Self::CACHE) + .long(Self::CACHE) + .env("P_CACHE_DIR") + .value_name("DIR") + .value_parser(validation::canonicalize_path) + .help("Local path on this device to be used for caching data") + .next_line_help(true), + ) + .arg( + Arg::new(Self::CACHE_SIZE) + .long(Self::CACHE_SIZE) + .env("P_CACHE_SIZE") + .value_name("size") + .default_value("1GiB") + .value_parser(validation::cache_size) + .help("Maximum allowed cache size for all streams combined (In human readable format, e.g 1GiB, 2GiB, 100MB)") + .next_line_help(true), + ) + .arg( + Arg::new(Self::UPLOAD_INTERVAL) + .long(Self::UPLOAD_INTERVAL) + .env("P_STORAGE_UPLOAD_INTERVAL") + .value_name("SECONDS") + .default_value("60") + .value_parser(validation::upload_interval) + .help("Interval in seconds after which staging data would be sent to the storage") + .next_line_help(true), + ) + .arg( + Arg::new(Self::USERNAME) + .long(Self::USERNAME) + .env("P_USERNAME") + .value_name("STRING") + .required(true) + .help("Admin username to be set for this Parseable server"), + ) + .arg( + Arg::new(Self::PASSWORD) + .long(Self::PASSWORD) + .env("P_PASSWORD") + .value_name("STRING") + .required(true) + .help("Admin password to be set for this Parseable server"), + ) + .arg( + Arg::new(Self::CHECK_UPDATE) + .long(Self::CHECK_UPDATE) + .env("P_CHECK_UPDATE") + .value_name("BOOL") + .required(false) + .default_value("true") + 
.value_parser(value_parser!(bool)) + .help("Enable/Disable checking for new Parseable release"), + ) + .arg( + Arg::new(Self::SEND_ANALYTICS) + .long(Self::SEND_ANALYTICS) + .env("P_SEND_ANONYMOUS_USAGE_DATA") + .value_name("BOOL") + .required(false) + .default_value("true") + .value_parser(value_parser!(bool)) + .help("Enable/Disable anonymous telemetry data collection"), + ) + .arg( + Arg::new(Self::OPEN_AI_KEY) + .long(Self::OPEN_AI_KEY) + .env("P_OPENAI_API_KEY") + .value_name("STRING") + .required(false) + .help("OpenAI key to enable llm features"), + ) + .arg( + Arg::new(Self::OPENID_CLIENT_ID) + .long(Self::OPENID_CLIENT_ID) + .env("P_OIDC_CLIENT_ID") + .value_name("STRING") + .required(false) + .help("Client id for OIDC provider"), + ) + .arg( + Arg::new(Self::OPENID_CLIENT_SECRET) + .long(Self::OPENID_CLIENT_SECRET) + .env("P_OIDC_CLIENT_SECRET") + .value_name("STRING") + .required(false) + .help("Client secret for OIDC provider"), + ) + .arg( + Arg::new(Self::OPENID_ISSUER) + .long(Self::OPENID_ISSUER) + .env("P_OIDC_ISSUER") + .value_name("URl") + .required(false) + .value_parser(validation::url) + .help("OIDC provider's host address"), + ) + .arg( + Arg::new(Self::DOMAIN_URI) + .long(Self::DOMAIN_URI) + .env("P_ORIGIN_URI") + .value_name("URL") + .required(false) + .value_parser(validation::url) + .help("Parseable server global domain address"), + ) + .arg( + Arg::new(Self::GRPC_PORT) + .long(Self::GRPC_PORT) + .env("P_GRPC_PORT") + .value_name("PORT") + .default_value("8001") + .required(false) + .value_parser(value_parser!(u16)) + .help("Port for gRPC server"), + ) + .arg( + Arg::new(Self::LIVETAIL_CAPACITY) + .long(Self::LIVETAIL_CAPACITY) + .env("P_LIVETAIL_CAPACITY") + .value_name("NUMBER") + .default_value("1000") + .required(false) + .value_parser(value_parser!(usize)) + .help("Number of rows in livetail channel"), + ) + .arg( + Arg::new(Self::QUERY_MEM_POOL_SIZE) + .long(Self::QUERY_MEM_POOL_SIZE) + .env("P_QUERY_MEMORY_LIMIT") + 
.value_name("Gib") + .required(false) + .value_parser(value_parser!(u8)) + .help("Set a fixed memory limit for query"), + ) + .arg( + Arg::new(Self::ROW_GROUP_SIZE) + .long(Self::ROW_GROUP_SIZE) + .env("P_PARQUET_ROW_GROUP_SIZE") + .value_name("NUMBER") + .required(false) + .default_value("16384") + .value_parser(value_parser!(usize)) + .help("Number of rows in a row group"), + ).arg( + Arg::new(Self::MODE) + .long(Self::MODE) + .env("P_MODE") + .value_name("STRING") + .required(false) + .default_value("all") + .value_parser([ + "query", + "ingest", + "all"]) + .help("Mode of operation"), + ) + .arg( + Arg::new(Self::PARQUET_COMPRESSION_ALGO) + .long(Self::PARQUET_COMPRESSION_ALGO) + .env("P_PARQUET_COMPRESSION_ALGO") + .value_name("[UNCOMPRESSED, SNAPPY, GZIP, LZO, BROTLI, LZ4, ZSTD]") + .required(false) + .default_value("lz4") + .value_parser([ + "uncompressed", + "snappy", + "gzip", + "lzo", + "brotli", + "lz4", + "zstd"]) + .help("Parquet compression algorithm"), + ).group( + ArgGroup::new("oidc") + .args([Self::OPENID_CLIENT_ID, Self::OPENID_CLIENT_SECRET, Self::OPENID_ISSUER]) + .requires_all([Self::OPENID_CLIENT_ID, Self::OPENID_CLIENT_SECRET, Self::OPENID_ISSUER]) + .multiple(true) + ) + } +} + +impl FromArgMatches for Cli { + fn from_arg_matches(m: &clap::ArgMatches) -> Result { + let mut s: Self = Self::default(); + s.update_from_arg_matches(m)?; + Ok(s) + } + + fn update_from_arg_matches(&mut self, m: &clap::ArgMatches) -> Result<(), clap::Error> { + self.local_cache_path = m.get_one::(Self::CACHE).cloned(); + self.tls_cert_path = m.get_one::(Self::TLS_CERT).cloned(); + self.tls_key_path = m.get_one::(Self::TLS_KEY).cloned(); + self.domain_address = m.get_one::(Self::DOMAIN_URI).cloned(); + let openid_client_id = m.get_one::(Self::OPENID_CLIENT_ID).cloned(); + let openid_client_secret = m.get_one::(Self::OPENID_CLIENT_SECRET).cloned(); + let openid_issuer = m.get_one::(Self::OPENID_ISSUER).cloned(); + + self.address = m + .get_one::(Self::ADDRESS) + 
.cloned() + .expect("default value for address"); + self.local_staging_path = m + .get_one::(Self::STAGING) + .cloned() + .expect("default value for staging"); + self.local_cache_size = m + .get_one::(Self::CACHE_SIZE) + .cloned() + .expect("default value for cache size"); + self.upload_interval = m + .get_one::(Self::UPLOAD_INTERVAL) + .cloned() + .expect("default value for upload"); + self.username = m + .get_one::(Self::USERNAME) + .cloned() + .expect("default for username"); + self.password = m + .get_one::(Self::PASSWORD) + .cloned() + .expect("default for password"); + self.check_update = m + .get_one::(Self::CHECK_UPDATE) + .cloned() + .expect("default for check update"); + self.send_analytics = m + .get_one::(Self::SEND_ANALYTICS) + .cloned() + .expect("default for send analytics"); + self.open_ai_key = m.get_one::(Self::OPEN_AI_KEY).cloned(); + self.grpc_port = m + .get_one::(Self::GRPC_PORT) + .cloned() + .expect("default for livetail port"); + self.livetail_channel_capacity = m + .get_one::(Self::LIVETAIL_CAPACITY) + .cloned() + .expect("default for livetail capacity"); + // converts Gib to bytes before assigning + self.query_memory_pool_size = m + .get_one::(Self::QUERY_MEM_POOL_SIZE) + .cloned() + .map(|gib| gib as usize * 1024usize.pow(3)); + self.row_group_size = m + .get_one::(Self::ROW_GROUP_SIZE) + .cloned() + .expect("default for row_group size"); + self.parquet_compression = match m + .get_one::(Self::PARQUET_COMPRESSION_ALGO) + .expect("default for compression algo") + .as_str() + { + "uncompressed" => Compression::UNCOMPRESSED, + "snappy" => Compression::SNAPPY, + "gzip" => Compression::GZIP, + "lzo" => Compression::LZO, + "brotli" => Compression::BROTLI, + "lz4" => Compression::LZ4, + "zstd" => Compression::ZSTD, + _ => unreachable!(), + }; + + self.openid = match (openid_client_id, openid_client_secret, openid_issuer) { + (Some(id), Some(secret), Some(issuer)) => { + let origin = if let Some(url) = self.domain_address.clone() { + 
oidc::Origin::Production(url) + } else { + oidc::Origin::Local { + socket_addr: self.address.clone(), + https: self.tls_cert_path.is_some() && self.tls_key_path.is_some(), + } + }; + Some(OpenidConfig { + id, + secret, + issuer, + origin, + }) + } + _ => None, + }; + + self.mode = match m + .get_one::(Self::MODE) + .expect("Mode not set") + .as_str() + { + "query" => Mode::Query, + "ingest" => Mode::Ingest, + "all" => Mode::All, + _ => unreachable!(), + }; + + Ok(()) + } +} \ No newline at end of file diff --git a/server/src/option.rs b/server/src/option.rs index 5d713f28b..101a11ebc 100644 --- a/server/src/option.rs +++ b/server/src/option.rs @@ -190,428 +190,6 @@ fn parseable_cli_command() -> Command { .subcommands([local, s3]) } -#[derive(Debug, Default)] -pub struct Server { - /// The location of TLS Cert file - pub tls_cert_path: Option, - - /// The location of TLS Private Key file - pub tls_key_path: Option, - - /// The address on which the http server will listen. - pub address: String, - - /// Base domain under which server is hosted. 
- /// This information is used by OIDC to refer redirects - pub domain_address: Option, - - /// The local staging path is used as a temporary landing point - /// for incoming events and local cache - pub local_staging_path: PathBuf, - - /// The local cache path is used for speeding up query on latest data - pub local_cache_path: Option, - - /// Size for local cache - pub local_cache_size: u64, - - /// Username for the basic authentication on the server - pub username: String, - - /// Password for the basic authentication on the server - pub password: String, - - /// OpenId configuration - pub openid: Option, - - /// Server should check for update or not - pub check_update: bool, - - /// Server should send anonymous analytics or not - pub send_analytics: bool, - - /// Open AI access key - pub open_ai_key: Option, - - /// Livetail port - pub grpc_port: u16, - - /// Livetail channel capacity - pub livetail_channel_capacity: usize, - - /// Rows in Parquet Rowgroup - pub row_group_size: usize, - - /// Query memory limit in bytes - pub query_memory_pool_size: Option, - - /// Parquet compression algorithm - pub parquet_compression: Compression, - - /// Mode of operation - pub mode: Mode, -} - -impl FromArgMatches for Server { - fn from_arg_matches(m: &clap::ArgMatches) -> Result { - let mut s: Self = Self::default(); - s.update_from_arg_matches(m)?; - Ok(s) - } - - fn update_from_arg_matches(&mut self, m: &clap::ArgMatches) -> Result<(), clap::Error> { - self.local_cache_path = m.get_one::(Self::CACHE).cloned(); - self.tls_cert_path = m.get_one::(Self::TLS_CERT).cloned(); - self.tls_key_path = m.get_one::(Self::TLS_KEY).cloned(); - self.domain_address = m.get_one::(Self::DOMAIN_URI).cloned(); - let openid_client_id = m.get_one::(Self::OPENID_CLIENT_ID).cloned(); - let openid_client_secret = m.get_one::(Self::OPENID_CLIENT_SECRET).cloned(); - let openid_issuer = m.get_one::(Self::OPENID_ISSUER).cloned(); - - self.address = m - .get_one::(Self::ADDRESS) - .cloned() - 
.expect("default value for address"); - self.local_staging_path = m - .get_one::(Self::STAGING) - .cloned() - .expect("default value for staging"); - self.local_cache_size = m - .get_one::(Self::CACHE_SIZE) - .cloned() - .expect("default value for cache size"); - self.username = m - .get_one::(Self::USERNAME) - .cloned() - .expect("default for username"); - self.password = m - .get_one::(Self::PASSWORD) - .cloned() - .expect("default for password"); - self.check_update = m - .get_one::(Self::CHECK_UPDATE) - .cloned() - .expect("default for check update"); - self.send_analytics = m - .get_one::(Self::SEND_ANALYTICS) - .cloned() - .expect("default for send analytics"); - self.open_ai_key = m.get_one::(Self::OPEN_AI_KEY).cloned(); - self.grpc_port = m - .get_one::(Self::GRPC_PORT) - .cloned() - .expect("default for livetail port"); - self.livetail_channel_capacity = m - .get_one::(Self::LIVETAIL_CAPACITY) - .cloned() - .expect("default for livetail capacity"); - // converts Gib to bytes before assigning - self.query_memory_pool_size = m - .get_one::(Self::QUERY_MEM_POOL_SIZE) - .cloned() - .map(|gib| gib as usize * 1024usize.pow(3)); - self.row_group_size = m - .get_one::(Self::ROW_GROUP_SIZE) - .cloned() - .expect("default for row_group size"); - self.parquet_compression = match m - .get_one::(Self::PARQUET_COMPRESSION_ALGO) - .expect("default for compression algo") - .as_str() - { - "uncompressed" => Compression::UNCOMPRESSED, - "snappy" => Compression::SNAPPY, - "gzip" => Compression::GZIP, - "lzo" => Compression::LZO, - "brotli" => Compression::BROTLI, - "lz4" => Compression::LZ4, - "zstd" => Compression::ZSTD, - _ => unreachable!(), - }; - - self.openid = match (openid_client_id, openid_client_secret, openid_issuer) { - (Some(id), Some(secret), Some(issuer)) => { - let origin = if let Some(url) = self.domain_address.clone() { - oidc::Origin::Production(url) - } else { - oidc::Origin::Local { - socket_addr: self.address.clone(), - https: 
self.tls_cert_path.is_some() && self.tls_key_path.is_some(), - } - }; - Some(OpenidConfig { - id, - secret, - issuer, - origin, - }) - } - _ => None, - }; - - self.mode = match m - .get_one::(Self::MODE) - .expect("Mode not set") - .as_str() - { - "query" => Mode::Query, - "ingest" => Mode::Ingest, - "all" => Mode::All, - _ => unreachable!(), - }; - - Ok(()) - } -} - -impl Server { - // identifiers for arguments - pub const TLS_CERT: &'static str = "tls-cert-path"; - pub const TLS_KEY: &'static str = "tls-key-path"; - pub const ADDRESS: &'static str = "address"; - pub const DOMAIN_URI: &'static str = "origin"; - pub const STAGING: &'static str = "local-staging-path"; - pub const CACHE: &'static str = "cache-path"; - pub const CACHE_SIZE: &'static str = "cache-size"; - pub const USERNAME: &'static str = "username"; - pub const PASSWORD: &'static str = "password"; - pub const CHECK_UPDATE: &'static str = "check-update"; - pub const SEND_ANALYTICS: &'static str = "send-analytics"; - pub const OPEN_AI_KEY: &'static str = "open-ai-key"; - pub const OPENID_CLIENT_ID: &'static str = "oidc-client"; - pub const OPENID_CLIENT_SECRET: &'static str = "oidc-client-secret"; - pub const OPENID_ISSUER: &'static str = "oidc-issuer"; - pub const GRPC_PORT: &'static str = "grpc-port"; - pub const LIVETAIL_CAPACITY: &'static str = "livetail-capacity"; - // todo : what should this flag be - pub const QUERY_MEM_POOL_SIZE: &'static str = "query-mempool-size"; - pub const ROW_GROUP_SIZE: &'static str = "row-group-size"; - pub const PARQUET_COMPRESSION_ALGO: &'static str = "compression-algo"; - pub const MODE: &'static str = "mode"; - pub const DEFAULT_USERNAME: &'static str = "admin"; - pub const DEFAULT_PASSWORD: &'static str = "admin"; - - pub fn local_stream_data_path(&self, stream_name: &str) -> PathBuf { - self.local_staging_path.join(stream_name) - } - - pub fn get_scheme(&self) -> String { - if self.tls_cert_path.is_some() && self.tls_key_path.is_some() { - return 
"https".to_string(); - } - "http".to_string() - } - - pub fn get_clap_command(name: &'static str) -> Command { - Command::new(name).next_line_help(false) - .arg( - Arg::new(Self::TLS_CERT) - .long(Self::TLS_CERT) - .env("P_TLS_CERT_PATH") - .value_name("PATH") - .value_parser(validation::file_path) - .help("Local path on this device where certificate file is located. Required to enable TLS"), - ) - .arg( - Arg::new(Self::TLS_KEY) - .long(Self::TLS_KEY) - .env("P_TLS_KEY_PATH") - .value_name("PATH") - .value_parser(validation::file_path) - .help("Local path on this device where private key file is located. Required to enable TLS"), - ) - .arg( - Arg::new(Self::ADDRESS) - .long(Self::ADDRESS) - .env("P_ADDR") - .value_name("ADDR:PORT") - .default_value("0.0.0.0:8000") - .value_parser(validation::socket_addr) - .help("Address and port for Parseable HTTP(s) server"), - ) - .arg( - Arg::new(Self::STAGING) - .long(Self::STAGING) - .env("P_STAGING_DIR") - .value_name("DIR") - .default_value("./staging") - .value_parser(validation::canonicalize_path) - .help("Local path on this device to be used as landing point for incoming events") - .next_line_help(true), - ) - .arg( - Arg::new(Self::CACHE) - .long(Self::CACHE) - .env("P_CACHE_DIR") - .value_name("DIR") - .value_parser(validation::canonicalize_path) - .help("Local path on this device to be used for caching data") - .next_line_help(true), - ) - .arg( - Arg::new(Self::CACHE_SIZE) - .long(Self::CACHE_SIZE) - .env("P_CACHE_SIZE") - .value_name("size") - .default_value("1GiB") - .value_parser(validation::cache_size) - .help("Maximum allowed cache size for all streams combined (In human readable format, e.g 1GiB, 2GiB, 100MB)") - .next_line_help(true), - ) - .arg( - Arg::new(Self::USERNAME) - .long(Self::USERNAME) - .env("P_USERNAME") - .value_name("STRING") - .required(true) - .help("Admin username to be set for this Parseable server"), - ) - .arg( - Arg::new(Self::PASSWORD) - .long(Self::PASSWORD) - .env("P_PASSWORD") - 
.value_name("STRING") - .required(true) - .help("Admin password to be set for this Parseable server"), - ) - .arg( - Arg::new(Self::CHECK_UPDATE) - .long(Self::CHECK_UPDATE) - .env("P_CHECK_UPDATE") - .value_name("BOOL") - .required(false) - .default_value("true") - .value_parser(value_parser!(bool)) - .help("Enable/Disable checking for new Parseable release"), - ) - .arg( - Arg::new(Self::SEND_ANALYTICS) - .long(Self::SEND_ANALYTICS) - .env("P_SEND_ANONYMOUS_USAGE_DATA") - .value_name("BOOL") - .required(false) - .default_value("true") - .value_parser(value_parser!(bool)) - .help("Enable/Disable anonymous telemetry data collection"), - ) - .arg( - Arg::new(Self::OPEN_AI_KEY) - .long(Self::OPEN_AI_KEY) - .env("P_OPENAI_API_KEY") - .value_name("STRING") - .required(false) - .help("OpenAI key to enable llm features"), - ) - .arg( - Arg::new(Self::OPENID_CLIENT_ID) - .long(Self::OPENID_CLIENT_ID) - .env("P_OIDC_CLIENT_ID") - .value_name("STRING") - .required(false) - .help("Client id for OIDC provider"), - ) - .arg( - Arg::new(Self::OPENID_CLIENT_SECRET) - .long(Self::OPENID_CLIENT_SECRET) - .env("P_OIDC_CLIENT_SECRET") - .value_name("STRING") - .required(false) - .help("Client secret for OIDC provider"), - ) - .arg( - Arg::new(Self::OPENID_ISSUER) - .long(Self::OPENID_ISSUER) - .env("P_OIDC_ISSUER") - .value_name("URl") - .required(false) - .value_parser(validation::url) - .help("OIDC provider's host address"), - ) - .arg( - Arg::new(Self::DOMAIN_URI) - .long(Self::DOMAIN_URI) - .env("P_ORIGIN_URI") - .value_name("URL") - .required(false) - .value_parser(validation::url) - .help("Parseable server global domain address"), - ) - .arg( - Arg::new(Self::GRPC_PORT) - .long(Self::GRPC_PORT) - .env("P_GRPC_PORT") - .value_name("PORT") - .default_value("8001") - .required(false) - .value_parser(value_parser!(u16)) - .help("Port for gRPC server"), - ) - .arg( - Arg::new(Self::LIVETAIL_CAPACITY) - .long(Self::LIVETAIL_CAPACITY) - .env("P_LIVETAIL_CAPACITY") - 
.value_name("NUMBER") - .default_value("1000") - .required(false) - .value_parser(value_parser!(usize)) - .help("Number of rows in livetail channel"), - ) - .arg( - Arg::new(Self::QUERY_MEM_POOL_SIZE) - .long(Self::QUERY_MEM_POOL_SIZE) - .env("P_QUERY_MEMORY_LIMIT") - .value_name("Gib") - .required(false) - .value_parser(value_parser!(u8)) - .help("Set a fixed memory limit for query"), - ) - .arg( - Arg::new(Self::ROW_GROUP_SIZE) - .long(Self::ROW_GROUP_SIZE) - .env("P_PARQUET_ROW_GROUP_SIZE") - .value_name("NUMBER") - .required(false) - .default_value("16384") - .value_parser(value_parser!(usize)) - .help("Number of rows in a row group"), - ).arg( - Arg::new(Self::MODE) - .long(Self::MODE) - .env("P_MODE") - .value_name("STRING") - .required(false) - .default_value("all") - .value_parser([ - "query", - "ingest", - "all"]) - .help("Mode of operation"), - ) - .arg( - Arg::new(Self::PARQUET_COMPRESSION_ALGO) - .long(Self::PARQUET_COMPRESSION_ALGO) - .env("P_PARQUET_COMPRESSION_ALGO") - .value_name("[UNCOMPRESSED, SNAPPY, GZIP, LZO, BROTLI, LZ4, ZSTD]") - .required(false) - .default_value("lz4") - .value_parser([ - "uncompressed", - "snappy", - "gzip", - "lzo", - "brotli", - "lz4", - "zstd"]) - .help("Parquet compression algorithm"), - ).group( - ArgGroup::new("oidc") - .args([Self::OPENID_CLIENT_ID, Self::OPENID_CLIENT_SECRET, Self::OPENID_ISSUER]) - .requires_all([Self::OPENID_CLIENT_ID, Self::OPENID_CLIENT_SECRET, Self::OPENID_ISSUER]) - .multiple(true) - ) - } -} - #[derive(Debug, Default, Eq, PartialEq)] pub enum Mode { Query, From 0f2ec9b537f44a46f9f8769bd6fc5c35fde66f32 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 14:33:13 +0530 Subject: [PATCH 09/84] chore: changes to function and cli struct name, to avoid collision --- server/src/analytics.rs | 2 +- server/src/banner.rs | 2 +- server/src/handlers/http/about.rs | 2 +- server/src/main.rs | 1 + server/src/option.rs | 51 +++++++++++++++---------------- 5 files changed, 29 insertions(+), 
29 deletions(-) diff --git a/server/src/analytics.rs b/server/src/analytics.rs index ca8d172d6..e10311469 100644 --- a/server/src/analytics.rs +++ b/server/src/analytics.rs @@ -90,7 +90,7 @@ impl Report { cpu_count, memory_total_bytes: mem_total, platform: platform().to_string(), - mode: CONFIG.mode_string().to_string(), + mode: CONFIG.get_storage_mode_string().to_string(), version: current().released_version.to_string(), commit_hash: current().commit_hash, metrics: build_metrics(), diff --git a/server/src/banner.rs b/server/src/banner.rs index 0f1dc5120..207fe1705 100644 --- a/server/src/banner.rs +++ b/server/src/banner.rs @@ -102,7 +102,7 @@ async fn storage_info(config: &Config) { Mode: \"{}\" Staging: \"{}\"", "Storage:".to_string().bold(), - config.mode_string(), + config.get_storage_mode_string(), config.staging_dir().to_string_lossy(), ); diff --git a/server/src/handlers/http/about.rs b/server/src/handlers/http/about.rs index 3f42ccc4f..7e5d82653 100644 --- a/server/src/handlers/http/about.rs +++ b/server/src/handlers/http/about.rs @@ -40,7 +40,7 @@ pub async fn about() -> Json { let current_version = format!("v{}", current_release.released_version); let commit = current_release.commit_hash; let deployment_id = meta.deployment_id.to_string(); - let mode = CONFIG.mode_string(); + let mode = CONFIG.get_storage_mode_string(); let staging = CONFIG.staging_dir(); let grpc_port = CONFIG.parseable.grpc_port; diff --git a/server/src/main.rs b/server/src/main.rs index 4b16256da..0e1afc3c8 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -25,6 +25,7 @@ use std::panic::{catch_unwind, AssertUnwindSafe}; use std::thread::{self, JoinHandle}; use std::time::Duration; +mod cli; mod about; mod alerts; mod analytics; diff --git a/server/src/option.rs b/server/src/option.rs index 101a11ebc..9a108be8b 100644 --- a/server/src/option.rs +++ b/server/src/option.rs @@ -17,16 +17,15 @@ */ use clap::error::ErrorKind; -use clap::{command, value_parser, Arg, ArgGroup, Args, 
Command, FromArgMatches}; +use clap::{command, Args, Command, FromArgMatches}; use once_cell::sync::Lazy; use parquet::basic::{BrotliLevel, GzipLevel, ZstdLevel}; use std::env; use std::path::PathBuf; use std::sync::Arc; -use url::Url; -use crate::oidc::{self, OpenidConfig}; +use crate::cli::Cli; use crate::storage::{FSConfig, ObjectStorageError, ObjectStorageProvider, S3Config}; pub const MIN_CACHE_SIZE_BYTES: u64 = 1000u64.pow(3); // 1 GiB @@ -36,18 +35,18 @@ pub static CONFIG: Lazy> = Lazy::new(|| Arc::new(Config::new())); #[derive(Debug)] pub struct Config { - pub parseable: Server, + pub parseable: Cli, storage: Arc, pub storage_name: &'static str, } impl Config { fn new() -> Self { - let cli = parseable_cli_command().get_matches(); + let cli = create_parseable_cli_command().get_matches(); match cli.subcommand() { Some(("local-store", m)) => { - let server = match Server::from_arg_matches(m) { - Ok(server) => server, + let cli = match Cli::from_arg_matches(m) { + Ok(cli) => cli, Err(err) => err.exit(), }; let storage = match FSConfig::from_arg_matches(m) { @@ -55,8 +54,8 @@ impl Config { Err(err) => err.exit(), }; - if server.local_staging_path == storage.root { - parseable_cli_command() + if cli.local_staging_path == storage.root { + create_parseable_cli_command() .error( ErrorKind::ValueValidation, "Cannot use same path for storage and staging", @@ -64,8 +63,8 @@ impl Config { .exit() } - if server.local_cache_path.is_some() { - parseable_cli_command() + if cli.local_cache_path.is_some() { + create_parseable_cli_command() .error( ErrorKind::ValueValidation, "Cannot use cache with local-store subcommand.", @@ -74,14 +73,14 @@ impl Config { } Config { - parseable: server, + parseable: cli, storage: Arc::new(storage), storage_name: "drive", } } Some(("s3-store", m)) => { - let server = match Server::from_arg_matches(m) { - Ok(server) => server, + let cli = match Cli::from_arg_matches(m) { + Ok(cli) => cli, Err(err) => err.exit(), }; let storage = match 
S3Config::from_arg_matches(m) { @@ -90,7 +89,7 @@ impl Config { }; Config { - parseable: server, + parseable: cli, storage: Arc::new(storage), storage_name: "s3", } @@ -118,7 +117,7 @@ impl Config { return Ok(()); } - if self.mode_string() == "Local drive" { + if self.get_storage_mode_string() == "Local drive" { return Err(ObjectStorageError::Custom(format!("Could not start the server because directory '{}' contains stale data, please use an empty directory, and restart the server.\n{}", self.storage.get_endpoint(), JOIN_COMMUNITY))); } @@ -143,14 +142,14 @@ impl Config { } pub fn is_default_creds(&self) -> bool { - self.parseable.username == Server::DEFAULT_USERNAME - && self.parseable.password == Server::DEFAULT_PASSWORD + self.parseable.username == Cli::DEFAULT_USERNAME + && self.parseable.password == Cli::DEFAULT_PASSWORD } // returns the string representation of the storage mode // drive --> Local drive // s3 --> S3 bucket - pub fn mode_string(&self) -> &str { + pub fn get_storage_mode_string(&self) -> &str { let mut mode = "S3 bucket"; if self.storage_name == "drive" { mode = "Local drive"; @@ -159,18 +158,18 @@ impl Config { } } -fn parseable_cli_command() -> Command { - let local = Server::get_clap_command("local-store"); +fn create_parseable_cli_command() -> Command { + let local = Cli::create_cli_command_with_clap("local-store"); let local = ::augment_args_for_update(local); let local = local - .mut_arg(Server::USERNAME, |arg| { - arg.required(false).default_value(Server::DEFAULT_USERNAME) + .mut_arg(Cli::USERNAME, |arg| { + arg.required(false).default_value(Cli::DEFAULT_USERNAME) }) - .mut_arg(Server::PASSWORD, |arg| { - arg.required(false).default_value(Server::DEFAULT_PASSWORD) + .mut_arg(Cli::PASSWORD, |arg| { + arg.required(false).default_value(Cli::DEFAULT_PASSWORD) }); - let s3 = Server::get_clap_command("s3-store"); + let s3 = Cli::create_cli_command_with_clap("s3-store"); let s3 = ::augment_args_for_update(s3); command!() From 
eb6d4273664a7057cdc06e6efbec26c02b1042e4 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 14:34:35 +0530 Subject: [PATCH 10/84] fix imports --- server/src/modal/query_server.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/modal/query_server.rs b/server/src/modal/query_server.rs index b7083636f..5da52a1fa 100644 --- a/server/src/modal/query_server.rs +++ b/server/src/modal/query_server.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use crate::handlers::http::middleware::RouteExt; use crate::handlers::http::{ - about, base_path, cross_origin_config, logstream, query, API_BASE_PATH, API_VERSION, + base_path, cross_origin_config, logstream, API_BASE_PATH, API_VERSION, }; use crate::rbac::role::Action; use actix_web::web::ServiceConfig; From d8e28d1f7b41195a3575edb46af3f9038fd56cad Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 14:34:59 +0530 Subject: [PATCH 11/84] chore: add LICENSE --- server/src/modal/ingest_server.rs | 19 +++++++++++++++++++ server/src/modal/mod.rs | 19 +++++++++++++++++++ server/src/modal/query_server.rs | 19 +++++++++++++++++++ server/src/modal/server.rs | 19 +++++++++++++++++++ server/src/modal/ssl_acceptor.rs | 18 ++++++++++++++++++ 5 files changed, 94 insertions(+) diff --git a/server/src/modal/ingest_server.rs b/server/src/modal/ingest_server.rs index 028e7f148..204ba1b00 100644 --- a/server/src/modal/ingest_server.rs +++ b/server/src/modal/ingest_server.rs @@ -1,3 +1,22 @@ +/* + * Parseable Server (C) 2022 - 2024 Parseable, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + */ + + use crate::handlers::http::API_BASE_PATH; use crate::handlers::http::API_VERSION; diff --git a/server/src/modal/mod.rs b/server/src/modal/mod.rs index 28113e13f..f13e733bc 100644 --- a/server/src/modal/mod.rs +++ b/server/src/modal/mod.rs @@ -1,3 +1,22 @@ +/* + * Parseable Server (C) 2022 - 2024 Parseable, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + */ + + pub mod ingest_server; pub mod parseable_server; pub mod query_server; diff --git a/server/src/modal/query_server.rs b/server/src/modal/query_server.rs index 5da52a1fa..c8a43d51f 100644 --- a/server/src/modal/query_server.rs +++ b/server/src/modal/query_server.rs @@ -1,3 +1,22 @@ +/* + * Parseable Server (C) 2022 - 2024 Parseable, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + */ + + use actix_web::web; use std::sync::Arc; diff --git a/server/src/modal/server.rs b/server/src/modal/server.rs index 517a130ee..905b2efbb 100644 --- a/server/src/modal/server.rs +++ b/server/src/modal/server.rs @@ -1,3 +1,22 @@ +/* + * Parseable Server (C) 2022 - 2024 Parseable, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + */ + + use crate::handlers::http::about; use crate::handlers::http::base_path; use crate::handlers::http::health_check; diff --git a/server/src/modal/ssl_acceptor.rs b/server/src/modal/ssl_acceptor.rs index 1b168582a..6b51113b1 100644 --- a/server/src/modal/ssl_acceptor.rs +++ b/server/src/modal/ssl_acceptor.rs @@ -1,3 +1,21 @@ +/* + * Parseable Server (C) 2022 - 2024 Parseable, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + */ + use std::{fs::File, io::BufReader, path::PathBuf}; use itertools::Itertools; From e40d2d45b61d59110ff13967656d0c64325e560a Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 14:59:50 +0530 Subject: [PATCH 12/84] chore: move the constants storage.rs --- server/src/storage.rs | 7 +++++++ server/src/storage/object_storage.rs | 7 ------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/server/src/storage.rs b/server/src/storage.rs index 975fcf445..347f12a74 100644 --- a/server/src/storage.rs +++ b/server/src/storage.rs @@ -39,6 +39,13 @@ pub use store_metadata::{ pub use self::staging::StorageDir; +// metadata file names in a Stream prefix +pub const STREAM_METADATA_FILE_NAME: &str = ".stream.json"; +pub const PARSEABLE_METADATA_FILE_NAME: &str = ".parseable.json"; +pub const SCHEMA_FILE_NAME: &str = ".schema"; +pub const ALERT_FILE_NAME: &str = ".alert.json"; +pub const MANIFEST_FILE: &str = "manifest.json"; + /// local sync interval to move data.records to /tmp dir of that stream. /// 60 sec is a reasonable value. 
pub const LOCAL_SYNC_INTERVAL: u64 = 60; diff --git a/server/src/storage/object_storage.rs b/server/src/storage/object_storage.rs index 7494d16e1..9bcad3a89 100644 --- a/server/src/storage/object_storage.rs +++ b/server/src/storage/object_storage.rs @@ -49,13 +49,6 @@ use std::{ time::{Duration, Instant}, }; -// metadata file names in a Stream prefix -pub(super) const STREAM_METADATA_FILE_NAME: &str = ".stream.json"; -pub(super) const PARSEABLE_METADATA_FILE_NAME: &str = ".parseable.json"; -const SCHEMA_FILE_NAME: &str = ".schema"; -const ALERT_FILE_NAME: &str = ".alert.json"; -const MANIFEST_FILE: &str = "manifest.json"; - pub trait ObjectStorageProvider: StorageMetrics + std::fmt::Debug { fn get_datafusion_runtime(&self) -> RuntimeConfig; fn get_object_store(&self) -> Arc; From 0f9c680dc9984fc97cae989b21ed6f33ae5db2df Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 15:00:57 +0530 Subject: [PATCH 13/84] chore: replaced sentinals with the constants --- server/src/migration.rs | 16 ++++++++-------- server/src/option.rs | 4 ++-- server/src/storage/localfs.rs | 4 ++-- server/src/storage/object_storage.rs | 3 ++- server/src/storage/s3.rs | 6 +++--- server/src/storage/store_metadata.rs | 2 +- 6 files changed, 18 insertions(+), 17 deletions(-) diff --git a/server/src/migration.rs b/server/src/migration.rs index 5484e84c3..863be3650 100644 --- a/server/src/migration.rs +++ b/server/src/migration.rs @@ -29,7 +29,7 @@ use serde::Serialize; use crate::{ option::Config, - storage::{ObjectStorage, ObjectStorageError}, + storage::{ObjectStorage, ObjectStorageError, PARSEABLE_METADATA_FILE_NAME, SCHEMA_FILE_NAME, STREAM_METADATA_FILE_NAME}, }; /// Migrate the metdata from v1 or v2 to v3 @@ -89,7 +89,7 @@ pub async fn run_migration(config: &Config) -> anyhow::Result<()> { } async fn migration_stream(stream: &str, storage: &dyn ObjectStorage) -> anyhow::Result<()> { - let path = RelativePathBuf::from_iter([stream, ".stream.json"]); + let path = 
RelativePathBuf::from(STREAM_METADATA_FILE_NAME); let stream_metadata = storage.get_object(&path).await?; let stream_metadata: serde_json::Value = serde_json::from_slice(&stream_metadata).expect("stream.json is valid json"); @@ -106,7 +106,7 @@ async fn migration_stream(stream: &str, storage: &dyn ObjectStorage) -> anyhow:: .put_object(&path, to_bytes(&new_stream_metadata)) .await?; - let schema_path = RelativePathBuf::from_iter([stream, ".schema"]); + let schema_path = RelativePathBuf::from_iter([stream, SCHEMA_FILE_NAME]); let schema = storage.get_object(&schema_path).await?; let schema = serde_json::from_slice(&schema).ok(); let map = schema_migration::v1_v3(schema)?; @@ -118,7 +118,7 @@ async fn migration_stream(stream: &str, storage: &dyn ObjectStorage) -> anyhow:: .put_object(&path, to_bytes(&new_stream_metadata)) .await?; - let schema_path = RelativePathBuf::from_iter([stream, ".schema"]); + let schema_path = RelativePathBuf::from_iter([stream, SCHEMA_FILE_NAME]); let schema = storage.get_object(&schema_path).await?; let schema = serde_json::from_slice(&schema)?; let map = schema_migration::v2_v3(schema)?; @@ -138,7 +138,7 @@ fn to_bytes(any: &(impl ?Sized + Serialize)) -> Bytes { } pub fn get_staging_metadata(config: &Config) -> anyhow::Result> { - let path = config.staging_dir().join(".parseable.json"); + let path = config.staging_dir().join(PARSEABLE_METADATA_FILE_NAME); let bytes = match std::fs::read(path) { Ok(bytes) => bytes, Err(err) => match err.kind() { @@ -153,7 +153,7 @@ pub fn get_staging_metadata(config: &Config) -> anyhow::Result anyhow::Result> { - let path = RelativePathBuf::from_iter([".parseable.json"]); + let path = RelativePathBuf::from(PARSEABLE_METADATA_FILE_NAME); match storage.get_object(&path).await { Ok(bytes) => Ok(Some( serde_json::from_slice(&bytes).expect("parseable config is valid json"), @@ -172,13 +172,13 @@ pub async fn put_remote_metadata( storage: &dyn ObjectStorage, metadata: &serde_json::Value, ) -> anyhow::Result<()> { 
- let path = RelativePathBuf::from_iter([".parseable.json"]); + let path = RelativePathBuf::from(PARSEABLE_METADATA_FILE_NAME); let metadata = serde_json::to_vec(metadata)?.into(); Ok(storage.put_object(&path, metadata).await?) } pub fn put_staging_metadata(config: &Config, metadata: &serde_json::Value) -> anyhow::Result<()> { - let path = config.staging_dir().join(".parseable.json"); + let path = config.staging_dir().join(PARSEABLE_METADATA_FILE_NAME); let mut file = OpenOptions::new() .create(true) .truncate(true) diff --git a/server/src/option.rs b/server/src/option.rs index 9a108be8b..32f9d53d6 100644 --- a/server/src/option.rs +++ b/server/src/option.rs @@ -27,7 +27,7 @@ use std::sync::Arc; use crate::cli::Cli; use crate::storage::{FSConfig, ObjectStorageError, ObjectStorageProvider, S3Config}; - +use crate::storage::PARSEABLE_METADATA_FILE_NAME; pub const MIN_CACHE_SIZE_BYTES: u64 = 1000u64.pow(3); // 1 GiB pub const JOIN_COMMUNITY: &str = "Join us on Parseable Slack community for questions : https://logg.ing/community"; @@ -100,7 +100,7 @@ impl Config { pub async fn validate(&self) -> Result<(), ObjectStorageError> { let obj_store = self.storage.get_object_store(); - let rel_path = relative_path::RelativePathBuf::from(".parseable.json"); + let rel_path = relative_path::RelativePathBuf::from(PARSEABLE_METADATA_FILE_NAME); let has_parseable_json = obj_store.get_object(&rel_path).await.is_ok(); diff --git a/server/src/storage/localfs.rs b/server/src/storage/localfs.rs index e0880cff0..7fac68670 100644 --- a/server/src/storage/localfs.rs +++ b/server/src/storage/localfs.rs @@ -34,7 +34,7 @@ use tokio_stream::wrappers::ReadDirStream; use crate::metrics::storage::{localfs::REQUEST_RESPONSE_TIME, StorageMetrics}; use crate::option::validation; -use super::{object_storage, LogStream, ObjectStorage, ObjectStorageError, ObjectStorageProvider}; +use super::{LogStream, ObjectStorage, ObjectStorageError, ObjectStorageProvider, STREAM_METADATA_FILE_NAME}; #[derive(Debug, 
Clone, clap::Args)] #[command( @@ -248,7 +248,7 @@ async fn dir_with_stream( if entry.file_type().await?.is_dir() { let path = entry.path(); - let stream_json_path = path.join(object_storage::STREAM_METADATA_FILE_NAME); + let stream_json_path = path.join(STREAM_METADATA_FILE_NAME); if stream_json_path.exists() { Ok(Some(dir_name)) } else { diff --git a/server/src/storage/object_storage.rs b/server/src/storage/object_storage.rs index 9bcad3a89..8141f9898 100644 --- a/server/src/storage/object_storage.rs +++ b/server/src/storage/object_storage.rs @@ -20,6 +20,7 @@ use super::{ retention::Retention, staging::convert_disk_files_to_parquet, LogStream, ObjectStorageError, ObjectStoreFormat, Permisssion, StorageDir, StorageMetadata, }; +use super::{ALERT_FILE_NAME, MANIFEST_FILE, PARSEABLE_METADATA_FILE_NAME, SCHEMA_FILE_NAME, STREAM_METADATA_FILE_NAME}; use crate::{ alerts::Alerts, @@ -77,7 +78,7 @@ pub trait ObjectStorage: Sync + 'static { async fn get_latency(&self) -> Duration { // It's Ok to `unwrap` here. The hardcoded value will always Result in // an `Ok`. 
- let path = RelativePathBuf::from_path(".parseable.json").unwrap(); + let path = RelativePathBuf::from_path(PARSEABLE_METADATA_FILE_NAME).unwrap(); let start = Instant::now(); let _ = self.get_object(&path).await; diff --git a/server/src/storage/s3.rs b/server/src/storage/s3.rs index ef1144f18..f9316c2f2 100644 --- a/server/src/storage/s3.rs +++ b/server/src/storage/s3.rs @@ -42,7 +42,7 @@ use crate::metrics::storage::{s3::REQUEST_RESPONSE_TIME, StorageMetrics}; use crate::storage::{LogStream, ObjectStorage, ObjectStorageError}; use super::metrics_layer::MetricLayer; -use super::{object_storage, ObjectStorageProvider}; +use super::{ ObjectStorageProvider, PARSEABLE_METADATA_FILE_NAME, STREAM_METADATA_FILE_NAME}; // in bytes const MULTIPART_UPLOAD_SIZE: usize = 1024 * 1024 * 100; @@ -304,7 +304,7 @@ impl S3 { let stream_json_check = FuturesUnordered::new(); for dir in &dirs { - let key = format!("{}/{}", dir, object_storage::STREAM_METADATA_FILE_NAME); + let key = format!("{}/{}", dir, STREAM_METADATA_FILE_NAME); let task = async move { self.client.head(&StorePath::from(key)).await.map(|_| ()) }; stream_json_check.push(task); } @@ -424,7 +424,7 @@ impl ObjectStorage for S3 { async fn check(&self) -> Result<(), ObjectStorageError> { Ok(self .client - .head(&object_storage::PARSEABLE_METADATA_FILE_NAME.into()) + .head(&PARSEABLE_METADATA_FILE_NAME.into()) .await .map(|_| ())?) 
} diff --git a/server/src/storage/store_metadata.rs b/server/src/storage/store_metadata.rs index b7d3a52f8..996daae43 100644 --- a/server/src/storage/store_metadata.rs +++ b/server/src/storage/store_metadata.rs @@ -32,7 +32,7 @@ use crate::{ utils::uid, }; -use super::object_storage::PARSEABLE_METADATA_FILE_NAME; +use super::PARSEABLE_METADATA_FILE_NAME; // Expose some static variables for internal usage pub static STORAGE_METADATA: OnceCell = OnceCell::new(); From 9c96195ae82220e30a07abec31ed9b051a3674cb Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 15:30:24 +0530 Subject: [PATCH 14/84] fix: forgot default --- server/src/modal/ingest_server.rs | 1 + server/src/modal/query_server.rs | 1 + server/src/modal/server.rs | 2 ++ 3 files changed, 4 insertions(+) diff --git a/server/src/modal/ingest_server.rs b/server/src/modal/ingest_server.rs index 204ba1b00..0d70ab9b8 100644 --- a/server/src/modal/ingest_server.rs +++ b/server/src/modal/ingest_server.rs @@ -36,6 +36,7 @@ use crate::{ option::CONFIG, }; +#[derive(Default)] pub struct IngestServer; #[async_trait(?Send)] diff --git a/server/src/modal/query_server.rs b/server/src/modal/query_server.rs index c8a43d51f..4f72ec9c8 100644 --- a/server/src/modal/query_server.rs +++ b/server/src/modal/query_server.rs @@ -38,6 +38,7 @@ use super::ssl_acceptor::get_ssl_acceptor; include!(concat!(env!("OUT_DIR"), "/generated.rs")); +#[derive(Default)] pub struct QueryServer; #[async_trait(?Send)] diff --git a/server/src/modal/server.rs b/server/src/modal/server.rs index 905b2efbb..f172b7faa 100644 --- a/server/src/modal/server.rs +++ b/server/src/modal/server.rs @@ -50,6 +50,8 @@ use super::parseable_server::OpenIdClient; use super::parseable_server::ParseableServer; include!(concat!(env!("OUT_DIR"), "/generated.rs")); + +#[derive(Default)] pub struct SuperServer; #[async_trait(?Send)] From abd702a77ea095c9ec100f8b4c65011e22aff6f9 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 15:31:36 
+0530 Subject: [PATCH 15/84] fix: make name more descriptive --- server/src/main.rs | 2 +- server/src/metrics/mod.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main.rs b/server/src/main.rs index 0e1afc3c8..5736ce703 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -81,7 +81,7 @@ async fn main() -> anyhow::Result<()> { // track all parquet files already in the data directory storage::retention::load_retention_from_global(); // load data from stats back to prometheus metrics - metrics::load_from_stats_from_storage().await; + metrics::fetch_stats_from_storage().await; let (localsync_handler, mut localsync_outbox, localsync_inbox) = run_local_sync(); let (mut remote_sync_handler, mut remote_sync_outbox, mut remote_sync_inbox) = diff --git a/server/src/metrics/mod.rs b/server/src/metrics/mod.rs index 05e6baf86..513bf2540 100644 --- a/server/src/metrics/mod.rs +++ b/server/src/metrics/mod.rs @@ -133,7 +133,7 @@ fn prom_process_metrics(metrics: &PrometheusMetrics) { #[cfg(not(target_os = "linux"))] fn prom_process_metrics(_metrics: &PrometheusMetrics) {} -pub async fn load_from_stats_from_storage() { +pub async fn fetch_stats_from_storage() { for stream_name in STREAM_INFO.list_streams() { let stats = CONFIG .storage() From d7ca187c4bcf3b0fc78d5f8b29ed09bd6879ee38 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 15:31:57 +0530 Subject: [PATCH 16/84] fix: from_iter not from --- server/src/migration.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/migration.rs b/server/src/migration.rs index 863be3650..7090b7db0 100644 --- a/server/src/migration.rs +++ b/server/src/migration.rs @@ -89,7 +89,7 @@ pub async fn run_migration(config: &Config) -> anyhow::Result<()> { } async fn migration_stream(stream: &str, storage: &dyn ObjectStorage) -> anyhow::Result<()> { - let path = RelativePathBuf::from(STREAM_METADATA_FILE_NAME); + let path = RelativePathBuf::from_iter([stream, 
STREAM_METADATA_FILE_NAME]); let stream_metadata = storage.get_object(&path).await?; let stream_metadata: serde_json::Value = serde_json::from_slice(&stream_metadata).expect("stream.json is valid json"); From 0a50e844315d9ec8d4cd56326909e0c1ff24ac5c Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 15:33:00 +0530 Subject: [PATCH 17/84] using the modal servers --- server/src/handlers/http.rs | 291 +----------------------------------- 1 file changed, 7 insertions(+), 284 deletions(-) diff --git a/server/src/handlers/http.rs b/server/src/handlers/http.rs index a3c290915..def157121 100644 --- a/server/src/handlers/http.rs +++ b/server/src/handlers/http.rs @@ -32,10 +32,11 @@ use openid::Discovered; use rustls::{Certificate, PrivateKey, ServerConfig}; use rustls_pemfile::{certs, pkcs8_private_keys}; -use crate::option::CONFIG; +use crate::{modal::{ingest_server::IngestServer, parseable_server::ParseableServer, query_server::QueryServer, server::SuperServer}, option::CONFIG}; use crate::rbac::role::Action; use self::middleware::{DisAllowRootUser, ModeFilter, RouteExt}; +use crate::option::Mode; pub(crate) mod about; pub(crate) mod health_check; @@ -60,295 +61,17 @@ pub async fn run_http( prometheus: PrometheusMetrics, oidc_client: Option, ) -> anyhow::Result<()> { - let oidc_client = match oidc_client { - Some(config) => { - let client = config - .connect(&format!("{API_BASE_PATH}/{API_VERSION}/o/code")) - .await?; - Some(Arc::new(client)) - } - None => None, - }; - let create_app = move || { - App::new() - .wrap(prometheus.clone()) - .configure(|cfg| configure_routes(cfg, oidc_client.clone())) - .wrap(actix_web::middleware::Logger::default()) - .wrap(actix_web::middleware::Compress::default()) - .wrap(cross_origin_config()) - .wrap(ModeFilter) + let server: Arc = match CONFIG.parseable.mode { + Mode::Query => Arc::new(QueryServer::default()), + Mode::Ingest => Arc::new(IngestServer::default()), + Mode::All => Arc::new(SuperServer::default()), }; - 
let ssl_acceptor = match ( - &CONFIG.parseable.tls_cert_path, - &CONFIG.parseable.tls_key_path, - ) { - (Some(cert), Some(key)) => { - // init server config builder with safe defaults - let config = ServerConfig::builder() - .with_safe_defaults() - .with_no_client_auth(); - - // load TLS key/cert files - let cert_file = &mut BufReader::new(File::open(cert)?); - let key_file = &mut BufReader::new(File::open(key)?); - - // convert files to key/cert objects - let cert_chain = certs(cert_file)?.into_iter().map(Certificate).collect(); - - let mut keys: Vec = pkcs8_private_keys(key_file)? - .into_iter() - .map(PrivateKey) - .collect(); - - // exit if no keys could be parsed - if keys.is_empty() { - anyhow::bail!("Could not locate PKCS 8 private keys."); - } - - let server_config = config.with_single_cert(cert_chain, keys.remove(0))?; - - Some(server_config) - } - (_, _) => None, - }; - - // concurrent workers equal to number of cores on the cpu - let http_server = HttpServer::new(create_app).workers(num_cpus::get()); - if let Some(config) = ssl_acceptor { - http_server - .bind_rustls(&CONFIG.parseable.address, config)? 
- .run() - .await?; - } else { - http_server.bind(&CONFIG.parseable.address)?.run().await?; - } - + server.start(prometheus, oidc_client).await?; Ok(()) } -pub fn configure_routes( - cfg: &mut web::ServiceConfig, - oidc_client: Option>>, -) { - let generated = generate(); - - //log stream API - let logstream_api = web::scope("/{logstream}") - .service( - web::resource("") - // PUT "/logstream/{logstream}" ==> Create log stream - .route( - web::put() - .to(logstream::put_stream) - .authorize_for_stream(Action::CreateStream), - ) - // POST "/logstream/{logstream}" ==> Post logs to given log stream - .route( - web::post() - .to(ingest::post_event) - .authorize_for_stream(Action::Ingest), - ) - // DELETE "/logstream/{logstream}" ==> Delete log stream - .route( - web::delete() - .to(logstream::delete) - .authorize_for_stream(Action::DeleteStream), - ) - .app_data(web::PayloadConfig::default().limit(MAX_EVENT_PAYLOAD_SIZE)), - ) - .service( - web::resource("/alert") - // PUT "/logstream/{logstream}/alert" ==> Set alert for given log stream - .route( - web::put() - .to(logstream::put_alert) - .authorize_for_stream(Action::PutAlert), - ) - // GET "/logstream/{logstream}/alert" ==> Get alert for given log stream - .route( - web::get() - .to(logstream::get_alert) - .authorize_for_stream(Action::GetAlert), - ), - ) - .service( - // GET "/logstream/{logstream}/schema" ==> Get schema for given log stream - web::resource("/schema").route( - web::get() - .to(logstream::schema) - .authorize_for_stream(Action::GetSchema), - ), - ) - .service( - // GET "/logstream/{logstream}/stats" ==> Get stats for given log stream - web::resource("/stats").route( - web::get() - .to(logstream::get_stats) - .authorize_for_stream(Action::GetStats), - ), - ) - .service( - web::resource("/retention") - // PUT "/logstream/{logstream}/retention" ==> Set retention for given logstream - .route( - web::put() - .to(logstream::put_retention) - .authorize_for_stream(Action::PutRetention), - ) - // GET 
"/logstream/{logstream}/retention" ==> Get retention for given logstream - .route( - web::get() - .to(logstream::get_retention) - .authorize_for_stream(Action::GetRetention), - ), - ) - .service( - web::resource("/cache") - // PUT "/logstream/{logstream}/cache" ==> Set retention for given logstream - .route( - web::put() - .to(logstream::put_enable_cache) - .authorize_for_stream(Action::PutCacheEnabled), - ) - // GET "/logstream/{logstream}/cache" ==> Get retention for given logstream - .route( - web::get() - .to(logstream::get_cache_enabled) - .authorize_for_stream(Action::GetCacheEnabled), - ), - ); - - // User API - let user_api = web::scope("/user") - .service( - web::resource("") - // GET /user => List all users - .route(web::get().to(rbac::list_users).authorize(Action::ListUser)), - ) - .service( - web::resource("/{username}") - // PUT /user/{username} => Create a new user - .route(web::post().to(rbac::post_user).authorize(Action::PutUser)) - // DELETE /user/{username} => Delete a user - .route( - web::delete() - .to(rbac::delete_user) - .authorize(Action::DeleteUser), - ) - .wrap(DisAllowRootUser), - ) - .service( - web::resource("/{username}/role") - // PUT /user/{username}/roles => Put roles for user - .route( - web::put() - .to(rbac::put_role) - .authorize(Action::PutUserRoles) - .wrap(DisAllowRootUser), - ) - .route( - web::get() - .to(rbac::get_role) - .authorize_for_user(Action::GetUserRoles), - ), - ) - .service( - web::resource("/{username}/generate-new-password") - // POST /user/{username}/generate-new-password => reset password for this user - .route( - web::post() - .to(rbac::post_gen_password) - .authorize(Action::PutUser) - .wrap(DisAllowRootUser), - ), - ); - - let llm_query_api = web::scope("/llm").service( - web::resource("").route( - web::post() - .to(llm::make_llm_request) - .authorize(Action::QueryLLM), - ), - ); - - let role_api = web::scope("/role") - // GET Role List - 
.service(resource("").route(web::get().to(role::list).authorize(Action::ListRole))) - .service( - // PUT and GET Default Role - resource("/default") - .route(web::put().to(role::put_default).authorize(Action::PutRole)) - .route(web::get().to(role::get_default).authorize(Action::GetRole)), - ) - .service( - // PUT, GET, DELETE Roles - resource("/{name}") - .route(web::put().to(role::put).authorize(Action::PutRole)) - .route(web::delete().to(role::delete).authorize(Action::DeleteRole)) - .route(web::get().to(role::get).authorize(Action::GetRole)), - ); - - let mut oauth_api = web::scope("/o") - .service(resource("/login").route(web::get().to(oidc::login))) - .service(resource("/logout").route(web::get().to(oidc::logout))) - .service(resource("/code").route(web::get().to(oidc::reply_login))); - - if let Some(client) = oidc_client { - info!("Registered oidc client"); - oauth_api = oauth_api.app_data(web::Data::from(client)) - } - - // Deny request if username is same as the env variable P_USERNAME. 
- cfg.service( - // Base path "{url}/api/v1" - web::scope(&base_path()) - // .wrap(PathFilter) - // POST "/query" ==> Get results of the SQL query passed in request body - .service( - web::resource("/query") - .route(web::post().to(query::query).authorize(Action::Query)), - ) - // POST "/ingest" ==> Post logs to given log stream based on header - .service( - web::resource("/ingest") - .route( - web::post() - .to(ingest::ingest) - .authorize_for_stream(Action::Ingest), - ) - .app_data(web::PayloadConfig::default().limit(MAX_EVENT_PAYLOAD_SIZE)), - ) - // GET "/liveness" ==> Liveness check as per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command - .service(web::resource("/liveness").route(web::get().to(health_check::liveness))) - // GET "/readiness" ==> Readiness check as per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes - .service(web::resource("/readiness").route(web::get().to(health_check::readiness))) - // GET "/about" ==> Returns information about instance - .service( - web::resource("/about") - .route(web::get().to(about::about).authorize(Action::GetAbout)), - ) - .service( - web::scope("/logstream") - .service( - // GET "/logstream" ==> Get list of all Log Streams on the server - web::resource("") - .route(web::get().to(logstream::list).authorize(Action::ListStream)), - ) - .service( - // logstream API - logstream_api, - ), - ) - .service(user_api) - .service(llm_query_api) - .service(oauth_api) - .service(role_api), - ) - // GET "/" ==> Serve the static frontend directory - .service(ResourceFiles::new("/", generated).resolve_not_found_to_root()); -} pub(crate) fn base_path() -> String { format!("{API_BASE_PATH}/{API_VERSION}") From e03e63961b5926457cdf0106e573654b6089bfb4 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 15:35:32 +0530 Subject: [PATCH 18/84] update struct name --- 
server/src/handlers/http.rs | 4 ++-- server/src/modal/ingest_server.rs | 10 +++++----- server/src/modal/query_server.rs | 18 +++++++++--------- server/src/modal/server.rs | 8 ++++---- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/server/src/handlers/http.rs b/server/src/handlers/http.rs index def157121..541166500 100644 --- a/server/src/handlers/http.rs +++ b/server/src/handlers/http.rs @@ -32,7 +32,7 @@ use openid::Discovered; use rustls::{Certificate, PrivateKey, ServerConfig}; use rustls_pemfile::{certs, pkcs8_private_keys}; -use crate::{modal::{ingest_server::IngestServer, parseable_server::ParseableServer, query_server::QueryServer, server::SuperServer}, option::CONFIG}; +use crate::{modal::{ingest_server::IngestServer, parseable_server::ParseableServer, query_server::QueryServer, server::Server}, option::CONFIG}; use crate::rbac::role::Action; use self::middleware::{DisAllowRootUser, ModeFilter, RouteExt}; @@ -65,7 +65,7 @@ pub async fn run_http( let server: Arc = match CONFIG.parseable.mode { Mode::Query => Arc::new(QueryServer::default()), Mode::Ingest => Arc::new(IngestServer::default()), - Mode::All => Arc::new(SuperServer::default()), + Mode::All => Arc::new(Server::default()), }; server.start(prometheus, oidc_client).await?; diff --git a/server/src/modal/ingest_server.rs b/server/src/modal/ingest_server.rs index 0d70ab9b8..f63be801f 100644 --- a/server/src/modal/ingest_server.rs +++ b/server/src/modal/ingest_server.rs @@ -24,7 +24,7 @@ use std::sync::Arc; use super::parseable_server::OpenIdClient; use super::parseable_server::ParseableServer; -use super::server::SuperServer; +use super::server::Server; use super::ssl_acceptor::get_ssl_acceptor; use actix_web::{web, App, HttpServer}; @@ -90,8 +90,8 @@ impl ParseableServer for IngestServer { impl IngestServer { // configure the api routes fn configure_routes(config: &mut web::ServiceConfig, _odic_client: Option) { - let logstream_scope = SuperServer::get_logstream_webscope(); - let 
ingest_factory = SuperServer::get_ingest_factory(); + let logstream_scope = Server::get_logstream_webscope(); + let ingest_factory = Server::get_ingest_factory(); config .service( @@ -99,9 +99,9 @@ impl IngestServer { web::scope(&base_path()).service(ingest_factory), ) // GET "/liveness" ==> Liveness check as per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command - .service(SuperServer::get_liveness_factory()) + .service(Server::get_liveness_factory()) // GET "/readiness" ==> Readiness check as per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes - .service(SuperServer::get_readiness_factory()) + .service(Server::get_readiness_factory()) .service(logstream_scope); } } diff --git a/server/src/modal/query_server.rs b/server/src/modal/query_server.rs index 4f72ec9c8..47d6a76d7 100644 --- a/server/src/modal/query_server.rs +++ b/server/src/modal/query_server.rs @@ -33,7 +33,7 @@ use async_trait::async_trait; use crate::option::CONFIG; use super::parseable_server::{OpenIdClient, ParseableServer}; -use super::server::SuperServer; +use super::server::Server; use super::ssl_acceptor::get_ssl_acceptor; include!(concat!(env!("OUT_DIR"), "/generated.rs")); @@ -93,22 +93,22 @@ impl QueryServer { pub fn configure_routes(config: &mut ServiceConfig, oidc_client: Option) { let generated = generate(); - let user_scope = SuperServer::get_user_webscope(); - let llm_scope = SuperServer::get_llm_webscope(); - let role_scope = SuperServer::get_user_role_webscope(); - let oauth_scope = SuperServer::get_oauth_webscope(oidc_client); + let user_scope = Server::get_user_webscope(); + let llm_scope = Server::get_llm_webscope(); + let role_scope = Server::get_user_role_webscope(); + let oauth_scope = Server::get_oauth_webscope(oidc_client); config .service( web::scope(&base_path()) // POST "/query" ==> Get results of the SQL query passed in 
request body - .service(SuperServer::get_query_factory()) + .service(Server::get_query_factory()) // GET "/liveness" ==> Liveness check as per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command - .service(SuperServer::get_liveness_factory()) + .service(Server::get_liveness_factory()) // GET "/readiness" ==> Readiness check as per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes - .service(SuperServer::get_readiness_factory()) + .service(Server::get_readiness_factory()) // GET "/about" ==> Returns information about instance - .service(SuperServer::get_about_factory()) + .service(Server::get_about_factory()) .service( web::scope("/logstream").service( // GET "/logstream" ==> Get list of all Log Streams on the server diff --git a/server/src/modal/server.rs b/server/src/modal/server.rs index f172b7faa..e452e944a 100644 --- a/server/src/modal/server.rs +++ b/server/src/modal/server.rs @@ -52,10 +52,10 @@ use super::parseable_server::ParseableServer; include!(concat!(env!("OUT_DIR"), "/generated.rs")); #[derive(Default)] -pub struct SuperServer; +pub struct Server; #[async_trait(?Send)] -impl ParseableServer for SuperServer { +impl ParseableServer for Server { async fn start( &self, prometheus: PrometheusMetrics, @@ -74,7 +74,7 @@ impl ParseableServer for SuperServer { let create_app_fn = move || { App::new() .wrap(prometheus.clone()) - .configure(|cfg| SuperServer::configure_routes(cfg, oidc_client.clone())) + .configure(|cfg| Server::configure_routes(cfg, oidc_client.clone())) .wrap(actix_web::middleware::Logger::default()) .wrap(actix_web::middleware::Compress::default()) .wrap(cross_origin_config()) @@ -129,7 +129,7 @@ impl ParseableServer for SuperServer { } } -impl SuperServer { +impl Server { fn configure_routes(config: &mut web::ServiceConfig, oidc_client: Option) { let generated = generate(); From 
a2ab7c19e143ceb9a329a4db2e221a594b82dd95 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 15:41:30 +0530 Subject: [PATCH 19/84] move server module from modal to http --- server/src/{modal => handlers/http/server}/ingest_server.rs | 0 server/src/{modal => handlers/http/server}/mod.rs | 0 server/src/{modal => handlers/http/server}/query_server.rs | 0 server/src/{modal => handlers/http/server}/server.rs | 0 server/src/{modal => handlers/http/server}/ssl_acceptor.rs | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename server/src/{modal => handlers/http/server}/ingest_server.rs (100%) rename server/src/{modal => handlers/http/server}/mod.rs (100%) rename server/src/{modal => handlers/http/server}/query_server.rs (100%) rename server/src/{modal => handlers/http/server}/server.rs (100%) rename server/src/{modal => handlers/http/server}/ssl_acceptor.rs (100%) diff --git a/server/src/modal/ingest_server.rs b/server/src/handlers/http/server/ingest_server.rs similarity index 100% rename from server/src/modal/ingest_server.rs rename to server/src/handlers/http/server/ingest_server.rs diff --git a/server/src/modal/mod.rs b/server/src/handlers/http/server/mod.rs similarity index 100% rename from server/src/modal/mod.rs rename to server/src/handlers/http/server/mod.rs diff --git a/server/src/modal/query_server.rs b/server/src/handlers/http/server/query_server.rs similarity index 100% rename from server/src/modal/query_server.rs rename to server/src/handlers/http/server/query_server.rs diff --git a/server/src/modal/server.rs b/server/src/handlers/http/server/server.rs similarity index 100% rename from server/src/modal/server.rs rename to server/src/handlers/http/server/server.rs diff --git a/server/src/modal/ssl_acceptor.rs b/server/src/handlers/http/server/ssl_acceptor.rs similarity index 100% rename from server/src/modal/ssl_acceptor.rs rename to server/src/handlers/http/server/ssl_acceptor.rs From a56268c0092f94d9851b719399b00c94c5bf3130 Mon Sep 17 
00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 15:41:42 +0530 Subject: [PATCH 20/84] update imports --- server/src/handlers/http.rs | 22 ++++++---------------- server/src/main.rs | 1 - 2 files changed, 6 insertions(+), 17 deletions(-) diff --git a/server/src/handlers/http.rs b/server/src/handlers/http.rs index 541166500..03870fe3e 100644 --- a/server/src/handlers/http.rs +++ b/server/src/handlers/http.rs @@ -16,26 +16,17 @@ * */ -use std::fs::File; -use std::io::BufReader; use std::sync::Arc; use actix_cors::Cors; -use actix_web::{ - web::{self, resource}, - App, HttpServer, -}; use actix_web_prometheus::PrometheusMetrics; -use actix_web_static_files::ResourceFiles; -use log::info; -use openid::Discovered; -use rustls::{Certificate, PrivateKey, ServerConfig}; -use rustls_pemfile::{certs, pkcs8_private_keys}; -use crate::{modal::{ingest_server::IngestServer, parseable_server::ParseableServer, query_server::QueryServer, server::Server}, option::CONFIG}; -use crate::rbac::role::Action; +use crate::option::CONFIG; -use self::middleware::{DisAllowRootUser, ModeFilter, RouteExt}; +use crate::handlers::http::server::{ + ingest_server::IngestServer, parseable_server::ParseableServer, query_server::QueryServer, + server::Server, +}; use crate::option::Mode; pub(crate) mod about; @@ -50,6 +41,7 @@ mod otel; pub(crate) mod query; pub(crate) mod rbac; pub(crate) mod role; +mod server; include!(concat!(env!("OUT_DIR"), "/generated.rs")); @@ -61,7 +53,6 @@ pub async fn run_http( prometheus: PrometheusMetrics, oidc_client: Option, ) -> anyhow::Result<()> { - let server: Arc = match CONFIG.parseable.mode { Mode::Query => Arc::new(QueryServer::default()), Mode::Ingest => Arc::new(IngestServer::default()), @@ -72,7 +63,6 @@ pub async fn run_http( Ok(()) } - pub(crate) fn base_path() -> String { format!("{API_BASE_PATH}/{API_VERSION}") } diff --git a/server/src/main.rs b/server/src/main.rs index 5736ce703..f9d388f63 100644 --- a/server/src/main.rs +++ b/server/src/main.rs 
@@ -38,7 +38,6 @@ mod localcache; mod metadata; mod metrics; mod migration; -mod modal; mod oidc; mod option; mod query; From fc84446c8417221c2644fa3d20bcd445fe862e88 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 15:44:51 +0530 Subject: [PATCH 21/84] fix changed mode to storage_mode for better clarity Probably convert to enum so it does not conflict with Server Mode --- server/src/storage/store_metadata.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/src/storage/store_metadata.rs b/server/src/storage/store_metadata.rs index 996daae43..f698ed844 100644 --- a/server/src/storage/store_metadata.rs +++ b/server/src/storage/store_metadata.rs @@ -48,7 +48,7 @@ pub struct StaticStorageMetadata { #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] pub struct StorageMetadata { pub version: String, - pub mode: String, + pub storage_mode: String, pub staging: PathBuf, pub storage: String, #[serde(default = "crate::utils::uid::gen")] @@ -65,7 +65,7 @@ impl StorageMetadata { pub fn new() -> Self { Self { version: "v3".to_string(), - mode: CONFIG.storage_name.to_owned(), + storage_mode: CONFIG.storage_name.to_owned(), staging: CONFIG.staging_dir().to_path_buf(), storage: CONFIG.storage().get_endpoint(), deployment_id: uid::gen(), @@ -84,7 +84,7 @@ impl StorageMetadata { pub fn set_global(self) { let metadata = StaticStorageMetadata { - mode: self.mode, + mode: self.storage_mode, deployment_id: self.deployment_id, }; From 7852a52e8bf58bad1ae8245524ce945d9d14cd2c Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 15:48:23 +0530 Subject: [PATCH 22/84] revert back to module name modal --- server/src/handlers/http.rs | 4 ++-- server/src/handlers/http/{server => modal}/ingest_server.rs | 1 - server/src/handlers/http/{server => modal}/mod.rs | 1 - server/src/handlers/http/{server => modal}/query_server.rs | 1 - server/src/handlers/http/{server => modal}/server.rs | 1 - 
server/src/handlers/http/{server => modal}/ssl_acceptor.rs | 0 6 files changed, 2 insertions(+), 6 deletions(-) rename server/src/handlers/http/{server => modal}/ingest_server.rs (99%) rename server/src/handlers/http/{server => modal}/mod.rs (99%) rename server/src/handlers/http/{server => modal}/query_server.rs (99%) rename server/src/handlers/http/{server => modal}/server.rs (99%) rename server/src/handlers/http/{server => modal}/ssl_acceptor.rs (100%) diff --git a/server/src/handlers/http.rs b/server/src/handlers/http.rs index 03870fe3e..e5469f5f9 100644 --- a/server/src/handlers/http.rs +++ b/server/src/handlers/http.rs @@ -23,7 +23,7 @@ use actix_web_prometheus::PrometheusMetrics; use crate::option::CONFIG; -use crate::handlers::http::server::{ +use crate::handlers::http::modal::{ ingest_server::IngestServer, parseable_server::ParseableServer, query_server::QueryServer, server::Server, }; @@ -36,12 +36,12 @@ mod kinesis; pub(crate) mod llm; pub(crate) mod logstream; pub(crate) mod middleware; +mod modal; pub(crate) mod oidc; mod otel; pub(crate) mod query; pub(crate) mod rbac; pub(crate) mod role; -mod server; include!(concat!(env!("OUT_DIR"), "/generated.rs")); diff --git a/server/src/handlers/http/server/ingest_server.rs b/server/src/handlers/http/modal/ingest_server.rs similarity index 99% rename from server/src/handlers/http/server/ingest_server.rs rename to server/src/handlers/http/modal/ingest_server.rs index f63be801f..498a5029a 100644 --- a/server/src/handlers/http/server/ingest_server.rs +++ b/server/src/handlers/http/modal/ingest_server.rs @@ -16,7 +16,6 @@ * */ - use crate::handlers::http::API_BASE_PATH; use crate::handlers::http::API_VERSION; diff --git a/server/src/handlers/http/server/mod.rs b/server/src/handlers/http/modal/mod.rs similarity index 99% rename from server/src/handlers/http/server/mod.rs rename to server/src/handlers/http/modal/mod.rs index f13e733bc..3856e66cd 100644 --- a/server/src/handlers/http/server/mod.rs +++ 
b/server/src/handlers/http/modal/mod.rs @@ -16,7 +16,6 @@ * */ - pub mod ingest_server; pub mod parseable_server; pub mod query_server; diff --git a/server/src/handlers/http/server/query_server.rs b/server/src/handlers/http/modal/query_server.rs similarity index 99% rename from server/src/handlers/http/server/query_server.rs rename to server/src/handlers/http/modal/query_server.rs index 47d6a76d7..5e95838fb 100644 --- a/server/src/handlers/http/server/query_server.rs +++ b/server/src/handlers/http/modal/query_server.rs @@ -16,7 +16,6 @@ * */ - use actix_web::web; use std::sync::Arc; diff --git a/server/src/handlers/http/server/server.rs b/server/src/handlers/http/modal/server.rs similarity index 99% rename from server/src/handlers/http/server/server.rs rename to server/src/handlers/http/modal/server.rs index e452e944a..daa1d21b3 100644 --- a/server/src/handlers/http/server/server.rs +++ b/server/src/handlers/http/modal/server.rs @@ -16,7 +16,6 @@ * */ - use crate::handlers::http::about; use crate::handlers::http::base_path; use crate::handlers::http::health_check; diff --git a/server/src/handlers/http/server/ssl_acceptor.rs b/server/src/handlers/http/modal/ssl_acceptor.rs similarity index 100% rename from server/src/handlers/http/server/ssl_acceptor.rs rename to server/src/handlers/http/modal/ssl_acceptor.rs From c2dbb66b4b2f06fd97112f117a0055f64be7fdd7 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 15:50:23 +0530 Subject: [PATCH 23/84] fix: remove call to ::default --- server/src/handlers/http.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/src/handlers/http.rs b/server/src/handlers/http.rs index e5469f5f9..ee7c69764 100644 --- a/server/src/handlers/http.rs +++ b/server/src/handlers/http.rs @@ -54,9 +54,9 @@ pub async fn run_http( oidc_client: Option, ) -> anyhow::Result<()> { let server: Arc = match CONFIG.parseable.mode { - Mode::Query => Arc::new(QueryServer::default()), - Mode::Ingest => 
Arc::new(IngestServer::default()), - Mode::All => Arc::new(Server::default()), + Mode::Query => Arc::new(QueryServer), + Mode::Ingest => Arc::new(IngestServer), + Mode::All => Arc::new(Server), }; server.start(prometheus, oidc_client).await?; From d56ddf3836c03deef60dc5f8084aba411c07f582 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 15:50:43 +0530 Subject: [PATCH 24/84] run cargofmt --- server/src/cli.rs | 8 +++++--- server/src/main.rs | 2 +- server/src/migration.rs | 5 ++++- server/src/option.rs | 2 +- server/src/storage/localfs.rs | 4 +++- server/src/storage/object_storage.rs | 5 ++++- server/src/storage/s3.rs | 2 +- 7 files changed, 19 insertions(+), 9 deletions(-) diff --git a/server/src/cli.rs b/server/src/cli.rs index 691a7a3ca..738d5d7ee 100644 --- a/server/src/cli.rs +++ b/server/src/cli.rs @@ -16,13 +16,15 @@ * */ - use clap::{value_parser, Arg, ArgGroup, Command, FromArgMatches}; use std::path::PathBuf; use url::Url; -use crate::{oidc::{self, OpenidConfig}, option::{validation, Compression, Mode}}; +use crate::{ + oidc::{self, OpenidConfig}, + option::{validation, Compression, Mode}, +}; #[derive(Debug, Default)] pub struct Cli { @@ -463,4 +465,4 @@ impl FromArgMatches for Cli { Ok(()) } -} \ No newline at end of file +} diff --git a/server/src/main.rs b/server/src/main.rs index f9d388f63..8a4cf65da 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -25,12 +25,12 @@ use std::panic::{catch_unwind, AssertUnwindSafe}; use std::thread::{self, JoinHandle}; use std::time::Duration; -mod cli; mod about; mod alerts; mod analytics; mod banner; mod catalog; +mod cli; mod event; mod handlers; mod livetail; diff --git a/server/src/migration.rs b/server/src/migration.rs index 7090b7db0..be8e5de10 100644 --- a/server/src/migration.rs +++ b/server/src/migration.rs @@ -29,7 +29,10 @@ use serde::Serialize; use crate::{ option::Config, - storage::{ObjectStorage, ObjectStorageError, PARSEABLE_METADATA_FILE_NAME, SCHEMA_FILE_NAME, 
STREAM_METADATA_FILE_NAME}, + storage::{ + ObjectStorage, ObjectStorageError, PARSEABLE_METADATA_FILE_NAME, SCHEMA_FILE_NAME, + STREAM_METADATA_FILE_NAME, + }, }; /// Migrate the metdata from v1 or v2 to v3 diff --git a/server/src/option.rs b/server/src/option.rs index 32f9d53d6..f00f51712 100644 --- a/server/src/option.rs +++ b/server/src/option.rs @@ -26,8 +26,8 @@ use std::path::PathBuf; use std::sync::Arc; use crate::cli::Cli; -use crate::storage::{FSConfig, ObjectStorageError, ObjectStorageProvider, S3Config}; use crate::storage::PARSEABLE_METADATA_FILE_NAME; +use crate::storage::{FSConfig, ObjectStorageError, ObjectStorageProvider, S3Config}; pub const MIN_CACHE_SIZE_BYTES: u64 = 1000u64.pow(3); // 1 GiB pub const JOIN_COMMUNITY: &str = "Join us on Parseable Slack community for questions : https://logg.ing/community"; diff --git a/server/src/storage/localfs.rs b/server/src/storage/localfs.rs index 7fac68670..8acea743d 100644 --- a/server/src/storage/localfs.rs +++ b/server/src/storage/localfs.rs @@ -34,7 +34,9 @@ use tokio_stream::wrappers::ReadDirStream; use crate::metrics::storage::{localfs::REQUEST_RESPONSE_TIME, StorageMetrics}; use crate::option::validation; -use super::{LogStream, ObjectStorage, ObjectStorageError, ObjectStorageProvider, STREAM_METADATA_FILE_NAME}; +use super::{ + LogStream, ObjectStorage, ObjectStorageError, ObjectStorageProvider, STREAM_METADATA_FILE_NAME, +}; #[derive(Debug, Clone, clap::Args)] #[command( diff --git a/server/src/storage/object_storage.rs b/server/src/storage/object_storage.rs index 8141f9898..26019bead 100644 --- a/server/src/storage/object_storage.rs +++ b/server/src/storage/object_storage.rs @@ -20,7 +20,10 @@ use super::{ retention::Retention, staging::convert_disk_files_to_parquet, LogStream, ObjectStorageError, ObjectStoreFormat, Permisssion, StorageDir, StorageMetadata, }; -use super::{ALERT_FILE_NAME, MANIFEST_FILE, PARSEABLE_METADATA_FILE_NAME, SCHEMA_FILE_NAME, STREAM_METADATA_FILE_NAME}; +use super::{ + 
ALERT_FILE_NAME, MANIFEST_FILE, PARSEABLE_METADATA_FILE_NAME, SCHEMA_FILE_NAME, + STREAM_METADATA_FILE_NAME, +}; use crate::{ alerts::Alerts, diff --git a/server/src/storage/s3.rs b/server/src/storage/s3.rs index f9316c2f2..771f5520b 100644 --- a/server/src/storage/s3.rs +++ b/server/src/storage/s3.rs @@ -42,7 +42,7 @@ use crate::metrics::storage::{s3::REQUEST_RESPONSE_TIME, StorageMetrics}; use crate::storage::{LogStream, ObjectStorage, ObjectStorageError}; use super::metrics_layer::MetricLayer; -use super::{ ObjectStorageProvider, PARSEABLE_METADATA_FILE_NAME, STREAM_METADATA_FILE_NAME}; +use super::{ObjectStorageProvider, PARSEABLE_METADATA_FILE_NAME, STREAM_METADATA_FILE_NAME}; // in bytes const MULTIPART_UPLOAD_SIZE: usize = 1024 * 1024 * 100; From cf19487a886c8babee35be4d38b0b8f22fec3d56 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 16:54:23 +0530 Subject: [PATCH 25/84] add constant for default version may change --- server/src/handlers/http/modal/server.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/src/handlers/http/modal/server.rs b/server/src/handlers/http/modal/server.rs index daa1d21b3..f399fec8d 100644 --- a/server/src/handlers/http/modal/server.rs +++ b/server/src/handlers/http/modal/server.rs @@ -48,6 +48,8 @@ use crate::{ use super::parseable_server::OpenIdClient; use super::parseable_server::ParseableServer; +pub const DEFAULT_VERSION: &str = "v3"; + include!(concat!(env!("OUT_DIR"), "/generated.rs")); #[derive(Default)] From 2047c9ace77191879c56c0dd51cb443d843426c7 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 16:54:52 +0530 Subject: [PATCH 26/84] add new trait function get_bucket_name --- server/src/storage/localfs.rs | 10 ++++++++++ server/src/storage/object_storage.rs | 3 +++ server/src/storage/s3.rs | 4 ++++ 3 files changed, 17 insertions(+) diff --git a/server/src/storage/localfs.rs b/server/src/storage/localfs.rs index 8acea743d..d56e1565f 100644 --- 
a/server/src/storage/localfs.rs +++ b/server/src/storage/localfs.rs @@ -230,6 +230,16 @@ impl ObjectStorage for LocalFS { fn store_url(&self) -> url::Url { url::Url::parse("file:///").unwrap() } + + fn get_bucket_name(&self) -> String { + self.root + .iter() + .last() + .unwrap() + .to_str() + .unwrap() + .to_string() + } } async fn dir_with_stream( diff --git a/server/src/storage/object_storage.rs b/server/src/storage/object_storage.rs index 26019bead..d0f34e97f 100644 --- a/server/src/storage/object_storage.rs +++ b/server/src/storage/object_storage.rs @@ -399,6 +399,9 @@ pub trait ObjectStorage: Sync + 'static { Ok(()) } + + // pick a better name + fn get_bucket_name(&self) -> String; } async fn commit_schema_to_storage( diff --git a/server/src/storage/s3.rs b/server/src/storage/s3.rs index 771f5520b..e8e91ba8c 100644 --- a/server/src/storage/s3.rs +++ b/server/src/storage/s3.rs @@ -482,6 +482,10 @@ impl ObjectStorage for S3 { .map(|name| name.as_ref().to_string()) .collect::>()) } + + fn get_bucket_name(&self) -> String { + self.bucket.clone() + } } impl From for ObjectStorageError { From 773a4bca1f48490abe4261b56b4269d1d31ceb26 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Tue, 6 Feb 2024 16:55:10 +0530 Subject: [PATCH 27/84] chore: move variables where it makes more sense --- server/src/cli.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/server/src/cli.rs b/server/src/cli.rs index 738d5d7ee..984124263 100644 --- a/server/src/cli.rs +++ b/server/src/cli.rs @@ -363,9 +363,6 @@ impl FromArgMatches for Cli { self.tls_cert_path = m.get_one::(Self::TLS_CERT).cloned(); self.tls_key_path = m.get_one::(Self::TLS_KEY).cloned(); self.domain_address = m.get_one::(Self::DOMAIN_URI).cloned(); - let openid_client_id = m.get_one::(Self::OPENID_CLIENT_ID).cloned(); - let openid_client_secret = m.get_one::(Self::OPENID_CLIENT_SECRET).cloned(); - let openid_issuer = m.get_one::(Self::OPENID_ISSUER).cloned(); self.address = m
.get_one::(Self::ADDRESS) @@ -432,6 +429,10 @@ impl FromArgMatches for Cli { _ => unreachable!(), }; + let openid_client_id = m.get_one::(Self::OPENID_CLIENT_ID).cloned(); + let openid_client_secret = m.get_one::(Self::OPENID_CLIENT_SECRET).cloned(); + let openid_issuer = m.get_one::(Self::OPENID_ISSUER).cloned(); + self.openid = match (openid_client_id, openid_client_secret, openid_issuer) { (Some(id), Some(secret), Some(issuer)) => { let origin = if let Some(url) = self.domain_address.clone() { From 3495ce109ffe4fdf5799e2a0a4afc3737dec80e0 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 16:56:22 +0530 Subject: [PATCH 28/84] add struct IngestorMetadata --- .../src/handlers/http/modal/ingest_server.rs | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/server/src/handlers/http/modal/ingest_server.rs b/server/src/handlers/http/modal/ingest_server.rs index 498a5029a..e87ecae97 100644 --- a/server/src/handlers/http/modal/ingest_server.rs +++ b/server/src/handlers/http/modal/ingest_server.rs @@ -103,4 +103,31 @@ impl IngestServer { .service(Server::get_readiness_factory()) .service(logstream_scope); } + + +#[derive(Serialize, Debug, Deserialize)] +struct IngesterMetadata { + version: String, + address: String, + port: String, + origin: String, + bucket_name: String, +} + +impl IngesterMetadata { + pub fn new( + address: String, + port: String, + origin: String, + version: String, + bucket_name: String, + ) -> Self { + Self { + address, + port, + origin, + version, + bucket_name, + } + } } From 1079d7176af4bc313a4f47eb31afd1a64f33ff7f Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 17:01:37 +0530 Subject: [PATCH 29/84] add function to get ingestor address --- server/src/handlers/http/modal/ingest_server.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/server/src/handlers/http/modal/ingest_server.rs b/server/src/handlers/http/modal/ingest_server.rs index e87ecae97..323338725 100644 --- 
a/server/src/handlers/http/modal/ingest_server.rs +++ b/server/src/handlers/http/modal/ingest_server.rs @@ -104,6 +104,18 @@ impl IngestServer { .service(logstream_scope); } + #[inline(always)] + fn get_ingestor_address(&self) -> Option<(String, String)> { + // this might cause an issue down the line + // best is to make the Cli Struct better, but thats a chore + CONFIG + .parseable + .address + .split(":") + .map(|string| string.to_owned()) + .collect_tuple() + } +} #[derive(Serialize, Debug, Deserialize)] struct IngesterMetadata { From 43698dc085def64f8cfbb9026f9dc873bb613ee1 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Mon, 5 Feb 2024 17:02:07 +0530 Subject: [PATCH 30/84] WIP: ingestor config put_object --- .../src/handlers/http/modal/ingest_server.rs | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/server/src/handlers/http/modal/ingest_server.rs b/server/src/handlers/http/modal/ingest_server.rs index 323338725..be9fb0362 100644 --- a/server/src/handlers/http/modal/ingest_server.rs +++ b/server/src/handlers/http/modal/ingest_server.rs @@ -18,17 +18,23 @@ use crate::handlers::http::API_BASE_PATH; use crate::handlers::http::API_VERSION; +use crate::utils::hostname_unchecked; use std::sync::Arc; use super::parseable_server::OpenIdClient; use super::parseable_server::ParseableServer; use super::server::Server; +use super::server::DEFAULT_VERSION; use super::ssl_acceptor::get_ssl_acceptor; use actix_web::{web, App, HttpServer}; use actix_web_prometheus::PrometheusMetrics; use async_trait::async_trait; +use itertools::Itertools; +use relative_path::RelativePathBuf; +use serde::Deserialize; +use serde::Serialize; use crate::{ handlers::http::{base_path, cross_origin_config}, @@ -82,6 +88,24 @@ impl ParseableServer for IngestServer { http_server.bind(&CONFIG.parseable.address)?.run().await?; } + let store = CONFIG.storage().get_object_store(); + + let (address, port) = self + .get_ingestor_address() + .unwrap_or(("0.0.0.0".to_string(), 
"8000".to_string())); + let path = + RelativePathBuf::from(format!(".ingestor.{}.{}.json", hostname_unchecked(), port)); + + let resource = IngesterMetadata::new( + address, + port, + CONFIG.parseable.domain_address.clone().unwrap().to_string(), + DEFAULT_VERSION.to_string(), + store.get_bucket_name(), + ); + + store.put_object(&path, resource); + Ok(()) } } From 5bced8e19c0a6b49bbe203d763d0d3df29d33c54 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Tue, 6 Feb 2024 12:34:35 +0530 Subject: [PATCH 31/84] ran cargo fmt --- server/src/storage/object_storage.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/storage/object_storage.rs b/server/src/storage/object_storage.rs index d0f34e97f..6feaab717 100644 --- a/server/src/storage/object_storage.rs +++ b/server/src/storage/object_storage.rs @@ -400,8 +400,8 @@ pub trait ObjectStorage: Sync + 'static { Ok(()) } - // pick a better name - fn get_bucket_name(&self) -> String; + // pick a better name + fn get_bucket_name(&self) -> String; } async fn commit_schema_to_storage( From dcb1f01db8499cdb9b22524e2d29b58b41d55b4f Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Tue, 6 Feb 2024 12:35:05 +0530 Subject: [PATCH 32/84] add some comments --- server/src/handlers/http/modal/ingest_server.rs | 8 +++++--- server/src/handlers/http/modal/server.rs | 3 +++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/server/src/handlers/http/modal/ingest_server.rs b/server/src/handlers/http/modal/ingest_server.rs index be9fb0362..6af9a9ecb 100644 --- a/server/src/handlers/http/modal/ingest_server.rs +++ b/server/src/handlers/http/modal/ingest_server.rs @@ -51,6 +51,7 @@ impl ParseableServer for IngestServer { prometheus: PrometheusMetrics, oidc_client: Option, ) -> anyhow::Result<()> { + // get the oidc client let oidc_client = match oidc_client { Some(config) => { let client = config @@ -62,11 +63,13 @@ impl ParseableServer for IngestServer { None => None, }; + // get the ssl stuff let 
ssl = get_ssl_acceptor( &CONFIG.parseable.tls_cert_path, &CONFIG.parseable.tls_key_path, )?; + // fn that creates the app let create_app_fn = move || { App::new() .wrap(prometheus.clone()) @@ -112,7 +115,8 @@ impl ParseableServer for IngestServer { impl IngestServer { // configure the api routes - fn configure_routes(config: &mut web::ServiceConfig, _odic_client: Option) { + // odic_client is not used + fn configure_routes(config: &mut web::ServiceConfig, _oidc_client: Option) { let logstream_scope = Server::get_logstream_webscope(); let ingest_factory = Server::get_ingest_factory(); @@ -121,9 +125,7 @@ impl IngestServer { // Base path "{url}/api/v1" web::scope(&base_path()).service(ingest_factory), ) - // GET "/liveness" ==> Liveness check as per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command .service(Server::get_liveness_factory()) - // GET "/readiness" ==> Readiness check as per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes .service(Server::get_readiness_factory()) .service(logstream_scope); } diff --git a/server/src/handlers/http/modal/server.rs b/server/src/handlers/http/modal/server.rs index f399fec8d..d3082b978 100644 --- a/server/src/handlers/http/modal/server.rs +++ b/server/src/handlers/http/modal/server.rs @@ -168,6 +168,7 @@ impl Server { } // get the logstream web scope + // all except the GET route pub fn get_logstream_webscope() -> Scope { web::scope("/{logstream}") .service( @@ -359,11 +360,13 @@ impl Server { } // get the live check + // GET "/liveness" ==> Liveness check as per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command pub fn get_liveness_factory() -> Resource { web::resource("/liveness").route(web::get().to(health_check::liveness)) } // get the readiness check + // GET "/readiness" ==> Readiness check as 
per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes pub fn get_readiness_factory() -> Resource { web::resource("/readiness").route(web::get().to(health_check::readiness)) } From 00fb5974c4eeb9895d22730435ba9b2bf5cb1e7c Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Tue, 6 Feb 2024 12:38:54 +0530 Subject: [PATCH 33/84] move ingestor metadata struct --- .../src/handlers/http/modal/ingest_server.rs | 27 +---------------- server/src/handlers/http/modal/server.rs | 29 +++++++++++++++++++ 2 files changed, 30 insertions(+), 26 deletions(-) diff --git a/server/src/handlers/http/modal/ingest_server.rs b/server/src/handlers/http/modal/ingest_server.rs index 6af9a9ecb..2eabfe28d 100644 --- a/server/src/handlers/http/modal/ingest_server.rs +++ b/server/src/handlers/http/modal/ingest_server.rs @@ -24,6 +24,7 @@ use std::sync::Arc; use super::parseable_server::OpenIdClient; use super::parseable_server::ParseableServer; +use super::server::IngesterMetadata; use super::server::Server; use super::server::DEFAULT_VERSION; use super::ssl_acceptor::get_ssl_acceptor; @@ -33,8 +34,6 @@ use actix_web_prometheus::PrometheusMetrics; use async_trait::async_trait; use itertools::Itertools; use relative_path::RelativePathBuf; -use serde::Deserialize; -use serde::Serialize; use crate::{ handlers::http::{base_path, cross_origin_config}, @@ -141,31 +140,7 @@ impl IngestServer { .map(|string| string.to_owned()) .collect_tuple() } -} -#[derive(Serialize, Debug, Deserialize)] -struct IngesterMetadata { - version: String, - address: String, - port: String, - origin: String, - bucket_name: String, -} -impl IngesterMetadata { - pub fn new( - address: String, - port: String, - origin: String, - version: String, - bucket_name: String, - ) -> Self { - Self { - address, - port, - origin, - version, - bucket_name, - } } } diff --git a/server/src/handlers/http/modal/server.rs b/server/src/handlers/http/modal/server.rs index 
d3082b978..c099f78d9 100644 --- a/server/src/handlers/http/modal/server.rs +++ b/server/src/handlers/http/modal/server.rs @@ -34,6 +34,8 @@ use async_trait::async_trait; use rustls::{Certificate, PrivateKey, ServerConfig}; use rustls_pemfile::{certs, pkcs8_private_keys}; +use serde::Deserialize; +use serde::Serialize; use crate::{ handlers::http::{ @@ -376,3 +378,30 @@ impl Server { web::resource("/about").route(web::get().to(about::about).authorize(Action::GetAbout)) } } +#[derive(Serialize, Debug, Deserialize)] +pub struct IngesterMetadata { + version: String, + address: String, + port: String, + origin: String, + bucket_name: String, +} + +impl IngesterMetadata { + pub fn new( + address: String, + port: String, + origin: String, + version: String, + bucket_name: String, + ) -> Self { + Self { + address, + port, + origin, + version, + bucket_name, + } + } +} + From 0d7b7faa372d308ad891bd31ba356967f6deb7a4 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Tue, 6 Feb 2024 12:40:10 +0530 Subject: [PATCH 34/84] clean up --- .../src/handlers/http/modal/ingest_server.rs | 67 ++++++++++++------- server/src/handlers/http/modal/server.rs | 33 +++++++++ 2 files changed, 74 insertions(+), 26 deletions(-) diff --git a/server/src/handlers/http/modal/ingest_server.rs b/server/src/handlers/http/modal/ingest_server.rs index 2eabfe28d..0dc000615 100644 --- a/server/src/handlers/http/modal/ingest_server.rs +++ b/server/src/handlers/http/modal/ingest_server.rs @@ -20,6 +20,8 @@ use crate::handlers::http::API_BASE_PATH; use crate::handlers::http::API_VERSION; use crate::utils::hostname_unchecked; +use std::net::SocketAddr; +use std::str::FromStr; use std::sync::Arc; use super::parseable_server::OpenIdClient; @@ -29,11 +31,12 @@ use super::server::Server; use super::server::DEFAULT_VERSION; use super::ssl_acceptor::get_ssl_acceptor; +use actix_web::body::MessageBody; use actix_web::{web, App, HttpServer}; use actix_web_prometheus::PrometheusMetrics; use async_trait::async_trait; 
-use itertools::Itertools; use relative_path::RelativePathBuf; +use url::Url; use crate::{ handlers::http::{base_path, cross_origin_config}, @@ -62,6 +65,9 @@ impl ParseableServer for IngestServer { None => None, }; + // set the ingestor metadata + self.set_ingestor_metadata().await?; + // get the ssl stuff let ssl = get_ssl_acceptor( &CONFIG.parseable.tls_cert_path, @@ -90,24 +96,6 @@ impl ParseableServer for IngestServer { http_server.bind(&CONFIG.parseable.address)?.run().await?; } - let store = CONFIG.storage().get_object_store(); - - let (address, port) = self - .get_ingestor_address() - .unwrap_or(("0.0.0.0".to_string(), "8000".to_string())); - let path = - RelativePathBuf::from(format!(".ingestor.{}.{}.json", hostname_unchecked(), port)); - - let resource = IngesterMetadata::new( - address, - port, - CONFIG.parseable.domain_address.clone().unwrap().to_string(), - DEFAULT_VERSION.to_string(), - store.get_bucket_name(), - ); - - store.put_object(&path, resource); - Ok(()) } } @@ -130,17 +118,44 @@ impl IngestServer { } #[inline(always)] - fn get_ingestor_address(&self) -> Option<(String, String)> { + fn get_ingestor_address(&self) -> SocketAddr { // this might cause an issue down the line // best is to make the Cli Struct better, but thats a chore - CONFIG - .parseable - .address - .split(":") - .map(|string| string.to_owned()) - .collect_tuple() + (CONFIG.parseable.address.clone()) + .parse::() + .unwrap() } + async fn set_ingestor_metadata(&self) -> anyhow::Result<()> { + let store = CONFIG.storage().get_object_store(); + + let sock = self.get_ingestor_address(); + let path = RelativePathBuf::from(format!( + ".ingestor.{}.{}.json", + hostname_unchecked(), + sock.port() + )); + + let resource = IngesterMetadata::new( + sock.ip().to_string(), + sock.port().to_string(), + CONFIG + .parseable + .domain_address + .clone() + .unwrap_or(Url::parse("https://0.0.0.0:8000").unwrap()) + .to_string(), + DEFAULT_VERSION.to_string(), + store.get_bucket_name(), + ); + + 
let resource = serde_json::to_string(&resource) + .unwrap() + .try_into_bytes() + .unwrap(); + store.put_object(&path, resource).await?; + + Ok(()) } } diff --git a/server/src/handlers/http/modal/server.rs b/server/src/handlers/http/modal/server.rs index c099f78d9..c58834dfa 100644 --- a/server/src/handlers/http/modal/server.rs +++ b/server/src/handlers/http/modal/server.rs @@ -378,6 +378,7 @@ impl Server { web::resource("/about").route(web::get().to(about::about).authorize(Action::GetAbout)) } } + #[derive(Serialize, Debug, Deserialize)] pub struct IngesterMetadata { version: String, @@ -405,3 +406,35 @@ impl IngesterMetadata { } } +#[cfg(test)] +mod test { + use actix_web::body::MessageBody; + use rstest::rstest; + + use super::{IngesterMetadata, DEFAULT_VERSION}; + + #[rstest] + fn check_resource() { + let im = IngesterMetadata::new( + "0.0.0.0".to_string(), + "8000".to_string(), + "https://localhost:8000".to_string(), + DEFAULT_VERSION.to_string(), + "somebucket".to_string(), + ); + + let lhs = serde_json::to_string(&im) + .unwrap() + .try_into_bytes() + .unwrap(); + let rhs = br#"{"version":"v3", +"address":"0.0.0.0", +"port":"8000", +"origin":"https://localhost:8000", +"bucket_name":"somebucket"}"# + .try_into_bytes() + .unwrap(); + + assert_eq!(lhs, rhs); + } +} From 177cac150688b667d1dd6b04e1c248077b72fce3 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Tue, 6 Feb 2024 13:03:57 +0530 Subject: [PATCH 35/84] fix: mod.rs mistake --- server/src/handlers/http.rs | 3 +- .../src/handlers/http/modal/ingest_server.rs | 11 ++- server/src/handlers/http/modal/mod.rs | 86 ++++++++++++++++++- .../src/handlers/http/modal/query_server.rs | 4 +- server/src/handlers/http/modal/server.rs | 68 +-------------- 5 files changed, 96 insertions(+), 76 deletions(-) diff --git a/server/src/handlers/http.rs b/server/src/handlers/http.rs index ee7c69764..82c76d242 100644 --- a/server/src/handlers/http.rs +++ b/server/src/handlers/http.rs @@ -24,8 +24,7 @@ use 
actix_web_prometheus::PrometheusMetrics; use crate::option::CONFIG; use crate::handlers::http::modal::{ - ingest_server::IngestServer, parseable_server::ParseableServer, query_server::QueryServer, - server::Server, + ingest_server::IngestServer, query_server::QueryServer, server::Server, ParseableServer, }; use crate::option::Mode; diff --git a/server/src/handlers/http/modal/ingest_server.rs b/server/src/handlers/http/modal/ingest_server.rs index 0dc000615..414ef8477 100644 --- a/server/src/handlers/http/modal/ingest_server.rs +++ b/server/src/handlers/http/modal/ingest_server.rs @@ -21,15 +21,14 @@ use crate::handlers::http::API_VERSION; use crate::utils::hostname_unchecked; use std::net::SocketAddr; -use std::str::FromStr; use std::sync::Arc; -use super::parseable_server::OpenIdClient; -use super::parseable_server::ParseableServer; -use super::server::IngesterMetadata; use super::server::Server; -use super::server::DEFAULT_VERSION; use super::ssl_acceptor::get_ssl_acceptor; +use super::IngesterMetadata; +use super::OpenIdClient; +use super::ParseableServer; +use super::DEFAULT_VERSION; use actix_web::body::MessageBody; use actix_web::{web, App, HttpServer}; @@ -131,7 +130,7 @@ impl IngestServer { let sock = self.get_ingestor_address(); let path = RelativePathBuf::from(format!( - ".ingestor.{}.{}.json", + "{}.{}.ingestor.json", hostname_unchecked(), sock.port() )); diff --git a/server/src/handlers/http/modal/mod.rs b/server/src/handlers/http/modal/mod.rs index 3856e66cd..f4def6528 100644 --- a/server/src/handlers/http/modal/mod.rs +++ b/server/src/handlers/http/modal/mod.rs @@ -17,7 +17,91 @@ */ pub mod ingest_server; -pub mod parseable_server; pub mod query_server; pub mod server; pub mod ssl_acceptor; + +use std::sync::Arc; + +use actix_web_prometheus::PrometheusMetrics; +use async_trait::async_trait; +use openid::Discovered; + +use crate::oidc; +use serde::Deserialize; +use serde::Serialize; +pub type OpenIdClient = Arc>; + +pub const DEFAULT_VERSION: &str = 
"v3"; + +#[async_trait(?Send)] +pub trait ParseableServer { + // async fn validate(&self) -> Result<(), ObjectStorageError>; + + /// configure the server + async fn start( + &self, + prometheus: PrometheusMetrics, + oidc_client: Option, + ) -> anyhow::Result<()>; +} + +#[derive(Serialize, Debug, Deserialize)] +pub struct IngesterMetadata { + version: String, + address: String, + port: String, + origin: String, + bucket_name: String, +} + +impl IngesterMetadata { + pub fn new( + address: String, + port: String, + origin: String, + version: String, + bucket_name: String, + ) -> Self { + Self { + address, + port, + origin, + version, + bucket_name, + } + } +} + +#[cfg(test)] +mod test { + use actix_web::body::MessageBody; + use rstest::rstest; + + use super::{IngesterMetadata, DEFAULT_VERSION}; + + #[rstest] + fn check_resource() { + let im = IngesterMetadata::new( + "0.0.0.0".to_string(), + "8000".to_string(), + "https://localhost:8000".to_string(), + DEFAULT_VERSION.to_string(), + "somebucket".to_string(), + ); + + let lhs = serde_json::to_string(&im) + .unwrap() + .try_into_bytes() + .unwrap(); + let rhs = br#"{"version":"v3", +"address":"0.0.0.0", +"port":"8000", +"origin":"https://localhost:8000", +"bucket_name":"somebucket"}"# + .try_into_bytes() + .unwrap(); + + assert_eq!(lhs, rhs); + } +} diff --git a/server/src/handlers/http/modal/query_server.rs b/server/src/handlers/http/modal/query_server.rs index 5e95838fb..7625fcd3d 100644 --- a/server/src/handlers/http/modal/query_server.rs +++ b/server/src/handlers/http/modal/query_server.rs @@ -31,9 +31,9 @@ use async_trait::async_trait; use crate::option::CONFIG; -use super::parseable_server::{OpenIdClient, ParseableServer}; use super::server::Server; use super::ssl_acceptor::get_ssl_acceptor; +use super::{OpenIdClient, ParseableServer}; include!(concat!(env!("OUT_DIR"), "/generated.rs")); @@ -47,6 +47,8 @@ impl ParseableServer for QueryServer { prometheus: actix_web_prometheus::PrometheusMetrics, oidc_client: 
Option, ) -> anyhow::Result<()> { + let store = CONFIG.storage().get_object_store(); + let oidc_client = match oidc_client { Some(config) => { let client = config diff --git a/server/src/handlers/http/modal/server.rs b/server/src/handlers/http/modal/server.rs index c58834dfa..5b4691555 100644 --- a/server/src/handlers/http/modal/server.rs +++ b/server/src/handlers/http/modal/server.rs @@ -34,8 +34,6 @@ use async_trait::async_trait; use rustls::{Certificate, PrivateKey, ServerConfig}; use rustls_pemfile::{certs, pkcs8_private_keys}; -use serde::Deserialize; -use serde::Serialize; use crate::{ handlers::http::{ @@ -47,10 +45,8 @@ use crate::{ rbac::role::Action, }; -use super::parseable_server::OpenIdClient; -use super::parseable_server::ParseableServer; - -pub const DEFAULT_VERSION: &str = "v3"; +use super::OpenIdClient; +use super::ParseableServer; include!(concat!(env!("OUT_DIR"), "/generated.rs")); @@ -378,63 +374,3 @@ impl Server { web::resource("/about").route(web::get().to(about::about).authorize(Action::GetAbout)) } } - -#[derive(Serialize, Debug, Deserialize)] -pub struct IngesterMetadata { - version: String, - address: String, - port: String, - origin: String, - bucket_name: String, -} - -impl IngesterMetadata { - pub fn new( - address: String, - port: String, - origin: String, - version: String, - bucket_name: String, - ) -> Self { - Self { - address, - port, - origin, - version, - bucket_name, - } - } -} - -#[cfg(test)] -mod test { - use actix_web::body::MessageBody; - use rstest::rstest; - - use super::{IngesterMetadata, DEFAULT_VERSION}; - - #[rstest] - fn check_resource() { - let im = IngesterMetadata::new( - "0.0.0.0".to_string(), - "8000".to_string(), - "https://localhost:8000".to_string(), - DEFAULT_VERSION.to_string(), - "somebucket".to_string(), - ); - - let lhs = serde_json::to_string(&im) - .unwrap() - .try_into_bytes() - .unwrap(); - let rhs = br#"{"version":"v3", -"address":"0.0.0.0", -"port":"8000", -"origin":"https://localhost:8000", 
-"bucket_name":"somebucket"}"# - .try_into_bytes() - .unwrap(); - - assert_eq!(lhs, rhs); - } -} From 3dfc0c3cb3dd57241c44f4aa39160fa23ee78bf8 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Tue, 6 Feb 2024 13:25:02 +0530 Subject: [PATCH 36/84] chore: rename function to something more discripive --- server/src/storage/s3.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/server/src/storage/s3.rs b/server/src/storage/s3.rs index e8e91ba8c..58b8b2219 100644 --- a/server/src/storage/s3.rs +++ b/server/src/storage/s3.rs @@ -209,7 +209,7 @@ impl ObjectStorageProvider for S3Config { } } -fn to_path(path: &RelativePath) -> StorePath { +fn to_object_store_path(path: &RelativePath) -> StorePath { StorePath::from(path.as_str()) } @@ -223,6 +223,7 @@ impl S3 { let instant = Instant::now(); let resp = self.client.get(&to_path(path)).await; + let resp = self.client.get(&to_object_store_path(path)).await; match resp { Ok(resp) => { @@ -250,6 +251,7 @@ impl S3 { ) -> Result<(), ObjectStorageError> { let time = Instant::now(); let resp = self.client.put(&to_path(path), resource).await; + let resp = self.client.put(&to_object_store_path(path), resource).await; let status = if resp.is_ok() { "200" } else { "400" }; let time = time.elapsed().as_secs_f64(); REQUEST_RESPONSE_TIME From bbfbb7b71195456f4c8f095c12886b04470828a4 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Tue, 6 Feb 2024 17:19:32 +0530 Subject: [PATCH 37/84] commiting for sanity --- server/src/handlers/http.rs | 22 ++++++--- .../src/handlers/http/modal/ingest_server.rs | 2 +- server/src/handlers/http/modal/mod.rs | 5 +- .../src/handlers/http/modal/query_server.rs | 42 ++++++++++++---- server/src/handlers/http/modal/server.rs | 2 +- server/src/storage/localfs.rs | 41 +++++++++++++++- server/src/storage/object_storage.rs | 3 ++ server/src/storage/s3.rs | 49 ++++++++++++++++++- 8 files changed, 143 insertions(+), 23 deletions(-) diff --git a/server/src/handlers/http.rs 
b/server/src/handlers/http.rs index 82c76d242..9de18cc1f 100644 --- a/server/src/handlers/http.rs +++ b/server/src/handlers/http.rs @@ -20,6 +20,7 @@ use std::sync::Arc; use actix_cors::Cors; use actix_web_prometheus::PrometheusMetrics; +use tokio::sync::Mutex; use crate::option::CONFIG; @@ -35,7 +36,7 @@ mod kinesis; pub(crate) mod llm; pub(crate) mod logstream; pub(crate) mod middleware; -mod modal; +pub(crate) mod modal; pub(crate) mod oidc; mod otel; pub(crate) mod query; @@ -52,13 +53,22 @@ pub async fn run_http( prometheus: PrometheusMetrics, oidc_client: Option, ) -> anyhow::Result<()> { - let server: Arc = match CONFIG.parseable.mode { - Mode::Query => Arc::new(QueryServer), - Mode::Ingest => Arc::new(IngestServer), - Mode::All => Arc::new(Server), + let server: Arc> = match CONFIG.parseable.mode { + Mode::Query => { + dbg!("Mode::Query"); + Arc::new(Mutex::new(QueryServer::default())) + } + Mode::Ingest => { + dbg!("Mode::Ingest"); + Arc::new(Mutex::new(IngestServer)) + } + Mode::All => { + dbg!("Mode::All"); + Arc::new(Mutex::new(Server)) + } }; - server.start(prometheus, oidc_client).await?; + server.try_lock()?.start(prometheus, oidc_client).await?; Ok(()) } diff --git a/server/src/handlers/http/modal/ingest_server.rs b/server/src/handlers/http/modal/ingest_server.rs index 414ef8477..a2f3ebfba 100644 --- a/server/src/handlers/http/modal/ingest_server.rs +++ b/server/src/handlers/http/modal/ingest_server.rs @@ -48,7 +48,7 @@ pub struct IngestServer; #[async_trait(?Send)] impl ParseableServer for IngestServer { async fn start( - &self, + &mut self, prometheus: PrometheusMetrics, oidc_client: Option, ) -> anyhow::Result<()> { diff --git a/server/src/handlers/http/modal/mod.rs b/server/src/handlers/http/modal/mod.rs index f4def6528..cf7161850 100644 --- a/server/src/handlers/http/modal/mod.rs +++ b/server/src/handlers/http/modal/mod.rs @@ -33,6 +33,7 @@ use serde::Serialize; pub type OpenIdClient = Arc>; pub const DEFAULT_VERSION: &str = "v3"; +pub const 
INGESTOR_FILE_EXTENSION: &str = "ingestor.json"; #[async_trait(?Send)] pub trait ParseableServer { @@ -40,13 +41,13 @@ pub trait ParseableServer { /// configure the server async fn start( - &self, + &mut self, prometheus: PrometheusMetrics, oidc_client: Option, ) -> anyhow::Result<()>; } -#[derive(Serialize, Debug, Deserialize)] +#[derive(Serialize, Debug, Deserialize, Default)] pub struct IngesterMetadata { version: String, address: String, diff --git a/server/src/handlers/http/modal/query_server.rs b/server/src/handlers/http/modal/query_server.rs index 7625fcd3d..12945f041 100644 --- a/server/src/handlers/http/modal/query_server.rs +++ b/server/src/handlers/http/modal/query_server.rs @@ -16,38 +16,44 @@ * */ -use actix_web::web; -use std::sync::Arc; - use crate::handlers::http::middleware::RouteExt; use crate::handlers::http::{ base_path, cross_origin_config, logstream, API_BASE_PATH, API_VERSION, }; use crate::rbac::role::Action; +use actix_web::web; use actix_web::web::ServiceConfig; use actix_web::{App, HttpServer}; use actix_web_static_files::ResourceFiles; use async_trait::async_trait; +use itertools::Itertools; +use relative_path::RelativePathBuf; +use std::sync::Arc; use crate::option::CONFIG; use super::server::Server; use super::ssl_acceptor::get_ssl_acceptor; -use super::{OpenIdClient, ParseableServer}; +use super::{IngesterMetadata, OpenIdClient, ParseableServer}; include!(concat!(env!("OUT_DIR"), "/generated.rs")); -#[derive(Default)] -pub struct QueryServer; +type IngesterMetadataArr = Vec; +type IngesterMetadataPtr = Arc; + +#[derive(Default, Debug)] +pub struct QueryServer(IngesterMetadataPtr); #[async_trait(?Send)] impl ParseableServer for QueryServer { async fn start( - &self, + &mut self, prometheus: actix_web_prometheus::PrometheusMetrics, oidc_client: Option, ) -> anyhow::Result<()> { - let store = CONFIG.storage().get_object_store(); + + // ! 
get object for local-storage is broken + self.0 = self.get_ingestor_info().await?; let oidc_client = match oidc_client { Some(config) => { @@ -104,9 +110,7 @@ impl QueryServer { web::scope(&base_path()) // POST "/query" ==> Get results of the SQL query passed in request body .service(Server::get_query_factory()) - // GET "/liveness" ==> Liveness check as per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command .service(Server::get_liveness_factory()) - // GET "/readiness" ==> Readiness check as per https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes .service(Server::get_readiness_factory()) // GET "/about" ==> Returns information about instance .service(Server::get_about_factory()) @@ -125,4 +129,22 @@ impl QueryServer { ) .service(ResourceFiles::new("/", generated).resolve_not_found_to_root()); } + + async fn get_ingestor_info(&self) -> anyhow::Result { + let store = CONFIG.storage().get_object_store(); + + let root_path = RelativePathBuf::from(""); + + // ! get object for local-storage is broken + let arr = store + .get_objects(&root_path) + .await? 
+ .to_vec() + .iter() + // this unwrap will most definateley shoot me in the foot later + .map(|x| serde_json::from_slice::(x).unwrap_or_default()) + .collect_vec(); + + Ok(Arc::new(arr)) + } } diff --git a/server/src/handlers/http/modal/server.rs b/server/src/handlers/http/modal/server.rs index 5b4691555..4f9d68e67 100644 --- a/server/src/handlers/http/modal/server.rs +++ b/server/src/handlers/http/modal/server.rs @@ -56,7 +56,7 @@ pub struct Server; #[async_trait(?Send)] impl ParseableServer for Server { async fn start( - &self, + &mut self, prometheus: PrometheusMetrics, oidc_client: Option, ) -> anyhow::Result<()> { diff --git a/server/src/storage/localfs.rs b/server/src/storage/localfs.rs index d56e1565f..a31e84ba7 100644 --- a/server/src/storage/localfs.rs +++ b/server/src/storage/localfs.rs @@ -31,8 +31,11 @@ use relative_path::RelativePath; use tokio::fs::{self, DirEntry}; use tokio_stream::wrappers::ReadDirStream; -use crate::metrics::storage::{localfs::REQUEST_RESPONSE_TIME, StorageMetrics}; use crate::option::validation; +use crate::{ + handlers::http::modal::INGESTOR_FILE_EXTENSION, + metrics::storage::{localfs::REQUEST_RESPONSE_TIME, StorageMetrics}, +}; use super::{ LogStream, ObjectStorage, ObjectStorageError, ObjectStorageProvider, STREAM_METADATA_FILE_NAME, @@ -112,6 +115,42 @@ impl ObjectStorage for LocalFS { res } + // ! get object for local-storage is broken + async fn get_objects( + &self, + base_path: &RelativePath, + ) -> Result, ObjectStorageError> { + let time = Instant::now(); + let path = self.path_in_root(base_path); + let mut entries = fs::read_dir(path).await?; + let mut res = Vec::new(); + while let Some(entry) = entries.next_entry().await? 
{ + let ingestor_file = entry + .path() + .extension() + .unwrap_or_default() // I have no idea what the default is Add a test to check it out + .to_str() + .unwrap() + .eq(INGESTOR_FILE_EXTENSION); + dbg!(&ingestor_file); + if !ingestor_file { + continue; + } + + let file = fs::read(entry.path()).await?; + res.push(file.into()); + } + + // maybe change the return code + let status = if res.is_empty() { "200" } else { "400" }; + let time = time.elapsed().as_secs_f64(); + REQUEST_RESPONSE_TIME + .with_label_values(&["GET", status]) + .observe(time); + + Ok(res) + } + async fn put_object( &self, path: &RelativePath, diff --git a/server/src/storage/object_storage.rs b/server/src/storage/object_storage.rs index 6feaab717..1008a9941 100644 --- a/server/src/storage/object_storage.rs +++ b/server/src/storage/object_storage.rs @@ -63,6 +63,8 @@ pub trait ObjectStorageProvider: StorageMetrics + std::fmt::Debug { #[async_trait] pub trait ObjectStorage: Sync + 'static { async fn get_object(&self, path: &RelativePath) -> Result; + async fn get_objects(&self, base_path: &RelativePath) + -> Result, ObjectStorageError>; async fn put_object( &self, path: &RelativePath, @@ -289,6 +291,7 @@ pub trait ObjectStorage: Sync + 'static { self.put_object(&path, to_bytes(&manifest)).await } + // gets the snapshot of the stream async fn get_snapshot(&self, stream: &str) -> Result { let path = stream_json_path(stream); let bytes = self.get_object(&path).await?; diff --git a/server/src/storage/s3.rs b/server/src/storage/s3.rs index 58b8b2219..6f825e102 100644 --- a/server/src/storage/s3.rs +++ b/server/src/storage/s3.rs @@ -38,6 +38,7 @@ use std::path::Path as StdPath; use std::sync::Arc; use std::time::{Duration, Instant}; +use crate::handlers::http::modal::INGESTOR_FILE_EXTENSION; use crate::metrics::storage::{s3::REQUEST_RESPONSE_TIME, StorageMetrics}; use crate::storage::{LogStream, ObjectStorage, ObjectStorageError}; @@ -222,7 +223,6 @@ impl S3 { async fn _get_object(&self, path: 
&RelativePath) -> Result { let instant = Instant::now(); - let resp = self.client.get(&to_path(path)).await; let resp = self.client.get(&to_object_store_path(path)).await; match resp { @@ -250,7 +250,6 @@ impl S3 { resource: Bytes, ) -> Result<(), ObjectStorageError> { let time = Instant::now(); - let resp = self.client.put(&to_path(path), resource).await; let resp = self.client.put(&to_object_store_path(path), resource).await; let status = if resp.is_ok() { "200" } else { "400" }; let time = time.elapsed().as_secs_f64(); @@ -405,6 +404,52 @@ impl ObjectStorage for S3 { Ok(self._get_object(path).await?) } + // TBD is this the right way or the api calls are too many? + async fn get_objects( + &self, + base_path: &RelativePath, + ) -> Result, ObjectStorageError> { + let instant = Instant::now(); + + let mut list_stream = self + .client + .list(Some(&to_object_store_path(base_path))) + .await?; + + let mut res = vec![]; + + while let Some(meta) = list_stream.next().await.transpose()? { + let ingestor_file = meta + .location + .extension() + .unwrap_or(".not") + .eq(INGESTOR_FILE_EXTENSION); + + if !ingestor_file { + continue; + } + + let byts = self + .get_object( + &RelativePath::from_path(meta.location.as_ref()).map_err(|err| { + ObjectStorageError::Custom( + format!("Error while getting files: {:}", err).into(), + ) + })?, + ) + .await?; + + res.push(byts); + } + + let instant = instant.elapsed().as_secs_f64(); + REQUEST_RESPONSE_TIME + .with_label_values(&["GET", "200"]) + .observe(instant); + + Ok(res) + } + async fn put_object( &self, path: &RelativePath, From 8712cbbce9481d0b21a753eb13274113b7d576a6 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Wed, 7 Feb 2024 21:32:49 +0530 Subject: [PATCH 38/84] misc --- server/src/handlers/http/modal/mod.rs | 11 ++++++----- server/src/handlers/http/modal/query_server.rs | 1 - server/src/storage/localfs.rs | 3 +-- server/src/storage/object_storage.rs | 7 +++++-- 4 files changed, 12 insertions(+), 10 deletions(-) diff 
--git a/server/src/handlers/http/modal/mod.rs b/server/src/handlers/http/modal/mod.rs index cf7161850..70a4e823f 100644 --- a/server/src/handlers/http/modal/mod.rs +++ b/server/src/handlers/http/modal/mod.rs @@ -32,6 +32,7 @@ use serde::Deserialize; use serde::Serialize; pub type OpenIdClient = Arc>; +// to be decided on what the Default version should be pub const DEFAULT_VERSION: &str = "v3"; pub const INGESTOR_FILE_EXTENSION: &str = "ingestor.json"; @@ -49,11 +50,11 @@ pub trait ParseableServer { #[derive(Serialize, Debug, Deserialize, Default)] pub struct IngesterMetadata { - version: String, - address: String, - port: String, - origin: String, - bucket_name: String, + pub version: String, + pub address: String, + pub port: String, + pub origin: String, // domain + pub bucket_name: String, } impl IngesterMetadata { diff --git a/server/src/handlers/http/modal/query_server.rs b/server/src/handlers/http/modal/query_server.rs index 12945f041..3c630d555 100644 --- a/server/src/handlers/http/modal/query_server.rs +++ b/server/src/handlers/http/modal/query_server.rs @@ -52,7 +52,6 @@ impl ParseableServer for QueryServer { oidc_client: Option, ) -> anyhow::Result<()> { - // ! get object for local-storage is broken self.0 = self.get_ingestor_info().await?; let oidc_client = match oidc_client { diff --git a/server/src/storage/localfs.rs b/server/src/storage/localfs.rs index a31e84ba7..1585e2eb9 100644 --- a/server/src/storage/localfs.rs +++ b/server/src/storage/localfs.rs @@ -79,6 +79,7 @@ impl ObjectStorageProvider for FSConfig { } pub struct LocalFS { + // absolute path of the data directory root: PathBuf, } @@ -115,7 +116,6 @@ impl ObjectStorage for LocalFS { res } - // ! 
get object for local-storage is broken async fn get_objects( &self, base_path: &RelativePath, @@ -132,7 +132,6 @@ impl ObjectStorage for LocalFS { .to_str() .unwrap() .eq(INGESTOR_FILE_EXTENSION); - dbg!(&ingestor_file); if !ingestor_file { continue; } diff --git a/server/src/storage/object_storage.rs b/server/src/storage/object_storage.rs index 1008a9941..52cc05b41 100644 --- a/server/src/storage/object_storage.rs +++ b/server/src/storage/object_storage.rs @@ -63,8 +63,11 @@ pub trait ObjectStorageProvider: StorageMetrics + std::fmt::Debug { #[async_trait] pub trait ObjectStorage: Sync + 'static { async fn get_object(&self, path: &RelativePath) -> Result; - async fn get_objects(&self, base_path: &RelativePath) - -> Result, ObjectStorageError>; + // want to make it more generic with a filter function + async fn get_objects( + &self, + base_path: Option<&RelativePath>, + ) -> Result, ObjectStorageError>; async fn put_object( &self, path: &RelativePath, From afb8c7c29c02165920e02789503691d7670c327c Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Wed, 7 Feb 2024 21:33:38 +0530 Subject: [PATCH 39/84] add root param to s3 struct --- server/src/storage/s3.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/src/storage/s3.rs b/server/src/storage/s3.rs index 6f825e102..a7be640fc 100644 --- a/server/src/storage/s3.rs +++ b/server/src/storage/s3.rs @@ -198,6 +198,7 @@ impl ObjectStorageProvider for S3Config { Arc::new(S3 { client: s3, bucket: self.bucket_name.clone(), + root: StorePath::from(""), }) } @@ -217,6 +218,7 @@ fn to_object_store_path(path: &RelativePath) -> StorePath { pub struct S3 { client: LimitStore, bucket: String, + root: StorePath } impl S3 { From dff79d774ef73d6939cc6a980ff0933071249b54 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Wed, 7 Feb 2024 21:35:08 +0530 Subject: [PATCH 40/84] does my life have any meaning? 
Just some param switches and getting and parsing .ingestor.json files --- .../src/handlers/http/modal/ingest_server.rs | 10 +++-- .../src/handlers/http/modal/query_server.rs | 41 +++++++++++++++---- server/src/storage/localfs.rs | 21 ++++++---- server/src/storage/s3.rs | 16 +++++--- 4 files changed, 66 insertions(+), 22 deletions(-) diff --git a/server/src/handlers/http/modal/ingest_server.rs b/server/src/handlers/http/modal/ingest_server.rs index a2f3ebfba..0bdbd7a32 100644 --- a/server/src/handlers/http/modal/ingest_server.rs +++ b/server/src/handlers/http/modal/ingest_server.rs @@ -18,7 +18,6 @@ use crate::handlers::http::API_BASE_PATH; use crate::handlers::http::API_VERSION; -use crate::utils::hostname_unchecked; use std::net::SocketAddr; use std::sync::Arc; @@ -131,10 +130,15 @@ impl IngestServer { let sock = self.get_ingestor_address(); let path = RelativePathBuf::from(format!( "{}.{}.ingestor.json", - hostname_unchecked(), + sock.ip(), // this might be wrong sock.port() )); + if let Ok(_) = store.get_object(&path).await { + println!("Ingestor metadata already exists"); + return Ok(()); + }; + let resource = IngesterMetadata::new( sock.ip().to_string(), sock.port().to_string(), @@ -142,7 +146,7 @@ impl IngestServer { .parseable .domain_address .clone() - .unwrap_or(Url::parse("https://0.0.0.0:8000").unwrap()) + .unwrap_or(Url::parse(&format!("https://{}:{}", sock.ip(), sock.port())).unwrap()) .to_string(), DEFAULT_VERSION.to_string(), store.get_bucket_name(), diff --git a/server/src/handlers/http/modal/query_server.rs b/server/src/handlers/http/modal/query_server.rs index 3c630d555..711545fb3 100644 --- a/server/src/handlers/http/modal/query_server.rs +++ b/server/src/handlers/http/modal/query_server.rs @@ -21,6 +21,7 @@ use crate::handlers::http::{ base_path, cross_origin_config, logstream, API_BASE_PATH, API_VERSION, }; use crate::rbac::role::Action; +use actix_web::http::header; use actix_web::web; use actix_web::web::ServiceConfig; use actix_web::{App, 
HttpServer}; @@ -29,6 +30,7 @@ use async_trait::async_trait; use itertools::Itertools; use relative_path::RelativePathBuf; use std::sync::Arc; +use url::Url; use crate::option::CONFIG; @@ -39,10 +41,10 @@ use super::{IngesterMetadata, OpenIdClient, ParseableServer}; include!(concat!(env!("OUT_DIR"), "/generated.rs")); type IngesterMetadataArr = Vec; -type IngesterMetadataPtr = Arc; +type IngesterMetaPtr = Arc; #[derive(Default, Debug)] -pub struct QueryServer(IngesterMetadataPtr); +pub struct QueryServer(IngesterMetaPtr); #[async_trait(?Send)] impl ParseableServer for QueryServer { @@ -51,9 +53,22 @@ impl ParseableServer for QueryServer { prometheus: actix_web_prometheus::PrometheusMetrics, oidc_client: Option, ) -> anyhow::Result<()> { - self.0 = self.get_ingestor_info().await?; + // on subsequent runs, the qurier should check if the ingestor is up and running or not + for ingester in self.0.iter() { + // yes the format macro does not need the '/' ingester.origin already + // has '/' because Url::Parse will add it if it is not present + // uri should be something like `http://address/api/v1/liveness` + let uri = Url::parse(&format!("{}{}/liveness", &ingester.origin, base_path()))?; + + if !Self::check_liveness(uri).await { + eprintln!("Ingestor at {} is not reachable", &ingester.origin); + } else { + println!("Ingestor at {} is up and running", &ingester.origin); + } + } + let oidc_client = match oidc_client { Some(config) => { let client = config @@ -129,14 +144,13 @@ impl QueryServer { .service(ResourceFiles::new("/", generated).resolve_not_found_to_root()); } - async fn get_ingestor_info(&self) -> anyhow::Result { + async fn get_ingestor_info(&self) -> anyhow::Result { let store = CONFIG.storage().get_object_store(); let root_path = RelativePathBuf::from(""); - - // ! get object for local-storage is broken + dbg!(&root_path); let arr = store - .get_objects(&root_path) + .get_objects(Some(&root_path)) .await? 
.to_vec() .iter() @@ -146,4 +160,17 @@ impl QueryServer { Ok(Arc::new(arr)) } + + pub async fn check_liveness(uri: Url) -> bool { + let reqw = reqwest::Client::new() + .get(uri) + .header(header::CONTENT_TYPE, "application/json") + .send() + .await; + + match reqw { + Ok(_) => true, + Err(_) => false, + } + } } diff --git a/server/src/storage/localfs.rs b/server/src/storage/localfs.rs index 1585e2eb9..8875d2080 100644 --- a/server/src/storage/localfs.rs +++ b/server/src/storage/localfs.rs @@ -118,20 +118,27 @@ impl ObjectStorage for LocalFS { async fn get_objects( &self, - base_path: &RelativePath, + base_path: Option<&RelativePath>, ) -> Result, ObjectStorageError> { let time = Instant::now(); - let path = self.path_in_root(base_path); - let mut entries = fs::read_dir(path).await?; + + let prefix = if let Some(path) = base_path { + path.to_path(&self.root) + } else { + self.root.clone() + }; + + let mut entries = fs::read_dir(&prefix).await?; let mut res = Vec::new(); while let Some(entry) = entries.next_entry().await? { let ingestor_file = entry .path() - .extension() - .unwrap_or_default() // I have no idea what the default is Add a test to check it out + .file_name() + .unwrap_or_default() .to_str() - .unwrap() - .eq(INGESTOR_FILE_EXTENSION); + .unwrap_or_default() + .contains(INGESTOR_FILE_EXTENSION); + if !ingestor_file { continue; } diff --git a/server/src/storage/s3.rs b/server/src/storage/s3.rs index a7be640fc..0b8d1b502 100644 --- a/server/src/storage/s3.rs +++ b/server/src/storage/s3.rs @@ -409,13 +409,19 @@ impl ObjectStorage for S3 { // TBD is this the right way or the api calls are too many? 
async fn get_objects( &self, - base_path: &RelativePath, + base_path: Option<&RelativePath>, ) -> Result, ObjectStorageError> { let instant = Instant::now(); + let prefix = if let Some(base_path) = base_path { + to_object_store_path(base_path) + } else { + self.root.clone() + }; + let mut list_stream = self .client - .list(Some(&to_object_store_path(base_path))) + .list(Some(&prefix)) .await?; let mut res = vec![]; @@ -423,9 +429,9 @@ impl ObjectStorage for S3 { while let Some(meta) = list_stream.next().await.transpose()? { let ingestor_file = meta .location - .extension() - .unwrap_or(".not") - .eq(INGESTOR_FILE_EXTENSION); + .filename() + .unwrap_or_default() + .contains(INGESTOR_FILE_EXTENSION); if !ingestor_file { continue; From b08804b309132da5d6026309b4c905817028978c Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Wed, 7 Feb 2024 21:49:00 +0530 Subject: [PATCH 41/84] remove upload interval from cli (rebase on main #fb3fd21) --- server/src/cli.rs | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/server/src/cli.rs b/server/src/cli.rs index 984124263..691547f86 100644 --- a/server/src/cli.rs +++ b/server/src/cli.rs @@ -51,10 +51,6 @@ pub struct Cli { /// Size for local cache pub local_cache_size: u64, - /// Interval in seconds after which uncommited data would be - /// uploaded to the storage platform. 
- pub upload_interval: u64, - /// Username for the basic authentication on the server pub username: String, @@ -101,7 +97,6 @@ impl Cli { pub const STAGING: &'static str = "local-staging-path"; pub const CACHE: &'static str = "cache-path"; pub const CACHE_SIZE: &'static str = "cache-size"; - pub const UPLOAD_INTERVAL: &'static str = "upload-interval"; pub const USERNAME: &'static str = "username"; pub const PASSWORD: &'static str = "password"; pub const CHECK_UPDATE: &'static str = "check-update"; @@ -187,16 +182,7 @@ impl Cli { .help("Maximum allowed cache size for all streams combined (In human readable format, e.g 1GiB, 2GiB, 100MB)") .next_line_help(true), ) - .arg( - Arg::new(Self::UPLOAD_INTERVAL) - .long(Self::UPLOAD_INTERVAL) - .env("P_STORAGE_UPLOAD_INTERVAL") - .value_name("SECONDS") - .default_value("60") - .value_parser(validation::upload_interval) - .help("Interval in seconds after which staging data would be sent to the storage") - .next_line_help(true), - ) + .arg( Arg::new(Self::USERNAME) .long(Self::USERNAME) @@ -376,10 +362,6 @@ impl FromArgMatches for Cli { .get_one::(Self::CACHE_SIZE) .cloned() .expect("default value for cache size"); - self.upload_interval = m - .get_one::(Self::UPLOAD_INTERVAL) - .cloned() - .expect("default value for upload"); self.username = m .get_one::(Self::USERNAME) .cloned() From b93e63e6d2d308420cc07ea745daa5e5de5e3692 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Wed, 7 Feb 2024 21:57:05 +0530 Subject: [PATCH 42/84] misc comment --- server/src/option.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/server/src/option.rs b/server/src/option.rs index f00f51712..0aec6d1ac 100644 --- a/server/src/option.rs +++ b/server/src/option.rs @@ -98,6 +98,9 @@ impl Config { } } + // validate the storage, if the proper path for staging directory is provided + // if the proper data directory is provided, or s3 bucket is provided etc + // probably change name to validate_storage or something pub 
async fn validate(&self) -> Result<(), ObjectStorageError> { let obj_store = self.storage.get_object_store(); let rel_path = relative_path::RelativePathBuf::from(PARSEABLE_METADATA_FILE_NAME); @@ -150,11 +153,10 @@ impl Config { // drive --> Local drive // s3 --> S3 bucket pub fn get_storage_mode_string(&self) -> &str { - let mut mode = "S3 bucket"; if self.storage_name == "drive" { - mode = "Local drive"; + return "Local drive"; } - mode + "S3 bucket" } } From f6755ae54b248f5a95d386b3d1d1645974e9ff84 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Thu, 8 Feb 2024 14:37:20 +0530 Subject: [PATCH 43/84] make deepsource happy --- server/src/handlers/http/modal/ingest_server.rs | 4 ++-- server/src/handlers/http/modal/query_server.rs | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/server/src/handlers/http/modal/ingest_server.rs b/server/src/handlers/http/modal/ingest_server.rs index 0bdbd7a32..202aa91e7 100644 --- a/server/src/handlers/http/modal/ingest_server.rs +++ b/server/src/handlers/http/modal/ingest_server.rs @@ -134,7 +134,7 @@ impl IngestServer { sock.port() )); - if let Ok(_) = store.get_object(&path).await { + if store.get_object(&path).await.is_ok() { println!("Ingestor metadata already exists"); return Ok(()); }; @@ -146,7 +146,7 @@ impl IngestServer { .parseable .domain_address .clone() - .unwrap_or(Url::parse(&format!("https://{}:{}", sock.ip(), sock.port())).unwrap()) + .unwrap_or_else(|| Url::parse(&format!("https://{}:{}", sock.ip(), sock.port())).unwrap()) .to_string(), DEFAULT_VERSION.to_string(), store.get_bucket_name(), diff --git a/server/src/handlers/http/modal/query_server.rs b/server/src/handlers/http/modal/query_server.rs index 711545fb3..becb5e220 100644 --- a/server/src/handlers/http/modal/query_server.rs +++ b/server/src/handlers/http/modal/query_server.rs @@ -148,7 +148,6 @@ impl QueryServer { let store = CONFIG.storage().get_object_store(); let root_path = RelativePathBuf::from(""); - dbg!(&root_path); let arr = 
store .get_objects(Some(&root_path)) .await? From f47b4fbbca5839876a49545565ae94119fbd2d28 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Thu, 8 Feb 2024 17:07:58 +0530 Subject: [PATCH 44/84] move data sync logic to a different file Keep main.rs clean --- server/src/main.rs | 99 ++------------------------------------- server/src/sync.rs | 112 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 95 deletions(-) create mode 100644 server/src/sync.rs diff --git a/server/src/main.rs b/server/src/main.rs index 8a4cf65da..4a5d02fd8 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -16,15 +16,6 @@ * */ -use clokwerk::{AsyncScheduler, Job, Scheduler, TimeUnits}; -use thread_priority::{ThreadBuilder, ThreadPriority}; -use tokio::sync::oneshot; -use tokio::sync::oneshot::error::TryRecvError; - -use std::panic::{catch_unwind, AssertUnwindSafe}; -use std::thread::{self, JoinHandle}; -use std::time::Duration; - mod about; mod alerts; mod analytics; @@ -45,6 +36,7 @@ mod rbac; mod response; mod stats; mod storage; +mod sync; mod utils; mod validator; @@ -82,9 +74,9 @@ async fn main() -> anyhow::Result<()> { // load data from stats back to prometheus metrics metrics::fetch_stats_from_storage().await; - let (localsync_handler, mut localsync_outbox, localsync_inbox) = run_local_sync(); + let (localsync_handler, mut localsync_outbox, localsync_inbox) = sync::run_local_sync(); let (mut remote_sync_handler, mut remote_sync_outbox, mut remote_sync_inbox) = - object_store_sync(); + sync::object_store_sync(); // all internal data structures populated now. 
// start the analytics scheduler if enabled @@ -114,92 +106,9 @@ async fn main() -> anyhow::Result<()> { _ = &mut remote_sync_outbox => { // remote_sync failed, this is recoverable by just starting remote_sync thread again remote_sync_handler.join().unwrap_or(()); - (remote_sync_handler, remote_sync_outbox, remote_sync_inbox) = object_store_sync(); + (remote_sync_handler, remote_sync_outbox, remote_sync_inbox) = sync::object_store_sync(); } }; } } - -fn object_store_sync() -> (JoinHandle<()>, oneshot::Receiver<()>, oneshot::Sender<()>) { - let (outbox_tx, outbox_rx) = oneshot::channel::<()>(); - let (inbox_tx, inbox_rx) = oneshot::channel::<()>(); - let mut inbox_rx = AssertUnwindSafe(inbox_rx); - let handle = thread::spawn(move || { - let res = catch_unwind(move || { - let rt = actix_web::rt::System::new(); - rt.block_on(async { - let mut scheduler = AsyncScheduler::new(); - scheduler - .every(STORAGE_UPLOAD_INTERVAL.seconds()) - // Extra time interval is added so that this schedular does not race with local sync. - .plus(5u32.seconds()) - .run(|| async { - if let Err(e) = CONFIG.storage().get_object_store().sync().await { - log::warn!("failed to sync local data with object store. 
{:?}", e); - } - }); - - loop { - tokio::time::sleep(Duration::from_secs(1)).await; - scheduler.run_pending().await; - match AssertUnwindSafe(|| inbox_rx.try_recv())() { - Ok(_) => break, - Err(TryRecvError::Empty) => continue, - Err(TryRecvError::Closed) => { - // should be unreachable but breaking anyways - break; - } - } - } - }) - }); - - if res.is_err() { - outbox_tx.send(()).unwrap(); - } - }); - - (handle, outbox_rx, inbox_tx) -} - -fn run_local_sync() -> (JoinHandle<()>, oneshot::Receiver<()>, oneshot::Sender<()>) { - let (outbox_tx, outbox_rx) = oneshot::channel::<()>(); - let (inbox_tx, inbox_rx) = oneshot::channel::<()>(); - let mut inbox_rx = AssertUnwindSafe(inbox_rx); - - let handle = ThreadBuilder::default() - .name("local-sync") - .priority(ThreadPriority::Max) - .spawn(move |priority_result| { - if priority_result.is_err() { - log::warn!("Max priority cannot be set for sync thread. Make sure that user/program is allowed to set thread priority.") - } - let res = catch_unwind(move || { - let mut scheduler = Scheduler::new(); - scheduler - .every((storage::LOCAL_SYNC_INTERVAL as u32).seconds()) - .run(move || crate::event::STREAM_WRITERS.unset_all()); - - loop { - thread::sleep(Duration::from_millis(50)); - scheduler.run_pending(); - match AssertUnwindSafe(|| inbox_rx.try_recv())() { - Ok(_) => break, - Err(TryRecvError::Empty) => continue, - Err(TryRecvError::Closed) => { - // should be unreachable but breaking anyways - break; - } - } - } - }); - - if res.is_err() { - outbox_tx.send(()).unwrap(); - } - }) - .unwrap(); - - (handle, outbox_rx, inbox_tx) -} diff --git a/server/src/sync.rs b/server/src/sync.rs new file mode 100644 index 000000000..d7eb5d2d7 --- /dev/null +++ b/server/src/sync.rs @@ -0,0 +1,112 @@ +/* + * Parseable Server (C) 2022 - 2024 Parseable, Inc. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + */ + +use clokwerk::{AsyncScheduler, Job, Scheduler, TimeUnits}; +use thread_priority::{ThreadBuilder, ThreadPriority}; +use tokio::sync::oneshot; +use tokio::sync::oneshot::error::TryRecvError; + +use std::panic::{catch_unwind, AssertUnwindSafe}; +use std::thread::{self, JoinHandle}; +use std::time::Duration; + +use crate::option::CONFIG; +use crate::{storage, STORAGE_UPLOAD_INTERVAL}; + +pub(crate) fn object_store_sync() -> (JoinHandle<()>, oneshot::Receiver<()>, oneshot::Sender<()>) { + let (outbox_tx, outbox_rx) = oneshot::channel::<()>(); + let (inbox_tx, inbox_rx) = oneshot::channel::<()>(); + let mut inbox_rx = AssertUnwindSafe(inbox_rx); + let handle = thread::spawn(move || { + let res = catch_unwind(move || { + let rt = actix_web::rt::System::new(); + rt.block_on(async { + let mut scheduler = AsyncScheduler::new(); + scheduler + .every(STORAGE_UPLOAD_INTERVAL.seconds()) + // Extra time interval is added so that this schedular does not race with local sync. + .plus(5u32.seconds()) + .run(|| async { + if let Err(e) = CONFIG.storage().get_object_store().sync().await { + log::warn!("failed to sync local data with object store. 
{:?}", e); + } + }); + + loop { + tokio::time::sleep(Duration::from_secs(1)).await; + scheduler.run_pending().await; + match AssertUnwindSafe(|| inbox_rx.try_recv())() { + Ok(_) => break, + Err(TryRecvError::Empty) => continue, + Err(TryRecvError::Closed) => { + // should be unreachable but breaking anyways + break; + } + } + } + }) + }); + + if res.is_err() { + outbox_tx.send(()).unwrap(); + } + }); + + (handle, outbox_rx, inbox_tx) +} + +pub(crate) fn run_local_sync() -> (JoinHandle<()>, oneshot::Receiver<()>, oneshot::Sender<()>) { + let (outbox_tx, outbox_rx) = oneshot::channel::<()>(); + let (inbox_tx, inbox_rx) = oneshot::channel::<()>(); + let mut inbox_rx = AssertUnwindSafe(inbox_rx); + + let handle = ThreadBuilder::default() + .name("local-sync") + .priority(ThreadPriority::Max) + .spawn(move |priority_result| { + if priority_result.is_err() { + log::warn!("Max priority cannot be set for sync thread. Make sure that user/program is allowed to set thread priority.") + } + let res = catch_unwind(move || { + let mut scheduler = Scheduler::new(); + scheduler + .every((storage::LOCAL_SYNC_INTERVAL as u32).seconds()) + .run(move || crate::event::STREAM_WRITERS.unset_all()); + + loop { + thread::sleep(Duration::from_millis(50)); + scheduler.run_pending(); + match AssertUnwindSafe(|| inbox_rx.try_recv())() { + Ok(_) => break, + Err(TryRecvError::Empty) => continue, + Err(TryRecvError::Closed) => { + // should be unreachable but breaking anyways + break; + } + } + } + }); + + if res.is_err() { + outbox_tx.send(()).unwrap(); + } + }) + .unwrap(); + + (handle, outbox_rx, inbox_tx) +} From f6e351e2f206b07ab02fb914c9323785efbc8e5a Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Thu, 8 Feb 2024 17:08:22 +0530 Subject: [PATCH 45/84] misc modified: server/src/storage/s3.rs --- .../src/handlers/http/modal/ingest_server.rs | 113 +++++++++++++++--- server/src/handlers/http/modal/mod.rs | 2 + .../src/handlers/http/modal/query_server.rs | 61 +++++++++- 
server/src/handlers/http/modal/server.rs | 100 ++++++++++++++-- server/src/main.rs | 2 +- server/src/storage/s3.rs | 13 +- 6 files changed, 250 insertions(+), 41 deletions(-) diff --git a/server/src/handlers/http/modal/ingest_server.rs b/server/src/handlers/http/modal/ingest_server.rs index 202aa91e7..7b40bcd67 100644 --- a/server/src/handlers/http/modal/ingest_server.rs +++ b/server/src/handlers/http/modal/ingest_server.rs @@ -16,8 +16,17 @@ * */ +use crate::analytics; +use crate::banner; use crate::handlers::http::API_BASE_PATH; use crate::handlers::http::API_VERSION; +use crate::localcache::LocalCacheManager; +use crate::metadata; +use crate::metrics; +use crate::storage; +use crate::storage::ObjectStorageError; +use crate::storage::PARSEABLE_METADATA_FILE_NAME; +use crate::sync; use std::net::SocketAddr; use std::sync::Arc; @@ -46,22 +55,14 @@ pub struct IngestServer; #[async_trait(?Send)] impl ParseableServer for IngestServer { + // we dont need oidc client here its just here to satisfy the trait async fn start( &mut self, prometheus: PrometheusMetrics, - oidc_client: Option, + _oidc_client: Option, ) -> anyhow::Result<()> { - // get the oidc client - let oidc_client = match oidc_client { - Some(config) => { - let client = config - .connect(&format!("{API_BASE_PATH}/{API_VERSION}/o/code")) - .await?; - Some(Arc::new(client)) - } - - None => None, - }; + // check for querier state. 
Is it there, or was it there in the past + self.check_querier_state().await?; // set the ingestor metadata self.set_ingestor_metadata().await?; @@ -76,7 +77,7 @@ impl ParseableServer for IngestServer { let create_app_fn = move || { App::new() .wrap(prometheus.clone()) - .configure(|config| IngestServer::configure_routes(config, oidc_client.clone())) + .configure(|config| IngestServer::configure_routes(config, None)) .wrap(actix_web::middleware::Logger::default()) .wrap(actix_web::middleware::Compress::default()) .wrap(cross_origin_config()) @@ -100,7 +101,6 @@ impl ParseableServer for IngestServer { impl IngestServer { // configure the api routes - // odic_client is not used fn configure_routes(config: &mut web::ServiceConfig, _oidc_client: Option) { let logstream_scope = Server::get_logstream_webscope(); let ingest_factory = Server::get_ingest_factory(); @@ -124,9 +124,11 @@ impl IngestServer { .unwrap() } + // create the ingestor metadata and put the .ingestor.json file in the object store async fn set_ingestor_metadata(&self) -> anyhow::Result<()> { let store = CONFIG.storage().get_object_store(); + // remove ip adn go with the domain name let sock = self.get_ingestor_address(); let path = RelativePathBuf::from(format!( "{}.{}.ingestor.json", @@ -146,7 +148,9 @@ impl IngestServer { .parseable .domain_address .clone() - .unwrap_or_else(|| Url::parse(&format!("https://{}:{}", sock.ip(), sock.port())).unwrap()) + .unwrap_or_else(|| { + Url::parse(&format!("https://{}:{}", sock.ip(), sock.port())).unwrap() + }) .to_string(), DEFAULT_VERSION.to_string(), store.get_bucket_name(), @@ -161,4 +165,83 @@ impl IngestServer { Ok(()) } + + // check for querier state. Is it there, or was it there in the past + // this should happen before the set the ingestor metadata + async fn check_querier_state(&self) -> anyhow::Result<(), ObjectStorageError> { + // how do we check for querier state? 
+ // based on the work flow of the system, the querier will always need to start first + // i.e the querier will create the `.parseable.json` file + + let store = CONFIG.storage().get_object_store(); + let path = RelativePathBuf::from(PARSEABLE_METADATA_FILE_NAME); + + match store.get_object(&path).await { + Ok(_) => Ok(()), + Err(_) => Err(ObjectStorageError::Custom( + "Querier Server has not been started yet. Please start the querier server first." + .to_string(), + )), + } + } + + pub async fn initialize(&mut self) -> anyhow::Result<()> { + // to get the .parseable.json file in staging + let meta = storage::resolve_parseable_metadata().await?; + banner::print(&CONFIG, &meta).await; + + // set the info in the global metadata + meta.set_global(); + + if let Some(cache_manager) = LocalCacheManager::global() { + cache_manager + .validate(CONFIG.parseable.local_cache_size) + .await?; + }; + + let prom = metrics::build_metrics_handler(); + CONFIG.storage().register_store_metrics(&prom); + + let storage = CONFIG.storage().get_object_store(); + if let Err(err) = metadata::STREAM_INFO.load(&*storage).await { + log::warn!("could not populate local metadata. {:?}", err); + } + + metrics::fetch_stats_from_storage().await; + + let (localsync_handler, mut localsync_outbox, localsync_inbox) = sync::run_local_sync(); + let (mut remote_sync_handler, mut remote_sync_outbox, mut remote_sync_inbox) = + sync::object_store_sync(); + + // all internal data structures populated now. + // start the analytics scheduler if enabled + if CONFIG.parseable.send_analytics { + analytics::init_analytics_scheduler(); + } + let app = self.start(prom, CONFIG.parseable.openid.clone()); + tokio::pin!(app); + loop { + tokio::select! { + e = &mut app => { + // actix server finished .. 
stop other threads and stop the server + remote_sync_inbox.send(()).unwrap_or(()); + localsync_inbox.send(()).unwrap_or(()); + localsync_handler.join().unwrap_or(()); + remote_sync_handler.join().unwrap_or(()); + return e + }, + _ = &mut localsync_outbox => { + // crash the server if localsync fails for any reason + // panic!("Local Sync thread died. Server will fail now!") + return Err(anyhow::Error::msg("Failed to sync local data to drive. Please restart the Parseable server.\n\nJoin us on Parseable Slack if the issue persists after restart : https://launchpass.com/parseable")) + }, + _ = &mut remote_sync_outbox => { + // remote_sync failed, this is recoverable by just starting remote_sync thread again + remote_sync_handler.join().unwrap_or(()); + (remote_sync_handler, remote_sync_outbox, remote_sync_inbox) = sync::object_store_sync(); + } + + }; + } + } } diff --git a/server/src/handlers/http/modal/mod.rs b/server/src/handlers/http/modal/mod.rs index 70a4e823f..870f05b88 100644 --- a/server/src/handlers/http/modal/mod.rs +++ b/server/src/handlers/http/modal/mod.rs @@ -36,6 +36,8 @@ pub type OpenIdClient = Arc>; pub const DEFAULT_VERSION: &str = "v3"; pub const INGESTOR_FILE_EXTENSION: &str = "ingestor.json"; +include!(concat!(env!("OUT_DIR"), "/generated.rs")); + #[async_trait(?Send)] pub trait ParseableServer { // async fn validate(&self) -> Result<(), ObjectStorageError>; diff --git a/server/src/handlers/http/modal/query_server.rs b/server/src/handlers/http/modal/query_server.rs index becb5e220..fecc389a6 100644 --- a/server/src/handlers/http/modal/query_server.rs +++ b/server/src/handlers/http/modal/query_server.rs @@ -21,6 +21,7 @@ use crate::handlers::http::{ base_path, cross_origin_config, logstream, API_BASE_PATH, API_VERSION, }; use crate::rbac::role::Action; +use crate::{analytics, banner, metadata, metrics, migration, rbac, storage}; use actix_web::http::header; use actix_web::web; use actix_web::web::ServiceConfig; @@ -36,9 +37,7 @@ use 
crate::option::CONFIG; use super::server::Server; use super::ssl_acceptor::get_ssl_acceptor; -use super::{IngesterMetadata, OpenIdClient, ParseableServer}; - -include!(concat!(env!("OUT_DIR"), "/generated.rs")); +use super::{generate, IngesterMetadata, OpenIdClient, ParseableServer}; type IngesterMetadataArr = Vec; type IngesterMetaPtr = Arc; @@ -167,9 +166,59 @@ impl QueryServer { .send() .await; - match reqw { - Ok(_) => true, - Err(_) => false, + reqw.is_ok() + } + + async fn initialize(&mut self) -> anyhow::Result<()> { + migration::run_metadata_migration(&CONFIG).await?; + + let metadata = storage::resolve_parseable_metadata().await?; + banner::print(&CONFIG, &metadata).await; + + // initialize the rbac map + rbac::map::init(&metadata); + + // keep metadata info in mem + metadata.set_global(); + + let prometheus = metrics::build_metrics_handler(); + CONFIG.storage().register_store_metrics(&prometheus); + + migration::run_migration(&CONFIG).await?; + + // when do we do this + let storage = CONFIG.storage().get_object_store(); + if let Err(e) = metadata::STREAM_INFO.load(&*storage).await { + log::warn!("could not populate local metadata. {:?}", e); + } + + // track all parquet files already in the data directory + storage::retention::load_retention_from_global(); + + // load data from stats back to prometheus metrics + metrics::fetch_stats_from_storage().await; + + // all internal data structures populated now. + // start the analytics scheduler if enabled + if CONFIG.parseable.send_analytics { + analytics::init_analytics_scheduler(); + } + + // how does livetail work? + // tokio::spawn(handlers::livetail::server()); + + let app = self.start(prometheus, CONFIG.parseable.openid.clone()); + + tokio::pin!(app); + + // this never actually loops + // rather than pinning we can just await? + loop { + tokio::select! 
{ + err= &mut app => { + return err; + }, + } } } } diff --git a/server/src/handlers/http/modal/server.rs b/server/src/handlers/http/modal/server.rs index 4f9d68e67..e52dfd6a5 100644 --- a/server/src/handlers/http/modal/server.rs +++ b/server/src/handlers/http/modal/server.rs @@ -16,12 +16,22 @@ * */ +use crate::analytics; +use crate::banner; +use crate::handlers; use crate::handlers::http::about; use crate::handlers::http::base_path; use crate::handlers::http::health_check; use crate::handlers::http::query; use crate::handlers::http::API_BASE_PATH; use crate::handlers::http::API_VERSION; +use crate::localcache::LocalCacheManager; +use crate::metadata; +use crate::metrics; +use crate::migration; +use crate::rbac; +use crate::storage; +use crate::sync; use std::{fs::File, io::BufReader, sync::Arc}; use actix_web::web::resource; @@ -37,19 +47,18 @@ use rustls_pemfile::{certs, pkcs8_private_keys}; use crate::{ handlers::http::{ - cross_origin_config, ingest, llm, logstream, + self, cross_origin_config, ingest, llm, logstream, middleware::{DisAllowRootUser, RouteExt}, - oidc, rbac, role, MAX_EVENT_PAYLOAD_SIZE, + oidc, role, MAX_EVENT_PAYLOAD_SIZE, }, option::CONFIG, rbac::role::Action, }; +use super::generate; use super::OpenIdClient; use super::ParseableServer; -include!(concat!(env!("OUT_DIR"), "/generated.rs")); - #[derive(Default)] pub struct Server; @@ -305,16 +314,24 @@ impl Server { .service( web::resource("") // GET /user => List all users - .route(web::get().to(rbac::list_users).authorize(Action::ListUser)), + .route( + web::get() + .to(http::rbac::list_users) + .authorize(Action::ListUser), + ), ) .service( web::resource("/{username}") // PUT /user/{username} => Create a new user - .route(web::post().to(rbac::post_user).authorize(Action::PutUser)) + .route( + web::post() + .to(http::rbac::post_user) + .authorize(Action::PutUser), + ) // DELETE /user/{username} => Delete a user .route( web::delete() - .to(rbac::delete_user) + .to(http::rbac::delete_user) 
.authorize(Action::DeleteUser), ) .wrap(DisAllowRootUser), @@ -324,13 +341,13 @@ impl Server { // PUT /user/{username}/roles => Put roles for user .route( web::put() - .to(rbac::put_role) + .to(http::rbac::put_role) .authorize(Action::PutUserRoles) .wrap(DisAllowRootUser), ) .route( web::get() - .to(rbac::get_role) + .to(http::rbac::get_role) .authorize_for_user(Action::GetUserRoles), ), ) @@ -339,7 +356,7 @@ impl Server { // POST /user/{username}/generate-new-password => reset password for this user .route( web::post() - .to(rbac::post_gen_password) + .to(http::rbac::post_gen_password) .authorize(Action::PutUser) .wrap(DisAllowRootUser), ), @@ -373,4 +390,67 @@ impl Server { pub fn get_about_factory() -> Resource { web::resource("/about").route(web::get().to(about::about).authorize(Action::GetAbout)) } + + pub async fn initialize(&mut self) -> anyhow::Result<()> { + migration::run_metadata_migration(&CONFIG).await?; + let metadata = storage::resolve_parseable_metadata().await?; + banner::print(&CONFIG, &metadata).await; + rbac::map::init(&metadata); + metadata.set_global(); + + if let Some(cache_manager) = LocalCacheManager::global() { + cache_manager + .validate(CONFIG.parseable.local_cache_size) + .await?; + }; + + let prometheus = metrics::build_metrics_handler(); + CONFIG.storage().register_store_metrics(&prometheus); + + migration::run_migration(&CONFIG).await?; + + let storage = CONFIG.storage().get_object_store(); + if let Err(err) = metadata::STREAM_INFO.load(&*storage).await { + log::warn!("could not populate local metadata. 
{:?}", err); + } + + storage::retention::load_retention_from_global(); + metrics::fetch_stats_from_storage().await; + + let (localsync_handler, mut localsync_outbox, localsync_inbox) = sync::run_local_sync(); + let (mut remote_sync_handler, mut remote_sync_outbox, mut remote_sync_inbox) = + sync::object_store_sync(); + + if CONFIG.parseable.send_analytics { + analytics::init_analytics_scheduler(); + } + + tokio::spawn(handlers::livetail::server()); + + let app = self.start(prometheus, CONFIG.parseable.openid.clone()); + + tokio::pin!(app); + loop { + tokio::select! { + e = &mut app => { + // actix server finished .. stop other threads and stop the server + remote_sync_inbox.send(()).unwrap_or(()); + localsync_inbox.send(()).unwrap_or(()); + localsync_handler.join().unwrap_or(()); + remote_sync_handler.join().unwrap_or(()); + return e + }, + _ = &mut localsync_outbox => { + // crash the server if localsync fails for any reason + // panic!("Local Sync thread died. Server will fail now!") + return Err(anyhow::Error::msg("Failed to sync local data to drive. 
Please restart the Parseable server.\n\nJoin us on Parseable Slack if the issue persists after restart : https://launchpass.com/parseable")) + }, + _ = &mut remote_sync_outbox => { + // remote_sync failed, this is recoverable by just starting remote_sync thread again + remote_sync_handler.join().unwrap_or(()); + (remote_sync_handler, remote_sync_outbox, remote_sync_inbox) = sync::object_store_sync(); + } + }; + } + } } diff --git a/server/src/main.rs b/server/src/main.rs index 4a5d02fd8..bc13f4e43 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -48,7 +48,6 @@ pub const STORAGE_UPLOAD_INTERVAL: u32 = 60; #[actix_web::main] async fn main() -> anyhow::Result<()> { env_logger::init(); - let storage = CONFIG.storage().get_object_store(); CONFIG.validate().await?; migration::run_metadata_migration(&CONFIG).await?; let metadata = storage::resolve_parseable_metadata().await?; @@ -65,6 +64,7 @@ async fn main() -> anyhow::Result<()> { migration::run_migration(&CONFIG).await?; + let storage = CONFIG.storage().get_object_store(); if let Err(e) = metadata::STREAM_INFO.load(&*storage).await { log::warn!("could not populate local metadata. 
{:?}", e); } diff --git a/server/src/storage/s3.rs b/server/src/storage/s3.rs index 0b8d1b502..716e0fdc2 100644 --- a/server/src/storage/s3.rs +++ b/server/src/storage/s3.rs @@ -218,7 +218,7 @@ fn to_object_store_path(path: &RelativePath) -> StorePath { pub struct S3 { client: LimitStore, bucket: String, - root: StorePath + root: StorePath, } impl S3 { @@ -419,10 +419,7 @@ impl ObjectStorage for S3 { self.root.clone() }; - let mut list_stream = self - .client - .list(Some(&prefix)) - .await?; + let mut list_stream = self.client.list(Some(&prefix)).await?; let mut res = vec![]; @@ -439,10 +436,8 @@ impl ObjectStorage for S3 { let byts = self .get_object( - &RelativePath::from_path(meta.location.as_ref()).map_err(|err| { - ObjectStorageError::Custom( - format!("Error while getting files: {:}", err).into(), - ) + RelativePath::from_path(meta.location.as_ref()).map_err(|err| { + ObjectStorageError::Custom(format!("Error while getting files: {:}", err)) })?, ) .await?; From cb8ab46ef4278e32cd72ddbba378b424f227b4bc Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Thu, 8 Feb 2024 17:09:55 +0530 Subject: [PATCH 46/84] add comments --- server/src/main.rs | 3 +++ server/src/migration.rs | 2 ++ 2 files changed, 5 insertions(+) diff --git a/server/src/main.rs b/server/src/main.rs index bc13f4e43..9b70b18fe 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -64,6 +64,8 @@ async fn main() -> anyhow::Result<()> { migration::run_migration(&CONFIG).await?; + // when do we do this ingestor only most likely + // needs to be updated every so often(when and how) let storage = CONFIG.storage().get_object_store(); if let Err(e) = metadata::STREAM_INFO.load(&*storage).await { log::warn!("could not populate local metadata. 
{:?}", e); @@ -84,6 +86,7 @@ async fn main() -> anyhow::Result<()> { analytics::init_analytics_scheduler(); } + // this is supposed to happen only in query and super servers tokio::spawn(handlers::livetail::server()); let app = handlers::http::run_http(prometheus, CONFIG.parseable.openid.clone()); diff --git a/server/src/migration.rs b/server/src/migration.rs index be8e5de10..eb3b98c0e 100644 --- a/server/src/migration.rs +++ b/server/src/migration.rs @@ -49,6 +49,7 @@ pub async fn run_metadata_migration(config: &Config) -> anyhow::Result<()> { .and_then(|version| version.as_str()) } + // if storage metadata is none do nothing if let Some(storage_metadata) = storage_metadata { match get_version(&storage_metadata) { Some("v1") => { @@ -63,6 +64,7 @@ pub async fn run_metadata_migration(config: &Config) -> anyhow::Result<()> { } } + // if staging metadata is none do nothing if let Some(staging_metadata) = staging_metadata { match get_version(&staging_metadata) { Some("v1") => { From b5c1bd285b1f13f79059a67ce8c016babe5c9867 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Fri, 9 Feb 2024 12:46:33 +0530 Subject: [PATCH 47/84] temp --- .../src/handlers/http/modal/ingest_server.rs | 4 +-- server/src/handlers/http/modal/mod.rs | 1 + .../src/handlers/http/modal/query_server.rs | 30 +++++++++---------- server/src/handlers/http/modal/server.rs | 7 ++++- 4 files changed, 23 insertions(+), 19 deletions(-) diff --git a/server/src/handlers/http/modal/ingest_server.rs b/server/src/handlers/http/modal/ingest_server.rs index 7b40bcd67..c2c0a63a2 100644 --- a/server/src/handlers/http/modal/ingest_server.rs +++ b/server/src/handlers/http/modal/ingest_server.rs @@ -18,8 +18,6 @@ use crate::analytics; use crate::banner; -use crate::handlers::http::API_BASE_PATH; -use crate::handlers::http::API_VERSION; use crate::localcache::LocalCacheManager; use crate::metadata; use crate::metrics; @@ -29,7 +27,6 @@ use crate::storage::PARSEABLE_METADATA_FILE_NAME; use crate::sync; use 
std::net::SocketAddr; -use std::sync::Arc; use super::server::Server; use super::ssl_acceptor::get_ssl_acceptor; @@ -185,6 +182,7 @@ impl IngestServer { } } + #[allow(unused)] pub async fn initialize(&mut self) -> anyhow::Result<()> { // to get the .parseable.json file in staging let meta = storage::resolve_parseable_metadata().await?; diff --git a/server/src/handlers/http/modal/mod.rs b/server/src/handlers/http/modal/mod.rs index 870f05b88..2ee60abbe 100644 --- a/server/src/handlers/http/modal/mod.rs +++ b/server/src/handlers/http/modal/mod.rs @@ -21,6 +21,7 @@ pub mod query_server; pub mod server; pub mod ssl_acceptor; +use std::collections::HashMap; use std::sync::Arc; use actix_web_prometheus::PrometheusMetrics; diff --git a/server/src/handlers/http/modal/query_server.rs b/server/src/handlers/http/modal/query_server.rs index fecc389a6..92b14ec68 100644 --- a/server/src/handlers/http/modal/query_server.rs +++ b/server/src/handlers/http/modal/query_server.rs @@ -111,8 +111,6 @@ impl ParseableServer for QueryServer { impl QueryServer { // configure the api routes pub fn configure_routes(config: &mut ServiceConfig, oidc_client: Option) { - let generated = generate(); - let user_scope = Server::get_user_webscope(); let llm_scope = Server::get_llm_webscope(); let role_scope = Server::get_user_role_webscope(); @@ -140,7 +138,7 @@ impl QueryServer { .service(oauth_scope) .service(role_scope), ) - .service(ResourceFiles::new("/", generated).resolve_not_found_to_root()); + .service(Server::get_generated()); } async fn get_ingestor_info(&self) -> anyhow::Result { @@ -168,7 +166,7 @@ impl QueryServer { reqw.is_ok() } - + #[allow(unused)] async fn initialize(&mut self) -> anyhow::Result<()> { migration::run_metadata_migration(&CONFIG).await?; @@ -207,18 +205,20 @@ impl QueryServer { // how does livetail work? 
// tokio::spawn(handlers::livetail::server()); - let app = self.start(prometheus, CONFIG.parseable.openid.clone()); + self.start(prometheus, CONFIG.parseable.openid.clone()).await?; - tokio::pin!(app); + // tokio::pin!(app); - // this never actually loops - // rather than pinning we can just await? - loop { - tokio::select! { - err= &mut app => { - return err; - }, - } - } + // // this never actually loops + // // rather than pinning we can just await? + // loop { + // tokio::select! { + // err= &mut app => { + // return err; + // }, + // } + // } + + Ok(()) } } diff --git a/server/src/handlers/http/modal/server.rs b/server/src/handlers/http/modal/server.rs index e52dfd6a5..312af1c2a 100644 --- a/server/src/handlers/http/modal/server.rs +++ b/server/src/handlers/http/modal/server.rs @@ -166,7 +166,7 @@ impl Server { .service(Self::get_oauth_webscope(oidc_client)), ) // GET "/" ==> Serve the static frontend directory - .service(ResourceFiles::new("/", generated).resolve_not_found_to_root()); + .service(Self::get_generated()); } // get the query factory @@ -391,6 +391,11 @@ impl Server { web::resource("/about").route(web::get().to(about::about).authorize(Action::GetAbout)) } + pub fn get_generated() -> ResourceFiles { + ResourceFiles::new("/", generate()).resolve_not_found_to_root() + } + + #[allow(unused)] pub async fn initialize(&mut self) -> anyhow::Result<()> { migration::run_metadata_migration(&CONFIG).await?; let metadata = storage::resolve_parseable_metadata().await?; From 2073cb80d984a6956ff35cc43ab5d00cb7a2f392 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Tue, 13 Feb 2024 01:33:21 +0530 Subject: [PATCH 48/84] change origin to domain name in IngesterMetadata struct --- .../src/handlers/http/modal/ingest_server.rs | 1 - server/src/handlers/http/modal/mod.rs | 19 ++++----------- .../src/handlers/http/modal/query_server.rs | 23 +++++++++---------- 3 files changed, 15 insertions(+), 28 deletions(-) diff --git 
a/server/src/handlers/http/modal/ingest_server.rs b/server/src/handlers/http/modal/ingest_server.rs index c2c0a63a2..6b800ad9a 100644 --- a/server/src/handlers/http/modal/ingest_server.rs +++ b/server/src/handlers/http/modal/ingest_server.rs @@ -139,7 +139,6 @@ impl IngestServer { }; let resource = IngesterMetadata::new( - sock.ip().to_string(), sock.port().to_string(), CONFIG .parseable diff --git a/server/src/handlers/http/modal/mod.rs b/server/src/handlers/http/modal/mod.rs index 2ee60abbe..795c7ef38 100644 --- a/server/src/handlers/http/modal/mod.rs +++ b/server/src/handlers/http/modal/mod.rs @@ -21,7 +21,6 @@ pub mod query_server; pub mod server; pub mod ssl_acceptor; -use std::collections::HashMap; use std::sync::Arc; use actix_web_prometheus::PrometheusMetrics; @@ -54,24 +53,16 @@ pub trait ParseableServer { #[derive(Serialize, Debug, Deserialize, Default)] pub struct IngesterMetadata { pub version: String, - pub address: String, pub port: String, - pub origin: String, // domain + pub domain_name: String, pub bucket_name: String, } impl IngesterMetadata { - pub fn new( - address: String, - port: String, - origin: String, - version: String, - bucket_name: String, - ) -> Self { + pub fn new(port: String, domain_name: String, version: String, bucket_name: String) -> Self { Self { - address, port, - origin, + domain_name, version, bucket_name, } @@ -88,7 +79,6 @@ mod test { #[rstest] fn check_resource() { let im = IngesterMetadata::new( - "0.0.0.0".to_string(), "8000".to_string(), "https://localhost:8000".to_string(), DEFAULT_VERSION.to_string(), @@ -100,9 +90,8 @@ mod test { .try_into_bytes() .unwrap(); let rhs = br#"{"version":"v3", -"address":"0.0.0.0", "port":"8000", -"origin":"https://localhost:8000", +"domain_name":"https://localhost:8000", "bucket_name":"somebucket"}"# .try_into_bytes() .unwrap(); diff --git a/server/src/handlers/http/modal/query_server.rs b/server/src/handlers/http/modal/query_server.rs index 92b14ec68..fdb9558f2 100644 --- 
a/server/src/handlers/http/modal/query_server.rs +++ b/server/src/handlers/http/modal/query_server.rs @@ -16,17 +16,12 @@ * */ -use crate::handlers::http::middleware::RouteExt; -use crate::handlers::http::{ - base_path, cross_origin_config, logstream, API_BASE_PATH, API_VERSION, -}; -use crate::rbac::role::Action; +use crate::handlers::http::{base_path, cross_origin_config, API_BASE_PATH, API_VERSION}; use crate::{analytics, banner, metadata, metrics, migration, rbac, storage}; use actix_web::http::header; use actix_web::web; use actix_web::web::ServiceConfig; use actix_web::{App, HttpServer}; -use actix_web_static_files::ResourceFiles; use async_trait::async_trait; use itertools::Itertools; use relative_path::RelativePathBuf; @@ -37,13 +32,13 @@ use crate::option::CONFIG; use super::server::Server; use super::ssl_acceptor::get_ssl_acceptor; -use super::{generate, IngesterMetadata, OpenIdClient, ParseableServer}; +use super::{IngesterMetadata, OpenIdClient, ParseableServer}; type IngesterMetadataArr = Vec; -type IngesterMetaPtr = Arc; +type IngesterMetaArrPtr = Arc; #[derive(Default, Debug)] -pub struct QueryServer(IngesterMetaPtr); +pub struct QueryServer(IngesterMetaArrPtr); #[async_trait(?Send)] impl ParseableServer for QueryServer { @@ -59,12 +54,16 @@ impl ParseableServer for QueryServer { // yes the format macro does not need the '/' ingester.origin already // has '/' because Url::Parse will add it if it is not present // uri should be something like `http://address/api/v1/liveness` - let uri = Url::parse(&format!("{}{}/liveness", &ingester.origin, base_path()))?; + let uri = Url::parse(&format!( + "{}{}/liveness", + &ingester.domain_name, + base_path() + ))?; if !Self::check_liveness(uri).await { - eprintln!("Ingestor at {} is not reachable", &ingester.origin); + eprintln!("Ingestor at {} is not reachable", &ingester.domain_name); } else { - println!("Ingestor at {} is up and running", &ingester.origin); + println!("Ingestor at {} is up and running", 
&ingester.domain_name); } } From 79018013be7add7564e02f75dbbc8c313d772fc2 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Tue, 13 Feb 2024 01:37:34 +0530 Subject: [PATCH 49/84] init trait method added --- .../src/handlers/http/modal/ingest_server.rs | 15 +- server/src/handlers/http/modal/mod.rs | 2 + .../src/handlers/http/modal/query_server.rs | 30 ++- server/src/handlers/http/modal/server.rs | 183 +++++++++--------- 4 files changed, 117 insertions(+), 113 deletions(-) diff --git a/server/src/handlers/http/modal/ingest_server.rs b/server/src/handlers/http/modal/ingest_server.rs index 6b800ad9a..12205b044 100644 --- a/server/src/handlers/http/modal/ingest_server.rs +++ b/server/src/handlers/http/modal/ingest_server.rs @@ -94,12 +94,17 @@ impl ParseableServer for IngestServer { Ok(()) } + + /// implement the init method will just invoke the initialize method + async fn init(&mut self) -> anyhow::Result<()> { + self.initialize().await + } } impl IngestServer { // configure the api routes fn configure_routes(config: &mut web::ServiceConfig, _oidc_client: Option) { - let logstream_scope = Server::get_logstream_webscope(); + let _logstream_scope = Server::get_logstream_webscope(); let ingest_factory = Server::get_ingest_factory(); config @@ -108,8 +113,9 @@ impl IngestServer { web::scope(&base_path()).service(ingest_factory), ) .service(Server::get_liveness_factory()) - .service(Server::get_readiness_factory()) - .service(logstream_scope); + .service(Server::get_readiness_factory()); + // add metrics here + // .service(logstream_scope); } #[inline(always)] @@ -181,8 +187,7 @@ impl IngestServer { } } - #[allow(unused)] - pub async fn initialize(&mut self) -> anyhow::Result<()> { + async fn initialize(&mut self) -> anyhow::Result<()> { // to get the .parseable.json file in staging let meta = storage::resolve_parseable_metadata().await?; banner::print(&CONFIG, &meta).await; diff --git a/server/src/handlers/http/modal/mod.rs b/server/src/handlers/http/modal/mod.rs 
index 795c7ef38..36caff507 100644 --- a/server/src/handlers/http/modal/mod.rs +++ b/server/src/handlers/http/modal/mod.rs @@ -48,6 +48,8 @@ pub trait ParseableServer { prometheus: PrometheusMetrics, oidc_client: Option, ) -> anyhow::Result<()>; + + async fn init(&mut self) -> anyhow::Result<()>; } #[derive(Serialize, Debug, Deserialize, Default)] diff --git a/server/src/handlers/http/modal/query_server.rs b/server/src/handlers/http/modal/query_server.rs index fdb9558f2..79ee0079f 100644 --- a/server/src/handlers/http/modal/query_server.rs +++ b/server/src/handlers/http/modal/query_server.rs @@ -105,12 +105,16 @@ impl ParseableServer for QueryServer { Ok(()) } + + /// implementation of init should just invoke a call to initialize + async fn init(&mut self) -> anyhow::Result<()> { + self.initialize().await + } } impl QueryServer { // configure the api routes - pub fn configure_routes(config: &mut ServiceConfig, oidc_client: Option) { - let user_scope = Server::get_user_webscope(); + fn configure_routes(config: &mut ServiceConfig, oidc_client: Option) { let llm_scope = Server::get_llm_webscope(); let role_scope = Server::get_user_role_webscope(); let oauth_scope = Server::get_oauth_webscope(oidc_client); @@ -124,15 +128,8 @@ impl QueryServer { .service(Server::get_readiness_factory()) // GET "/about" ==> Returns information about instance .service(Server::get_about_factory()) - .service( - web::scope("/logstream").service( - // GET "/logstream" ==> Get list of all Log Streams on the server - web::resource("").route( - web::get().to(logstream::list).authorize(Action::ListStream), - ), - ), - ) - .service(user_scope) + .service(Server::get_logstream_webscope()) + .service(Server::get_user_webscope()) .service(llm_scope) .service(oauth_scope) .service(role_scope), @@ -140,7 +137,7 @@ impl QueryServer { .service(Server::get_generated()); } - async fn get_ingestor_info(&self) -> anyhow::Result { + async fn get_ingestor_info(&self) -> anyhow::Result { let store = 
CONFIG.storage().get_object_store(); let root_path = RelativePathBuf::from(""); @@ -165,7 +162,8 @@ impl QueryServer { reqw.is_ok() } - #[allow(unused)] + + /// initialize the server, run migrations as needed and start the server async fn initialize(&mut self) -> anyhow::Result<()> { migration::run_metadata_migration(&CONFIG).await?; @@ -201,10 +199,8 @@ impl QueryServer { analytics::init_analytics_scheduler(); } - // how does livetail work? - // tokio::spawn(handlers::livetail::server()); - - self.start(prometheus, CONFIG.parseable.openid.clone()).await?; + self.start(prometheus, CONFIG.parseable.openid.clone()) + .await?; // tokio::pin!(app); diff --git a/server/src/handlers/http/modal/server.rs b/server/src/handlers/http/modal/server.rs index 312af1c2a..729a149f9 100644 --- a/server/src/handlers/http/modal/server.rs +++ b/server/src/handlers/http/modal/server.rs @@ -55,6 +55,7 @@ use crate::{ rbac::role::Action, }; +// use super::generate; use super::generate; use super::OpenIdClient; use super::ParseableServer; @@ -135,12 +136,16 @@ impl ParseableServer for Server { Ok(()) } + + /// implementation of init should just invoke a call to initialize + async fn init(&mut self) -> anyhow::Result<()> { + self.initialize().await + } } impl Server { fn configure_routes(config: &mut web::ServiceConfig, oidc_client: Option) { - let generated = generate(); - + // there might be a bug in the configure routes method config .service( web::scope(&base_path()) @@ -150,17 +155,7 @@ impl Server { .service(Self::get_liveness_factory()) .service(Self::get_readiness_factory()) .service(Self::get_about_factory()) - .service( - web::scope("/logstream") - .service( - // GET "/logstream" ==> Get list of all Log Streams on the server - web::resource("").route( - web::get().to(logstream::list).authorize(Action::ListStream), - ), - ) - .service(Self::get_logstream_webscope()), - ) - .service(Self::get_user_webscope()) + .service(Self::get_logstream_webscope()) 
.service(Self::get_llm_webscope()) .service(Self::get_user_role_webscope()) .service(Self::get_oauth_webscope(oidc_client)), @@ -175,90 +170,97 @@ impl Server { } // get the logstream web scope - // all except the GET route pub fn get_logstream_webscope() -> Scope { - web::scope("/{logstream}") + web::scope("/logstream") .service( + // GET "/logstream" ==> Get list of all Log Streams on the server web::resource("") - // PUT "/logstream/{logstream}" ==> Create log stream - .route( - web::put() - .to(logstream::put_stream) - .authorize_for_stream(Action::CreateStream), - ) - // POST "/logstream/{logstream}" ==> Post logs to given log stream - .route( - web::post() - .to(ingest::post_event) - .authorize_for_stream(Action::Ingest), - ) - // DELETE "/logstream/{logstream}" ==> Delete log stream - .route( - web::delete() - .to(logstream::delete) - .authorize_for_stream(Action::DeleteStream), - ) - .app_data(web::PayloadConfig::default().limit(MAX_EVENT_PAYLOAD_SIZE)), + .route(web::get().to(logstream::list).authorize(Action::ListStream)), ) .service( - web::resource("/alert") - // PUT "/logstream/{logstream}/alert" ==> Set alert for given log stream - .route( - web::put() - .to(logstream::put_alert) - .authorize_for_stream(Action::PutAlert), + web::scope("/{logstream}") + .service( + web::resource("") + // PUT "/logstream/{logstream}" ==> Create log stream + .route( + web::put() + .to(logstream::put_stream) + .authorize_for_stream(Action::CreateStream), + ) + // POST "/logstream/{logstream}" ==> Post logs to given log stream + .route( + web::post() + .to(ingest::post_event) + .authorize_for_stream(Action::Ingest), + ) + // DELETE "/logstream/{logstream}" ==> Delete log stream + .route( + web::delete() + .to(logstream::delete) + .authorize_for_stream(Action::DeleteStream), + ) + .app_data(web::PayloadConfig::default().limit(MAX_EVENT_PAYLOAD_SIZE)), ) - // GET "/logstream/{logstream}/alert" ==> Get alert for given log stream - .route( - web::get() - 
.to(logstream::get_alert) - .authorize_for_stream(Action::GetAlert), - ), - ) - .service( - // GET "/logstream/{logstream}/schema" ==> Get schema for given log stream - web::resource("/schema").route( - web::get() - .to(logstream::schema) - .authorize_for_stream(Action::GetSchema), - ), - ) - .service( - // GET "/logstream/{logstream}/stats" ==> Get stats for given log stream - web::resource("/stats").route( - web::get() - .to(logstream::get_stats) - .authorize_for_stream(Action::GetStats), - ), - ) - .service( - web::resource("/retention") - // PUT "/logstream/{logstream}/retention" ==> Set retention for given logstream - .route( - web::put() - .to(logstream::put_retention) - .authorize_for_stream(Action::PutRetention), + .service( + web::resource("/alert") + // PUT "/logstream/{logstream}/alert" ==> Set alert for given log stream + .route( + web::put() + .to(logstream::put_alert) + .authorize_for_stream(Action::PutAlert), + ) + // GET "/logstream/{logstream}/alert" ==> Get alert for given log stream + .route( + web::get() + .to(logstream::get_alert) + .authorize_for_stream(Action::GetAlert), + ), ) - // GET "/logstream/{logstream}/retention" ==> Get retention for given logstream - .route( - web::get() - .to(logstream::get_retention) - .authorize_for_stream(Action::GetRetention), - ), - ) - .service( - web::resource("/cache") - // PUT "/logstream/{logstream}/cache" ==> Set retention for given logstream - .route( - web::put() - .to(logstream::put_enable_cache) - .authorize_for_stream(Action::PutCacheEnabled), + .service( + // GET "/logstream/{logstream}/schema" ==> Get schema for given log stream + web::resource("/schema").route( + web::get() + .to(logstream::schema) + .authorize_for_stream(Action::GetSchema), + ), ) - // GET "/logstream/{logstream}/cache" ==> Get retention for given logstream - .route( - web::get() - .to(logstream::get_cache_enabled) - .authorize_for_stream(Action::GetCacheEnabled), + .service( + // GET "/logstream/{logstream}/stats" ==> Get stats 
for given log stream + web::resource("/stats").route( + web::get() + .to(logstream::get_stats) + .authorize_for_stream(Action::GetStats), + ), + ) + .service( + web::resource("/retention") + // PUT "/logstream/{logstream}/retention" ==> Set retention for given logstream + .route( + web::put() + .to(logstream::put_retention) + .authorize_for_stream(Action::PutRetention), + ) + // GET "/logstream/{logstream}/retention" ==> Get retention for given logstream + .route( + web::get() + .to(logstream::get_retention) + .authorize_for_stream(Action::GetRetention), + ), + ) + .service( + web::resource("/cache") + // PUT "/logstream/{logstream}/cache" ==> Set retention for given logstream + .route( + web::put() + .to(logstream::put_enable_cache) + .authorize_for_stream(Action::PutCacheEnabled), + ) + // GET "/logstream/{logstream}/cache" ==> Get retention for given logstream + .route( + web::get() + .to(logstream::get_cache_enabled) + .authorize_for_stream(Action::GetCacheEnabled), + ), ), ) } @@ -395,8 +397,7 @@ impl Server { ResourceFiles::new("/", generate()).resolve_not_found_to_root() } - #[allow(unused)] - pub async fn initialize(&mut self) -> anyhow::Result<()> { + async fn initialize(&mut self) -> anyhow::Result<()> { migration::run_metadata_migration(&CONFIG).await?; let metadata = storage::resolve_parseable_metadata().await?; banner::print(&CONFIG, &metadata).await; From 3b5a8b027ff928263e3f964e80429b32c6bed612 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Tue, 13 Feb 2024 01:37:44 +0530 Subject: [PATCH 50/84] misc --- server/src/handlers/http.rs | 5 +- server/src/main.rs | 159 ++++++++++++++++++++++-------------- 2 files changed, 101 insertions(+), 63 deletions(-) diff --git a/server/src/handlers/http.rs b/server/src/handlers/http.rs index 9de18cc1f..e17f4d2f3 100644 --- a/server/src/handlers/http.rs +++ b/server/src/handlers/http.rs @@ -43,12 +43,15 @@ pub(crate) mod query; pub(crate) mod rbac; pub(crate) mod role; -include!(concat!(env!("OUT_DIR"), 
"/generated.rs")); +// this needs to be removed from here. It is in modal->mod.rs +// include!(concat!(env!("OUT_DIR"), "/generated.rs")); pub const MAX_EVENT_PAYLOAD_SIZE: usize = 10485760; pub const API_BASE_PATH: &str = "/api"; pub const API_VERSION: &str = "v1"; +#[allow(unused)] +/// to be removed pub async fn run_http( prometheus: PrometheusMetrics, oidc_client: Option, diff --git a/server/src/main.rs b/server/src/main.rs index 9b70b18fe..8b0ade4bf 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -40,78 +40,113 @@ mod sync; mod utils; mod validator; -use option::CONFIG; +use std::sync::Arc; -use crate::localcache::LocalCacheManager; +use handlers::http::modal::ParseableServer; +use option::{Mode, CONFIG}; +use tokio::sync::RwLock; + +use crate::{ + handlers::http::modal::{ + ingest_server::IngestServer, query_server::QueryServer, server::Server, + }, + // localcache::LocalCacheManager, +}; pub const STORAGE_UPLOAD_INTERVAL: u32 = 60; #[actix_web::main] async fn main() -> anyhow::Result<()> { env_logger::init(); CONFIG.validate().await?; - migration::run_metadata_migration(&CONFIG).await?; - let metadata = storage::resolve_parseable_metadata().await?; - banner::print(&CONFIG, &metadata).await; - rbac::map::init(&metadata); - metadata.set_global(); - if let Some(cache_manager) = LocalCacheManager::global() { - cache_manager - .validate(CONFIG.parseable.local_cache_size) - .await?; - }; - let prometheus = metrics::build_metrics_handler(); - CONFIG.storage().register_store_metrics(&prometheus); - - migration::run_migration(&CONFIG).await?; - // when do we do this ingestor only most likely - // needs to be updated every so often(when and how) - let storage = CONFIG.storage().get_object_store(); - if let Err(e) = metadata::STREAM_INFO.load(&*storage).await { - log::warn!("could not populate local metadata. 
{:?}", e); - } - - // track all parquet files already in the data directory - storage::retention::load_retention_from_global(); - // load data from stats back to prometheus metrics - metrics::fetch_stats_from_storage().await; - - let (localsync_handler, mut localsync_outbox, localsync_inbox) = sync::run_local_sync(); - let (mut remote_sync_handler, mut remote_sync_outbox, mut remote_sync_inbox) = - sync::object_store_sync(); + let server: Arc> = match CONFIG.parseable.mode { + Mode::Query => { + dbg!("Mode::Query"); + Arc::new(RwLock::new(QueryServer::default())) + } + Mode::Ingest => { + dbg!("Mode::Ingest"); + Arc::new(RwLock::new(IngestServer)) + } + Mode::All => { + dbg!("Mode::All"); + Arc::new(RwLock::new(Server)) + } + }; - // all internal data structures populated now. - // start the analytics scheduler if enabled - if CONFIG.parseable.send_analytics { - analytics::init_analytics_scheduler(); - } + // add logic for graceful shutdown if + // MODE == Query / Ingest and storage = local-store - // this is supposed to happen only in query and super servers - tokio::spawn(handlers::livetail::server()); + // But does an RwLock Make sence? maybe figure something out + server.write().await.init().await?; + // server.try_lock()?.start(prometheus, oidc_client).await?; - let app = handlers::http::run_http(prometheus, CONFIG.parseable.openid.clone()); - tokio::pin!(app); - loop { - tokio::select! { - e = &mut app => { - // actix server finished .. stop other threads and stop the server - remote_sync_inbox.send(()).unwrap_or(()); - localsync_inbox.send(()).unwrap_or(()); - localsync_handler.join().unwrap_or(()); - remote_sync_handler.join().unwrap_or(()); - return e - }, - _ = &mut localsync_outbox => { - // crash the server if localsync fails for any reason - // panic!("Local Sync thread died. Server will fail now!") - return Err(anyhow::Error::msg("Failed to sync local data to drive. 
Please restart the Parseable server.\n\nJoin us on Parseable Slack if the issue persists after restart : https://launchpass.com/parseable")) - }, - _ = &mut remote_sync_outbox => { - // remote_sync failed, this is recoverable by just starting remote_sync thread again - remote_sync_handler.join().unwrap_or(()); - (remote_sync_handler, remote_sync_outbox, remote_sync_inbox) = sync::object_store_sync(); - } + ///////////////////////////////////////////// + // migration::run_metadata_migration(&CONFIG).await?; + // let metadata = storage::resolve_parseable_metadata().await?; + // banner::print(&CONFIG, &metadata).await; + // rbac::map::init(&metadata); + // metadata.set_global(); + // if let Some(cache_manager) = LocalCacheManager::global() { + // cache_manager + // .validate(CONFIG.parseable.local_cache_size) + // .await?; + // }; + // let prometheus = metrics::build_metrics_handler(); + // CONFIG.storage().register_store_metrics(&prometheus); + // + // migration::run_migration(&CONFIG).await?; + // + // // when do we do this ingestor only most likely + // // needs to be updated every so often(when and how) + // let storage = CONFIG.storage().get_object_store(); + // if let Err(e) = metadata::STREAM_INFO.load(&*storage).await { + // log::warn!("could not populate local metadata. {:?}", e); + // } + // + // // track all parquet files already in the data directory + // storage::retention::load_retention_from_global(); + // // load data from stats back to prometheus metrics + // metrics::fetch_stats_from_storage().await; + // + // let (localsync_handler, mut localsync_outbox, localsync_inbox) = sync::run_local_sync(); + // let (mut remote_sync_handler, mut remote_sync_outbox, mut remote_sync_inbox) = + // sync::object_store_sync(); + // + // // all internal data structures populated now. 
+ // // start the analytics scheduler if enabled + // if CONFIG.parseable.send_analytics { + // analytics::init_analytics_scheduler(); + // } + // + // // this is supposed to happen only in query and super servers + // tokio::spawn(handlers::livetail::server()); + // + // let app = handlers::http::run_http(prometheus, CONFIG.parseable.openid.clone()); + // tokio::pin!(app); + // loop { + // tokio::select! { + // e = &mut app => { + // // actix server finished .. stop other threads and stop the server + // remote_sync_inbox.send(()).unwrap_or(()); + // localsync_inbox.send(()).unwrap_or(()); + // localsync_handler.join().unwrap_or(()); + // remote_sync_handler.join().unwrap_or(()); + // return e + // }, + // _ = &mut localsync_outbox => { + // // crash the server if localsync fails for any reason + // // panic!("Local Sync thread died. Server will fail now!") + // return Err(anyhow::Error::msg("Failed to sync local data to drive. Please restart the Parseable server.\n\nJoin us on Parseable Slack if the issue persists after restart : https://launchpass.com/parseable")) + // }, + // _ = &mut remote_sync_outbox => { + // // remote_sync failed, this is recoverable by just starting remote_sync thread again + // remote_sync_handler.join().unwrap_or(()); + // (remote_sync_handler, remote_sync_outbox, remote_sync_inbox) = sync::object_store_sync(); + // } + // + // }; + // } - }; - } + Ok(()) } From 0d650e99163b2b87dbf4a87d9a3b60133e87daf9 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Tue, 13 Feb 2024 12:50:19 +0530 Subject: [PATCH 51/84] rename function --- server/src/main.rs | 2 +- server/src/option.rs | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/server/src/main.rs b/server/src/main.rs index 8b0ade4bf..cc22a9434 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -57,7 +57,7 @@ pub const STORAGE_UPLOAD_INTERVAL: u32 = 60; #[actix_web::main] async fn main() -> anyhow::Result<()> { env_logger::init(); - CONFIG.validate().await?; + 
CONFIG.validate_storage().await?; let server: Arc> = match CONFIG.parseable.mode { Mode::Query => { diff --git a/server/src/option.rs b/server/src/option.rs index 0aec6d1ac..6a5932726 100644 --- a/server/src/option.rs +++ b/server/src/option.rs @@ -100,8 +100,7 @@ impl Config { // validate the storage, if the proper path for staging directory is provided // if the proper data directory is provided, or s3 bucket is provided etc - // probably change name to validate_storage or something - pub async fn validate(&self) -> Result<(), ObjectStorageError> { + pub async fn validate_storage(&self) -> Result<(), ObjectStorageError> { let obj_store = self.storage.get_object_store(); let rel_path = relative_path::RelativePathBuf::from(PARSEABLE_METADATA_FILE_NAME); From cf9d76907ab2820914d81d2fc20c966e49356f94 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Tue, 13 Feb 2024 12:51:58 +0530 Subject: [PATCH 52/84] update trait ParseableServer --- server/src/handlers/http/modal/ingest_server.rs | 13 +++++++++++++ server/src/handlers/http/modal/mod.rs | 2 ++ server/src/handlers/http/modal/query_server.rs | 12 ++++++++++++ server/src/handlers/http/modal/server.rs | 4 ++++ 4 files changed, 31 insertions(+) diff --git a/server/src/handlers/http/modal/ingest_server.rs b/server/src/handlers/http/modal/ingest_server.rs index 12205b044..e841201bd 100644 --- a/server/src/handlers/http/modal/ingest_server.rs +++ b/server/src/handlers/http/modal/ingest_server.rs @@ -97,8 +97,21 @@ impl ParseableServer for IngestServer { /// implement the init method will just invoke the initialize method async fn init(&mut self) -> anyhow::Result<()> { + // self.validate()?; self.initialize().await } + + #[allow(unused)] + fn validate(&self) -> anyhow::Result<()> { + if CONFIG.get_storage_mode_string() == "Local drive" { + return Err(anyhow::Error::msg( + // Error Message can be better + "Ingest Server cannot be started in local storage mode. 
Please start the server in a supported storage mode.", + )); + } + + Ok(()) + } } impl IngestServer { diff --git a/server/src/handlers/http/modal/mod.rs b/server/src/handlers/http/modal/mod.rs index 36caff507..181d6c7b5 100644 --- a/server/src/handlers/http/modal/mod.rs +++ b/server/src/handlers/http/modal/mod.rs @@ -50,6 +50,8 @@ pub trait ParseableServer { ) -> anyhow::Result<()>; async fn init(&mut self) -> anyhow::Result<()>; + + fn validate(&self) -> anyhow::Result<()>; } #[derive(Serialize, Debug, Deserialize, Default)] diff --git a/server/src/handlers/http/modal/query_server.rs b/server/src/handlers/http/modal/query_server.rs index 79ee0079f..ff4e94b2d 100644 --- a/server/src/handlers/http/modal/query_server.rs +++ b/server/src/handlers/http/modal/query_server.rs @@ -108,8 +108,20 @@ impl ParseableServer for QueryServer { /// implementation of init should just invoke a call to initialize async fn init(&mut self) -> anyhow::Result<()> { + // self.validate()?; self.initialize().await } + + #[allow(unused)] + fn validate(&self) -> anyhow::Result<()> { + if CONFIG.get_storage_mode_string() == "Local drive" { + return Err(anyhow::anyhow!( + "Query Server cannot be started in local storage mode. 
Please start the server in a supported storage mode.", + )); + } + + Ok(()) + } } impl QueryServer { diff --git a/server/src/handlers/http/modal/server.rs b/server/src/handlers/http/modal/server.rs index 729a149f9..a711f1ef6 100644 --- a/server/src/handlers/http/modal/server.rs +++ b/server/src/handlers/http/modal/server.rs @@ -141,6 +141,10 @@ impl ParseableServer for Server { async fn init(&mut self) -> anyhow::Result<()> { self.initialize().await } + + fn validate(&self) -> anyhow::Result<()> { + Ok(()) + } } impl Server { From 851d0705bbd70416e26829598baa7e1b7e10a437 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Tue, 13 Feb 2024 12:53:20 +0530 Subject: [PATCH 53/84] update to IngesterMetadata Struct --- .../src/handlers/http/modal/ingest_server.rs | 2 ++ server/src/handlers/http/modal/mod.rs | 27 ++++++++++++++----- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/server/src/handlers/http/modal/ingest_server.rs b/server/src/handlers/http/modal/ingest_server.rs index e841201bd..eb8900135 100644 --- a/server/src/handlers/http/modal/ingest_server.rs +++ b/server/src/handlers/http/modal/ingest_server.rs @@ -169,6 +169,8 @@ impl IngestServer { .to_string(), DEFAULT_VERSION.to_string(), store.get_bucket_name(), + CONFIG.parseable.username.clone(), + CONFIG.parseable.password.clone(), // is this secure? 
); let resource = serde_json::to_string(&resource) diff --git a/server/src/handlers/http/modal/mod.rs b/server/src/handlers/http/modal/mod.rs index 181d6c7b5..c1a6023d3 100644 --- a/server/src/handlers/http/modal/mod.rs +++ b/server/src/handlers/http/modal/mod.rs @@ -28,6 +28,7 @@ use async_trait::async_trait; use openid::Discovered; use crate::oidc; +use base64::Engine; use serde::Deserialize; use serde::Serialize; pub type OpenIdClient = Arc>; @@ -60,15 +61,28 @@ pub struct IngesterMetadata { pub port: String, pub domain_name: String, pub bucket_name: String, + pub token: String, } impl IngesterMetadata { - pub fn new(port: String, domain_name: String, version: String, bucket_name: String) -> Self { + pub fn new( + port: String, + domain_name: String, + version: String, + bucket_name: String, + username: String, + password: String, + ) -> Self { + let token = base64::prelude::BASE64_STANDARD.encode(format!("{}:{}", username, password)); + + let token = format!("Basic {}", token); + Self { port, domain_name, version, bucket_name, + token, } } } @@ -87,18 +101,17 @@ mod test { "https://localhost:8000".to_string(), DEFAULT_VERSION.to_string(), "somebucket".to_string(), + "admin".to_string(), + "admin".to_string(), ); let lhs = serde_json::to_string(&im) .unwrap() .try_into_bytes() .unwrap(); - let rhs = br#"{"version":"v3", -"port":"8000", -"domain_name":"https://localhost:8000", -"bucket_name":"somebucket"}"# - .try_into_bytes() - .unwrap(); + let rhs = br#"{"version":"v3","port":"8000","domain_name":"https://localhost:8000","bucket_name":"somebucket","token":"Basic YWRtaW46YWRtaW4="}"# + .try_into_bytes() + .unwrap(); assert_eq!(lhs, rhs); } From 74dd2e4b4855f0d365744ea01a76269940e79159 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Tue, 13 Feb 2024 12:54:03 +0530 Subject: [PATCH 54/84] make the routed inline --- server/src/handlers/http/modal/ingest_server.rs | 9 ++------- server/src/handlers/http/modal/query_server.rs | 13 ++++++------- 2 files changed, 8 
insertions(+), 14 deletions(-) diff --git a/server/src/handlers/http/modal/ingest_server.rs b/server/src/handlers/http/modal/ingest_server.rs index eb8900135..0d365030f 100644 --- a/server/src/handlers/http/modal/ingest_server.rs +++ b/server/src/handlers/http/modal/ingest_server.rs @@ -117,18 +117,13 @@ impl ParseableServer for IngestServer { impl IngestServer { // configure the api routes fn configure_routes(config: &mut web::ServiceConfig, _oidc_client: Option) { - let _logstream_scope = Server::get_logstream_webscope(); - let ingest_factory = Server::get_ingest_factory(); - config .service( // Base path "{url}/api/v1" - web::scope(&base_path()).service(ingest_factory), + web::scope(&base_path()).service(Server::get_ingest_factory()), ) .service(Server::get_liveness_factory()) - .service(Server::get_readiness_factory()); - // add metrics here - // .service(logstream_scope); + .service(Server::get_readiness_factory()) } #[inline(always)] diff --git a/server/src/handlers/http/modal/query_server.rs b/server/src/handlers/http/modal/query_server.rs index ff4e94b2d..c02640b19 100644 --- a/server/src/handlers/http/modal/query_server.rs +++ b/server/src/handlers/http/modal/query_server.rs @@ -127,10 +127,6 @@ impl ParseableServer for QueryServer { impl QueryServer { // configure the api routes fn configure_routes(config: &mut ServiceConfig, oidc_client: Option) { - let llm_scope = Server::get_llm_webscope(); - let role_scope = Server::get_user_role_webscope(); - let oauth_scope = Server::get_oauth_webscope(oidc_client); - config .service( web::scope(&base_path()) @@ -142,9 +138,9 @@ impl QueryServer { .service(Server::get_about_factory()) .service(Server::get_logstream_webscope()) .service(Server::get_user_webscope()) - .service(llm_scope) - .service(oauth_scope) - .service(role_scope), + .service(Server::get_llm_webscope()) + .service(Server::get_oauth_webscope(oidc_client)) + .service(Server::get_user_role_webscope()), ) .service(Server::get_generated()); } @@ -162,6 
+158,9 @@ impl QueryServer { .map(|x| serde_json::from_slice::(x).unwrap_or_default()) .collect_vec(); + // validate the ingester metadata + // TODO: add validation logic here + Ok(Arc::new(arr)) } From 03f16323cb67f821208cd0b52a670d512d3a8d81 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Tue, 13 Feb 2024 12:55:33 +0530 Subject: [PATCH 55/84] add custom metrics api points --- .../src/handlers/http/modal/ingest_server.rs | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/server/src/handlers/http/modal/ingest_server.rs b/server/src/handlers/http/modal/ingest_server.rs index 0d365030f..0a6949183 100644 --- a/server/src/handlers/http/modal/ingest_server.rs +++ b/server/src/handlers/http/modal/ingest_server.rs @@ -18,9 +18,12 @@ use crate::analytics; use crate::banner; +use crate::handlers::http::logstream; +use crate::handlers::http::middleware::RouteExt; use crate::localcache::LocalCacheManager; use crate::metadata; use crate::metrics; +use crate::rbac::role::Action; use crate::storage; use crate::storage::ObjectStorageError; use crate::storage::PARSEABLE_METADATA_FILE_NAME; @@ -36,6 +39,7 @@ use super::ParseableServer; use super::DEFAULT_VERSION; use actix_web::body::MessageBody; +use actix_web::Scope; use actix_web::{web, App, HttpServer}; use actix_web_prometheus::PrometheusMetrics; use async_trait::async_trait; @@ -124,6 +128,28 @@ impl IngestServer { ) .service(Server::get_liveness_factory()) .service(Server::get_readiness_factory()) + .service(Self::get_metrics_webscope()); + } + + fn get_metrics_webscope() -> Scope { + web::scope("/logstream").service( + web::scope("/{logstream}") + .service( + // GET "/logstream/{logstream}/schema" ==> Get schema for given log stream + web::resource("/schema").route( + web::get() + .to(logstream::schema) + .authorize_for_stream(Action::GetSchema), + ), + ) + .service( + web::resource("/stats").route( + web::get() + .to(logstream::get_stats) + .authorize_for_stream(Action::GetStats), + ), + ), + ) } 
#[inline(always)] From 0cbf1e61cf9f3b71c1f7d31c980fb233deaf660d Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Tue, 13 Feb 2024 13:25:21 +0530 Subject: [PATCH 56/84] fix: User Api on Default Mode User API was not properly exposed in Default mode --- server/src/handlers/http/modal/server.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/src/handlers/http/modal/server.rs b/server/src/handlers/http/modal/server.rs index a711f1ef6..e2bc965d9 100644 --- a/server/src/handlers/http/modal/server.rs +++ b/server/src/handlers/http/modal/server.rs @@ -160,11 +160,11 @@ impl Server { .service(Self::get_readiness_factory()) .service(Self::get_about_factory()) .service(Self::get_logstream_webscope()) + .service(Self::get_user_webscope()) .service(Self::get_llm_webscope()) - .service(Self::get_user_role_webscope()) - .service(Self::get_oauth_webscope(oidc_client)), + .service(Self::get_oauth_webscope(oidc_client)) + .service(Self::get_user_role_webscope()), ) - // GET "/" ==> Serve the static frontend directory .service(Self::get_generated()); } From bde8bddf499f9980c1df57685c9e7b0cd7ff5095 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Tue, 13 Feb 2024 13:25:39 +0530 Subject: [PATCH 57/84] misc: add comment --- server/src/handlers/http/modal/query_server.rs | 1 - server/src/handlers/http/modal/server.rs | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/handlers/http/modal/query_server.rs b/server/src/handlers/http/modal/query_server.rs index c02640b19..4d5dca5b5 100644 --- a/server/src/handlers/http/modal/query_server.rs +++ b/server/src/handlers/http/modal/query_server.rs @@ -134,7 +134,6 @@ impl QueryServer { .service(Server::get_query_factory()) .service(Server::get_liveness_factory()) .service(Server::get_readiness_factory()) - // GET "/about" ==> Returns information about instance .service(Server::get_about_factory()) .service(Server::get_logstream_webscope()) .service(Server::get_user_webscope()) diff 
--git a/server/src/handlers/http/modal/server.rs b/server/src/handlers/http/modal/server.rs index e2bc965d9..db22f2c4f 100644 --- a/server/src/handlers/http/modal/server.rs +++ b/server/src/handlers/http/modal/server.rs @@ -397,6 +397,7 @@ impl Server { web::resource("/about").route(web::get().to(about::about).authorize(Action::GetAbout)) } + // GET "/" ==> Serve the static frontend directory pub fn get_generated() -> ResourceFiles { ResourceFiles::new("/", generate()).resolve_not_found_to_root() } From e6e9965a2505980e0ef8fe270a66374a8c4e392f Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Tue, 13 Feb 2024 19:28:44 +0530 Subject: [PATCH 58/84] fix: Metadata value mode --- server/src/storage/store_metadata.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/src/storage/store_metadata.rs b/server/src/storage/store_metadata.rs index f698ed844..996daae43 100644 --- a/server/src/storage/store_metadata.rs +++ b/server/src/storage/store_metadata.rs @@ -48,7 +48,7 @@ pub struct StaticStorageMetadata { #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] pub struct StorageMetadata { pub version: String, - pub storage_mode: String, + pub mode: String, pub staging: PathBuf, pub storage: String, #[serde(default = "crate::utils::uid::gen")] @@ -65,7 +65,7 @@ impl StorageMetadata { pub fn new() -> Self { Self { version: "v3".to_string(), - storage_mode: CONFIG.storage_name.to_owned(), + mode: CONFIG.storage_name.to_owned(), staging: CONFIG.staging_dir().to_path_buf(), storage: CONFIG.storage().get_endpoint(), deployment_id: uid::gen(), @@ -84,7 +84,7 @@ impl StorageMetadata { pub fn set_global(self) { let metadata = StaticStorageMetadata { - mode: self.storage_mode, + mode: self.mode, deployment_id: self.deployment_id, }; From 75494491106d26bbd235075eec12cc9e2cf4642f Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Tue, 13 Feb 2024 19:28:58 +0530 Subject: [PATCH 59/84] remove redundant else statement --- 
server/src/handlers/http/logstream.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/server/src/handlers/http/logstream.rs b/server/src/handlers/http/logstream.rs index afd057eda..6b8592331 100644 --- a/server/src/handlers/http/logstream.rs +++ b/server/src/handlers/http/logstream.rs @@ -120,9 +120,8 @@ pub async fn put_stream(req: HttpRequest) -> Result ), status: StatusCode::BAD_REQUEST, }); - } else { - create_stream(stream_name).await?; } + create_stream(stream_name).await?; Ok(("log stream created", StatusCode::OK)) } From 94a45c6bac0f01274b7cffe9c8bc9bed126ed5cf Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Wed, 14 Feb 2024 15:04:27 +0530 Subject: [PATCH 60/84] fix: Check for Query Server Deployment Proper Error bubbles up if the deployment is incorrect --- server/src/handlers/http/modal/ingest_server.rs | 7 +++---- server/src/storage/store_metadata.rs | 12 +++++++++--- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/server/src/handlers/http/modal/ingest_server.rs b/server/src/handlers/http/modal/ingest_server.rs index 0a6949183..4f8b99090 100644 --- a/server/src/handlers/http/modal/ingest_server.rs +++ b/server/src/handlers/http/modal/ingest_server.rs @@ -62,9 +62,6 @@ impl ParseableServer for IngestServer { prometheus: PrometheusMetrics, _oidc_client: Option, ) -> anyhow::Result<()> { - // check for querier state. Is it there, or was it there in the past - self.check_querier_state().await?; - // set the ingestor metadata self.set_ingestor_metadata().await?; @@ -217,13 +214,15 @@ impl IngestServer { match store.get_object(&path).await { Ok(_) => Ok(()), Err(_) => Err(ObjectStorageError::Custom( - "Querier Server has not been started yet. Please start the querier server first." + "Query Server has not been started yet. Please start the querier server first." .to_string(), )), } } async fn initialize(&mut self) -> anyhow::Result<()> { + // check for querier state. 
Is it there, or was it there in the past + self.check_querier_state().await?; // to get the .parseable.json file in staging let meta = storage::resolve_parseable_metadata().await?; banner::print(&CONFIG, &meta).await; diff --git a/server/src/storage/store_metadata.rs b/server/src/storage/store_metadata.rs index 996daae43..a9ea7a6dc 100644 --- a/server/src/storage/store_metadata.rs +++ b/server/src/storage/store_metadata.rs @@ -26,7 +26,7 @@ use once_cell::sync::OnceCell; use std::io; use crate::{ - option::{CONFIG, JOIN_COMMUNITY}, + option::{Mode, CONFIG, JOIN_COMMUNITY}, rbac::{role::model::DefaultPrivilege, user::User}, storage::ObjectStorageError, utils::uid, @@ -132,13 +132,19 @@ pub async fn resolve_parseable_metadata() -> Result { create_dir_all(CONFIG.staging_dir())?; let metadata = StorageMetadata::new(); - // new metadata needs to be set on both staging and remote - overwrite_remote = true; + // new metadata needs to be set + // if mode is query or all then both staging and remote + match CONFIG.parseable.mode { + Mode::All | Mode::Query => overwrite_remote = true, + _ => (), + } + // else only staging overwrite_staging = true; Ok(metadata) } From 65cee8bac08b24888a12be3d2bcb0d8f986e7321 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Wed, 14 Feb 2024 16:00:56 +0530 Subject: [PATCH 61/84] feat: impl to_str for Mode Enum maybe trait impl is better maybe do later --- server/src/option.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/server/src/option.rs b/server/src/option.rs index 6a5932726..e24e5e7e5 100644 --- a/server/src/option.rs +++ b/server/src/option.rs @@ -198,6 +198,16 @@ pub enum Mode { All, } +impl Mode { + pub fn to_str(&self) -> &str { + match self { + Mode::Query => "Query Server", + Mode::Ingest => "Ingest Server", + Mode::All => "All", + } + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] #[allow(non_camel_case_types, clippy::upper_case_acronyms)] pub enum Compression { From 
0a228c11e5a82b2c368ae85c57f27c0bedb2a350 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Wed, 14 Feb 2024 16:02:21 +0530 Subject: [PATCH 62/84] fix: Banner Changes Banner Changes to indicate the server Mode and other information --- server/src/about.rs | 8 ++++---- server/src/banner.rs | 32 +++++++++++++++++--------------- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/server/src/about.rs b/server/src/about.rs index f33ac2137..9aea9ff28 100644 --- a/server/src/about.rs +++ b/server/src/about.rs @@ -90,10 +90,10 @@ pub fn print_about( eprint!( " {} - Version: \"v{}\"", + Version:\t\t\t\t\t\"v{}\"", "About:".to_string().bold(), current_version, - ); + ); // " " " " if let Some(latest_release) = latest_release { if latest_release.version > current_version { @@ -103,8 +103,8 @@ pub fn print_about( eprintln!( " - Commit: \"{commit_hash}\" - Docs: \"https://logg.ing/docs\"" + Commit:\t\t\t\t\t\t\"{commit_hash}\" + Docs:\t\t\t\t\t\t\"https://logg.ing/docs\"" ); } diff --git a/server/src/banner.rs b/server/src/banner.rs index 207fe1705..bd6ca2bc6 100644 --- a/server/src/banner.rs +++ b/server/src/banner.rs @@ -35,13 +35,13 @@ pub async fn print(config: &Config, meta: &StorageMetadata) { fn print_ascii_art() { let ascii_name = r#" - `7MM"""Mq. *MM `7MM - MM `MM. MM MM - MM ,M9 ,6"Yb. `7Mb,od8 ,pP"Ybd .gP"Ya ,6"Yb. MM,dMMb. MM .gP"Ya - MMmmdM9 8) MM MM' "' 8I `" ,M' Yb 8) MM MM `Mb MM ,M' Yb - MM ,pm9MM MM `YMMMa. 8M"""""" ,pm9MM MM M8 MM 8M"""""" - MM 8M MM MM L. I8 YM. , 8M MM MM. ,M9 MM YM. , - .JMML. `Moo9^Yo..JMML. M9mmmP' `Mbmmd' `Moo9^Yo. P^YbmdP' .JMML. `Mbmmd' + `7MM"""Mq. *MM `7MM + MM `MM. MM MM + MM ,M9 ,6"Yb. `7Mb,od8 ,pP"Ybd .gP"Ya ,6"Yb. MM,dMMb. MM .gP"Ya + MMmmdM9 8) MM MM' "' 8I `" ,M' Yb 8) MM MM `Mb MM ,M' Yb + MM ,pm9MM MM `YMMMa. 8M"""""" ,pm9MM MM M8 MM 8M"""""" + MM 8M MM MM L. I8 YM. , 8M MM MM. ,M9 MM YM. , + .JMML. `Moo9^Yo..JMML. M9mmmP' `Mbmmd' `Moo9^Yo. P^YbmdP' .JMML. 
`Mbmmd' "#; eprint!("{ascii_name}"); @@ -77,13 +77,15 @@ fn status_info(config: &Config, scheme: &str, id: Uid) { eprintln!( " {} - Address: {} - Credentials: {} - LLM Status: \"{}\"", + Address:\t\t\t\t\t{} + Credentials:\t\t\t\t\t{} + Server Mode:\t\t\t\t\t\"{}\" + LLM Status:\t\t\t\t\t\"{}\"", "Server:".to_string().bold(), address, credentials, - llm_status + llm_status, + config.parseable.mode.to_str() ); } @@ -99,8 +101,8 @@ async fn storage_info(config: &Config) { eprintln!( " {} - Mode: \"{}\" - Staging: \"{}\"", + Storage Mode:\t\t\t\t\t\"{}\" + Staging Path:\t\t\t\t\t\"{}\"", "Storage:".to_string().bold(), config.get_storage_mode_string(), config.staging_dir().to_string_lossy(), @@ -114,7 +116,7 @@ async fn storage_info(config: &Config) { eprintln!( "\ - {:8}Cache: \"{}\", (size: {})", + {:8}Cache:\t\t\t\t\t\"{}\", (size: {})", "", path.display(), size @@ -123,7 +125,7 @@ async fn storage_info(config: &Config) { eprintln!( "\ - {:8}Store: \"{}\", (latency: {:?})", + {:8}Store:\t\t\t\t\t\t\"{}\", (latency: {:?})", "", storage.get_endpoint(), latency From 8964cf13b1be5e977750f0db91996f37dc44931c Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Wed, 14 Feb 2024 16:54:30 +0530 Subject: [PATCH 63/84] chore: add unit test --- server/src/handlers/http/modal/mod.rs | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/server/src/handlers/http/modal/mod.rs b/server/src/handlers/http/modal/mod.rs index c1a6023d3..a9f9bfb12 100644 --- a/server/src/handlers/http/modal/mod.rs +++ b/server/src/handlers/http/modal/mod.rs @@ -55,7 +55,7 @@ pub trait ParseableServer { fn validate(&self) -> anyhow::Result<()>; } -#[derive(Serialize, Debug, Deserialize, Default)] +#[derive(Serialize, Debug, Deserialize, Default, Clone, Eq, PartialEq)] pub struct IngesterMetadata { pub version: String, pub port: String, @@ -95,7 +95,23 @@ mod test { use super::{IngesterMetadata, DEFAULT_VERSION}; #[rstest] - fn check_resource() { + fn 
test_deserialize_resource() { + let lhs: IngesterMetadata = IngesterMetadata::new( + "8000".to_string(), + "https://localhost:8000".to_string(), + DEFAULT_VERSION.to_string(), + "somebucket".to_string(), + "admin".to_string(), + "admin".to_string(), + ); + + let rhs = serde_json::from_slice::(br#"{"version":"v3","port":"8000","domain_name":"https://localhost:8000","bucket_name":"somebucket","token":"Basic YWRtaW46YWRtaW4="}"#).unwrap(); + + assert_eq!(rhs, lhs); + } + + #[rstest] + fn test_serialize_resource() { let im = IngesterMetadata::new( "8000".to_string(), "https://localhost:8000".to_string(), From 329c7277e3fb8140ca44404ae713b82c32895642 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Wed, 14 Feb 2024 16:54:59 +0530 Subject: [PATCH 64/84] fix: ingester file name change --- server/src/handlers/http/modal/ingest_server.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/handlers/http/modal/ingest_server.rs b/server/src/handlers/http/modal/ingest_server.rs index 4f8b99090..09be208f8 100644 --- a/server/src/handlers/http/modal/ingest_server.rs +++ b/server/src/handlers/http/modal/ingest_server.rs @@ -165,7 +165,7 @@ impl IngestServer { // remove ip adn go with the domain name let sock = self.get_ingestor_address(); let path = RelativePathBuf::from(format!( - "{}.{}.ingestor.json", + "ingestor.{}.{}.json", sock.ip(), // this might be wrong sock.port() )); From a66f7b1ac09aedb2e490fcd2120a75d9fa728ba3 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Wed, 14 Feb 2024 16:55:47 +0530 Subject: [PATCH 65/84] chore: refactor get_ingestor_info function --- server/src/handlers/http/modal/ingest_server.rs | 4 ++-- server/src/handlers/http/modal/mod.rs | 12 ++++++------ server/src/handlers/http/modal/query_server.rs | 9 +++++---- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/server/src/handlers/http/modal/ingest_server.rs b/server/src/handlers/http/modal/ingest_server.rs index 09be208f8..aa661b2ae 100644 --- 
a/server/src/handlers/http/modal/ingest_server.rs +++ b/server/src/handlers/http/modal/ingest_server.rs @@ -187,8 +187,8 @@ impl IngestServer { .to_string(), DEFAULT_VERSION.to_string(), store.get_bucket_name(), - CONFIG.parseable.username.clone(), - CONFIG.parseable.password.clone(), // is this secure? + &CONFIG.parseable.username, + &CONFIG.parseable.password, // is this secure? ); let resource = serde_json::to_string(&resource) diff --git a/server/src/handlers/http/modal/mod.rs b/server/src/handlers/http/modal/mod.rs index a9f9bfb12..cfd7ca22c 100644 --- a/server/src/handlers/http/modal/mod.rs +++ b/server/src/handlers/http/modal/mod.rs @@ -70,8 +70,8 @@ impl IngesterMetadata { domain_name: String, version: String, bucket_name: String, - username: String, - password: String, + username: &str, + password: &str, ) -> Self { let token = base64::prelude::BASE64_STANDARD.encode(format!("{}:{}", username, password)); @@ -101,8 +101,8 @@ mod test { "https://localhost:8000".to_string(), DEFAULT_VERSION.to_string(), "somebucket".to_string(), - "admin".to_string(), - "admin".to_string(), + "admin", + "admin", ); let rhs = serde_json::from_slice::(br#"{"version":"v3","port":"8000","domain_name":"https://localhost:8000","bucket_name":"somebucket","token":"Basic YWRtaW46YWRtaW4="}"#).unwrap(); @@ -117,8 +117,8 @@ mod test { "https://localhost:8000".to_string(), DEFAULT_VERSION.to_string(), "somebucket".to_string(), - "admin".to_string(), - "admin".to_string(), + "admin", + "admin", ); let lhs = serde_json::to_string(&im) diff --git a/server/src/handlers/http/modal/query_server.rs b/server/src/handlers/http/modal/query_server.rs index 4d5dca5b5..6b5339566 100644 --- a/server/src/handlers/http/modal/query_server.rs +++ b/server/src/handlers/http/modal/query_server.rs @@ -47,7 +47,7 @@ impl ParseableServer for QueryServer { prometheus: actix_web_prometheus::PrometheusMetrics, oidc_client: Option, ) -> anyhow::Result<()> { - self.0 = self.get_ingestor_info().await?; + 
self.get_ingestor_info().await?; // on subsequent runs, the qurier should check if the ingestor is up and running or not for ingester in self.0.iter() { @@ -144,7 +144,7 @@ impl QueryServer { .service(Server::get_generated()); } - async fn get_ingestor_info(&self) -> anyhow::Result { + async fn get_ingestor_info(&mut self) -> anyhow::Result<()> { let store = CONFIG.storage().get_object_store(); let root_path = RelativePathBuf::from(""); @@ -157,10 +157,11 @@ impl QueryServer { .map(|x| serde_json::from_slice::(x).unwrap_or_default()) .collect_vec(); - // validate the ingester metadata // TODO: add validation logic here + // validate the ingester metadata - Ok(Arc::new(arr)) + self.0 = Arc::new(arr.clone()); + Ok(()) } pub async fn check_liveness(uri: Url) -> bool { From fc88c69175a67d4bfff3d90ff54d24e37f6db470 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Wed, 14 Feb 2024 19:48:26 +0530 Subject: [PATCH 66/84] fix: banner server status In the banner the LLM Status and the Mode values were switched. 
Fixed it in this patch --- server/src/banner.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/banner.rs b/server/src/banner.rs index bd6ca2bc6..d9f3cc609 100644 --- a/server/src/banner.rs +++ b/server/src/banner.rs @@ -84,8 +84,8 @@ fn status_info(config: &Config, scheme: &str, id: Uid) { "Server:".to_string().bold(), address, credentials, - llm_status, - config.parseable.mode.to_str() + config.parseable.mode.to_str(), + llm_status ); } From b4d6d236534195d41da495a1724979678b59eab7 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Wed, 14 Feb 2024 20:00:19 +0530 Subject: [PATCH 67/84] fix: server behaviour when new ingest server is set up When new server is setup, the ingest server staging should fetch the metadata file from object store --- server/src/storage/store_metadata.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/server/src/storage/store_metadata.rs b/server/src/storage/store_metadata.rs index a9ea7a6dc..1584ee647 100644 --- a/server/src/storage/store_metadata.rs +++ b/server/src/storage/store_metadata.rs @@ -130,8 +130,12 @@ pub async fn resolve_parseable_metadata() -> Result overwrite_remote = true, + _ => (), + } Ok(metadata) } From 04788ceceaff2f37af1a53f530df51ffc744ae72 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Thu, 15 Feb 2024 11:05:29 +0530 Subject: [PATCH 68/84] fix: Ingest Server Not Ingesting when attatched to an old data store When an old data location is used the ingester was not able to ingest. Due to roles not being set properly. 
--- server/src/handlers/http/modal/ingest_server.rs | 3 +++ server/src/storage/store_metadata.rs | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/server/src/handlers/http/modal/ingest_server.rs b/server/src/handlers/http/modal/ingest_server.rs index aa661b2ae..a7d57d168 100644 --- a/server/src/handlers/http/modal/ingest_server.rs +++ b/server/src/handlers/http/modal/ingest_server.rs @@ -23,6 +23,7 @@ use crate::handlers::http::middleware::RouteExt; use crate::localcache::LocalCacheManager; use crate::metadata; use crate::metrics; +use crate::rbac; use crate::rbac::role::Action; use crate::storage; use crate::storage::ObjectStorageError; @@ -227,6 +228,8 @@ impl IngestServer { let meta = storage::resolve_parseable_metadata().await?; banner::print(&CONFIG, &meta).await; + rbac::map::init(&meta); + // set the info in the global metadata meta.set_global(); diff --git a/server/src/storage/store_metadata.rs b/server/src/storage/store_metadata.rs index 1584ee647..5a951454a 100644 --- a/server/src/storage/store_metadata.rs +++ b/server/src/storage/store_metadata.rs @@ -134,9 +134,10 @@ pub async fn resolve_parseable_metadata() -> Result overwrite_remote = true, - _ => (), + _ => { + metadata.staging = CONFIG.staging_dir().to_path_buf(); + }, } - Ok(metadata) } EnvChange::CreateBoth => { From 691dc7d8bd20ba771d86105c0c9410f56a3e3da9 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Thu, 15 Feb 2024 11:53:08 +0530 Subject: [PATCH 69/84] remove INGESTOR_FILE_EXTENSION constant --- server/src/handlers/http/modal/mod.rs | 1 - server/src/storage/localfs.rs | 4 ++-- server/src/storage/s3.rs | 3 +-- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/server/src/handlers/http/modal/mod.rs b/server/src/handlers/http/modal/mod.rs index cfd7ca22c..5db77cbdb 100644 --- a/server/src/handlers/http/modal/mod.rs +++ b/server/src/handlers/http/modal/mod.rs @@ -35,7 +35,6 @@ pub type OpenIdClient = Arc>; // to be decided on what the Default version should 
be pub const DEFAULT_VERSION: &str = "v3"; -pub const INGESTOR_FILE_EXTENSION: &str = "ingestor.json"; include!(concat!(env!("OUT_DIR"), "/generated.rs")); diff --git a/server/src/storage/localfs.rs b/server/src/storage/localfs.rs index 8875d2080..ffbab408a 100644 --- a/server/src/storage/localfs.rs +++ b/server/src/storage/localfs.rs @@ -33,7 +33,7 @@ use tokio_stream::wrappers::ReadDirStream; use crate::option::validation; use crate::{ - handlers::http::modal::INGESTOR_FILE_EXTENSION, + metrics::storage::{localfs::REQUEST_RESPONSE_TIME, StorageMetrics}, }; @@ -137,7 +137,7 @@ impl ObjectStorage for LocalFS { .unwrap_or_default() .to_str() .unwrap_or_default() - .contains(INGESTOR_FILE_EXTENSION); + .contains("ingestor"); if !ingestor_file { continue; diff --git a/server/src/storage/s3.rs b/server/src/storage/s3.rs index 716e0fdc2..edf342f9b 100644 --- a/server/src/storage/s3.rs +++ b/server/src/storage/s3.rs @@ -38,7 +38,6 @@ use std::path::Path as StdPath; use std::sync::Arc; use std::time::{Duration, Instant}; -use crate::handlers::http::modal::INGESTOR_FILE_EXTENSION; use crate::metrics::storage::{s3::REQUEST_RESPONSE_TIME, StorageMetrics}; use crate::storage::{LogStream, ObjectStorage, ObjectStorageError}; @@ -428,7 +427,7 @@ impl ObjectStorage for S3 { .location .filename() .unwrap_or_default() - .contains(INGESTOR_FILE_EXTENSION); + .contains("ingestor"); if !ingestor_file { continue; From ab17f5f4ec7fd9cb418db16475eeb2f12d190dc8 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Thu, 15 Feb 2024 11:54:45 +0530 Subject: [PATCH 70/84] chore: remove commented out code --- server/src/main.rs | 68 ---------------------------------------------- 1 file changed, 68 deletions(-) diff --git a/server/src/main.rs b/server/src/main.rs index cc22a9434..4219204aa 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -79,74 +79,6 @@ async fn main() -> anyhow::Result<()> { // But does an RwLock Make sence? 
maybe figure something out server.write().await.init().await?; - // server.try_lock()?.start(prometheus, oidc_client).await?; - - ///////////////////////////////////////////// - // migration::run_metadata_migration(&CONFIG).await?; - // let metadata = storage::resolve_parseable_metadata().await?; - // banner::print(&CONFIG, &metadata).await; - // rbac::map::init(&metadata); - // metadata.set_global(); - // if let Some(cache_manager) = LocalCacheManager::global() { - // cache_manager - // .validate(CONFIG.parseable.local_cache_size) - // .await?; - // }; - // let prometheus = metrics::build_metrics_handler(); - // CONFIG.storage().register_store_metrics(&prometheus); - // - // migration::run_migration(&CONFIG).await?; - // - // // when do we do this ingestor only most likely - // // needs to be updated every so often(when and how) - // let storage = CONFIG.storage().get_object_store(); - // if let Err(e) = metadata::STREAM_INFO.load(&*storage).await { - // log::warn!("could not populate local metadata. {:?}", e); - // } - // - // // track all parquet files already in the data directory - // storage::retention::load_retention_from_global(); - // // load data from stats back to prometheus metrics - // metrics::fetch_stats_from_storage().await; - // - // let (localsync_handler, mut localsync_outbox, localsync_inbox) = sync::run_local_sync(); - // let (mut remote_sync_handler, mut remote_sync_outbox, mut remote_sync_inbox) = - // sync::object_store_sync(); - // - // // all internal data structures populated now. - // // start the analytics scheduler if enabled - // if CONFIG.parseable.send_analytics { - // analytics::init_analytics_scheduler(); - // } - // - // // this is supposed to happen only in query and super servers - // tokio::spawn(handlers::livetail::server()); - // - // let app = handlers::http::run_http(prometheus, CONFIG.parseable.openid.clone()); - // tokio::pin!(app); - // loop { - // tokio::select! { - // e = &mut app => { - // // actix server finished .. 
stop other threads and stop the server - // remote_sync_inbox.send(()).unwrap_or(()); - // localsync_inbox.send(()).unwrap_or(()); - // localsync_handler.join().unwrap_or(()); - // remote_sync_handler.join().unwrap_or(()); - // return e - // }, - // _ = &mut localsync_outbox => { - // // crash the server if localsync fails for any reason - // // panic!("Local Sync thread died. Server will fail now!") - // return Err(anyhow::Error::msg("Failed to sync local data to drive. Please restart the Parseable server.\n\nJoin us on Parseable Slack if the issue persists after restart : https://launchpass.com/parseable")) - // }, - // _ = &mut remote_sync_outbox => { - // // remote_sync failed, this is recoverable by just starting remote_sync thread again - // remote_sync_handler.join().unwrap_or(()); - // (remote_sync_handler, remote_sync_outbox, remote_sync_inbox) = sync::object_store_sync(); - // } - // - // }; - // } Ok(()) } From 0449fccb49da55d2765537e2ccce36076f36ae91 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Thu, 15 Feb 2024 11:57:00 +0530 Subject: [PATCH 71/84] Trait update --- .../src/handlers/http/modal/ingest_server.rs | 6 +-- server/src/handlers/http/modal/mod.rs | 4 +- .../src/handlers/http/modal/query_server.rs | 39 +++++++++++++------ server/src/handlers/http/modal/server.rs | 6 +-- 4 files changed, 36 insertions(+), 19 deletions(-) diff --git a/server/src/handlers/http/modal/ingest_server.rs b/server/src/handlers/http/modal/ingest_server.rs index a7d57d168..9e4a7f653 100644 --- a/server/src/handlers/http/modal/ingest_server.rs +++ b/server/src/handlers/http/modal/ingest_server.rs @@ -59,7 +59,7 @@ pub struct IngestServer; impl ParseableServer for IngestServer { // we dont need oidc client here its just here to satisfy the trait async fn start( - &mut self, + &self, prometheus: PrometheusMetrics, _oidc_client: Option, ) -> anyhow::Result<()> { @@ -98,7 +98,7 @@ impl ParseableServer for IngestServer { } /// implement the init method will just invoke 
the initialize method - async fn init(&mut self) -> anyhow::Result<()> { + async fn init(&self) -> anyhow::Result<()> { // self.validate()?; self.initialize().await } @@ -221,7 +221,7 @@ impl IngestServer { } } - async fn initialize(&mut self) -> anyhow::Result<()> { + async fn initialize(&self) -> anyhow::Result<()> { // check for querier state. Is it there, or was it there in the past self.check_querier_state().await?; // to get the .parseable.json file in staging diff --git a/server/src/handlers/http/modal/mod.rs b/server/src/handlers/http/modal/mod.rs index 5db77cbdb..5881b3bfb 100644 --- a/server/src/handlers/http/modal/mod.rs +++ b/server/src/handlers/http/modal/mod.rs @@ -44,12 +44,12 @@ pub trait ParseableServer { /// configure the server async fn start( - &mut self, + &self, prometheus: PrometheusMetrics, oidc_client: Option, ) -> anyhow::Result<()>; - async fn init(&mut self) -> anyhow::Result<()>; + async fn init(&self) -> anyhow::Result<()>; fn validate(&self) -> anyhow::Result<()>; } diff --git a/server/src/handlers/http/modal/query_server.rs b/server/src/handlers/http/modal/query_server.rs index 6b5339566..ba6796190 100644 --- a/server/src/handlers/http/modal/query_server.rs +++ b/server/src/handlers/http/modal/query_server.rs @@ -26,8 +26,11 @@ use async_trait::async_trait; use itertools::Itertools; use relative_path::RelativePathBuf; use std::sync::Arc; +use tokio::io::AsyncWriteExt; use url::Url; +use tokio::fs::File as TokioFile; + use crate::option::CONFIG; use super::server::Server; @@ -35,22 +38,22 @@ use super::ssl_acceptor::get_ssl_acceptor; use super::{IngesterMetadata, OpenIdClient, ParseableServer}; type IngesterMetadataArr = Vec; -type IngesterMetaArrPtr = Arc; #[derive(Default, Debug)] -pub struct QueryServer(IngesterMetaArrPtr); +pub struct QueryServer; #[async_trait(?Send)] impl ParseableServer for QueryServer { async fn start( - &mut self, + &self, prometheus: actix_web_prometheus::PrometheusMetrics, oidc_client: Option, ) -> 
anyhow::Result<()> { - self.get_ingestor_info().await?; + let data = Self::get_ingestor_info().await?; // on subsequent runs, the qurier should check if the ingestor is up and running or not - for ingester in self.0.iter() { + for ingester in data.iter() { + dbg!(&ingester); // yes the format macro does not need the '/' ingester.origin already // has '/' because Url::Parse will add it if it is not present // uri should be something like `http://address/api/v1/liveness` @@ -107,7 +110,7 @@ impl ParseableServer for QueryServer { } /// implementation of init should just invoke a call to initialize - async fn init(&mut self) -> anyhow::Result<()> { + async fn init(&self) -> anyhow::Result<()> { // self.validate()?; self.initialize().await } @@ -144,14 +147,13 @@ impl QueryServer { .service(Server::get_generated()); } - async fn get_ingestor_info(&mut self) -> anyhow::Result<()> { + async fn get_ingestor_info() -> anyhow::Result { let store = CONFIG.storage().get_object_store(); let root_path = RelativePathBuf::from(""); let arr = store .get_objects(Some(&root_path)) .await? 
- .to_vec() .iter() // this unwrap will most definateley shoot me in the foot later .map(|x| serde_json::from_slice::(x).unwrap_or_default()) @@ -160,8 +162,11 @@ impl QueryServer { // TODO: add validation logic here // validate the ingester metadata - self.0 = Arc::new(arr.clone()); - Ok(()) + let mut f = Self::get_meta_file().await; + // writer the arr in f + f.write(serde_json::to_string(&arr)?.as_bytes()).await?; + + Ok(arr) } pub async fn check_liveness(uri: Url) -> bool { @@ -175,8 +180,9 @@ impl QueryServer { } /// initialize the server, run migrations as needed and start the server - async fn initialize(&mut self) -> anyhow::Result<()> { + async fn initialize(&self) -> anyhow::Result<()> { migration::run_metadata_migration(&CONFIG).await?; + tokio::fs::File::create(CONFIG.staging_dir().join(".query.json")).await?; let metadata = storage::resolve_parseable_metadata().await?; banner::print(&CONFIG, &metadata).await; @@ -227,4 +233,15 @@ impl QueryServer { Ok(()) } + + async fn get_meta_file() -> TokioFile { + let meta_path = CONFIG.staging_dir().join(".query.json"); + + tokio::fs::OpenOptions::new() + .read(true) + .write(true) + .open(meta_path) + .await + .unwrap() + } } diff --git a/server/src/handlers/http/modal/server.rs b/server/src/handlers/http/modal/server.rs index db22f2c4f..84b9d8e90 100644 --- a/server/src/handlers/http/modal/server.rs +++ b/server/src/handlers/http/modal/server.rs @@ -66,7 +66,7 @@ pub struct Server; #[async_trait(?Send)] impl ParseableServer for Server { async fn start( - &mut self, + &self, prometheus: PrometheusMetrics, oidc_client: Option, ) -> anyhow::Result<()> { @@ -138,7 +138,7 @@ impl ParseableServer for Server { } /// implementation of init should just invoke a call to initialize - async fn init(&mut self) -> anyhow::Result<()> { + async fn init(&self) -> anyhow::Result<()> { self.initialize().await } @@ -402,7 +402,7 @@ impl Server { ResourceFiles::new("/", generate()).resolve_not_found_to_root() } - async fn 
initialize(&mut self) -> anyhow::Result<()> { + async fn initialize(&self) -> anyhow::Result<()> { migration::run_metadata_migration(&CONFIG).await?; let metadata = storage::resolve_parseable_metadata().await?; banner::print(&CONFIG, &metadata).await; From 676c976ad3f7ddcfebb81cf81cd9b2674444e8bf Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Thu, 15 Feb 2024 11:57:10 +0530 Subject: [PATCH 72/84] rm rwlock --- server/src/main.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/server/src/main.rs b/server/src/main.rs index 4219204aa..0f7683c8d 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -44,7 +44,6 @@ use std::sync::Arc; use handlers::http::modal::ParseableServer; use option::{Mode, CONFIG}; -use tokio::sync::RwLock; use crate::{ handlers::http::modal::{ @@ -59,18 +58,18 @@ async fn main() -> anyhow::Result<()> { env_logger::init(); CONFIG.validate_storage().await?; - let server: Arc> = match CONFIG.parseable.mode { + let server: Arc = match CONFIG.parseable.mode { Mode::Query => { dbg!("Mode::Query"); - Arc::new(RwLock::new(QueryServer::default())) + Arc::new(QueryServer::default()) } Mode::Ingest => { dbg!("Mode::Ingest"); - Arc::new(RwLock::new(IngestServer)) + Arc::new(IngestServer) } Mode::All => { dbg!("Mode::All"); - Arc::new(RwLock::new(Server)) + Arc::new(Server) } }; @@ -78,7 +77,7 @@ async fn main() -> anyhow::Result<()> { // MODE == Query / Ingest and storage = local-store // But does an RwLock Make sence? 
maybe figure something out - server.write().await.init().await?; + server.init().await?; Ok(()) } From a9676b40f0fd8508f04352d1bdab86d5b24f6458 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Thu, 15 Feb 2024 13:23:15 +0530 Subject: [PATCH 73/84] chore: clean up --- server/src/handlers/http/modal/query_server.rs | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/server/src/handlers/http/modal/query_server.rs b/server/src/handlers/http/modal/query_server.rs index ba6796190..a68ee9c94 100644 --- a/server/src/handlers/http/modal/query_server.rs +++ b/server/src/handlers/http/modal/query_server.rs @@ -147,6 +147,7 @@ impl QueryServer { .service(Server::get_generated()); } + // update the .query.json file and return the new IngesterMetadataArr async fn get_ingestor_info() -> anyhow::Result { let store = CONFIG.storage().get_object_store(); @@ -219,18 +220,6 @@ impl QueryServer { self.start(prometheus, CONFIG.parseable.openid.clone()) .await?; - // tokio::pin!(app); - - // // this never actually loops - // // rather than pinning we can just await? - // loop { - // tokio::select! 
{ - // err= &mut app => { - // return err; - // }, - // } - // } - Ok(()) } From a333c2156e4d3e42a472a21caa91ade475aa5327 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Thu, 15 Feb 2024 13:23:15 +0530 Subject: [PATCH 74/84] chore: clean up --- server/src/handlers/http.rs | 36 ----------------------------------- server/src/main.rs | 2 +- server/src/storage/localfs.rs | 5 +---- 3 files changed, 2 insertions(+), 41 deletions(-) diff --git a/server/src/handlers/http.rs b/server/src/handlers/http.rs index e17f4d2f3..ea69b0b59 100644 --- a/server/src/handlers/http.rs +++ b/server/src/handlers/http.rs @@ -16,18 +16,7 @@ * */ -use std::sync::Arc; - use actix_cors::Cors; -use actix_web_prometheus::PrometheusMetrics; -use tokio::sync::Mutex; - -use crate::option::CONFIG; - -use crate::handlers::http::modal::{ - ingest_server::IngestServer, query_server::QueryServer, server::Server, ParseableServer, -}; -use crate::option::Mode; pub(crate) mod about; pub(crate) mod health_check; @@ -50,31 +39,6 @@ pub const MAX_EVENT_PAYLOAD_SIZE: usize = 10485760; pub const API_BASE_PATH: &str = "/api"; pub const API_VERSION: &str = "v1"; -#[allow(unused)] -/// to be removed -pub async fn run_http( - prometheus: PrometheusMetrics, - oidc_client: Option, -) -> anyhow::Result<()> { - let server: Arc> = match CONFIG.parseable.mode { - Mode::Query => { - dbg!("Mode::Query"); - Arc::new(Mutex::new(QueryServer::default())) - } - Mode::Ingest => { - dbg!("Mode::Ingest"); - Arc::new(Mutex::new(IngestServer)) - } - Mode::All => { - dbg!("Mode::All"); - Arc::new(Mutex::new(Server)) - } - }; - - server.try_lock()?.start(prometheus, oidc_client).await?; - Ok(()) -} - pub(crate) fn base_path() -> String { format!("{API_BASE_PATH}/{API_VERSION}") } diff --git a/server/src/main.rs b/server/src/main.rs index 0f7683c8d..d0805b1fd 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -61,7 +61,7 @@ async fn main() -> anyhow::Result<()> { let server: Arc = match CONFIG.parseable.mode { Mode::Query 
=> { dbg!("Mode::Query"); - Arc::new(QueryServer::default()) + Arc::new(QueryServer) } Mode::Ingest => { dbg!("Mode::Ingest"); diff --git a/server/src/storage/localfs.rs b/server/src/storage/localfs.rs index ffbab408a..bce837d58 100644 --- a/server/src/storage/localfs.rs +++ b/server/src/storage/localfs.rs @@ -31,11 +31,8 @@ use relative_path::RelativePath; use tokio::fs::{self, DirEntry}; use tokio_stream::wrappers::ReadDirStream; +use crate::metrics::storage::{localfs::REQUEST_RESPONSE_TIME, StorageMetrics}; use crate::option::validation; -use crate::{ - - metrics::storage::{localfs::REQUEST_RESPONSE_TIME, StorageMetrics}, -}; use super::{ LogStream, ObjectStorage, ObjectStorageError, ObjectStorageProvider, STREAM_METADATA_FILE_NAME, From bb7a78a7d9116c038573e64f5aa297f96ce233d1 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Thu, 15 Feb 2024 13:35:17 +0530 Subject: [PATCH 75/84] debug out the written buff size for debug only will be removed later --- server/src/handlers/http/modal/query_server.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/handlers/http/modal/query_server.rs b/server/src/handlers/http/modal/query_server.rs index a68ee9c94..67c72c8cf 100644 --- a/server/src/handlers/http/modal/query_server.rs +++ b/server/src/handlers/http/modal/query_server.rs @@ -165,8 +165,8 @@ impl QueryServer { let mut f = Self::get_meta_file().await; // writer the arr in f - f.write(serde_json::to_string(&arr)?.as_bytes()).await?; - + let write_size = f.write(serde_json::to_string(&arr)?.as_bytes()).await?; + dbg!(write_size); Ok(arr) } From 08ab3a92b10dd268bf130c21d76bf2dbf584873e Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Thu, 15 Feb 2024 13:36:04 +0530 Subject: [PATCH 76/84] add sync tokio task to sync ingestor info --- server/src/handlers/http/modal/query_server.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/server/src/handlers/http/modal/query_server.rs 
b/server/src/handlers/http/modal/query_server.rs index 67c72c8cf..c8f32dd73 100644 --- a/server/src/handlers/http/modal/query_server.rs +++ b/server/src/handlers/http/modal/query_server.rs @@ -217,12 +217,24 @@ impl QueryServer { analytics::init_analytics_scheduler(); } + // spawn the sync thread + tokio::spawn(Self::sync_ingestor_metadata()); + self.start(prometheus, CONFIG.parseable.openid.clone()) .await?; Ok(()) } + async fn sync_ingestor_metadata() { + let mut interval = tokio::time::interval(tokio::time::Duration::from_secs(60 / 10)); + loop { + interval.tick().await; + dbg!("Tick"); + Self::get_ingestor_info().await.unwrap(); + } + } + async fn get_meta_file() -> TokioFile { let meta_path = CONFIG.staging_dir().join(".query.json"); From 4d4cb304ef2a2918bc39e354dc2cc91a1e0cc5f2 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Fri, 16 Feb 2024 11:14:57 +0530 Subject: [PATCH 77/84] fix: bug where query.json was not being created properly --- server/src/handlers/http/modal/query_server.rs | 3 +-- server/src/storage/store_metadata.rs | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/handlers/http/modal/query_server.rs b/server/src/handlers/http/modal/query_server.rs index c8f32dd73..39dd763f2 100644 --- a/server/src/handlers/http/modal/query_server.rs +++ b/server/src/handlers/http/modal/query_server.rs @@ -183,9 +183,8 @@ impl QueryServer { /// initialize the server, run migrations as needed and start the server async fn initialize(&self) -> anyhow::Result<()> { migration::run_metadata_migration(&CONFIG).await?; - tokio::fs::File::create(CONFIG.staging_dir().join(".query.json")).await?; - let metadata = storage::resolve_parseable_metadata().await?; + tokio::fs::File::create(CONFIG.staging_dir().join(".query.json")).await?; banner::print(&CONFIG, &metadata).await; // initialize the rbac map diff --git a/server/src/storage/store_metadata.rs b/server/src/storage/store_metadata.rs index 5a951454a..3c051ac62 100644 --- 
a/server/src/storage/store_metadata.rs +++ b/server/src/storage/store_metadata.rs @@ -92,6 +92,7 @@ impl StorageMetadata { } } +/// deals with the staging directory creation and metadata resolution /// always returns remote metadata as it is source of truth /// overwrites staging metadata while updating storage info pub async fn resolve_parseable_metadata() -> Result { From 093fbd0187b63fc1f3c4f8180c17c444cd6017f7 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Fri, 16 Feb 2024 13:45:07 +0530 Subject: [PATCH 78/84] update: change parquet file names to include port --- server/src/storage/staging.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/server/src/storage/staging.rs b/server/src/storage/staging.rs index 31c5dffed..e3d6d0fbb 100644 --- a/server/src/storage/staging.rs +++ b/server/src/storage/staging.rs @@ -20,6 +20,7 @@ use std::{ collections::HashMap, fs, + net::SocketAddr, path::{Path, PathBuf}, process, sync::Arc, @@ -159,6 +160,16 @@ impl StorageDir { fn arrow_path_to_parquet(path: &Path) -> PathBuf { let filename = path.file_name().unwrap().to_str().unwrap(); let (_, filename) = filename.split_once('.').unwrap(); + + let port = CONFIG + .parseable + .address + .clone() + .parse::() + .unwrap() + .port(); + let filename = filename.rsplit_once('.').unwrap(); + let filename = format!("{}.{}.{}", filename.0, port, filename.1); let mut parquet_path = path.to_owned(); parquet_path.set_file_name(filename); parquet_path.set_extension("parquet"); From 311ae8b00c11bde6d287741a5291a27f557df254 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Wed, 21 Feb 2024 20:30:04 +0530 Subject: [PATCH 79/84] create a util func to get the sock_addr 1. 
Todo: Update with this function whereever needed --- server/src/utils.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/server/src/utils.rs b/server/src/utils.rs index 83af01cc6..530f2b21d 100644 --- a/server/src/utils.rs +++ b/server/src/utils.rs @@ -23,8 +23,12 @@ pub mod json; pub mod uid; pub mod update; +use std::net::{IpAddr, SocketAddr}; + use chrono::{DateTime, NaiveDate, Timelike, Utc}; +use crate::option::CONFIG; + #[allow(dead_code)] pub fn hostname() -> Option { hostname::get() @@ -222,6 +226,12 @@ impl TimePeriod { } } +#[inline(always)] +pub fn get_address() -> (IpAddr, u16) { + let addr = CONFIG.parseable.address.parse::().unwrap(); + (addr.ip(), addr.port()) +} + #[cfg(test)] mod tests { use chrono::DateTime; From 26a11700677b4f3db3cab664fc6dfd4714ccaf87 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Wed, 21 Feb 2024 20:31:18 +0530 Subject: [PATCH 80/84] manifest file name change manifest file name is now changed from -> to manifest.json -> ip.port.manifest.json --- server/src/catalog.rs | 7 +++++-- server/src/main.rs | 3 ++- server/src/option.rs | 12 ++++++++++++ server/src/storage/object_storage.rs | 6 +++++- 4 files changed, 24 insertions(+), 4 deletions(-) diff --git a/server/src/catalog.rs b/server/src/catalog.rs index f8adad1ca..f95c8d3dd 100644 --- a/server/src/catalog.rs +++ b/server/src/catalog.rs @@ -24,7 +24,8 @@ use relative_path::RelativePathBuf; use crate::{ catalog::manifest::Manifest, query::PartialTimeFilter, - storage::{ObjectStorage, ObjectStorageError}, + storage::{ObjectStorage, ObjectStorageError, MANIFEST_FILE}, + utils::get_address, }; use self::{column::Column, snapshot::ManifestItem}; @@ -137,7 +138,9 @@ pub async fn update_snapshot( ..Manifest::default() }; - let path = partition_path(stream_name, lower_bound, upper_bound).join("manifest.json"); + let addr = get_address(); + let mainfest_file_name = format!("{}.{}.{}", addr.0, addr.1, MANIFEST_FILE); + let path = partition_path(stream_name, 
lower_bound, upper_bound).join(&mainfest_file_name); storage .put_object(&path, serde_json::to_vec(&manifest).unwrap().into()) .await?; diff --git a/server/src/main.rs b/server/src/main.rs index d0805b1fd..59d47a3d0 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -75,8 +75,9 @@ async fn main() -> anyhow::Result<()> { // add logic for graceful shutdown if // MODE == Query / Ingest and storage = local-store + // option.rs ln: 161 + // CONFIG.run_time_mode_validation()?; - // But does an RwLock Make sence? maybe figure something out server.init().await?; Ok(()) diff --git a/server/src/option.rs b/server/src/option.rs index e24e5e7e5..99bff7e66 100644 --- a/server/src/option.rs +++ b/server/src/option.rs @@ -157,6 +157,18 @@ impl Config { } "S3 bucket" } + + #[allow(dead_code)] + pub fn run_time_mode_validation(&self) -> anyhow::Result<()> { + let check = (self.parseable.mode == Mode::Ingest || self.parseable.mode == Mode::Query) + && self.storage_name == "drive"; + + if check { + anyhow::bail!(format!("Cannot start the server in {} mode with local storage, please use S3 bucket for storage", self.parseable.mode.to_str())) + } + + Ok(()) + } } fn create_parseable_cli_command() -> Command { diff --git a/server/src/storage/object_storage.rs b/server/src/storage/object_storage.rs index 52cc05b41..53c6779e7 100644 --- a/server/src/storage/object_storage.rs +++ b/server/src/storage/object_storage.rs @@ -25,6 +25,7 @@ use super::{ STREAM_METADATA_FILE_NAME, }; +use crate::utils::get_address; use crate::{ alerts::Alerts, catalog::{self, manifest::Manifest, snapshot::Snapshot}, @@ -266,6 +267,7 @@ pub trait ObjectStorage: Sync + 'static { } } + // get the manifest info async fn get_manifest( &self, path: &RelativePath, @@ -449,5 +451,7 @@ fn alert_json_path(stream_name: &str) -> RelativePathBuf { #[inline(always)] fn manifest_path(prefix: &str) -> RelativePathBuf { - RelativePathBuf::from_iter([prefix, MANIFEST_FILE]) + let addr = get_address(); + let 
mainfest_file_name = format!("{}.{}.{}", addr.0, addr.1, MANIFEST_FILE); + RelativePathBuf::from_iter([prefix, &mainfest_file_name]) } From 755994a2f51cbe55720a1f44aa36b3b8bb3d22c1 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Fri, 23 Feb 2024 20:25:23 +0530 Subject: [PATCH 81/84] debug: for working to be uncommented --- server/src/handlers/http/modal/ingest_server.rs | 2 +- server/src/handlers/http/modal/query_server.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/handlers/http/modal/ingest_server.rs b/server/src/handlers/http/modal/ingest_server.rs index 9e4a7f653..cfe1cbf23 100644 --- a/server/src/handlers/http/modal/ingest_server.rs +++ b/server/src/handlers/http/modal/ingest_server.rs @@ -183,7 +183,7 @@ impl IngestServer { .domain_address .clone() .unwrap_or_else(|| { - Url::parse(&format!("https://{}:{}", sock.ip(), sock.port())).unwrap() + Url::parse(&format!("http://{}:{}", sock.ip(), sock.port())).unwrap() }) .to_string(), DEFAULT_VERSION.to_string(), diff --git a/server/src/handlers/http/modal/query_server.rs b/server/src/handlers/http/modal/query_server.rs index 39dd763f2..2edf9dc9b 100644 --- a/server/src/handlers/http/modal/query_server.rs +++ b/server/src/handlers/http/modal/query_server.rs @@ -217,7 +217,7 @@ impl QueryServer { } // spawn the sync thread - tokio::spawn(Self::sync_ingestor_metadata()); + // tokio::spawn(Self::sync_ingestor_metadata()); self.start(prometheus, CONFIG.parseable.openid.clone()) .await?; From 7f73462f1e5a09c0ab6993dc0004f669b939161a Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Fri, 23 Feb 2024 20:25:53 +0530 Subject: [PATCH 82/84] fix: manifest not being created in multi mode --- server/src/catalog.rs | 65 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 56 insertions(+), 9 deletions(-) diff --git a/server/src/catalog.rs b/server/src/catalog.rs index f95c8d3dd..cba5ddfb9 100644 --- a/server/src/catalog.rs +++ b/server/src/catalog.rs @@ -106,20 +106,67 @@ pub 
async fn update_snapshot( item.time_lower_bound <= lower_bound && lower_bound < item.time_upper_bound }); + // if the mode in I.S. manifest needs to be created but it is not getting created because + // there is already a pos, to index into stream.json + // We update the manifest referenced by this position // This updates an existing file so there is no need to create a snapshot entry. if let Some(pos) = pos { let info = &mut manifests[pos]; let path = partition_path(stream_name, info.time_lower_bound, info.time_upper_bound); - let Some(mut manifest) = storage.get_manifest(&path).await? else { - return Err(ObjectStorageError::UnhandledError( - "Manifest found in snapshot but not in object-storage" - .to_string() - .into(), - )); - }; - manifest.apply_change(change); - storage.put_manifest(&path, manifest).await?; + + let mut ch = false; + for m in manifests.iter() { + let s = get_address(); + let p = format!("{}.{}.{}", s.0, s.1, MANIFEST_FILE); + if m.manifest_path.contains(&p) { + ch = true; + } + } + if ch { + let Some(mut manifest) = storage.get_manifest(&path).await? 
else { + return Err(ObjectStorageError::UnhandledError( + "Manifest found in snapshot but not in object-storage" + .to_string() + .into(), + )); + }; + manifest.apply_change(change); + storage.put_manifest(&path, manifest).await?; + } else { + let lower_bound = lower_bound.date_naive().and_time(NaiveTime::MIN).and_utc(); + let upper_bound = lower_bound + .date_naive() + .and_time( + NaiveTime::from_num_seconds_from_midnight_opt( + 23 * 3600 + 59 * 60 + 59, + 999_999_999, + ) + .unwrap(), + ) + .and_utc(); + + let manifest = Manifest { + files: vec![change], + ..Manifest::default() + }; + + let addr = get_address(); + let mainfest_file_name = format!("{}.{}.{}", addr.0, addr.1, MANIFEST_FILE); + let path = + partition_path(stream_name, lower_bound, upper_bound).join(&mainfest_file_name); + storage + .put_object(&path, serde_json::to_vec(&manifest).unwrap().into()) + .await?; + let path = storage.absolute_url(&path); + let new_snapshot_entriy = snapshot::ManifestItem { + manifest_path: path.to_string(), + time_lower_bound: lower_bound, + time_upper_bound: upper_bound, + }; + manifests.push(new_snapshot_entriy); + storage.put_snapshot(stream_name, meta).await?; + } } else { let lower_bound = lower_bound.date_naive().and_time(NaiveTime::MIN).and_utc(); let upper_bound = lower_bound From 72186b4ea42ce511e2391613eb6363a15dc3aed1 Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Fri, 23 Feb 2024 20:40:47 +0530 Subject: [PATCH 83/84] chore: remove debug macros --- server/src/handlers/http/modal/query_server.rs | 1 - server/src/main.rs | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/server/src/handlers/http/modal/query_server.rs b/server/src/handlers/http/modal/query_server.rs index 2edf9dc9b..2d60f96e2 100644 --- a/server/src/handlers/http/modal/query_server.rs +++ b/server/src/handlers/http/modal/query_server.rs @@ -166,7 +166,6 @@ impl QueryServer { let mut f = Self::get_meta_file().await; // writer the arr in f let write_size = 
f.write(serde_json::to_string(&arr)?.as_bytes()).await?; - dbg!(write_size); Ok(arr) } diff --git a/server/src/main.rs b/server/src/main.rs index 59d47a3d0..2257800d5 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -58,17 +58,15 @@ async fn main() -> anyhow::Result<()> { env_logger::init(); CONFIG.validate_storage().await?; + // these are empty ptrs so mem footprint should be minimal let server: Arc = match CONFIG.parseable.mode { Mode::Query => { - dbg!("Mode::Query"); Arc::new(QueryServer) } Mode::Ingest => { - dbg!("Mode::Ingest"); Arc::new(IngestServer) } Mode::All => { - dbg!("Mode::All"); Arc::new(Server) } }; From a802a7198f43ad44c23b435a0c8355ebc05b72dd Mon Sep 17 00:00:00 2001 From: Eshan Chatterjee Date: Sat, 24 Feb 2024 16:34:42 +0530 Subject: [PATCH 84/84] clean up --- server/src/handlers/http/modal/query_server.rs | 7 ++++--- server/src/main.rs | 14 +++++--------- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/server/src/handlers/http/modal/query_server.rs b/server/src/handlers/http/modal/query_server.rs index 2d60f96e2..acd5c7579 100644 --- a/server/src/handlers/http/modal/query_server.rs +++ b/server/src/handlers/http/modal/query_server.rs @@ -53,7 +53,7 @@ impl ParseableServer for QueryServer { // on subsequent runs, the qurier should check if the ingestor is up and running or not for ingester in data.iter() { - dbg!(&ingester); + // dbg!(&ingester); // yes the format macro does not need the '/' ingester.origin already // has '/' because Url::Parse will add it if it is not present // uri should be something like `http://address/api/v1/liveness` @@ -165,7 +165,7 @@ impl QueryServer { let mut f = Self::get_meta_file().await; // writer the arr in f - let write_size = f.write(serde_json::to_string(&arr)?.as_bytes()).await?; + let _ = f.write(serde_json::to_string(&arr)?.as_bytes()).await?; Ok(arr) } @@ -224,11 +224,12 @@ impl QueryServer { Ok(()) } + #[allow(dead_code)] async fn sync_ingestor_metadata() { let mut interval = 
tokio::time::interval(tokio::time::Duration::from_secs(60 / 10)); loop { interval.tick().await; - dbg!("Tick"); + // dbg!("Tick"); Self::get_ingestor_info().await.unwrap(); } } diff --git a/server/src/main.rs b/server/src/main.rs index 2257800d5..60bca9fcf 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -60,15 +60,11 @@ async fn main() -> anyhow::Result<()> { // these are empty ptrs so mem footprint should be minimal let server: Arc = match CONFIG.parseable.mode { - Mode::Query => { - Arc::new(QueryServer) - } - Mode::Ingest => { - Arc::new(IngestServer) - } - Mode::All => { - Arc::new(Server) - } + Mode::Query => Arc::new(QueryServer), + + Mode::Ingest => Arc::new(IngestServer), + + Mode::All => Arc::new(Server), }; // add logic for graceful shutdown if