From e70e144711ddd1db7dfde814a82c8be1a8613991 Mon Sep 17 00:00:00 2001 From: Nikhil Sinha Date: Fri, 12 Jan 2024 11:00:15 +0530 Subject: [PATCH 1/4] fix for #573 corrected error message in case of deployment mismatch data directory creation should not happen in case of deployment mismatch staging should be overwritten in case of new staging --- server/src/main.rs | 1 + server/src/option.rs | 5 ++--- server/src/storage/localfs.rs | 7 +++---- server/src/storage/store_metadata.rs | 17 ++++------------- 4 files changed, 10 insertions(+), 20 deletions(-) diff --git a/server/src/main.rs b/server/src/main.rs index de98cff83..78c81c579 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -58,6 +58,7 @@ async fn main() -> anyhow::Result<()> { CONFIG.validate_staging()?; migration::run_metadata_migration(&CONFIG).await?; let metadata = storage::resolve_parseable_metadata().await?; + banner::print(&CONFIG, &metadata).await; rbac::map::init(&metadata); metadata.set_global(); diff --git a/server/src/option.rs b/server/src/option.rs index 1104f9674..a539e2268 100644 --- a/server/src/option.rs +++ b/server/src/option.rs @@ -43,7 +43,6 @@ pub struct Config { impl Config { fn new() -> Self { let cli = parseable_cli_command().get_matches(); - match cli.subcommand() { Some(("local-store", m)) => { let server = match Server::from_arg_matches(m) { @@ -51,7 +50,7 @@ impl Config { Err(err) => err.exit(), }; let storage = match FSConfig::from_arg_matches(m) { - Ok(server) => server, + Ok(storage) => storage, Err(err) => err.exit(), }; @@ -85,7 +84,7 @@ impl Config { Err(err) => err.exit(), }; let storage = match S3Config::from_arg_matches(m) { - Ok(server) => server, + Ok(storage) => storage, Err(err) => err.exit(), }; diff --git a/server/src/storage/localfs.rs b/server/src/storage/localfs.rs index e1dc31ee5..f6a5d188e 100644 --- a/server/src/storage/localfs.rs +++ b/server/src/storage/localfs.rs @@ -32,7 +32,7 @@ use tokio::fs::{self, DirEntry}; use tokio_stream::wrappers::ReadDirStream; use crate::metrics::storage::{localfs::REQUEST_RESPONSE_TIME, StorageMetrics}; -use crate::{option::validation, utils::validate_path_is_writeable}; +use crate::utils::validate_path_is_writeable; use super::{object_storage, LogStream, ObjectStorage, ObjectStorageError, ObjectStorageProvider}; @@ -49,8 +49,7 @@ pub struct FSConfig { #[arg( env = "P_FS_DIR", value_name = "filesystem path", - default_value = "./data", - value_parser = validation::canonicalize_path + default_value = "./data" )] pub root: PathBuf, } @@ -187,7 +186,7 @@ impl ObjectStorage for LocalFS { }; let to_path = self.root.join(key); if let Some(path) = to_path.parent() { - fs::create_dir_all(path).await? + fs::create_dir_all(path).await?; } let _ = fs_extra::file::copy(path, to_path, &op)?; Ok(()) diff --git a/server/src/storage/store_metadata.rs b/server/src/storage/store_metadata.rs index b0ef21d6d..1b44b9c2f 100644 --- a/server/src/storage/store_metadata.rs +++ b/server/src/storage/store_metadata.rs @@ -100,13 +100,7 @@ pub async fn resolve_parseable_metadata() -> Result { - if staging.deployment_id == remote.deployment_id { - EnvChange::None(remote) - } else { - EnvChange::DeploymentMismatch - } - } + (Some(_staging), Some(remote)) => EnvChange::None(remote), (None, Some(remote)) => EnvChange::NewStaging(remote), (Some(_), None) => EnvChange::NewRemote, (None, None) => EnvChange::CreateBoth, @@ -116,16 +110,14 @@ pub async fn resolve_parseable_metadata() -> Result { // overwrite staging anyways so that it matches remote in case of any divergence overwrite_staging = true; Ok(metadata) - } - EnvChange::DeploymentMismatch => Err(MISMATCH), + }, EnvChange::NewRemote => { - Err("Could not start the server because metadata not found in storage") + Err("Could not start the server because staging directory indicates stale data from previous deployment, please choose an empty staging directory and restart the server") } EnvChange::NewStaging(mut metadata) => { create_dir_all(CONFIG.staging_dir())?; @@ -171,9 +163,8 @@ pub async fn resolve_parseable_metadata() -> Result Date: Fri, 12 Jan 2024 20:11:35 +0530 Subject: [PATCH 2/4] fix for #573 corrected error message in case of deployment mismatch data directory creation should not happen in case of deployment mismatch staging should be overwritten in case of new staging default staging and data directory should not be created if env var has different path --- Cargo.lock | 7 +++++++ server/Cargo.toml | 1 + server/src/main.rs | 3 +-- server/src/option.rs | 31 ++++++++++++++++++---------- server/src/storage/localfs.rs | 5 +++-- server/src/storage/store_metadata.rs | 4 ++-- 6 files changed, 34 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 59c342deb..e293a0663 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2711,6 +2711,7 @@ dependencies = [ "once_cell", "openid", "parquet", + "path-clean", "prometheus", "rand", "regex", @@ -2760,6 +2761,12 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d01a5bd0424d00070b0098dd17ebca6f961a959dead1dbcbbbc1d1cd8d3deeba" +[[package]] +name = "path-clean" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17359afc20d7ab31fdb42bb844c8b3bb1dabd7dcf7e68428492da7f16966fcef" + [[package]] name = "path-matchers" version = "1.0.2" diff --git a/server/Cargo.toml b/server/Cargo.toml index 9c151e6d1..57afe65c5 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -102,6 +102,7 @@ url = "2.4.0" http-auth-basic = "0.3.3" serde_repr = "0.1.17" hashlru = { version = "0.11.0", features = ["serde"] } +path-clean = "1.0.1" [build-dependencies] cargo_toml = "0.15" diff --git a/server/src/main.rs b/server/src/main.rs index 78c81c579..954ed6ddd 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -55,10 +55,9 @@ use crate::localcache::LocalCacheManager; async fn main() -> anyhow::Result<()> { env_logger::init(); let storage = CONFIG.storage().get_object_store(); - CONFIG.validate_staging()?; migration::run_metadata_migration(&CONFIG).await?; let metadata = storage::resolve_parseable_metadata().await?; - + CONFIG.validate_staging()?; banner::print(&CONFIG, &metadata).await; rbac::map::init(&metadata); metadata.set_global(); diff --git a/server/src/option.rs b/server/src/option.rs index a539e2268..65b5bd6a1 100644 --- a/server/src/option.rs +++ b/server/src/option.rs @@ -21,7 +21,8 @@ use clap::{command, value_parser, Arg, ArgGroup, Args, Command, FromArgMatches}; use once_cell::sync::Lazy; use parquet::basic::{BrotliLevel, GzipLevel, ZstdLevel}; -use std::path::{Path, PathBuf}; +use std::path::PathBuf; +use std::env; use std::sync::Arc; use url::Url; @@ -107,7 +108,7 @@ impl Config { self.storage.clone() } - pub fn staging_dir(&self) -> &Path { + pub fn staging_dir(&self) -> &PathBuf { &self.parseable.local_staging_path } @@ -610,12 +611,14 @@ impl From for parquet::basic::Compression { pub mod validation { use std::{ - fs::{canonicalize, create_dir_all}, net::ToSocketAddrs, - path::PathBuf, + path::{PathBuf, Path}, str::FromStr, + env,io }; + use path_clean::PathClean; + use crate::option::MIN_CACHE_SIZE_BYTES; use crate::storage::LOCAL_SYNC_INTERVAL; use human_size::{multiples, SpecificSize}; @@ -633,16 +636,22 @@ pub mod validation { Ok(path) } + pub fn absolute_path(path: impl AsRef) -> io::Result { + let path = path.as_ref(); + + let absolute_path = if path.is_absolute() { + path.to_path_buf() + } else { + env::current_dir()?.join(path) + }.clean(); + + Ok(absolute_path) + } pub fn canonicalize_path(s: &str) -> Result { let path = PathBuf::from(s); - - create_dir_all(&path) - .map_err(|err| err.to_string()) - .and_then(|_| { - canonicalize(&path) - .map_err(|_| "Cannot use the path provided as an absolute path".to_string()) - }) + let absolute_path = absolute_path(&path); + Ok(absolute_path.unwrap()) } pub fn socket_addr(s: &str) -> Result { diff --git a/server/src/storage/localfs.rs b/server/src/storage/localfs.rs index f6a5d188e..df88499a9 100644 --- a/server/src/storage/localfs.rs +++ b/server/src/storage/localfs.rs @@ -32,7 +32,7 @@ use tokio::fs::{self, DirEntry}; use tokio_stream::wrappers::ReadDirStream; use crate::metrics::storage::{localfs::REQUEST_RESPONSE_TIME, StorageMetrics}; -use crate::utils::validate_path_is_writeable; +use crate::{option::validation, utils::validate_path_is_writeable}; use super::{object_storage, LogStream, ObjectStorage, ObjectStorageError, ObjectStorageProvider}; @@ -49,7 +49,8 @@ pub struct FSConfig { #[arg( env = "P_FS_DIR", value_name = "filesystem path", - default_value = "./data" + default_value = "./data", + value_parser = validation::canonicalize_path )] pub root: PathBuf, } diff --git a/server/src/storage/store_metadata.rs b/server/src/storage/store_metadata.rs index 1b44b9c2f..ae0f1c3d8 100644 --- a/server/src/storage/store_metadata.rs +++ b/server/src/storage/store_metadata.rs @@ -66,7 +66,7 @@ impl StorageMetadata { Self { version: "v3".to_string(), mode: CONFIG.storage_name.to_owned(), - staging: CONFIG.staging_dir().canonicalize().unwrap(), + staging: CONFIG.staging_dir().to_path_buf(), storage: CONFIG.storage().get_endpoint(), deployment_id: uid::gen(), users: Vec::new(), @@ -130,7 +130,7 @@ pub async fn resolve_parseable_metadata() -> Result { create_dir_all(CONFIG.staging_dir())?; - let metadata = StorageMetadata::new(); + let metadata = StorageMetadata::new(); // new metadata needs to be set on both staging and remote overwrite_remote = true; overwrite_staging = true; From f8780ee614c1de26815e4ea89ef6bf5cb9820c97 Mon Sep 17 00:00:00 2001 From: Nikhil Sinha Date: Fri, 12 Jan 2024 20:16:54 +0530 Subject: [PATCH 3/4] cargo fmt and clippy fix --- server/src/option.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/server/src/option.rs b/server/src/option.rs index 65b5bd6a1..8b3983170 100644 --- a/server/src/option.rs +++ b/server/src/option.rs @@ -21,8 +21,8 @@ use clap::{command, value_parser, Arg, ArgGroup, Args, Command, FromArgMatches}; use once_cell::sync::Lazy; use parquet::basic::{BrotliLevel, GzipLevel, ZstdLevel}; -use std::path::PathBuf; use std::env; +use std::path::PathBuf; use std::sync::Arc; use url::Url; @@ -611,10 +611,10 @@ impl From for parquet::basic::Compression { pub mod validation { use std::{ + env, io, net::ToSocketAddrs, - path::{PathBuf, Path}, + path::{Path, PathBuf}, str::FromStr, - env,io }; use path_clean::PathClean; @@ -638,20 +638,20 @@ pub mod validation { } pub fn absolute_path(path: impl AsRef) -> io::Result { let path = path.as_ref(); - + let absolute_path = if path.is_absolute() { path.to_path_buf() } else { env::current_dir()?.join(path) - }.clean(); - + } + .clean(); + Ok(absolute_path) } pub fn canonicalize_path(s: &str) -> Result { let path = PathBuf::from(s); - let absolute_path = absolute_path(&path); - Ok(absolute_path.unwrap()) + Ok(absolute_path(path).unwrap()) } pub fn socket_addr(s: &str) -> Result { From 6b2146247f20e0a723f9a882fbb80211c1440454 Mon Sep 17 00:00:00 2001 From: Nikhil Sinha Date: Mon, 15 Jan 2024 18:27:06 +0530 Subject: [PATCH 4/4] changed logic to throw exception in case of deployment mismatch between staging and remote --- server/src/storage/store_metadata.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/server/src/storage/store_metadata.rs b/server/src/storage/store_metadata.rs index ae0f1c3d8..0e9ad955f 100644 --- a/server/src/storage/store_metadata.rs +++ b/server/src/storage/store_metadata.rs @@ -100,7 +100,13 @@ pub async fn resolve_parseable_metadata() -> Result EnvChange::None(remote), + (Some(staging), Some(remote)) => { + if staging.deployment_id == remote.deployment_id { + EnvChange::None(remote) + } else { + EnvChange::NewRemote + } + } (None, Some(remote)) => EnvChange::NewStaging(remote), (Some(_), None) => EnvChange::NewRemote, (None, None) => EnvChange::CreateBoth, @@ -130,7 +136,7 @@ pub async fn resolve_parseable_metadata() -> Result { create_dir_all(CONFIG.staging_dir())?; - let metadata = StorageMetadata::new(); + let metadata = StorageMetadata::new(); // new metadata needs to be set on both staging and remote overwrite_remote = true; overwrite_staging = true;