Skip to content

Commit 3f4b94f

Browse files
committed
migration of metadata files to separate directories
1 parent 50bc2e0 commit 3f4b94f

File tree

12 files changed

+423
-87
lines changed

12 files changed

+423
-87
lines changed

server/src/handlers/http/cluster/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ use crate::option::CONFIG;
2727

2828
use crate::metrics::prom_utils::Metrics;
2929
use crate::storage::ObjectStorageError;
30+
use crate::storage::PARSEABLE_ROOT_DIRECTORY;
3031
use actix_web::http::header;
3132
use actix_web::{HttpRequest, Responder};
3233
use http::StatusCode;
@@ -338,7 +339,7 @@ pub async fn get_cluster_metrics() -> Result<impl Responder, PostError> {
338339
pub async fn get_ingester_info() -> anyhow::Result<IngesterMetadataArr> {
339340
let store = CONFIG.storage().get_object_store();
340341

341-
let root_path = RelativePathBuf::from("");
342+
let root_path = RelativePathBuf::from(PARSEABLE_ROOT_DIRECTORY);
342343
let arr = store
343344
.get_objects(Some(&root_path))
344345
.await?

server/src/handlers/http/modal/ingest_server.rs

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,17 @@ impl ParseableServer for IngestServer {
102102
/// implement the init method will just invoke the initialize method
103103
async fn init(&self) -> anyhow::Result<()> {
104104
self.validate()?;
105+
// check for querier state. Is it there, or was it there in the past
106+
self.check_querier_state().await?;
107+
// to get the .parseable.json file in staging
108+
self.validate_credentials().await?;
109+
110+
let metadata = storage::resolve_parseable_metadata().await?;
111+
banner::print(&CONFIG, &metadata).await;
112+
rbac::map::init(&metadata);
113+
// set the info in the global metadata
114+
metadata.set_global();
115+
105116
self.initialize().await
106117
}
107118

@@ -267,19 +278,6 @@ impl IngestServer {
267278
}
268279

269280
async fn initialize(&self) -> anyhow::Result<()> {
270-
// check for querier state. Is it there, or was it there in the past
271-
self.check_querier_state().await?;
272-
// to get the .parseable.json file in staging
273-
self.validate_credentials().await?;
274-
275-
let metadata = storage::resolve_parseable_metadata().await?;
276-
banner::print(&CONFIG, &metadata).await;
277-
278-
rbac::map::init(&metadata);
279-
280-
// set the info in the global metadata
281-
metadata.set_global();
282-
283281
if let Some(cache_manager) = LocalCacheManager::global() {
284282
cache_manager
285283
.validate(CONFIG.parseable.local_cache_size)

server/src/handlers/http/modal/query_server.rs

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,15 @@ impl ParseableServer for QueryServer {
9898
/// implementation of init should just invoke a call to initialize
9999
async fn init(&self) -> anyhow::Result<()> {
100100
self.validate()?;
101+
migration::run_file_migration(&CONFIG).await?;
102+
CONFIG.validate_storage().await?;
103+
migration::run_metadata_migration(&CONFIG).await?;
104+
let metadata = storage::resolve_parseable_metadata().await?;
105+
banner::print(&CONFIG, &metadata).await;
106+
// initialize the rbac map
107+
rbac::map::init(&metadata);
108+
// keep metadata info in mem
109+
metadata.set_global();
101110
self.initialize().await
102111
}
103112

@@ -165,18 +174,6 @@ impl QueryServer {
165174

166175
/// initialize the server, run migrations as needed and start the server
167176
async fn initialize(&self) -> anyhow::Result<()> {
168-
migration::run_metadata_migration(&CONFIG).await?;
169-
170-
let metadata = storage::resolve_parseable_metadata().await?;
171-
172-
banner::print(&CONFIG, &metadata).await;
173-
174-
// initialize the rbac map
175-
rbac::map::init(&metadata);
176-
177-
// keep metadata info in mem
178-
metadata.set_global();
179-
180177
let prometheus = metrics::build_metrics_handler();
181178
CONFIG.storage().register_store_metrics(&prometheus);
182179

@@ -189,7 +186,6 @@ impl QueryServer {
189186

190187
// track all parquet files already in the data directory
191188
storage::retention::load_retention_from_global();
192-
193189
// load data from stats back to prometheus metrics
194190
metrics::fetch_stats_from_storage().await;
195191

server/src/handlers/http/modal/server.rs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,13 @@ impl ParseableServer for Server {
141141
/// implementation of init should just invoke a call to initialize
142142
async fn init(&self) -> anyhow::Result<()> {
143143
self.validate()?;
144+
migration::run_file_migration(&CONFIG).await?;
145+
CONFIG.validate_storage().await?;
146+
migration::run_metadata_migration(&CONFIG).await?;
147+
let metadata = storage::resolve_parseable_metadata().await?;
148+
banner::print(&CONFIG, &metadata).await;
149+
rbac::map::init(&metadata);
150+
metadata.set_global();
144151
self.initialize().await
145152
}
146153

@@ -405,12 +412,6 @@ impl Server {
405412
}
406413

407414
async fn initialize(&self) -> anyhow::Result<()> {
408-
migration::run_metadata_migration(&CONFIG).await?;
409-
let metadata = storage::resolve_parseable_metadata().await?;
410-
banner::print(&CONFIG, &metadata).await;
411-
rbac::map::init(&metadata);
412-
metadata.set_global();
413-
414415
if let Some(cache_manager) = LocalCacheManager::global() {
415416
cache_manager
416417
.validate(CONFIG.parseable.local_cache_size)

server/src/main.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@ pub const STORAGE_UPLOAD_INTERVAL: u32 = 60;
5656
#[actix_web::main]
5757
async fn main() -> anyhow::Result<()> {
5858
env_logger::init();
59-
CONFIG.validate_storage().await?;
6059

6160
// these are empty ptrs so mem footprint should be minimal
6261
let server: Arc<dyn ParseableServer> = match CONFIG.parseable.mode {

server/src/migration.rs

Lines changed: 98 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,19 @@ mod metadata_migration;
2121
mod schema_migration;
2222
mod stream_metadata_migration;
2323

24-
use std::fs::OpenOptions;
24+
use std::{fs::OpenOptions, sync::Arc};
2525

2626
use bytes::Bytes;
27+
use itertools::Itertools;
2728
use relative_path::RelativePathBuf;
2829
use serde::Serialize;
2930

3031
use crate::{
3132
option::Config,
3233
storage::{
33-
object_storage::{parseable_json_path, stream_json_path}, ObjectStorage, ObjectStorageError,SCHEMA_FILE_NAME,
34+
object_storage::{parseable_json_path, stream_json_path},
35+
ObjectStorage, ObjectStorageError, PARSEABLE_METADATA_FILE_NAME, PARSEABLE_ROOT_DIRECTORY,
36+
SCHEMA_FILE_NAME, STREAM_ROOT_DIRECTORY,
3437
},
3538
};
3639

@@ -120,7 +123,8 @@ async fn migration_stream(stream: &str, storage: &dyn ObjectStorage) -> anyhow::
120123
.put_object(&path, to_bytes(&new_stream_metadata))
121124
.await?;
122125

123-
let schema_path = RelativePathBuf::from_iter([stream, SCHEMA_FILE_NAME]);
126+
let schema_path =
127+
RelativePathBuf::from_iter([stream, STREAM_ROOT_DIRECTORY, SCHEMA_FILE_NAME]);
124128
let schema = storage.get_object(&schema_path).await?;
125129
let schema = serde_json::from_slice(&schema).ok();
126130
let map = schema_migration::v1_v3(schema)?;
@@ -132,7 +136,8 @@ async fn migration_stream(stream: &str, storage: &dyn ObjectStorage) -> anyhow::
132136
.put_object(&path, to_bytes(&new_stream_metadata))
133137
.await?;
134138

135-
let schema_path = RelativePathBuf::from_iter([stream, SCHEMA_FILE_NAME]);
139+
let schema_path =
140+
RelativePathBuf::from_iter([stream, STREAM_ROOT_DIRECTORY, SCHEMA_FILE_NAME]);
136141
let schema = storage.get_object(&schema_path).await?;
137142
let schema = serde_json::from_slice(&schema)?;
138143
let map = schema_migration::v2_v3(schema)?;
@@ -195,7 +200,6 @@ pub async fn put_remote_metadata(
195200

196201
pub fn put_staging_metadata(config: &Config, metadata: &serde_json::Value) -> anyhow::Result<()> {
197202
let path = parseable_json_path().to_path(config.staging_dir());
198-
//config.staging_dir().join(PARSEABLE_METADATA_FILE_NAME);
199203
let mut file = OpenOptions::new()
200204
.create(true)
201205
.truncate(true)
@@ -204,3 +208,92 @@ pub fn put_staging_metadata(config: &Config, metadata: &serde_json::Value) -> an
204208
serde_json::to_writer(&mut file, metadata)?;
205209
Ok(())
206210
}
211+
212+
pub async fn run_file_migration(config: &Config) -> anyhow::Result<()> {
213+
let object_store = config.storage().get_object_store();
214+
215+
let old_meta_file_path = RelativePathBuf::from(PARSEABLE_METADATA_FILE_NAME);
216+
217+
// if this fails with NoSuchKey, the migration is already done; any other error is returned
218+
if let Err(err) = object_store.get_object(&old_meta_file_path).await {
219+
if matches!(err, ObjectStorageError::NoSuchKey(_)) {
220+
return Ok(());
221+
}
222+
return Err(err.into());
223+
}
224+
225+
run_meta_file_migration(&object_store, old_meta_file_path).await?;
226+
run_stream_files_migration(object_store).await?;
227+
228+
Ok(())
229+
}
230+
231+
async fn run_meta_file_migration(
232+
object_store: &Arc<dyn ObjectStorage + Send>,
233+
old_meta_file_path: RelativePathBuf,
234+
) -> anyhow::Result<()> {
235+
log::info!("Migrating metadata files to new location");
236+
237+
// get the list of all meta files
238+
let mut meta_files = object_store.get_ingester_meta_file_paths().await?;
239+
meta_files.push(old_meta_file_path);
240+
241+
for file in meta_files {
242+
match object_store.get_object(&file).await {
243+
Ok(bytes) => {
244+
// we can unwrap here because we know the file exists
245+
let new_path = RelativePathBuf::from_iter([
246+
PARSEABLE_ROOT_DIRECTORY,
247+
file.file_name().unwrap(),
248+
]);
249+
object_store.put_object(&new_path, bytes).await?;
250+
object_store.delete_object(&file).await?;
251+
}
252+
Err(err) => {
253+
// if error is not a no such key error, something weird happened
254+
// so return the error
255+
if !matches!(err, ObjectStorageError::NoSuchKey(_)) {
256+
return Err(err.into());
257+
}
258+
}
259+
}
260+
}
261+
262+
Ok(())
263+
}
264+
265+
async fn run_stream_files_migration(
266+
object_store: Arc<dyn ObjectStorage + Send>,
267+
) -> anyhow::Result<()> {
268+
let streams = object_store
269+
.list_old_streams()
270+
.await?
271+
.into_iter()
272+
.map(|stream| stream.name)
273+
.collect_vec();
274+
275+
for stream in streams {
276+
let paths = object_store.get_stream_file_paths(&stream).await?;
277+
278+
for path in paths {
279+
match object_store.get_object(&path).await {
280+
Ok(bytes) => {
281+
let new_path = RelativePathBuf::from_iter([
282+
stream.as_str(),
283+
STREAM_ROOT_DIRECTORY,
284+
path.file_name().unwrap(),
285+
]);
286+
object_store.put_object(&new_path, bytes).await?;
287+
object_store.delete_object(&path).await?;
288+
}
289+
Err(err) => {
290+
if !matches!(err, ObjectStorageError::NoSuchKey(_)) {
291+
return Err(err.into());
292+
}
293+
}
294+
}
295+
}
296+
}
297+
298+
Ok(())
299+
}

server/src/option.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ use std::path::PathBuf;
2626
use std::sync::Arc;
2727

2828
use crate::cli::Cli;
29-
use crate::storage::PARSEABLE_METADATA_FILE_NAME;
29+
use crate::storage::object_storage::parseable_json_path;
3030
use crate::storage::{FSConfig, ObjectStorageError, ObjectStorageProvider, S3Config};
3131
pub const MIN_CACHE_SIZE_BYTES: u64 = 1000u64.pow(3); // 1 GB (10^9 bytes)
3232
pub const JOIN_COMMUNITY: &str =
@@ -102,7 +102,7 @@ impl Config {
102102
// if the proper data directory is provided, or s3 bucket is provided etc
103103
pub async fn validate_storage(&self) -> Result<(), ObjectStorageError> {
104104
let obj_store = self.storage.get_object_store();
105-
let rel_path = relative_path::RelativePathBuf::from(PARSEABLE_METADATA_FILE_NAME);
105+
let rel_path = parseable_json_path();
106106

107107
let has_parseable_json = obj_store.get_object(&rel_path).await.is_ok();
108108

server/src/storage.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ pub use self::staging::StorageDir;
4343
// metadata file names in a Stream prefix
4444
pub const STREAM_METADATA_FILE_NAME: &str = ".stream.json";
4545
pub const PARSEABLE_METADATA_FILE_NAME: &str = ".parseable.json";
46+
pub const STREAM_ROOT_DIRECTORY: &str = ".stream";
4647
pub const PARSEABLE_ROOT_DIRECTORY: &str = ".parseable";
4748
pub const SCHEMA_FILE_NAME: &str = ".schema";
4849
pub const ALERT_FILE_NAME: &str = ".alert.json";

0 commit comments

Comments
 (0)