|
18 | 18 |
|
19 | 19 | use std::{io::ErrorKind, sync::Arc}; |
20 | 20 |
|
| 21 | +use self::{column::Column, snapshot::ManifestItem}; |
| 22 | +use crate::handlers::http::base_path_without_preceding_slash; |
| 23 | +use crate::option::CONFIG; |
21 | 24 | use crate::{ |
22 | 25 | catalog::manifest::Manifest, |
23 | 26 | query::PartialTimeFilter, |
24 | 27 | storage::{object_storage::manifest_path, ObjectStorage, ObjectStorageError}, |
25 | 28 | }; |
| 29 | +use crate::{handlers, Mode}; |
| 30 | +use bytes::Bytes; |
26 | 31 | use chrono::{DateTime, Local, NaiveDateTime, NaiveTime, Utc}; |
27 | 32 | use relative_path::RelativePathBuf; |
28 | 33 | use std::io::Error as IOError; |
29 | | - |
30 | | -use self::{column::Column, snapshot::ManifestItem}; |
31 | | - |
32 | 34 | pub mod column; |
33 | 35 | pub mod manifest; |
34 | 36 | pub mod snapshot; |
@@ -208,51 +210,99 @@ pub async fn remove_manifest_from_snapshot( |
208 | 210 | storage: Arc<dyn ObjectStorage + Send>, |
209 | 211 | stream_name: &str, |
210 | 212 | dates: Vec<String>, |
211 | | -) -> Result<(), ObjectStorageError> { |
212 | | - // get current snapshot |
213 | | - let mut meta = storage.get_object_store_format(stream_name).await?; |
214 | | - let manifests = &mut meta.snapshot.manifest_list; |
| 213 | +) -> Result<Option<String>, ObjectStorageError> { |
| 214 | + match CONFIG.parseable.mode { |
| 215 | + Mode::All | Mode::Ingest => { |
| 216 | + if !dates.is_empty() { |
| 217 | + // get current snapshot |
| 218 | + let mut meta = storage.get_object_store_format(stream_name).await?; |
| 219 | + let manifests = &mut meta.snapshot.manifest_list; |
| 220 | + // Filter out items whose manifest_path contains any of the dates_to_delete |
| 221 | + manifests |
| 222 | + .retain(|item| !dates.iter().any(|date| item.manifest_path.contains(date))); |
| 223 | + storage.put_snapshot(stream_name, meta.snapshot).await?; |
| 224 | + } |
215 | 225 |
|
216 | | - // Filter out items whose manifest_path contains any of the dates_to_delete |
217 | | - manifests.retain(|item| !dates.iter().any(|date| item.manifest_path.contains(date))); |
| 226 | + let first_event_at = get_first_event(storage.clone(), stream_name, Vec::new()).await?; |
218 | 227 |
|
219 | | - storage.put_snapshot(stream_name, meta.snapshot).await?; |
220 | | - Ok(()) |
| 228 | + Ok(first_event_at) |
| 229 | + } |
| 230 | + Mode::Query => Ok(get_first_event(storage, stream_name, dates).await?), |
| 231 | + } |
221 | 232 | } |
222 | 233 |
|
223 | 234 | pub async fn get_first_event( |
224 | 235 | storage: Arc<dyn ObjectStorage + Send>, |
225 | 236 | stream_name: &str, |
| 237 | + dates: Vec<String>, |
226 | 238 | ) -> Result<Option<String>, ObjectStorageError> { |
227 | | - // get current snapshot |
228 | | - let mut meta = storage.get_object_store_format(stream_name).await?; |
229 | | - let manifests = &mut meta.snapshot.manifest_list; |
230 | | - if manifests.is_empty() { |
231 | | - log::info!("No manifest found for stream {stream_name}"); |
232 | | - return Err(ObjectStorageError::Custom("No manifest found".to_string())); |
233 | | - } |
234 | | - |
235 | | - let manifest = &manifests[0]; |
236 | | - |
237 | | - let path = partition_path( |
238 | | - stream_name, |
239 | | - manifest.time_lower_bound, |
240 | | - manifest.time_upper_bound, |
241 | | - ); |
242 | | - let Some(manifest) = storage.get_manifest(&path).await? else { |
243 | | - return Err(ObjectStorageError::UnhandledError( |
244 | | - "Manifest found in snapshot but not in object-storage" |
245 | | - .to_string() |
246 | | - .into(), |
247 | | - )); |
248 | | - }; |
| 239 | + let mut first_event_at: String = String::default(); |
| 240 | + match CONFIG.parseable.mode { |
| 241 | + Mode::All | Mode::Ingest => { |
| 242 | + // get current snapshot |
| 243 | + let mut meta = storage.get_object_store_format(stream_name).await?; |
| 244 | + let manifests = &mut meta.snapshot.manifest_list; |
| 245 | + if manifests.is_empty() { |
| 246 | + log::info!("No manifest found for stream {stream_name}"); |
| 247 | + return Err(ObjectStorageError::Custom("No manifest found".to_string())); |
| 248 | + } |
| 249 | + let manifest = &manifests[0]; |
| 250 | + let path = partition_path( |
| 251 | + stream_name, |
| 252 | + manifest.time_lower_bound, |
| 253 | + manifest.time_upper_bound, |
| 254 | + ); |
| 255 | + let Some(manifest) = storage.get_manifest(&path).await? else { |
| 256 | + return Err(ObjectStorageError::UnhandledError( |
| 257 | + "Manifest found in snapshot but not in object-storage" |
| 258 | + .to_string() |
| 259 | + .into(), |
| 260 | + )); |
| 261 | + }; |
| 262 | + if let Some(first_event) = manifest.files.first() { |
| 263 | + let (lower_bound, _) = get_file_bounds(first_event); |
| 264 | + first_event_at = lower_bound.with_timezone(&Local).to_rfc3339(); |
| 265 | + } |
| 266 | + } |
| 267 | + Mode::Query => { |
| 268 | + let ingestor_metadata = |
| 269 | + handlers::http::cluster::get_ingestor_info() |
| 270 | + .await |
| 271 | + .map_err(|err| { |
| 272 | + log::error!("Fatal: failed to get ingestor info: {:?}", err); |
| 273 | + ObjectStorageError::from(err) |
| 274 | + })?; |
| 275 | + let mut ingestors_first_event_at: Vec<String> = Vec::new(); |
| 276 | + for ingestor in ingestor_metadata { |
| 277 | + let url = format!( |
| 278 | + "{}{}/logstream/{}/retention/cleanup", |
| 279 | + ingestor.domain_name, |
| 280 | + base_path_without_preceding_slash(), |
| 281 | + stream_name |
| 282 | + ); |
| 283 | + // Convert dates vector to Bytes object |
| 284 | + let dates_bytes = Bytes::from(serde_json::to_vec(&dates).unwrap()); |
| 285 | + // delete the stream |
249 | 286 |
|
250 | | - if let Some(first_event) = manifest.files.first() { |
251 | | - let (lower_bound, _) = get_file_bounds(first_event); |
252 | | - let first_event_at = lower_bound.with_timezone(&Local).to_rfc3339(); |
253 | | - return Ok(Some(first_event_at)); |
| 287 | + let ingestor_first_event_at = |
| 288 | + handlers::http::cluster::send_retention_cleanup_request( |
| 289 | + &url, |
| 290 | + ingestor.clone(), |
| 291 | + dates_bytes, |
| 292 | + ) |
| 293 | + .await?; |
| 294 | + if !ingestor_first_event_at.is_empty() { |
| 295 | + ingestors_first_event_at.push(ingestor_first_event_at); |
| 296 | + } |
| 297 | + } |
| 298 | + if ingestors_first_event_at.is_empty() { |
| 299 | + return Ok(None); |
| 300 | + } |
| 301 | + first_event_at = ingestors_first_event_at.iter().min().unwrap().to_string(); |
| 302 | + } |
254 | 303 | } |
255 | | - Ok(None) |
| 304 | + |
| 305 | + Ok(Some(first_event_at)) |
256 | 306 | } |
257 | 307 |
|
258 | 308 | /// Partition the path to which this manifest belongs. |
|
0 commit comments