Commit c2ee3f9

instrument execution plan construction

Committed by Devdutt Shenoi
1 parent 384f84f

7 files changed: +39 additions, −8 deletions


src/handlers/http/mod.rs

Lines changed: 2 additions & 0 deletions

@@ -20,6 +20,7 @@ use actix_cors::Cors;
 use arrow_schema::Schema;
 use itertools::Itertools;
 use serde_json::Value;
+use tracing::instrument;

 use crate::{option::CONFIG, storage::STREAM_ROOT_DIRECTORY};

@@ -75,6 +76,7 @@ pub fn base_path_without_preceding_slash() -> String {
 /// # Returns
 ///
 /// An `anyhow::Result` containing the `arrow_schema::Schema` for the specified stream.
+#[instrument]
 pub async fn fetch_schema(stream_name: &str) -> anyhow::Result<arrow_schema::Schema> {
     let path_prefix =
         relative_path::RelativePathBuf::from(format!("{}/{}", stream_name, STREAM_ROOT_DIRECTORY));
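For readers unfamiliar with the tracing crate: `#[instrument]` wraps the annotated function so that each call creates and enters a span named after the function, recording the arguments as fields through their `Debug` implementations. A minimal sketch of the same pattern, using a hypothetical function rather than code from this commit:

    use tracing::instrument;

    // Each call creates a span named "fetch_len", carrying the
    // `stream_name` argument as a span field (formatted via Debug).
    #[instrument]
    async fn fetch_len(stream_name: &str) -> usize {
        stream_name.len()
    }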

src/handlers/http/modal/utils/logstream_utils.rs

Lines changed: 2 additions & 0 deletions

@@ -22,6 +22,7 @@ use actix_web::{http::header::HeaderMap, HttpRequest};
 use arrow_schema::{Field, Schema};
 use bytes::Bytes;
 use http::StatusCode;
+use tracing::instrument;

 use crate::{
     handlers::{

@@ -438,6 +439,7 @@ pub async fn create_stream(
 /// list all streams from storage
 /// if stream exists in storage, create stream and schema from storage
 /// and add it to the memory map
+#[instrument]
 pub async fn create_stream_and_schema_from_storage(stream_name: &str) -> Result<bool, StreamError> {
     // Proceed to create log stream if it doesn't exist
     let storage = CONFIG.storage().get_object_store();
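Since `#[instrument]` records every argument via `Debug`, functions that take large payloads usually skip them and record a cheaper field instead. This commit only instruments `&str` arguments, but a hedged sketch of that variant (hypothetical handler, not from this commit) looks like:

    use bytes::Bytes;
    use tracing::instrument;

    // Skip the payload itself; record only its length as a span field.
    #[instrument(skip(body), fields(body_len = body.len()))]
    async fn ingest(stream_name: &str, body: Bytes) {
        // hypothetical handler: write `body` to the named stream
        let _ = (stream_name, body);
    }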

src/handlers/http/query.rs

Lines changed: 11 additions & 1 deletion

@@ -30,7 +30,7 @@ use std::collections::HashMap;
 use std::pin::Pin;
 use std::sync::Arc;
 use std::time::Instant;
-use tracing::{error, info, warn};
+use tracing::{error, info, instrument, span, warn, Level};

 use crate::event::error::EventError;
 use crate::handlers::http::fetch_schema;

@@ -70,8 +70,10 @@ pub struct Query {
     pub filter_tags: Option<Vec<String>>,
 }

+#[instrument]
 pub async fn query(req: HttpRequest, query_request: Query) -> Result<impl Responder, QueryError> {
     let session_state = QUERY_SESSION.state();
+    let _guard = span!(Level::DEBUG, "Logical Plan Construction");
     let raw_logical_plan = match session_state
         .create_logical_plan(&query_request.query)
         .await

@@ -85,6 +87,7 @@ pub async fn query(req: HttpRequest, query_request: Query) -> Result<impl Respon
             .await?
         }
     };
+    drop(_guard);
     // create a visitor to extract the table name
     let mut visitor = TableScanVisitor::default();
     let _ = raw_logical_plan.visit(&mut visitor);

@@ -174,6 +177,7 @@ pub async fn query(req: HttpRequest, query_request: Query) -> Result<impl Respon
     Ok(response)
 }

+#[instrument]
 pub async fn update_schema_when_distributed(tables: Vec<String>) -> Result<(), QueryError> {
     if CONFIG.parseable.mode == Mode::Query {
         for table in tables {

@@ -192,10 +196,13 @@ pub async fn update_schema_when_distributed(tables: Vec<String>) -> Result<(), Q
 /// Create streams for querier if they do not exist
 /// get list of streams from memory and storage
 /// create streams for memory from storage if they do not exist
+#[instrument]
 pub async fn create_streams_for_querier() {
     let querier_streams = STREAM_INFO.list_streams();
     let store = CONFIG.storage().get_object_store();
+    let _guard = span!(Level::DEBUG, "Listing Streams");
     let storage_streams = store.list_streams().await.unwrap();
+    drop(_guard);
     for stream in storage_streams {
         let stream_name = stream.name;

@@ -206,6 +213,7 @@ pub async fn create_streams_for_querier() {
 }

 #[allow(clippy::too_many_arguments)]
+#[instrument]
 pub async fn put_results_in_cache(
     cache_results: Option<&str>,
     user_id: Option<&str>,

@@ -263,6 +271,7 @@ pub async fn put_results_in_cache(
 }

 #[allow(clippy::too_many_arguments)]
+#[instrument]
 pub async fn get_results_from_cache(
     show_cached: Option<&str>,
     query_cache_manager: Option<&QueryCacheManager>,

@@ -385,6 +394,7 @@ impl FromRequest for Query {
     }
 }

+#[instrument]
 pub async fn into_query(
     query: &Query,
     session_state: &SessionState,
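One caveat on the manual-guard pattern above: `span!` only constructs a `Span` and does not enter it, so a section is recorded under the span only if the guard comes from `.entered()`; an entered guard also should not be held across an `.await`. A minimal sketch of both idioms, with hypothetical helper names not taken from this commit:

    use tracing::{span, Instrument, Level};

    fn build_plan() -> String {
        "plan".to_string() // hypothetical stand-in for plan construction
    }

    async fn execute(_plan: String) {} // hypothetical stand-in for execution

    async fn plan_and_run() {
        // Synchronous section: enter the span; dropping the guard closes it.
        let guard = span!(Level::DEBUG, "Logical Plan Construction").entered();
        let plan = build_plan();
        drop(guard);

        // Async section: attach the span to the future instead of holding
        // an entered guard across `.await`.
        execute(plan)
            .instrument(span!(Level::DEBUG, "Execution"))
            .await;
    }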

src/query/mod.rs

Lines changed: 2 additions & 0 deletions

@@ -38,6 +38,7 @@ use std::collections::HashMap;
 use std::path::{Path, PathBuf};
 use std::sync::Arc;
 use sysinfo::System;
+use tracing::instrument;

 use self::error::ExecuteError;
 use self::stream_schema_provider::GlobalSchemaProvider;

@@ -126,6 +127,7 @@ impl Query {
         SessionContext::new_with_state(state)
     }

+    #[instrument]
     pub async fn execute(
         &self,
         stream_name: String,

src/query/stream_schema_provider.rs

Lines changed: 12 additions & 5 deletions

@@ -55,6 +55,7 @@ use itertools::Itertools;
 use object_store::{path::Path, ObjectStore};
 use relative_path::RelativePathBuf;
 use std::{any::Any, collections::HashMap, ops::Bound, sync::Arc};
+use tracing::{instrument, span, Level};
 use url::Url;

 use crate::{

@@ -125,6 +126,7 @@ async fn create_parquet_physical_plan(
     state: &dyn Session,
     time_partition: Option<String>,
 ) -> Result<Arc<dyn ExecutionPlan>, DataFusionError> {
+    let _guard = span!(Level::DEBUG, "Construct Physical Plan");
     // Convert filters to physical expressions if applicable
     let filters = filters
         .iter()

@@ -182,6 +184,7 @@ async fn create_parquet_physical_plan(
     Ok(plan)
 }

+#[instrument]
 async fn collect_from_snapshot(
     snapshot: &catalog::snapshot::Snapshot,
     time_filters: &[PartialTimeFilter],

@@ -332,6 +335,7 @@ impl TableProvider for StandardTableProvider {
         filters: &[Expr],
         limit: Option<usize>,
     ) -> Result<Arc<dyn ExecutionPlan>, DataFusionError> {
+        let _guard = span!(Level::DEBUG, "Execution Plan Preparation");
         let mut memory_exec = None;
         let mut cache_exec = None;
         let mut hot_tier_exec = None;

@@ -357,6 +361,7 @@ impl TableProvider for StandardTableProvider {

         // Memory Execution Plan (for current stream data in memory)
         if include_now(filters, time_partition.clone()) {
+            let _guard = span!(Level::DEBUG, "Memory Execution Plan Preparation");
             if let Some(records) =
                 event::STREAM_WRITERS.recordbatches_cloned(&self.stream, &self.schema)
             {

@@ -371,6 +376,7 @@ impl TableProvider for StandardTableProvider {

         // Merged snapshot creation for query mode
         let merged_snapshot = if CONFIG.parseable.mode == Mode::Query {
+            let _guard = span!(Level::DEBUG, "Merging snapshots");
             let path = RelativePathBuf::from_iter([&self.stream, STREAM_ROOT_DIRECTORY]);
             glob_storage
                 .get_objects(

@@ -398,8 +404,8 @@
             let listing_time_fiters =
                 return_listing_time_filters(&merged_snapshot.manifest_list, &mut time_filters);

-            listing_exec = if let Some(listing_time_filter) = listing_time_fiters {
-                legacy_listing_table(
+            if let Some(listing_time_filter) = listing_time_fiters {
+                listing_exec = legacy_listing_table(
                     self.stream.clone(),
                     glob_storage.clone(),
                     object_store.clone(),

@@ -412,9 +418,7 @@
                     time_partition.clone(),
                 )
                 .await?
-            } else {
-                None
-            };
+            }
         }

         // Manifest file collection

@@ -545,6 +549,7 @@ async fn get_cache_exectuion_plan(
     state: &dyn Session,
     time_partition: Option<String>,
 ) -> Result<Option<Arc<dyn ExecutionPlan>>, DataFusionError> {
+    let _guard = span!(Level::DEBUG, "Construct Execution Plan against Cache");
     let (cached, remainder) = cache_manager
         .partition_on_cached(stream, manifest_files.clone(), |file: &File| {
             &file.file_path

@@ -594,6 +599,7 @@ async fn get_hottier_exectuion_plan(
     state: &dyn Session,
     time_partition: Option<String>,
 ) -> Result<Option<Arc<dyn ExecutionPlan>>, DataFusionError> {
+    let _guard = span!(Level::DEBUG, "Execution Plan for Hot Tier");
     let (hot_tier_files, remainder) = hot_tier_manager
         .get_hot_tier_manifest_files(stream, manifest_files.clone())
         .await

@@ -646,6 +652,7 @@ async fn legacy_listing_table(
     limit: Option<usize>,
     time_partition: Option<String>,
 ) -> Result<Option<Arc<dyn ExecutionPlan>>, DataFusionError> {
+    let _guard = span!(Level::DEBUG, "Execution Plan for Legacy Listings");
     let remote_table = ListingTableBuilder::new(stream)
         .populate_via_listing(glob_storage.clone(), object_store, time_filters)
         .and_then(|builder| async {
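When these spans are entered, they nest: a span entered while "Execution Plan Preparation" is active becomes its child, so the trace tree mirrors the block structure of `scan`. A small sketch of that nesting, under the assumption that each guard is created with `.entered()` (names hypothetical):

    use tracing::{span, Level};

    fn prepare_plan() {
        let _outer = span!(Level::DEBUG, "Execution Plan Preparation").entered();

        {
            // Entered while `_outer` is active, so this is recorded as a
            // child span; it closes when the guard drops at end of block.
            let _inner =
                span!(Level::DEBUG, "Memory Execution Plan Preparation").entered();
            // ... build the in-memory execution plan ...
        }

        // ... the rest runs under the outer span only ...
    }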

src/querycache.rs

Lines changed: 7 additions & 1 deletion

@@ -30,7 +30,7 @@ use std::collections::HashMap;
 use std::path::{Path, PathBuf};
 use tokio::fs as AsyncFs;
 use tokio::{fs, sync::Mutex};
-use tracing::{error, info, warn};
+use tracing::{error, info, instrument, warn};

 use crate::handlers::http::users::USERS_ROOT_DIR;
 use crate::metadata::STREAM_INFO;

@@ -88,6 +88,7 @@ impl QueryCache {
         self.files.remove(key)
     }

+    #[instrument]
     pub async fn delete(&mut self, key: &CacheMetadata, path: PathBuf) -> Result<(), CacheError> {
         self.files.delete(key);
         AsyncFs::remove_file(path).await?;

@@ -101,6 +102,7 @@ impl QueryCache {

     // read the parquet
     // return the recordbatches
+    #[instrument]
     pub async fn get_cached_records(
         &self,
         path: &PathBuf,

@@ -141,6 +143,7 @@ impl QueryCacheMeta {
     }
 }

+#[derive(Debug)]
 pub struct QueryCacheManager {
     filesystem: LocalFileSystem,
     cache_path: PathBuf, // refers to the path passed in the env var

@@ -241,6 +244,7 @@ impl QueryCacheManager {
         Ok(())
     }

+    #[instrument]
     pub async fn get_cache(&self, stream: &str, user_id: &str) -> Result<QueryCache, CacheError> {
         let path = query_cache_file_path(&self.cache_path, stream, user_id).unwrap();
         let res = self

@@ -320,6 +324,7 @@ impl QueryCacheManager {
         Ok(())
     }

+    #[instrument]
     pub async fn create_parquet_cache(
         &self,
         table_name: &str,

@@ -364,6 +369,7 @@ impl QueryCacheManager {
         .await
     }

+    #[instrument]
     pub async fn clear_cache(&self, stream: &str, user_id: &str) -> Result<(), CacheError> {
         let cache = self.get_cache(stream, user_id).await?;
         let map = cache.files.values().collect_vec();
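The `#[derive(Debug)]` added to `QueryCacheManager` is what allows `#[instrument]` on its methods: the macro records `self` like any other argument, so the receiver type must implement `Debug` (or be skipped). A short sketch of the constraint, with a hypothetical type:

    use tracing::instrument;

    #[derive(Debug)] // required: #[instrument] formats `self` via Debug
    struct Manager {
        capacity: u64,
    }

    impl Manager {
        // Without the derive above this would not compile; alternatively,
        // `#[instrument(skip(self))]` leaves the receiver out of the span.
        #[instrument]
        fn evict(&self, key: &str) {
            let _ = (key, self.capacity);
        }
    }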

src/storage/object_storage.rs

Lines changed: 3 additions & 1 deletion

@@ -48,7 +48,7 @@ use datafusion::{datasource::listing::ListingTableUrl, execution::runtime_env::R
 use itertools::Itertools;
 use relative_path::RelativePath;
 use relative_path::RelativePathBuf;
-use tracing::error;
+use tracing::{error, instrument};

 use std::collections::BTreeMap;
 use std::{

@@ -407,6 +407,7 @@ pub trait ObjectStorage: std::fmt::Debug + Send + Sync + 'static {
     }

     // gets the snapshot of the stream
+    #[instrument]
     async fn get_object_store_format(
         &self,
         stream: &str,

@@ -662,6 +663,7 @@ pub trait ObjectStorage: std::fmt::Debug + Send + Sync + 'static {
     fn get_bucket_name(&self) -> String;
 }

+#[instrument]
 pub async fn commit_schema_to_storage(
     stream_name: &str,
     schema: Schema,
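All of the spans introduced here are at `Level::DEBUG`, so they only appear if the installed subscriber's filter admits debug output. A minimal setup that would surface them, assuming the `tracing-subscriber` crate with its `env-filter` feature (this initialization is not part of the commit):

    use tracing_subscriber::EnvFilter;

    fn main() {
        // Honor RUST_LOG if set (e.g. RUST_LOG=debug); default to "debug".
        tracing_subscriber::fmt()
            .with_env_filter(
                EnvFilter::try_from_default_env()
                    .unwrap_or_else(|_| EnvFilter::new("debug")),
            )
            .init();

        // ... start the server; instrumented functions now emit spans ...
    }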

0 commit comments
