@@ -42,6 +42,7 @@ use parquet::{
4242} ;
4343use rand:: distributions:: DistString ;
4444use relative_path:: RelativePathBuf ;
45+ use tokio:: task:: JoinSet ;
4546use tracing:: { error, info, trace, warn} ;
4647
4748use crate :: {
@@ -50,11 +51,9 @@ use crate::{
5051 metadata:: { LogStreamMetadata , SchemaVersion } ,
5152 metrics,
5253 option:: Mode ,
53- storage:: {
54- object_storage:: to_bytes, retention:: Retention , StreamType , OBJECT_STORE_DATA_GRANULARITY ,
55- } ,
54+ storage:: { object_storage:: to_bytes, retention:: Retention , StreamType } ,
5655 utils:: minute_to_slot,
57- LOCK_EXPECT ,
56+ LOCK_EXPECT , OBJECT_STORE_DATA_GRANULARITY ,
5857} ;
5958
6059use super :: {
@@ -446,21 +445,27 @@ impl Stream {
446445 . set ( 0 ) ;
447446 }
448447
449- // warn!("staging files-\n{staging_files:?}\n");
450- for ( parquet_path, arrow_files) in staging_files {
451- metrics:: STAGING_FILES
452- . with_label_values ( & [ & self . stream_name ] )
453- . set ( arrow_files. len ( ) as i64 ) ;
454-
455- for file in & arrow_files {
456- let file_size = file. metadata ( ) . unwrap ( ) . len ( ) ;
457- let file_type = file. extension ( ) . unwrap ( ) . to_str ( ) . unwrap ( ) ;
448+ //find sum of arrow files in staging directory for a stream
449+ let total_arrow_files = staging_files. values ( ) . map ( |v| v. len ( ) ) . sum :: < usize > ( ) ;
450+ metrics:: STAGING_FILES
451+ . with_label_values ( & [ & self . stream_name ] )
452+ . set ( total_arrow_files as i64 ) ;
458453
459- metrics:: STORAGE_SIZE
460- . with_label_values ( & [ "staging" , & self . stream_name , file_type] )
461- . add ( file_size as i64 ) ;
462- }
454+ //find sum of file sizes of all arrow files in staging_files
455+ let total_arrow_files_size = staging_files
456+ . values ( )
457+ . map ( |v| {
458+ v. iter ( )
459+ . map ( |file| file. metadata ( ) . unwrap ( ) . len ( ) )
460+ . sum :: < u64 > ( )
461+ } )
462+ . sum :: < u64 > ( ) ;
463+ metrics:: STORAGE_SIZE
464+ . with_label_values ( & [ "staging" , & self . stream_name , "arrows" ] )
465+ . set ( total_arrow_files_size as i64 ) ;
463466
467+ // warn!("staging files-\n{staging_files:?}\n");
468+ for ( parquet_path, arrow_files) in staging_files {
464469 let record_reader = MergedReverseRecordReader :: try_new ( & arrow_files) ;
465470 if record_reader. readers . is_empty ( ) {
466471 continue ;
@@ -496,6 +501,7 @@ impl Stream {
496501 "Couldn't rename part file: {part_path:?} -> {parquet_path:?}, error = {e}"
497502 ) ;
498503 }
504+
499505 for file in arrow_files {
500506 // warn!("file-\n{file:?}\n");
501507 let file_size = file. metadata ( ) . unwrap ( ) . len ( ) ;
@@ -655,6 +661,13 @@ impl Stream {
655661 pub fn get_stream_type ( & self ) -> StreamType {
656662 self . metadata . read ( ) . expect ( LOCK_EXPECT ) . stream_type
657663 }
664+
665+ /// First flushes arrows onto disk and then converts the arrow into parquet
666+ pub fn flush_and_convert ( & self , shutdown_signal : bool ) -> Result < ( ) , StagingError > {
667+ self . flush ( ) ;
668+
669+ self . prepare_parquet ( shutdown_signal)
670+ }
658671}
659672
660673#[ derive( Deref , DerefMut , Default ) ]
@@ -721,21 +734,22 @@ impl Streams {
721734 . collect ( )
722735 }
723736
724- /// Convert arrow files into parquet, preparing it for upload
725- pub fn prepare_parquet ( & self , shutdown_signal : bool ) -> Result < ( ) , StagingError > {
737+ /// Asynchronously flushes arrows and compacts into parquet data on all streams in staging,
738+ /// so that it is ready to be pushed onto objectstore.
739+ pub fn flush_and_convert (
740+ & self ,
741+ joinset : & mut JoinSet < Result < ( ) , StagingError > > ,
742+ shutdown_signal : bool ,
743+ ) {
726744 let streams: Vec < Arc < Stream > > = self
727745 . read ( )
728746 . expect ( LOCK_EXPECT )
729747 . values ( )
730748 . map ( Arc :: clone)
731749 . collect ( ) ;
732750 for stream in streams {
733- stream
734- . prepare_parquet ( shutdown_signal)
735- . inspect_err ( |err| error ! ( "Failed to run conversion task {err:?}" ) ) ?;
751+ joinset. spawn ( async move { stream. flush_and_convert ( shutdown_signal) } ) ;
736752 }
737-
738- Ok ( ( ) )
739753 }
740754}
741755
0 commit comments