@@ -24,6 +24,7 @@ use aws_sdk_s3::Error as AwsSdkError;
2424use aws_sdk_s3:: RetryConfig ;
2525use aws_sdk_s3:: { Client , Credentials , Endpoint , Region } ;
2626use aws_smithy_async:: rt:: sleep:: default_async_sleep;
27+ use base64:: encode;
2728use bytes:: Bytes ;
2829use clap:: builder:: ArgPredicate ;
2930
@@ -35,6 +36,7 @@ use datafusion::datasource::object_store::ObjectStoreRegistry;
3536use datafusion:: execution:: runtime_env:: { RuntimeConfig , RuntimeEnv } ;
3637use futures:: StreamExt ;
3738use http:: Uri ;
39+ use md5:: { Digest , Md5 } ;
3840use object_store:: aws:: AmazonS3Builder ;
3941use object_store:: limit:: LimitStore ;
4042use relative_path:: RelativePath ;
@@ -105,6 +107,15 @@ pub struct S3Config {
105107 default_value_if( "demo" , ArgPredicate :: IsPresent , DEFAULT_S3_BUCKET )
106108 ) ]
107109 pub s3_bucket_name : String ,
110+
111+ /// Set client to send content_md5 header on every put request
112+ #[ arg(
113+ long,
114+ env = "P_S3_SET_CONTENT_MD5" ,
115+ value_name = "bool" ,
116+ default_value = "false"
117+ ) ]
118+ pub content_md5 : bool ,
108119}
109120
110121impl ObjectStorageProvider for S3Config {
@@ -153,6 +164,7 @@ impl ObjectStorageProvider for S3Config {
153164 Arc :: new ( S3 {
154165 client,
155166 bucket : self . s3_bucket_name . clone ( ) ,
167+ set_content_md5 : self . content_md5 ,
156168 } )
157169 }
158170
@@ -164,6 +176,7 @@ impl ObjectStorageProvider for S3Config {
164176pub struct S3 {
165177 client : aws_sdk_s3:: Client ,
166178 bucket : String ,
179+ set_content_md5 : bool ,
167180}
168181
169182impl S3 {
@@ -233,16 +246,24 @@ impl S3 {
233246 Ok ( logstreams)
234247 }
235248
236- async fn _upload_file ( & self , key : & str , path : & Path ) -> Result < ( ) , AwsSdkError > {
237- let body = ByteStream :: from_path ( path) . await . unwrap ( ) ;
249+ async fn _upload_file (
250+ & self ,
251+ key : & str ,
252+ path : & Path ,
253+ md5 : Option < String > ,
254+ ) -> Result < ( ) , AwsSdkError > {
255+ let body = ByteStream :: from_path ( & path) . await . unwrap ( ) ;
256+
238257 let resp = self
239258 . client
240259 . put_object ( )
241260 . bucket ( & self . bucket )
242261 . key ( key)
243262 . body ( body)
263+ . set_content_md5 ( md5)
244264 . send ( )
245265 . await ?;
266+
246267 log:: trace!( "{:?}" , resp) ;
247268
248269 Ok ( ( ) )
@@ -260,12 +281,18 @@ impl ObjectStorage for S3 {
260281 path : & RelativePath ,
261282 resource : Bytes ,
262283 ) -> Result < ( ) , ObjectStorageError > {
263- let _resp = self
264- . client
284+ let hash = self . set_content_md5 . then ( || {
285+ let mut hash = Md5 :: new ( ) ;
286+ hash. update ( & resource) ;
287+ encode ( hash. finalize ( ) )
288+ } ) ;
289+
290+ self . client
265291 . put_object ( )
266292 . bucket ( & self . bucket )
267293 . key ( path. as_str ( ) )
268294 . body ( resource. into ( ) )
295+ . set_content_md5 ( hash)
269296 . send ( )
270297 . await
271298 . map_err ( |err| ObjectStorageError :: ConnectionError ( Box :: new ( err) ) ) ?;
@@ -296,7 +323,16 @@ impl ObjectStorage for S3 {
296323 }
297324
298325 async fn upload_file ( & self , key : & str , path : & Path ) -> Result < ( ) , ObjectStorageError > {
299- self . _upload_file ( key, path) . await ?;
326+ let hash = if self . set_content_md5 {
327+ let mut file = std:: fs:: File :: open ( path) ?;
328+ let mut digest = Md5 :: new ( ) ;
329+ std:: io:: copy ( & mut file, & mut digest) ?;
330+ Some ( encode ( digest. finalize ( ) ) )
331+ } else {
332+ None
333+ } ;
334+
335+ self . _upload_file ( key, path, hash) . await ?;
300336
301337 Ok ( ( ) )
302338 }
0 commit comments