Skip to content

Commit 76cc6c4

Browse files
authored
Merge pull request #10069 from Turbo87/publish-stream
controllers/krate/publish: Validate JSON metadata before reading tarball
2 parents cf4b176 + cf2a4fe commit 76cc6c4

File tree

4 files changed

+92
-51
lines changed

4 files changed

+92
-51
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ tempfile = "=3.14.0"
114114
thiserror = "=2.0.3"
115115
tokio = { version = "=1.41.1", features = ["net", "signal", "io-std", "io-util", "rt-multi-thread", "macros", "process"]}
116116
tokio-postgres = "=0.7.12"
117+
tokio-util = "=0.7.12"
117118
toml = "=0.8.19"
118119
tower = "=0.5.1"
119120
tower-http = { version = "=0.6.2", features = ["add-extension", "fs", "catch-panic", "timeout", "compression-full"] }

src/controllers/krate/publish.rs

Lines changed: 70 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use crate::auth::AuthCheck;
55
use crate::worker::jobs::{
66
self, CheckTyposquat, SendPublishNotificationsJob, UpdateDefaultVersion,
77
};
8-
use axum::body::Bytes;
8+
use axum::body::{Body, Bytes};
99
use axum::Json;
1010
use cargo_manifest::{Dependency, DepsSet, TargetDepsSet};
1111
use chrono::{DateTime, SecondsFormat, Utc};
@@ -17,10 +17,12 @@ use diesel_async::scoped_futures::ScopedFutureExt;
1717
use diesel_async::{AsyncConnection, AsyncPgConnection, RunQueryDsl};
1818
use futures_util::TryStreamExt;
1919
use hex::ToHex;
20+
use http::request::Parts;
2021
use http::StatusCode;
21-
use hyper::body::Buf;
2222
use sha2::{Digest, Sha256};
2323
use std::collections::HashMap;
24+
use tokio::io::{AsyncRead, AsyncReadExt};
25+
use tokio_util::io::StreamReader;
2426
use url::Url;
2527

2628
use crate::models::{
@@ -35,7 +37,7 @@ use crate::rate_limiter::LimitedAction;
3537
use crate::schema::*;
3638
use crate::sql::canon_crate_name;
3739
use crate::util::errors::{bad_request, custom, internal, AppResult, BoxedAppError};
38-
use crate::util::{BytesRequest, Maximums};
40+
use crate::util::Maximums;
3941
use crate::views::{
4042
EncodableCrate, EncodableCrateDependency, GoodCrate, PublishMetadata, PublishWarnings,
4143
};
@@ -54,12 +56,20 @@ const MAX_DESCRIPTION_LENGTH: usize = 1000;
5456
/// Currently blocks the HTTP thread, perhaps some function calls can spawn new
5557
/// threads and return completion or error through other methods a `cargo publish
5658
/// --status` command, via crates.io's front end, or email.
57-
pub async fn publish(app: AppState, req: BytesRequest) -> AppResult<Json<GoodCrate>> {
58-
let (req, bytes) = req.0.into_parts();
59-
let (json_bytes, tarball_bytes) = split_body(bytes)?;
59+
pub async fn publish(app: AppState, req: Parts, body: Body) -> AppResult<Json<GoodCrate>> {
60+
let stream = body.into_data_stream();
61+
let stream = stream.map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err));
62+
let mut reader = StreamReader::new(stream);
6063

61-
let metadata: PublishMetadata = serde_json::from_slice(&json_bytes)
62-
.map_err(|e| bad_request(format_args!("invalid upload request: {e}")))?;
64+
// The format of the req.body() of a publish request is as follows:
65+
//
66+
// metadata length
67+
// metadata in JSON about the crate being published
68+
// .crate tarball length
69+
// .crate tarball file
70+
71+
const MAX_JSON_LENGTH: u32 = 1024 * 1024; // 1 MB
72+
let metadata = read_json_metadata(&mut reader, MAX_JSON_LENGTH).await?;
6373

6474
Crate::validate_crate_name("crate", &metadata.name).map_err(bad_request)?;
6575

@@ -136,20 +146,14 @@ pub async fn publish(app: AppState, req: BytesRequest) -> AppResult<Json<GoodCra
136146
.check_rate_limit(auth.user().id, rate_limit_action, &mut conn)
137147
.await?;
138148

139-
let content_length = tarball_bytes.len() as u64;
140-
141149
let maximums = Maximums::new(
142150
existing_crate.as_ref().and_then(|c| c.max_upload_size),
143151
app.config.max_upload_size,
144152
app.config.max_unpack_size,
145153
);
146154

147-
if content_length > maximums.max_upload_size {
148-
return Err(custom(
149-
StatusCode::PAYLOAD_TOO_LARGE,
150-
format!("max upload size is: {}", maximums.max_upload_size),
151-
));
152-
}
155+
let tarball_bytes = read_tarball_bytes(&mut reader, maximums.max_upload_size as u32).await?;
156+
let content_length = tarball_bytes.len() as u64;
153157

154158
let pkg_name = format!("{}-{}", &*metadata.name, &version_string);
155159
let tarball_info =
@@ -573,44 +577,65 @@ async fn count_versions_published_today(
573577
.await
574578
}
575579

576-
#[instrument(skip_all)]
577-
fn split_body(mut bytes: Bytes) -> AppResult<(Bytes, Bytes)> {
578-
// The format of the req.body() of a publish request is as follows:
579-
//
580-
// metadata length
581-
// metadata in JSON about the crate being published
582-
// .crate tarball length
583-
// .crate tarball file
580+
async fn read_json_metadata<R: AsyncRead + Unpin>(
581+
reader: &mut R,
582+
max_length: u32,
583+
) -> Result<PublishMetadata, BoxedAppError> {
584+
let json_len = reader.read_u32_le().await.map_err(|e| {
585+
if e.kind() == std::io::ErrorKind::UnexpectedEof {
586+
bad_request("invalid metadata length")
587+
} else {
588+
e.into()
589+
}
590+
})?;
584591

585-
if bytes.len() < 4 {
586-
// Avoid panic in `get_u32_le()` if there is not enough remaining data
587-
return Err(bad_request("invalid metadata length"));
592+
if json_len > max_length {
593+
let message = "JSON metadata blob too large";
594+
return Err(custom(StatusCode::PAYLOAD_TOO_LARGE, message));
588595
}
589596

590-
let json_len = bytes.get_u32_le() as usize;
591-
if json_len > bytes.len() {
592-
return Err(bad_request(format!(
593-
"invalid metadata length for remaining payload: {json_len}"
594-
)));
595-
}
597+
let mut json_bytes = vec![0; json_len as usize];
598+
reader.read_exact(&mut json_bytes).await.map_err(|e| {
599+
if e.kind() == std::io::ErrorKind::UnexpectedEof {
600+
let message = format!("invalid metadata length for remaining payload: {json_len}");
601+
bad_request(message)
602+
} else {
603+
e.into()
604+
}
605+
})?;
596606

597-
let json_bytes = bytes.split_to(json_len);
607+
serde_json::from_slice(&json_bytes)
608+
.map_err(|e| bad_request(format_args!("invalid upload request: {e}")))
609+
}
598610

599-
if bytes.len() < 4 {
600-
// Avoid panic in `get_u32_le()` if there is not enough remaining data
601-
return Err(bad_request("invalid tarball length"));
602-
}
611+
async fn read_tarball_bytes<R: AsyncRead + Unpin>(
612+
reader: &mut R,
613+
max_length: u32,
614+
) -> Result<Bytes, BoxedAppError> {
615+
let tarball_len = reader.read_u32_le().await.map_err(|e| {
616+
if e.kind() == std::io::ErrorKind::UnexpectedEof {
617+
bad_request("invalid tarball length")
618+
} else {
619+
e.into()
620+
}
621+
})?;
603622

604-
let tarball_len = bytes.get_u32_le() as usize;
605-
if tarball_len > bytes.len() {
606-
return Err(bad_request(format!(
607-
"invalid tarball length for remaining payload: {tarball_len}"
608-
)));
623+
if tarball_len > max_length {
624+
let message = format!("max upload size is: {}", max_length);
625+
return Err(custom(StatusCode::PAYLOAD_TOO_LARGE, message));
609626
}
610627

611-
let tarball_bytes = bytes.split_to(tarball_len);
628+
let mut tarball_bytes = vec![0; tarball_len as usize];
629+
reader.read_exact(&mut tarball_bytes).await.map_err(|e| {
630+
if e.kind() == std::io::ErrorKind::UnexpectedEof {
631+
let message = format!("invalid tarball length for remaining payload: {tarball_len}");
632+
bad_request(message)
633+
} else {
634+
e.into()
635+
}
636+
})?;
612637

613-
Ok((json_bytes, tarball_bytes))
638+
Ok(Bytes::from(tarball_bytes))
614639
}
615640

616641
async fn is_reserved_name(name: &str, conn: &mut AsyncPgConnection) -> QueryResult<bool> {

src/tests/krate/publish/tarball.rs

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use crate::tests::builders::PublishBuilder;
22
use crate::tests::util::{RequestHelper, TestApp};
3+
use bytes::{BufMut, BytesMut};
34
use crates_io_tarball::TarballBuilder;
45
use googletest::prelude::*;
56
use http::StatusCode;
@@ -80,9 +81,16 @@ async fn json_bytes_truncated() {
8081
async fn tarball_len_truncated() {
8182
let (app, _, _, token) = TestApp::full().with_token().await;
8283

83-
let response = token
84-
.publish_crate(&[2, 0, 0, 0, b'{', b'}', 0, 0] as &[u8])
85-
.await;
84+
let json = br#"{ "name": "foo", "vers": "1.0.0" }"#;
85+
86+
let mut bytes = BytesMut::new();
87+
bytes.put_u32_le(json.len() as u32);
88+
bytes.put_slice(json);
89+
bytes.put_u8(0);
90+
bytes.put_u8(0);
91+
92+
let response = token.publish_crate(bytes.freeze()).await;
93+
8694
assert_eq!(response.status(), StatusCode::BAD_REQUEST);
8795
assert_snapshot!(response.text(), @r#"{"errors":[{"detail":"invalid tarball length"}]}"#);
8896
assert_that!(app.stored_files().await, empty());
@@ -92,9 +100,15 @@ async fn tarball_len_truncated() {
92100
async fn tarball_bytes_truncated() {
93101
let (app, _, _, token) = TestApp::full().with_token().await;
94102

95-
let response = token
96-
.publish_crate(&[2, 0, 0, 0, b'{', b'}', 100, 0, 0, 0, 0] as &[u8])
97-
.await;
103+
let json = br#"{ "name": "foo", "vers": "1.0.0" }"#;
104+
105+
let mut bytes = BytesMut::new();
106+
bytes.put_u32_le(json.len() as u32);
107+
bytes.put_slice(json);
108+
bytes.put_u32_le(100);
109+
bytes.put_u8(0);
110+
111+
let response = token.publish_crate(bytes.freeze()).await;
98112
assert_eq!(response.status(), StatusCode::BAD_REQUEST);
99113
assert_snapshot!(response.text(), @r#"{"errors":[{"detail":"invalid tarball length for remaining payload: 100"}]}"#);
100114
assert_that!(app.stored_files().await, empty());

0 commit comments

Comments
 (0)