Skip to content

controllers/krate/publish: Validate JSON metadata before reading tarball #10069

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ tempfile = "=3.14.0"
thiserror = "=2.0.3"
tokio = { version = "=1.41.1", features = ["net", "signal", "io-std", "io-util", "rt-multi-thread", "macros", "process"]}
tokio-postgres = "=0.7.12"
tokio-util = "=0.7.12"
toml = "=0.8.19"
tower = "=0.5.1"
tower-http = { version = "=0.6.2", features = ["add-extension", "fs", "catch-panic", "timeout", "compression-full"] }
Expand Down
115 changes: 70 additions & 45 deletions src/controllers/krate/publish.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::auth::AuthCheck;
use crate::worker::jobs::{
self, CheckTyposquat, SendPublishNotificationsJob, UpdateDefaultVersion,
};
use axum::body::Bytes;
use axum::body::{Body, Bytes};
use axum::Json;
use cargo_manifest::{Dependency, DepsSet, TargetDepsSet};
use chrono::{DateTime, SecondsFormat, Utc};
Expand All @@ -17,10 +17,12 @@ use diesel_async::scoped_futures::ScopedFutureExt;
use diesel_async::{AsyncConnection, AsyncPgConnection, RunQueryDsl};
use futures_util::TryStreamExt;
use hex::ToHex;
use http::request::Parts;
use http::StatusCode;
use hyper::body::Buf;
use sha2::{Digest, Sha256};
use std::collections::HashMap;
use tokio::io::{AsyncRead, AsyncReadExt};
use tokio_util::io::StreamReader;
use url::Url;

use crate::models::{
Expand All @@ -35,7 +37,7 @@ use crate::rate_limiter::LimitedAction;
use crate::schema::*;
use crate::sql::canon_crate_name;
use crate::util::errors::{bad_request, custom, internal, AppResult, BoxedAppError};
use crate::util::{BytesRequest, Maximums};
use crate::util::Maximums;
use crate::views::{
EncodableCrate, EncodableCrateDependency, GoodCrate, PublishMetadata, PublishWarnings,
};
Expand All @@ -54,12 +56,20 @@ const MAX_DESCRIPTION_LENGTH: usize = 1000;
/// Currently blocks the HTTP thread, perhaps some function calls can spawn new
/// threads and return completion or error through other methods a `cargo publish
/// --status` command, via crates.io's front end, or email.
pub async fn publish(app: AppState, req: BytesRequest) -> AppResult<Json<GoodCrate>> {
let (req, bytes) = req.0.into_parts();
let (json_bytes, tarball_bytes) = split_body(bytes)?;
pub async fn publish(app: AppState, req: Parts, body: Body) -> AppResult<Json<GoodCrate>> {
let stream = body.into_data_stream();
let stream = stream.map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err));
let mut reader = StreamReader::new(stream);

let metadata: PublishMetadata = serde_json::from_slice(&json_bytes)
.map_err(|e| bad_request(format_args!("invalid upload request: {e}")))?;
// The format of the req.body() of a publish request is as follows:
//
// metadata length
// metadata in JSON about the crate being published
// .crate tarball length
// .crate tarball file

const MAX_JSON_LENGTH: u32 = 1024 * 1024; // 1 MB
let metadata = read_json_metadata(&mut reader, MAX_JSON_LENGTH).await?;

Crate::validate_crate_name("crate", &metadata.name).map_err(bad_request)?;

Expand Down Expand Up @@ -136,20 +146,14 @@ pub async fn publish(app: AppState, req: BytesRequest) -> AppResult<Json<GoodCra
.check_rate_limit(auth.user().id, rate_limit_action, &mut conn)
.await?;

let content_length = tarball_bytes.len() as u64;

let maximums = Maximums::new(
existing_crate.as_ref().and_then(|c| c.max_upload_size),
app.config.max_upload_size,
app.config.max_unpack_size,
);

if content_length > maximums.max_upload_size {
return Err(custom(
StatusCode::PAYLOAD_TOO_LARGE,
format!("max upload size is: {}", maximums.max_upload_size),
));
}
let tarball_bytes = read_tarball_bytes(&mut reader, maximums.max_upload_size as u32).await?;
let content_length = tarball_bytes.len() as u64;

let pkg_name = format!("{}-{}", &*metadata.name, &version_string);
let tarball_info =
Expand Down Expand Up @@ -573,44 +577,65 @@ async fn count_versions_published_today(
.await
}

#[instrument(skip_all)]
fn split_body(mut bytes: Bytes) -> AppResult<(Bytes, Bytes)> {
// The format of the req.body() of a publish request is as follows:
//
// metadata length
// metadata in JSON about the crate being published
// .crate tarball length
// .crate tarball file
async fn read_json_metadata<R: AsyncRead + Unpin>(
reader: &mut R,
max_length: u32,
) -> Result<PublishMetadata, BoxedAppError> {
let json_len = reader.read_u32_le().await.map_err(|e| {
if e.kind() == std::io::ErrorKind::UnexpectedEof {
bad_request("invalid metadata length")
} else {
e.into()
}
})?;

if bytes.len() < 4 {
// Avoid panic in `get_u32_le()` if there is not enough remaining data
return Err(bad_request("invalid metadata length"));
if json_len > max_length {
let message = "JSON metadata blob too large";
return Err(custom(StatusCode::PAYLOAD_TOO_LARGE, message));
}

let json_len = bytes.get_u32_le() as usize;
if json_len > bytes.len() {
return Err(bad_request(format!(
"invalid metadata length for remaining payload: {json_len}"
)));
}
let mut json_bytes = vec![0; json_len as usize];
reader.read_exact(&mut json_bytes).await.map_err(|e| {
if e.kind() == std::io::ErrorKind::UnexpectedEof {
let message = format!("invalid metadata length for remaining payload: {json_len}");
bad_request(message)
} else {
e.into()
}
})?;

let json_bytes = bytes.split_to(json_len);
serde_json::from_slice(&json_bytes)
.map_err(|e| bad_request(format_args!("invalid upload request: {e}")))
}

if bytes.len() < 4 {
// Avoid panic in `get_u32_le()` if there is not enough remaining data
return Err(bad_request("invalid tarball length"));
}
async fn read_tarball_bytes<R: AsyncRead + Unpin>(
reader: &mut R,
max_length: u32,
) -> Result<Bytes, BoxedAppError> {
let tarball_len = reader.read_u32_le().await.map_err(|e| {
if e.kind() == std::io::ErrorKind::UnexpectedEof {
bad_request("invalid tarball length")
} else {
e.into()
}
})?;

let tarball_len = bytes.get_u32_le() as usize;
if tarball_len > bytes.len() {
return Err(bad_request(format!(
"invalid tarball length for remaining payload: {tarball_len}"
)));
if tarball_len > max_length {
let message = format!("max upload size is: {}", max_length);
return Err(custom(StatusCode::PAYLOAD_TOO_LARGE, message));
}

let tarball_bytes = bytes.split_to(tarball_len);
let mut tarball_bytes = vec![0; tarball_len as usize];
reader.read_exact(&mut tarball_bytes).await.map_err(|e| {
if e.kind() == std::io::ErrorKind::UnexpectedEof {
let message = format!("invalid tarball length for remaining payload: {tarball_len}");
bad_request(message)
} else {
e.into()
}
})?;

Ok((json_bytes, tarball_bytes))
Ok(Bytes::from(tarball_bytes))
}

async fn is_reserved_name(name: &str, conn: &mut AsyncPgConnection) -> QueryResult<bool> {
Expand Down
26 changes: 20 additions & 6 deletions src/tests/krate/publish/tarball.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::tests::builders::PublishBuilder;
use crate::tests::util::{RequestHelper, TestApp};
use bytes::{BufMut, BytesMut};
use crates_io_tarball::TarballBuilder;
use googletest::prelude::*;
use http::StatusCode;
Expand Down Expand Up @@ -80,9 +81,16 @@ async fn json_bytes_truncated() {
async fn tarball_len_truncated() {
let (app, _, _, token) = TestApp::full().with_token().await;

let response = token
.publish_crate(&[2, 0, 0, 0, b'{', b'}', 0, 0] as &[u8])
.await;
let json = br#"{ "name": "foo", "vers": "1.0.0" }"#;

let mut bytes = BytesMut::new();
bytes.put_u32_le(json.len() as u32);
bytes.put_slice(json);
bytes.put_u8(0);
bytes.put_u8(0);

let response = token.publish_crate(bytes.freeze()).await;

assert_eq!(response.status(), StatusCode::BAD_REQUEST);
assert_snapshot!(response.text(), @r#"{"errors":[{"detail":"invalid tarball length"}]}"#);
assert_that!(app.stored_files().await, empty());
Expand All @@ -92,9 +100,15 @@ async fn tarball_len_truncated() {
async fn tarball_bytes_truncated() {
let (app, _, _, token) = TestApp::full().with_token().await;

let response = token
.publish_crate(&[2, 0, 0, 0, b'{', b'}', 100, 0, 0, 0, 0] as &[u8])
.await;
let json = br#"{ "name": "foo", "vers": "1.0.0" }"#;

let mut bytes = BytesMut::new();
bytes.put_u32_le(json.len() as u32);
bytes.put_slice(json);
bytes.put_u32_le(100);
bytes.put_u8(0);

let response = token.publish_crate(bytes.freeze()).await;
assert_eq!(response.status(), StatusCode::BAD_REQUEST);
assert_snapshot!(response.text(), @r#"{"errors":[{"detail":"invalid tarball length for remaining payload: 100"}]}"#);
assert_that!(app.stored_files().await, empty());
Expand Down