From b4c1a281aa5b3593bc68b22acfa8b8f0d9b586e3 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Wed, 27 May 2020 21:25:08 -0400 Subject: [PATCH 01/15] Add compression for uploaded documentation --- Cargo.lock | 38 ++++++++++++++++++++++++++++++++++++++ Cargo.toml | 1 + src/db/migrate.rs | 11 +++++++++++ src/storage/database.rs | 10 ++++++---- src/storage/mod.rs | 29 +++++++++++++++++++++++------ src/storage/s3.rs | 9 +++++++++ 6 files changed, 88 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b56d3c252..b29ae77fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -448,6 +448,7 @@ dependencies = [ "tokio 0.1.22 (registry+https://github.com/rust-lang/crates.io-index)", "toml 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)", "url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)", + "zstd 0.5.2+zstd.1.4.5 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -966,6 +967,11 @@ dependencies = [ "url 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "glob" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "globset" version = "0.4.5" @@ -4115,6 +4121,34 @@ dependencies = [ "bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "zstd" +version = "0.5.2+zstd.1.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "zstd-safe 2.0.4+zstd.1.4.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "zstd-safe" +version = "2.0.4+zstd.1.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.70 (registry+https://github.com/rust-lang/crates.io-index)", + "zstd-sys 1.4.16+zstd.1.4.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "zstd-sys" +version = "1.4.16+zstd.1.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cc 1.0.54 (registry+https://github.com/rust-lang/crates.io-index)", + "glob 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "itertools 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.70 (registry+https://github.com/rust-lang/crates.io-index)", +] + [metadata] "checksum addr2line 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a49806b9dadc843c61e7c97e72490ad7f7220ae249012fbda9ad0609457c0543" "checksum adler32 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2" @@ -4223,6 +4257,7 @@ dependencies = [ "checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb" "checksum gimli 0.21.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bcc8e0c9bce37868955864dbecd2b1ab2bdf967e6f28066d65aaac620444b65c" "checksum git2 0.13.6 (registry+https://github.com/rust-lang/crates.io-index)" = "11e4b2082980e751c4bf4273e9cbb4a02c655729c8ee8a79f66cad03c8f4d31e" +"checksum glob 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" "checksum globset 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "7ad1da430bd7281dde2576f44c84cc3f0f7b475e7202cd503042dff01a8c8120" "checksum globwalk 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "178270263374052c40502e9f607134947de75302c1348d1a0e31db67c1691446" "checksum h2 0.1.26 (registry+https://github.com/rust-lang/crates.io-index)" = "a5b34c246847f938a410a03c5458c7fee2274436675e76d8b903c08efc29c462" @@ -4560,3 +4595,6 @@ dependencies = [ "checksum ws2_32-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d59cefebd0c892fa2dd6de581e937301d8552cb44489cdff035c6187cb63fa5e" "checksum xattr 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "244c3741f4240ef46274860397c7c74e50eb23624996930e484c16679633a54c" "checksum xml-rs 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3c1cb601d29fe2c2ac60a2b2e5e293994d87a1f6fa9687a31a15270f909be9c2" +"checksum zstd 0.5.2+zstd.1.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "644352b10ce7f333d6e0af85bd4f5322dc449416dc1211c6308e95bca8923db4" +"checksum zstd-safe 2.0.4+zstd.1.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "7113c0c9aed2c55181f2d9f5b0a36e7d2c0183b11c058ab40b35987479efe4d7" +"checksum zstd-sys 1.4.16+zstd.1.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "c442965efc45353be5a9b9969c9b0872fff6828c7e06d118dda2cb2d0bb11d5a" diff --git a/Cargo.toml b/Cargo.toml index 1f5d8299d..1d1a84e27 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,6 +38,7 @@ lazy_static = "1.0.0" rustwide = "0.7.1" mime_guess = "2" dotenv = "0.15" +zstd = "0.5" # Data serialization and deserialization serde = { version = "1.0", features = ["derive"] } diff --git a/src/db/migrate.rs b/src/db/migrate.rs index ddd4452f3..6d204194e 100644 --- a/src/db/migrate.rs +++ b/src/db/migrate.rs @@ -340,6 +340,17 @@ pub fn migrate(version: Option, conn: &Connection) -> CratesfyiResult<( ADD COLUMN content tsvector, ADD COLUMN versions JSON DEFAULT '[]';" ), + migration!( + context, + // version + 14, + // description + "Add a field for compression", + // upgrade query + "ALTER TABLE files ADD COLUMN compressed BOOLEAN NOT NULL DEFAULT false;", + // downgrade query + "ALTER TABLE files DROP COLUMN compressed;", + ), ]; for migration in migrations { diff --git a/src/storage/database.rs b/src/storage/database.rs index 0f229c637..60468bcba 100644 --- a/src/storage/database.rs +++ b/src/storage/database.rs @@ -18,7 +18,7 @@ impl<'a> DatabaseBackend<'a> { pub(super) fn get(&self, path: &str) -> Result { let rows = self.conn.query( - "SELECT path, mime, date_updated, content FROM files WHERE path = $1;", + "SELECT path, mime, date_updated, content, compressed FROM files WHERE path = $1;", &[&path], )?; @@ -31,6 +31,7 @@ impl<'a> DatabaseBackend<'a> { mime: row.get("mime"), date_updated: DateTime::from_utc(row.get::<_, NaiveDateTime>("date_updated"), Utc), content: row.get("content"), + compressed: row.get("compressed"), }) } } @@ -38,11 +39,11 @@ impl<'a> DatabaseBackend<'a> { pub(super) fn store_batch(&self, batch: &[Blob], trans: &Transaction) -> Result<(), Error> { for blob in batch { trans.query( - "INSERT INTO files (path, mime, content) - VALUES ($1, $2, $3) + "INSERT INTO files (path, mime, content, compressed) + VALUES ($1, $2, $3, $4) ON CONFLICT (path) DO UPDATE SET mime = EXCLUDED.mime, content = EXCLUDED.content", - &[&blob.path, &blob.mime, &blob.content], + &[&blob.path, &blob.mime, &blob.content, &blob.compressed], )?; } Ok(()) @@ -79,6 +80,7 @@ mod tests { mime: "text/plain".into(), date_updated: now.trunc_subsecs(6), content: "Hello world!".bytes().collect(), + compressed: false, }, backend.get("dir/foo.txt")? ); diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 7b1c20dc0..669d8ee88 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -20,6 +20,7 @@ pub(crate) struct Blob { pub(crate) mime: String, pub(crate) date_updated: DateTime, pub(crate) content: Vec, + pub(crate) compressed: bool, } fn get_file_list_from_dir>(path: P, files: &mut Vec) -> Result<(), Error> { @@ -71,10 +72,15 @@ impl<'a> Storage<'a> { } } pub(crate) fn get(&self, path: &str) -> Result { - match self { + let mut blob = match self { Self::Database(db) => db.get(path), Self::S3(s3) => s3.get(path), + }?; + if blob.compressed { + blob.content = decompress(blob.content.as_slice())?; + blob.compressed = false; } + Ok(blob) } fn store_batch(&mut self, batch: &[Blob], trans: &Transaction) -> Result<(), Error> { @@ -109,9 +115,10 @@ impl<'a> Storage<'a> { .ok() .map(|file| (file_path, file)) }) - .map(|(file_path, mut file)| -> Result<_, Error> { - let mut content: Vec = Vec::new(); - file.read_to_end(&mut content)?; + .map(|(file_path, file)| -> Result<_, Error> { + //let mut content: Vec = Vec::new(); + //file.read_to_end(&mut content)?; + let content = compress(file)?; let bucket_path = Path::new(prefix).join(&file_path); @@ -121,12 +128,13 @@ impl<'a> Storage<'a> { let bucket_path = bucket_path.into_os_string().into_string().unwrap(); let mime = detect_mime(&file_path)?; - file_paths_and_mimes.insert(file_path, mime.to_string()); + Ok(Blob { path: bucket_path, mime: mime.to_string(), content, + compressed: true, // this field is ignored by the backend date_updated: Utc::now(), }) @@ -147,6 +155,14 @@ impl<'a> Storage<'a> { } } +fn compress(content: impl Read) -> Result, Error> { + zstd::encode_all(content, 5).map_err(Into::into) +} + +fn decompress(content: impl Read) -> Result, Error> { + zstd::decode_all(content).map_err(Into::into) +} + fn detect_mime(file_path: &Path) -> Result<&'static str, Error> { let mime = mime_guess::from_path(file_path) .first_raw() @@ -277,9 +293,10 @@ mod test { let uploads: Vec<_> = (0..=MAX_CONCURRENT_UPLOADS + 1) .map(|i| Blob { mime: "text/rust".into(), - content: "fn main() {}".into(), + content: compress("fn main() {}".as_bytes()).unwrap(), path: format!("{}.rs", i), date_updated: Utc::now(), + compressed: true, }) .collect(); diff --git a/src/storage/s3.rs b/src/storage/s3.rs index e8eb78d00..595bbb2fa 100644 --- a/src/storage/s3.rs +++ b/src/storage/s3.rs @@ -51,19 +51,25 @@ impl<'a> S3Backend<'a> { b.read_to_end(&mut content).unwrap(); let date_updated = parse_timespec(&res.last_modified.unwrap())?; + let compressed = res.metadata.unwrap_or_default().get("compressed").is_some(); Ok(Blob { path: path.into(), mime: res.content_type.unwrap(), date_updated, content, + compressed, }) } pub(super) fn store_batch(&mut self, batch: &[Blob]) -> Result<(), Error> { use futures::stream::FuturesUnordered; use futures::stream::Stream; + use std::collections::HashMap; + let mut attempts = 0; + let mut compressed = HashMap::with_capacity(1); + compressed.insert("compressed".into(), "true".into()); loop { let mut futures = FuturesUnordered::new(); @@ -75,6 +81,7 @@ impl<'a> S3Backend<'a> { key: blob.path.clone(), body: Some(blob.content.clone().into()), content_type: Some(blob.mime.clone()), + metadata: Some(compressed.clone()), ..Default::default() }) .inspect(|_| { @@ -168,6 +175,7 @@ pub(crate) mod tests { mime: "text/plain".into(), date_updated: Utc::now(), content: "Hello world!".into(), + compressed: false, }; // Add a test file to the database @@ -204,6 +212,7 @@ pub(crate) mod tests { mime: "text/plain".into(), date_updated: Utc::now(), content: "Hello world!".into(), + compressed: false, }) .collect(); From 5c7d2ee8c9fffe33936994ce71543addde561ffb Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Wed, 27 May 2020 21:29:26 -0400 Subject: [PATCH 02/15] Remove commented-out code --- src/storage/mod.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 669d8ee88..7480ef58c 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -116,10 +116,7 @@ impl<'a> Storage<'a> { .map(|file| (file_path, file)) }) .map(|(file_path, file)| -> Result<_, Error> { - //let mut content: Vec = Vec::new(); - //file.read_to_end(&mut content)?; let content = compress(file)?; - let bucket_path = Path::new(prefix).join(&file_path); #[cfg(windows)] // On windows, we need to normalize \\ to / so the route logic works From f735e85557e492c00ca03901aa43073b13a04c13 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Wed, 27 May 2020 21:36:53 -0400 Subject: [PATCH 03/15] Add test for compression --- src/storage/mod.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 7480ef58c..be35dc7b8 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -311,6 +311,21 @@ mod test { assert_eq!(files[0], std::path::Path::new("Cargo.toml")); } + #[test] + fn test_compression() { + let orig = "fn main() {}"; + let compressed = compress(orig.as_bytes()).unwrap(); + let blob = Blob { + mime: "text/rust".into(), + content: compressed.clone(), + path: "main.rs".into(), + date_updated: Timespec::new(42, 0), + compressed: true, + }; + test_roundtrip(std::slice::from_ref(&blob)); + assert_eq!(decompress(compressed.as_slice()).unwrap(), orig.as_bytes()); + } + #[test] fn test_mime_types() { check_mime(".gitignore", "text/plain"); From de4965edda1b9759bde21c3a3707384a19d9378e Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Wed, 27 May 2020 22:38:59 -0400 Subject: [PATCH 04/15] Add benchmark for compression/decompression --- Cargo.toml | 4 ++++ benches/compression.rs | 19 +++++++++++++++++++ src/storage/mod.rs | 5 +++-- 3 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 benches/compression.rs diff --git a/Cargo.toml b/Cargo.toml index 1d1a84e27..54d99d3bf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -85,6 +85,10 @@ rand = "0.7.3" name = "html5ever" harness = false +[[bench]] +name = "compression" +harness = false + [build-dependencies] time = "0.1" git2 = { version = "0.13", default-features = false } diff --git a/benches/compression.rs b/benches/compression.rs new file mode 100644 index 000000000..bacf7ae38 --- /dev/null +++ b/benches/compression.rs @@ -0,0 +1,19 @@ +use cratesfyi::storage::{compress, decompress}; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; + +pub fn criterion_benchmark(c: &mut Criterion) { + // this isn't a great benchmark because it only tests on one file + // ideally we would build a whole crate and compress each file, taking the average + let html = std::fs::read_to_string("benches/struct.CaptureMatches.html").unwrap(); + let html_slice = html.as_bytes(); + c.bench_function("compress regex html", |b| { + b.iter(|| compress(black_box(html_slice))) + }); + let compressed = compress(html_slice).unwrap(); + c.bench_function("decompress regex html", |b| { + b.iter(|| decompress(black_box(compressed.as_slice()))) + }); +} + +criterion_group!(compression, criterion_benchmark); +criterion_main!(compression); diff --git a/src/storage/mod.rs b/src/storage/mod.rs index be35dc7b8..bc70ac2df 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -152,11 +152,12 @@ impl<'a> Storage<'a> { } } -fn compress(content: impl Read) -> Result, Error> { +// public for benchmarking +pub fn compress(content: impl Read) -> Result, Error> { zstd::encode_all(content, 5).map_err(Into::into) } -fn decompress(content: impl Read) -> Result, Error> { +pub fn decompress(content: impl Read) -> Result, Error> { zstd::decode_all(content).map_err(Into::into) } From 1133541f7c3e4256001456c852d504e55cf92f3c Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Thu, 28 May 2020 09:04:13 -0400 Subject: [PATCH 05/15] zstd 5 -> zstd 9 --- src/storage/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/storage/mod.rs b/src/storage/mod.rs index bc70ac2df..0607a6711 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -154,7 +154,7 @@ impl<'a> Storage<'a> { // public for benchmarking pub fn compress(content: impl Read) -> Result, Error> { - zstd::encode_all(content, 5).map_err(Into::into) + zstd::encode_all(content, 9).map_err(Into::into) } pub fn decompress(content: impl Read) -> Result, Error> { From 256452d3bf329ba5ca1b991ad6cda29c56808c29 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Thu, 28 May 2020 09:36:26 -0400 Subject: [PATCH 06/15] Make compression an enum instead of a boolean --- benches/compression.rs | 4 +-- src/db/migrate.rs | 4 +-- src/storage/database.rs | 17 ++++++---- src/storage/mod.rs | 70 +++++++++++++++++++++++++++++------------ src/storage/s3.rs | 13 +++----- 5 files changed, 70 insertions(+), 38 deletions(-) diff --git a/benches/compression.rs b/benches/compression.rs index bacf7ae38..8b4133d62 100644 --- a/benches/compression.rs +++ b/benches/compression.rs @@ -9,9 +9,9 @@ pub fn criterion_benchmark(c: &mut Criterion) { c.bench_function("compress regex html", |b| { b.iter(|| compress(black_box(html_slice))) }); - let compressed = compress(html_slice).unwrap(); + let (compressed, alg) = compress(html_slice).unwrap(); c.bench_function("decompress regex html", |b| { - b.iter(|| decompress(black_box(compressed.as_slice()))) + b.iter(|| decompress(black_box(compressed.as_slice()), alg)) }); } diff --git a/src/db/migrate.rs b/src/db/migrate.rs index 6d204194e..1d7b3f45a 100644 --- a/src/db/migrate.rs +++ b/src/db/migrate.rs @@ -347,9 +347,9 @@ pub fn migrate(version: Option, conn: &Connection) -> CratesfyiResult<( // description "Add a field for compression", // upgrade query - "ALTER TABLE files ADD COLUMN compressed BOOLEAN NOT NULL DEFAULT false;", + "ALTER TABLE files ADD COLUMN compression VARCHAR;", // downgrade query - "ALTER TABLE files DROP COLUMN compressed;", + "ALTER TABLE files DROP COLUMN compression;", ), ]; diff --git a/src/storage/database.rs b/src/storage/database.rs index 60468bcba..c8c506ce2 100644 --- a/src/storage/database.rs +++ b/src/storage/database.rs @@ -18,7 +18,7 @@ impl<'a> DatabaseBackend<'a> { pub(super) fn get(&self, path: &str) -> Result { let rows = self.conn.query( - "SELECT path, mime, date_updated, content, compressed FROM files WHERE path = $1;", + "SELECT path, mime, date_updated, content, compression FROM files WHERE path = $1;", &[&path], )?; @@ -26,24 +26,29 @@ impl<'a> DatabaseBackend<'a> { Err(PathNotFoundError.into()) } else { let row = rows.get(0); + let compression = row.get::<_, Option>("compression").map(|alg| { + alg.parse() + .expect("invalid or unknown compression algorithm") + }); Ok(Blob { path: row.get("path"), mime: row.get("mime"), date_updated: DateTime::from_utc(row.get::<_, NaiveDateTime>("date_updated"), Utc), content: row.get("content"), - compressed: row.get("compressed"), + compression, }) } } pub(super) fn store_batch(&self, batch: &[Blob], trans: &Transaction) -> Result<(), Error> { for blob in batch { + let compression = blob.compression.as_ref().map(|alg| alg.to_string()); trans.query( - "INSERT INTO files (path, mime, content, compressed) + "INSERT INTO files (path, mime, content, compression) VALUES ($1, $2, $3, $4) ON CONFLICT (path) DO UPDATE - SET mime = EXCLUDED.mime, content = EXCLUDED.content", - &[&blob.path, &blob.mime, &blob.content, &blob.compressed], + SET mime = EXCLUDED.mime, content = EXCLUDED.content, compression = EXCLUDED.compression", + &[&blob.path, &blob.mime, &blob.content, &compression], )?; } Ok(()) @@ -80,7 +85,7 @@ mod tests { mime: "text/plain".into(), date_updated: now.trunc_subsecs(6), content: "Hello world!".bytes().collect(), - compressed: false, + compression: None, }, backend.get("dir/foo.txt")? ); diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 0607a6711..c3aaeb177 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -8,19 +8,43 @@ use failure::{err_msg, Error}; use postgres::{transaction::Transaction, Connection}; use std::collections::HashMap; use std::ffi::OsStr; +use std::fmt; use std::fs; use std::io::Read; use std::path::{Path, PathBuf}; const MAX_CONCURRENT_UPLOADS: usize = 1000; +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum CompressionAlgorithm { + Zstd, +} + +impl std::str::FromStr for CompressionAlgorithm { + type Err = (); + fn from_str(s: &str) -> Result { + match s { + "zstd" => Ok(CompressionAlgorithm::Zstd), + _ => Err(()), + } + } +} + +impl fmt::Display for CompressionAlgorithm { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + CompressionAlgorithm::Zstd => write!(f, "zstd"), + } + } +} + #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub(crate) struct Blob { pub(crate) path: String, pub(crate) mime: String, pub(crate) date_updated: DateTime, pub(crate) content: Vec, - pub(crate) compressed: bool, + pub(crate) compression: Option, } fn get_file_list_from_dir>(path: P, files: &mut Vec) -> Result<(), Error> { @@ -76,9 +100,9 @@ impl<'a> Storage<'a> { Self::Database(db) => db.get(path), Self::S3(s3) => s3.get(path), }?; - if blob.compressed { - blob.content = decompress(blob.content.as_slice())?; - blob.compressed = false; + if let Some(alg) = blob.compression { + blob.content = decompress(blob.content.as_slice(), alg)?; + blob.compression = None; } Ok(blob) } @@ -116,7 +140,7 @@ impl<'a> Storage<'a> { .map(|file| (file_path, file)) }) .map(|(file_path, file)| -> Result<_, Error> { - let content = compress(file)?; + let (content, alg) = compress(file)?; let bucket_path = Path::new(prefix).join(&file_path); #[cfg(windows)] // On windows, we need to normalize \\ to / so the route logic works @@ -131,7 +155,7 @@ impl<'a> Storage<'a> { path: bucket_path, mime: mime.to_string(), content, - compressed: true, + compression: Some(alg), // this field is ignored by the backend date_updated: Utc::now(), }) @@ -153,12 +177,15 @@ impl<'a> Storage<'a> { } // public for benchmarking -pub fn compress(content: impl Read) -> Result, Error> { - zstd::encode_all(content, 9).map_err(Into::into) +pub fn compress(content: impl Read) -> Result<(Vec, CompressionAlgorithm), Error> { + let data = zstd::encode_all(content, 9)?; + Ok((data, CompressionAlgorithm::Zstd)) } -pub fn decompress(content: impl Read) -> Result, Error> { - zstd::decode_all(content).map_err(Into::into) +pub fn decompress(content: impl Read, algorithm: CompressionAlgorithm) -> Result, Error> { + match algorithm { + CompressionAlgorithm::Zstd => zstd::decode_all(content).map_err(Into::into), + } } fn detect_mime(file_path: &Path) -> Result<&'static str, Error> { @@ -289,12 +316,15 @@ mod test { #[test] fn test_batched_uploads() { let uploads: Vec<_> = (0..=MAX_CONCURRENT_UPLOADS + 1) - .map(|i| Blob { - mime: "text/rust".into(), - content: compress("fn main() {}".as_bytes()).unwrap(), - path: format!("{}.rs", i), - date_updated: Utc::now(), - compressed: true, + .map(|i| { + let (content, alg) = compress("fn main() {}".as_bytes()).unwrap(); + Blob { + mime: "text/rust".into(), + content, + path: format!("{}.rs", i), + date_updated: Utc::now(), + compression: Some(alg), + } }) .collect(); @@ -315,16 +345,16 @@ mod test { #[test] fn test_compression() { let orig = "fn main() {}"; - let compressed = compress(orig.as_bytes()).unwrap(); + let (data, alg) = compress(orig.as_bytes()).unwrap(); let blob = Blob { mime: "text/rust".into(), - content: compressed.clone(), + content: data.clone(), path: "main.rs".into(), date_updated: Timespec::new(42, 0), - compressed: true, + compression: Some(alg), }; test_roundtrip(std::slice::from_ref(&blob)); - assert_eq!(decompress(compressed.as_slice()).unwrap(), orig.as_bytes()); + assert_eq!(decompress(data.as_slice(), alg).unwrap(), orig.as_bytes()); } #[test] diff --git a/src/storage/s3.rs b/src/storage/s3.rs index 595bbb2fa..073f0f566 100644 --- a/src/storage/s3.rs +++ b/src/storage/s3.rs @@ -51,25 +51,22 @@ impl<'a> S3Backend<'a> { b.read_to_end(&mut content).unwrap(); let date_updated = parse_timespec(&res.last_modified.unwrap())?; - let compressed = res.metadata.unwrap_or_default().get("compressed").is_some(); + let compression = res.content_encoding.and_then(|s| s.parse().ok()); Ok(Blob { path: path.into(), mime: res.content_type.unwrap(), date_updated, content, - compressed, + compression, }) } pub(super) fn store_batch(&mut self, batch: &[Blob]) -> Result<(), Error> { use futures::stream::FuturesUnordered; use futures::stream::Stream; - use std::collections::HashMap; let mut attempts = 0; - let mut compressed = HashMap::with_capacity(1); - compressed.insert("compressed".into(), "true".into()); loop { let mut futures = FuturesUnordered::new(); @@ -81,7 +78,7 @@ impl<'a> S3Backend<'a> { key: blob.path.clone(), body: Some(blob.content.clone().into()), content_type: Some(blob.mime.clone()), - metadata: Some(compressed.clone()), + content_encoding: blob.compression.as_ref().map(|alg| alg.to_string()), ..Default::default() }) .inspect(|_| { @@ -175,7 +172,7 @@ pub(crate) mod tests { mime: "text/plain".into(), date_updated: Utc::now(), content: "Hello world!".into(), - compressed: false, + compression: None, }; // Add a test file to the database @@ -212,7 +209,7 @@ pub(crate) mod tests { mime: "text/plain".into(), date_updated: Utc::now(), content: "Hello world!".into(), - compressed: false, + compression: None, }) .collect(); From 6b1a7c929930b03a68f8b2c81f2a4319d784139c Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sun, 31 May 2020 18:00:29 -0400 Subject: [PATCH 07/15] Add compression algorithms to database --- src/db/add_package.rs | 21 +++++++++++++++++++++ src/db/file.rs | 11 +++++++---- src/db/migrate.rs | 18 +++++++++++++++--- src/docbuilder/rustwide_builder.rs | 23 +++++++++++++---------- src/storage/mod.rs | 12 ++++++++---- 5 files changed, 64 insertions(+), 21 deletions(-) diff --git a/src/db/add_package.rs b/src/db/add_package.rs index 9e676a11a..1d21819bf 100644 --- a/src/db/add_package.rs +++ b/src/db/add_package.rs @@ -8,6 +8,7 @@ use crate::{ docbuilder::BuildResult, error::Result, index::api::{CrateOwner, RegistryCrateData}, + storage::CompressionAlgorithm, utils::MetadataPackage, }; use log::debug; @@ -34,6 +35,7 @@ pub(crate) fn add_package_into_database( cratesio_data: &RegistryCrateData, has_docs: bool, has_examples: bool, + compression_algorithms: std::collections::HashSet, ) -> Result { debug!("Adding package into database"); let crate_id = initialize_package_in_database(&conn, metadata_pkg)?; @@ -116,6 +118,7 @@ pub(crate) fn add_package_into_database( add_keywords_into_database(&conn, &metadata_pkg, release_id)?; add_authors_into_database(&conn, &metadata_pkg, release_id)?; add_owners_into_database(&conn, &cratesio_data.owners, crate_id)?; + add_compression_into_database(&conn, compression_algorithms.into_iter(), release_id)?; // Update the crates table with the new release conn.execute( @@ -352,3 +355,21 @@ fn add_owners_into_database(conn: &Connection, owners: &[CrateOwner], crate_id: } Ok(()) } + +/// Add the compression algorithms used for this crate to the database +fn add_compression_into_database(conn: &Connection, algorithms: I, release_id: i32) -> Result<()> +where + I: Iterator, +{ + let sql = " + INSERT INTO compression_rels (release, algorithm) + VALUES ( + $1, + (SELECT id FROM compression WHERE name = $2) + )"; + let prepared = conn.prepare_cached(sql)?; + for alg in algorithms { + prepared.execute(&[&release_id, &alg.to_string()])?; + } + Ok(()) +} diff --git a/src/db/file.rs b/src/db/file.rs index 6cd5ac704..02bc5fc26 100644 --- a/src/db/file.rs +++ b/src/db/file.rs @@ -5,7 +5,7 @@ //! filesystem. This module is adding files into database and retrieving them. use crate::error::Result; -use crate::storage::Storage; +use crate::storage::{CompressionAlgorithms, Storage}; use postgres::Connection; use serde_json::Value; @@ -30,10 +30,13 @@ pub fn add_path_into_database>( conn: &Connection, prefix: &str, path: P, -) -> Result { +) -> Result<(Value, CompressionAlgorithms)> { let mut backend = Storage::new(conn); - let file_list = backend.store_all(conn, prefix, path.as_ref())?; - file_list_to_json(file_list.into_iter().collect()) + let (file_list, algorithms) = backend.store_all(conn, prefix, path.as_ref())?; + Ok(( + file_list_to_json(file_list.into_iter().collect())?, + algorithms, + )) } fn file_list_to_json(file_list: Vec<(PathBuf, String)>) -> Result { diff --git a/src/db/migrate.rs b/src/db/migrate.rs index 1d7b3f45a..013f4f6f1 100644 --- a/src/db/migrate.rs +++ b/src/db/migrate.rs @@ -345,11 +345,23 @@ pub fn migrate(version: Option, conn: &Connection) -> CratesfyiResult<( // version 14, // description - "Add a field for compression", + "Add compression", // upgrade query - "ALTER TABLE files ADD COLUMN compression VARCHAR;", + " + CREATE TABLE compression ( + id SERIAL PRIMARY KEY, + name VARCHAR(100) + ); + INSERT INTO compression (name) VALUES ('zstd'); + -- many to many table between releases and compression + -- stores the set of all compression algorithms used in the release files + CREATE TABLE compression_rels ( + release INT NOT NULL REFERENCES releases(id), + algorithm INT NOT NULL REFERENCES compression(id) + );", // downgrade query - "ALTER TABLE files DROP COLUMN compression;", + "DROP TABLE compression_rels; + DROP TABLE compression;", ), ]; diff --git a/src/docbuilder/rustwide_builder.rs b/src/docbuilder/rustwide_builder.rs index 5b9e05251..cb427c7cf 100644 --- a/src/docbuilder/rustwide_builder.rs +++ b/src/docbuilder/rustwide_builder.rs @@ -6,6 +6,7 @@ use crate::db::{add_build_into_database, add_package_into_database, connect_db}; use crate::docbuilder::{crates::crates_from_path, Limits}; use crate::error::Result; use crate::index::api::RegistryCrateData; +use crate::storage::CompressionAlgorithms; use crate::utils::{copy_doc_dir, parse_rustc_version, CargoMetadata}; use failure::ResultExt; use log::{debug, info, warn, LevelFilter}; @@ -333,6 +334,7 @@ impl RustwideBuilder { let mut files_list = None; let mut has_docs = false; + let mut algs = CompressionAlgorithms::default(); let mut successful_targets = Vec::new(); let metadata = Metadata::from_source_dir(&build.host_source_dir())?; let BuildTargets { @@ -345,11 +347,10 @@ impl RustwideBuilder { if res.result.successful { debug!("adding sources into database"); let prefix = format!("sources/{}/{}", name, version); - files_list = Some(add_path_into_database( - &conn, - &prefix, - build.host_source_dir(), - )?); + let (files, new_algs) = + add_path_into_database(&conn, &prefix, build.host_source_dir())?; + files_list = Some(files); + algs.extend(new_algs); if let Some(name) = res.cargo_metadata.root().library_name() { let host_target = build.host_target_dir(); @@ -376,8 +377,9 @@ impl RustwideBuilder { &metadata, )?; } - self.upload_docs(&conn, name, version, local_storage.path())?; - } + let new_algs = self.upload_docs(&conn, name, version, local_storage.path())?; + algs.extend(new_algs); + }; let has_examples = build.host_source_dir().join("examples").is_dir(); if res.result.successful { @@ -398,6 +400,7 @@ impl RustwideBuilder { &RegistryCrateData::get_from_network(res.cargo_metadata.root())?, has_docs, has_examples, + algs, )?; add_build_into_database(&conn, release_id, &res.result)?; @@ -572,14 +575,14 @@ impl RustwideBuilder { name: &str, version: &str, local_storage: &Path, - ) -> Result<()> { + ) -> Result { debug!("Adding documentation into database"); add_path_into_database( conn, &format!("rustdoc/{}/{}", name, version), local_storage, - )?; - Ok(()) + ) + .map(|t| t.1) } } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index c3aaeb177..88d4d9865 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -6,7 +6,7 @@ pub(crate) use self::s3::S3Backend; use chrono::{DateTime, Utc}; use failure::{err_msg, Error}; use postgres::{transaction::Transaction, Connection}; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::ffi::OsStr; use std::fmt; use std::fs; @@ -15,6 +15,8 @@ use std::path::{Path, PathBuf}; const MAX_CONCURRENT_UPLOADS: usize = 1000; +pub type CompressionAlgorithms = HashSet; + #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] pub enum CompressionAlgorithm { Zstd, @@ -119,15 +121,16 @@ impl<'a> Storage<'a> { // If the environment is configured with S3 credentials, this will upload to S3; // otherwise, this will store files in the database. // - // This returns a HashMap. + // This returns (map, set). pub(crate) fn store_all( &mut self, conn: &Connection, prefix: &str, root_dir: &Path, - ) -> Result, Error> { + ) -> Result<(HashMap, HashSet), Error> { let trans = conn.transaction()?; let mut file_paths_and_mimes = HashMap::new(); + let mut algs = HashSet::with_capacity(1); let mut blobs = get_file_list(root_dir)? .into_iter() @@ -150,6 +153,7 @@ impl<'a> Storage<'a> { let mime = detect_mime(&file_path)?; file_paths_and_mimes.insert(file_path, mime.to_string()); + algs.insert(alg); Ok(Blob { path: bucket_path, @@ -172,7 +176,7 @@ impl<'a> Storage<'a> { } trans.commit()?; - Ok(file_paths_and_mimes) + Ok((file_paths_and_mimes, algs)) } } From 6433902ae62d480c28a22520f8e8927ac27490c9 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Fri, 5 Jun 2020 08:18:20 -0400 Subject: [PATCH 08/15] Fix tests --- docker-compose.yml | 2 +- src/storage/mod.rs | 4 ++-- src/test/fakes.rs | 13 +++++++++++-- src/web/source.rs | 18 +++++++++--------- 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index a49e31fa0..c5e5b4571 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -34,7 +34,7 @@ services: image: minio/minio entrypoint: > /bin/sh -c " - mkdir /data/rust-docs-rs; + mkdir -p /data/rust-docs-rs; minio server /data; " ports: diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 88d4d9865..d0629959d 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -256,7 +256,7 @@ mod test { let db = env.db(); let conn = db.conn(); let mut backend = Storage::Database(DatabaseBackend::new(&conn)); - let stored_files = backend.store_all(&conn, "", dir.path()).unwrap(); + let (stored_files, algs) = backend.store_all(&conn, "", dir.path()).unwrap(); assert_eq!(stored_files.len(), blobs.len()); for blob in blobs { let name = Path::new(&blob.path); @@ -289,7 +289,7 @@ mod test { let db = env.db(); let conn = db.conn(); let mut backend = Storage::Database(DatabaseBackend::new(&conn)); - let stored_files = backend.store_all(&conn, "rustdoc", dir.path()).unwrap(); + let (stored_files, algs) = backend.store_all(&conn, "rustdoc", dir.path()).unwrap(); assert_eq!(stored_files.len(), files.len()); for name in &files { let name = Path::new(name); diff --git a/src/test/fakes.rs b/src/test/fakes.rs index 234c422a0..5f1a5f1a9 100644 --- a/src/test/fakes.rs +++ b/src/test/fakes.rs @@ -149,6 +149,7 @@ impl<'a> FakeRelease<'a> { } pub(crate) fn create(self) -> Result { + use std::collections::HashSet; use std::fs; use std::path::Path; @@ -157,6 +158,7 @@ impl<'a> FakeRelease<'a> { let db = self.db; let mut source_meta = None; + let mut algs = HashSet::new(); if self.build_result.successful { let upload_files = |prefix: &str, files: &[(&str, &[u8])], target: Option<&str>| { let mut path_prefix = tempdir.path().join(prefix); @@ -197,9 +199,15 @@ impl<'a> FakeRelease<'a> { rustdoc_files.push((Box::leak(Box::new(updated)), data)); } } - let rustdoc_meta = upload_files("rustdoc", &rustdoc_files, None)?; + let (rustdoc_meta, new_algs) = upload_files("rustdoc", &rustdoc_files, None)?; + algs.extend(new_algs); log::debug!("added rustdoc files {}", rustdoc_meta); - source_meta = Some(upload_files("source", &self.source_files, None)?); + match upload_files("source", &self.source_files, None)? { + (json, new_algs) => { + source_meta = Some(json); + algs.extend(new_algs); + } + } log::debug!("added source files {}", source_meta.as_ref().unwrap()); for target in &package.targets[1..] { @@ -220,6 +228,7 @@ impl<'a> FakeRelease<'a> { &self.registry_crate_data, self.has_docs, self.has_examples, + HashSet::new(), )?; crate::db::add_build_into_database(&db.conn(), release_id, &self.build_result)?; diff --git a/src/web/source.rs b/src/web/source.rs index 5e6245f4e..650f624f9 100644 --- a/src/web/source.rs +++ b/src/web/source.rs @@ -85,15 +85,15 @@ impl FileList { let rows = conn .query( "SELECT crates.name, - releases.version, - releases.description, - releases.target_name, - releases.rustdoc_status, - releases.files, - releases.default_target - FROM releases - LEFT OUTER JOIN crates ON crates.id = releases.crate_id - WHERE crates.name = $1 AND releases.version = $2", + releases.version, + releases.description, + releases.target_name, + releases.rustdoc_status, + releases.files, + releases.default_target + FROM releases + LEFT OUTER JOIN crates ON crates.id = releases.crate_id + WHERE crates.name = $1 AND releases.version = $2", &[&name, &version], ) .unwrap(); From d72ffe3c3b28ebb548ca0c7893f7db38878ae826 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Fri, 5 Jun 2020 08:34:03 -0400 Subject: [PATCH 09/15] Fix compression when uploading to database backend --- src/db/migrate.rs | 6 +++++- src/storage/database.rs | 8 +++++--- src/storage/mod.rs | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/db/migrate.rs b/src/db/migrate.rs index 013f4f6f1..72ce5e640 100644 --- a/src/db/migrate.rs +++ b/src/db/migrate.rs @@ -353,11 +353,15 @@ pub fn migrate(version: Option, conn: &Connection) -> CratesfyiResult<( name VARCHAR(100) ); INSERT INTO compression (name) VALUES ('zstd'); + -- NULL indicates the file was not compressed + ALTER TABLE files ADD COLUMN compression INT REFERENCES compression(id); -- many to many table between releases and compression -- stores the set of all compression algorithms used in the release files CREATE TABLE compression_rels ( release INT NOT NULL REFERENCES releases(id), - algorithm INT NOT NULL REFERENCES compression(id) + algorithm INT REFERENCES compression(id), + -- make sure we don't store duplicates by accident + UNIQUE(release, algorithm) );", // downgrade query "DROP TABLE compression_rels; diff --git a/src/storage/database.rs b/src/storage/database.rs index c8c506ce2..23d1424ca 100644 --- a/src/storage/database.rs +++ b/src/storage/database.rs @@ -18,7 +18,9 @@ impl<'a> DatabaseBackend<'a> { pub(super) fn get(&self, path: &str) -> Result { let rows = self.conn.query( - "SELECT path, mime, date_updated, content, compression FROM files WHERE path = $1;", + "SELECT path, mime, date_updated, content, compression.name + FROM files LEFT JOIN compression ON files.compression = compression.id + WHERE path = $1;", &[&path], )?; @@ -26,7 +28,7 @@ impl<'a> DatabaseBackend<'a> { Err(PathNotFoundError.into()) } else { let row = rows.get(0); - let compression = row.get::<_, Option>("compression").map(|alg| { + let compression = row.get::<_, Option>("name").map(|alg| { alg.parse() .expect("invalid or unknown compression algorithm") }); @@ -45,7 +47,7 @@ impl<'a> DatabaseBackend<'a> { let compression = blob.compression.as_ref().map(|alg| alg.to_string()); trans.query( "INSERT INTO files (path, mime, content, compression) - VALUES ($1, $2, $3, $4) + VALUES ($1, $2, $3, (SELECT id FROM compression WHERE name = $4)) ON CONFLICT (path) DO UPDATE SET mime = EXCLUDED.mime, content = EXCLUDED.content, compression = EXCLUDED.compression", &[&blob.path, &blob.mime, &blob.content, &compression], diff --git a/src/storage/mod.rs b/src/storage/mod.rs index d0629959d..211976d49 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -256,7 +256,7 @@ mod test { let db = env.db(); let conn = db.conn(); let mut backend = Storage::Database(DatabaseBackend::new(&conn)); - let (stored_files, algs) = backend.store_all(&conn, "", dir.path()).unwrap(); + let (stored_files, _algs) = backend.store_all(&conn, "", dir.path()).unwrap(); assert_eq!(stored_files.len(), blobs.len()); for blob in blobs { let name = Path::new(&blob.path); From 189e6cd2192fffc7ceda8f5603c4302d6fb3cabf Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Fri, 5 Jun 2020 08:44:44 -0400 Subject: [PATCH 10/15] Cleanup - Fix bad rebase - Remove warning in test mode --- src/storage/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 211976d49..d57527998 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -289,7 +289,7 @@ mod test { let db = env.db(); let conn = db.conn(); let mut backend = Storage::Database(DatabaseBackend::new(&conn)); - let (stored_files, algs) = backend.store_all(&conn, "rustdoc", dir.path()).unwrap(); + let (stored_files, _algs) = backend.store_all(&conn, "rustdoc", dir.path()).unwrap(); assert_eq!(stored_files.len(), files.len()); for name in &files { let name = Path::new(name); @@ -354,7 +354,7 @@ mod test { mime: "text/rust".into(), content: data.clone(), path: "main.rs".into(), - date_updated: Timespec::new(42, 0), + date_updated: Utc::now(), compression: Some(alg), }; test_roundtrip(std::slice::from_ref(&blob)); From 9b5d200210101f3dd00ed379c763eb993c3ac8f3 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Fri, 5 Jun 2020 09:20:27 -0400 Subject: [PATCH 11/15] Fix downgrade queries --- src/db/migrate.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/db/migrate.rs b/src/db/migrate.rs index 72ce5e640..67350fc07 100644 --- a/src/db/migrate.rs +++ b/src/db/migrate.rs @@ -365,6 +365,7 @@ pub fn migrate(version: Option, conn: &Connection) -> CratesfyiResult<( );", // downgrade query "DROP TABLE compression_rels; + ALTER TABLE files DROP COLUMN compression; DROP TABLE compression;", ), ]; From ddfb0d65dbf143f0f702601d96f936caedc3042d Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Mon, 8 Jun 2020 19:08:06 -0400 Subject: [PATCH 12/15] Make compression algorithms more extensible - Don't require writing a database migration - Make tests not compile if they aren't exhaustive --- src/db/migrate.rs | 16 ++++++---------- src/storage/database.rs | 16 +++++++++------- src/storage/mod.rs | 34 +++++++++++++++++++++++++++++----- 3 files changed, 44 insertions(+), 22 deletions(-) diff --git a/src/db/migrate.rs b/src/db/migrate.rs index 67350fc07..2937bb29f 100644 --- a/src/db/migrate.rs +++ b/src/db/migrate.rs @@ -348,25 +348,21 @@ pub fn migrate(version: Option, conn: &Connection) -> CratesfyiResult<( "Add compression", // upgrade query " - CREATE TABLE compression ( - id SERIAL PRIMARY KEY, - name VARCHAR(100) - ); - INSERT INTO compression (name) VALUES ('zstd'); - -- NULL indicates the file was not compressed - ALTER TABLE files ADD COLUMN compression INT REFERENCES compression(id); + -- NULL indicates the file was not compressed. + -- There is no meaning assigned to the compression id in the database itself, + -- it is instead interpreted by the application. + ALTER TABLE files ADD COLUMN compression INT; -- many to many table between releases and compression -- stores the set of all compression algorithms used in the release files CREATE TABLE compression_rels ( release INT NOT NULL REFERENCES releases(id), - algorithm INT REFERENCES compression(id), + algorithm INT, -- make sure we don't store duplicates by accident UNIQUE(release, algorithm) );", // downgrade query "DROP TABLE compression_rels; - ALTER TABLE files DROP COLUMN compression; - DROP TABLE compression;", + ALTER TABLE files DROP COLUMN compression;" ), ]; diff --git a/src/storage/database.rs b/src/storage/database.rs index 23d1424ca..89bbbf3b6 100644 --- a/src/storage/database.rs +++ b/src/storage/database.rs @@ -17,9 +17,11 @@ impl<'a> DatabaseBackend<'a> { } pub(super) fn get(&self, path: &str) -> Result { + use std::convert::TryInto; + let rows = self.conn.query( - "SELECT path, mime, date_updated, content, compression.name - FROM files LEFT JOIN compression ON files.compression = compression.id + "SELECT path, mime, date_updated, content, compression + FROM files WHERE path = $1;", &[&path], )?; @@ -28,9 +30,9 @@ impl<'a> DatabaseBackend<'a> { Err(PathNotFoundError.into()) } else { let row = rows.get(0); - let compression = row.get::<_, Option>("name").map(|alg| { - alg.parse() - .expect("invalid or unknown compression algorithm") + let compression = row.get::<_, Option>("compression").map(|i| { + i.try_into() + .expect("invalid compression algorithm stored in database") }); Ok(Blob { path: row.get("path"), @@ -44,10 +46,10 @@ impl<'a> DatabaseBackend<'a> { pub(super) fn store_batch(&self, batch: &[Blob], trans: &Transaction) -> Result<(), Error> { for blob in batch { - let compression = blob.compression.as_ref().map(|alg| alg.to_string()); + let compression = blob.compression.map(|alg| alg as i32); trans.query( "INSERT INTO files (path, mime, content, compression) - VALUES ($1, $2, $3, (SELECT id FROM compression WHERE name = $4)) + VALUES ($1, $2, $3, $4) ON CONFLICT (path) DO UPDATE SET mime = EXCLUDED.mime, content = EXCLUDED.content, compression = EXCLUDED.compression", &[&blob.path, &blob.mime, &blob.content, &compression], diff --git a/src/storage/mod.rs b/src/storage/mod.rs index d57527998..8563bdf68 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -17,9 +17,18 @@ const MAX_CONCURRENT_UPLOADS: usize = 1000; pub type CompressionAlgorithms = HashSet; +// NOTE: the `TryFrom` impl must be updated whenever a new variant is added. #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] pub enum CompressionAlgorithm { - Zstd, + Zstd = 0, +} + +impl fmt::Display for CompressionAlgorithm { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + CompressionAlgorithm::Zstd => write!(f, "zstd"), + } + } } impl std::str::FromStr for CompressionAlgorithm { @@ -32,10 +41,12 @@ impl std::str::FromStr for CompressionAlgorithm { } } -impl fmt::Display for CompressionAlgorithm { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - CompressionAlgorithm::Zstd => write!(f, "zstd"), +impl std::convert::TryFrom for CompressionAlgorithm { + type Error = i32; + fn try_from(i: i32) -> Result { + match i { + 0 => Ok(Self::Zstd), + _ => Err(i), } } } @@ -381,4 +392,17 @@ mod test { let detected_mime = detected_mime.expect("no mime was given"); assert_eq!(detected_mime, expected_mime); } + + #[test] + fn test_compression_try_from_is_exhaustive() { + use std::convert::TryFrom; + + let a = CompressionAlgorithm::Zstd; + match a { + CompressionAlgorithm::Zstd => { + assert_eq!(a, CompressionAlgorithm::try_from(a as i32).unwrap()); + assert_eq!(a, a.to_string().parse().unwrap()); + } + } + } } From 8f6051f0dd87b45b21eef6ad4960c57d6829566d Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Mon, 8 Jun 2020 19:51:37 -0400 Subject: [PATCH 13/15] Fix outdated SQL --- src/db/add_package.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/db/add_package.rs b/src/db/add_package.rs index 1d21819bf..eae5ef26c 100644 --- a/src/db/add_package.rs +++ b/src/db/add_package.rs @@ -361,15 +361,10 @@ fn add_compression_into_database(conn: &Connection, algorithms: I, release_id where I: Iterator, { - let sql = " - INSERT INTO compression_rels (release, algorithm) - VALUES ( - $1, - (SELECT id FROM compression WHERE name = $2) - )"; + let sql = "INSERT INTO compression_rels (release, algorithm) VALUES ($1, $2);"; let prepared = conn.prepare_cached(sql)?; for alg in algorithms { - prepared.execute(&[&release_id, &alg.to_string()])?; + prepared.execute(&[&release_id, &(alg as i32)])?; } Ok(()) } From 869a36b3cc5b6f35cc6ef1d058b5741a75b6004d Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Tue, 9 Jun 2020 10:03:34 -0400 Subject: [PATCH 14/15] Use macro-based enum for extensibility --- src/storage/mod.rs | 57 +++++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 8563bdf68..09e3a8690 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -17,37 +17,46 @@ const MAX_CONCURRENT_UPLOADS: usize = 1000; pub type CompressionAlgorithms = HashSet; -// NOTE: the `TryFrom` impl must be updated whenever a new variant is added. -#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] -pub enum CompressionAlgorithm { - Zstd = 0, -} +macro_rules! enum_id { + ($vis:vis enum $name:ident { $($variant:ident = $discriminant:expr,)* }) => { + #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] + $vis enum $name { + $($variant = $discriminant,)* + } -impl fmt::Display for CompressionAlgorithm { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - CompressionAlgorithm::Zstd => write!(f, "zstd"), + impl fmt::Display for CompressionAlgorithm { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + $(Self::$variant => write!(f, stringify!($variant)),)* + } + } } - } -} -impl std::str::FromStr for CompressionAlgorithm { - type Err = (); - fn from_str(s: &str) -> Result { - match s { - "zstd" => Ok(CompressionAlgorithm::Zstd), - _ => Err(()), + impl std::str::FromStr for CompressionAlgorithm { + type Err = (); + fn from_str(s: &str) -> Result { + match s { + $(stringify!($variant) => Ok(Self::$variant),)* + _ => Err(()), + } + } + } + + impl std::convert::TryFrom for CompressionAlgorithm { + type Error = i32; + fn try_from(i: i32) -> Result { + match i { + $($discriminant => Ok(Self::$variant),)* + _ => Err(i), + } + } } } } -impl std::convert::TryFrom for CompressionAlgorithm { - type Error = i32; - fn try_from(i: i32) -> Result { - match i { - 0 => Ok(Self::Zstd), - _ => Err(i), - } +enum_id! { + pub enum CompressionAlgorithm { + Zstd = 0, } } From 33eee9b4bd1b21d70eb838ac31248519ecf79f41 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Thu, 11 Jun 2020 17:03:11 -0400 Subject: [PATCH 15/15] Don't give an error when rebuilding documentation Previously, postgres would give a duplicate key error when a crate was built more than once: ``` thread 'main' panicked at 'Building documentation failed: Error(Db(DbError { severity: "ERROR", parsed_severity: Some(Error), code: SqlState("23505"), message: "duplicate key value violates unique constraint \"compression_rels_release_algorithm_key\"", detail: Some("Key (release, algorithm)=(1, 0) already exists."), hint: None, position: None, where_: None, schema: Some("public"), table: Some("compression_rels"), column: None, datatype: None, constraint: Some("compression_rels_release_algorithm_key"), file: Some("nbtinsert.c"), line: Some(434), routine: Some("_bt_check_unique") }))', src/bin/cratesfyi.rs:321:21 ``` Now, duplicate keys are discarded. --- src/db/add_package.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/db/add_package.rs b/src/db/add_package.rs index eae5ef26c..82b861eae 100644 --- a/src/db/add_package.rs +++ b/src/db/add_package.rs @@ -361,7 +361,10 @@ fn add_compression_into_database(conn: &Connection, algorithms: I, release_id where I: Iterator, { - let sql = "INSERT INTO compression_rels (release, algorithm) VALUES ($1, $2);"; + let sql = " + INSERT INTO compression_rels (release, algorithm) + VALUES ($1, $2) + ON CONFLICT DO NOTHING;"; let prepared = conn.prepare_cached(sql)?; for alg in algorithms { prepared.execute(&[&release_id, &(alg as i32)])?;