Skip to content

Commit 7797c6e

Browse files
Recompress xz -> xz with tuned compression options
This adds support for recompressing the .xz tarballs in promote-release, with a goal of moving rust-lang/rust CI back to producing "balanced" profile binaries rather than the highly compressed ones currently produced. This will cut rust-lang/rust CI times while keeping the production artifacts in static.r-l.o equivalently compressed. This PR doesn't enable the .xz recompression by default; that will be done in a follow-up simpleinfra push. Depending on the impact on (especially) stable build times, we may wish to bump the promote-release production container from 8 cores to 72 cores (the next largest container) so that we still finish in a reasonable amount of time.
1 parent 707e49f commit 7797c6e

File tree

3 files changed

+78
-17
lines changed

3 files changed

+78
-17
lines changed

local/run.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,8 @@ export PROMOTE_RELEASE_UPLOAD_ADDR="http://localhost:9000/static"
118118
export PROMOTE_RELEASE_UPLOAD_BUCKET="static"
119119
export PROMOTE_RELEASE_UPLOAD_STORAGE_CLASS="STANDARD"
120120
export PROMOTE_RELEASE_UPLOAD_DIR="dist"
121+
# Enable xz recompression to check it in CI
122+
export PROMOTE_RELEASE_RECOMPRESS_XZ=1 # Name must match the RECOMPRESS_XZ key read in src/config.rs
121123
# Environment variables used only by local releases
122124
export PROMOTE_RELEASE_BYPASS_STARTUP_CHECKS="1"
123125
export PROMOTE_RELEASE_GZIP_COMPRESSION_LEVEL="1" # Faster recompressions

src/config.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,8 +125,12 @@ pub(crate) struct Config {
125125
/// * Preventing multiple releases on stable and beta of the same version number.
126126
pub(crate) bypass_startup_checks: bool,
127127

128-
/// Whether to force the recompression of .gz files into .xz.
129-
pub(crate) wip_recompress: bool,
128+
/// Whether to force the recompression from input tarballs into .gz compressed tarballs.
129+
///
130+
/// This is on by default if .gz tarballs aren't available in the input.
131+
pub(crate) recompress_gz: bool,
132+
/// Whether to force the recompression from input tarballs into highly compressed .xz tarballs.
133+
pub(crate) recompress_xz: bool,
130134

131135
/// The compression level to use when recompressing tarballs with gzip.
132136
pub(crate) gzip_compression_level: u32,
@@ -212,7 +216,8 @@ impl Config {
212216
upload_bucket: require_env("UPLOAD_BUCKET")?,
213217
storage_class: default_env("UPLOAD_STORAGE_CLASS", "INTELLIGENT_TIERING".into())?,
214218
upload_dir: require_env("UPLOAD_DIR")?,
215-
wip_recompress: bool_env("WIP_RECOMPRESS")?,
219+
recompress_xz: bool_env("RECOMPRESS_XZ")?,
220+
recompress_gz: bool_env("RECOMPRESS_GZ")?,
216221
rustc_tag_repository: maybe_env("RUSTC_TAG_REPOSITORY")?,
217222
blog_repository: maybe_env("BLOG_REPOSITORY")?,
218223
blog_pr: maybe_env("BLOG_MERGE_PR")?,

src/main.rs

Lines changed: 68 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -380,14 +380,11 @@ impl Context {
380380
Some("asc") | Some("sha256") => {
381381
fs::remove_file(&path)?;
382382
}
383-
// Generate *.gz from *.xz...
383+
// Store off the input files for potential recompression.
384384
Some("xz") => {
385-
let gz_path = path.with_extension("gz");
386-
if self.config.wip_recompress || !gz_path.is_file() {
387-
to_recompress.push((path.to_path_buf(), gz_path));
388-
}
385+
to_recompress.push(path.to_path_buf());
389386
}
390-
Some("gz") if self.config.wip_recompress => {
387+
Some("gz") if self.config.recompress_gz => {
391388
fs::remove_file(&path)?;
392389
}
393390
_ => {}
@@ -404,19 +401,76 @@ impl Context {
404401
to_recompress.len(),
405402
to_recompress.len().min(rayon::current_num_threads()),
406403
);
404+
println!("gzip recompression enabled: {}", self.config.recompress_gz);
405+
println!("xz recompression enabled: {}", self.config.recompress_xz);
407406
let recompress_start = Instant::now();
408407

408+
let recompress_gz = self.config.recompress_gz;
409+
let recompress_xz = self.config.recompress_xz;
409410
let compression_level = flate2::Compression::new(self.config.gzip_compression_level);
410411
to_recompress
411412
.par_iter()
412-
.map(|(xz_path, gz_path)| {
413-
println!("recompressing {}...", gz_path.display());
414-
415-
let xz = File::open(xz_path)?;
416-
let mut xz = XzDecoder::new(xz);
417-
let gz = File::create(gz_path)?;
418-
let mut gz = flate2::write::GzEncoder::new(gz, compression_level);
419-
io::copy(&mut xz, &mut gz)?;
413+
.map(|xz_path| {
414+
println!("recompressing {}...", xz_path.display());
415+
let gz_path = xz_path.with_extension("gz");
416+
417+
// Produce gzip if explicitly enabled or the destination file doesn't exist.
418+
if recompress_gz || !gz_path.is_file() {
419+
let mut xz_orig = XzDecoder::new(File::open(xz_path)?);
420+
let gz = File::create(gz_path)?;
421+
let mut gz = flate2::write::GzEncoder::new(gz, compression_level);
422+
io::copy(&mut xz_orig, &mut gz)?;
// Finish the gzip stream explicitly: dropping the encoder would also write the
// trailer, but any I/O error at that point would be silently ignored and a
// truncated .gz would be treated as a success.
gz.finish()?;
423+
}
424+
425+
// xz recompression with more aggressive settings than we want to take the time
426+
// for in rust-lang/rust CI. This cuts 5-15% off of the produced tarballs.
427+
//
428+
// Note that this is using a single-threaded compressor as we're parallelizing
429+
// via rayon already. In rust-lang/rust we were trying to use parallel
430+
// compression, but the default block size for that is 3*dict_size so we
431+
// weren't actually using more than one core in most of the builders with
432+
// <192MB uncompressed tarballs. In promote-release since we're recompressing
433+
// 100s of tarballs there's no need for each individual compression to be
434+
// parallel.
435+
if recompress_xz {
436+
let mut filters = xz2::stream::Filters::new();
437+
let mut lzma_ops = xz2::stream::LzmaOptions::new_preset(9).unwrap();
438+
// This sets the overall dictionary size, which is also how much memory (baseline)
439+
// is needed for decompression.
440+
lzma_ops.dict_size(64 * 1024 * 1024);
441+
// Use the best match finder for compression ratio.
442+
lzma_ops.match_finder(xz2::stream::MatchFinder::BinaryTree4);
443+
lzma_ops.mode(xz2::stream::Mode::Normal);
444+
// Set nice len to the maximum for best compression ratio
445+
lzma_ops.nice_len(273);
446+
// Set depth to a reasonable value, 0 means auto, 1000 is somewhat high but gives
447+
// good results.
448+
lzma_ops.depth(1000);
449+
// 2 is the default and does well for most files
450+
lzma_ops.position_bits(2);
451+
// 0 is the default and does well for most files
452+
lzma_ops.literal_position_bits(0);
453+
// 3 is the default and does well for most files
454+
lzma_ops.literal_context_bits(3);
455+
456+
filters.lzma2(&lzma_ops);
457+
458+
// FIXME: Do we want a checksum as part of compression?
459+
let stream = xz2::stream::Stream::new_stream_encoder(
460+
&filters,
461+
xz2::stream::Check::None,
462+
)
463+
.unwrap();
464+
let xz_recompressed = xz_path.with_extension("xz_recompressed");
465+
let xz_out = File::create(&xz_recompressed)?;
466+
let mut xz_out = xz2::write::XzEncoder::new_stream(
467+
std::io::BufWriter::new(xz_out),
468+
stream,
469+
);
470+
let mut xz_orig = XzDecoder::new(File::open(xz_path)?);
471+
io::copy(&mut xz_orig, &mut xz_out)?;
// Finish the xz stream and flush the buffered writer before the recompressed
// file is moved over the original. Relying on Drop would write the final bytes
// only after the rename and would swallow any I/O error, leaving a truncated
// tarball in place of the input while still reporting success.
io::Write::flush(&mut xz_out.finish()?)?;
472+
fs::rename(&xz_recompressed, &xz_path)?;
473+
}
420474

421475
Ok::<(), Error>(())
422476
})

0 commit comments

Comments
 (0)