Commit f56c4cb

Recompress xz -> xz with tuned compression options
This adds support for recompressing the .xz tarballs in promote-release, with the goal of moving rust-lang/rust CI back to producing "balanced" profile binaries rather than the highly compressed ones currently produced. This will cut rust-lang/rust CI times while keeping the production artifacts on static.r-l.o equivalently compressed. This PR doesn't enable the .xz recompression by default; that will be done in a follow-up simpleinfra push. Depending on the impact to (especially) stable build times, we may wish to bump the promote-release production container from 8 cores to 72 cores (the next largest container) so that we still finish in a reasonable amount of time.
1 parent 707e49f commit f56c4cb

3 files changed (+76, -17 lines)


local/run.sh

Lines changed: 2 additions & 0 deletions
@@ -118,6 +118,8 @@ export PROMOTE_RELEASE_UPLOAD_ADDR="http://localhost:9000/static"
 export PROMOTE_RELEASE_UPLOAD_BUCKET="static"
 export PROMOTE_RELEASE_UPLOAD_STORAGE_CLASS="STANDARD"
 export PROMOTE_RELEASE_UPLOAD_DIR="dist"
+# Enable xz recompression to check it in CI
+export PROMOTE_RELEASE_XZ_RECOMPRESS=1
 # Environment variables used only by local releases
 export PROMOTE_RELEASE_BYPASS_STARTUP_CHECKS="1"
 export PROMOTE_RELEASE_GZIP_COMPRESSION_LEVEL="1" # Faster recompressions

src/config.rs

Lines changed: 8 additions & 3 deletions
@@ -125,8 +125,12 @@ pub(crate) struct Config {
     /// * Preventing multiple releases on stable and beta of the same version number.
     pub(crate) bypass_startup_checks: bool,
 
-    /// Whether to force the recompression of .gz files into .xz.
-    pub(crate) wip_recompress: bool,
+    /// Whether to force the recompression from input tarballs into .gz compressed tarballs.
+    ///
+    /// This is on by default if .gz tarballs aren't available in the input.
+    pub(crate) recompress_gz: bool,
+    /// Whether to force the recompression from input tarballs into highly compressed .xz tarballs.
+    pub(crate) recompress_xz: bool,
 
     /// The compression level to use when recompressing tarballs with gzip.
     pub(crate) gzip_compression_level: u32,
@@ -212,7 +216,8 @@ impl Config {
             upload_bucket: require_env("UPLOAD_BUCKET")?,
             storage_class: default_env("UPLOAD_STORAGE_CLASS", "INTELLIGENT_TIERING".into())?,
             upload_dir: require_env("UPLOAD_DIR")?,
-            wip_recompress: bool_env("WIP_RECOMPRESS")?,
+            recompress_xz: bool_env("RECOMPRESS_XZ")?,
+            recompress_gz: bool_env("RECOMPRESS_GZ")?,
             rustc_tag_repository: maybe_env("RUSTC_TAG_REPOSITORY")?,
             blog_repository: maybe_env("BLOG_REPOSITORY")?,
             blog_pr: maybe_env("BLOG_MERGE_PR")?,
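The `bool_env` helper itself is defined elsewhere in the crate and isn't part of this diff. As a rough sketch of what such a helper could look like, assuming the `PROMOTE_RELEASE_` prefix used in local/run.sh and treating an unset variable as false (the real helper may accept different spellings and use a different error type):

use std::env::VarError;

// Hypothetical stand-in for the crate's bool_env helper: reads
// PROMOTE_RELEASE_<NAME> and interprets it as a boolean flag.
fn bool_env(name: &str) -> Result<bool, String> {
    match std::env::var(format!("PROMOTE_RELEASE_{name}")) {
        // Any of these spellings enables the flag; everything else disables it.
        Ok(value) => Ok(matches!(value.as_str(), "1" | "true" | "yes")),
        // An unset variable simply means the feature stays off.
        Err(VarError::NotPresent) => Ok(false),
        // Non-UTF-8 values are reported as configuration errors.
        Err(err) => Err(format!("invalid value for {name}: {err}")),
    }
}

Under that assumption, `PROMOTE_RELEASE_XZ_RECOMPRESS=1` from local/run.sh maps onto `recompress_xz: bool_env("RECOMPRESS_XZ")?` above.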

src/main.rs

Lines changed: 66 additions & 14 deletions
@@ -380,14 +380,11 @@ impl Context {
                 Some("asc") | Some("sha256") => {
                     fs::remove_file(&path)?;
                 }
-                // Generate *.gz from *.xz...
+                // Store off the input files for potential recompression.
                 Some("xz") => {
-                    let gz_path = path.with_extension("gz");
-                    if self.config.wip_recompress || !gz_path.is_file() {
-                        to_recompress.push((path.to_path_buf(), gz_path));
-                    }
+                    to_recompress.push(path.to_path_buf());
                 }
-                Some("gz") if self.config.wip_recompress => {
+                Some("gz") if self.config.recompress_gz => {
                     fs::remove_file(&path)?;
                 }
                 _ => {}
@@ -404,19 +401,74 @@ impl Context {
             to_recompress.len(),
             to_recompress.len().min(rayon::current_num_threads()),
         );
+        println!("gzip recompression enabled: {}", self.config.recompress_gz);
+        println!("xz recompression enabled: {}", self.config.recompress_xz);
         let recompress_start = Instant::now();
 
         let compression_level = flate2::Compression::new(self.config.gzip_compression_level);
         to_recompress
             .par_iter()
-            .map(|(xz_path, gz_path)| {
-                println!("recompressing {}...", gz_path.display());
-
-                let xz = File::open(xz_path)?;
-                let mut xz = XzDecoder::new(xz);
-                let gz = File::create(gz_path)?;
-                let mut gz = flate2::write::GzEncoder::new(gz, compression_level);
-                io::copy(&mut xz, &mut gz)?;
+            .map(|xz_path| {
+                println!("recompressing {}...", xz_path.display());
+                let gz_path = xz_path.with_extension("gz");
+
+                // Produce gzip if explicitly enabled or the destination file doesn't exist.
+                if self.config.recompress_gz || !gz_path.is_file() {
+                    let mut xz_orig = XzDecoder::new(File::open(xz_path)?);
+                    let gz = File::create(gz_path)?;
+                    let mut gz = flate2::write::GzEncoder::new(gz, compression_level);
+                    io::copy(&mut xz_orig, &mut gz)?;
+                }
+
+                // xz recompression with more aggressive settings than we want to take the time
+                // for in rust-lang/rust CI. This cuts 5-15% off of the produced tarballs.
+                //
+                // Note that this is using a single-threaded compressor as we're parallelizing
+                // via rayon already. In rust-lang/rust we were trying to use parallel
+                // compression, but the default block size for that is 3*dict_size so we
+                // weren't actually using more than one core in most of the builders with
+                // <192MB uncompressed tarballs. In promote-release since we're recompressing
+                // 100s of tarballs there's no need for each individual compression to be
+                // parallel.
+                if self.config.recompress_xz {
+                    let mut filters = xz2::stream::Filters::new();
+                    let mut lzma_ops = xz2::stream::LzmaOptions::new_preset(9).unwrap();
+                    // This sets the overall dictionary size, which is also how much memory (baseline)
+                    // is needed for decompression.
+                    lzma_ops.dict_size(64 * 1024 * 1024);
+                    // Use the best match finder for compression ratio.
+                    lzma_ops.match_finder(xz2::stream::MatchFinder::BinaryTree4);
+                    lzma_ops.mode(xz2::stream::Mode::Normal);
+                    // Set nice len to the maximum for the best compression ratio.
+                    lzma_ops.nice_len(273);
+                    // Set depth to a reasonable value; 0 means auto, 1000 is somewhat high but gives
+                    // good results.
+                    lzma_ops.depth(1000);
+                    // 2 is the default and does well for most files.
+                    lzma_ops.position_bits(2);
+                    // 0 is the default and does well for most files.
+                    lzma_ops.literal_position_bits(0);
+                    // 3 is the default and does well for most files.
+                    lzma_ops.literal_context_bits(3);
+
+                    filters.lzma2(&lzma_ops);
+
+                    // FIXME: Do we want a checksum as part of compression?
+                    let stream = xz2::stream::Stream::new_stream_encoder(
+                        &filters,
+                        xz2::stream::Check::None,
+                    )
+                    .unwrap();
+                    let xz_recompressed = xz_path.with_extension("xz_recompressed");
+                    let xz_out = File::create(&xz_recompressed)?;
+                    let mut xz_out = xz2::write::XzEncoder::new_stream(
+                        std::io::BufWriter::new(xz_out),
+                        stream,
+                    );
+                    let mut xz_orig = XzDecoder::new(File::open(xz_path)?);
+                    io::copy(&mut xz_orig, &mut xz_out)?;
+                    fs::rename(&xz_recompressed, &xz_path)?;
+                }
 
                 Ok::<(), Error>(())
             })
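For reference, the same tuned filter chain can be exercised outside promote-release. The sketch below recompresses a single tarball in place using the xz2 crate; the input path, the temporary .xz_recompressed suffix, and the error handling are illustrative assumptions, and only the encoder settings mirror the diff above.

// Minimal sketch: recompress one .xz archive with the tuned LZMA2 settings.
use std::fs::{self, File};
use std::io::{self, BufWriter, Write};
use std::path::Path;

use xz2::read::XzDecoder;
use xz2::stream::{Check, Filters, LzmaOptions, MatchFinder, Mode, Stream};
use xz2::write::XzEncoder;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Hypothetical input; promote-release drives this from its artifact loop.
    let path = Path::new("rustc-nightly-x86_64-unknown-linux-gnu.tar.xz");

    // Start from preset 9, then tighten the knobs that matter for ratio.
    let mut lzma_ops = LzmaOptions::new_preset(9)?;
    lzma_ops.dict_size(64 * 1024 * 1024); // 64 MiB dictionary
    lzma_ops.match_finder(MatchFinder::BinaryTree4);
    lzma_ops.mode(Mode::Normal);
    lzma_ops.nice_len(273); // maximum nice length
    lzma_ops.depth(1000);
    // position_bits(2), literal_position_bits(0), literal_context_bits(3) are
    // the defaults; the diff above sets them explicitly.

    let mut filters = Filters::new();
    filters.lzma2(&lzma_ops);
    // No integrity check, matching the FIXME above.
    let stream = Stream::new_stream_encoder(&filters, Check::None)?;

    // Decode the existing archive, re-encode it with the tuned settings, and
    // swap the result into place.
    let mut source = XzDecoder::new(File::open(path)?);
    let tmp = path.with_extension("xz_recompressed");
    let mut sink = XzEncoder::new_stream(BufWriter::new(File::create(&tmp)?), stream);
    io::copy(&mut source, &mut sink)?;
    sink.finish()?.flush()?;
    fs::rename(&tmp, path)?;
    Ok(())
}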
