Commit 55983f3

Recompress xz -> xz with tuned compression options
This adds support for recompressing the .xz tarballs in promote-release, with the goal of moving rust-lang/rust CI back to producing "balanced" profile binaries rather than the highly compressed ones currently produced. This will cut rust-lang/rust CI times while keeping the production artifacts in static.r-l.o equivalently compressed.

This PR doesn't enable the .xz recompression by default; that will be done in a follow-up simpleinfra push. Depending on the impact to (especially) stable build times, we may wish to bump the promote-release production container from 8 cores to 72 cores (the next largest container) so that we still finish in a reasonable amount of time.
Parent: 707e49f

3 files changed: +81, -17 lines

local/run.sh

Lines changed: 2 additions & 0 deletions
@@ -118,6 +118,8 @@ export PROMOTE_RELEASE_UPLOAD_ADDR="http://localhost:9000/static"
 export PROMOTE_RELEASE_UPLOAD_BUCKET="static"
 export PROMOTE_RELEASE_UPLOAD_STORAGE_CLASS="STANDARD"
 export PROMOTE_RELEASE_UPLOAD_DIR="dist"
+# Enable xz recompression to check it in CI
+export PROMOTE_RELEASE_RECOMPRESS_XZ=1
 # Environment variables used only by local releases
 export PROMOTE_RELEASE_BYPASS_STARTUP_CHECKS="1"
 export PROMOTE_RELEASE_GZIP_COMPRESSION_LEVEL="1" # Faster recompressions

src/config.rs

Lines changed: 8 additions & 3 deletions
@@ -125,8 +125,12 @@ pub(crate) struct Config {
     /// * Preventing multiple releases on stable and beta of the same version number.
     pub(crate) bypass_startup_checks: bool,
 
-    /// Whether to force the recompression of .gz files into .xz.
-    pub(crate) wip_recompress: bool,
+    /// Whether to force the recompression from input tarballs into .gz compressed tarballs.
+    ///
+    /// This is on by default if .gz tarballs aren't available in the input.
+    pub(crate) recompress_gz: bool,
+    /// Whether to force the recompression from input tarballs into highly compressed .xz tarballs.
+    pub(crate) recompress_xz: bool,
 
     /// The compression level to use when recompressing tarballs with gzip.
     pub(crate) gzip_compression_level: u32,
@@ -212,7 +216,8 @@ impl Config {
             upload_bucket: require_env("UPLOAD_BUCKET")?,
             storage_class: default_env("UPLOAD_STORAGE_CLASS", "INTELLIGENT_TIERING".into())?,
             upload_dir: require_env("UPLOAD_DIR")?,
-            wip_recompress: bool_env("WIP_RECOMPRESS")?,
+            recompress_xz: bool_env("RECOMPRESS_XZ")?,
+            recompress_gz: bool_env("RECOMPRESS_GZ")?,
             rustc_tag_repository: maybe_env("RUSTC_TAG_REPOSITORY")?,
             blog_repository: maybe_env("BLOG_REPOSITORY")?,
             blog_pr: maybe_env("BLOG_MERGE_PR")?,
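
local/run.sh sets PROMOTE_RELEASE_RECOMPRESS_XZ=1 while the config reads bool_env("RECOMPRESS_XZ"), so the helper presumably prepends the PROMOTE_RELEASE_ prefix and treats an unset variable as disabled. The actual bool_env isn't part of this diff; the following is a minimal stand-in sketch of that assumed behaviour:

use std::env::{self, VarError};

// Hypothetical stand-in for promote-release's `bool_env` helper (the real
// implementation is not shown in this commit). Assumes the variable carries the
// PROMOTE_RELEASE_ prefix, as in local/run.sh, and that "unset" means disabled.
fn bool_env(name: &str) -> Result<bool, VarError> {
    match env::var(format!("PROMOTE_RELEASE_{name}")) {
        Ok(value) => Ok(value == "1" || value.eq_ignore_ascii_case("true")),
        Err(VarError::NotPresent) => Ok(false),
        Err(err) => Err(err),
    }
}

fn main() -> Result<(), VarError> {
    // Mirrors the new Config fields: both toggles default to off unless the
    // corresponding PROMOTE_RELEASE_RECOMPRESS_* variable is set.
    let recompress_xz = bool_env("RECOMPRESS_XZ")?;
    let recompress_gz = bool_env("RECOMPRESS_GZ")?;
    println!("recompress_xz: {recompress_xz}, recompress_gz: {recompress_gz}");
    Ok(())
}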

src/main.rs

Lines changed: 71 additions & 14 deletions
@@ -380,14 +380,11 @@ impl Context {
                 Some("asc") | Some("sha256") => {
                     fs::remove_file(&path)?;
                 }
-                // Generate *.gz from *.xz...
+                // Store off the input files for potential recompression.
                 Some("xz") => {
-                    let gz_path = path.with_extension("gz");
-                    if self.config.wip_recompress || !gz_path.is_file() {
-                        to_recompress.push((path.to_path_buf(), gz_path));
-                    }
+                    to_recompress.push(path.to_path_buf());
                 }
-                Some("gz") if self.config.wip_recompress => {
+                Some("gz") if self.config.recompress_gz => {
                     fs::remove_file(&path)?;
                 }
                 _ => {}
@@ -404,19 +401,79 @@ impl Context {
             to_recompress.len(),
             to_recompress.len().min(rayon::current_num_threads()),
         );
+        println!(
+            "gz recompression enabled: {} (note: may occur anyway for missing gz artifacts)",
+            self.config.recompress_gz
+        );
+        println!("xz recompression enabled: {}", self.config.recompress_xz);
         let recompress_start = Instant::now();
 
+        let recompress_gz = self.config.recompress_gz;
+        let recompress_xz = self.config.recompress_xz;
         let compression_level = flate2::Compression::new(self.config.gzip_compression_level);
         to_recompress
             .par_iter()
-            .map(|(xz_path, gz_path)| {
-                println!("recompressing {}...", gz_path.display());
-
-                let xz = File::open(xz_path)?;
-                let mut xz = XzDecoder::new(xz);
-                let gz = File::create(gz_path)?;
-                let mut gz = flate2::write::GzEncoder::new(gz, compression_level);
-                io::copy(&mut xz, &mut gz)?;
+            .map(|xz_path| {
+                println!("recompressing {}...", xz_path.display());
+                let gz_path = xz_path.with_extension("gz");
+
+                // Produce gzip if explicitly enabled or the destination file doesn't exist.
+                if recompress_gz || !gz_path.is_file() {
+                    let mut xz_orig = XzDecoder::new(File::open(xz_path)?);
+                    let gz = File::create(gz_path)?;
+                    let mut gz = flate2::write::GzEncoder::new(gz, compression_level);
+                    io::copy(&mut xz_orig, &mut gz)?;
+                }
+
+                // xz recompression with more aggressive settings than we want to take the time
+                // for in rust-lang/rust CI. This cuts 5-15% off of the produced tarballs.
+                //
+                // Note that this is using a single-threaded compressor as we're parallelizing
+                // via rayon already. In rust-lang/rust we were trying to use parallel
+                // compression, but the default block size for that is 3*dict_size so we
+                // weren't actually using more than one core in most of the builders with
+                // <192MB uncompressed tarballs. In promote-release since we're recompressing
+                // 100s of tarballs there's no need for each individual compression to be
+                // parallel.
+                if recompress_xz {
+                    let mut filters = xz2::stream::Filters::new();
+                    let mut lzma_ops = xz2::stream::LzmaOptions::new_preset(9).unwrap();
+                    // This sets the overall dictionary size, which is also how much memory (baseline)
+                    // is needed for decompression.
+                    lzma_ops.dict_size(64 * 1024 * 1024);
+                    // Use the best match finder for compression ratio.
+                    lzma_ops.match_finder(xz2::stream::MatchFinder::BinaryTree4);
+                    lzma_ops.mode(xz2::stream::Mode::Normal);
+                    // Set nice_len to the maximum for the best compression ratio.
+                    lzma_ops.nice_len(273);
+                    // Set depth to a reasonable value: 0 means auto, and 1000 is somewhat high but
+                    // gives good results.
+                    lzma_ops.depth(1000);
+                    // 2 is the default and does well for most files.
+                    lzma_ops.position_bits(2);
+                    // 0 is the default and does well for most files.
+                    lzma_ops.literal_position_bits(0);
+                    // 3 is the default and does well for most files.
+                    lzma_ops.literal_context_bits(3);
+
+                    filters.lzma2(&lzma_ops);
+
+                    // FIXME: Do we want a checksum as part of compression?
+                    let stream = xz2::stream::Stream::new_stream_encoder(
+                        &filters,
+                        xz2::stream::Check::None,
+                    )
+                    .unwrap();
+                    let xz_recompressed = xz_path.with_extension("xz_recompressed");
+                    let xz_out = File::create(&xz_recompressed)?;
+                    let mut xz_out = xz2::write::XzEncoder::new_stream(
+                        std::io::BufWriter::new(xz_out),
+                        stream,
+                    );
+                    let mut xz_orig = XzDecoder::new(File::open(xz_path)?);
+                    io::copy(&mut xz_orig, &mut xz_out)?;
+                    fs::rename(&xz_recompressed, xz_path)?;
+                }
 
                 Ok::<(), Error>(())
             })
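
The tuned encoder settings above can be exercised on their own. Below is a minimal, self-contained sketch (not the production tool) that recompresses a single .xz file in place with the same xz2 filter chain; the temporary-file naming and argument handling are illustrative only:

use std::{env, fs, fs::File, io};

use xz2::read::XzDecoder;
use xz2::stream::{Check, Filters, LzmaOptions, MatchFinder, Mode, Stream};
use xz2::write::XzEncoder;

fn main() -> io::Result<()> {
    let path = env::args().nth(1).expect("usage: recompress <file.tar.xz>");

    // Same filter chain as the hunk above: start from preset 9, then set a 64 MiB
    // dictionary, the BinaryTree4 match finder, nice_len 273, and depth 1000.
    let mut lzma_ops = LzmaOptions::new_preset(9).unwrap();
    lzma_ops.dict_size(64 * 1024 * 1024);
    lzma_ops.match_finder(MatchFinder::BinaryTree4);
    lzma_ops.mode(Mode::Normal);
    lzma_ops.nice_len(273);
    lzma_ops.depth(1000);

    let mut filters = Filters::new();
    filters.lzma2(&lzma_ops);
    // Single-threaded encoder with no checksum, mirroring the settings in the diff.
    let stream = Stream::new_stream_encoder(&filters, Check::None).unwrap();

    // Write to a temporary file, then swap it over the original, mirroring the
    // .xz_recompressed rename in the diff.
    let tmp = format!("{path}.recompressed");
    let mut encoder = XzEncoder::new_stream(File::create(&tmp)?, stream);
    io::copy(&mut XzDecoder::new(File::open(&path)?), &mut encoder)?;
    encoder.finish()?;
    fs::rename(&tmp, &path)?;
    Ok(())
}

As the in-diff comment notes, the dictionary size chosen here is also the baseline memory needed for decompression, so the 64 MiB setting trades decompressor memory for ratio.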
