Skip to content

Recompress xz -> xz with tuned compression options #58

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions local/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ export PROMOTE_RELEASE_UPLOAD_ADDR="http://localhost:9000/static"
export PROMOTE_RELEASE_UPLOAD_BUCKET="static"
export PROMOTE_RELEASE_UPLOAD_STORAGE_CLASS="STANDARD"
export PROMOTE_RELEASE_UPLOAD_DIR="dist"
# Enable xz recompression to check it in CI
export PROMOTE_RELEASE_RECOMPRESS_XZ=1
# Environment variables used only by local releases
export PROMOTE_RELEASE_BYPASS_STARTUP_CHECKS="1"
export PROMOTE_RELEASE_GZIP_COMPRESSION_LEVEL="1" # Faster recompressions
Expand Down
11 changes: 8 additions & 3 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,12 @@ pub(crate) struct Config {
/// * Preventing multiple releases on stable and beta of the same version number.
pub(crate) bypass_startup_checks: bool,

/// Whether to force the recompression of .gz files into .xz.
pub(crate) wip_recompress: bool,
/// Whether to force the recompression from input tarballs into .gz compressed tarballs.
///
/// This is on by default if .gz tarballs aren't available in the input.
pub(crate) recompress_gz: bool,
/// Whether to force the recompression from input tarballs into highly compressed .xz tarballs.
pub(crate) recompress_xz: bool,

/// The compression level to use when recompressing tarballs with gzip.
pub(crate) gzip_compression_level: u32,
Expand Down Expand Up @@ -212,7 +216,8 @@ impl Config {
upload_bucket: require_env("UPLOAD_BUCKET")?,
storage_class: default_env("UPLOAD_STORAGE_CLASS", "INTELLIGENT_TIERING".into())?,
upload_dir: require_env("UPLOAD_DIR")?,
wip_recompress: bool_env("WIP_RECOMPRESS")?,
recompress_xz: bool_env("RECOMPRESS_XZ")?,
recompress_gz: bool_env("RECOMPRESS_GZ")?,
rustc_tag_repository: maybe_env("RUSTC_TAG_REPOSITORY")?,
blog_repository: maybe_env("BLOG_REPOSITORY")?,
blog_pr: maybe_env("BLOG_MERGE_PR")?,
Expand Down
85 changes: 71 additions & 14 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -380,14 +380,11 @@ impl Context {
Some("asc") | Some("sha256") => {
fs::remove_file(&path)?;
}
// Generate *.gz from *.xz...
// Store off the input files for potential recompression.
Some("xz") => {
let gz_path = path.with_extension("gz");
if self.config.wip_recompress || !gz_path.is_file() {
to_recompress.push((path.to_path_buf(), gz_path));
}
to_recompress.push(path.to_path_buf());
}
Some("gz") if self.config.wip_recompress => {
Some("gz") if self.config.recompress_gz => {
fs::remove_file(&path)?;
}
_ => {}
Expand All @@ -404,19 +401,79 @@ impl Context {
to_recompress.len(),
to_recompress.len().min(rayon::current_num_threads()),
);
println!(
"gz recompression enabled: {} (note: may occur anyway for missing gz artifacts)",
self.config.recompress_gz
);
println!("xz recompression enabled: {}", self.config.recompress_xz);
let recompress_start = Instant::now();

let recompress_gz = self.config.recompress_gz;
let recompress_xz = self.config.recompress_xz;
let compression_level = flate2::Compression::new(self.config.gzip_compression_level);
to_recompress
.par_iter()
.map(|(xz_path, gz_path)| {
println!("recompressing {}...", gz_path.display());

let xz = File::open(xz_path)?;
let mut xz = XzDecoder::new(xz);
let gz = File::create(gz_path)?;
let mut gz = flate2::write::GzEncoder::new(gz, compression_level);
io::copy(&mut xz, &mut gz)?;
.map(|xz_path| {
println!("recompressing {}...", xz_path.display());
let gz_path = xz_path.with_extension("gz");

// Produce gzip if explicitly enabled or the destination file doesn't exist.
if recompress_gz || !gz_path.is_file() {
let mut xz_orig = XzDecoder::new(File::open(xz_path)?);
let gz = File::create(gz_path)?;
let mut gz = flate2::write::GzEncoder::new(gz, compression_level);
io::copy(&mut xz_orig, &mut gz)?;
}

// xz recompression with more aggressive settings than we want to take the time
// for in rust-lang/rust CI. This cuts 5-15% off of the produced tarballs.
//
// Note that this is using a single-threaded compressor as we're parallelizing
// via rayon already. In rust-lang/rust we were trying to use parallel
// compression, but the default block size for that is 3*dict_size so we
// weren't actually using more than one core in most of the builders with
// <192MB uncompressed tarballs. In promote-release since we're recompressing
// 100s of tarballs there's no need for each individual compression to be
// parallel.
if recompress_xz {
let mut filters = xz2::stream::Filters::new();
let mut lzma_ops = xz2::stream::LzmaOptions::new_preset(9).unwrap();
// This sets the overall dictionary size, which is also how much memory (baseline)
// is needed for decompression.
lzma_ops.dict_size(64 * 1024 * 1024);
// Use the best match finder for compression ratio.
lzma_ops.match_finder(xz2::stream::MatchFinder::BinaryTree4);
lzma_ops.mode(xz2::stream::Mode::Normal);
// Set nice len to the maximum for best compression ratio
lzma_ops.nice_len(273);
// Set depth to a reasonable value, 0 means auto, 1000 is somewhat high but gives
// good results.
lzma_ops.depth(1000);
// 2 is the default and does well for most files
lzma_ops.position_bits(2);
// 0 is the default and does well for most files
lzma_ops.literal_position_bits(0);
// 3 is the default and does well for most files
lzma_ops.literal_context_bits(3);

filters.lzma2(&lzma_ops);

// FIXME: Do we want a checksum as part of compression?
let stream = xz2::stream::Stream::new_stream_encoder(
&filters,
xz2::stream::Check::None,
)
.unwrap();
let xz_recompressed = xz_path.with_extension("xz_recompressed");
let xz_out = File::create(&xz_recompressed)?;
let mut xz_out = xz2::write::XzEncoder::new_stream(
std::io::BufWriter::new(xz_out),
stream,
);
let mut xz_orig = XzDecoder::new(File::open(xz_path)?);
io::copy(&mut xz_orig, &mut xz_out)?;
fs::rename(&xz_recompressed, xz_path)?;
}

Ok::<(), Error>(())
})
Expand Down