From d68bd6f88152205bb9b6395ab1b7e82786804bfd Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Wed, 22 Jan 2025 02:55:54 -0500 Subject: [PATCH 1/8] Draft experiment to investigate `rustc` SIGSEGV This is to investigate the problem on the `test-fast` job with the new ARM64 runner described in #1790. This experiment does not produce useful results yet, because it has no way to distinguish happenstance from correlation. To do that, I need either to rerun each job repeatedly, or further parameterize the matrix to do that. I'll be doing the latter, but right now this dimension has size 1 (i.e., the only value of `number` is `0`) so I don't start a large number of jobs when something is broken due to a mistake in the workflows. --- .github/workflows/arm-segv-experiment.yml | 37 +++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 .github/workflows/arm-segv-experiment.yml diff --git a/.github/workflows/arm-segv-experiment.yml b/.github/workflows/arm-segv-experiment.yml new file mode 100644 index 00000000000..bc7e4d64b1d --- /dev/null +++ b/.github/workflows/arm-segv-experiment.yml @@ -0,0 +1,37 @@ +name: AArch64 SIGSEGV experiment + +on: push + +jobs: + test-fast: + strategy: + matrix: + os-ver: [ '22.04', '24.04' ] + channel: [ stable, beta, nightly ] + increase-stack: [ false, true ] + number: [ 0 ] # FIXME: Increase, so results are meaningful. 
+ + fail-fast: false + + runs-on: ubuntu-${{ matrix.os-ver }}-arm + + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + with: + toolchain: ${{ matrix.channel }} + # - uses: Swatinem/rust-cache@v2 + - uses: taiki-e/install-action@v2 + with: + tool: nextest + - name: Set RUST_MIN_STACK + if: matrix.increase-stack + run: echo RUST_MIN_STACK=16777216 >>"$GITHUB_ENV" + - name: Test (nextest) + env: + GIX_TEST_CREATE_ARCHIVES_EVEN_ON_CI: '1' + run: cargo nextest run --workspace --no-fail-fast + - name: Doctest + run: cargo test --workspace --doc --no-fail-fast + - name: Check that tracked archives are up to date + run: git diff --exit-code # If this fails, the fix is usually to commit a regenerated archive. From d0eb4c6aae89f88a63b9e09b7caf6ad7418b9d54 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Wed, 22 Jan 2025 03:11:00 -0500 Subject: [PATCH 2/8] Do the real `rustc` SIGSEGV test This makes two changes, with the intent of producing a usable test: - Removes `nightly`, since a test is currently failing on it. It can be tested later in case it fixes the SIGSEGV bug, if other changes don't help. - Have `number` take on 16 values instead of just one. This is to make it possible to figure something out about how often the failure happens with the other variables and whether the other variables make a difference. This is needed because the failures are nondeterministic, may not even usually happen, or may happen less often but still happen for some combination of the other variables. (See #1790 for context.) 
--- .github/workflows/arm-segv-experiment.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/arm-segv-experiment.yml b/.github/workflows/arm-segv-experiment.yml index bc7e4d64b1d..338e7d44a2f 100644 --- a/.github/workflows/arm-segv-experiment.yml +++ b/.github/workflows/arm-segv-experiment.yml @@ -7,9 +7,9 @@ jobs: strategy: matrix: os-ver: [ '22.04', '24.04' ] - channel: [ stable, beta, nightly ] + channel: [ stable, beta ] # `gix-macros::macros momo::ux` currently fails on `nightly`. increase-stack: [ false, true ] - number: [ 0 ] # FIXME: Increase, so results are meaningful. + number: [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' ] fail-fast: false From 836ab21e06ba47135a19e64b5fb12e5b05efb800 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Wed, 22 Jan 2025 04:57:30 -0500 Subject: [PATCH 3/8] Test more reps; stop testing RUST_MIN_STACK The previous experiment[1][2] didn't have enough memory-related errors to clearly show which values of the variables have an effect, though it *looked* like the memory-related errors in `rustc` only happened in Ubuntu 24.04 (not 22.04) and only happened on the stable channel (not beta). That's one reason to increase the total number of jobs in the experiment. Another reason is that the memory-related errors are more varied. Not all were true memory errors involving SIGSEGV and SIGBUS anymore. Some were, same as reported in [3]. 
But some others were panics, looking like this (the index and slice vary but, in each, the start index is much larger than the length): thread 'rustc' panicked at /rustc/9fc6b43126469e3858e2fe86cafb4f0fd5068869/compiler/rustc_serialize/src/opaque.rs:269:45: range start index 159846347648097871 out of range for slice of length 39963722 Since the distribution of errors across jobs might also have been related to the order and times in which jobs started, for example if there are inadvertent differences between different hosts (the ARM64 Linux runners are in preview, so this seems plausible, though fairly unlikely), this now expresses the repetition with two variables: a high-order one, listed first in the matrix, and a low-order one, listed last in the matrix. Besides allowing more reps with the same values of the meaningful variables, the reason to stop testing with `RUST_MIN_STACK` is that it didn't seem to make a difference other than to change the message shown, which suggests setting it to an even higher value. [1]: e71b0cfc46f77e0888d03640e7ddfb2c5cd2c41d [2]: https://github.com/EliahKagan/gitoxide/actions/runs/12903958398 [3]: https://github.com/GitoxideLabs/gitoxide/issues/1790 --- .github/workflows/arm-segv-experiment.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/arm-segv-experiment.yml b/.github/workflows/arm-segv-experiment.yml index 338e7d44a2f..e1caecc6069 100644 --- a/.github/workflows/arm-segv-experiment.yml +++ b/.github/workflows/arm-segv-experiment.yml @@ -6,10 +6,10 @@ jobs: test-fast: strategy: matrix: + num-high: [ 0, 1, 2, 3, 4, 5, 6, 7 ] os-ver: [ '22.04', '24.04' ] channel: [ stable, beta ] # `gix-macros::macros momo::ux` currently fails on `nightly`. 
- increase-stack: [ false, true ] - number: [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' ] + num-low: [ 0, 1, 2, 3, 4, 5, 6, 7 ] fail-fast: false @@ -24,9 +24,9 @@ jobs: - uses: taiki-e/install-action@v2 with: tool: nextest - - name: Set RUST_MIN_STACK - if: matrix.increase-stack - run: echo RUST_MIN_STACK=16777216 >>"$GITHUB_ENV" + # - name: Set RUST_MIN_STACK + # if: matrix.increase-stack + # run: echo RUST_MIN_STACK=16777216 >>"$GITHUB_ENV" - name: Test (nextest) env: GIX_TEST_CREATE_ARCHIVES_EVEN_ON_CI: '1' From 8d4a22e6621c7ca7f75aafd426b3cb481739b6c6 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Thu, 23 Jan 2025 03:44:11 -0500 Subject: [PATCH 4/8] Redo experiment 1 with current beta and nightly When using `dtolnay/rust-toolchain` with the `toolchain` key to specify a channel, the action version should be given as `@master`. But I accidentally kept it at `@stable`! This caused `beta` and `nightly` to refer to the most recent beta and nightly builds *prior* to the current stable version. That made the conclusions about beta and nightly builds inaccurate. This rectifies that error and repeats the experiment. See e71b0cf (1f3f6b5), #1790, and rust-lang/rust#135867 for context. (I made this mistake in both experiment 1 and experiment 2, having wrongly thought I'd changed `@stable` to `@master` for experiment 1. This commit just repeats experiment 1, but experiment 2 should also be repeated for the same reason.) 
--- .github/workflows/arm-segv-experiment.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/arm-segv-experiment.yml b/.github/workflows/arm-segv-experiment.yml index e1caecc6069..74d70e40735 100644 --- a/.github/workflows/arm-segv-experiment.yml +++ b/.github/workflows/arm-segv-experiment.yml @@ -6,10 +6,10 @@ jobs: test-fast: strategy: matrix: - num-high: [ 0, 1, 2, 3, 4, 5, 6, 7 ] os-ver: [ '22.04', '24.04' ] channel: [ stable, beta ] # `gix-macros::macros momo::ux` currently fails on `nightly`. - num-low: [ 0, 1, 2, 3, 4, 5, 6, 7 ] + increase-stack: [ false, true ] + number: [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' ] fail-fast: false @@ -17,16 +17,16 @@ jobs: steps: - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@stable + - uses: dtolnay/rust-toolchain@master with: toolchain: ${{ matrix.channel }} # - uses: Swatinem/rust-cache@v2 - uses: taiki-e/install-action@v2 with: tool: nextest - # - name: Set RUST_MIN_STACK - # if: matrix.increase-stack - # run: echo RUST_MIN_STACK=16777216 >>"$GITHUB_ENV" + - name: Set RUST_MIN_STACK + if: matrix.increase-stack + run: echo RUST_MIN_STACK=16777216 >>"$GITHUB_ENV" - name: Test (nextest) env: GIX_TEST_CREATE_ARCHIVES_EVEN_ON_CI: '1' From d2d386fec45727092de6e81e16b929c04bfa6845 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Thu, 23 Jan 2025 05:42:04 -0500 Subject: [PATCH 5/8] Redo experiment 2 with current beta As noted in the preceding commit, when I ran experiments 1 and 2 the first time, I accidentally used `dtolnay/rust-toolchain@stable` instead of `dtolnay/rust-toolchain@master`, even though the latter is needed to use current values of the `toolchain` key rather than the builds they referred to at the time the most recent stable build was updated. The preceding commit redid experiment 1 with that fixed. This commit redoes experiment 2 with the same fix. 
See 5a71963 (1b3e2cd), #1790, and rust-lang/rust#135867 for context. --- .github/workflows/arm-segv-experiment.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/arm-segv-experiment.yml b/.github/workflows/arm-segv-experiment.yml index 74d70e40735..30275df1233 100644 --- a/.github/workflows/arm-segv-experiment.yml +++ b/.github/workflows/arm-segv-experiment.yml @@ -6,10 +6,10 @@ jobs: test-fast: strategy: matrix: + num-high: [ 0, 1, 2, 3, 4, 5, 6, 7 ] os-ver: [ '22.04', '24.04' ] - channel: [ stable, beta ] # `gix-macros::macros momo::ux` currently fails on `nightly`. - increase-stack: [ false, true ] - number: [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' ] + channel: [ stable, beta ] + num-low: [ 0, 1, 2, 3, 4, 5, 6, 7 ] fail-fast: false @@ -24,9 +24,9 @@ jobs: - uses: taiki-e/install-action@v2 with: tool: nextest - - name: Set RUST_MIN_STACK - if: matrix.increase-stack - run: echo RUST_MIN_STACK=16777216 >>"$GITHUB_ENV" + # - name: Set RUST_MIN_STACK + # if: matrix.increase-stack + # run: echo RUST_MIN_STACK=16777216 >>"$GITHUB_ENV" - name: Test (nextest) env: GIX_TEST_CREATE_ARCHIVES_EVEN_ON_CI: '1' From 3beb112cf48809c74ee81646e0402edd008590a8 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Thu, 23 Jan 2025 06:56:28 -0500 Subject: [PATCH 6/8] Draft experiment 3 to try more ways to install In case the installation method makes a difference. Also, this brings back testing of the unstable toolchain. This has just one job for each meaningful combination, so mistakes in the experiment workflow can be found before doing nine times as much work. The experiment this prepares should hopefully shed more light on #1790 (or increase confidence in the observations so far), but this is just preparation: variation across runs will likely be due to the bug being nondeterministic. 
--- .github/workflows/arm-segv-experiment.yml | 29 ++++++++++++++++------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/.github/workflows/arm-segv-experiment.yml b/.github/workflows/arm-segv-experiment.yml index 30275df1233..25a0a2ce4f8 100644 --- a/.github/workflows/arm-segv-experiment.yml +++ b/.github/workflows/arm-segv-experiment.yml @@ -6,10 +6,12 @@ jobs: test-fast: strategy: matrix: - num-high: [ 0, 1, 2, 3, 4, 5, 6, 7 ] + num-high: [ 0 ] # [ 0, 1, 2 ] os-ver: [ '22.04', '24.04' ] - channel: [ stable, beta ] - num-low: [ 0, 1, 2, 3, 4, 5, 6, 7 ] + channel: [ stable, beta, nightly ] + get-rust-by: [ rt-action, curl-sh ] + get-nextest-by: [ i-action, cargo-qi ] + num-low: [ 0 ] # [ 0, 1, 2 ] fail-fast: false @@ -17,16 +19,27 @@ jobs: steps: - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@master + - if: matrix.get-rust-by == 'rt-action' + uses: dtolnay/rust-toolchain@master with: toolchain: ${{ matrix.channel }} + - if: matrix.get-rust-by == 'curl-sh' + name: Install Rust via sh.rustup.rs + run: | + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | + sh -s -- -y --default-toolchain ${{ matrix.channel }} + echo "PATH=$HOME/.cargo/bin:$PATH" >> "$GITHUB_ENV" # - uses: Swatinem/rust-cache@v2 - - uses: taiki-e/install-action@v2 + - if: matrix.get-nextest-by == 'i-action' + uses: taiki-e/install-action@v2 with: tool: nextest - # - name: Set RUST_MIN_STACK - # if: matrix.increase-stack - # run: echo RUST_MIN_STACK=16777216 >>"$GITHUB_ENV" + - if: matrix.get-nextest-by == 'cargo-qi' + name: Install nextest with quickinstall/binstall + run: | + cargo install cargo-quickinstall + cargo quickinstall cargo-binstall + cargo quickinstall cargo-nextest - name: Test (nextest) env: GIX_TEST_CREATE_ARCHIVES_EVEN_ON_CI: '1' From a0e923b8d8419a084098e0020d148328f67a0e81 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Thu, 23 Jan 2025 08:26:09 -0500 Subject: [PATCH 7/8] Run experiment 3 This varies: - `ubuntu-22.04-arm` vs. 
`ubuntu-24.04-arm` GHA runner. - Installing Rust via the `rust-toolchain` action vs. with curl.sh. - Installing the stable vs. beta Rust toolchain. - Installing nextest via `install-action` vs. quickinstall/binstall. *If* this also confirms that the only fully consistent factor in whether errors happen is `ubuntu-22.04-arm` vs. `ubuntu-24.04-arm`, then that will make it clearer that the problem is likely specific to the `ubuntu-24.04-arm` runner. See #1790 and rust-lang/rust#135867 for context. --- .github/workflows/arm-segv-experiment.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/arm-segv-experiment.yml b/.github/workflows/arm-segv-experiment.yml index 25a0a2ce4f8..ed13e13aa6b 100644 --- a/.github/workflows/arm-segv-experiment.yml +++ b/.github/workflows/arm-segv-experiment.yml @@ -6,12 +6,12 @@ jobs: test-fast: strategy: matrix: - num-high: [ 0 ] # [ 0, 1, 2 ] + num-high: [ 0, 1, 2, 3 ] os-ver: [ '22.04', '24.04' ] - channel: [ stable, beta, nightly ] + channel: [ stable, beta ] # `gix-macros::macros momo::ux` currently fails on `nightly`. get-rust-by: [ rt-action, curl-sh ] get-nextest-by: [ i-action, cargo-qi ] - num-low: [ 0 ] # [ 0, 1, 2 ] + num-low: [ 0, 1, 2, 3 ] fail-fast: false From 4062b567b92fb5533cf8b8781803e8abee8e7c77 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Thu, 23 Jan 2025 19:13:59 -0500 Subject: [PATCH 8/8] Use `ubuntu-22.04-arm`; clean up temporary changes In the AArch64/ARM64 (64-bit, non-containerized) test-fast job, this uses the `ubuntu-22.04-arm` runner instead of the `ubuntu-24.04-arm` runner. This is to avoid the errors described in #1790, i.e., to work around rust-lang/rust#135867. Such problems have not been observed on the 22.04 runner, including in tests intended to find them, and switching to it seems to be a complete workaround for the problem. 
In contrast, continuing to use the 24.04 runner, but attempting to work around the problem by switching from the stable to the beta channel, looks like it would greatly decrease the frequency of the errors but not eliminate them. A problem with `actions/checkout` failing is likewise observed on the 24.04 runner only, so using 22.04 avoids that too. Because that seems like a complete workaround, this also reverts 50da7cb (#1792). That is to say that the ARM64 test-fast job is again in the `test-fast` matrix. It is capable of cancelling or being cancelled by the other `test-fast` checks. Code duplication in the workflow is somewhat decreased. The job will again block PR auto-merge. Similar errors do not seem to have occurred in the `test-32bit` job that runs an arm32v7 Docker image in `ubuntu-24.04-arm`, and it is not clear that changing the runner image would help with #1780, nor even if that issue is still happening. Therefore, it is not changed there at this time. This affects only ARM Linux runners. The x86-64 runners continue to use `ubuntu-latest`, which is currently resolved to `ubuntu-24.04`, and that does not need to be changed. Likewise, the `macos-latest` runners use ARM processors (Apple Silicon) and they are fine. Various experiments were done in a separate workflow. This commit also removes that workflow, because it is not actively needed anymore, and because, if kept, it would have to be modified to avoid running hundreds of extra checks on each and every push. 
--- .github/workflows/arm-segv-experiment.yml | 50 ----------------------- .github/workflows/ci.yml | 21 +--------- 2 files changed, 1 insertion(+), 70 deletions(-) delete mode 100644 .github/workflows/arm-segv-experiment.yml diff --git a/.github/workflows/arm-segv-experiment.yml b/.github/workflows/arm-segv-experiment.yml deleted file mode 100644 index ed13e13aa6b..00000000000 --- a/.github/workflows/arm-segv-experiment.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: AArch64 SIGSEGV experiment - -on: push - -jobs: - test-fast: - strategy: - matrix: - num-high: [ 0, 1, 2, 3 ] - os-ver: [ '22.04', '24.04' ] - channel: [ stable, beta ] # `gix-macros::macros momo::ux` currently fails on `nightly`. - get-rust-by: [ rt-action, curl-sh ] - get-nextest-by: [ i-action, cargo-qi ] - num-low: [ 0, 1, 2, 3 ] - - fail-fast: false - - runs-on: ubuntu-${{ matrix.os-ver }}-arm - - steps: - - uses: actions/checkout@v4 - - if: matrix.get-rust-by == 'rt-action' - uses: dtolnay/rust-toolchain@master - with: - toolchain: ${{ matrix.channel }} - - if: matrix.get-rust-by == 'curl-sh' - name: Install Rust via sh.rustup.rs - run: | - curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | - sh -s -- -y --default-toolchain ${{ matrix.channel }} - echo "PATH=$HOME/.cargo/bin:$PATH" >> "$GITHUB_ENV" - # - uses: Swatinem/rust-cache@v2 - - if: matrix.get-nextest-by == 'i-action' - uses: taiki-e/install-action@v2 - with: - tool: nextest - - if: matrix.get-nextest-by == 'cargo-qi' - name: Install nextest with quickinstall/binstall - run: | - cargo install cargo-quickinstall - cargo quickinstall cargo-binstall - cargo quickinstall cargo-nextest - - name: Test (nextest) - env: - GIX_TEST_CREATE_ARCHIVES_EVEN_ON_CI: '1' - run: cargo nextest run --workspace --no-fail-fast - - name: Doctest - run: cargo test --workspace --doc --no-fail-fast - - name: Check that tracked archives are up to date - run: git diff --exit-code # If this fails, the fix is usually to commit a regenerated archive. 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 06d9491298d..55f119dcf4c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -135,6 +135,7 @@ jobs: - windows-latest - macos-latest - ubuntu-latest + - ubuntu-22.04-arm runs-on: ${{ matrix.os }} @@ -160,25 +161,6 @@ jobs: - name: Check that tracked archives are up to date run: git diff --exit-code # If this fails, the fix is usually to commit a regenerated archive. - test-fast-arm: - runs-on: ubuntu-24.04-arm - - steps: - - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@stable - - uses: Swatinem/rust-cache@v2 - - uses: taiki-e/install-action@v2 - with: - tool: nextest - - name: Test (nextest) - env: - GIX_TEST_CREATE_ARCHIVES_EVEN_ON_CI: '1' - run: cargo nextest run --workspace --no-fail-fast - - name: Doctest - run: cargo test --workspace --doc --no-fail-fast - - name: Check that tracked archives are up to date - run: git diff --exit-code # If this fails, the fix is usually to commit a regenerated archive. - test-fixtures-windows: runs-on: windows-latest @@ -405,7 +387,6 @@ jobs: env: # List all jobs that are intended NOT to block PR auto-merge here. EXPECTED_NONBLOCKING_JOBS: |- - test-fast-arm cargo-deny-advisories wasm tests-pass