diff --git a/.github/workflows/circleci-failure-summary-comment.yml b/.github/workflows/circleci-failure-summary-comment.yml
new file mode 100644
index 000000000000..e61eabfc0328
--- /dev/null
+++ b/.github/workflows/circleci-failure-summary-comment.yml
@@ -0,0 +1,195 @@
+name: CircleCI Failure Summary Comment
+# Requires repository secrets:
+# - CI_ARTIFACT_TOKEN: API token with permission to query CircleCI pipelines (same value used by CircleCI contexts)
+# - HF_CI_WRITE_TOKEN: Hugging Face token with write access to the `transformers-community/circleci-test-results` dataset
+
+on:
+ pull_request:
+ types: [opened, synchronize, reopened]
+
+jobs:
+ comment:
+ runs-on: ubuntu-22.04
+ permissions:
+ pull-requests: write
+ env:
+ TARGET_BRANCH: ${{ github.event.pull_request.head.ref }}
+ TARGET_SHA: ${{ github.event.pull_request.head.sha }}
+ PR_NUMBER: ${{ github.event.pull_request.number }}
+ CIRCLE_TOKEN: ${{ secrets.CI_ARTIFACT_TOKEN }}
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+
+ - name: Setup Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: "3.13"
+
+ - name: Install dependencies
+ run: python -m pip install requests huggingface_hub
+
+ - name: Wait for CircleCI check suite completion
+ env:
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ COMMIT_SHA: ${{ github.event.pull_request.head.sha }}
+ github_repository: ${{ github.repository }}
+ run: |
+ echo "Waiting for CircleCI check suite to complete..."
+ end=$((SECONDS+1800))
+ while [ $SECONDS -lt $end ]; do
+ suite_json=$(gh api "repos/${github_repository}/commits/${COMMIT_SHA}/check-suites" --jq '.check_suites[] | select(.app.slug=="circleci-checks")')
+ if [ -z "$suite_json" ]; then
+ echo "CircleCI check suite not found yet, retrying..."
+ else
+ status=$(echo "$suite_json" | jq -r '.status')
+ conclusion=$(echo "$suite_json" | jq -r '.conclusion // empty')
+ echo "Current CircleCI check suite status: $status (conclusion: $conclusion)"
+ if [ "$status" = "completed" ] && [ -n "$conclusion" ]; then
+ break
+ fi
+ fi
+ sleep 20
+ done
+ if [ $SECONDS -ge $end ]; then
+ echo "Timed out waiting for CircleCI check suite."
+ exit 1
+ fi
+
+      - name: Get CircleCI run's failure summary artifact
+ id: circleci
+ env:
+ CIRCLE_TOKEN: ${{ secrets.CI_ARTIFACT_TOKEN }}
+ COMMIT_SHA: ${{ github.event.pull_request.head.sha }}
+ REPO: ${{ github.repository }}
+ run: |
+ # Step 1: Get CircleCI check suite ID
+ echo "Getting check suites for commit ${COMMIT_SHA}..."
+ check_suites=$(curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
+ "https://api.github.com/repos/${REPO}/commits/${COMMIT_SHA}/check-suites")
+
+ circleci_suite_id=$(echo "$check_suites" | jq -r '.check_suites[] | select(.app.slug == "circleci-checks") | .id' | head -n 1)
+ echo "CircleCI check suite ID: ${circleci_suite_id}"
+
+ # Step 2: Get check runs from the CircleCI suite
+ echo "Getting check runs for suite ${circleci_suite_id}..."
+ check_runs=$(curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
+ "https://api.github.com/repos/${REPO}/check-suites/${circleci_suite_id}/check-runs")
+
+ # Step 3: Extract workflow ID from the "run_tests" check run
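+          # The check run's details_url is expected to contain ".../workflows/<workflow-uuid>"; the grep below pulls out that UUID.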
+ workflow_id=$(echo "$check_runs" | jq -r '.check_runs[] | select(.name == "run_tests") | .details_url' | grep -oP 'workflows/\K[a-f0-9-]+')
+ echo "CircleCI Workflow ID: ${workflow_id}"
+
+ # Step 4: Get all jobs in the workflow
+ echo "Getting jobs for workflow ${workflow_id}..."
+ jobs=$(curl -s -H "Circle-Token: ${CIRCLE_TOKEN}" \
+ "https://circleci.com/api/v2/workflow/${workflow_id}/job")
+
+ # Step 5: Extract collection_job details
+ collection_job_number=$(echo "$jobs" | jq -r '.items[] | select(.name == "collection_job") | .job_number')
+ collection_job_id=$(echo "$jobs" | jq -r '.items[] | select(.name == "collection_job") | .id')
+ echo "CircleCI Collection job number: ${collection_job_number}"
+ echo "CircleCI Collection job ID: ${collection_job_id}"
+
+ # Step 6: Get artifacts list
+ echo "Getting artifacts for job ${collection_job_number}..."
+ artifacts=$(curl -s -H "Circle-Token: ${CIRCLE_TOKEN}" \
+ "https://circleci.com/api/v2/project/gh/${REPO}/${collection_job_number}/artifacts")
+
+ echo "$artifacts" | jq '.'
+
+ # Step 7: Download failure_summary.json specifically
+ failure_summary_url=$(echo "$artifacts" | jq -r '.items[] | select(.path == "outputs/failure_summary.json") | .url')
+
+ if [ -z "$failure_summary_url" ]; then
+ echo "failure_summary.json not found in artifacts"
+ exit 1
+ fi
+
+ echo "Downloading failure_summary.json from: ${failure_summary_url}"
+ mkdir -p outputs
+ curl -s -L -H "Circle-Token: ${CIRCLE_TOKEN}" "${failure_summary_url}" -o outputs/failure_summary.json
+ ls -la outputs
+
+ echo "Downloaded failure_summary.json successfully"
+
+ # Verify the file was downloaded
+ if [ -f outputs/failure_summary.json ]; then
+ echo "File size: $(wc -c < outputs/failure_summary.json) bytes"
+ else
+ echo "Failed to download failure_summary.json"
+ exit 1
+ fi
+
+ # Export variables for next steps
+ echo "workflow_id=${workflow_id}" >> $GITHUB_OUTPUT
+ echo "collection_job_number=${collection_job_number}" >> $GITHUB_OUTPUT
+
+ - name: Upload summaries to Hub
+ env:
+ HF_TOKEN: ${{ secrets.HF_CI_WRITE_TOKEN }}
+ CIRCLECI_RESULTS_DATASET_ID: "transformers-community/circleci-test-results"
+ PR_NUMBER: ${{ github.event.pull_request.number }}
+ COMMIT_SHA: ${{ github.event.pull_request.head.sha }}
+ run: |
+ python << 'EOF'
+ import os
+ from pathlib import Path
+ from huggingface_hub import HfApi
+
+ # Setup paths
+ pr_number = os.environ["PR_NUMBER"]
+ commit_short = os.environ["COMMIT_SHA"][:12]
+ folder_path = f"pr-{pr_number}/sha-{commit_short}"
+
+ # Create folder and move file
+ Path(folder_path).mkdir(parents=True, exist_ok=True)
+ Path("outputs/failure_summary.json").rename(f"{folder_path}/failure_summary.json")
+
+ # Upload to Hub
+ dataset_id = os.environ["CIRCLECI_RESULTS_DATASET_ID"]
+ api = HfApi(token=os.environ["HF_TOKEN"])
+ api.upload_folder(
+ commit_message=f"Update CircleCI artifacts for PR {pr_number} ({commit_short})",
+ folder_path=folder_path,
+ path_in_repo=folder_path,
+ repo_id=dataset_id,
+ repo_type="dataset",
+ )
+
+ print(f"Uploaded {folder_path} to {dataset_id}")
+ EOF
+
+ - name: Post comment with helper link
+ env:
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ GITHUB_REPOSITORY: ${{ github.repository }}
+ PR_NUMBER: ${{ github.event.pull_request.number }}
+ PR_SHA: ${{ github.event.pull_request.head.sha }}
+ run: |
+ COMMIT_SHORT="${PR_SHA:0:12}"
+ SUMMARY_FILE="pr-${PR_NUMBER}/sha-${COMMIT_SHORT}/failure_summary.json"
+
+ if [ ! -f "$SUMMARY_FILE" ]; then
+ echo "failure_summary.json missing, skipping comment."
+ exit 0
+ fi
+
+ failures=$(jq '.failures | length' "$SUMMARY_FILE")
+ if [ "$failures" -eq 0 ]; then
+ echo "No failures detected, skipping PR comment."
+ exit 0
+ fi
+
+ # Build Space URL with encoded parameters
+ repo_enc=$(jq -rn --arg v "$GITHUB_REPOSITORY" '$v|@uri')
+ pr_enc=$(jq -rn --arg v "$PR_NUMBER" '$v|@uri')
+ sha_enc=$(jq -rn --arg v "$PR_SHA" '$v|@uri')
+ SPACE_URL="https://huggingface.co/spaces/transformers-community/circleci-test-collection-helper?repo=${repo_enc}&pr=${pr_enc}&sha=${sha_enc}"
+
+ # Post comment (using printf for proper newlines)
+ gh api \
+ --method POST \
+ -H "Accept: application/vnd.github+json" \
+ -H "X-GitHub-Api-Version: 2022-11-28" \
+ "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" \
+ -f body="$(printf "View the CircleCI test collection helper for this PR:\n\n%s" "$SPACE_URL")"
\ No newline at end of file
diff --git a/src/transformers/models/llama/tokenization_llama.py b/src/transformers/models/llama/tokenization_llama.py
index 971c7c49ddaf..63204a359de5 100644
--- a/src/transformers/models/llama/tokenization_llama.py
+++ b/src/transformers/models/llama/tokenization_llama.py
@@ -237,6 +237,7 @@ def tokenize(self, text: "TextInput", **kwargs) -> list[str]:
Converts a string to a list of tokens. If `self.legacy` is set to `False`, a prefix token is added unless the
first token is special.
"""
+ return super().tokenize(text, **kwargs) # Just to have failures :)
if self.legacy or len(text) == 0:
return super().tokenize(text, **kwargs)
diff --git a/src/transformers/models/mixtral/modeling_mixtral.py b/src/transformers/models/mixtral/modeling_mixtral.py
index 1faff1f4dcea..f1f6ef9f2df1 100644
--- a/src/transformers/models/mixtral/modeling_mixtral.py
+++ b/src/transformers/models/mixtral/modeling_mixtral.py
@@ -282,11 +282,11 @@ def eager_attention_forward(
causal_mask = attention_mask[:, :, :, : key_states.shape[-2]]
attn_weights = attn_weights + causal_mask
- attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query.dtype)
- attn_weights = nn.functional.dropout(attn_weights, p=dropout, training=module.training)
+ # attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query.dtype)
+ # attn_weights = nn.functional.dropout(attn_weights, p=dropout, training=module.training)
attn_output = torch.matmul(attn_weights, value_states)
attn_output = attn_output.transpose(1, 2).contiguous()
-
+ # TODO
return attn_output, attn_weights
diff --git a/tests/fixtures/circleci/junit_sample.xml b/tests/fixtures/circleci/junit_sample.xml
new file mode 100644
index 000000000000..43fc2a48c2e8
--- /dev/null
+++ b/tests/fixtures/circleci/junit_sample.xml
@@ -0,0 +1,65 @@
+<?xml version="1.0" encoding="utf-8"?>
+<testsuites>
+  <testsuite name="pytest" errors="0" failures="4">
+    <testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" name="test_conversion_reversible">
+      <failure message="AssertionError: 2 != 1">self = &lt;tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest testMethod=test_conversion_reversible&gt;
+
+    def test_conversion_reversible(self):
+        tokenizer = self.get_tokenizer(do_lower_case=False)
+        vocab = tokenizer.get_vocab()
+        for word, ind in vocab.items():
+            if word == tokenizer.unk_token:
+                continue
+>           self.assertEqual(tokenizer.convert_tokens_to_ids(word), ind)
+E       AssertionError: 2 != 1
+
+tests/test_tokenization_common.py:2124: AssertionError</failure>
+    </testcase>
+    <testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" name="test_internal_consistency">
+      <failure message="AssertionError: '[SEP] 、 世界 。 [MASK]ばんは 、 世界 。' != 'こんにちは 、 世界 。 こんばんは 、 世界 。'">self = &lt;tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest testMethod=test_internal_consistency&gt;
+
+    def test_internal_consistency(self):
+        tokenizer = self.get_tokenizer()
+        input_text, output_text = self.get_input_output_texts(tokenizer)
+
+        tokens = tokenizer.tokenize(input_text)
+        ids = tokenizer.convert_tokens_to_ids(tokens)
+        ids_2 = tokenizer.encode(input_text, add_special_tokens=False)
+        self.assertListEqual(ids, ids_2)
+
+        tokens_2 = tokenizer.convert_ids_to_tokens(ids)
+        self.assertNotEqual(len(tokens_2), 0)
+        text_2 = tokenizer.decode(ids)
+        self.assertIsInstance(text_2, str)
+
+>       self.assertEqual(text_2, output_text)
+E       AssertionError: '[SEP] 、 世界 。 [MASK]ばんは 、 世界 。' != 'こんにちは 、 世界 。 こんばんは 、 世界 。'
+E       - [SEP] 、 世界 。 [MASK]ばんは 、 世界 。
+E       + こんにちは 、 世界 。 こんばんは 、 世界 。
+
+tests/test_tokenization_common.py:778: AssertionError</failure>
+    </testcase>
+    <testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" name="test_internal_consistency">
+      <failure message="AssertionError: '[SEP] [MASK] に ち は 、 世 界 。 [SEP] [MASK] ば [MASK] は 、 世 界 。' != 'こ ん に ち は 、 世 界 。 こ ん ば ん は 、 世 界 。'">self = &lt;tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest testMethod=test_internal_consistency&gt;
+
+    def test_internal_consistency(self):
+        tokenizer = self.get_tokenizer()
+        input_text, output_text = self.get_input_output_texts(tokenizer)
+
+        tokens = tokenizer.tokenize(input_text)
+        ids = tokenizer.convert_tokens_to_ids(tokens)
+        ids_2 = tokenizer.encode(input_text, add_special_tokens=False)
+        self.assertListEqual(ids, ids_2)
+
+        tokens_2 = tokenizer.convert_ids_to_tokens(ids)
+        self.assertNotEqual(len(tokens_2), 0)
+        text_2 = tokenizer.decode(ids)
+        self.assertIsInstance(text_2, str)
+
+>       self.assertEqual(text_2, output_text)
+E       AssertionError: '[SEP] [MASK] に ち は 、 世 界 。 [SEP] [MASK] ば [MASK] は 、 世 界 。' != 'こ ん に ち は 、 世 界 。 こ ん ば ん は 、 世 界 。'
+E       - [SEP] [MASK] に ち は 、 世 界 。 [SEP] [MASK] ば [MASK] は 、 世 界 。
+E       + こ ん に ち は 、 世 界 。 こ ん ば ん は 、 世 界 。
+
+tests/test_tokenization_common.py:778: AssertionError</failure>
+    </testcase>
+    <testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" name="test_conversion_reversible">
+      <failure message="AssertionError: 2 != 1">self = &lt;tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest testMethod=test_conversion_reversible&gt;
+
+    def test_conversion_reversible(self):
+        tokenizer = self.get_tokenizer(do_lower_case=False)
+        vocab = tokenizer.get_vocab()
+        for word, ind in vocab.items():
+            if word == tokenizer.unk_token:
+                continue
+>           self.assertEqual(tokenizer.convert_tokens_to_ids(word), ind)
+E       AssertionError: 2 != 1
+
+tests/test_tokenization_common.py:2124: AssertionError</failure>
+    </testcase>
+  </testsuite>
+</testsuites>
\ No newline at end of file
diff --git a/tests/utils/test_process_circleci_workflow_test_reports.py b/tests/utils/test_process_circleci_workflow_test_reports.py
new file mode 100644
index 000000000000..4234fca9cb6b
--- /dev/null
+++ b/tests/utils/test_process_circleci_workflow_test_reports.py
@@ -0,0 +1,112 @@
+import json
+from pathlib import Path
+from xml.etree import ElementTree as ET
+
+from utils.process_circleci_workflow_test_reports import process_circleci_workflow
+
+
+class _FakeResponse:
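+    """Minimal stand-in for requests.Response, exposing .text, .json() and .status_code."""
+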
+ def __init__(self, *, text: str | None = None, json_data: dict | None = None, status_code: int = 200):
+ self.text = text or ""
+ self._json_data = json_data
+ self.status_code = status_code
+
+ def json(self):
+ if self._json_data is None:
+ raise ValueError("No JSON payload in fake response.")
+ return self._json_data
+
+
+def _build_artifacts_from_junit(junit_path: Path):
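+    """Parse the JUnit XML fixture and return (nodeid, failure message) pairs for every failing testcase."""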
+ tree = ET.parse(junit_path)
+ failures = []
+ for testcase in tree.findall(".//testcase"):
+ failure = testcase.find("failure")
+ if failure is None:
+ continue
+ classname = testcase.attrib.get("classname", "")
+ class_name = classname.split(".")[-1]
+ file_path = testcase.attrib["file"]
+ nodeid = f"{file_path}::{class_name}::{testcase.attrib['name']}"
+ failure_msg = failure.attrib.get("message", "").strip() or (failure.text or "").strip()
+ failures.append((nodeid, failure_msg))
+ return failures
+
+
+def test_failure_summary_generated_from_junit_fixture(tmp_path, monkeypatch):
+ tests_dir = Path(__file__).resolve().parents[1]
+ junit_path = tests_dir / "fixtures" / "circleci" / "junit_sample.xml"
+ junit_failures = _build_artifacts_from_junit(junit_path)
+
+ summary_lines = [f"FAILED {nodeid} - {message}" for nodeid, message in junit_failures]
+ failure_lines = [f"{nodeid}: {message}" for nodeid, message in junit_failures]
+
+ # Add a synthetic failure under tests/models to exercise the per-model aggregation.
+ model_test = "tests/models/bert/test_modeling_bert.py::BertModelTest::test_forward"
+ model_error = "AssertionError: logits mismatch"
+ summary_lines.append(f"FAILED {model_test} - {model_error}")
+ failure_lines.append(f"{model_test}: {model_error}")
+
+ summary_short_text = "\n".join(summary_lines)
+ failures_line_text = "\n".join(failure_lines)
+
+ workflow_response = {
+ "items": [
+ {
+ "project_slug": "gh/huggingface/transformers",
+ "job_number": 42,
+ "name": "tests_torch",
+ }
+ ]
+ }
+ artifacts_response = {
+ "items": [
+ {"path": "reports/tests_torch/summary_short.txt", "url": "https://example.com/summary", "node_index": 0},
+ {"path": "reports/tests_torch/failures_line.txt", "url": "https://example.com/failures", "node_index": 0},
+ ]
+ }
+
+ def fake_get(url, headers=None):
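+        # Serve canned CircleCI API payloads keyed on the requested URL; any unexpected URL fails the test.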
+ if url.endswith("/workflow/test-workflow/job"):
+ return _FakeResponse(json_data=workflow_response)
+ if url.endswith("/project/gh/huggingface/transformers/42/artifacts"):
+ return _FakeResponse(json_data=artifacts_response)
+ if url == "https://example.com/summary":
+ return _FakeResponse(text=summary_short_text)
+ if url == "https://example.com/failures":
+ return _FakeResponse(text=failures_line_text)
+ raise AssertionError(f"Unexpected URL requested: {url}")
+
+ monkeypatch.chdir(tmp_path)
+ output_dir = tmp_path / "outputs"
+ process_circleci_workflow(
+ "test-workflow",
+ output_dir=str(output_dir),
+ request_get=fake_get,
+ )
+
+ failure_summary_path = output_dir / "failure_summary.json"
+ assert failure_summary_path.is_file()
+
+ with open(failure_summary_path) as fp:
+ failure_summary = json.load(fp)
+
+ assert len(failure_summary["failures"]) == len(summary_lines)
+
+ sample_test = junit_failures[0][0]
+ assert sample_test in failure_summary["by_test"]
+ assert failure_summary["by_test"][sample_test]["count"] == 1
+ error_key = f"{sample_test}: {junit_failures[0][1]}"
+ assert error_key in failure_summary["by_test"][sample_test]["errors"]
+ assert sample_test in failure_summary["by_test"][sample_test]["variants"]
+
+ assert "bert" in failure_summary["by_model"]
+ assert failure_summary["by_model"]["bert"]["count"] == 1
+ model_error_key = f"{model_test}: {model_error}"
+ assert failure_summary["by_model"]["bert"]["errors"][model_error_key] == 1
+
+ failure_summary_md = output_dir / "failure_summary.md"
+ assert failure_summary_md.is_file()
+ md_contents = failure_summary_md.read_text()
+ assert "Failure summary" in md_contents
+ assert "tests/models/bert/test_modeling_bert.py" in md_contents
diff --git a/utils/process_circleci_workflow_test_reports.py b/utils/process_circleci_workflow_test_reports.py
index eb61f6d586e5..570709b58a31 100644
--- a/utils/process_circleci_workflow_test_reports.py
+++ b/utils/process_circleci_workflow_test_reports.py
@@ -11,47 +11,219 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+from __future__ import annotations
+
import argparse
import json
import os
+import re
+from collections import Counter
+from datetime import datetime, timezone
+from typing import Callable
import requests
-if __name__ == "__main__":
- parser = argparse.ArgumentParser()
- parser.add_argument("--workflow_id", type=str, required=True)
- args = parser.parse_args()
- workflow_id = args.workflow_id
+def _extract_failed_tests(summary_short: str) -> list[tuple[str, str]]:
+ """
+    Return a list of tuples (test node id, error message).
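+    Each relevant line of `summary_short` is expected to look like
+    `FAILED tests/some/test_file.py::SomeTest::test_case - AssertionError: ...`.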
+ """
+ failed_tests = []
+ for line in summary_short.splitlines():
+ if not line.startswith("FAILED "):
+ continue
+ # Skip subprocess failures created by `run_test_using_subprocess`
+ if " - Failed: (subprocess)" in line:
+ continue
+ failure_line = line[len("FAILED ") :].strip()
+ test_node, sep, error_message = failure_line.partition(" - ")
+ failed_tests.append((test_node.strip(), error_message.strip()))
+
+ return failed_tests
+
+
+def _extract_failure_lines(failures_line: str | None) -> list[str]:
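+    """
+    Keep only the `<nodeid>: <error>` lines from a `failures_line.txt` report, dropping blank lines,
+    pytest separator rules (lines of `=` or `_`) and the short test summary header.
+    """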
+ if not failures_line:
+ return []
+
+ failure_lines = []
+ for raw_line in failures_line.splitlines():
+ raw_line = raw_line.strip()
+ if (
+ not raw_line
+ or raw_line.startswith("=")
+ or raw_line.startswith("_")
+ or raw_line.lower().startswith("short test summary")
+ ):
+ continue
+ if ": " not in raw_line:
+ continue
+ failure_lines.append(raw_line)
+
+ return failure_lines
+
+
+def _derive_model_name(test_node_id: str) -> str | None:
+ """
+ Given a pytest node id (e.g. tests/models/bart/test_modeling_bart.py::BartModelTest::test_forward),
+ extract the model name when it lives under `tests/models`.
+ """
+ file_path = test_node_id.split("::", maxsplit=1)[0]
+ if file_path.startswith("tests/models/"):
+ parts = file_path.split("/")
+ if len(parts) >= 3:
+ return parts[2]
+ return None
+
+
+def _aggregate_failures(failure_entries: list[dict]) -> tuple[dict, dict]:
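+    """
+    Group the collected failures by normalized test node id and by model name, returning
+    `(by_test, by_model)` dicts with failure counts, error message tallies and job/test listings.
+    """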
+ by_test: dict[str, dict] = {}
+ by_model: dict[str, dict] = {}
+
+ for entry in failure_entries:
+ test_name = entry["test_name"]
+ model_name = entry["model_name"]
+ error_message = entry["error"]
+ normalized_test_name = _normalize_test_nodeid(test_name)
+
+ test_info = by_test.setdefault(
+ normalized_test_name, {"count": 0, "errors": Counter(), "jobs": set(), "variants": set()}
+ )
+ test_info["count"] += 1
+ test_info["errors"][error_message] += 1
+ test_info["jobs"].add(entry["job_name"])
+ test_info["variants"].add(test_name)
+
+ if model_name:
+ model_info = by_model.setdefault(model_name, {"count": 0, "errors": Counter(), "tests": set()})
+ model_info["count"] += 1
+ model_info["errors"][error_message] += 1
+ model_info["tests"].add(test_name)
+
+ # Convert counters and sets to serializable forms
+ def _prepare(entries: dict, include_tests: bool = False):
+ prepared = {}
+ for key, value in entries.items():
+ prepared[key] = {
+ "count": value["count"],
+ "errors": dict(value["errors"].most_common()),
+ }
+ if include_tests:
+ prepared[key]["tests"] = sorted(value["tests"])
+ else:
+ prepared[key]["jobs"] = sorted(value["jobs"])
+ prepared[key]["variants"] = sorted(value["variants"])
+ return prepared
+
+ return _prepare(by_test), _prepare(by_model, include_tests=True)
+
+
+def _format_error_messages(errors: dict[str, int]) -> str:
+ return "; ".join(f"{count}× {msg}" for msg, count in errors.items()) or "N/A"
+
+
+def _format_markdown_table(rows: list[list[str]], headers: list[str]) -> str:
+ if not rows:
+ return "No data\n"
+ header_line = "| " + " | ".join(headers) + " |"
+ separator = "| " + " | ".join(["---"] * len(headers)) + " |"
+ table_lines = [header_line, separator]
+ table_lines.extend("| " + " | ".join(row) + " |" for row in rows)
+ return "\n".join(table_lines) + "\n"
+
+
+def _normalize_test_nodeid(nodeid: str) -> str:
+ """
+ Normalizes a pytest node id by removing bracketed parametrization info
+ and collapsing suffixes such as `_05_fp16_pad_left` that come from parameter ids.
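+    For example, `tests/models/bart/test_modeling_bart.py::BartModelTest::test_generate_05_fp16[True]`
+    becomes `tests/models/bart/test_modeling_bart.py::BartModelTest::test_generate`.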
+ """
+ base_nodeid = nodeid.split("[", 1)[0]
+ parts = base_nodeid.split("::")
+ if not parts:
+ return base_nodeid
+ test_name = parts[-1]
+ test_name = re.sub(r"_\d{2,}.*$", "", test_name)
+ normalized = "::".join(parts[:-1] + [test_name])
+ return normalized
+
+
+def _collect_metadata(workflow_id: str) -> dict[str, str | None]:
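+    """Collect repository, branch, pull request and commit information from the CircleCI-provided environment variables."""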
+ repo_owner = os.environ.get("CIRCLE_PROJECT_USERNAME")
+ repo_name = os.environ.get("CIRCLE_PROJECT_REPONAME")
+ repo_slug = "/".join(part for part in [repo_owner, repo_name] if part)
+ commit_sha = os.environ.get("CIRCLE_SHA1")
+ branch = os.environ.get("CIRCLE_BRANCH")
+ pull_request = os.environ.get("CIRCLE_PULL_REQUEST")
+ pr_number = os.environ.get("CIRCLE_PR_NUMBER")
+ if not pr_number and pull_request and "/" in pull_request:
+ pr_number = pull_request.rsplit("/", 1)[-1]
+ build_num = os.environ.get("CIRCLE_BUILD_NUM")
+ timestamp = os.environ.get("CIRCLE_WORKFLOW_CREATED_AT")
+ if not timestamp:
+ timestamp = datetime.now(timezone.utc).isoformat()
+ commit_short = (commit_sha or "unknown")[:8]
+ dataset_subfolder = f"{repo_slug.replace('/', '__') or 'unknown_repo'}/pr-{pr_number or 'none'}/sha-{commit_short}/workflow-{workflow_id}"
+ metadata = {
+ "workflow_id": workflow_id,
+ "repo_owner": repo_owner,
+ "repo_name": repo_name,
+ "repository": repo_slug,
+ "branch": branch,
+ "commit_sha": commit_sha,
+ "pull_request": pull_request,
+ "pull_request_number": pr_number,
+ "build_number": build_num,
+ "collected_at": timestamp,
+ "dataset_subfolder": dataset_subfolder,
+ }
+ return metadata
+
+
+def process_circleci_workflow(
+ workflow_id: str,
+ output_dir: str = "outputs",
+ request_get: Callable = requests.get,
+):
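+    """
+    Download the test report artifacts of every `tests_*` / `examples_*` / `pipelines_*` job in the
+    given CircleCI workflow and write aggregated summaries (per-job and global `test_summary.json`,
+    `failure_summary.json`/`.md`, `collection_summary.json` and `metadata.json`) under `output_dir`.
+    `request_get` is injectable so tests can substitute a fake HTTP client.
+    """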
+ print(f"[collection_job] Processing CircleCI workflow {workflow_id}")
-    r = requests.get(
+    response = request_get(
f"https://circleci.com/api/v2/workflow/{workflow_id}/job",
headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")},
)
- jobs = r.json()["items"]
+ jobs = response.json()["items"]
+ print(f"[collection_job] Found {len(jobs)} jobs in workflow.")
- os.makedirs("outputs", exist_ok=True)
+ os.makedirs(output_dir, exist_ok=True)
workflow_summary = {}
+ failure_entries: list[dict] = []
# for each job, download artifacts
for job in jobs:
project_slug = job["project_slug"]
if job["name"].startswith(("tests_", "examples_", "pipelines_")):
+ print(f"[collection_job] Fetching artifacts for job {job['name']} (#{job['job_number']})")
url = f"https://circleci.com/api/v2/project/{project_slug}/{job['job_number']}/artifacts"
- r = requests.get(url, headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")})
+ r = request_get(url, headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")})
job_artifacts = r.json()["items"]
+ print(f"[collection_job] Retrieved {len(job_artifacts)} artifacts for {job['name']}.")
- os.makedirs(job["name"], exist_ok=True)
- os.makedirs(f"outputs/{job['name']}", exist_ok=True)
+ job_output_dir = os.path.join(output_dir, job["name"])
+ os.makedirs(job_output_dir, exist_ok=True)
job_test_summaries = {}
+ job_failure_lines = {}
for artifact in job_artifacts:
if artifact["path"].startswith("reports/") and artifact["path"].endswith("/summary_short.txt"):
node_index = artifact["node_index"]
- url = artifact["url"]
- r = requests.get(url, headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")})
+ artifact_url = artifact["url"]
+ r = request_get(artifact_url, headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")})
test_summary = r.text
job_test_summaries[node_index] = test_summary
+ elif artifact["path"].startswith("reports/") and artifact["path"].endswith("/failures_line.txt"):
+ node_index = artifact["node_index"]
+ artifact_url = artifact["url"]
+ r = request_get(artifact_url, headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")})
+ job_failure_lines[node_index] = r.text
summary = {}
for node_index, node_test_summary in job_test_summaries.items():
@@ -65,11 +237,32 @@
# failed before passed
summary = dict(sorted(summary.items(), key=lambda x: (x[1], x[0])))
workflow_summary[job["name"]] = summary
+ print(f"[collection_job] Recorded {len(summary)} test rows for {job['name']}.")
# collected version
- with open(f"outputs/{job['name']}/test_summary.json", "w") as fp:
+ with open(os.path.join(job_output_dir, "test_summary.json"), "w") as fp:
json.dump(summary, fp, indent=4)
+ # Collect failure details per node for this job
+ for node_index, summary_short in job_test_summaries.items():
+ failed_tests = _extract_failed_tests(summary_short)
+ failure_lines = _extract_failure_lines(job_failure_lines.get(node_index))
+ for idx, (test_name, short_error) in enumerate(failed_tests):
+ full_error = failure_lines[idx] if idx < len(failure_lines) else short_error
+ failure_entries.append(
+ {
+ "job_name": job["name"],
+ "node_index": node_index,
+ "test_name": test_name,
+ "short_error": short_error,
+ "error": full_error,
+ "model_name": _derive_model_name(test_name),
+ }
+ )
+ if job_test_summaries:
+ failures_in_job = sum(1 for status in summary.values() if status == "failed")
+ print(f"[collection_job] Aggregated {failures_in_job} failures for {job['name']}.")
+
new_workflow_summary = {}
for job_name, job_summary in workflow_summary.items():
for test, status in job_summary.items():
@@ -81,5 +274,74 @@
new_workflow_summary[test] = dict(sorted(result.items()))
new_workflow_summary = dict(sorted(new_workflow_summary.items()))
- with open("outputs/test_summary.json", "w") as fp:
+ with open(os.path.join(output_dir, "test_summary.json"), "w") as fp:
json.dump(new_workflow_summary, fp, indent=4)
+
+ failures_by_test, failures_by_model = _aggregate_failures(failure_entries)
+ failure_summary = {
+ "failures": failure_entries,
+ "by_test": failures_by_test,
+ "by_model": failures_by_model,
+ }
+ print(f"[collection_job] Total failing entries collected: {len(failure_entries)}.")
+
+ with open(os.path.join(output_dir, "failure_summary.json"), "w") as fp:
+ json.dump(failure_summary, fp, indent=4)
+
+ markdown_buffer = ["# Failure summary\n"]
+ if failure_entries:
+ markdown_buffer.append("## By test\n")
+ test_rows = []
+ for test_name, info in sorted(failures_by_test.items(), key=lambda x: x[1]["count"], reverse=True):
+ test_rows.append(
+ [
+ test_name,
+ str(info["count"]),
+ _format_error_messages(info["errors"]),
+ ]
+ )
+ markdown_buffer.append(_format_markdown_table(test_rows, ["Test", "Failures", "Full error(s)"]))
+
+ markdown_buffer.append("## By model\n")
+ model_rows = []
+ for model_name, info in sorted(failures_by_model.items(), key=lambda x: x[1]["count"], reverse=True):
+ model_rows.append(
+ [
+ model_name,
+ str(info["count"]),
+ _format_error_messages(info["errors"]),
+ ]
+ )
+ markdown_buffer.append(_format_markdown_table(model_rows, ["Model", "Failures", "Full error(s)"]))
+ else:
+ markdown_buffer.append("No failures were reported.\n")
+
+ markdown_text = "\n".join(markdown_buffer)
+ with open(os.path.join(output_dir, "failure_summary.md"), "w") as fp:
+ fp.write(markdown_text)
+
+ metadata = _collect_metadata(workflow_id)
+ aggregate_payload = {
+ "metadata": metadata,
+ "jobs": workflow_summary,
+ "tests": new_workflow_summary,
+ "failures": failure_entries,
+ "failures_by_test": failures_by_test,
+ "failures_by_model": failures_by_model,
+ }
+ with open(os.path.join(output_dir, "collection_summary.json"), "w") as fp:
+ json.dump(aggregate_payload, fp, indent=4)
+ with open(os.path.join(output_dir, "metadata.json"), "w") as fp:
+ json.dump(metadata, fp, indent=4)
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--workflow_id", type=str, required=True)
+ args = parser.parse_args()
+ workflow_id = args.workflow_id
+ process_circleci_workflow(workflow_id)
+
+
+if __name__ == "__main__":
+ main()