diff --git a/.github/workflows/circleci-failure-summary-comment.yml b/.github/workflows/circleci-failure-summary-comment.yml new file mode 100644 index 000000000000..e61eabfc0328 --- /dev/null +++ b/.github/workflows/circleci-failure-summary-comment.yml @@ -0,0 +1,195 @@ +name: CircleCI Failure Summary Comment +# Requires repository secrets: +# - CI_ARTIFACT_TOKEN: API token with permission to query CircleCI pipelines (same value used by CircleCI contexts) + +on: + pull_request: + types: [opened, synchronize, reopened] + +jobs: + comment: + runs-on: ubuntu-22.04 + permissions: + pull-requests: write + env: + TARGET_BRANCH: ${{ github.event.pull_request.head.ref }} + TARGET_SHA: ${{ github.event.pull_request.head.sha }} + PR_NUMBER: ${{ github.event.pull_request.number }} + CIRCLE_TOKEN: ${{ secrets.CI_ARTIFACT_TOKEN }} + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.13" + + - name: Install dependencies + run: python -m pip install requests huggingface_hub + + - name: Wait for CircleCI check suite completion + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + COMMIT_SHA: ${{ github.event.pull_request.head.sha }} + github_repository: ${{ github.repository }} + run: | + echo "Waiting for CircleCI check suite to complete..." + end=$((SECONDS+1800)) + while [ $SECONDS -lt $end ]; do + suite_json=$(gh api "repos/${github_repository}/commits/${COMMIT_SHA}/check-suites" --jq '.check_suites[] | select(.app.slug=="circleci-checks")') + if [ -z "$suite_json" ]; then + echo "CircleCI check suite not found yet, retrying..." + else + status=$(echo "$suite_json" | jq -r '.status') + conclusion=$(echo "$suite_json" | jq -r '.conclusion // empty') + echo "Current CircleCI check suite status: $status (conclusion: $conclusion)" + if [ "$status" = "completed" ] && [ -n "$conclusion" ]; then + break + fi + fi + sleep 20 + done + if [ $SECONDS -ge $end ]; then + echo "Timed out waiting for CircleCI check suite." + exit 1 + fi + + - name: Get CircleCI run's artifacts and upload them to Hub + id: circleci + env: + CIRCLE_TOKEN: ${{ secrets.CI_ARTIFACT_TOKEN }} + COMMIT_SHA: ${{ github.event.pull_request.head.sha }} + REPO: ${{ github.repository }} + run: | + # Step 1: Get CircleCI check suite ID + echo "Getting check suites for commit ${COMMIT_SHA}..." + check_suites=$(curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + "https://api.github.com/repos/${REPO}/commits/${COMMIT_SHA}/check-suites") + + circleci_suite_id=$(echo "$check_suites" | jq -r '.check_suites[] | select(.app.slug == "circleci-checks") | .id' | head -n 1) + echo "CircleCI check suite ID: ${circleci_suite_id}" + + # Step 2: Get check runs from the CircleCI suite + echo "Getting check runs for suite ${circleci_suite_id}..." + check_runs=$(curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + "https://api.github.com/repos/${REPO}/check-suites/${circleci_suite_id}/check-runs") + + # Step 3: Extract workflow ID from the "run_tests" check run + workflow_id=$(echo "$check_runs" | jq -r '.check_runs[] | select(.name == "run_tests") | .details_url' | grep -oP 'workflows/\K[a-f0-9-]+') + echo "CircleCI Workflow ID: ${workflow_id}" + + # Step 4: Get all jobs in the workflow + echo "Getting jobs for workflow ${workflow_id}..." 
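+          # workflow_id is the CircleCI workflow UUID pulled out of the "run_tests" check
+          # run's details_url in Step 3 (".../workflows/<uuid>/..."); the v2 endpoint below
+          # returns every job that ran in that workflow.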
+ jobs=$(curl -s -H "Circle-Token: ${CIRCLE_TOKEN}" \ + "https://circleci.com/api/v2/workflow/${workflow_id}/job") + + # Step 5: Extract collection_job details + collection_job_number=$(echo "$jobs" | jq -r '.items[] | select(.name == "collection_job") | .job_number') + collection_job_id=$(echo "$jobs" | jq -r '.items[] | select(.name == "collection_job") | .id') + echo "CircleCI Collection job number: ${collection_job_number}" + echo "CircleCI Collection job ID: ${collection_job_id}" + + # Step 6: Get artifacts list + echo "Getting artifacts for job ${collection_job_number}..." + artifacts=$(curl -s -H "Circle-Token: ${CIRCLE_TOKEN}" \ + "https://circleci.com/api/v2/project/gh/${REPO}/${collection_job_number}/artifacts") + + echo "$artifacts" | jq '.' + + # Step 7: Download failure_summary.json specifically + failure_summary_url=$(echo "$artifacts" | jq -r '.items[] | select(.path == "outputs/failure_summary.json") | .url') + + if [ -z "$failure_summary_url" ]; then + echo "failure_summary.json not found in artifacts" + exit 1 + fi + + echo "Downloading failure_summary.json from: ${failure_summary_url}" + mkdir -p outputs + curl -s -L -H "Circle-Token: ${CIRCLE_TOKEN}" "${failure_summary_url}" -o outputs/failure_summary.json + ls -la outputs + + echo "Downloaded failure_summary.json successfully" + + # Verify the file was downloaded + if [ -f outputs/failure_summary.json ]; then + echo "File size: $(wc -c < outputs/failure_summary.json) bytes" + else + echo "Failed to download failure_summary.json" + exit 1 + fi + + # Export variables for next steps + echo "workflow_id=${workflow_id}" >> $GITHUB_OUTPUT + echo "collection_job_number=${collection_job_number}" >> $GITHUB_OUTPUT + + - name: Upload summaries to Hub + env: + HF_TOKEN: ${{ secrets.HF_CI_WRITE_TOKEN }} + CIRCLECI_RESULTS_DATASET_ID: "transformers-community/circleci-test-results" + PR_NUMBER: ${{ github.event.pull_request.number }} + COMMIT_SHA: ${{ github.event.pull_request.head.sha }} + run: | + python << 'EOF' + import os + from pathlib import Path + from huggingface_hub import HfApi + + # Setup paths + pr_number = os.environ["PR_NUMBER"] + commit_short = os.environ["COMMIT_SHA"][:12] + folder_path = f"pr-{pr_number}/sha-{commit_short}" + + # Create folder and move file + Path(folder_path).mkdir(parents=True, exist_ok=True) + Path("outputs/failure_summary.json").rename(f"{folder_path}/failure_summary.json") + + # Upload to Hub + dataset_id = os.environ["CIRCLECI_RESULTS_DATASET_ID"] + api = HfApi(token=os.environ["HF_TOKEN"]) + api.upload_folder( + commit_message=f"Update CircleCI artifacts for PR {pr_number} ({commit_short})", + folder_path=folder_path, + path_in_repo=folder_path, + repo_id=dataset_id, + repo_type="dataset", + ) + + print(f"Uploaded {folder_path} to {dataset_id}") + EOF + + - name: Post comment with helper link + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_REPOSITORY: ${{ github.repository }} + PR_NUMBER: ${{ github.event.pull_request.number }} + PR_SHA: ${{ github.event.pull_request.head.sha }} + run: | + COMMIT_SHORT="${PR_SHA:0:12}" + SUMMARY_FILE="pr-${PR_NUMBER}/sha-${COMMIT_SHORT}/failure_summary.json" + + if [ ! -f "$SUMMARY_FILE" ]; then + echo "failure_summary.json missing, skipping comment." + exit 0 + fi + + failures=$(jq '.failures | length' "$SUMMARY_FILE") + if [ "$failures" -eq 0 ]; then + echo "No failures detected, skipping PR comment." 
+ exit 0 + fi + + # Build Space URL with encoded parameters + repo_enc=$(jq -rn --arg v "$GITHUB_REPOSITORY" '$v|@uri') + pr_enc=$(jq -rn --arg v "$PR_NUMBER" '$v|@uri') + sha_enc=$(jq -rn --arg v "$PR_SHA" '$v|@uri') + SPACE_URL="https://huggingface.co/spaces/transformers-community/circleci-test-collection-helper?repo=${repo_enc}&pr=${pr_enc}&sha=${sha_enc}" + + # Post comment (using printf for proper newlines) + gh api \ + --method POST \ + -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" \ + -f body="$(printf "View the CircleCI test collection helper for this PR:\n\n%s" "$SPACE_URL")" \ No newline at end of file diff --git a/src/transformers/models/llama/tokenization_llama.py b/src/transformers/models/llama/tokenization_llama.py index 971c7c49ddaf..63204a359de5 100644 --- a/src/transformers/models/llama/tokenization_llama.py +++ b/src/transformers/models/llama/tokenization_llama.py @@ -237,6 +237,7 @@ def tokenize(self, text: "TextInput", **kwargs) -> list[str]: Converts a string to a list of tokens. If `self.legacy` is set to `False`, a prefix token is added unless the first token is special. """ + return super().tokenize(text, **kwargs) # Just to have failures :) if self.legacy or len(text) == 0: return super().tokenize(text, **kwargs) diff --git a/src/transformers/models/mixtral/modeling_mixtral.py b/src/transformers/models/mixtral/modeling_mixtral.py index 1faff1f4dcea..f1f6ef9f2df1 100644 --- a/src/transformers/models/mixtral/modeling_mixtral.py +++ b/src/transformers/models/mixtral/modeling_mixtral.py @@ -282,11 +282,11 @@ def eager_attention_forward( causal_mask = attention_mask[:, :, :, : key_states.shape[-2]] attn_weights = attn_weights + causal_mask - attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query.dtype) - attn_weights = nn.functional.dropout(attn_weights, p=dropout, training=module.training) + # attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query.dtype) + # attn_weights = nn.functional.dropout(attn_weights, p=dropout, training=module.training) attn_output = torch.matmul(attn_weights, value_states) attn_output = attn_output.transpose(1, 2).contiguous() - + # TODO return attn_output, attn_weights diff --git a/tests/fixtures/circleci/junit_sample.xml b/tests/fixtures/circleci/junit_sample.xml new file mode 100644 index 000000000000..43fc2a48c2e8 --- /dev/null +++ b/tests/fixtures/circleci/junit_sample.xml @@ -0,0 +1,65 @@ +/root/project/tests/test_tokenization_common.py:883: tokenizer doesn't accept chat templates at inputself = <tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest testMethod=test_conversion_reversible> + + def test_conversion_reversible(self): + tokenizer = self.get_tokenizer(do_lower_case=False) + vocab = tokenizer.get_vocab() + for word, ind in vocab.items(): + if word == tokenizer.unk_token: + continue +> self.assertEqual(tokenizer.convert_tokens_to_ids(word), ind) +E AssertionError: 2 != 1 + +tests/test_tokenization_common.py:2124: AssertionError/root/project/tests/test_tokenization_common.py:713: No integration expected tokens provided/root/project/tests/test_tokenization_common.py:735: No integration expected tokens providedself = <tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest testMethod=test_internal_consistency> + + def test_internal_consistency(self): + tokenizer = self.get_tokenizer() + input_text, 
output_text = self.get_input_output_texts(tokenizer) + + tokens = tokenizer.tokenize(input_text) + ids = tokenizer.convert_tokens_to_ids(tokens) + ids_2 = tokenizer.encode(input_text, add_special_tokens=False) + self.assertListEqual(ids, ids_2) + + tokens_2 = tokenizer.convert_ids_to_tokens(ids) + self.assertNotEqual(len(tokens_2), 0) + text_2 = tokenizer.decode(ids) + self.assertIsInstance(text_2, str) + +> self.assertEqual(text_2, output_text) +E AssertionError: '[SEP] 、 世界 。 [MASK]ばんは 、 世界 。' != 'こんにちは 、 世界 。 こんばんは 、 世界 。' +E - [SEP] 、 世界 。 [MASK]ばんは 、 世界 。 +E + こんにちは 、 世界 。 こんばんは 、 世界 。 + +tests/test_tokenization_common.py:778: AssertionError/root/project/tests/test_tokenization_common.py:998: Custom backend tokenizer/root/project/tests/test_tokenization_common.py:1176: Custom backend tokenizer/root/project/tests/test_tokenization_common.py:998: Custom backend tokenizer/root/project/tests/test_tokenization_common.py:1176: Custom backend tokenizer/root/project/tests/test_tokenization_common.py:713: No integration expected tokens provided/root/project/tests/test_tokenization_common.py:883: tokenizer doesn't accept chat templates at input/root/project/tests/test_tokenization_common.py:735: No integration expected tokens providedself = <tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest testMethod=test_internal_consistency> + + def test_internal_consistency(self): + tokenizer = self.get_tokenizer() + input_text, output_text = self.get_input_output_texts(tokenizer) + + tokens = tokenizer.tokenize(input_text) + ids = tokenizer.convert_tokens_to_ids(tokens) + ids_2 = tokenizer.encode(input_text, add_special_tokens=False) + self.assertListEqual(ids, ids_2) + + tokens_2 = tokenizer.convert_ids_to_tokens(ids) + self.assertNotEqual(len(tokens_2), 0) + text_2 = tokenizer.decode(ids) + self.assertIsInstance(text_2, str) + +> self.assertEqual(text_2, output_text) +E AssertionError: '[SEP] [MASK] に ち は 、 世 界 。 [SEP] [MASK] ば [MASK] は 、 世 界 。' != 'こ ん に ち は 、 世 界 。 こ ん ば ん は 、 世 界 。' +E - [SEP] [MASK] に ち は 、 世 界 。 [SEP] [MASK] ば [MASK] は 、 世 界 。 +E + こ ん に ち は 、 世 界 。 こ ん ば ん は 、 世 界 。 + +tests/test_tokenization_common.py:778: AssertionError/root/project/tests/test_tokenization_common.py:799: Tokenizers backend tokenizerself = <tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest testMethod=test_conversion_reversible> + + def test_conversion_reversible(self): + tokenizer = self.get_tokenizer(do_lower_case=False) + vocab = tokenizer.get_vocab() + for word, ind in vocab.items(): + if word == tokenizer.unk_token: + continue +> self.assertEqual(tokenizer.convert_tokens_to_ids(word), ind) +E AssertionError: 2 != 1 + +tests/test_tokenization_common.py:2124: AssertionError/root/project/tests/test_tokenization_common.py:799: Tokenizers backend tokenizer/root/project/tests/test_tokenization_common.py:883: tokenizer doesn't accept chat templates at input/root/project/tests/models/clip/test_tokenization_clip.py:47: Skipping padding to multiple of test bc vocab is too small./root/project/tests/test_tokenization_common.py:883: tokenizer doesn't accept chat templates at input/root/project/tests/test_tokenization_common.py:2366: This tokenizer has no padding token set, or pad_token_id < 0/root/project/tests/test_tokenization_common.py:1976: No padding token./root/project/tests/test_tokenization_common.py:1944: No padding token. 
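For reference, `_build_artifacts_from_junit` in the new test file below pulls each failing `testcase`'s `file`, `classname` and `name` attributes plus its `failure` message out of this fixture. A minimal sketch of that JUnit shape and of the node-id reconstruction, with attribute values modeled on the bert_japanese failure above (the `file` path is an assumption, not copied from the fixture):

# Illustrative sketch only: a minimal JUnit-style <testcase> carrying the attributes
# that _build_artifacts_from_junit (defined in the new test below) reads.
from xml.etree import ElementTree as ET

sample = (
    '<testsuites><testsuite name="pytest">'
    '<testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest"'
    ' file="tests/models/bert_japanese/test_tokenization_bert_japanese.py"'
    ' name="test_conversion_reversible">'
    '<failure message="AssertionError: 2 != 1">traceback text</failure>'
    "</testcase></testsuite></testsuites>"
)

testcase = ET.fromstring(sample).find(".//testcase")
failure = testcase.find("failure")
nodeid = "::".join(
    [testcase.attrib["file"], testcase.attrib["classname"].split(".")[-1], testcase.attrib["name"]]
)
print(nodeid)                     # <file>::BertJapaneseTokenizationTest::test_conversion_reversible
print(failure.attrib["message"])  # AssertionError: 2 != 1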
\ No newline at end of file diff --git a/tests/utils/test_process_circleci_workflow_test_reports.py b/tests/utils/test_process_circleci_workflow_test_reports.py new file mode 100644 index 000000000000..4234fca9cb6b --- /dev/null +++ b/tests/utils/test_process_circleci_workflow_test_reports.py @@ -0,0 +1,112 @@ +import json +from pathlib import Path +from xml.etree import ElementTree as ET + +from utils.process_circleci_workflow_test_reports import process_circleci_workflow + + +class _FakeResponse: + def __init__(self, *, text: str | None = None, json_data: dict | None = None, status_code: int = 200): + self.text = text or "" + self._json_data = json_data + self.status_code = status_code + + def json(self): + if self._json_data is None: + raise ValueError("No JSON payload in fake response.") + return self._json_data + + +def _build_artifacts_from_junit(junit_path: Path): + tree = ET.parse(junit_path) + failures = [] + for testcase in tree.findall(".//testcase"): + failure = testcase.find("failure") + if failure is None: + continue + classname = testcase.attrib.get("classname", "") + class_name = classname.split(".")[-1] + file_path = testcase.attrib["file"] + nodeid = f"{file_path}::{class_name}::{testcase.attrib['name']}" + failure_msg = failure.attrib.get("message", "").strip() or (failure.text or "").strip() + failures.append((nodeid, failure_msg)) + return failures + + +def test_failure_summary_generated_from_junit_fixture(tmp_path, monkeypatch): + tests_dir = Path(__file__).resolve().parents[1] + junit_path = tests_dir / "fixtures" / "circleci" / "junit_sample.xml" + junit_failures = _build_artifacts_from_junit(junit_path) + + summary_lines = [f"FAILED {nodeid} - {message}" for nodeid, message in junit_failures] + failure_lines = [f"{nodeid}: {message}" for nodeid, message in junit_failures] + + # Add a synthetic failure under tests/models to exercise the per-model aggregation. 
+ model_test = "tests/models/bert/test_modeling_bert.py::BertModelTest::test_forward" + model_error = "AssertionError: logits mismatch" + summary_lines.append(f"FAILED {model_test} - {model_error}") + failure_lines.append(f"{model_test}: {model_error}") + + summary_short_text = "\n".join(summary_lines) + failures_line_text = "\n".join(failure_lines) + + workflow_response = { + "items": [ + { + "project_slug": "gh/huggingface/transformers", + "job_number": 42, + "name": "tests_torch", + } + ] + } + artifacts_response = { + "items": [ + {"path": "reports/tests_torch/summary_short.txt", "url": "https://example.com/summary", "node_index": 0}, + {"path": "reports/tests_torch/failures_line.txt", "url": "https://example.com/failures", "node_index": 0}, + ] + } + + def fake_get(url, headers=None): + if url.endswith("/workflow/test-workflow/job"): + return _FakeResponse(json_data=workflow_response) + if url.endswith("/project/gh/huggingface/transformers/42/artifacts"): + return _FakeResponse(json_data=artifacts_response) + if url == "https://example.com/summary": + return _FakeResponse(text=summary_short_text) + if url == "https://example.com/failures": + return _FakeResponse(text=failures_line_text) + raise AssertionError(f"Unexpected URL requested: {url}") + + monkeypatch.chdir(tmp_path) + output_dir = tmp_path / "outputs" + process_circleci_workflow( + "test-workflow", + output_dir=str(output_dir), + request_get=fake_get, + ) + + failure_summary_path = output_dir / "failure_summary.json" + assert failure_summary_path.is_file() + + with open(failure_summary_path) as fp: + failure_summary = json.load(fp) + + assert len(failure_summary["failures"]) == len(summary_lines) + + sample_test = junit_failures[0][0] + assert sample_test in failure_summary["by_test"] + assert failure_summary["by_test"][sample_test]["count"] == 1 + error_key = f"{sample_test}: {junit_failures[0][1]}" + assert error_key in failure_summary["by_test"][sample_test]["errors"] + assert sample_test in failure_summary["by_test"][sample_test]["variants"] + + assert "bert" in failure_summary["by_model"] + assert failure_summary["by_model"]["bert"]["count"] == 1 + model_error_key = f"{model_test}: {model_error}" + assert failure_summary["by_model"]["bert"]["errors"][model_error_key] == 1 + + failure_summary_md = output_dir / "failure_summary.md" + assert failure_summary_md.is_file() + md_contents = failure_summary_md.read_text() + assert "Failure summary" in md_contents + assert "tests/models/bert/test_modeling_bert.py" in md_contents diff --git a/utils/process_circleci_workflow_test_reports.py b/utils/process_circleci_workflow_test_reports.py index eb61f6d586e5..570709b58a31 100644 --- a/utils/process_circleci_workflow_test_reports.py +++ b/utils/process_circleci_workflow_test_reports.py @@ -11,47 +11,219 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import argparse import json import os +import re +from collections import Counter +from datetime import datetime, timezone +from typing import Callable import requests -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--workflow_id", type=str, required=True) - args = parser.parse_args() - workflow_id = args.workflow_id +def _extract_failed_tests(summary_short: str) -> list[tuple[str, str]]: + """ + Return a list of tuples (, ). 
+ """ + failed_tests = [] + for line in summary_short.splitlines(): + if not line.startswith("FAILED "): + continue + # Skip subprocess failures created by `run_test_using_subprocess` + if " - Failed: (subprocess)" in line: + continue + failure_line = line[len("FAILED ") :].strip() + test_node, sep, error_message = failure_line.partition(" - ") + failed_tests.append((test_node.strip(), error_message.strip())) + + return failed_tests + + +def _extract_failure_lines(failures_line: str | None) -> list[str]: + if not failures_line: + return [] + + failure_lines = [] + for raw_line in failures_line.splitlines(): + raw_line = raw_line.strip() + if ( + not raw_line + or raw_line.startswith("=") + or raw_line.startswith("_") + or raw_line.lower().startswith("short test summary") + ): + continue + if ": " not in raw_line: + continue + failure_lines.append(raw_line) + + return failure_lines + + +def _derive_model_name(test_node_id: str) -> str | None: + """ + Given a pytest node id (e.g. tests/models/bart/test_modeling_bart.py::BartModelTest::test_forward), + extract the model name when it lives under `tests/models`. + """ + file_path = test_node_id.split("::", maxsplit=1)[0] + if file_path.startswith("tests/models/"): + parts = file_path.split("/") + if len(parts) >= 3: + return parts[2] + return None + + +def _aggregate_failures(failure_entries: list[dict]) -> tuple[dict, dict]: + by_test: dict[str, dict] = {} + by_model: dict[str, dict] = {} + + for entry in failure_entries: + test_name = entry["test_name"] + model_name = entry["model_name"] + error_message = entry["error"] + normalized_test_name = _normalize_test_nodeid(test_name) + + test_info = by_test.setdefault( + normalized_test_name, {"count": 0, "errors": Counter(), "jobs": set(), "variants": set()} + ) + test_info["count"] += 1 + test_info["errors"][error_message] += 1 + test_info["jobs"].add(entry["job_name"]) + test_info["variants"].add(test_name) + + if model_name: + model_info = by_model.setdefault(model_name, {"count": 0, "errors": Counter(), "tests": set()}) + model_info["count"] += 1 + model_info["errors"][error_message] += 1 + model_info["tests"].add(test_name) + + # Convert counters and sets to serializable forms + def _prepare(entries: dict, include_tests: bool = False): + prepared = {} + for key, value in entries.items(): + prepared[key] = { + "count": value["count"], + "errors": dict(value["errors"].most_common()), + } + if include_tests: + prepared[key]["tests"] = sorted(value["tests"]) + else: + prepared[key]["jobs"] = sorted(value["jobs"]) + prepared[key]["variants"] = sorted(value["variants"]) + return prepared + + return _prepare(by_test), _prepare(by_model, include_tests=True) + + +def _format_error_messages(errors: dict[str, int]) -> str: + return "; ".join(f"{count}× {msg}" for msg, count in errors.items()) or "N/A" + + +def _format_markdown_table(rows: list[list[str]], headers: list[str]) -> str: + if not rows: + return "No data\n" - r = requests.get( + header_line = "| " + " | ".join(headers) + " |" + separator = "| " + " | ".join(["---"] * len(headers)) + " |" + table_lines = [header_line, separator] + table_lines.extend("| " + " | ".join(row) + " |" for row in rows) + return "\n".join(table_lines) + "\n" + + +def _normalize_test_nodeid(nodeid: str) -> str: + """ + Normalizes a pytest node id by removing bracketed parametrization info + and collapsing suffixes such as `_05_fp16_pad_left` that come from parameter ids. 
+ """ + base_nodeid = nodeid.split("[", 1)[0] + parts = base_nodeid.split("::") + if not parts: + return base_nodeid + test_name = parts[-1] + test_name = re.sub(r"_\d{2,}.*$", "", test_name) + normalized = "::".join(parts[:-1] + [test_name]) + return normalized + + +def _collect_metadata(workflow_id: str) -> dict[str, str | None]: + repo_owner = os.environ.get("CIRCLE_PROJECT_USERNAME") + repo_name = os.environ.get("CIRCLE_PROJECT_REPONAME") + repo_slug = "/".join(part for part in [repo_owner, repo_name] if part) + commit_sha = os.environ.get("CIRCLE_SHA1") + branch = os.environ.get("CIRCLE_BRANCH") + pull_request = os.environ.get("CIRCLE_PULL_REQUEST") + pr_number = os.environ.get("CIRCLE_PR_NUMBER") + if not pr_number and pull_request and "/" in pull_request: + pr_number = pull_request.rsplit("/", 1)[-1] + build_num = os.environ.get("CIRCLE_BUILD_NUM") + timestamp = os.environ.get("CIRCLE_WORKFLOW_CREATED_AT") + if not timestamp: + timestamp = datetime.now(timezone.utc).isoformat() + commit_short = (commit_sha or "unknown")[:8] + dataset_subfolder = f"{repo_slug.replace('/', '__') or 'unknown_repo'}/pr-{pr_number or 'none'}/sha-{commit_short}/workflow-{workflow_id}" + metadata = { + "workflow_id": workflow_id, + "repo_owner": repo_owner, + "repo_name": repo_name, + "repository": repo_slug, + "branch": branch, + "commit_sha": commit_sha, + "pull_request": pull_request, + "pull_request_number": pr_number, + "build_number": build_num, + "collected_at": timestamp, + "dataset_subfolder": dataset_subfolder, + } + return metadata + + +def process_circleci_workflow( + workflow_id: str, + output_dir: str = "outputs", + request_get: Callable = requests.get, +): + print(f"[collection_job] Processing CircleCI workflow {workflow_id}") + response = request_get( f"https://circleci.com/api/v2/workflow/{workflow_id}/job", headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")}, ) - jobs = r.json()["items"] + jobs = response.json()["items"] + print(f"[collection_job] Found {len(jobs)} jobs in workflow.") - os.makedirs("outputs", exist_ok=True) + os.makedirs(output_dir, exist_ok=True) workflow_summary = {} + failure_entries: list[dict] = [] # for each job, download artifacts for job in jobs: project_slug = job["project_slug"] if job["name"].startswith(("tests_", "examples_", "pipelines_")): + print(f"[collection_job] Fetching artifacts for job {job['name']} (#{job['job_number']})") url = f"https://circleci.com/api/v2/project/{project_slug}/{job['job_number']}/artifacts" - r = requests.get(url, headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")}) + r = request_get(url, headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")}) job_artifacts = r.json()["items"] + print(f"[collection_job] Retrieved {len(job_artifacts)} artifacts for {job['name']}.") - os.makedirs(job["name"], exist_ok=True) - os.makedirs(f"outputs/{job['name']}", exist_ok=True) + job_output_dir = os.path.join(output_dir, job["name"]) + os.makedirs(job_output_dir, exist_ok=True) job_test_summaries = {} + job_failure_lines = {} for artifact in job_artifacts: if artifact["path"].startswith("reports/") and artifact["path"].endswith("/summary_short.txt"): node_index = artifact["node_index"] - url = artifact["url"] - r = requests.get(url, headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")}) + artifact_url = artifact["url"] + r = request_get(artifact_url, headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")}) test_summary = r.text job_test_summaries[node_index] = test_summary + elif 
artifact["path"].startswith("reports/") and artifact["path"].endswith("/failures_line.txt"): + node_index = artifact["node_index"] + artifact_url = artifact["url"] + r = request_get(artifact_url, headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")}) + job_failure_lines[node_index] = r.text summary = {} for node_index, node_test_summary in job_test_summaries.items(): @@ -65,11 +237,32 @@ # failed before passed summary = dict(sorted(summary.items(), key=lambda x: (x[1], x[0]))) workflow_summary[job["name"]] = summary + print(f"[collection_job] Recorded {len(summary)} test rows for {job['name']}.") # collected version - with open(f"outputs/{job['name']}/test_summary.json", "w") as fp: + with open(os.path.join(job_output_dir, "test_summary.json"), "w") as fp: json.dump(summary, fp, indent=4) + # Collect failure details per node for this job + for node_index, summary_short in job_test_summaries.items(): + failed_tests = _extract_failed_tests(summary_short) + failure_lines = _extract_failure_lines(job_failure_lines.get(node_index)) + for idx, (test_name, short_error) in enumerate(failed_tests): + full_error = failure_lines[idx] if idx < len(failure_lines) else short_error + failure_entries.append( + { + "job_name": job["name"], + "node_index": node_index, + "test_name": test_name, + "short_error": short_error, + "error": full_error, + "model_name": _derive_model_name(test_name), + } + ) + if job_test_summaries: + failures_in_job = sum(1 for status in summary.values() if status == "failed") + print(f"[collection_job] Aggregated {failures_in_job} failures for {job['name']}.") + new_workflow_summary = {} for job_name, job_summary in workflow_summary.items(): for test, status in job_summary.items(): @@ -81,5 +274,74 @@ new_workflow_summary[test] = dict(sorted(result.items())) new_workflow_summary = dict(sorted(new_workflow_summary.items())) - with open("outputs/test_summary.json", "w") as fp: + with open(os.path.join(output_dir, "test_summary.json"), "w") as fp: json.dump(new_workflow_summary, fp, indent=4) + + failures_by_test, failures_by_model = _aggregate_failures(failure_entries) + failure_summary = { + "failures": failure_entries, + "by_test": failures_by_test, + "by_model": failures_by_model, + } + print(f"[collection_job] Total failing entries collected: {len(failure_entries)}.") + + with open(os.path.join(output_dir, "failure_summary.json"), "w") as fp: + json.dump(failure_summary, fp, indent=4) + + markdown_buffer = ["# Failure summary\n"] + if failure_entries: + markdown_buffer.append("## By test\n") + test_rows = [] + for test_name, info in sorted(failures_by_test.items(), key=lambda x: x[1]["count"], reverse=True): + test_rows.append( + [ + test_name, + str(info["count"]), + _format_error_messages(info["errors"]), + ] + ) + markdown_buffer.append(_format_markdown_table(test_rows, ["Test", "Failures", "Full error(s)"])) + + markdown_buffer.append("## By model\n") + model_rows = [] + for model_name, info in sorted(failures_by_model.items(), key=lambda x: x[1]["count"], reverse=True): + model_rows.append( + [ + model_name, + str(info["count"]), + _format_error_messages(info["errors"]), + ] + ) + markdown_buffer.append(_format_markdown_table(model_rows, ["Model", "Failures", "Full error(s)"])) + else: + markdown_buffer.append("No failures were reported.\n") + + markdown_text = "\n".join(markdown_buffer) + with open(os.path.join(output_dir, "failure_summary.md"), "w") as fp: + fp.write(markdown_text) + + metadata = _collect_metadata(workflow_id) + aggregate_payload = { + "metadata": 
metadata, + "jobs": workflow_summary, + "tests": new_workflow_summary, + "failures": failure_entries, + "failures_by_test": failures_by_test, + "failures_by_model": failures_by_model, + } + with open(os.path.join(output_dir, "collection_summary.json"), "w") as fp: + json.dump(aggregate_payload, fp, indent=4) + with open(os.path.join(output_dir, "metadata.json"), "w") as fp: + json.dump(metadata, fp, indent=4) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--workflow_id", type=str, required=True) + args = parser.parse_args() + workflow_id = args.workflow_id + process_circleci_workflow(workflow_id) + + +if __name__ == "__main__": + main()
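For local debugging, the same entry point can be run directly and its JSON output inspected; a short, hedged sketch (assumes CIRCLE_TOKEN is exported and a real workflow UUID is passed on the command line; the file name and keys come from process_circleci_workflow above):

# Usage sketch: run the collection script for one CircleCI workflow, then rank the
# normalized failing tests it aggregated.
#
#   CIRCLE_TOKEN=... python utils/process_circleci_workflow_test_reports.py --workflow_id <uuid>
import json

with open("outputs/failure_summary.json") as fp:
    failure_summary = json.load(fp)

# "by_test" maps a normalized pytest node id to its aggregated failure info
ranked = sorted(failure_summary["by_test"].items(), key=lambda kv: kv[1]["count"], reverse=True)
for test_name, info in ranked[:10]:
    print(f"{info['count']:>3}  {test_name}  (jobs: {', '.join(info['jobs'])})")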