From af806d65219342c48e7321a26d7766431fdd1534 Mon Sep 17 00:00:00 2001
From: Arthur <arthur.zucker@gmail.com>
Date: Mon, 17 Nov 2025 13:38:43 +0100
Subject: [PATCH 01/16] trying something out

---
 .circleci/create_circleci_config.py           |   6 +
 tests/fixtures/circleci/junit_sample.xml      |  65 ++++
 ..._process_circleci_workflow_test_reports.py | 125 ++++++++
 .../process_circleci_workflow_test_reports.py | 278 +++++++++++++++++-
 4 files changed, 459 insertions(+), 15 deletions(-)
 create mode 100644 tests/fixtures/circleci/junit_sample.xml
 create mode 100644 tests/utils/test_process_circleci_workflow_test_reports.py
diff --git a/.circleci/create_circleci_config.py b/.circleci/create_circleci_config.py
index 6e98ee0f1493..de9a24030332 100644
--- a/.circleci/create_circleci_config.py
+++ b/.circleci/create_circleci_config.py
@@ -61,9 +61,15 @@ class EmptyJob:
     def to_dict(self):
         steps = [{"run": 'ls -la'}]
         if self.job_name == "collection_job":
+            # Export the PR number once (if we have one) so the failure summary can post a GitHub comment.
             steps.extend(
                 [
                     "checkout",
+                    {
+                        "run": (
+                            'echo "export PR_NUMBER=$(python utils/extract_pr_number_from_circleci.py)" >> $BASH_ENV'
+                        )
+                    },
                     {"run": "pip install requests || true"},
                     {"run": """while [[ $(curl --location --request GET "https://circleci.com/api/v2/workflow/$CIRCLE_WORKFLOW_ID/job" --header "Circle-Token: $CCI_TOKEN"| jq -r '.items[]|select(.name != "collection_job")|.status' | grep -c "running") -gt 0 ]]; do sleep 5; done || true"""},
                     {"run": 'python utils/process_circleci_workflow_test_reports.py --workflow_id $CIRCLE_WORKFLOW_ID || true'},
diff --git a/tests/fixtures/circleci/junit_sample.xml b/tests/fixtures/circleci/junit_sample.xml
new file mode 100644
index 000000000000..43fc2a48c2e8
--- /dev/null
+++ b/tests/fixtures/circleci/junit_sample.xml
@@ -0,0 +1,65 @@
+<?xml version="1.0" encoding="utf-8"?><testsuites name="pytest tests"><testsuite name="pytest" errors="0" failures="4" skipped="18" tests="243" time="28.981" timestamp="2025-11-16T22:40:27.820441+00:00" hostname="d89944057df4"><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_chat_template_save_loading" file="tests/test_tokenization_common.py" line="882" time="0.913"><skipped type="pytest.skip" message="tokenizer doesn't accept chat templates at input">/root/project/tests/test_tokenization_common.py:883: tokenizer doesn't accept chat templates at input</skipped></testcase><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_continue_final_message" file="tests/test_tokenization_common.py" line="1278" time="0.007" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_chat_template" file="tests/test_tokenization_common.py" line="821" time="0.950" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_chat_template_batched" file="tests/test_tokenization_common.py" line="924" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_jinja_loopcontrols" file="tests/test_tokenization_common.py" line="958" time="0.966" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_mecab_tokenizer_no_normalize" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="192" time="0.967" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_continue_final_message_with_decoy_earlier_message" file="tests/test_tokenization_common.py" line="1336" time="0.005" /><testcase 
classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_jinja_strftime" file="tests/test_tokenization_common.py" line="978" time="0.004" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_mecab_tokenizer_unidic" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="153" time="0.009" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_chat_template_dict" file="tests/test_tokenization_common.py" line="1361" time="0.004" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_encode_plus_with_padding_0" file="tests/test_tokenization_common.py" line="1992" time="1.002" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_encode_plus_with_padding_1" file="tests/test_tokenization_common.py" line="1992" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_continue_final_message_with_trim" file="tests/test_tokenization_common.py" line="1306" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_mecab_tokenizer_unidic_lite" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="142" time="0.002" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_chat_template_dict_saving" file="tests/test_tokenization_common.py" line="1377" time="0.008" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_full_tokenizer" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="98" time="0.001" 
/><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_mecab_tokenizer_with_option" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="178" time="0.001" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_jumanpp_tokenizer_trim_whitespace" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="335" time="1.058" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_mask_output" file="tests/test_tokenization_common.py" line="779" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_jumanpp_full_tokenizer_with_jumanpp_kwargs_trim_whitespace" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="344" time="0.066" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_chat_template_file_priority" file="tests/test_tokenization_common.py" line="1409" time="0.005" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_get_vocab" file="tests/test_tokenization_common.py" line="2104" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_conversion_reversible" file="tests/test_tokenization_common.py" line="2117" time="0.003"><failure message="AssertionError: 2 != 1">self = &lt;tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest testMethod=test_conversion_reversible&gt;
+
+    def test_conversion_reversible(self):
+        tokenizer = self.get_tokenizer(do_lower_case=False)
+        vocab = tokenizer.get_vocab()
+        for word, ind in vocab.items():
+            if word == tokenizer.unk_token:
+                continue
+&gt;           self.assertEqual(tokenizer.convert_tokens_to_ids(word), ind)
+E           AssertionError: 2 != 1
+
+tests/test_tokenization_common.py:2124: AssertionError</failure></testcase><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_model_input_names_signature" file="tests/test_tokenization_common.py" line="511" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_empty_input_string" file="tests/test_tokenization_common.py" line="2462" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_jumanpp_tokenizer" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="316" time="0.027" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_maximum_encoding_length_pair_input" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="92" time="0.001" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_integration" file="tests/test_tokenization_common.py" line="712" time="0.001"><skipped type="pytest.skip" message="No integration expected tokens provided">/root/project/tests/test_tokenization_common.py:713: No integration expected tokens provided</skipped></testcase><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_encode_basic_padding" file="tests/test_tokenization_common.py" line="1860" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_number_of_added_tokens" file="tests/test_tokenization_common.py" line="1423" time="0.004" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_maximum_encoding_length_single_input" 
file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="95" time="0.001" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_jumanpp_tokenizer_ext" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="355" time="0.028" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_integration_from_extractor" file="tests/test_tokenization_common.py" line="734" time="0.001"><skipped type="pytest.skip" message="No integration expected tokens provided">/root/project/tests/test_tokenization_common.py:735: No integration expected tokens provided</skipped></testcase><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_pretokenized_inputs" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="89" time="0.001" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_pad_token_initialization" file="tests/test_tokenization_common.py" line="2490" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_jumanpp_tokenizer_lower" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="323" time="0.026" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_mecab_full_tokenizer_with_mecab_kwargs" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="125" time="0.002" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_right_and_left_truncation" file="tests/test_tokenization_common.py" line="1889" time="0.003" /><testcase 
classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_jumanpp_tokenizer_no_normalize" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="329" time="0.026" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_mecab_tokenizer_ipadic" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="134" time="0.001" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_internal_consistency" file="tests/test_tokenization_common.py" line="763" time="0.004"><failure message="AssertionError: '[SEP] 、 世界 。 [MASK]ばんは 、 世界 。' != 'こんにちは 、 世界 。 こんばんは 、 世界 。'&#10;- [SEP] 、 世界 。 [MASK]ばんは 、 世界 。&#10;+ こんにちは 、 世界 。 こんばんは 、 世界 。">self = &lt;tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest testMethod=test_internal_consistency&gt;
+
+    def test_internal_consistency(self):
+        tokenizer = self.get_tokenizer()
+        input_text, output_text = self.get_input_output_texts(tokenizer)
+    
+        tokens = tokenizer.tokenize(input_text)
+        ids = tokenizer.convert_tokens_to_ids(tokens)
+        ids_2 = tokenizer.encode(input_text, add_special_tokens=False)
+        self.assertListEqual(ids, ids_2)
+    
+        tokens_2 = tokenizer.convert_ids_to_tokens(ids)
+        self.assertNotEqual(len(tokens_2), 0)
+        text_2 = tokenizer.decode(ids)
+        self.assertIsInstance(text_2, str)
+    
+&gt;       self.assertEqual(text_2, output_text)
+E       AssertionError: '[SEP] 、 世界 。 [MASK]ばんは 、 世界 。' != 'こんにちは 、 世界 。 こんばんは 、 世界 。'
+E       - [SEP] 、 世界 。 [MASK]ばんは 、 世界 。
+E       + こんにちは 、 世界 。 こんばんは 、 世界 。
+
+tests/test_tokenization_common.py:778: AssertionError</failure></testcase><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_tokenizer_initialization_with_conflicting_key" file="tests/test_tokenization_common.py" line="2455" time="0.006" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_mecab_tokenizer_lower" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="170" time="0.002" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_sudachi_tokenizer_split_mode_A" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="232" time="0.054" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_save_and_load_tokenizer" file="tests/test_tokenization_common.py" line="602" time="0.018" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_tokenizer_store_full_signature" file="tests/test_tokenization_common.py" line="522" time="0.002" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_sudachi_tokenizer_split_mode_B" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="238" time="0.041" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_sudachi_tokenizer_split_mode_C" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="244" time="0.024" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_tokenizers_common_ids_setters" file="tests/test_tokenization_common.py" line="569" time="0.003" /><testcase 
classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_tokenizers_common_properties" file="tests/test_tokenization_common.py" line="537" time="0.002" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_sudachi_tokenizer_trim_whitespace" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="286" time="0.028" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_token_type_ids" file="tests/test_tokenization_common.py" line="787" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_tokenize_special_tokens" file="tests/test_tokenization_common.py" line="491" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_added_tokens_serialization" file="tests/test_tokenization_common.py" line="2428" time="1.455" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_chat_template_return_assistant_tokens_mask" file="tests/test_tokenization_common.py" line="997" time="0.339"><skipped type="pytest.skip" message="Custom backend tokenizer">/root/project/tests/test_tokenization_common.py:998: Custom backend tokenizer</skipped></testcase><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_added_tokens_serialization" file="tests/test_tokenization_common.py" line="2428" time="0.429" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_batch_encode_dynamic_overflowing" file="tests/test_tokenization_common.py" line="2365" time="0.004" /><testcase 
classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_batch_encode_plus_batch_sequence_length" file="tests/test_tokenization_common.py" line="2155" time="0.011" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_character_tokenizer" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="447" time="0.420" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_chat_template" file="tests/test_tokenization_common.py" line="821" time="0.013" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_batch_encode_plus_padding" file="tests/test_tokenization_common.py" line="2201" time="0.008" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_chat_template_batched" file="tests/test_tokenization_common.py" line="924" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_bos_token_with_add_bos_token_false" file="tests/test_tokenization_common.py" line="2540" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_chat_template_dict" file="tests/test_tokenization_common.py" line="1361" time="0.004" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_bos_token_with_add_bos_token_true" file="tests/test_tokenization_common.py" line="2523" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_chat_template_return_assistant_tokens_mask_truncated" 
file="tests/test_tokenization_common.py" line="1175" time="0.394"><skipped type="pytest.skip" message="Custom backend tokenizer">/root/project/tests/test_tokenization_common.py:1176: Custom backend tokenizer</skipped></testcase><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_batch_encode_dynamic_overflowing" file="tests/test_tokenization_common.py" line="2365" time="0.402" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_special_tokens_mask_input_pairs" file="tests/test_tokenization_common.py" line="1803" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_chat_template_dict_saving" file="tests/test_tokenization_common.py" line="1377" time="0.008" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_batch_encode_plus_batch_sequence_length" file="tests/test_tokenization_common.py" line="2155" time="0.007" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_call" file="tests/test_tokenization_common.py" line="2126" time="0.010" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_chat_template_file_priority" file="tests/test_tokenization_common.py" line="1409" time="0.005" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_batch_encode_plus_padding" file="tests/test_tokenization_common.py" line="2201" time="0.007" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_full_tokenizer" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" 
line="438" time="0.001" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_bos_token_with_add_bos_token_false" file="tests/test_tokenization_common.py" line="2540" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_jinja_loopcontrols" file="tests/test_tokenization_common.py" line="958" time="0.005" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_sudachi_full_tokenizer_with_sudachi_kwargs_split_mode_B" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="250" time="0.048" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_get_vocab" file="tests/test_tokenization_common.py" line="2104" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_chat_template_return_assistant_tokens_mask" file="tests/test_tokenization_common.py" line="997" time="0.484"><skipped type="pytest.skip" message="Custom backend tokenizer">/root/project/tests/test_tokenization_common.py:998: Custom backend tokenizer</skipped></testcase><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_chat_template_return_assistant_tokens_mask_truncated" file="tests/test_tokenization_common.py" line="1175" time="0.002"><skipped type="pytest.skip" message="Custom backend tokenizer">/root/project/tests/test_tokenization_common.py:1176: Custom backend tokenizer</skipped></testcase><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_jinja_strftime" file="tests/test_tokenization_common.py" line="978" time="0.003" /><testcase 
classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_bos_token_with_add_bos_token_true" file="tests/test_tokenization_common.py" line="2523" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_sudachi_full_tokenizer_with_sudachi_kwargs_sudachi_projection" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="266" time="0.042" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_integration" file="tests/test_tokenization_common.py" line="712" time="0.001"><skipped type="pytest.skip" message="No integration expected tokens provided">/root/project/tests/test_tokenization_common.py:713: No integration expected tokens provided</skipped></testcase><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_chat_template_save_loading" file="tests/test_tokenization_common.py" line="882" time="0.002"><skipped type="pytest.skip" message="tokenizer doesn't accept chat templates at input">/root/project/tests/test_tokenization_common.py:883: tokenizer doesn't accept chat templates at input</skipped></testcase><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_sudachi_tokenizer_core" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="221" time="0.023" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_mask_output" file="tests/test_tokenization_common.py" line="779" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_call" file="tests/test_tokenization_common.py" line="2126" time="0.007" /><testcase 
classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_integration_from_extractor" file="tests/test_tokenization_common.py" line="734" time="0.001"><skipped type="pytest.skip" message="No integration expected tokens provided">/root/project/tests/test_tokenization_common.py:735: No integration expected tokens provided</skipped></testcase><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_sudachi_tokenizer_lower" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="274" time="0.024" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_continue_final_message" file="tests/test_tokenization_common.py" line="1278" time="0.006" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_maximum_encoding_length_pair_input" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="432" time="0.001" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_sudachi_tokenizer_no_normalize" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="280" time="0.024" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_continue_final_message_with_decoy_earlier_message" file="tests/test_tokenization_common.py" line="1336" time="0.005" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_maximum_encoding_length_single_input" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="435" time="0.001" /><testcase 
classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_internal_consistency" file="tests/test_tokenization_common.py" line="763" time="0.003"><failure message="AssertionError: '[SEP] [MASK] に ち は 、 世 界 。 [SEP] [MASK] ば [MASK] は 、 世 界 。' != 'こ ん に ち は 、 世 界 。 こ ん ば ん は 、 世 界 。'&#10;- [SEP] [MASK] に ち は 、 世 界 。 [SEP] [MASK] ば [MASK] は 、 世 界 。&#10;+ こ ん に ち は 、 世 界 。 こ ん ば ん は 、 世 界 。">self = &lt;tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest testMethod=test_internal_consistency&gt;
+
+    def test_internal_consistency(self):
+        tokenizer = self.get_tokenizer()
+        input_text, output_text = self.get_input_output_texts(tokenizer)
+    
+        tokens = tokenizer.tokenize(input_text)
+        ids = tokenizer.convert_tokens_to_ids(tokens)
+        ids_2 = tokenizer.encode(input_text, add_special_tokens=False)
+        self.assertListEqual(ids, ids_2)
+    
+        tokens_2 = tokenizer.convert_ids_to_tokens(ids)
+        self.assertNotEqual(len(tokens_2), 0)
+        text_2 = tokenizer.decode(ids)
+        self.assertIsInstance(text_2, str)
+    
+&gt;       self.assertEqual(text_2, output_text)
+E       AssertionError: '[SEP] [MASK] に ち は 、 世 界 。 [SEP] [MASK] ば [MASK] は 、 世 界 。' != 'こ ん に ち は 、 世 界 。 こ ん ば ん は 、 世 界 。'
+E       - [SEP] [MASK] に ち は 、 世 界 。 [SEP] [MASK] ば [MASK] は 、 世 界 。
+E       + こ ん に ち は 、 世 界 。 こ ん ば ん は 、 世 界 。
+
+tests/test_tokenization_common.py:778: AssertionError</failure></testcase><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_padding_with_attention_mask" file="tests/test_tokenization_common.py" line="1975" time="0.002" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_continue_final_message_with_trim" file="tests/test_tokenization_common.py" line="1306" time="0.002" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_sudachi_tokenizer_projection" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="258" time="0.024" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_prepare_seq2seq_batch" file="tests/test_tokenization_common.py" line="2323" time="0.011" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_pretokenized_inputs" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="429" time="0.001" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_right_and_left_truncation" file="tests/test_tokenization_common.py" line="1889" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_truncation_side_in_kwargs" file="tests/test_tokenization_common.py" line="1843" time="0.128" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_save_and_load_tokenizer" file="tests/test_tokenization_common.py" line="602" time="0.019" /><testcase 
classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_truncation_side_in_kwargs" file="tests/test_tokenization_common.py" line="1843" time="0.927" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_wordpiece_tokenizer" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="364" time="0.001" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_model_input_names_signature" file="tests/test_tokenization_common.py" line="511" time="0.402" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_number_of_added_tokens" file="tests/test_tokenization_common.py" line="1423" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_pad_token_initialization" file="tests/test_tokenization_common.py" line="2490" time="0.004" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_tokenize_special_tokens" file="tests/test_tokenization_common.py" line="491" time="0.409" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_tokenizer_initialization_with_conflicting_key" file="tests/test_tokenization_common.py" line="2455" time="0.005" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_tokenizer_store_full_signature" file="tests/test_tokenization_common.py" line="522" time="0.002" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.AutoTokenizerCustomTest" name="test_tokenizer_bert_japanese" 
file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="477" time="0.332" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_tokenizers_common_ids_setters" file="tests/test_tokenization_common.py" line="569" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_padding_side_in_kwargs" file="tests/test_tokenization_common.py" line="1826" time="0.127" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_padding_to_multiple_of" file="tests/test_tokenization_common.py" line="1943" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_tokenizers_common_properties" file="tests/test_tokenization_common.py" line="537" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_sentencepiece_tokenizer" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="380" time="1.477" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_sequence_builders" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="461" time="0.737" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_sequence_ids" file="tests/test_tokenization_common.py" line="798" time="0.004"><skipped type="pytest.skip" message="Tokenizers backend tokenizer">/root/project/tests/test_tokenization_common.py:799: Tokenizers backend tokenizer</skipped></testcase><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" 
name="test_conversion_reversible" file="tests/test_tokenization_common.py" line="2117" time="0.450"><failure message="AssertionError: 2 != 1">self = &lt;tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest testMethod=test_conversion_reversible&gt;
+
+    def test_conversion_reversible(self):
+        tokenizer = self.get_tokenizer(do_lower_case=False)
+        vocab = tokenizer.get_vocab()
+        for word, ind in vocab.items():
+            if word == tokenizer.unk_token:
+                continue
+&gt;           self.assertEqual(tokenizer.convert_tokens_to_ids(word), ind)
+E           AssertionError: 2 != 1
+
+tests/test_tokenization_common.py:2124: AssertionError</failure></testcase><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_empty_input_string" file="tests/test_tokenization_common.py" line="2462" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_special_tokens_mask" file="tests/test_tokenization_common.py" line="1786" time="0.005" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_encode_basic_padding" file="tests/test_tokenization_common.py" line="1860" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_special_tokens_mask_input_pairs" file="tests/test_tokenization_common.py" line="1803" time="0.005" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_encode_plus_with_padding_0" file="tests/test_tokenization_common.py" line="1992" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_token_type_ids" file="tests/test_tokenization_common.py" line="787" time="0.004" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseCharacterTokenizationTest" name="test_encode_plus_with_padding_1" file="tests/test_tokenization_common.py" line="1992" time="0.004" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_sequence_builders" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="390" time="0.426" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_sequence_ids" 
file="tests/test_tokenization_common.py" line="798" time="0.003"><skipped type="pytest.skip" message="Tokenizers backend tokenizer">/root/project/tests/test_tokenization_common.py:799: Tokenizers backend tokenizer</skipped></testcase><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_special_tokens_mask" file="tests/test_tokenization_common.py" line="1786" time="0.003" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_batch_encode_dynamic_overflowing" file="tests/test_tokenization_common.py" line="2365" time="1.410" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_bos_token_with_add_bos_token_true" file="tests/test_tokenization_common.py" line="2523" time="1.050" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_chat_template_dict" file="tests/test_tokenization_common.py" line="1361" time="1.172" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertTokenizerMismatchTest" name="test_tokenizer_mismatch_warning" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="484" time="1.324" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_continue_final_message_with_trim" file="tests/test_tokenization_common.py" line="1306" time="1.128" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_call" file="tests/test_tokenization_common.py" line="2126" time="0.422" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_batch_encode_plus_batch_sequence_length" file="tests/test_tokenization_common.py" line="2155" time="0.444" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_chat_template_return_assistant_tokens_mask_truncated" 
file="tests/test_tokenization_common.py" line="1175" time="1.207" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_encode_plus_with_padding_0" file="tests/test_tokenization_common.py" line="1992" time="1.082" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_padding_side_in_kwargs" file="tests/test_tokenization_common.py" line="1826" time="4.437" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_padding_to_multiple_of" file="tests/test_tokenization_common.py" line="1943" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_padding_with_attention_mask" file="tests/test_tokenization_common.py" line="1975" time="0.003" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_pickle_jumanpp_tokenizer" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="295" time="0.091" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_pickle_mecab_tokenizer" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" line="105" time="0.003" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_chat_template_save_loading" file="tests/test_tokenization_common.py" line="882" time="0.445"><skipped type="pytest.skip" message="tokenizer doesn't accept chat templates at input">/root/project/tests/test_tokenization_common.py:883: tokenizer doesn't accept chat templates at input</skipped></testcase><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_pickle_sudachi_tokenizer" file="tests/models/bert_japanese/test_tokenization_bert_japanese.py" 
line="200" time="0.114" /><testcase classname="tests.models.bert_japanese.test_tokenization_bert_japanese.BertJapaneseTokenizationTest" name="test_prepare_seq2seq_batch" file="tests/test_tokenization_common.py" line="2323" time="0.012" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_encode_plus_with_padding_1" file="tests/test_tokenization_common.py" line="1992" time="0.421" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_chat_template_dict_saving" file="tests/test_tokenization_common.py" line="1377" time="0.940" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_conversion_reversible" file="tests/test_tokenization_common.py" line="2117" time="0.704" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_continue_final_message" file="tests/test_tokenization_common.py" line="1278" time="0.256" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_batch_encode_plus_padding" file="tests/test_tokenization_common.py" line="2201" time="0.768" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_empty_input_string" file="tests/test_tokenization_common.py" line="2462" time="0.236" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_chat_template" file="tests/test_tokenization_common.py" line="821" time="0.966" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_get_vocab" file="tests/test_tokenization_common.py" line="2104" time="0.477" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_bos_token_with_add_bos_token_false" file="tests/test_tokenization_common.py" line="2540" time="0.334" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" 
name="test_chat_template_batched" file="tests/test_tokenization_common.py" line="924" time="0.220" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_continue_final_message_with_decoy_earlier_message" file="tests/test_tokenization_common.py" line="1336" time="0.455" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_pad_token_initialization" file="tests/test_tokenization_common.py" line="2490" time="0.224" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_encode_basic_padding" file="tests/test_tokenization_common.py" line="1860" time="0.433" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_added_tokens_serialization" file="tests/test_tokenization_common.py" line="2428" time="1.619" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_chat_template_file_priority" file="tests/test_tokenization_common.py" line="1409" time="0.798" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_integration" file="tests/test_tokenization_common.py" line="712" time="0.495" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_padding_with_attention_mask" file="tests/test_tokenization_common.py" line="1975" time="0.234" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_right_and_left_truncation" file="tests/test_tokenization_common.py" line="1889" time="0.412" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_jinja_strftime" file="tests/test_tokenization_common.py" line="978" time="1.083" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_special_tokens_mask" file="tests/test_tokenization_common.py" line="1786" time="0.227" 
/><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_chat_template_return_assistant_tokens_mask" file="tests/test_tokenization_common.py" line="997" time="0.522" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_special_tokens_mask_input_pairs" file="tests/test_tokenization_common.py" line="1803" time="0.233" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_prepare_seq2seq_batch" file="tests/test_tokenization_common.py" line="2323" time="0.439" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_tokenizer_initialization_with_conflicting_key" file="tests/test_tokenization_common.py" line="2455" time="0.005" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_mask_output" file="tests/test_tokenization_common.py" line="779" time="0.404" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_token_type_ids" file="tests/test_tokenization_common.py" line="787" time="0.266" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_integration_from_extractor" file="tests/test_tokenization_common.py" line="734" time="0.925" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_padding_side_in_kwargs" file="tests/test_tokenization_common.py" line="1826" time="0.989" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_padding_to_multiple_of" file="tests/models/clip/test_tokenization_clip.py" line="46" time="0.001"><skipped type="pytest.skip" message="Skipping padding to multiple of test bc vocab is too small.">/root/project/tests/models/clip/test_tokenization_clip.py:47: Skipping padding to multiple of test bc vocab is too small.</skipped></testcase><testcase 
classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_pretokenized_inputs" file="tests/test_tokenization_common.py" line="2243" time="0.280" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_tokenizer_store_full_signature" file="tests/test_tokenization_common.py" line="522" time="0.434" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_tokenize_special_tokens" file="tests/test_tokenization_common.py" line="491" time="0.416" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_tokenizers_common_ids_setters" file="tests/test_tokenization_common.py" line="569" time="0.466" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_tokenizers_common_properties" file="tests/test_tokenization_common.py" line="537" time="0.232" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_bos_token_with_add_bos_token_true" file="tests/test_tokenization_common.py" line="2523" time="0.966" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_truncation_side_in_kwargs" file="tests/test_tokenization_common.py" line="1843" time="0.825" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_call" file="tests/test_tokenization_common.py" line="2126" time="0.182" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_batch_encode_plus_padding" file="tests/test_tokenization_common.py" line="2201" time="0.927" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_maximum_encoding_length_single_input" file="tests/test_tokenization_common.py" line="1435" time="2.473" /><testcase 
classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_bos_token_with_add_bos_token_false" file="tests/test_tokenization_common.py" line="2540" time="0.204" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_chat_template_dict" file="tests/test_tokenization_common.py" line="1361" time="0.375" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_model_input_names_signature" file="tests/test_tokenization_common.py" line="511" time="0.314" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_chat_template_return_assistant_tokens_mask" file="tests/test_tokenization_common.py" line="997" time="0.307" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_chat_template" file="tests/test_tokenization_common.py" line="821" time="1.286" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_chat_template_file_priority" file="tests/test_tokenization_common.py" line="1409" time="0.449" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_added_tokens_serialization" file="tests/test_tokenization_common.py" line="2428" time="1.039" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_number_of_added_tokens" file="tests/test_tokenization_common.py" line="1423" time="0.437" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_chat_template_return_assistant_tokens_mask_truncated" file="tests/test_tokenization_common.py" line="1175" time="0.451" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_internal_consistency" file="tests/test_tokenization_common.py" line="763" time="2.269" /><testcase 
classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_maximum_encoding_length_pair_input" file="tests/test_tokenization_common.py" line="1530" time="2.596" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_chat_template_batched" file="tests/test_tokenization_common.py" line="924" time="0.393" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_continue_final_message_with_decoy_earlier_message" file="tests/test_tokenization_common.py" line="1336" time="0.208" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_chat_template_save_loading" file="tests/test_tokenization_common.py" line="882" time="0.429"><skipped type="pytest.skip" message="tokenizer doesn't accept chat templates at input">/root/project/tests/test_tokenization_common.py:883: tokenizer doesn't accept chat templates at input</skipped></testcase><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_save_and_load_tokenizer" file="tests/test_tokenization_common.py" line="602" time="3.233" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_continue_final_message_with_trim" file="tests/test_tokenization_common.py" line="1306" time="0.199" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_encode_plus_with_padding_0" file="tests/test_tokenization_common.py" line="1992" time="0.201" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_continue_final_message" file="tests/test_tokenization_common.py" line="1278" time="0.568" /><testcase classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_jinja_loopcontrols" file="tests/test_tokenization_common.py" line="958" time="0.468" /><testcase 
classname="tests.models.clip.test_tokenization_clip.CLIPTokenizationTest" name="test_sequence_ids" file="tests/test_tokenization_common.py" line="798" time="0.217" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_conversion_reversible" file="tests/test_tokenization_common.py" line="2117" time="0.391" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_chat_template_dict_saving" file="tests/test_tokenization_common.py" line="1377" time="0.855" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_get_vocab" file="tests/test_tokenization_common.py" line="2104" time="0.349" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_encode_basic_padding" file="tests/test_tokenization_common.py" line="1860" time="0.380" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_empty_input_string" file="tests/test_tokenization_common.py" line="2462" time="0.190" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_integration" file="tests/test_tokenization_common.py" line="712" time="0.516" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_mask_output" file="tests/test_tokenization_common.py" line="779" time="0.212" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_batch_encode_dynamic_overflowing" file="tests/test_tokenization_common.py" line="2365" time="0.923"><skipped type="pytest.skip" message="This tokenizer has no padding token set, or pad_token_id &lt; 0">/root/project/tests/test_tokenization_common.py:2366: This tokenizer has no padding token set, or pad_token_id &lt; 0</skipped></testcase><testcase 
classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_encode_plus_with_padding_1" file="tests/test_tokenization_common.py" line="1992" time="0.601" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_jinja_loopcontrols" file="tests/test_tokenization_common.py" line="958" time="0.399" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_jinja_strftime" file="tests/test_tokenization_common.py" line="978" time="0.209" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_number_of_added_tokens" file="tests/test_tokenization_common.py" line="1423" time="0.388" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_batch_encode_plus_batch_sequence_length" file="tests/test_tokenization_common.py" line="2155" time="0.205" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_pad_token_initialization" file="tests/test_tokenization_common.py" line="2490" time="0.219" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_padding_with_attention_mask" file="tests/test_tokenization_common.py" line="1975" time="0.207"><skipped type="pytest.skip" message="No padding token.">/root/project/tests/test_tokenization_common.py:1976: No padding token.</skipped></testcase><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_integration_from_extractor" file="tests/test_tokenization_common.py" line="734" time="0.758" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_padding_to_multiple_of" file="tests/test_tokenization_common.py" line="1943" time="0.191"><skipped type="pytest.skip" message="No padding 
token.">/root/project/tests/test_tokenization_common.py:1944: No padding token.</skipped></testcase><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_model_input_names_signature" file="tests/test_tokenization_common.py" line="511" time="0.188" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_tokenizer_initialization_with_conflicting_key" file="tests/test_tokenization_common.py" line="2455" time="0.002" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_special_tokens_mask" file="tests/test_tokenization_common.py" line="1786" time="0.183" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_pretokenized_inputs" file="tests/test_tokenization_common.py" line="2243" time="0.414" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_sequence_ids" file="tests/test_tokenization_common.py" line="798" time="0.400" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_special_tokens_mask_input_pairs" file="tests/test_tokenization_common.py" line="1803" time="0.207" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_padding_side_in_kwargs" file="tests/test_tokenization_common.py" line="1826" time="0.895" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_tokenize_special_tokens" file="tests/test_tokenization_common.py" line="491" time="0.214" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_tokenizers_common_ids_setters" file="tests/test_tokenization_common.py" line="569" time="0.414" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" 
name="test_tokenizer_store_full_signature" file="tests/test_tokenization_common.py" line="522" time="0.391" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_prepare_seq2seq_batch" file="tests/test_tokenization_common.py" line="2323" time="0.208" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_tokenizers_common_properties" file="tests/test_tokenization_common.py" line="537" time="0.182" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_truncation_side_in_kwargs" file="tests/test_tokenization_common.py" line="1843" time="0.664" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_internal_consistency" file="tests/test_tokenization_common.py" line="763" time="2.333" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_maximum_encoding_length_pair_input" file="tests/test_tokenization_common.py" line="1530" time="2.014" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_right_and_left_truncation" file="tests/test_tokenization_common.py" line="1889" time="0.192" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_save_and_load_tokenizer" file="tests/test_tokenization_common.py" line="602" time="2.921" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_token_type_ids" file="tests/test_tokenization_common.py" line="787" time="0.179" /><testcase classname="tests.models.openai.test_tokenization_openai.OpenAIGPTTokenizationTest" name="test_maximum_encoding_length_single_input" file="tests/test_tokenization_common.py" line="1435" time="1.935" /></testsuite></testsuites>
\ No newline at end of file
diff --git a/tests/utils/test_process_circleci_workflow_test_reports.py b/tests/utils/test_process_circleci_workflow_test_reports.py
new file mode 100644
index 000000000000..a3b5128a4576
--- /dev/null
+++ b/tests/utils/test_process_circleci_workflow_test_reports.py
@@ -0,0 +1,125 @@
+import json
+from pathlib import Path
+from xml.etree import ElementTree as ET
+
+from utils.process_circleci_workflow_test_reports import process_circleci_workflow
+
+
+class _FakeResponse:
+    # Minimal stand-in for an HTTP response: exposes `text`, `status_code` and `json()`.
+    def __init__(self, *, text: str | None = None, json_data: dict | None = None, status_code: int = 200):
+        self.text, self._json_data, self.status_code = text or "", json_data, status_code
+
+    def json(self):
+        payload = self._json_data
+        if payload is None:
+            raise ValueError("No JSON payload in fake response.")
+        return payload
+
+
+def _build_artifacts_from_junit(junit_path: Path):
+    """Return one `(node id, error message)` pair per `<testcase>` that has a `<failure>` child."""
+    collected = []
+    for case in ET.parse(junit_path).iterfind(".//testcase"):
+        failure_node = case.find("failure")
+        if failure_node is not None:
+            attrs = case.attrib
+            short_class = attrs.get("classname", "").rsplit(".", 1)[-1]
+            node_id = f"{attrs['file']}::{short_class}::{attrs['name']}"
+            # Prefer the `message` attribute; fall back to the element text when it is empty.
+            message = failure_node.attrib.get("message", "").strip()
+            message = message or (failure_node.text or "").strip()
+            collected.append((node_id, message))
+    return collected
+
+
+def test_failure_summary_generated_from_junit_fixture(tmp_path, monkeypatch):  # end-to-end run over faked HTTP calls
+    tests_dir = Path(__file__).resolve().parents[1]  # parent of the directory containing this file
+    junit_path = tests_dir / "fixtures" / "circleci" / "junit_sample.xml"
+    junit_failures = _build_artifacts_from_junit(junit_path)
+
+    summary_lines = [f"FAILED {nodeid} - {message}" for nodeid, message in junit_failures]
+    failure_lines = [f"{nodeid}: {message}" for nodeid, message in junit_failures]
+
+    # Add a synthetic failure under tests/models to exercise the per-model aggregation.
+    model_test = "tests/models/bert/test_modeling_bert.py::BertModelTest::test_forward"
+    model_error = "AssertionError: logits mismatch"
+    summary_lines.append(f"FAILED {model_test} - {model_error}")
+    failure_lines.append(f"{model_test}: {model_error}")
+
+    summary_short_text = "\n".join(summary_lines)
+    failures_line_text = "\n".join(failure_lines)
+
+    workflow_response = {
+        "items": [
+            {
+                "project_slug": "gh/huggingface/transformers",
+                "job_number": 42,
+                "name": "tests_torch",
+            }
+        ]
+    }
+    artifacts_response = {
+        "items": [
+            {"path": "reports/tests_torch/summary_short.txt", "url": "https://example.com/summary", "node_index": 0},
+            {"path": "reports/tests_torch/failures_line.txt", "url": "https://example.com/failures", "node_index": 0},
+        ]
+    }
+
+    def fake_get(url, headers=None):  # serves the canned payloads above; any other URL fails the test
+        if url.endswith("/workflow/test-workflow/job"):
+            return _FakeResponse(json_data=workflow_response)
+        if url.endswith("/project/gh/huggingface/transformers/42/artifacts"):
+            return _FakeResponse(json_data=artifacts_response)
+        if url == "https://example.com/summary":
+            return _FakeResponse(text=summary_short_text)
+        if url == "https://example.com/failures":
+            return _FakeResponse(text=failures_line_text)
+        raise AssertionError(f"Unexpected URL requested: {url}")
+
+    captured_post = {}  # filled in by fake_post so the posted request can be inspected below
+
+    def fake_post(url, headers=None, json=None):
+        captured_post["url"] = url
+        captured_post["headers"] = headers
+        captured_post["json"] = json
+        return _FakeResponse(text="ok", status_code=201)
+
+    monkeypatch.setenv("CIRCLE_PULL_REQUEST", "https://github.com/huggingface/transformers/pull/456")
+    monkeypatch.setenv("GITHUB_TOKEN", "dummy-token")
+    monkeypatch.chdir(tmp_path)  # switch the working directory to the pytest temporary directory
+    output_dir = tmp_path / "outputs"
+    process_circleci_workflow(
+        "test-workflow",
+        output_dir=str(output_dir),
+        request_get=fake_get,
+        request_post=fake_post,
+    )
+
+    failure_summary_path = output_dir / "failure_summary.json"
+    assert failure_summary_path.is_file()
+
+    with open(failure_summary_path) as fp:
+        failure_summary = json.load(fp)
+
+    assert len(failure_summary["failures"]) == len(summary_lines)  # one entry per FAILED line served
+
+    sample_test = junit_failures[0][0]
+    assert sample_test in failure_summary["by_test"]
+    assert failure_summary["by_test"][sample_test]["count"] == 1
+    error_key = f"{sample_test}: {junit_failures[0][1]}"
+    assert error_key in failure_summary["by_test"][sample_test]["errors"]
+
+    assert "bert" in failure_summary["by_model"]
+    assert failure_summary["by_model"]["bert"]["count"] == 1
+    model_error_key = f"{model_test}: {model_error}"
+    assert failure_summary["by_model"]["bert"]["errors"][model_error_key] == 1
+
+    failure_summary_md = output_dir / "failure_summary.md"
+    assert failure_summary_md.is_file()
+    md_contents = failure_summary_md.read_text()
+    assert "Failure summary" in md_contents
+    assert "tests/models/bert/test_modeling_bert.py" in md_contents
+
+    assert captured_post["url"].endswith("/issues/456/comments")
+    assert captured_post["json"]["body"] == md_contents  # the posted body is the markdown file verbatim
diff --git a/utils/process_circleci_workflow_test_reports.py b/utils/process_circleci_workflow_test_reports.py
index eb61f6d586e5..2b0813f0a87a 100644
--- a/utils/process_circleci_workflow_test_reports.py
+++ b/utils/process_circleci_workflow_test_reports.py
@@ -11,47 +11,222 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from __future__ import annotations
+
 import argparse
 import json
 import os
+import re
+from collections import Counter
+from typing import Callable
 
 import requests
 
 
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--workflow_id", type=str, required=True)
-    args = parser.parse_args()
-    workflow_id = args.workflow_id
+def _extract_failed_tests(summary_short: str) -> list[tuple[str, str]]:
+    """
+    Return a list of tuples (<test node id>, <error message from short summary>).
+    """
+    failed_tests = []
+    for line in summary_short.splitlines():
+        if not line.startswith("FAILED "):
+            continue
+        # Skip subprocess failures created by `run_test_using_subprocess`
+        if " - Failed: (subprocess)" in line:
+            continue
+        failure_line = line[len("FAILED ") :].strip()
+        test_node, sep, error_message = failure_line.partition(" - ")
+        failed_tests.append((test_node.strip(), error_message.strip()))
+
+    return failed_tests
+
+
+def _extract_failure_lines(failures_line: str | None) -> list[str]:
+    if not failures_line:
+        return []
+
+    failure_lines = []
+    for raw_line in failures_line.splitlines():
+        raw_line = raw_line.strip()
+        if (
+            not raw_line
+            or raw_line.startswith("=")
+            or raw_line.startswith("_")
+            or raw_line.lower().startswith("short test summary")
+        ):
+            continue
+        if ": " not in raw_line:
+            continue
+        failure_lines.append(raw_line)
+
+    return failure_lines
+
+
+def _derive_model_name(test_node_id: str) -> str | None:
+    """
+    Given a pytest node id (e.g. tests/models/bart/test_modeling_bart.py::BartModelTest::test_forward),
+    extract the model name when it lives under `tests/models`.
+    """
+    file_path = test_node_id.split("::", maxsplit=1)[0]
+    if file_path.startswith("tests/models/"):
+        parts = file_path.split("/")
+        if len(parts) >= 3:
+            return parts[2]
+    return None
+
+
+def _aggregate_failures(failure_entries: list[dict]) -> tuple[dict, dict]:
+    by_test: dict[str, dict] = {}
+    by_model: dict[str, dict] = {}
+
+    for entry in failure_entries:
+        test_name = entry["test_name"]
+        model_name = entry["model_name"]
+        error_message = entry["error"]
+
+        test_info = by_test.setdefault(test_name, {"count": 0, "errors": Counter(), "jobs": set()})
+        test_info["count"] += 1
+        test_info["errors"][error_message] += 1
+        test_info["jobs"].add(entry["job_name"])
+
+        if model_name:
+            model_info = by_model.setdefault(model_name, {"count": 0, "errors": Counter(), "tests": set()})
+            model_info["count"] += 1
+            model_info["errors"][error_message] += 1
+            model_info["tests"].add(test_name)
+
+    # Convert counters and sets to serializable forms
+    def _prepare(entries: dict, include_tests: bool = False):
+        prepared = {}
+        for key, value in entries.items():
+            prepared[key] = {
+                "count": value["count"],
+                "errors": dict(value["errors"].most_common()),
+            }
+            if include_tests:
+                prepared[key]["tests"] = sorted(value["tests"])
+            else:
+                prepared[key]["jobs"] = sorted(value["jobs"])
+        return prepared
+
+    return _prepare(by_test), _prepare(by_model, include_tests=True)
+
+
+def _format_error_messages(errors: dict[str, int]) -> str:
+    return "; ".join(f"{count}× {msg}" for msg, count in errors.items()) or "N/A"
+
+
+def _format_markdown_table(rows: list[list[str]], headers: list[str]) -> str:
+    if not rows:
+        return "No data\n"
+
+    header_line = "| " + " | ".join(headers) + " |"
+    separator = "| " + " | ".join(["---"] * len(headers)) + " |"
+    table_lines = [header_line, separator]
+    table_lines.extend("| " + " | ".join(row) + " |" for row in rows)
+    return "\n".join(table_lines) + "\n"
+
 
-    r = requests.get(
+def _get_pr_details_from_env() -> tuple[str, str, str] | None:
+    """
+    Returns (owner, repo, pr_number) if we can infer them from the environment.
+
+    CircleCI does not always expose `CIRCLE_PULL_REQUEST`, so the collection job exports `PR_NUMBER`
+    beforehand via `utils/extract_pr_number_from_circleci.py`. We try every known source before giving up.
+    """
+    pr_url_candidates = [
+        os.environ.get("CIRCLE_PULL_REQUEST"),
+        os.environ.get("GITHUB_PULL_REQUEST_URL"),
+    ]
+    for pr_url in pr_url_candidates:
+        if not pr_url:
+            continue
+        match = re.match(
+            r"https://github.com/(?P<owner>[^/]+)/(?P<repo>[^/]+)/pull/(?P<number>\d+)", pr_url
+        )
+        if match:
+            return match.group("owner"), match.group("repo"), match.group("number")
+
+    repo = os.environ.get("GITHUB_REPOSITORY")
+    pr_number = os.environ.get("PR_NUMBER")
+    if not pr_number:
+        github_ref = os.environ.get("GITHUB_REF", "")
+        match = re.search(r"refs/pull/(\d+)/", github_ref)
+        if match:
+            pr_number = match.group(1)
+    if repo and pr_number:
+        owner, repo_name = repo.split("/", 1)
+        return owner, repo_name, pr_number
+    return None
+
+
+def _get_github_token() -> str | None:
+    for env_var in ("GITHUB_TOKEN", "GH_TOKEN", "GITHUB_ACCESS_TOKEN"):
+        token = os.environ.get(env_var)
+        if token:
+            return token
+    return None
+
+
+def _post_failure_summary_comment(markdown_text: str, request_post: Callable = requests.post) -> bool:
+    pr_details = _get_pr_details_from_env()
+    token = _get_github_token()
+    if not pr_details or not token:
+        return False
+    owner, repo, pr_number = pr_details
+    url = f"https://api.github.com/repos/{owner}/{repo}/issues/{pr_number}/comments"
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Accept": "application/vnd.github+json",
+    }
+    response = request_post(url, headers=headers, json={"body": markdown_text})
+    if not (200 <= getattr(response, "status_code", 0) < 300):
+        print(f"Failed to post PR comment: {getattr(response, 'status_code', 'unknown')} {getattr(response, 'text', '')}")
+        return False
+    return True
+
+
+def process_circleci_workflow(
+    workflow_id: str,
+    output_dir: str = "outputs",
+    request_get: Callable = requests.get,
+    request_post: Callable = requests.post,
+):
+    response = request_get(
         f"https://circleci.com/api/v2/workflow/{workflow_id}/job",
         headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")},
     )
-    jobs = r.json()["items"]
+    jobs = response.json()["items"]
 
-    os.makedirs("outputs", exist_ok=True)
+    os.makedirs(output_dir, exist_ok=True)
 
     workflow_summary = {}
+    failure_entries: list[dict] = []
     # for each job, download artifacts
     for job in jobs:
         project_slug = job["project_slug"]
         if job["name"].startswith(("tests_", "examples_", "pipelines_")):
             url = f"https://circleci.com/api/v2/project/{project_slug}/{job['job_number']}/artifacts"
-            r = requests.get(url, headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")})
+            r = request_get(url, headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")})
             job_artifacts = r.json()["items"]
 
-            os.makedirs(job["name"], exist_ok=True)
-            os.makedirs(f"outputs/{job['name']}", exist_ok=True)
+            job_output_dir = os.path.join(output_dir, job["name"])
+            os.makedirs(job_output_dir, exist_ok=True)
 
             job_test_summaries = {}
+            job_failure_lines = {}
             for artifact in job_artifacts:
                 if artifact["path"].startswith("reports/") and artifact["path"].endswith("/summary_short.txt"):
                     node_index = artifact["node_index"]
-                    url = artifact["url"]
-                    r = requests.get(url, headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")})
+                    artifact_url = artifact["url"]
+                    r = request_get(artifact_url, headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")})
                     test_summary = r.text
                     job_test_summaries[node_index] = test_summary
+                elif artifact["path"].startswith("reports/") and artifact["path"].endswith("/failures_line.txt"):
+                    node_index = artifact["node_index"]
+                    artifact_url = artifact["url"]
+                    r = request_get(artifact_url, headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")})
+                    job_failure_lines[node_index] = r.text
 
             summary = {}
             for node_index, node_test_summary in job_test_summaries.items():
@@ -67,9 +242,26 @@
             workflow_summary[job["name"]] = summary
 
             # collected version
-            with open(f"outputs/{job['name']}/test_summary.json", "w") as fp:
+            with open(os.path.join(job_output_dir, "test_summary.json"), "w") as fp:
                 json.dump(summary, fp, indent=4)
 
+            # Collect failure details per node for this job
+            for node_index, summary_short in job_test_summaries.items():
+                failed_tests = _extract_failed_tests(summary_short)
+                failure_lines = _extract_failure_lines(job_failure_lines.get(node_index))
+                for idx, (test_name, short_error) in enumerate(failed_tests):
+                    full_error = failure_lines[idx] if idx < len(failure_lines) else short_error
+                    failure_entries.append(
+                        {
+                            "job_name": job["name"],
+                            "node_index": node_index,
+                            "test_name": test_name,
+                            "short_error": short_error,
+                            "error": full_error,
+                            "model_name": _derive_model_name(test_name),
+                        }
+                    )
+
     new_workflow_summary = {}
     for job_name, job_summary in workflow_summary.items():
         for test, status in job_summary.items():
@@ -81,5 +273,61 @@
         new_workflow_summary[test] = dict(sorted(result.items()))
     new_workflow_summary = dict(sorted(new_workflow_summary.items()))
 
-    with open("outputs/test_summary.json", "w") as fp:
+    with open(os.path.join(output_dir, "test_summary.json"), "w") as fp:
         json.dump(new_workflow_summary, fp, indent=4)
+
+    failures_by_test, failures_by_model = _aggregate_failures(failure_entries)
+    failure_summary = {
+        "failures": failure_entries,
+        "by_test": failures_by_test,
+        "by_model": failures_by_model,
+    }
+
+    with open(os.path.join(output_dir, "failure_summary.json"), "w") as fp:
+        json.dump(failure_summary, fp, indent=4)
+
+    markdown_buffer = ["# Failure summary\n"]
+    if failure_entries:
+        markdown_buffer.append("## By test\n")
+        test_rows = []
+        for test_name, info in sorted(failures_by_test.items(), key=lambda x: x[1]["count"], reverse=True):
+            test_rows.append(
+                [
+                    test_name,
+                    str(info["count"]),
+                    _format_error_messages(info["errors"]),
+                ]
+            )
+        markdown_buffer.append(_format_markdown_table(test_rows, ["Test", "Failures", "Full error(s)"]))
+
+        markdown_buffer.append("## By model\n")
+        model_rows = []
+        for model_name, info in sorted(failures_by_model.items(), key=lambda x: x[1]["count"], reverse=True):
+            model_rows.append(
+                [
+                    model_name,
+                    str(info["count"]),
+                    _format_error_messages(info["errors"]),
+                ]
+            )
+        markdown_buffer.append(_format_markdown_table(model_rows, ["Model", "Failures", "Full error(s)"]))
+    else:
+        markdown_buffer.append("No failures were reported.\n")
+
+    markdown_text = "\n".join(markdown_buffer)
+    with open(os.path.join(output_dir, "failure_summary.md"), "w") as fp:
+        fp.write(markdown_text)
+
+    _post_failure_summary_comment(markdown_text, request_post=request_post)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--workflow_id", type=str, required=True)
+    args = parser.parse_args()
+    workflow_id = args.workflow_id
+    process_circleci_workflow(workflow_id)
+
+
+if __name__ == "__main__":
+    main()

From 29a4e0992b45829926c550e9c28448f1ca19db95 Mon Sep 17 00:00:00 2001
From: Arthur <arthur.zucker@gmail.com>
Date: Mon, 17 Nov 2025 13:44:46 +0100
Subject: [PATCH 02/16] add forceful failures

---
 src/transformers/models/llama/tokenization_llama.py | 1 +
 src/transformers/models/mixtral/modeling_mixtral.py | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/transformers/models/llama/tokenization_llama.py b/src/transformers/models/llama/tokenization_llama.py
index 971c7c49ddaf..63204a359de5 100644
--- a/src/transformers/models/llama/tokenization_llama.py
+++ b/src/transformers/models/llama/tokenization_llama.py
@@ -237,6 +237,7 @@ def tokenize(self, text: "TextInput", **kwargs) -> list[str]:
         Converts a string to a list of tokens. If `self.legacy` is set to `False`, a prefix token is added unless the
         first token is special.
         """
+        return super().tokenize(text, **kwargs) # Just to have failures :)
         if self.legacy or len(text) == 0:
             return super().tokenize(text, **kwargs)
 
diff --git a/src/transformers/models/mixtral/modeling_mixtral.py b/src/transformers/models/mixtral/modeling_mixtral.py
index 1faff1f4dcea..90ea7d1973c4 100644
--- a/src/transformers/models/mixtral/modeling_mixtral.py
+++ b/src/transformers/models/mixtral/modeling_mixtral.py
@@ -282,8 +282,8 @@ def eager_attention_forward(
         causal_mask = attention_mask[:, :, :, : key_states.shape[-2]]
         attn_weights = attn_weights + causal_mask
 
-    attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query.dtype)
-    attn_weights = nn.functional.dropout(attn_weights, p=dropout, training=module.training)
+    # attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query.dtype)
+    # attn_weights = nn.functional.dropout(attn_weights, p=dropout, training=module.training)
     attn_output = torch.matmul(attn_weights, value_states)
     attn_output = attn_output.transpose(1, 2).contiguous()
 

From 7fb137552d1a4d5e17eeca884bd1fc0a3d619017 Mon Sep 17 00:00:00 2001
From: Arthur <arthur.zucker@gmail.com>
Date: Mon, 17 Nov 2025 14:43:16 +0100
Subject: [PATCH 03/16] add prints to debug the hell out of it

---
 .../process_circleci_workflow_test_reports.py | 47 ++++++++++++++++---
 1 file changed, 40 insertions(+), 7 deletions(-)

diff --git a/utils/process_circleci_workflow_test_reports.py b/utils/process_circleci_workflow_test_reports.py
index 2b0813f0a87a..9a61f7ff075b 100644
--- a/utils/process_circleci_workflow_test_reports.py
+++ b/utils/process_circleci_workflow_test_reports.py
@@ -127,13 +127,26 @@ def _format_markdown_table(rows: list[list[str]], headers: list[str]) -> str:
     return "\n".join(table_lines) + "\n"
 
 
+def _get_repo_owner_defaults() -> tuple[str, str]:
+    repo = os.environ.get("GITHUB_REPOSITORY")
+    if repo and "/" in repo:
+        owner, repo_name = repo.split("/", 1)
+        print(f"Detected repository from GITHUB_REPOSITORY: {owner}/{repo_name}")
+        return owner, repo_name
+    # CircleCI does not always set GITHUB_REPOSITORY; we fall back to the canonical repository.
+    print("GITHUB_REPOSITORY not set; defaulting to huggingface/transformers.")
+    return "huggingface", "transformers"
+
+
 def _get_pr_details_from_env() -> tuple[str, str, str] | None:
     """
     Returns (owner, repo, pr_number) if we can infer them from the environment.
 
     CircleCI does not always expose `CIRCLE_PULL_REQUEST`, so the collection job exports `PR_NUMBER`
-    beforehand via `utils/extract_pr_number_from_circleci.py`. We try every known source before giving up.
+    beforehand via `utils/extract_pr_number_from_circleci.py`. We try every known source before giving up, falling
+    back to CircleCI specific environment variables when needed.
     """
+    owner, repo_name = _get_repo_owner_defaults()
     pr_url_candidates = [
         os.environ.get("CIRCLE_PULL_REQUEST"),
         os.environ.get("GITHUB_PULL_REQUEST_URL"),
@@ -145,18 +158,27 @@ def _get_pr_details_from_env() -> tuple[str, str, str] | None:
             r"https://github.com/(?P<owner>[^/]+)/(?P<repo>[^/]+)/pull/(?P<number>\d+)", pr_url
         )
         if match:
-            return match.group("owner"), match.group("repo"), match.group("number")
+            owner = match.group("owner")
+            repo_name = match.group("repo")
+            pr_number = match.group("number")
+            print(f"Detected PR info from PR URL: {owner}/{repo_name}#{pr_number}")
+            return owner, repo_name, pr_number
 
-    repo = os.environ.get("GITHUB_REPOSITORY")
-    pr_number = os.environ.get("PR_NUMBER")
+    pr_number = os.environ.get("PR_NUMBER") or os.environ.get("CIRCLE_PR_NUMBER")
     if not pr_number:
         github_ref = os.environ.get("GITHUB_REF", "")
         match = re.search(r"refs/pull/(\d+)/", github_ref)
         if match:
             pr_number = match.group(1)
-    if repo and pr_number:
-        owner, repo_name = repo.split("/", 1)
+    if pr_number:
+        print(f"Detected PR info from environment variables: {owner}/{repo_name}#{pr_number}")
         return owner, repo_name, pr_number
+    circle_owner = os.environ.get("CIRCLE_PROJECT_USERNAME") or owner
+    circle_repo = os.environ.get("CIRCLE_PROJECT_REPONAME") or repo_name
+    circle_pr = os.environ.get("PR_NUMBER") or os.environ.get("CIRCLE_PR_NUMBER")
+    if circle_pr:
+        print(f"Detected PR info from CircleCI variables: {circle_owner}/{circle_repo}#{circle_pr}")
+        return circle_owner, circle_repo, circle_pr
     return None
 
 
@@ -164,7 +186,9 @@ def _get_github_token() -> str | None:
     for env_var in ("GITHUB_TOKEN", "GH_TOKEN", "GITHUB_ACCESS_TOKEN"):
         token = os.environ.get(env_var)
         if token:
+            print(f"Using GitHub token from {env_var}.")
             return token
+    print("GitHub token not found in environment (GITHUB_TOKEN / GH_TOKEN / GITHUB_ACCESS_TOKEN).")
     return None
 
 
@@ -172,6 +196,10 @@ def _post_failure_summary_comment(markdown_text: str, request_post: Callable = r
     pr_details = _get_pr_details_from_env()
     token = _get_github_token()
     if not pr_details or not token:
+        if not pr_details:
+            print("Skipping PR comment: PR metadata not available in the environment.")
+        if not token:
+            print("Skipping PR comment: missing GitHub token (GITHUB_TOKEN / GH_TOKEN / GITHUB_ACCESS_TOKEN).")
         return False
     owner, repo, pr_number = pr_details
     url = f"https://api.github.com/repos/{owner}/{repo}/issues/{pr_number}/comments"
@@ -179,10 +207,15 @@ def _post_failure_summary_comment(markdown_text: str, request_post: Callable = r
         "Authorization": f"Bearer {token}",
         "Accept": "application/vnd.github+json",
     }
+    print(f"Posting failure summary comment to {owner}/{repo}#{pr_number}.")
     response = request_post(url, headers=headers, json={"body": markdown_text})
     if not (200 <= getattr(response, "status_code", 0) < 300):
-        print(f"Failed to post PR comment: {getattr(response, 'status_code', 'unknown')} {getattr(response, 'text', '')}")
+        print(
+            f"Failed to post PR comment: {getattr(response, 'status_code', 'unknown')} "
+            f"{getattr(response, 'text', '')}"
+        )
         return False
+    print("Posted failure summary comment on the pull request.")
     return True
 
 

From e5f8eb4fb03fb8c1114d9f5b9677a40dc7f04f08 Mon Sep 17 00:00:00 2001
From: Arthur <arthur.zucker@gmail.com>
Date: Mon, 17 Nov 2025 14:52:18 +0100
Subject: [PATCH 04/16] small updates

---
 ..._process_circleci_workflow_test_reports.py |  1 +
 .../process_circleci_workflow_test_reports.py | 31 ++++++++++++++++++-
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/tests/utils/test_process_circleci_workflow_test_reports.py b/tests/utils/test_process_circleci_workflow_test_reports.py
index a3b5128a4576..4e84f2b21c0f 100644
--- a/tests/utils/test_process_circleci_workflow_test_reports.py
+++ b/tests/utils/test_process_circleci_workflow_test_reports.py
@@ -109,6 +109,7 @@ def fake_post(url, headers=None, json=None):
     assert failure_summary["by_test"][sample_test]["count"] == 1
     error_key = f"{sample_test}: {junit_failures[0][1]}"
     assert error_key in failure_summary["by_test"][sample_test]["errors"]
+    assert sample_test in failure_summary["by_test"][sample_test]["variants"]
 
     assert "bert" in failure_summary["by_model"]
     assert failure_summary["by_model"]["bert"]["count"] == 1
diff --git a/utils/process_circleci_workflow_test_reports.py b/utils/process_circleci_workflow_test_reports.py
index 9a61f7ff075b..8dff7169fad4 100644
--- a/utils/process_circleci_workflow_test_reports.py
+++ b/utils/process_circleci_workflow_test_reports.py
@@ -83,11 +83,15 @@ def _aggregate_failures(failure_entries: list[dict]) -> tuple[dict, dict]:
         test_name = entry["test_name"]
         model_name = entry["model_name"]
         error_message = entry["error"]
+        normalized_test_name = _normalize_test_nodeid(test_name)
 
-        test_info = by_test.setdefault(test_name, {"count": 0, "errors": Counter(), "jobs": set()})
+        test_info = by_test.setdefault(
+            normalized_test_name, {"count": 0, "errors": Counter(), "jobs": set(), "variants": set()}
+        )
         test_info["count"] += 1
         test_info["errors"][error_message] += 1
         test_info["jobs"].add(entry["job_name"])
+        test_info["variants"].add(test_name)
 
         if model_name:
             model_info = by_model.setdefault(model_name, {"count": 0, "errors": Counter(), "tests": set()})
@@ -107,6 +111,7 @@ def _prepare(entries: dict, include_tests: bool = False):
                 prepared[key]["tests"] = sorted(value["tests"])
             else:
                 prepared[key]["jobs"] = sorted(value["jobs"])
+                prepared[key]["variants"] = sorted(value["variants"])
         return prepared
 
     return _prepare(by_test), _prepare(by_model, include_tests=True)
@@ -127,6 +132,21 @@ def _format_markdown_table(rows: list[list[str]], headers: list[str]) -> str:
     return "\n".join(table_lines) + "\n"
 
 
+def _normalize_test_nodeid(nodeid: str) -> str:
+    """
+    Normalizes a pytest node id by removing bracketed parametrization info
+    and collapsing suffixes such as `_05_fp16_pad_left` that come from parameter ids.
+    """
+    base_nodeid = nodeid.split("[", 1)[0]
+    parts = base_nodeid.split("::")
+    if not parts:
+        return base_nodeid
+    test_name = parts[-1]
+    test_name = re.sub(r"_\d{2,}.*$", "", test_name)
+    normalized = "::".join(parts[:-1] + [test_name])
+    return normalized
+
+
 def _get_repo_owner_defaults() -> tuple[str, str]:
     repo = os.environ.get("GITHUB_REPOSITORY")
     if repo and "/" in repo:
@@ -225,11 +245,13 @@ def process_circleci_workflow(
     request_get: Callable = requests.get,
     request_post: Callable = requests.post,
 ):
+    print(f"[collection_job] Processing CircleCI workflow {workflow_id}")
     response = request_get(
         f"https://circleci.com/api/v2/workflow/{workflow_id}/job",
         headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")},
     )
     jobs = response.json()["items"]
+    print(f"[collection_job] Found {len(jobs)} jobs in workflow.")
 
     os.makedirs(output_dir, exist_ok=True)
 
@@ -239,9 +261,11 @@ def process_circleci_workflow(
     for job in jobs:
         project_slug = job["project_slug"]
         if job["name"].startswith(("tests_", "examples_", "pipelines_")):
+            print(f"[collection_job] Fetching artifacts for job {job['name']} (#{job['job_number']})")
             url = f"https://circleci.com/api/v2/project/{project_slug}/{job['job_number']}/artifacts"
             r = request_get(url, headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")})
             job_artifacts = r.json()["items"]
+            print(f"[collection_job] Retrieved {len(job_artifacts)} artifacts for {job['name']}.")
 
             job_output_dir = os.path.join(output_dir, job["name"])
             os.makedirs(job_output_dir, exist_ok=True)
@@ -273,6 +297,7 @@ def process_circleci_workflow(
             # failed before passed
             summary = dict(sorted(summary.items(), key=lambda x: (x[1], x[0])))
             workflow_summary[job["name"]] = summary
+            print(f"[collection_job] Recorded {len(summary)} test rows for {job['name']}.")
 
             # collected version
             with open(os.path.join(job_output_dir, "test_summary.json"), "w") as fp:
@@ -294,6 +319,9 @@ def process_circleci_workflow(
                             "model_name": _derive_model_name(test_name),
                         }
                     )
+            if job_test_summaries:
+                failures_in_job = sum(1 for status in summary.values() if status == "failed")
+                print(f"[collection_job] Aggregated {failures_in_job} failures for {job['name']}.")
 
     new_workflow_summary = {}
     for job_name, job_summary in workflow_summary.items():
@@ -315,6 +343,7 @@ def process_circleci_workflow(
         "by_test": failures_by_test,
         "by_model": failures_by_model,
     }
+    print(f"[collection_job] Total failing entries collected: {len(failure_entries)}.")
 
     with open(os.path.join(output_dir, "failure_summary.json"), "w") as fp:
         json.dump(failure_summary, fp, indent=4)

From 8f794ebceb4e25e265334fb053df9ce2e327b62b Mon Sep 17 00:00:00 2001
From: Arthur <arthur.zucker@gmail.com>
Date: Mon, 17 Nov 2025 16:55:46 +0100
Subject: [PATCH 05/16] try

---
 .../circleci-failure-summary-comment.yml      | 67 +++++++++++++
 scripts/find_circleci_workflow.py             | 93 ++++++++++++++++++
 ..._process_circleci_workflow_test_reports.py | 14 ---
 .../process_circleci_workflow_test_reports.py | 95 -------------------
 4 files changed, 160 insertions(+), 109 deletions(-)
 create mode 100644 .github/workflows/circleci-failure-summary-comment.yml
 create mode 100644 scripts/find_circleci_workflow.py

diff --git a/.github/workflows/circleci-failure-summary-comment.yml b/.github/workflows/circleci-failure-summary-comment.yml
new file mode 100644
index 000000000000..9f1fce8935cd
--- /dev/null
+++ b/.github/workflows/circleci-failure-summary-comment.yml
@@ -0,0 +1,67 @@
+name: CircleCI Failure Summary Comment
+# Requires repository secrets:
+# - CIRCLECI_TOKEN: API token with permission to query CircleCI pipelines
+
+on:
+  check_suite:
+    types:
+      - completed
+
+jobs:
+  comment:
+    if: >
+      github.event.check_suite.app.slug == 'circleci-checks' &&
+      github.event.check_suite.conclusion != '' &&
+      github.event.check_suite.pull_requests[0]
+    runs-on: ubuntu-22.04
+    permissions:
+      pull-requests: write
+    env:
+      TARGET_BRANCH: ${{ github.event.check_suite.head_branch }}
+      TARGET_SHA: ${{ github.event.check_suite.head_sha }}
+      PR_NUMBER: ${{ github.event.check_suite.pull_requests[0].number }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+
+      - name: Install dependencies
+        run: python -m pip install requests
+
+      - name: Find CircleCI workflow
+        id: circleci
+        env:
+          CIRCLECI_TOKEN: ${{ secrets.CIRCLECI_TOKEN }}
+        run: |
+          WORKFLOW_ID=$(python scripts/find_circleci_workflow.py --branch "$TARGET_BRANCH" --sha "$TARGET_SHA")
+          echo "workflow_id=$WORKFLOW_ID" >> $GITHUB_OUTPUT
+
+      - name: Generate failure summary
+        env:
+          CIRCLE_TOKEN: ${{ secrets.CIRCLECI_TOKEN }}
+        run: |
+          python utils/process_circleci_workflow_test_reports.py --workflow_id "${{ steps.circleci.outputs.workflow_id }}"
+
+      - name: Post comment with failure summary
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          if [ ! -f outputs/failure_summary.json ]; then
+            echo "failure_summary.json missing, skipping comment."
+            exit 0
+          fi
+          failures=$(python -c "import json; print(len(json.load(open('outputs/failure_summary.json'))['failures']))")
+          if [ "$failures" -eq 0 ]; then
+            echo "No failures detected, skipping PR comment."
+            exit 0
+          fi
+          body="$(cat outputs/failure_summary.md)"
+          gh api \
+            --method POST \
+            -H "Accept: application/vnd.github+json" \
+            "repos/${{ github.repository }}/issues/${PR_NUMBER}/comments" \
+            -f body="$body"
diff --git a/scripts/find_circleci_workflow.py b/scripts/find_circleci_workflow.py
new file mode 100644
index 000000000000..e6baefb5f8cd
--- /dev/null
+++ b/scripts/find_circleci_workflow.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+# coding=utf-8
+# Copyright 2025
+#
+# Utility script to retrieve a CircleCI workflow ID for a given branch and commit SHA.
+#
+# Usage:
+#   python scripts/find_circleci_workflow.py --branch main --sha <commit_sha>
+#
+# Environment:
+#   CIRCLECI_TOKEN must be set with a token that has permission to query the CircleCI API.
+
+from __future__ import annotations
+
+import argparse
+import os
+import sys
+from typing import Optional
+
+import requests
+
+
+CIRCLE_API = "https://circleci.com/api/v2"
+PROJECT_SLUG = "gh/huggingface/transformers"
+
+
+def _get_circle_token(token: Optional[str]) -> str:
+    token = token or os.environ.get("CIRCLECI_TOKEN") or os.environ.get("CCI_TOKEN") or os.environ.get("CIRCLE_TOKEN")
+    if not token:
+        raise SystemExit("CIRCLECI_TOKEN (or CCI_TOKEN / CIRCLE_TOKEN) must be provided.")
+    return token
+
+
+def _request(url: str, token: str, params: Optional[dict] = None) -> dict:
+    response = requests.get(
+        url,
+        params=params,
+        headers={"Circle-Token": token},
+    )
+    response.raise_for_status()
+    return response.json()
+
+
+def _find_pipeline_id(branch: str, revision: str, token: str) -> str:
+    url = f"{CIRCLE_API}/project/{PROJECT_SLUG}/pipeline"
+    params = {"branch": branch}
+    pages_checked = 0
+    while True:
+        payload = _request(url, token, params=params)
+        for pipeline in payload.get("items", []):
+            vcs = pipeline.get("vcs") or {}
+            if vcs.get("revision") == revision:
+                return pipeline["id"]
+        next_token = payload.get("next_page_token")
+        if not next_token or pages_checked > 10:
+            break
+        params["page-token"] = next_token
+        pages_checked += 1
+    raise SystemExit(f"Unable to find CircleCI pipeline for branch {branch} and revision {revision}.")
+
+
+def _workflow_has_collection_job(workflow_id: str, token: str) -> bool:
+    jobs = _request(f"{CIRCLE_API}/workflow/{workflow_id}/job", token)
+    return any(job.get("name") == "collection_job" for job in jobs.get("items", []))
+
+
+def _find_workflow_with_collection_job(pipeline_id: str, token: str) -> str:
+    payload = _request(f"{CIRCLE_API}/pipeline/{pipeline_id}/workflow", token)
+    workflows = payload.get("items", [])
+    for workflow in workflows:
+        workflow_id = workflow["id"]
+        if _workflow_has_collection_job(workflow_id, token):
+            return workflow_id
+    if workflows:
+        return workflows[0]["id"]
+    raise SystemExit(f"No workflows found for pipeline {pipeline_id}.")
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Find CircleCI workflow id for a commit.")
+    parser.add_argument("--branch", required=True, help="Branch name for the CircleCI pipeline.")
+    parser.add_argument("--sha", required=True, help="Commit SHA to match.")
+    parser.add_argument("--token", default=None, help="CircleCI API token.")
+    args = parser.parse_args()
+
+    token = _get_circle_token(args.token)
+    pipeline_id = _find_pipeline_id(args.branch, args.sha, token)
+    workflow_id = _find_workflow_with_collection_job(pipeline_id, token)
+    print(workflow_id)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/utils/test_process_circleci_workflow_test_reports.py b/tests/utils/test_process_circleci_workflow_test_reports.py
index 4e84f2b21c0f..4234fca9cb6b 100644
--- a/tests/utils/test_process_circleci_workflow_test_reports.py
+++ b/tests/utils/test_process_circleci_workflow_test_reports.py
@@ -77,23 +77,12 @@ def fake_get(url, headers=None):
             return _FakeResponse(text=failures_line_text)
         raise AssertionError(f"Unexpected URL requested: {url}")
 
-    captured_post = {}
-
-    def fake_post(url, headers=None, json=None):
-        captured_post["url"] = url
-        captured_post["headers"] = headers
-        captured_post["json"] = json
-        return _FakeResponse(text="ok", status_code=201)
-
-    monkeypatch.setenv("CIRCLE_PULL_REQUEST", "https://github.com/huggingface/transformers/pull/456")
-    monkeypatch.setenv("GITHUB_TOKEN", "dummy-token")
     monkeypatch.chdir(tmp_path)
     output_dir = tmp_path / "outputs"
     process_circleci_workflow(
         "test-workflow",
         output_dir=str(output_dir),
         request_get=fake_get,
-        request_post=fake_post,
     )
 
     failure_summary_path = output_dir / "failure_summary.json"
@@ -121,6 +110,3 @@ def fake_post(url, headers=None, json=None):
     md_contents = failure_summary_md.read_text()
     assert "Failure summary" in md_contents
     assert "tests/models/bert/test_modeling_bert.py" in md_contents
-
-    assert captured_post["url"].endswith("/issues/456/comments")
-    assert captured_post["json"]["body"] == md_contents
diff --git a/utils/process_circleci_workflow_test_reports.py b/utils/process_circleci_workflow_test_reports.py
index 8dff7169fad4..23e8e6eab39f 100644
--- a/utils/process_circleci_workflow_test_reports.py
+++ b/utils/process_circleci_workflow_test_reports.py
@@ -147,103 +147,10 @@ def _normalize_test_nodeid(nodeid: str) -> str:
     return normalized
 
 
-def _get_repo_owner_defaults() -> tuple[str, str]:
-    repo = os.environ.get("GITHUB_REPOSITORY")
-    if repo and "/" in repo:
-        owner, repo_name = repo.split("/", 1)
-        print(f"Detected repository from GITHUB_REPOSITORY: {owner}/{repo_name}")
-        return owner, repo_name
-    # CircleCI does not always set GITHUB_REPOSITORY; we fall back to the canonical repository.
-    print("GITHUB_REPOSITORY not set; defaulting to huggingface/transformers.")
-    return "huggingface", "transformers"
-
-
-def _get_pr_details_from_env() -> tuple[str, str, str] | None:
-    """
-    Returns (owner, repo, pr_number) if we can infer them from the environment.
-
-    CircleCI does not always expose `CIRCLE_PULL_REQUEST`, so the collection job exports `PR_NUMBER`
-    beforehand via `utils/extract_pr_number_from_circleci.py`. We try every known source before giving up, falling
-    back to CircleCI specific environment variables when needed.
-    """
-    owner, repo_name = _get_repo_owner_defaults()
-    pr_url_candidates = [
-        os.environ.get("CIRCLE_PULL_REQUEST"),
-        os.environ.get("GITHUB_PULL_REQUEST_URL"),
-    ]
-    for pr_url in pr_url_candidates:
-        if not pr_url:
-            continue
-        match = re.match(
-            r"https://github.com/(?P<owner>[^/]+)/(?P<repo>[^/]+)/pull/(?P<number>\d+)", pr_url
-        )
-        if match:
-            owner = match.group("owner")
-            repo_name = match.group("repo")
-            pr_number = match.group("number")
-            print(f"Detected PR info from PR URL: {owner}/{repo_name}#{pr_number}")
-            return owner, repo_name, pr_number
-
-    pr_number = os.environ.get("PR_NUMBER") or os.environ.get("CIRCLE_PR_NUMBER")
-    if not pr_number:
-        github_ref = os.environ.get("GITHUB_REF", "")
-        match = re.search(r"refs/pull/(\d+)/", github_ref)
-        if match:
-            pr_number = match.group(1)
-    if pr_number:
-        print(f"Detected PR info from environment variables: {owner}/{repo_name}#{pr_number}")
-        return owner, repo_name, pr_number
-    circle_owner = os.environ.get("CIRCLE_PROJECT_USERNAME") or owner
-    circle_repo = os.environ.get("CIRCLE_PROJECT_REPONAME") or repo_name
-    circle_pr = os.environ.get("PR_NUMBER") or os.environ.get("CIRCLE_PR_NUMBER")
-    if circle_pr:
-        print(f"Detected PR info from CircleCI variables: {circle_owner}/{circle_repo}#{circle_pr}")
-        return circle_owner, circle_repo, circle_pr
-    return None
-
-
-def _get_github_token() -> str | None:
-    for env_var in ("GITHUB_TOKEN", "GH_TOKEN", "GITHUB_ACCESS_TOKEN"):
-        token = os.environ.get(env_var)
-        if token:
-            print(f"Using GitHub token from {env_var}.")
-            return token
-    print("GitHub token not found in environment (GITHUB_TOKEN / GH_TOKEN / GITHUB_ACCESS_TOKEN).")
-    return None
-
-
-def _post_failure_summary_comment(markdown_text: str, request_post: Callable = requests.post) -> bool:
-    pr_details = _get_pr_details_from_env()
-    token = _get_github_token()
-    if not pr_details or not token:
-        if not pr_details:
-            print("Skipping PR comment: PR metadata not available in the environment.")
-        if not token:
-            print("Skipping PR comment: missing GitHub token (GITHUB_TOKEN / GH_TOKEN / GITHUB_ACCESS_TOKEN).")
-        return False
-    owner, repo, pr_number = pr_details
-    url = f"https://api.github.com/repos/{owner}/{repo}/issues/{pr_number}/comments"
-    headers = {
-        "Authorization": f"Bearer {token}",
-        "Accept": "application/vnd.github+json",
-    }
-    print(f"Posting failure summary comment to {owner}/{repo}#{pr_number}.")
-    response = request_post(url, headers=headers, json={"body": markdown_text})
-    if not (200 <= getattr(response, "status_code", 0) < 300):
-        print(
-            f"Failed to post PR comment: {getattr(response, 'status_code', 'unknown')} "
-            f"{getattr(response, 'text', '')}"
-        )
-        return False
-    print("Posted failure summary comment on the pull request.")
-    return True
-
-
 def process_circleci_workflow(
     workflow_id: str,
     output_dir: str = "outputs",
     request_get: Callable = requests.get,
-    request_post: Callable = requests.post,
 ):
     print(f"[collection_job] Processing CircleCI workflow {workflow_id}")
     response = request_get(
@@ -380,8 +287,6 @@ def process_circleci_workflow(
     with open(os.path.join(output_dir, "failure_summary.md"), "w") as fp:
         fp.write(markdown_text)
 
-    _post_failure_summary_comment(markdown_text, request_post=request_post)
-
 
 def main():
     parser = argparse.ArgumentParser()

From 8edef99cb897dd3bd6ea6a938bb953b403017e9b Mon Sep 17 00:00:00 2001
From: Arthur <arthur.zucker@gmail.com>
Date: Mon, 17 Nov 2025 17:44:14 +0100
Subject: [PATCH 06/16] use another token.

---
 .github/workflows/circleci-failure-summary-comment.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/circleci-failure-summary-comment.yml b/.github/workflows/circleci-failure-summary-comment.yml
index 9f1fce8935cd..441d74347938 100644
--- a/.github/workflows/circleci-failure-summary-comment.yml
+++ b/.github/workflows/circleci-failure-summary-comment.yml
@@ -1,6 +1,6 @@
 name: CircleCI Failure Summary Comment
 # Requires repository secrets:
-# - CIRCLECI_TOKEN: API token with permission to query CircleCI pipelines
+# - CIRCLE_TOKEN: API token with permission to query CircleCI pipelines
 
 on:
   check_suite:
@@ -35,14 +35,14 @@ jobs:
       - name: Find CircleCI workflow
         id: circleci
         env:
-          CIRCLECI_TOKEN: ${{ secrets.CIRCLECI_TOKEN }}
+          CIRCLE_TOKEN: ${{ secrets.CIRCLE_TOKEN }}
         run: |
           WORKFLOW_ID=$(python scripts/find_circleci_workflow.py --branch "$TARGET_BRANCH" --sha "$TARGET_SHA")
           echo "workflow_id=$WORKFLOW_ID" >> $GITHUB_OUTPUT
 
       - name: Generate failure summary
         env:
-          CIRCLE_TOKEN: ${{ secrets.CIRCLECI_TOKEN }}
+          CIRCLE_TOKEN: ${{ secrets.CIRCLE_TOKEN }}
         run: |
           python utils/process_circleci_workflow_test_reports.py --workflow_id "${{ steps.circleci.outputs.workflow_id }}"
 

From bbdce1ca2a5f1d27ec534e597a39ae11cadf0145 Mon Sep 17 00:00:00 2001
From: Arthur <arthur.zucker@gmail.com>
Date: Mon, 17 Nov 2025 17:44:17 +0100
Subject: [PATCH 07/16] nit

---
 .github/workflows/circleci-failure-summary-comment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/circleci-failure-summary-comment.yml b/.github/workflows/circleci-failure-summary-comment.yml
index 441d74347938..0552d02b3ead 100644
--- a/.github/workflows/circleci-failure-summary-comment.yml
+++ b/.github/workflows/circleci-failure-summary-comment.yml
@@ -27,7 +27,7 @@ jobs:
       - name: Setup Python
         uses: actions/setup-python@v5
         with:
-          python-version: "3.10"
+          python-version: "3.13"
 
       - name: Install dependencies
         run: python -m pip install requests

From 98ce57868b335b016b2e6e2aad0543c168d56111 Mon Sep 17 00:00:00 2001
From: Arthur <arthur.zucker@gmail.com>
Date: Mon, 17 Nov 2025 18:30:54 +0100
Subject: [PATCH 08/16] update?

---
 .../circleci-failure-summary-comment.yml      | 50 ++++++++++++++-----
 1 file changed, 38 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/circleci-failure-summary-comment.yml b/.github/workflows/circleci-failure-summary-comment.yml
index 0552d02b3ead..033b409c4a2c 100644
--- a/.github/workflows/circleci-failure-summary-comment.yml
+++ b/.github/workflows/circleci-failure-summary-comment.yml
@@ -3,23 +3,18 @@ name: CircleCI Failure Summary Comment
 # - CIRCLE_TOKEN: API token with permission to query CircleCI pipelines
 
 on:
-  check_suite:
-    types:
-      - completed
+  pull_request:
+    types: [opened, synchronize, reopened]
 
 jobs:
   comment:
-    if: >
-      github.event.check_suite.app.slug == 'circleci-checks' &&
-      github.event.check_suite.conclusion != '' &&
-      github.event.check_suite.pull_requests[0]
     runs-on: ubuntu-22.04
     permissions:
       pull-requests: write
     env:
-      TARGET_BRANCH: ${{ github.event.check_suite.head_branch }}
-      TARGET_SHA: ${{ github.event.check_suite.head_sha }}
-      PR_NUMBER: ${{ github.event.check_suite.pull_requests[0].number }}
+      TARGET_BRANCH: ${{ github.event.pull_request.head.ref }}
+      TARGET_SHA: ${{ github.event.pull_request.head.sha }}
+      PR_NUMBER: ${{ github.event.pull_request.number }}
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
@@ -32,6 +27,33 @@ jobs:
       - name: Install dependencies
         run: python -m pip install requests
 
+      - name: Wait for CircleCI check suite completion
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          COMMIT_SHA: ${{ github.event.pull_request.head.sha }}
+          github_repository: ${{ github.repository }}
+        run: |
+          echo "Waiting for CircleCI check suite to complete..."
+          end=$((SECONDS+1800))
+          while [ $SECONDS -lt $end ]; do
+            suite_json=$(gh api "repos/${github_repository}/commits/${COMMIT_SHA}/check-suites" --jq '.check_suites[] | select(.app.slug=="circleci-checks")')
+            if [ -z "$suite_json" ]; then
+              echo "CircleCI check suite not found yet, retrying..."
+            else
+              status=$(echo "$suite_json" | jq -r '.status')
+              conclusion=$(echo "$suite_json" | jq -r '.conclusion // empty')
+              echo "Current CircleCI check suite status: $status (conclusion: $conclusion)"
+              if [ "$status" = "completed" ] && [ -n "$conclusion" ]; then
+                break
+              fi
+            fi
+            sleep 20
+          done
+          if [ $SECONDS -ge $end ]; then
+            echo "Timed out waiting for CircleCI check suite."
+            exit 1
+          fi
+
       - name: Find CircleCI workflow
         id: circleci
         env:
@@ -48,7 +70,10 @@ jobs:
 
       - name: Post comment with failure summary
         env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
+          github_repository: ${{ github.repository }}
+          pr_number: ${{ github.event.pull_request.number }}
         run: |
           if [ ! -f outputs/failure_summary.json ]; then
             echo "failure_summary.json missing, skipping comment."
@@ -63,5 +88,6 @@ jobs:
           gh api \
             --method POST \
             -H "Accept: application/vnd.github+json" \
-            "repos/${{ github.repository }}/issues/${PR_NUMBER}/comments" \
+            -H "X-GitHub-Api-Version: 2022-11-28" \
+            "repos/${github_repository}/issues/${pr_number}/comments" \
             -f body="$body"

From be22c6b8e621a32462b3e949374d8fe5590ec77f Mon Sep 17 00:00:00 2001
From: Arthur <arthur.zucker@gmail.com>
Date: Mon, 17 Nov 2025 18:41:41 +0100
Subject: [PATCH 09/16] up up up

---
 scripts/find_circleci_workflow.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/find_circleci_workflow.py b/scripts/find_circleci_workflow.py
index e6baefb5f8cd..e80e46d5e90f 100644
--- a/scripts/find_circleci_workflow.py
+++ b/scripts/find_circleci_workflow.py
@@ -8,7 +8,7 @@
 #   python scripts/find_circleci_workflow.py --branch main --sha <commit_sha>
 #
 # Environment:
-#   CIRCLECI_TOKEN must be set with a token that has permission to query the CircleCI API.
+#   CIRCLE_TOKEN must be set with a token that has permission to query the CircleCI API.
 
 from __future__ import annotations
 
@@ -25,9 +25,9 @@
 
 
 def _get_circle_token(token: Optional[str]) -> str:
-    token = token or os.environ.get("CIRCLECI_TOKEN") or os.environ.get("CCI_TOKEN") or os.environ.get("CIRCLE_TOKEN")
+    token = token or os.environ.get("CIRCLE_TOKEN") or os.environ.get("CCI_TOKEN")
     if not token:
-        raise SystemExit("CIRCLECI_TOKEN (or CCI_TOKEN / CIRCLE_TOKEN) must be provided.")
+        raise SystemExit("CIRCLE_TOKEN (or CCI_TOKEN) must be provided.")
     return token
 
 

From bc4915b13a28668cda23d607230f79e6c53f1c99 Mon Sep 17 00:00:00 2001
From: Arthur <arthur.zucker@gmail.com>
Date: Mon, 17 Nov 2025 18:44:08 +0100
Subject: [PATCH 10/16] up

---
 .github/workflows/circleci-failure-summary-comment.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/circleci-failure-summary-comment.yml b/.github/workflows/circleci-failure-summary-comment.yml
index 033b409c4a2c..f994a030c55f 100644
--- a/.github/workflows/circleci-failure-summary-comment.yml
+++ b/.github/workflows/circleci-failure-summary-comment.yml
@@ -15,6 +15,7 @@ jobs:
       TARGET_BRANCH: ${{ github.event.pull_request.head.ref }}
       TARGET_SHA: ${{ github.event.pull_request.head.sha }}
       PR_NUMBER: ${{ github.event.pull_request.number }}
+      CIRCLE_TOKEN: ${{ secrets.CIRCLE_TOKEN }}
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4

From 3e9912eaf7af4ae5664af4273b0b95f98f4f25be Mon Sep 17 00:00:00 2001
From: Arthur <arthur.zucker@gmail.com>
Date: Mon, 17 Nov 2025 19:06:45 +0100
Subject: [PATCH 11/16] new secret

---
 .github/workflows/circleci-failure-summary-comment.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/circleci-failure-summary-comment.yml b/.github/workflows/circleci-failure-summary-comment.yml
index f994a030c55f..d8b547a10e4b 100644
--- a/.github/workflows/circleci-failure-summary-comment.yml
+++ b/.github/workflows/circleci-failure-summary-comment.yml
@@ -1,6 +1,6 @@
 name: CircleCI Failure Summary Comment
 # Requires repository secrets:
-# - CIRCLE_TOKEN: API token with permission to query CircleCI pipelines
+# - CI_ARTIFACT_TOKEN: API token with permission to query CircleCI pipelines (same value used by CircleCI contexts)
 
 on:
   pull_request:
@@ -15,7 +15,7 @@ jobs:
       TARGET_BRANCH: ${{ github.event.pull_request.head.ref }}
       TARGET_SHA: ${{ github.event.pull_request.head.sha }}
       PR_NUMBER: ${{ github.event.pull_request.number }}
-      CIRCLE_TOKEN: ${{ secrets.CIRCLE_TOKEN }}
+      CIRCLE_TOKEN: ${{ secrets.CI_ARTIFACT_TOKEN }}
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
@@ -58,14 +58,14 @@ jobs:
       - name: Find CircleCI workflow
         id: circleci
         env:
-          CIRCLE_TOKEN: ${{ secrets.CIRCLE_TOKEN }}
+          CIRCLE_TOKEN: ${{ secrets.CI_ARTIFACT_TOKEN }}
         run: |
           WORKFLOW_ID=$(python scripts/find_circleci_workflow.py --branch "$TARGET_BRANCH" --sha "$TARGET_SHA")
           echo "workflow_id=$WORKFLOW_ID" >> $GITHUB_OUTPUT
 
       - name: Generate failure summary
         env:
-          CIRCLE_TOKEN: ${{ secrets.CIRCLE_TOKEN }}
+          CIRCLE_TOKEN: ${{ secrets.CI_ARTIFACT_TOKEN }}
         run: |
           python utils/process_circleci_workflow_test_reports.py --workflow_id "${{ steps.circleci.outputs.workflow_id }}"
 

From 03562e61aa6bc8d0e26a2f09f2d6ff9b5e423928 Mon Sep 17 00:00:00 2001
From: Arthur <arthur.zucker@gmail.com>
Date: Mon, 17 Nov 2025 19:13:19 +0100
Subject: [PATCH 12/16] try it for real!

---
 src/transformers/models/mixtral/modeling_mixtral.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/models/mixtral/modeling_mixtral.py b/src/transformers/models/mixtral/modeling_mixtral.py
index 90ea7d1973c4..f1f6ef9f2df1 100644
--- a/src/transformers/models/mixtral/modeling_mixtral.py
+++ b/src/transformers/models/mixtral/modeling_mixtral.py
@@ -286,7 +286,7 @@ def eager_attention_forward(
     # attn_weights = nn.functional.dropout(attn_weights, p=dropout, training=module.training)
     attn_output = torch.matmul(attn_weights, value_states)
     attn_output = attn_output.transpose(1, 2).contiguous()
-
+    # TODO
     return attn_output, attn_weights
 
 

From dbfc6746d37652aa1f3dff6eb47f309566f7128e Mon Sep 17 00:00:00 2001
From: Arthur <arthur.zucker@gmail.com>
Date: Tue, 18 Nov 2025 08:40:28 +0100
Subject: [PATCH 13/16] small updates

---
 .../circleci-failure-summary-comment.yml      | 33 ++++++-
 .../process_circleci_workflow_test_reports.py | 47 ++++++++++
 utils/upload_circleci_results.py              | 94 +++++++++++++++++++
 3 files changed, 171 insertions(+), 3 deletions(-)
 create mode 100644 utils/upload_circleci_results.py

diff --git a/.github/workflows/circleci-failure-summary-comment.yml b/.github/workflows/circleci-failure-summary-comment.yml
index d8b547a10e4b..39eecd97e858 100644
--- a/.github/workflows/circleci-failure-summary-comment.yml
+++ b/.github/workflows/circleci-failure-summary-comment.yml
@@ -26,7 +26,7 @@ jobs:
           python-version: "3.13"
 
       - name: Install dependencies
-        run: python -m pip install requests
+        run: python -m pip install requests huggingface_hub
 
       - name: Wait for CircleCI check suite completion
         env:
@@ -69,12 +69,20 @@ jobs:
         run: |
           python utils/process_circleci_workflow_test_reports.py --workflow_id "${{ steps.circleci.outputs.workflow_id }}"
 
-      - name: Post comment with failure summary
+      - name: Upload summaries to Hub
+        env:
+          HF_TOKEN: ${{ secrets.TRANSFORMERS_HUB_BOT_HF_TOKEN }}
+          CIRCLECI_RESULTS_DATASET_ID: transformers-community/circleci-test-results
+        run: |
+          python utils/upload_circleci_results.py --source-dir outputs --dataset-id "${CIRCLECI_RESULTS_DATASET_ID}"
+
+      - name: Post comment with helper link
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
           github_repository: ${{ github.repository }}
           pr_number: ${{ github.event.pull_request.number }}
+          pr_sha: ${{ github.event.pull_request.head.sha }}
         run: |
           if [ ! -f outputs/failure_summary.json ]; then
             echo "failure_summary.json missing, skipping comment."
@@ -85,7 +93,26 @@ jobs:
             echo "No failures detected, skipping PR comment."
             exit 0
           fi
-          body="$(cat outputs/failure_summary.md)"
+          SPACE_SLUG="transformers-community/circleci-test-collection-helper"
+          SPACE_BASE="https://huggingface.co/spaces/${SPACE_SLUG}"
+          QUERY=$(python - <<'PY'
+import os
+from urllib.parse import urlencode
+
+params = {
+    "repo": os.environ.get("github_repository"),
+    "pr": os.environ.get("pr_number"),
+    "sha": os.environ.get("pr_sha"),
+}
+print(urlencode({k: v for k, v in params.items() if v}))
+PY
+)
+          if [ -n "$QUERY" ]; then
+            SPACE_URL="${SPACE_BASE}?${QUERY}"
+          else
+            SPACE_URL="${SPACE_BASE}"
+          fi
+          body="$(printf 'View the CircleCI test collection helper for this PR:\n\n%s' "${SPACE_URL}")"
           gh api \
             --method POST \
             -H "Accept: application/vnd.github+json" \
diff --git a/utils/process_circleci_workflow_test_reports.py b/utils/process_circleci_workflow_test_reports.py
index 23e8e6eab39f..570709b58a31 100644
--- a/utils/process_circleci_workflow_test_reports.py
+++ b/utils/process_circleci_workflow_test_reports.py
@@ -18,6 +18,7 @@
 import os
 import re
 from collections import Counter
+from datetime import datetime, timezone
 from typing import Callable
 
 import requests
@@ -147,6 +148,38 @@ def _normalize_test_nodeid(nodeid: str) -> str:
     return normalized
 
 
+def _collect_metadata(workflow_id: str) -> dict[str, str | None]:
+    """Collect run metadata; non-CircleCI runs fall back to TARGET_SHA/TARGET_BRANCH/PR_NUMBER/GITHUB_REPOSITORY."""
+    repo_owner = os.environ.get("CIRCLE_PROJECT_USERNAME")
+    repo_name = os.environ.get("CIRCLE_PROJECT_REPONAME")
+    repo_slug = "/".join(part for part in [repo_owner, repo_name] if part) or os.environ.get("GITHUB_REPOSITORY", "")
+    commit_sha = os.environ.get("CIRCLE_SHA1") or os.environ.get("TARGET_SHA")
+    branch = os.environ.get("CIRCLE_BRANCH") or os.environ.get("TARGET_BRANCH")
+    pull_request = os.environ.get("CIRCLE_PULL_REQUEST")
+    pr_number = os.environ.get("CIRCLE_PR_NUMBER")
+    if not pr_number and pull_request and "/" in pull_request:
+        pr_number = pull_request.rsplit("/", 1)[-1]
+    pr_number = pr_number or os.environ.get("PR_NUMBER")
+    build_num = os.environ.get("CIRCLE_BUILD_NUM")
+    timestamp = os.environ.get("CIRCLE_WORKFLOW_CREATED_AT") or datetime.now(timezone.utc).isoformat()
+    commit_short = (commit_sha or "unknown")[:8]
+    dataset_subfolder = f"{repo_slug.replace('/', '__') or 'unknown_repo'}/pr-{pr_number or 'none'}/sha-{commit_short}/workflow-{workflow_id}"
+    metadata = {
+        "workflow_id": workflow_id,
+        "repo_owner": repo_owner,
+        "repo_name": repo_name,
+        "repository": repo_slug,
+        "branch": branch,
+        "commit_sha": commit_sha,
+        "pull_request": pull_request,
+        "pull_request_number": pr_number,
+        "build_number": build_num,
+        "collected_at": timestamp,
+        "dataset_subfolder": dataset_subfolder,
+    }
+    return metadata
+
+
 def process_circleci_workflow(
     workflow_id: str,
     output_dir: str = "outputs",
@@ -287,6 +320,20 @@ def process_circleci_workflow(
     with open(os.path.join(output_dir, "failure_summary.md"), "w") as fp:
         fp.write(markdown_text)
 
+    metadata = _collect_metadata(workflow_id)
+    aggregate_payload = {
+        "metadata": metadata,
+        "jobs": workflow_summary,
+        "tests": new_workflow_summary,
+        "failures": failure_entries,
+        "failures_by_test": failures_by_test,
+        "failures_by_model": failures_by_model,
+    }
+    with open(os.path.join(output_dir, "collection_summary.json"), "w") as fp:
+        json.dump(aggregate_payload, fp, indent=4)
+    with open(os.path.join(output_dir, "metadata.json"), "w") as fp:
+        json.dump(metadata, fp, indent=4)
+
 
 def main():
     parser = argparse.ArgumentParser()
diff --git a/utils/upload_circleci_results.py b/utils/upload_circleci_results.py
new file mode 100644
index 000000000000..a1822309ea3a
--- /dev/null
+++ b/utils/upload_circleci_results.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python
+# Copyright 2025 The HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+import json
+import os
+from pathlib import Path
+
+from huggingface_hub import CommitOperationAdd, HfApi
+
+DEFAULT_DATASET_ID = os.environ.get("CIRCLECI_RESULTS_DATASET_ID", "transformers-community/circleci-test-results")
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description="Upload CircleCI collection outputs to the Hub.")
+    parser.add_argument("--source-dir", type=str, default="outputs", help="Directory containing summary files.")
+    parser.add_argument("--dataset-id", type=str, default=DEFAULT_DATASET_ID, help="Target dataset ID to update.")
+    return parser.parse_args()
+
+
+def _load_metadata(source_dir: Path) -> dict:
+    metadata_path = source_dir / "metadata.json"
+    if not metadata_path.exists():
+        raise FileNotFoundError(f"metadata.json missing in {source_dir}")
+    with metadata_path.open() as fp:
+        return json.load(fp)
+
+
+def _collect_files(source_dir: Path, base_dir: str) -> list[CommitOperationAdd]:
+    filenames = [
+        "collection_summary.json",
+        "failure_summary.json",
+        "failure_summary.md",
+        "test_summary.json",
+        "metadata.json",
+    ]
+    operations = []
+    for filename in filenames:
+        path = source_dir / filename
+        if not path.exists():
+            continue
+        remote = f"{base_dir}/{filename}"
+        operations.append(CommitOperationAdd(path_in_repo=remote, path_or_fileobj=str(path)))
+    return operations
+
+
+def main():
+    args = parse_args()
+    source_dir = Path(args.source_dir).resolve()
+    dataset_id = args.dataset_id
+    if not dataset_id:
+        raise ValueError("Dataset ID is required.")
+
+    token = os.environ.get("HF_TOKEN") or os.environ.get("TRANSFORMERS_HUB_BOT_HF_TOKEN")
+    if not token:
+        raise RuntimeError("HF token not provided. Set HF_TOKEN or TRANSFORMERS_HUB_BOT_HF_TOKEN.")
+
+    metadata = _load_metadata(source_dir)
+    pr_number = metadata.get("pull_request_number") or "none"
+    commit_sha = metadata.get("commit_sha") or "unknown"
+    commit_short = commit_sha[:12]
+    base_dir = f"pr-{pr_number}/sha-{commit_short}"
+
+    operations = _collect_files(source_dir, base_dir)
+    if not operations:
+        raise RuntimeError(f"No summary files found in {source_dir}.")
+
+    api = HfApi(token=token)
+    api.create_repo(repo_id=dataset_id, repo_type="dataset", exist_ok=True, token=token)
+
+    commit_message = f"Update CircleCI artifacts for PR {pr_number} ({commit_short})"
+    api.create_commit(
+        repo_id=dataset_id,
+        repo_type="dataset",
+        operations=operations,
+        commit_message=commit_message,
+        token=token,
+    )
+    print(f"Uploaded {len(operations)} files to {dataset_id}:{base_dir}")
+
+
+if __name__ == "__main__":
+    main()

From 7fcf84e3c794d166bf99b7f2143bb0e6c9486608 Mon Sep 17 00:00:00 2001
From: Arthur <arthur.zucker@gmail.com>
Date: Tue, 18 Nov 2025 08:55:29 +0100
Subject: [PATCH 14/16] update query

---
 .../circleci-failure-summary-comment.yml      | 22 ++++++++-----------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/circleci-failure-summary-comment.yml b/.github/workflows/circleci-failure-summary-comment.yml
index 39eecd97e858..2ff470bc47c5 100644
--- a/.github/workflows/circleci-failure-summary-comment.yml
+++ b/.github/workflows/circleci-failure-summary-comment.yml
@@ -72,7 +72,7 @@ jobs:
       - name: Upload summaries to Hub
         env:
           HF_TOKEN: ${{ secrets.TRANSFORMERS_HUB_BOT_HF_TOKEN }}
-          CIRCLECI_RESULTS_DATASET_ID: transformers-community/circleci-test-results
+          CIRCLECI_RESULTS_DATASET_ID: "transformers-community/circleci-test-results"
         run: |
           python utils/upload_circleci_results.py --source-dir outputs --dataset-id "${CIRCLECI_RESULTS_DATASET_ID}"
 
@@ -95,18 +95,14 @@ jobs:
           fi
           SPACE_SLUG="transformers-community/circleci-test-collection-helper"
           SPACE_BASE="https://huggingface.co/spaces/${SPACE_SLUG}"
-          QUERY=$(python - <<'PY'
-import os
-from urllib.parse import urlencode
-
-params = {
-    "repo": os.environ.get("github_repository"),
-    "pr": os.environ.get("pr_number"),
-    "sha": os.environ.get("pr_sha"),
-}
-print(urlencode({k: v for k, v in params.items() if v}))
-PY
-)
+          repo_enc=$(jq -rn --arg v "${github_repository}" '$v|@uri')
+          pr_enc=$(jq -rn --arg v "${pr_number}" '$v|@uri')
+          sha_enc=$(jq -rn --arg v "${pr_sha}" '$v|@uri')
+          parts=()
+          [ -n "${repo_enc}" ] && parts+=("repo=${repo_enc}")
+          [ -n "${pr_enc}" ] && parts+=("pr=${pr_enc}")
+          [ -n "${sha_enc}" ] && parts+=("sha=${sha_enc}")
+          QUERY=$(IFS="&"; echo "${parts[*]}")
           if [ -n "$QUERY" ]; then
             SPACE_URL="${SPACE_BASE}?${QUERY}"
           else

From 9664edac08729aabb154073c9cd52e944b7cbb3d Mon Sep 17 00:00:00 2001
From: Arthur <arthur.zucker@gmail.com>
Date: Tue, 18 Nov 2025 09:07:44 +0100
Subject: [PATCH 15/16] a different token

---
 .github/workflows/circleci-failure-summary-comment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/circleci-failure-summary-comment.yml b/.github/workflows/circleci-failure-summary-comment.yml
index 2ff470bc47c5..d4fc389d7948 100644
--- a/.github/workflows/circleci-failure-summary-comment.yml
+++ b/.github/workflows/circleci-failure-summary-comment.yml
@@ -71,7 +71,7 @@ jobs:
 
       - name: Upload summaries to Hub
         env:
-          HF_TOKEN: ${{ secrets.TRANSFORMERS_HUB_BOT_HF_TOKEN }}
+          HF_TOKEN: ${{ secrets.HF_CI_WRITE_TOKEN }}
           CIRCLECI_RESULTS_DATASET_ID: "transformers-community/circleci-test-results"
         run: |
           python utils/upload_circleci_results.py --source-dir outputs --dataset-id "${CIRCLECI_RESULTS_DATASET_ID}"

From 215ea77a013c435cfc01194c5b089173cf96ada0 Mon Sep 17 00:00:00 2001
From: ydshieh <ydshieh@users.noreply.github.com>
Date: Thu, 20 Nov 2025 17:04:55 +0100
Subject: [PATCH 16/16] trigger