From 516af7c30dd7fe920397b9f23124a03b71b22671 Mon Sep 17 00:00:00 2001 From: ydshieh Date: Tue, 18 Nov 2025 17:57:38 +0100 Subject: [PATCH 1/4] fix --- tests/models/cwm/test_modeling_cwm.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/models/cwm/test_modeling_cwm.py b/tests/models/cwm/test_modeling_cwm.py index 8ed6e335c62d..2c0e43f45997 100644 --- a/tests/models/cwm/test_modeling_cwm.py +++ b/tests/models/cwm/test_modeling_cwm.py @@ -17,6 +17,7 @@ from transformers import is_torch_available from transformers.testing_utils import ( cleanup, + require_read_token, require_torch, require_torch_accelerator, slow, @@ -85,6 +86,7 @@ class CwmModelTest(CausalLMModelTest, unittest.TestCase): @require_torch_accelerator @slow +@require_read_token class CwmIntegrationTest(unittest.TestCase): def setUp(self): cleanup(torch_device, gc_collect=True) From 86451708823baa8e6ee82ca2d737e1a67eb8683f Mon Sep 17 00:00:00 2001 From: ydshieh Date: Wed, 19 Nov 2025 11:39:10 +0100 Subject: [PATCH 2/4] fix --- tests/models/cwm/test_modeling_cwm.py | 83 ++++----------------------- 1 file changed, 11 insertions(+), 72 deletions(-) diff --git a/tests/models/cwm/test_modeling_cwm.py b/tests/models/cwm/test_modeling_cwm.py index 2c0e43f45997..30d147434299 100644 --- a/tests/models/cwm/test_modeling_cwm.py +++ b/tests/models/cwm/test_modeling_cwm.py @@ -118,45 +118,14 @@ def test_cwm_integration(self): with torch.no_grad(): out = model(**inputs) + # fmt: off expected_logits = torch.tensor( - [ - 0.5625, - 2.9531, - 9.1875, - 0.4746, - -0.3613, - 2.2031, - 2.9844, - 1.5312, - 0.5859, - 1.5391, - 2.7500, - 3.4375, - 2.0156, - 2.1719, - 1.5469, - 2.5469, - 2.8438, - 1.8203, - 1.7188, - 1.3984, - 1.0469, - 0.1748, - 0.4453, - 0.1533, - -0.1157, - 0.8516, - 2.2344, - 5.2188, - 1.2891, - 1.5234, - 0.8555, - 0.6992, - ], + [0.5625, 2.9531, 9.1875, 0.5039, -0.3262, 2.2344, 3.0312, 1.5312, 0.5664, 1.5625, 2.7656, 3.4219, 2.0312, 2.1719, 1.5391, 2.5469, 2.8281, 1.8125, 1.7109, 1.3906, 1.0391, 0.1621, 0.4277, 0.1455, -0.1230, 0.8477, 2.2344, 5.2188, 1.2969, 1.5547, 0.8516, 0.7148], dtype=torch.bfloat16, ).to(model.device) + # fmt: on - self.assertTrue(torch.allclose(out.logits[0, -1, :32], expected_logits, atol=1e-2, rtol=1e-2)) + torch.testing.assert_close(out.logits[0, -1, :32], expected_logits, atol=1e-2, rtol=1e-2) self.assertEqual(out.logits.shape[1], inputs.input_ids.shape[1]) self.assertEqual(out.logits.shape[2], model.config.vocab_size) @@ -170,8 +139,9 @@ def test_cwm_sliding_window_long_sequence(self): tokenizer = AutoTokenizer.from_pretrained("facebook/cwm") model = CwmForCausalLM.from_pretrained("facebook/cwm", device_map="auto", dtype=torch.bfloat16) - sliding_window = model.config.sliding_window - long_text = "for i in range(1000):\n print(f'iteration {i}')\n" * 600 + sliding_window = 4096 + model.config.sliding_window = sliding_window + long_text = "for i in range(1000):\n print(f'iteration {i}')\n" * 270 inputs = tokenizer(long_text, return_tensors="pt").to(model.device) seq_len = inputs.input_ids.shape[1] @@ -184,45 +154,14 @@ def test_cwm_sliding_window_long_sequence(self): with torch.no_grad(): out = model(**inputs) + # fmt: off expected_logits = torch.tensor( - [ - 4.7812, - 6.1875, - 13.1875, - 4.4062, - 5.0312, - 3.9844, - 6.6875, - 4.8438, - 2.3125, - 6.5000, - 4.4688, - 0.5195, - 5.6562, - 3.3125, - 2.7500, - 4.9062, - 5.5938, - 4.1562, - 3.9531, - 2.4062, - 3.2812, - 2.8594, - 3.4688, - 2.9688, - 2.6875, - 3.4531, - 2.7344, - 7.2812, - 4.5000, - 5.7500, - 2.3438, - 5.9688, - ], + [5.2812, 6.4688, 12.8125, 4.6875, 5.2500, 4.2500, 6.9688, 4.9375, 2.7656, 6.5938, 4.9688, 1.1016, 5.9375, 3.7500, 3.1094, 5.5312, 6.1250, 4.7500, 4.5312, 2.8281, 4.0625, 3.3125, 3.9219, 3.3906, 3.1406, 3.6719, 3.2031, 7.0938, 4.8750, 6.0000, 2.7188, 6.2500], dtype=torch.bfloat16, ).to(model.device) + # fmt: on - self.assertTrue(torch.allclose(out.logits[0, -1, :32], expected_logits, atol=1e-2, rtol=1e-2)) + torch.testing.assert_close(out.logits[0, -1, :32], expected_logits, atol=1e-2, rtol=1e-2) self.assertEqual(out.logits.shape[1], seq_len) self.assertEqual(out.logits.shape[2], model.config.vocab_size) From fb8106768b0f9ffdc8fefe51d8981e89b184ee14 Mon Sep 17 00:00:00 2001 From: ydshieh Date: Wed, 19 Nov 2025 11:42:15 +0100 Subject: [PATCH 3/4] fix --- tests/models/cwm/test_modeling_cwm.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/models/cwm/test_modeling_cwm.py b/tests/models/cwm/test_modeling_cwm.py index 30d147434299..97f9616025b2 100644 --- a/tests/models/cwm/test_modeling_cwm.py +++ b/tests/models/cwm/test_modeling_cwm.py @@ -137,10 +137,11 @@ def test_cwm_sliding_window_long_sequence(self): from transformers import AutoTokenizer tokenizer = AutoTokenizer.from_pretrained("facebook/cwm") - model = CwmForCausalLM.from_pretrained("facebook/cwm", device_map="auto", dtype=torch.bfloat16) + model = CwmForCausalLM.from_pretrained( + "facebook/cwm", device_map="auto", dtype=torch.bfloat16, sliding_window=4096 + ) - sliding_window = 4096 - model.config.sliding_window = sliding_window + sliding_window = model.config.sliding_window long_text = "for i in range(1000):\n print(f'iteration {i}')\n" * 270 inputs = tokenizer(long_text, return_tensors="pt").to(model.device) From e7cb6cb90622716ed14614ef770c63661c88afbf Mon Sep 17 00:00:00 2001 From: ydshieh Date: Wed, 19 Nov 2025 12:10:37 +0100 Subject: [PATCH 4/4] fix --- tests/models/cwm/test_modeling_cwm.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/models/cwm/test_modeling_cwm.py b/tests/models/cwm/test_modeling_cwm.py index 97f9616025b2..1381454a2b6a 100644 --- a/tests/models/cwm/test_modeling_cwm.py +++ b/tests/models/cwm/test_modeling_cwm.py @@ -137,6 +137,7 @@ def test_cwm_sliding_window_long_sequence(self): from transformers import AutoTokenizer tokenizer = AutoTokenizer.from_pretrained("facebook/cwm") + # original `sliding_window` is `8192`, but it causes GPU OOM on A10 model = CwmForCausalLM.from_pretrained( "facebook/cwm", device_map="auto", dtype=torch.bfloat16, sliding_window=4096 ) @@ -164,10 +165,12 @@ def test_cwm_sliding_window_long_sequence(self): torch.testing.assert_close(out.logits[0, -1, :32], expected_logits, atol=1e-2, rtol=1e-2) - self.assertEqual(out.logits.shape[1], seq_len) - self.assertEqual(out.logits.shape[2], model.config.vocab_size) - self.assertFalse(torch.isnan(out.logits).any()) - self.assertFalse(torch.isinf(out.logits).any()) + logits = out.logits.to("cpu") + + self.assertEqual(logits.shape[1], seq_len) + self.assertEqual(logits.shape[2], model.config.vocab_size) + self.assertFalse(torch.isnan(logits).any()) + self.assertFalse(torch.isinf(logits).any()) for i, layer in enumerate(model.model.layers): if model.config.layer_types[i] == "sliding_attention":