Commit 9aefadf

Add a test for map_location="cpu"
Summary: torchtune is using torch.load(file_name, map_location="cpu", mmap=True), so we add a test to make sure this works with the tensor subclass API.

Test Plan: python test/quantization/test_quant_api.py -k test_quantized_tensor_subclass_save_load_map_location

Reviewers:
Subscribers:
Tasks:
Tags:
1 parent 05038a1 commit 9aefadf
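
For context, a minimal sketch of the loading pattern this commit is testing; the checkpoint path and the placeholder nn.Linear module below are illustrative, not part of the commit:

import torch
import torch.nn as nn

# Placeholder module standing in for a quantized torchtune checkpoint.
model = nn.Linear(16, 16, dtype=torch.bfloat16)

torch.save(model.state_dict(), "checkpoint.pt")

# torchtune-style load: map_location="cpu" keeps tensors on CPU and
# mmap=True memory-maps the file instead of reading it eagerly into RAM.
state_dict = torch.load("checkpoint.pt", map_location="cpu", mmap=True)

# assign=True swaps in the loaded tensors directly, which is what lets
# tensor-subclass (quantized) weights survive the round trip in the test below.
model.load_state_dict(state_dict, assign=True)
if torch.cuda.is_available():
    model.to(dtype=torch.bfloat16, device="cuda")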

1 file changed: +20 -0 lines

test/quantization/test_quant_api.py

Lines changed: 20 additions & 0 deletions
@@ -635,6 +635,26 @@ def test_quantized_model_to_device(self):
         cuda_res = m(*example_inputs_cuda)
         self.assertEqual(cuda_res.cpu(), ref)
 
+    @unittest.skipIf(not TORCH_VERSION_AFTER_2_4, "Test only enabled for 2.4+")
+    @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
+    def test_quantized_tensor_subclass_save_load_map_location(self):
+        m = ToyLinearModel().eval().to(dtype=torch.bfloat16, device="cuda")
+        m_copy = copy.deepcopy(m)
+        example_inputs = m.example_inputs(dtype=torch.bfloat16, device="cuda")
+
+        quantize_(m, int8_weight_only())
+        ref = m(*example_inputs)
+        with tempfile.NamedTemporaryFile() as f:
+            torch.save(m.state_dict(), f)
+            f.seek(0)
+            state_dict = torch.load(f.name, map_location="cpu", mmap=True)
+
+        m_copy.load_state_dict(state_dict, assign=True)
+        m_copy.to(dtype=torch.bfloat16, device="cuda")
+
+        res = m_copy(*example_inputs)
+        self.assertEqual(res, ref)
+
 
 if __name__ == "__main__":
     unittest.main()
