diff --git a/recipes_source/recipes/save_load_across_devices.py b/recipes_source/recipes/save_load_across_devices.py new file mode 100644 index 00000000000..c2d86fbab50 --- /dev/null +++ b/recipes_source/recipes/save_load_across_devices.py @@ -0,0 +1,190 @@ +""" +Saving and loading models across devices in PyTorch +=================================================== + +There may be instances where you want to save and load your neural +networks across different devices. + +Introduction +------------ + +Saving and loading models across devices is relatively straightforward +using PyTorch. In this recipe, we will experiment with saving and +loading models across CPUs and GPUs. + +Setup +----- + +In order for every code block to run properly in this recipe, you must +first change the runtime to “GPU” or higher. Once you do, we need to +install ``torch`` if it isn’t already available. + +:: + + pip install torch + +""" + + +###################################################################### +# Steps +# ----- +# +# 1. Import all necessary libraries for loading our data +# 2. Define and initialize the neural network +# 3. Save on a GPU, load on a CPU +# 4. Save on a GPU, load on a GPU +# 5. Save on a CPU, load on a GPU +# 6. Saving and loading ``DataParallel`` models +# +# 1. Import necessary libraries for loading our data +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# For this recipe, we will use ``torch`` and its subsidiaries ``torch.nn`` +# and ``torch.optim``. +# + +import torch +import torch.nn as nn +import torch.optim as optim + + +###################################################################### +# 2. Define and initialize the neural network +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# For the sake of example, we will create a neural network for training +# images. To learn more, see the Defining a Neural Network recipe. 
#

# ``Net.forward`` applies ``F.relu``, so the functional API must be in scope.
import torch.nn.functional as F


class Net(nn.Module):
    """Small convolutional image classifier used as the model to save and load.

    Two convolution + max-pool stages feed three fully connected layers that
    produce 10 output values per sample. The ``16 * 5 * 5`` flatten size
    corresponds to 3-channel 32x32 inputs.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run a batch of images through the network and return the raw scores."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten the feature maps so they can feed the fully connected layers.
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()
print(net)


######################################################################
# 3. Save on GPU, Load on CPU
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# When loading a model on a CPU that was trained with a GPU, pass
# ``torch.device('cpu')`` to the ``map_location`` argument in the
# ``torch.load()`` function.
#

# Specify a path to save to
PATH = "model.pt"

# Save
torch.save(net.state_dict(), PATH)

# Load
device = torch.device('cpu')
model = Net()
model.load_state_dict(torch.load(PATH, map_location=device))


######################################################################
# In this case, the storages underlying the tensors are dynamically
# remapped to the CPU device using the ``map_location`` argument.
#
# 4. Save on GPU, Load on GPU
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# When loading a model on a GPU that was trained and saved on GPU, simply
# convert the initialized model to a CUDA optimized model using
# ``model.to(torch.device('cuda'))``.
#
# Be sure to use the ``.to(torch.device('cuda'))`` function on all model
# inputs to prepare the data for the model.
#

# Save
torch.save(net.state_dict(), PATH)

# Load
# Fall back to the CPU when CUDA is unavailable so that the recipe still runs
# end-to-end on machines without a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Net()
model.load_state_dict(torch.load(PATH))
model.to(device)


######################################################################
# Note that calling ``my_tensor.to(device)`` returns a new copy of
# ``my_tensor`` on GPU.
# It does NOT overwrite ``my_tensor``. Therefore,
# remember to manually overwrite tensors:
# ``my_tensor = my_tensor.to(torch.device('cuda'))``.
#
# 5. Save on CPU, Load on GPU
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# When loading a model on a GPU that was trained and saved on CPU, set the
# ``map_location`` argument in the ``torch.load()`` function to
# ``cuda:device_id``. This loads the model to a given GPU device.
#
# Be sure to call ``model.to(torch.device('cuda'))`` to convert the
# model’s parameter tensors to CUDA tensors.
#
# Finally, also be sure to use the ``.to(torch.device('cuda'))`` function
# on all model inputs to prepare the data for the CUDA optimized model.
#

# Save
torch.save(net.state_dict(), PATH)

# Load
# Target the first GPU, falling back to the CPU when CUDA is unavailable so
# that the recipe still runs end-to-end on machines without a GPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = Net()
# Choose whatever GPU device number you want
model.load_state_dict(torch.load(PATH, map_location=device))
# Make sure to call input = input.to(device) on any input tensors that you feed to the model
model.to(device)


######################################################################
# 6. Saving ``torch.nn.DataParallel`` Models
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# ``torch.nn.DataParallel`` is a model wrapper that enables parallel GPU
# utilization.
#
# To save a ``DataParallel`` model generically, save the
# ``model.module.state_dict()``. This way, you have the flexibility to
# load the model any way you want to any device you want.
#

# ``net`` is a plain ``Net`` and has no ``module`` attribute, so wrap it in
# ``DataParallel`` first; the wrapper exposes the original network as
# ``.module``.
parallel_net = nn.DataParallel(net)

# Save
torch.save(parallel_net.module.state_dict(), PATH)

# Load to whatever device you want (here, for example, the CPU)
model = Net()
model.load_state_dict(torch.load(PATH, map_location=torch.device("cpu")))


######################################################################
# Congratulations! You have successfully saved and loaded models across
# devices in PyTorch.
#
# Learn More
# ----------
#
# Take a look at these other recipes to continue your learning:
#
# - TBD
# - TBD
#