@@ -19,19 +19,19 @@
 class Net(nn.Module):
     def __init__(self, num_gpus=0):
         super(Net, self).__init__()
-        print("Using {} GPUs to train".format(num_gpus))
+        print(f"Using {num_gpus} GPUs to train")
         self.num_gpus = num_gpus
         device = torch.device(
             "cuda:0" if torch.cuda.is_available() and self.num_gpus > 0 else "cpu")
-        print("Putting first 2 convs on {}".format(str(device)))
+        print(f"Putting first 2 convs on {str(device)}")
         # Put conv layers on the first cuda device
         self.conv1 = nn.Conv2d(1, 32, 3, 1).to(device)
         self.conv2 = nn.Conv2d(32, 64, 3, 1).to(device)
         # Put rest of the network on the 2nd cuda device, if there is one
         if "cuda" in str(device) and num_gpus > 1:
             device = torch.device("cuda:1")

-        print("Putting rest of layers on {}".format(str(device)))
+        print(f"Putting rest of layers on {str(device)}")
         self.dropout1 = nn.Dropout2d(0.25).to(device)
         self.dropout2 = nn.Dropout2d(0.5).to(device)
         self.fc1 = nn.Linear(9216, 128).to(device)
@@ -179,9 +179,7 @@ def run_training_loop(rank, num_gpus, train_loader, test_loader):
             target = target.to(model_output.device)
             loss = F.nll_loss(model_output, target)
             if i % 5 == 0:
-                print(
-                    "Rank {} training batch {} loss {}".format(
-                        rank, i, loss.item()))
+                print(f"Rank {rank} training batch {i} loss {loss.item()}")
             dist_autograd.backward(cid, [loss])
             # Ensure that dist autograd ran successfully and gradients were
             # returned.
@@ -209,18 +207,18 @@ def get_accuracy(test_loader, model):
             correct = pred.eq(target.view_as(pred)).sum().item()
             correct_sum += correct

-    print("Accuracy {}".format(correct_sum / len(test_loader.dataset)))
+    print(f"Accuracy {correct_sum / len(test_loader.dataset)}")


 # Main loop for trainers.
 def run_worker(rank, world_size, num_gpus, train_loader, test_loader):
-    print("Worker rank {} initializing RPC".format(rank))
+    print(f"Worker rank {rank} initializing RPC")
     rpc.init_rpc(
-        name="trainer_{}".format(rank),
+        name=f"trainer_{rank}",
         rank=rank,
         world_size=world_size)

-    print("Worker {} done initializing RPC".format(rank))
+    print(f"Worker {rank} done initializing RPC")

     run_training_loop(rank, num_gpus, train_loader, test_loader)
     rpc.shutdown()