From b1cefec24590c915800a24d17abc805f8972b11b Mon Sep 17 00:00:00 2001
From: pritam
Date: Thu, 9 Jul 2020 00:12:44 -0700
Subject: [PATCH 1/3] Tutorial for DDP + RPC.

Summary: Based on example from https://github.com/pytorch/examples/pull/800
---
 advanced_source/rpc_ddp_tutorial.rst | 308 +++++++++++++++++++++++++++
 1 file changed, 308 insertions(+)
 create mode 100644 advanced_source/rpc_ddp_tutorial.rst

diff --git a/advanced_source/rpc_ddp_tutorial.rst b/advanced_source/rpc_ddp_tutorial.rst
new file mode 100644
index 00000000000..45b97a8f5d5
--- /dev/null
+++ b/advanced_source/rpc_ddp_tutorial.rst
@@ -0,0 +1,308 @@
+Combining Distributed DataParallel with Distributed RPC Framework
+=================================================================
+**Author**: `Pritam Damania `_
+
+
+This tutorial uses a simple example to demonstrate how you can combine
+`DistributedDataParallel `__ (DDP)
+with the `Distributed RPC framework `__
+to train a simple model that mixes distributed data parallelism with
+distributed model parallelism. Source code of the example can be found `here `__.
+
+Previous tutorials,
+`Getting Started With Distributed Data Parallel `__
+and `Getting Started with Distributed RPC Framework `__,
+described how to perform distributed data parallel and distributed model
+parallel training respectively. However, there are several training paradigms
+where you might want to combine these two techniques. For example:
+
+1) If we have a model with a sparse part (large embedding table) and a dense
+   part (FC layers), we might want to put the embedding table on a parameter
+   server and replicate the FC layer across multiple trainers using `DistributedDataParallel `__.
+   The `Distributed RPC framework `__
+   can be used to perform embedding lookups on the parameter server.
+2) Enable hybrid parallelism as described in the `PipeDream `__ paper.
+   We can use the `Distributed RPC framework `__
+   to pipeline stages of the model across multiple workers and replicate each
+   stage (if needed) using `DistributedDataParallel `__.
+
+|
+In this tutorial we will cover case 1 mentioned above. We have a total of 4
+workers in our setup as follows:
+
+
+1) 1 Master, which is responsible for creating an embedding table
+   (nn.EmbeddingBag) on the parameter server. The master also drives the
+   training loop on the two trainers.
+2) 1 Parameter Server, which basically holds the embedding table in memory and
+   responds to RPCs from the Master and Trainers.
+3) 2 Trainers, which store a FC layer (nn.Linear) which is replicated amongst
+   themselves using `DistributedDataParallel `__.
+   The trainers are also responsible for executing the forward pass, backward
+   pass and optimizer step.
+
+|
+The entire training process is executed as follows:
+
+1) The master creates an embedding table on the Parameter Server and holds a
+   `RRef `__ to it.
+2) The master, then kicks of the training loop on the trainers and passes the
+   embedding table RRef to the trainers.
+3) The trainers create a ``HybridModel`` which first performs an embedding lookup
+   using the embedding table RRef provided by the master and then executes the
+   FC layer which is wrapped inside DDP.
+4) The trainer executes the forward pass of the model and uses the loss to
+   execute the backward pass using `Distributed Autograd `__.
+5) As part of the backward pass, the gradients for the FC layer are computed
+   first and synced to all trainers via allreduce in DDP.
+6) Next, Distributed Autograd propagates the gradients to the parameter server,
+   where the gradients for the embedding table are updated.
+7) Finally, the `Distributed Optimizer `__ is used to update all the parameters.
+
+
+|
+**NOTE**: You should always use `Distributed Autograd `__ for the backward pass if you're combining DDP and RPC.
+
+
+Now, lets go through each part in detail. Firstly, we need to setup all of our
+workers before we can perform any training. We create 4 processes such that
+ranks 0 and 1 are our trainers, rank 2 is the master and rank 3 is the
+parameter server.
+
+We initialize the RPC framework on all 4 workers using the TCP init_method.
+Once RPC initialization is done, the master creates an `EmbeddingBag `__
+on the Parameter Server using `rpc.remote `__.
+The master then kicks off the training loop by calling ``_run_trainer`` on
+each trainer using `rpc_async `__.
+Finally, the master waits for all training to finish before exiting.
+
+The trainers first initialize a ``ProcessGroup`` for DDP with world_size=2
+(for two trainers) using `init_process_group `__.
+Next, they initialize the RPC framework using the TCP init_method. Note that
+the ports are different in RPC initialization and ProcessGroup intialization.
+This is to avoid port conflicts between initialization of both frameworks.
+Once the initialization is done, the trainers just wait for the ``_run_trainer``
+RPC from the master.
+
+The parameter server just initializes the RPC framework and waits for RPCs from
+the trainers and master.
+
+.. code:: python
+
+    def run_worker(rank, world_size):
+        r"""
+        A wrapper function that initializes RPC, calls the function, and shuts down
+        RPC.
+        """
+
+        # We need to use different port numbers in TCP init_method for init_rpc and
+        # init_process_group to avoid port conflicts.
+        rpc_backend_options = ProcessGroupRpcBackendOptions()
+        rpc_backend_options.init_method='tcp://localhost:29501'
+
+        # Rank 2 is master, 3 is ps and 0 and 1 are trainers.
+        if rank == 2:
+            rpc.init_rpc(
+                "master",
+                rank=rank,
+                world_size=world_size,
+                rpc_backend_options=rpc_backend_options)
+
+            # Build the embedding table on the ps.
+            emb_rref = rpc.remote(
+                "ps",
+                torch.nn.EmbeddingBag,
+                args=(NUM_EMBEDDINGS, EMBEDDING_DIM),
+                kwargs={"mode": "sum"})
+
+            # Run the training loop on trainers.
+            futs = []
+            for trainer_rank in [0, 1]:
+                trainer_name = "trainer{}".format(trainer_rank)
+                fut = rpc.rpc_async(
+                    trainer_name, _run_trainer, args=(emb_rref, rank))
+                futs.append(fut)
+
+            # Wait for all training to finish.
+            for fut in futs:
+                fut.wait()
+        elif rank <= 1:
+            # Initialize process group for Distributed DataParallel on trainers.
+            dist.init_process_group(
+                backend="gloo", rank=rank, world_size=2,
+                init_method='tcp://localhost:29500')
+
+            # Initialize RPC.
+            trainer_name = "trainer{}".format(rank)
+            rpc.init_rpc(
+                trainer_name,
+                rank=rank,
+                world_size=world_size,
+                rpc_backend_options=rpc_backend_options)
+
+            # Trainer just waits for RPCs from master.
+        else:
+            rpc.init_rpc(
+                "ps",
+                rank=rank,
+                world_size=world_size,
+                rpc_backend_options=rpc_backend_options)
+            # The parameter server does nothing.
+            pass
+
+        # block until all rpcs finish
+        rpc.shutdown()
+
+
+    if __name__ == "__main__":
+        # 2 trainers, 1 parameter server, 1 master.
+        world_size = 4
+        mp.spawn(run_worker, args=(world_size,), nprocs=world_size, join=True)
+
+Before we discuss details of the Trainer, lets introduce the ``HybridModel`` that
+the trainer uses. As described below, the ``HybridModel`` is initialized using an
+RRef to the embedding table (emb_rref) on the parameter server and the ``device``
+to use for DDP. The initialization of the model wraps a
+`nn.Linear `__
+layer inside DDP to replicate and synchronize this layer across all trainers.
+
+The forward method of the model is pretty straightforward. It performs an
+embedding lookup on the parameter server using an
+`RRef helper `__
+and passes its output onto the FC layer.
+
+
+.. code:: python
+
+    class HybridModel(torch.nn.Module):
+        r"""
+        The model consists of a sparse part and a dense part. The dense part is an
+        nn.Linear module that is replicated across all trainers using
+        DistributedDataParallel. The sparse part is an nn.EmbeddingBag that is
+        stored on the parameter server.
+
+        The model holds a Remote Reference to the embedding table on the parameter
+        server.
+        """
+
+        def __init__(self, emb_rref, device):
+            super(HybridModel, self).__init__()
+            self.emb_rref = emb_rref
+            self.fc = DDP(torch.nn.Linear(16, 8).cuda(device), device_ids=[device])
+            self.device = device
+
+        def forward(self, indices, offsets):
+            emb_lookup = self.emb_rref.rpc_sync().forward(indices, offsets)
+            return self.fc(emb_lookup.cuda(self.device))
+
+Next, lets look at the setup on the Trainer. The trainer first creates the
+``HybridModel`` described above using an RRef to the embedding table on the
+parameter server and its own rank.
+
+Now, we need to retrieve a list of RRefs to all the parameters that we would
+like to optimize with `DistributedOptimizer `__.
+To retrieve the parameters for the embedding table from the parameter server,
+we define a simple helper function ``_retrieve_embedding_parameters``, which
+walks through all the parameters for the embedding table and returns
+a list of RRefs. The trainer calls this method on the parameter server via RPC
+to receive a list of RRefs to the desired parameters. Since the
+DistributedOptimizer always takes a list of RRefs to parameters that need to
+be optimized, we need to create RRefs even for the local parameters for our
+FC layers. This is done by walking ``model.parameters()``, creating an RRef for
+each parameter and appending it to a list. Note that ``model.parameters()`` only
+returns local parameters and doesn't include ``emb_rref``.
+
+Finally, we create our DistributedOptimizer using all the RRefs and define a
+CrossEntropyLoss function.
+
+.. code:: python
+
+    def _retrieve_embedding_parameters(emb_rref):
+        return [RRef(p) for p in emb_rref.local_value().parameters()]
+
+
+    def _run_trainer(emb_rref, rank):
+        r"""
+        Each trainer runs a forward pass which involves an embedding lookup on the
+        parameter server and running nn.Linear locally. During the backward pass,
+        DDP is responsible for aggregating the gradients for the dense part
+        (nn.Linear) and distributed autograd ensures gradient updates are
+        propagated to the parameter server.
+        """
+
+        # Setup the model.
+        model = HybridModel(emb_rref, rank)
+
+        # Retrieve all model parameters as rrefs for DistributedOptimizer.
+
+        # Retrieve parameters for embedding table.
+        model_parameter_rrefs = rpc.rpc_sync(
+            "ps", _retrieve_embedding_parameters, args=(emb_rref,))
+
+        # model.parameters() only includes local parameters.
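+        # Wrapping each local parameter in an RRef below gives the
+        # DistributedOptimizer one uniform handle type for both the remote
+        # embedding parameters and the local FC parameters.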
+        for param in model.parameters():
+            model_parameter_rrefs.append(RRef(param))
+
+        # Setup distributed optimizer
+        opt = DistributedOptimizer(
+            optim.SGD,
+            model_parameter_rrefs,
+            lr=0.05,
+        )
+
+        criterion = torch.nn.CrossEntropyLoss()
+
+Now we're ready to introduce the main training loop that is run on each trainer.
+``get_next_batch`` is just a helper function to generate random inputs and
+targets for training. We run the training loop for multiple epochs and for each
+batch:
+
+1) Set up a `Distributed Autograd Context `__
+   for Distributed Autograd.
+2) Run the forward pass of the model and retrieve its output.
+3) Compute the loss based on our outputs and targets using the loss function.
+4) Use Distributed Autograd to execute a distributed backward pass using the loss.
+5) Finally, run a Distributed Optimizer step to optimize all the parameters.
+
+.. code:: python
+
+    # def _run_trainer(emb_rref, rank): continued...
+
+    def get_next_batch(rank):
+        for _ in range(10):
+            num_indices = random.randint(20, 50)
+            indices = torch.LongTensor(num_indices).random_(0, NUM_EMBEDDINGS)
+
+            # Generate offsets.
+            offsets = []
+            start = 0
+            batch_size = 0
+            while start < num_indices:
+                offsets.append(start)
+                start += random.randint(1, 10)
+                batch_size += 1
+
+            offsets_tensor = torch.LongTensor(offsets)
+            target = torch.LongTensor(batch_size).random_(8).cuda(rank)
+            yield indices, offsets_tensor, target
+
+    # Train for 100 epochs
+    for epoch in range(100):
+        # create distributed autograd context
+        for indices, offsets, target in get_next_batch(rank):
+            with dist_autograd.context() as context_id:
+                output = model(indices, offsets)
+                loss = criterion(output, target)
+
+                # Run distributed backward pass
+                dist_autograd.backward(context_id, [loss])
+
+                # Run distributed optimizer
+                opt.step(context_id)
+
+                # Not necessary to zero grads as each iteration creates a different
+                # distributed autograd context which hosts different grads
+        print("Training done for epoch {}".format(epoch))
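+
+One detail of ``get_next_batch`` worth calling out is the ``offsets`` tensor:
+``nn.EmbeddingBag`` consumes a flat 1-D ``indices`` tensor together with an
+``offsets`` tensor that marks where each sample's bag of indices begins. As a
+minimal sketch with hand-picked values (not part of the training code above):
+
+.. code:: python
+
+    # Two samples: indices[0:3] form bag 0 and indices[3:5] form bag 1.
+    indices = torch.LongTensor([10, 3, 7, 42, 1])
+    offsets = torch.LongTensor([0, 3])
+    # With mode="sum", each bag is reduced to a single row, so the lookup
+    # returns a tensor of shape (2, EMBEDDING_DIM).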
+
+|
+Source code for the entire example can be found `here `__.

From 0aaeb4e504eb6a2c8c7502a8de33c401eb379349 Mon Sep 17 00:00:00 2001
From: pritam
Date: Thu, 9 Jul 2020 00:22:47 -0700
Subject: [PATCH 2/3] Add to main section

Summary:
Test Plan:
Reviewers:
Subscribers:
Tasks:
Tags:
---
 ...aParallel-with-Distributed-RPC-Framework.png | Bin 0 -> 35776 bytes
 index.rst                                       |   7 +++++++
 2 files changed, 7 insertions(+)
 create mode 100644 _static/img/thumbnails/cropped/Combining-Distributed-DataParallel-with-Distributed-RPC-Framework.png

diff --git a/_static/img/thumbnails/cropped/Combining-Distributed-DataParallel-with-Distributed-RPC-Framework.png b/_static/img/thumbnails/cropped/Combining-Distributed-DataParallel-with-Distributed-RPC-Framework.png
new file mode 100644
index 0000000000000000000000000000000000000000..426a14d98f5f7fbbf695626658ad1946fe4ef63e
GIT binary patch
literal 35776
[35776 bytes of base85-encoded PNG thumbnail data omitted]
zO;omGg!_0O*CvVN=C*LyJtYn!)Wdz(QtV7yN}h<=*2VEzJl+-e2sj)P_ZU-T*d(!c z$LD<9&@G>IJpE(hRkSnr_9$8(ylW-z);4$e{*Q#NAWg#dc*^V)5t5d|No7XrE*mb^ zQe{NrA)T9>Oe;Xw?<+uTtn`IC%4@Uzb4p7;JzU>@#CG+qq~##1MDuM!R?p5;GUf-@ z`7N1G$2g4wM~vgZ=7CrbgFgoxN!bi8bW zoo8VV4Wz^m{x3~nuZMH(JKG!^xP*`o*<+e?=U~@QIZ6(|8kOE{K`Fpx=khEm(UV55 zXG6tIWh|Cz!-gE=y0Qaan!rsEaEFS$|Fq{#Yc}HVe61`Tcp%_LlfpI2^ZOM+t}6}f zh9=+Y*4oE10SHo$BMH)V`$%D|#{-0nO-y@FK%O05kD1g3}7$O87M@8N$83gef7!3_oOG&o_ESn{CTq!9aphiRs2 zvjvt^T;4K*@2i?E!IcRzRFHPy7}bs%DFfp&4rXUR7P8iprR*jsFirWqj@x4R5bVi5w++At_vwy;sy!D3h}c`9C68$iDyp literal 0 HcmV?d00001 diff --git a/index.rst b/index.rst index 9a8762e8026..666bc2276be 100644 --- a/index.rst +++ b/index.rst @@ -332,6 +332,13 @@ Welcome to PyTorch Tutorials :link: intermediate/rpc_param_server_tutorial.html :tags: Parallel-and-Distributed-Training +.. customcarditem:: + :header: Combining Distributed DataParallel with Distributed RPC Framework + :card_description: Walk through a through a simple example of how to combine distributed data parallelism with distributed model parallelism. + :image: _static/img/thumbnails/cropped/Combining-Distributed-DataParallel-with-Distributed-RPC-Framework.png + :link: advanced/rpc_ddp_tutorial.html + :tags: Parallel-and-Distributed-Training + .. End of tutorial card section .. raw:: html From a701926ca2e13d2c30d554ee545d5d2fd587fd9e Mon Sep 17 00:00:00 2001 From: pritam Date: Tue, 14 Jul 2020 17:21:24 -0700 Subject: [PATCH 3/3] Added separate code file and used literalinclude Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags: --- advanced_source/rpc_ddp_tutorial.rst | 205 ++++------------------- advanced_source/rpc_ddp_tutorial/main.py | 191 +++++++++++++++++++++ index.rst | 1 + 3 files changed, 220 insertions(+), 177 deletions(-) create mode 100644 advanced_source/rpc_ddp_tutorial/main.py diff --git a/advanced_source/rpc_ddp_tutorial.rst b/advanced_source/rpc_ddp_tutorial.rst index 45b97a8f5d5..fd0d66e5d19 100644 --- a/advanced_source/rpc_ddp_tutorial.rst +++ b/advanced_source/rpc_ddp_tutorial.rst @@ -36,7 +36,7 @@ workers in our setup as follows: training loop on the two trainers. 2) 1 Parameter Server, which basically holds the embedding table in memory and responds to RPCs from the Master and Trainers. -3) 2 Trainers, which store a FC layer (nn.Linear) which is replicated amongst +3) 2 Trainers, which store an FC layer (nn.Linear) which is replicated amongst themselves using `DistributedDataParallel `__. The trainers are also responsible for executing the forward pass, backward pass and optimizer step. @@ -44,9 +44,9 @@ workers in our setup as follows: | The entire training process is executed as follows: -1) The master creates an embedding table on the Parameter Server and holds a +1) The master creates an embedding table on the Parameter Server and holds an `RRef `__ to it. -2) The master, then kicks of the training loop on the trainers and passes the +2) The master, then kicks off the training loop on the trainers and passes the embedding table RRef to the trainers. 3) The trainers create a ``HybridModel`` which first performs an embedding lookup using the embedding table RRef provided by the master and then executes the @@ -60,11 +60,13 @@ The entire training process is executed as follows: 7) Finally, the `Distributed Optimizer `__ is used to update all the parameters. -| -**NOTE**: You should always use `Distributed Autograd `__ for the backward pass if you're combining DDP and RPC. +.. 
 
-Now, lets go through each part in detail. Firstly, we need to setup all of our
+Now, let's go through each part in detail. First, we need to set up all of our
 workers before we can perform any training. We create 4 processes such that
 ranks 0 and 1 are our trainers, rank 2 is the master and rank 3 is the
 parameter server.
@@ -79,7 +81,7 @@ Finally, the master waits for all training to finish before exiting.
 The trainers first initialize a ``ProcessGroup`` for DDP with world_size=2
 (for two trainers) using `init_process_group `__.
 Next, they initialize the RPC framework using the TCP init_method. Note that
-the ports are different in RPC initialization and ProcessGroup intialization.
+the ports are different in RPC initialization and ProcessGroup initialization.
 This is to avoid port conflicts between initialization of both frameworks.
 Once the initialization is done, the trainers just wait for the ``_run_trainer``
 RPC from the master.
 
 The parameter server just initializes the RPC framework and waits for RPCs from
 the trainers and master.
 
-.. code:: python
-
-    def run_worker(rank, world_size):
-        r"""
-        A wrapper function that initializes RPC, calls the function, and shuts down
-        RPC.
-        """
-
-        # We need to use different port numbers in TCP init_method for init_rpc and
-        # init_process_group to avoid port conflicts.
-        rpc_backend_options = ProcessGroupRpcBackendOptions()
-        rpc_backend_options.init_method='tcp://localhost:29501'
-
-        # Rank 2 is master, 3 is ps and 0 and 1 are trainers.
-        if rank == 2:
-            rpc.init_rpc(
-                "master",
-                rank=rank,
-                world_size=world_size,
-                rpc_backend_options=rpc_backend_options)
-
-            # Build the embedding table on the ps.
-            emb_rref = rpc.remote(
-                "ps",
-                torch.nn.EmbeddingBag,
-                args=(NUM_EMBEDDINGS, EMBEDDING_DIM),
-                kwargs={"mode": "sum"})
-
-            # Run the training loop on trainers.
-            futs = []
-            for trainer_rank in [0, 1]:
-                trainer_name = "trainer{}".format(trainer_rank)
-                fut = rpc.rpc_async(
-                    trainer_name, _run_trainer, args=(emb_rref, rank))
-                futs.append(fut)
-
-            # Wait for all training to finish.
-            for fut in futs:
-                fut.wait()
-        elif rank <= 1:
-            # Initialize process group for Distributed DataParallel on trainers.
-            dist.init_process_group(
-                backend="gloo", rank=rank, world_size=2,
-                init_method='tcp://localhost:29500')
-
-            # Initialize RPC.
-            trainer_name = "trainer{}".format(rank)
-            rpc.init_rpc(
-                trainer_name,
-                rank=rank,
-                world_size=world_size,
-                rpc_backend_options=rpc_backend_options)
-
-            # Trainer just waits for RPCs from master.
-        else:
-            rpc.init_rpc(
-                "ps",
-                rank=rank,
-                world_size=world_size,
-                rpc_backend_options=rpc_backend_options)
-            # The parameter server does nothing.
-            pass
-
-        # block until all rpcs finish
-        rpc.shutdown()
-
-
-    if __name__ == "__main__":
-        # 2 trainers, 1 parameter server, 1 master.
-        world_size = 4
-        mp.spawn(run_worker, args=(world_size,), nprocs=world_size, join=True)
+.. literalinclude:: ../advanced_source/rpc_ddp_tutorial/main.py
+  :language: py
+  :start-after: BEGIN run_worker
+  :end-before: END run_worker
 
-Before we discuss details of the Trainer, lets introduce the ``HybridModel`` that
+Before we discuss details of the Trainer, let's introduce the ``HybridModel`` that
 the trainer uses. As described below, the ``HybridModel`` is initialized using an
 RRef to the embedding table (emb_rref) on the parameter server and the ``device``
-to use for DDP. The initialization of the model wraps a
+to use for DDP. The initialization of the model wraps an
 `nn.Linear `__
 layer inside DDP to replicate and synchronize this layer across all trainers.
@@ -172,30 +108,12 @@ embedding lookup on the parameter server using an
 and passes its output onto the FC layer.
 
 
-.. code:: python
-
-    class HybridModel(torch.nn.Module):
-        r"""
-        The model consists of a sparse part and a dense part. The dense part is an
-        nn.Linear module that is replicated across all trainers using
-        DistributedDataParallel. The sparse part is an nn.EmbeddingBag that is
-        stored on the parameter server.
-
-        The model holds a Remote Reference to the embedding table on the parameter
-        server.
-        """
-
-        def __init__(self, emb_rref, device):
-            super(HybridModel, self).__init__()
-            self.emb_rref = emb_rref
-            self.fc = DDP(torch.nn.Linear(16, 8).cuda(device), device_ids=[device])
-            self.device = device
-
-        def forward(self, indices, offsets):
-            emb_lookup = self.emb_rref.rpc_sync().forward(indices, offsets)
-            return self.fc(emb_lookup.cuda(self.device))
+.. literalinclude:: ../advanced_source/rpc_ddp_tutorial/main.py
+  :language: py
+  :start-after: BEGIN hybrid_model
+  :end-before: END hybrid_model
 
-Next, lets look at the setup on the Trainer. The trainer first creates the
+Next, let's look at the setup on the Trainer. The trainer first creates the
 ``HybridModel`` described above using an RRef to the embedding table on the
 parameter server and its own rank.
 
@@ -215,42 +133,10 @@ returns local parameters and doesn't include ``emb_rref``.
 Finally, we create our DistributedOptimizer using all the RRefs and define a
 CrossEntropyLoss function.
 
-.. code:: python
-
-    def _retrieve_embedding_parameters(emb_rref):
-        return [RRef(p) for p in emb_rref.local_value().parameters()]
-
-
-    def _run_trainer(emb_rref, rank):
-        r"""
-        Each trainer runs a forward pass which involves an embedding lookup on the
-        parameter server and running nn.Linear locally. During the backward pass,
-        DDP is responsible for aggregating the gradients for the dense part
-        (nn.Linear) and distributed autograd ensures gradient updates are
-        propagated to the parameter server.
-        """
-
-        # Setup the model.
-        model = HybridModel(emb_rref, rank)
-
-        # Retrieve all model parameters as rrefs for DistributedOptimizer.
-
-        # Retrieve parameters for embedding table.
-        model_parameter_rrefs = rpc.rpc_sync(
-            "ps", _retrieve_embedding_parameters, args=(emb_rref,))
-
-        # model.parameters() only includes local parameters.
-        # Wrapping each local parameter in an RRef below gives the
-        # DistributedOptimizer one uniform handle type for both the remote
-        # embedding parameters and the local FC parameters.
-        for param in model.parameters():
-            model_parameter_rrefs.append(RRef(param))
-
-        # Setup distributed optimizer
-        opt = DistributedOptimizer(
-            optim.SGD,
-            model_parameter_rrefs,
-            lr=0.05,
-        )
-
-        criterion = torch.nn.CrossEntropyLoss()
+.. literalinclude:: ../advanced_source/rpc_ddp_tutorial/main.py
+  :language: py
+  :start-after: BEGIN setup_trainer
+  :end-before: END setup_trainer
 
 Now we're ready to introduce the main training loop that is run on each trainer.
 ``get_next_batch`` is just a helper function to generate random inputs and
 targets for training. We run the training loop for multiple epochs and for each
 batch:
 
 1) Set up a `Distributed Autograd Context `__
    for Distributed Autograd.
 2) Run the forward pass of the model and retrieve its output.
 3) Compute the loss based on our outputs and targets using the loss function.
 4) Use Distributed Autograd to execute a distributed backward pass using the loss.
 5) Finally, run a Distributed Optimizer step to optimize all the parameters.
 
+.. literalinclude:: ../advanced_source/rpc_ddp_tutorial/main.py
+  :language: py
+  :start-after: BEGIN run_trainer
+  :end-before: END run_trainer
 
-.. code:: python
-
-    # def _run_trainer(emb_rref, rank): continued...
-
-    def get_next_batch(rank):
-        for _ in range(10):
-            num_indices = random.randint(20, 50)
-            indices = torch.LongTensor(num_indices).random_(0, NUM_EMBEDDINGS)
-
-            # Generate offsets.
-            offsets = []
-            start = 0
-            batch_size = 0
-            while start < num_indices:
-                offsets.append(start)
-                start += random.randint(1, 10)
-                batch_size += 1
-
-            offsets_tensor = torch.LongTensor(offsets)
-            target = torch.LongTensor(batch_size).random_(8).cuda(rank)
-            yield indices, offsets_tensor, target
-
-    # Train for 100 epochs
-    for epoch in range(100):
-        # create distributed autograd context
-        for indices, offsets, target in get_next_batch(rank):
-            with dist_autograd.context() as context_id:
-                output = model(indices, offsets)
-                loss = criterion(output, target)
-
-                # Run distributed backward pass
-                dist_autograd.backward(context_id, [loss])
-
-                # Run distributed optimizer
-                opt.step(context_id)
-
-                # Not necessary to zero grads as each iteration creates a different
-                # distributed autograd context which hosts different grads
-        print("Training done for epoch {}".format(epoch))
-
-|
 Source code for the entire example can be found `here `__.

diff --git a/advanced_source/rpc_ddp_tutorial/main.py b/advanced_source/rpc_ddp_tutorial/main.py
new file mode 100644
index 00000000000..f83384d0a8d
--- /dev/null
+++ b/advanced_source/rpc_ddp_tutorial/main.py
@@ -0,0 +1,191 @@
+import os
+from functools import wraps
+
+import random
+import torch
+import torch.distributed as dist
+import torch.distributed.autograd as dist_autograd
+import torch.distributed.rpc as rpc
+from torch.distributed.rpc import ProcessGroupRpcBackendOptions
+import torch.multiprocessing as mp
+import torch.optim as optim
+from torch.distributed.optim import DistributedOptimizer
+from torch.distributed.rpc import RRef
+from torch.nn.parallel import DistributedDataParallel as DDP
+
+NUM_EMBEDDINGS = 100
+EMBEDDING_DIM = 16
+
+# BEGIN hybrid_model
+class HybridModel(torch.nn.Module):
+    r"""
+    The model consists of a sparse part and a dense part. The dense part is an
+    nn.Linear module that is replicated across all trainers using
+    DistributedDataParallel. The sparse part is an nn.EmbeddingBag that is
+    stored on the parameter server.
+
+    The model holds a Remote Reference to the embedding table on the parameter
+    server.
+    """
+
+    def __init__(self, emb_rref, device):
+        super(HybridModel, self).__init__()
+        self.emb_rref = emb_rref
+        self.fc = DDP(torch.nn.Linear(16, 8).cuda(device), device_ids=[device])
+        self.device = device
+
+    def forward(self, indices, offsets):
+        emb_lookup = self.emb_rref.rpc_sync().forward(indices, offsets)
+        return self.fc(emb_lookup.cuda(self.device))
+# END hybrid_model
+
+# BEGIN setup_trainer
+def _retrieve_embedding_parameters(emb_rref):
+    param_rrefs = []
+    for param in emb_rref.local_value().parameters():
+        param_rrefs.append(RRef(param))
+    return param_rrefs
+
+
+def _run_trainer(emb_rref, rank):
+    r"""
+    Each trainer runs a forward pass which involves an embedding lookup on the
+    parameter server and running nn.Linear locally. During the backward pass,
+    DDP is responsible for aggregating the gradients for the dense part
+    (nn.Linear) and distributed autograd ensures gradient updates are
+    propagated to the parameter server.
+    """

+    # Setup the model.
+    model = HybridModel(emb_rref, rank)
+
+    # Retrieve all model parameters as rrefs for DistributedOptimizer.
+
+    # Retrieve parameters for embedding table.
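+    # Note that _retrieve_embedding_parameters runs on the "ps" worker via
+    # rpc_sync; only the resulting list of RRefs is sent back to this trainer.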
+    model_parameter_rrefs = rpc.rpc_sync(
+        "ps", _retrieve_embedding_parameters, args=(emb_rref,))
+
+    # model.parameters() only includes local parameters.
+    for param in model.parameters():
+        model_parameter_rrefs.append(RRef(param))
+
+    # Setup distributed optimizer
+    opt = DistributedOptimizer(
+        optim.SGD,
+        model_parameter_rrefs,
+        lr=0.05,
+    )
+
+    criterion = torch.nn.CrossEntropyLoss()
+    # END setup_trainer
+
+    # BEGIN run_trainer
+    def get_next_batch(rank):
+        for _ in range(10):
+            num_indices = random.randint(20, 50)
+            indices = torch.LongTensor(num_indices).random_(0, NUM_EMBEDDINGS)
+
+            # Generate offsets.
+            offsets = []
+            start = 0
+            batch_size = 0
+            while start < num_indices:
+                offsets.append(start)
+                start += random.randint(1, 10)
+                batch_size += 1
+
+            offsets_tensor = torch.LongTensor(offsets)
+            target = torch.LongTensor(batch_size).random_(8).cuda(rank)
+            yield indices, offsets_tensor, target
+
+    # Train for 100 epochs
+    for epoch in range(100):
+        # create distributed autograd context
+        for indices, offsets, target in get_next_batch(rank):
+            with dist_autograd.context() as context_id:
+                output = model(indices, offsets)
+                loss = criterion(output, target)
+
+                # Run distributed backward pass
+                dist_autograd.backward(context_id, [loss])
+
+                # Run distributed optimizer
+                opt.step(context_id)
+
+                # Not necessary to zero grads as each iteration creates a different
+                # distributed autograd context which hosts different grads
+        print("Training done for epoch {}".format(epoch))
+    # END run_trainer
+
+
+# BEGIN run_worker
+def run_worker(rank, world_size):
+    r"""
+    A wrapper function that initializes RPC, calls the function, and shuts down
+    RPC.
+    """
+    os.environ['MASTER_ADDR'] = 'localhost'
+    os.environ['MASTER_PORT'] = '29500'
+
+    rpc_backend_options = ProcessGroupRpcBackendOptions()
+    rpc_backend_options.init_method='tcp://localhost:29501'
+
+    # Rank 2 is master, 3 is ps and 0 and 1 are trainers.
+    if rank == 2:
+        rpc.init_rpc(
+            "master",
+            rank=rank,
+            world_size=world_size,
+            rpc_backend_options=rpc_backend_options)
+
+        # Build the embedding table on the ps.
+        emb_rref = rpc.remote(
+            "ps",
+            torch.nn.EmbeddingBag,
+            args=(NUM_EMBEDDINGS, EMBEDDING_DIM),
+            kwargs={"mode": "sum"})
+
+        # Run the training loop on trainers.
+        futs = []
+        for trainer_rank in [0, 1]:
+            trainer_name = "trainer{}".format(trainer_rank)
+            fut = rpc.rpc_async(
+                trainer_name, _run_trainer, args=(emb_rref, rank))
+            futs.append(fut)
+
+        # Wait for all training to finish.
+        for fut in futs:
+            fut.wait()
+    elif rank <= 1:
+        # Initialize process group for Distributed DataParallel on trainers.
+        dist.init_process_group(
+            backend="gloo", rank=rank, world_size=2)
+
+        # Initialize RPC.
+        trainer_name = "trainer{}".format(rank)
+        rpc.init_rpc(
+            trainer_name,
+            rank=rank,
+            world_size=world_size,
+            rpc_backend_options=rpc_backend_options)
+
+        # Trainer just waits for RPCs from master.
+    else:
+        rpc.init_rpc(
+            "ps",
+            rank=rank,
+            world_size=world_size,
+            rpc_backend_options=rpc_backend_options)
+        # The parameter server does nothing.
+        pass
+
+    # block until all rpcs finish
+    rpc.shutdown()
+
+
+if __name__ == "__main__":
+    # 2 trainers, 1 parameter server, 1 master.
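+    # mp.spawn launches one process per rank on this machine; run_worker then
+    # takes on the trainer, master, or parameter server role based on its rank.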
+    world_size = 4
+    mp.spawn(run_worker, args=(world_size,), nprocs=world_size, join=True)
+# END run_worker

diff --git a/index.rst b/index.rst
index 666bc2276be..ff4129c945e 100644
--- a/index.rst
+++ b/index.rst
@@ -504,3 +504,4 @@ Additional Resources
    intermediate/rpc_tutorial
    beginner/aws_distributed_training_tutorial
    intermediate/rpc_param_server_tutorial
+   advanced/rpc_ddp_tutorial