@@ -471,7 +471,7 @@ def trimRareWords(voc, pairs, MIN_COUNT):
 # with mini-batches.
 #
 # Using mini-batches also means that we must be mindful of the variation
-# of sentence length in our batches. To accomodate sentences of different
+# of sentence length in our batches. To accommodate sentences of different
 # sizes in the same batch, we will make our batched input tensor of shape
 # *(max_length, batch_size)*, where sentences shorter than the
 # *max_length* are zero padded after an *EOS_token*.
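For reference, a minimal sketch of the padding scheme this comment describes (the PAD_token value of 0 and the tiny two-sentence batch are assumptions for illustration, not the tutorial's actual data):

import itertools
import torch

PAD_token = 0  # assumed padding index

def zero_pad(indexed_batch, fillvalue=PAD_token):
    # Transpose a list of index lists into rows of equal length,
    # filling the shorter sentences with PAD_token.
    return list(itertools.zip_longest(*indexed_batch, fillvalue=fillvalue))

batch = [[5, 8, 2], [7, 2]]                 # two indexed sentences, each ending in EOS (= 2 here)
padded = torch.LongTensor(zero_pad(batch))  # shape (max_length, batch_size) = (3, 2)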
@@ -615,7 +615,7 @@ def batch2TrainData(voc, pair_batch):
 # in normal sequential order, and one that is fed the input sequence in
 # reverse order. The outputs of each network are summed at each time step.
 # Using a bidirectional GRU will give us the advantage of encoding both
-# past and future context.
+# past and future contexts.
 #
 # Bidirectional RNN:
 #
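The summing of the two directions can be sketched like this (hidden_size and the random input are placeholders; the tutorial's ``EncoderRNN`` additionally works on packed padded sequences):

import torch
import torch.nn as nn

hidden_size = 500                              # placeholder size
gru = nn.GRU(hidden_size, hidden_size, bidirectional=True)

x = torch.randn(10, 64, hidden_size)           # (max_length, batch_size, hidden_size)
outputs, hidden = gru(x)                       # outputs: (10, 64, 2 * hidden_size)
# Sum the forward and backward outputs so downstream layers see hidden_size features.
outputs = outputs[:, :, :hidden_size] + outputs[:, :, hidden_size:]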
@@ -700,7 +700,7 @@ def forward(self, input_seq, input_lengths, hidden=None):
 # states to generate the next word in the sequence. It continues
 # generating words until it outputs an *EOS_token*, representing the end
 # of the sentence. A common problem with a vanilla seq2seq decoder is that
-# if we rely soley on the context vector to encode the entire input
+# if we rely solely on the context vector to encode the entire input
 # sequence’s meaning, it is likely that we will have information loss.
 # This is especially the case when dealing with long input sequences,
 # greatly limiting the capability of our decoder.
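A toy illustration of that bottleneck (arbitrary sizes, plain GRUs rather than the tutorial's attention decoder): the entire 50-step input must be squeezed into one fixed-size context vector before any decoding happens.

import torch
import torch.nn as nn

hidden_size = 500
encoder = nn.GRU(hidden_size, hidden_size)
decoder_cell = nn.GRU(hidden_size, hidden_size)

src = torch.randn(50, 1, hidden_size)        # a long input sequence (50 steps)
_, context = encoder(src)                    # context: (1, 1, hidden_size), all 50 steps compressed here
prev_word = torch.randn(1, 1, hidden_size)   # embedding of the previously generated word
output, hidden = decoder_cell(prev_word, context)  # every decoded word leans on that single vector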
@@ -950,7 +950,7 @@ def maskNLLLoss(inp, target, mask):
 # sequence (or batch of sequences). We use the ``GRU`` layer like this in
 # the ``encoder``. The reality is that under the hood, there is an
 # iterative process looping over each time step calculating hidden states.
-# Alternatively, you ran run these modules one time-step at a time. In
+# Alternatively, you can run these modules one time-step at a time. In
 # this case, we manually loop over the sequences during the training
 # process like we must do for the ``decoder`` model. As long as you
 # maintain the correct conceptual model of these modules, implementing
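A small sketch of the two equivalent ways of driving a ``GRU`` (arbitrary sizes; not code from the tutorial itself):

import torch
import torch.nn as nn

gru = nn.GRU(8, 8)
seq = torch.randn(5, 1, 8)                   # (seq_len, batch, feature)

# 1) One call over the whole sequence: the time-step loop happens under the hood.
full_out, _ = gru(seq)

# 2) Manual loop, one time step at a time, as we do for the decoder during training.
hidden = None
steps = []
for t in range(seq.size(0)):
    out, hidden = gru(seq[t:t + 1], hidden)  # input of shape (1, batch, feature)
    steps.append(out)
stepped_out = torch.cat(steps, dim=0)        # matches full_out up to floating-point noise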
@@ -1115,7 +1115,7 @@ def trainIters(model_name, voc, pairs, encoder, decoder, encoder_optimizer, deco
 # softmax value. This decoding method is optimal on a single time-step
 # level.
 #
-# To facilite the greedy decoding operation, we define a
+# To facilitate the greedy decoding operation, we define a
 # ``GreedySearchDecoder`` class. When run, an object of this class takes
 # an input sequence (``input_seq``) of shape *(input_seq length, 1)*, a
 # scalar input length (``input_length``) tensor, and a ``max_length`` to
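A condensed sketch of the greedy loop that ``GreedySearchDecoder`` wraps (the ``decoder`` call signature and the SOS/EOS token values of 1 and 2 are assumptions borrowed from the tutorial's setup): at every step we keep only the single highest-scoring token and feed it back in.

import torch

def greedy_decode(decoder, decoder_hidden, encoder_outputs, max_length,
                  sos_token=1, eos_token=2):
    decoder_input = torch.LongTensor([[sos_token]])    # start with the SOS token
    tokens = []
    for _ in range(max_length):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden, encoder_outputs)
        _, top_idx = torch.max(decoder_output, dim=1)  # greedy: argmax of the softmax output
        tokens.append(top_idx.item())
        if top_idx.item() == eos_token:
            break
        decoder_input = top_idx.unsqueeze(0)           # feed the chosen word back as the next input
    return tokens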