From 5be85f36e6ad57746299be6dbcbc4e5fd9898bc6 Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Wed, 3 Aug 2022 18:30:02 -0400
Subject: [PATCH 01/28] demo t5 model on sentiment classification and
 translation

---
 examples/tutorials/cnndm_summarization.py | 350 ++++++++++++++++++----
 1 file changed, 286 insertions(+), 64 deletions(-)

diff --git a/examples/tutorials/cnndm_summarization.py b/examples/tutorials/cnndm_summarization.py
index 604d962350..9f6f2dd86d 100644
--- a/examples/tutorials/cnndm_summarization.py
+++ b/examples/tutorials/cnndm_summarization.py
@@ -1,6 +1,6 @@
 """
-CNNDM Text Summarization with T5-Base model
-=======================================================
+T5-BASE MODEL FOR SUMMARIZATION, SENTIMENT CLASSIFICATION, AND TRANSLATION
+==========================================================================
 
 **Author**: `Pendo Abbo <pabbo@fb.com>`__
 
@@ -10,12 +10,13 @@
 # Overview
 # --------
 #
-# This tutorial demonstrates how to use a pre-trained T5 Model for text summarization on the CNN-DailyMail dataset.
-# We will demonstrate how to use the torchtext library to:
+# This tutorial demonstrates how to use a pre-trained T5 Model for summarization, sentiment classification, and
+# translation tasks. We will demonstrate how to use the torchtext library to:
 #
 # 1. Build a text pre-processing pipeline for a T5 model
-# 2. Read in the CNNDM dataset and pre-process the text
-# 3. Instantiate a pre-trained T5 model with base configuration, and perform text summarization on input text
+# 2. Instantiate a pre-trained T5 model with base configuration
+# 3. Read in the CNNDM, IMDB, and Multi30k datasets and pre-process their texts in preparation for the model
+# 4. Perform text summarization, sentiment classification, and translation
 #
 #
 
@@ -69,60 +70,6 @@
 #   transform = T5_BASE_GENERATION.transform()
 #
 
-#######################################################################
-# Dataset
-# -------
-# torchtext provides several standard NLP datasets. For a complete list, refer to the documentation at https://pytorch.org/text/stable/datasets.html.
-# These datasets are built using composable torchdata datapipes and hence support standard flow-control and mapping/transformation
-# using user defined functions and transforms. Below, we demonstrate how to pre-process the CNNDM dataset to include the prefix necessary
-# for the model to identify the task it is performing.
-#
-# The CNNDM dataset has a train, validation, and test split. Below we demo on the test split.
-#
-# .. note::
-#       Using datapipes is still currently subject to a few caveats. If you wish
-#       to extend this example to include shuffling, multi-processing, or
-#       distributed learning, please see :ref:`this note <datapipes_warnings>`
-#       for further instructions.
-
-from functools import partial
-
-from torch.utils.data import DataLoader
-from torchtext.datasets.cnndm import CNNDM
-
-batch_size = 5
-test_datapipe = CNNDM(split="test")
-task = "summarize"
-
-
-def apply_prefix(task, x):
-    return f"{task}: " + x[0], x[1]
-
-
-test_datapipe = test_datapipe.map(partial(apply_prefix, task))
-test_datapipe = test_datapipe.batch(batch_size)
-test_datapipe = test_datapipe.rows2columnar(["article", "abstract"])
-test_dataloader = DataLoader(test_datapipe, batch_size=None)
-
-#######################################################################
-# Alternately we can also use batched API (i.e apply the prefix on the whole batch)
-#
-# ::
-#
-#   def batch_prefix(task, x):
-#    return {
-#        "article": [f'{task}: ' + y for y in x["article"]],
-#        "abstract": x["abstract"]
-#    }
-#
-#   batch_size = 5
-#   test_datapipe = CNNDM(split="test")
-#   task = 'summarize'
-#
-#   test_datapipe = test_datapipe.batch(batch_size).rows2columnar(["article", "abstract"])
-#   test_datapipe = test_datapipe.map(partial(batch_prefix, task))
-#   test_dataloader = DataLoader(test_datapipe, batch_size=None)
-#
 
 ######################################################################
 # Model Preparation
@@ -286,15 +233,119 @@ def generate(encoder_tokens: Tensor, eos_idx: int, model: T5Model, beam_size: in
     return decoder_tokens
 
 
+#######################################################################
+# Datasets
+# --------
+# torchtext provides several standard NLP datasets. For a complete list, refer to the documentation
+# at https://pytorch.org/text/stable/datasets.html. These datasets are built using composable torchdata
+# datapipes and hence support standard flow-control and mapping/transformation using user defined
+# functions and transforms.
+#
+# Below, we demonstrate how to pre-process the CNNDM dataset to include the prefix necessary for the
+# model to identify the task it is performing. The CNNDM dataset has a train, validation, and test
+# split. Below we demo on the test split.
+#
+# The T5 model uses the prefix "summarize" for text summarization. For more information on task
+# prefixes, please visit Appendix D of the T5 Paper at https://arxiv.org/pdf/1910.10683.pdf
+#
+# .. note::
+#       Using datapipes is still currently subject to a few caveats. If you wish
+#       to extend this example to include shuffling, multi-processing, or
+#       distributed learning, please see :ref:`this note <datapipes_warnings>`
+#       for further instructions.
+
+from functools import partial
+
+from torch.utils.data import DataLoader
+from torchtext.datasets.cnndm import CNNDM
+
+batch_size = 5
+cnndm_datapipe = CNNDM(split="test")
+task = "summarize"
+
+
+def apply_prefix(task, x):
+    return f"{task}: " + x[0], x[1]
+
+
+cnndm_datapipe = cnndm_datapipe.map(partial(apply_prefix, task))
+cnndm_datapipe = cnndm_datapipe.batch(batch_size)
+cnndm_datapipe = cnndm_datapipe.rows2columnar(["article", "abstract"])
+cnndm_dataloader = DataLoader(cnndm_datapipe, batch_size=None)
+
+#######################################################################
+# Alternatively, we can also use the batched API (i.e., apply the prefix to the whole batch)
+#
+# ::
+#
+#   def batch_prefix(task, x):
+#    return {
+#        "article": [f'{task}: ' + y for y in x["article"]],
+#        "abstract": x["abstract"]
+#    }
+#
+#   batch_size = 5
+#   cnndm_datapipe = CNNDM(split="test")
+#   task = 'summarize'
+#
+#   cnndm_datapipe = cnndm_datapipe.batch(batch_size).rows2columnar(["article", "abstract"])
+#   cnndm_datapipe = cnndm_datapipe.map(partial(batch_prefix, task))
+#   cnndm_dataloader = DataLoader(cnndm_datapipe, batch_size=None)
+#
+
+#######################################################################
+# We can also load the IMDB dataset, which will be used to demonstrate sentiment classification using the T5 model.
+# This dataset has a train and test split. Below we demo on the test split.
+#
+# The T5 model was trained on the SST2 dataset (also available in torchtext) for sentiment classification using the
+# prefix "sst2 sentence". Therefore, we will use this prefix to perform sentiment classification on the IMDB dataset.
+#
+
+from torchtext.datasets import IMDB
+
+batch_size = 3
+imdb_datapipe = IMDB(split="test")
+task = "sst2 sentence"
+labels = {"neg": "negative", "pos": "positive"}
+
+
+def process_labels(labels, x):
+    return x[1], labels[x[0]]
+
+
+imdb_datapipe = imdb_datapipe.map(partial(process_labels, labels))
+imdb_datapipe = imdb_datapipe.map(partial(apply_prefix, task))
+imdb_datapipe = imdb_datapipe.batch(batch_size)
+imdb_datapipe = imdb_datapipe.rows2columnar(["text", "label"])
+imdb_dataloader = DataLoader(imdb_datapipe, batch_size=None)
+
+#######################################################################
+# Finally, we can also load the Multi30k dataset to demonstrate English to German translation using the T5 model.
+# This dataset has a train, validation, and test split. Below we demo on the test split.
+#
+# The T5 model uses the prefix "translate English to German" for this task.
+
+from torchtext.datasets import Multi30k
+
+batch_size = 5
+language_pair = ("en", "de")
+multi_datapipe = Multi30k(split="test", language_pair=language_pair)
+task = "translate English to German"
+
+multi_datapipe = multi_datapipe.map(partial(apply_prefix, task))
+multi_datapipe = multi_datapipe.batch(batch_size)
+multi_datapipe = multi_datapipe.rows2columnar(["english", "german"])
+multi_dataloader = DataLoader(multi_datapipe, batch_size=None)
+
 #######################################################################
 # Generate Summaries
 # ------------------
 #
-# Finally we put all of the components together to generate summaries on the first batch of articles in the CNNDM test set
+# We can put all of the components together the generate summaries on the first batch of articles in the CNNDM test set
 # using a beam size of 3.
 #
 
-batch = next(iter(test_dataloader))
+batch = next(iter(cnndm_dataloader))
 input_text = batch["article"]
 model_input = transform(input_text)
 target = batch["abstract"]
@@ -311,8 +362,8 @@ def generate(encoder_tokens: Tensor, eos_idx: int, model: T5Model, beam_size: in
 
 
 #######################################################################
-# Output
-# ------
+# Summarization Output
+# --------------------
 #
 # ::
 #
@@ -374,3 +425,174 @@ def generate(encoder_tokens: Tensor, eos_idx: int, model: T5Model, beam_size: in
 #    review . School officials identified student during investigation and the person
 #    admitted to hanging the noose, Duke says . The noose, made of rope, was discovered on
 #    campus about 2 a.m.
+#
+
+
+#######################################################################
+# Generate Sentiment Classifications
+# ----------------------------------
+#
+# Similarly, we can now use the model to generate sentiment classifications on the first batch of reviews from the IMDB test set
+# using a beam size of 1.
+#
+
+batch = next(iter(imdb_dataloader))
+input_text = batch["text"]
+model_input = transform(input_text)
+target = batch["label"]
+beam_size = 1
+
+model_output = generate(model=model, encoder_tokens=model_input, eos_idx=eos_idx, beam_size=beam_size)
+output_text = transform.decode(model_output.tolist())
+
+for i in range(batch_size):
+
+    print(f"Example {i+1}:\n")
+    print(f"input_text: {input_text[i]}\n")
+    print(f"prediction: {output_text[i]}\n")
+    print(f"target: {target[i]}\n\n")
+
+#######################################################################
+# Sentiment Output
+# ----------------
+#
+# ::
+#
+#    Example 1:
+#
+#    input_text: sst2 sentence: I love sci-fi and am willing to put up with a lot. Sci-fi
+#    movies/TV are usually underfunded, under-appreciated and misunderstood. I tried to like
+#    this, I really did, but it is to good TV sci-fi as Babylon 5 is to Star Trek (the original).
+#    Silly prosthetics, cheap cardboard sets, stilted dialogues, CG that doesn't match the
+#    background, and painfully one-dimensional characters cannot be overcome with a 'sci-fi'
+#    setting. (I'm sure there are those of you out there who think Babylon 5 is good sci-fi TV.
+#    It's not. It's clichéd and uninspiring.) While US viewers might like emotion and character
+#    development, sci-fi is a genre that does not take itself seriously (cf. Star Trek). It may
+#    treat important issues, yet not as a serious philosophy. It's really difficult to care about
+#    the characters here as they are not simply foolish, just missing a spark of life. Their
+#    actions and reactions are wooden and predictable, often painful to watch. The makers of Earth
+#    KNOW it's rubbish as they have to always say "Gene Roddenberry's Earth..." otherwise people
+#    would not continue watching. Roddenberry's ashes must be turning in their orbit as this dull,
+#    cheap, poorly edited (watching it without advert breaks really brings this home) trudging
+#    Trabant of a show lumbers into space. Spoiler. So, kill off a main character. And then bring
+#    him back as another actor. Jeeez! Dallas all over again.
+#
+#    prediction: negative
+#
+#    target: negative
+#
+#
+#    Example 2:
+#
+#    input_text: sst2 sentence: Worth the entertainment value of a rental, especially if you like
+#    action movies. This one features the usual car chases, fights with the great Van Damme kick
+#    style, shooting battles with the 40 shell load shotgun, and even terrorist style bombs. All
+#    of this is entertaining and competently handled but there is nothing that really blows you
+#    away if you've seen your share before.<br /><br />The plot is made interesting by the
+#    inclusion of a rabbit, which is clever but hardly profound. Many of the characters are
+#    heavily stereotyped -- the angry veterans, the terrified illegal aliens, the crooked cops,
+#    the indifferent feds, the bitchy tough lady station head, the crooked politician, the fat
+#    federale who looks like he was typecast as the Mexican in a Hollywood movie from the 1940s.
+#    All passably acted but again nothing special.<br /><br />I thought the main villains were
+#    pretty well done and fairly well acted. By the end of the movie you certainly knew who the
+#    good guys were and weren't. There was an emotional lift as the really bad ones got their just
+#    deserts. Very simplistic, but then you weren't expecting Hamlet, right? The only thing I found
+#    really annoying was the constant cuts to VDs daughter during the last fight scene.<br /><br />
+#    Not bad. Not good. Passable 4.
+#
+#    prediction: negative
+#
+#    target: negative
+#
+#
+#    Example 3:
+#
+#    input_text: sst2 sentence: its a totally average film with a few semi-alright action sequences
+#    that make the plot seem a little better and remind the viewer of the classic van dam films.
+#    parts of the plot don't make sense and seem to be added in to use up time. the end plot is that
+#    of a very basic type that doesn't leave the viewer guessing and any twists are obvious from the
+#    beginning. the end scene with the flask backs don't make sense as they are added in and seem to
+#    have little relevance to the history of van dam's character. not really worth watching again,
+#    bit disappointed in the end production, even though it is apparent it was shot on a low budget
+#    certain shots and sections in the film are of poor directed quality
+#
+#    prediction: negative
+#
+#    target: negative
+#
+
+
+#######################################################################
+# Generate Translations
+# ---------------------
+#
+# Similarly, we can now use the model to generate sentiment classification on the first batch of reviews from the IMDB test set
+# using a beam size of 4.
+#
+
+batch = next(iter(multi_dataloader))
+input_text = batch["english"]
+model_input = transform(input_text)
+target = batch["german"]
+beam_size = 4
+
+model_output = generate(model=model, encoder_tokens=model_input, eos_idx=eos_idx, beam_size=beam_size)
+output_text = transform.decode(model_output.tolist())
+
+for i in range(batch_size):
+
+    print(f"Example {i+1}:\n")
+    print(f"input_text: {input_text[i]}\n")
+    print(f"prediction: {output_text[i]}\n")
+    print(f"target: {target[i]}\n\n")
+
+#######################################################################
+# Translation Output
+# ------------------
+#
+# ::
+#
+#    Example 1:
+#
+#    input_text: translate English to German: A man in an orange hat starring at something.
+#
+#    prediction: Ein Mann in einem orangen Hut, der an etwas schaut.
+#
+#    target: Ein Mann mit einem orangefarbenen Hut, der etwas anstarrt.
+#
+#
+#    Example 2:
+#
+#    input_text: translate English to German: A Boston Terrier is running on lush green grass in front of a white fence.
+#
+#    prediction: Ein Boston Terrier läuft auf üppigem grünem Gras vor einem weißen Zaun.
+#
+#    target: Ein Boston Terrier läuft über saftig-grünes Gras vor einem weißen Zaun.
+#
+#
+#    Example 3:
+#
+#    input_text: translate English to German: A girl in karate uniform breaking a stick with a front kick.
+#
+#    prediction: Ein Mädchen in Karate-Uniform bricht einen Stöck mit einem Frontkick.
+#
+#    target: Ein Mädchen in einem Karateanzug bricht ein Brett mit einem Tritt.
+#
+#
+#    Example 4:
+#
+#    input_text: translate English to German: Five people wearing winter jackets and helmets stand in the snow, with snowmobiles in the background.
+#
+#    prediction: Fünf Menschen mit Winterjacken und Helmen stehen im Schnee, mit Schneemobilen im Hintergrund.
+#
+#    target: Fünf Leute in Winterjacken und mit Helmen stehen im Schnee mit Schneemobilen im Hintergrund.
+#
+#
+#    Example 5:
+#
+#    input_text: translate English to German: People are fixing the roof of a house.
+#
+#    prediction: Die Leute fixieren das Dach eines Hauses.
+#
+#    target: Leute Reparieren das Dach eines Hauses.
+#

From 0bb1c1c53e31d9773923735f1df2fb8774f2a1cb Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Wed, 3 Aug 2022 18:32:42 -0400
Subject: [PATCH 02/28] renaming tutorial file

---
 examples/tutorials/{cnndm_summarization.py => t5_demo.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename examples/tutorials/{cnndm_summarization.py => t5_demo.py} (100%)

diff --git a/examples/tutorials/cnndm_summarization.py b/examples/tutorials/t5_demo.py
similarity index 100%
rename from examples/tutorials/cnndm_summarization.py
rename to examples/tutorials/t5_demo.py

From aa25e6c684cef75b28070804b1e7b012ec0dd72c Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Wed, 3 Aug 2022 19:48:42 -0400
Subject: [PATCH 03/28] update source/index.rst

---
 docs/source/index.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/index.rst b/docs/source/index.rst
index 9ebc235d57..7de6b45d02 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -51,7 +51,7 @@ Getting Started
    :caption: Getting Started
 
    tutorials/sst2_classification_non_distributed
-   tutorials/cnndm_summarization
+   tutorials/t5_demo
 
 
 .. automodule:: torchtext

From bb5f19240d6e985a71c5fdaefbe34ea57f9ac958 Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Wed, 3 Aug 2022 20:02:48 -0400
Subject: [PATCH 04/28] correct title format

---
 examples/tutorials/t5_demo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/tutorials/t5_demo.py b/examples/tutorials/t5_demo.py
index 9f6f2dd86d..16f35efbcd 100644
--- a/examples/tutorials/t5_demo.py
+++ b/examples/tutorials/t5_demo.py
@@ -1,5 +1,5 @@
 """
-T5-BASE MODEL FOR SUMMARIZATION, SENTIMENT CLASSIFICATION, AND TRANSLATION
+T5-Base Model for Summarization, Sentiment Classification, and Translation
 ==========================================================================
 
 **Author**: `Pendo Abbo <pabbo@fb.com>`__

From 31696a77e53f515d91672abe37f99f87561058dc Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Thu, 4 Aug 2022 10:30:47 -0400
Subject: [PATCH 05/28] correct description for generate translations section

---
 examples/tutorials/t5_demo.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/examples/tutorials/t5_demo.py b/examples/tutorials/t5_demo.py
index 16f35efbcd..c02b87338c 100644
--- a/examples/tutorials/t5_demo.py
+++ b/examples/tutorials/t5_demo.py
@@ -347,15 +347,14 @@ def process_labels(labels, x):
 
 batch = next(iter(cnndm_dataloader))
 input_text = batch["article"]
-model_input = transform(input_text)
 target = batch["abstract"]
 beam_size = 3
 
+model_input = transform(input_text)
 model_output = generate(model=model, encoder_tokens=model_input, eos_idx=eos_idx, beam_size=beam_size)
 output_text = transform.decode(model_output.tolist())
 
 for i in range(batch_size):
-
     print(f"Example {i+1}:\n")
     print(f"prediction: {output_text[i]}\n")
     print(f"target: {target[i]}\n\n")
@@ -438,15 +437,14 @@ def process_labels(labels, x):
 
 batch = next(iter(imdb_dataloader))
 input_text = batch["text"]
-model_input = transform(input_text)
 target = batch["label"]
 beam_size = 1
 
+model_input = transform(input_text)
 model_output = generate(model=model, encoder_tokens=model_input, eos_idx=eos_idx, beam_size=beam_size)
 output_text = transform.decode(model_output.tolist())
 
 for i in range(batch_size):
-
     print(f"Example {i+1}:\n")
     print(f"input_text: {input_text[i]}\n")
     print(f"prediction: {output_text[i]}\n")
@@ -526,21 +524,20 @@ def process_labels(labels, x):
 # Generate Translations
 # ---------------------
 #
-# Similarly, we can now use the model to generate sentiment classification on the first batch of reviews from the IMDB test set
-# using a beam size of 4.
+# Finally, we can also use the model to generate English to German translations on the first batch of examples from the Multi30k
+# test set using a beam size of 4.
 #
 
 batch = next(iter(multi_dataloader))
 input_text = batch["english"]
-model_input = transform(input_text)
 target = batch["german"]
 beam_size = 4
 
+model_input = transform(input_text)
 model_output = generate(model=model, encoder_tokens=model_input, eos_idx=eos_idx, beam_size=beam_size)
 output_text = transform.decode(model_output.tolist())
 
 for i in range(batch_size):
-
     print(f"Example {i+1}:\n")
     print(f"input_text: {input_text[i]}\n")
     print(f"prediction: {output_text[i]}\n")

From 0b3c95edff42fae5fe6485c91c819929006ba20f Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Thu, 4 Aug 2022 11:49:02 -0400
Subject: [PATCH 06/28] specifying batch_size variable names

---
 examples/tutorials/t5_demo.py | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/examples/tutorials/t5_demo.py b/examples/tutorials/t5_demo.py
index c02b87338c..9860da465e 100644
--- a/examples/tutorials/t5_demo.py
+++ b/examples/tutorials/t5_demo.py
@@ -259,7 +259,7 @@ def generate(encoder_tokens: Tensor, eos_idx: int, model: T5Model, beam_size: in
 from torch.utils.data import DataLoader
 from torchtext.datasets.cnndm import CNNDM
 
-batch_size = 5
+cnndm_batch_size = 5
 cnndm_datapipe = CNNDM(split="test")
 task = "summarize"
 
@@ -269,7 +269,7 @@ def apply_prefix(task, x):
 
 
 cnndm_datapipe = cnndm_datapipe.map(partial(apply_prefix, task))
-cnndm_datapipe = cnndm_datapipe.batch(batch_size)
+cnndm_datapipe = cnndm_datapipe.batch(cnndm_batch_size)
 cnndm_datapipe = cnndm_datapipe.rows2columnar(["article", "abstract"])
 cnndm_dataloader = DataLoader(cnndm_datapipe, batch_size=None)
 
@@ -284,11 +284,11 @@ def apply_prefix(task, x):
 #        "abstract": x["abstract"]
 #    }
 #
-#   batch_size = 5
+#   cnndm_batch_size = 5
 #   cnndm_datapipe = CNNDM(split="test")
 #   task = 'summarize'
 #
-#   cnndm_datapipe = cnndm_datapipe.batch(batch_size).rows2columnar(["article", "abstract"])
+#   cnndm_datapipe = cnndm_datapipe.batch(cnndm_batch_size).rows2columnar(["article", "abstract"])
 #   cnndm_datapipe = cnndm_datapipe.map(partial(batch_prefix, task))
 #   cnndm_dataloader = DataLoader(cnndm_datapipe, batch_size=None)
 #
@@ -303,7 +303,7 @@ def apply_prefix(task, x):
 
 from torchtext.datasets import IMDB
 
-batch_size = 3
+imdb_batch_size = 3
 imdb_datapipe = IMDB(split="test")
 task = "sst2 sentence"
 labels = {"neg": "negative", "pos": "positive"}
@@ -315,7 +315,7 @@ def process_labels(labels, x):
 
 imdb_datapipe = imdb_datapipe.map(partial(process_labels, labels))
 imdb_datapipe = imdb_datapipe.map(partial(apply_prefix, task))
-imdb_datapipe = imdb_datapipe.batch(batch_size)
+imdb_datapipe = imdb_datapipe.batch(imdb_batch_size)
 imdb_datapipe = imdb_datapipe.rows2columnar(["text", "label"])
 imdb_dataloader = DataLoader(imdb_datapipe, batch_size=None)
 
@@ -327,13 +327,13 @@ def process_labels(labels, x):
 
 from torchtext.datasets import Multi30k
 
-batch_size = 5
+multi_batch_size = 5
 language_pair = ("en", "de")
 multi_datapipe = Multi30k(split="test", language_pair=language_pair)
 task = "translate English to German"
 
 multi_datapipe = multi_datapipe.map(partial(apply_prefix, task))
-multi_datapipe = multi_datapipe.batch(batch_size)
+multi_datapipe = multi_datapipe.batch(multi_batch_size)
 multi_datapipe = multi_datapipe.rows2columnar(["english", "german"])
 multi_dataloader = DataLoader(multi_datapipe, batch_size=None)
 
@@ -354,7 +354,7 @@ def process_labels(labels, x):
 model_output = generate(model=model, encoder_tokens=model_input, eos_idx=eos_idx, beam_size=beam_size)
 output_text = transform.decode(model_output.tolist())
 
-for i in range(batch_size):
+for i in range(cnndm_batch_size):
     print(f"Example {i+1}:\n")
     print(f"prediction: {output_text[i]}\n")
     print(f"target: {target[i]}\n\n")
@@ -444,12 +444,13 @@ def process_labels(labels, x):
 model_output = generate(model=model, encoder_tokens=model_input, eos_idx=eos_idx, beam_size=beam_size)
 output_text = transform.decode(model_output.tolist())
 
-for i in range(batch_size):
+for i in range(imdb_batch_size):
     print(f"Example {i+1}:\n")
     print(f"input_text: {input_text[i]}\n")
     print(f"prediction: {output_text[i]}\n")
     print(f"target: {target[i]}\n\n")
 
+
 #######################################################################
 # Sentiment Output
 # ----------------
@@ -512,7 +513,7 @@ def process_labels(labels, x):
 #    beginning. the end scene with the flask backs don't make sense as they are added in and seem to
 #    have little relevance to the history of van dam's character. not really worth watching again,
 #    bit disappointed in the end production, even though it is apparent it was shot on a low budget
-#    certain shots and sections in the film are of poor directed quality
+#    certain shots and sections in the film are of poor directed quality.
 #
 #    prediction: negative
 #
@@ -537,12 +538,13 @@ def process_labels(labels, x):
 model_output = generate(model=model, encoder_tokens=model_input, eos_idx=eos_idx, beam_size=beam_size)
 output_text = transform.decode(model_output.tolist())
 
-for i in range(batch_size):
+for i in range(multi_batch_size):
     print(f"Example {i+1}:\n")
     print(f"input_text: {input_text[i]}\n")
     print(f"prediction: {output_text[i]}\n")
     print(f"target: {target[i]}\n\n")
 
+
 #######################################################################
 # Translation Output
 # ------------------

From 6538f6fd3a99b0e7cda23dc49770c5d874d7cd81 Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Thu, 4 Aug 2022 11:50:08 -0400
Subject: [PATCH 07/28] fixing format issue with sentiment output

---
 examples/tutorials/t5_demo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/tutorials/t5_demo.py b/examples/tutorials/t5_demo.py
index 9860da465e..f639c406ad 100644
--- a/examples/tutorials/t5_demo.py
+++ b/examples/tutorials/t5_demo.py
@@ -474,7 +474,7 @@ def process_labels(labels, x):
 #    would not continue watching. Roddenberry's ashes must be turning in their orbit as this dull,
 #    cheap, poorly edited (watching it without advert breaks really brings this home) trudging
 #    Trabant of a show lumbers into space. Spoiler. So, kill off a main character. And then bring
-#    him back as another actor. Jeeez! Dallas all over again.
+#    him back as another actor. Jeeez. Dallas all over again.
 #
 #    prediction: negative
 #

From 9b11269dc129ea6415f3c053b79031b398a8caa1 Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Thu, 4 Aug 2022 19:31:25 +0000
Subject: [PATCH 08/28] renaming tutorial and removing hard-coded outputs

---
 .../tutorials/{t5_demo.py => t5_tutorial.py}  | 201 +-----------------
 1 file changed, 6 insertions(+), 195 deletions(-)
 rename examples/tutorials/{t5_demo.py => t5_tutorial.py} (64%)

diff --git a/examples/tutorials/t5_demo.py b/examples/tutorials/t5_tutorial.py
similarity index 64%
rename from examples/tutorials/t5_demo.py
rename to examples/tutorials/t5_tutorial.py
index f639c406ad..77f240c4ee 100644
--- a/examples/tutorials/t5_demo.py
+++ b/examples/tutorials/t5_tutorial.py
@@ -86,8 +86,8 @@
 t5_base = T5_BASE_GENERATION
 transform = t5_base.transform()
 model = t5_base.get_model()
-model.eval()
-model.to(DEVICE)
+model = model.eval()
+model = model.to(DEVICE)
 
 
 #######################################################################
@@ -303,7 +303,7 @@ def apply_prefix(task, x):
 
 from torchtext.datasets import IMDB
 
-imdb_batch_size = 3
+imdb_batch_size = 5
 imdb_datapipe = IMDB(split="test")
 task = "sst2 sentence"
 labels = {"neg": "negative", "pos": "positive"}
@@ -338,7 +338,7 @@ def process_labels(labels, x):
 multi_dataloader = DataLoader(multi_datapipe, batch_size=None)
 
 #######################################################################
-# Generate Summaries
+# Summaries
 # ------------------
 #
 # We can put all of the components together the generate summaries on the first batch of articles in the CNNDM test set
@@ -361,74 +361,7 @@ def process_labels(labels, x):
 
 
 #######################################################################
-# Summarization Output
-# --------------------
-#
-# ::
-#
-#    Example 1:
-#
-#    prediction: the Palestinians become the 123rd member of the international criminal
-#    court . the accession was marked by a ceremony at the Hague, where the court is based .
-#    the ICC opened a preliminary examination into the situation in the occupied
-#    Palestinian territory .
-#
-#    target: Membership gives the ICC jurisdiction over alleged crimes committed in
-#    Palestinian territories since last June . Israel and the United States opposed the
-#    move, which could open the door to war crimes investigations against Israelis .
-#
-#
-#    Example 2:
-#
-#    prediction: a stray pooch has used up at least three of her own after being hit by a
-#    car and buried in a field . the dog managed to stagger to a nearby farm, dirt-covered
-#    and emaciated, where she was found . she suffered a dislocated jaw, leg injuries and a
-#    caved-in sinus cavity -- and still requires surgery to help her breathe .
-#
-#    target: Theia, a bully breed mix, was apparently hit by a car, whacked with a hammer
-#    and buried in a field . "She's a true miracle dog and she deserves a good life," says
-#    Sara Mellado, who is looking for a home for Theia .
-#
-#
-#    Example 3:
-#
-#    prediction: mohammad Javad Zarif arrived in Iran on a sunny friday morning . he has gone
-#    a long way to bring Iran in from the cold and allow it to rejoin the international
-#    community . but there are some facts about him that are less well-known .
-#
-#    target: Mohammad Javad Zarif has spent more time with John Kerry than any other
-#    foreign minister . He once participated in a takeover of the Iranian Consulate in San
-#    Francisco . The Iranian foreign minister tweets in English .
-#
-#
-#    Example 4:
-#
-#    prediction: five americans were monitored for three weeks after being exposed to Ebola in
-#    west africa . one of the five had a heart-related issue and has been discharged but hasn't
-#    left the area . they are clinicians for Partners in Health, a Boston-based aid group .
-#
-#    target: 17 Americans were exposed to the Ebola virus while in Sierra Leone in March .
-#    Another person was diagnosed with the disease and taken to hospital in Maryland .
-#    National Institutes of Health says the patient is in fair condition after weeks of
-#    treatment .
-#
-#
-#    Example 5:
-#
-#    prediction: the student was identified during an investigation by campus police and
-#    the office of student affairs . he admitted to placing the noose on the tree early
-#    Wednesday morning . the incident is one of several recent racist events to affect
-#    college students .
-#
-#    target: Student is no longer on Duke University campus and will face disciplinary
-#    review . School officials identified student during investigation and the person
-#    admitted to hanging the noose, Duke says . The noose, made of rope, was discovered on
-#    campus about 2 a.m.
-#
-
-
-#######################################################################
-# Generate Sentiment Classifications
+# Sentiment Classifications
 # ----------------------------------
 #
 # Similarly, we can now use the model to generate sentiment classifications on the first batch of reviews from the IMDB test set
@@ -452,77 +385,7 @@ def process_labels(labels, x):
 
 
 #######################################################################
-# Sentiment Output
-# ----------------
-#
-# ::
-#
-#    Example 1:
-#
-#    input_text: sst2 sentence: I love sci-fi and am willing to put up with a lot. Sci-fi
-#    movies/TV are usually underfunded, under-appreciated and misunderstood. I tried to like
-#    this, I really did, but it is to good TV sci-fi as Babylon 5 is to Star Trek (the original).
-#    Silly prosthetics, cheap cardboard sets, stilted dialogues, CG that doesn't match the
-#    background, and painfully one-dimensional characters cannot be overcome with a 'sci-fi'
-#    setting. (I'm sure there are those of you out there who think Babylon 5 is good sci-fi TV.
-#    It's not. It's clichéd and uninspiring.) While US viewers might like emotion and character
-#    development, sci-fi is a genre that does not take itself seriously (cf. Star Trek). It may
-#    treat important issues, yet not as a serious philosophy. It's really difficult to care about
-#    the characters here as they are not simply foolish, just missing a spark of life. Their
-#    actions and reactions are wooden and predictable, often painful to watch. The makers of Earth
-#    KNOW it's rubbish as they have to always say "Gene Roddenberry's Earth..." otherwise people
-#    would not continue watching. Roddenberry's ashes must be turning in their orbit as this dull,
-#    cheap, poorly edited (watching it without advert breaks really brings this home) trudging
-#    Trabant of a show lumbers into space. Spoiler. So, kill off a main character. And then bring
-#    him back as another actor. Jeeez. Dallas all over again.
-#
-#    prediction: negative
-#
-#    target: negative
-#
-#
-#    Example 2:
-#
-#    input_text: sst2 sentence: Worth the entertainment value of a rental, especially if you like
-#    action movies. This one features the usual car chases, fights with the great Van Damme kick
-#    style, shooting battles with the 40 shell load shotgun, and even terrorist style bombs. All
-#    of this is entertaining and competently handled but there is nothing that really blows you
-#    away if you've seen your share before.<br /><br />The plot is made interesting by the
-#    inclusion of a rabbit, which is clever but hardly profound. Many of the characters are
-#    heavily stereotyped -- the angry veterans, the terrified illegal aliens, the crooked cops,
-#    the indifferent feds, the bitchy tough lady station head, the crooked politician, the fat
-#    federale who looks like he was typecast as the Mexican in a Hollywood movie from the 1940s.
-#    All passably acted but again nothing special.<br /><br />I thought the main villains were
-#    pretty well done and fairly well acted. By the end of the movie you certainly knew who the
-#    good guys were and weren't. There was an emotional lift as the really bad ones got their just
-#    deserts. Very simplistic, but then you weren't expecting Hamlet, right? The only thing I found
-#    really annoying was the constant cuts to VDs daughter during the last fight scene.<br /><br />
-#    Not bad. Not good. Passable 4.
-#
-#    prediction: negative
-#
-#    target: negative
-#
-#
-#    Example 3:
-#
-#    input_text: sst2 sentence: its a totally average film with a few semi-alright action sequences
-#    that make the plot seem a little better and remind the viewer of the classic van dam films.
-#    parts of the plot don't make sense and seem to be added in to use up time. the end plot is that
-#    of a very basic type that doesn't leave the viewer guessing and any twists are obvious from the
-#    beginning. the end scene with the flask backs don't make sense as they are added in and seem to
-#    have little relevance to the history of van dam's character. not really worth watching again,
-#    bit disappointed in the end production, even though it is apparent it was shot on a low budget
-#    certain shots and sections in the film are of poor directed quality.
-#
-#    prediction: negative
-#
-#    target: negative
-#
-
-
-#######################################################################
-# Generate Translations
+# Translations
 # ---------------------
 #
 # Finally, we can also use the model to generate English to German translations on the first batch of examples from the Multi30k
@@ -543,55 +406,3 @@ def process_labels(labels, x):
     print(f"input_text: {input_text[i]}\n")
     print(f"prediction: {output_text[i]}\n")
     print(f"target: {target[i]}\n\n")
-
-
-#######################################################################
-# Translation Output
-# ------------------
-#
-# ::
-#
-#    Example 1:
-#
-#    input_text: translate English to German: A man in an orange hat starring at something.
-#
-#    prediction: Ein Mann in einem orangen Hut, der an etwas schaut.
-#
-#    target: Ein Mann mit einem orangefarbenen Hut, der etwas anstarrt.
-#
-#
-#    Example 2:
-#
-#    input_text: translate English to German: A Boston Terrier is running on lush green grass in front of a white fence.
-#
-#    prediction: Ein Boston Terrier läuft auf üppigem grünem Gras vor einem weißen Zaun.
-#
-#    target: Ein Boston Terrier läuft über saftig-grünes Gras vor einem weißen Zaun.
-#
-#
-#    Example 3:
-#
-#    input_text: translate English to German: A girl in karate uniform breaking a stick with a front kick.
-#
-#    prediction: Ein Mädchen in Karate-Uniform bricht einen Stöck mit einem Frontkick.
-#
-#    target: Ein Mädchen in einem Karateanzug bricht ein Brett mit einem Tritt.
-#
-#
-#    Example 4:
-#
-#    input_text: translate English to German: Five people wearing winter jackets and helmets stand in the snow, with snowmobiles in the background.
-#
-#    prediction: Fünf Menschen mit Winterjacken und Helmen stehen im Schnee, mit Schneemobilen im Hintergrund.
-#
-#    target: Fünf Leute in Winterjacken und mit Helmen stehen im Schnee mit Schneemobilen im Hintergrund.
-#
-#
-#    Example 5:
-#
-#    input_text: translate English to German: People are fixing the roof of a house.
-#
-#    prediction: Die Leute fixieren das Dach eines Hauses.
-#
-#    target: Leute Reparieren das Dach eines Hauses.
-#

From bd129b7b2fe9096b4b04c6e2aae08c16bb0e34ac Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Thu, 4 Aug 2022 19:32:20 +0000
Subject: [PATCH 09/28] update index.rst

---
 docs/source/index.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/index.rst b/docs/source/index.rst
index 7de6b45d02..4a2a2cccb1 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -51,7 +51,7 @@ Getting Started
    :caption: Getting Started
 
    tutorials/sst2_classification_non_distributed
-   tutorials/t5_demo
+   tutorials/t5_tutorial
 
 
 .. automodule:: torchtext

From e50a5a075d831b13acdc00e51e6706e461b8118f Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Thu, 4 Aug 2022 19:46:19 +0000
Subject: [PATCH 10/28] adding torchdata dependency for docs build

---
 docs/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index 82de9e49f4..5a4c972b7a 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -3,3 +3,4 @@ sphinx==3.5.4
 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme
 matplotlib
 sphinx_gallery
+torchdata

From 67e2e50d594e139b91129bf4ac00cb794fd89e05 Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Thu, 4 Aug 2022 20:20:01 +0000
Subject: [PATCH 11/28] torchdata nightly build dependency

---
 docs/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index 5a4c972b7a..c53aeab9a6 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -3,4 +3,5 @@ sphinx==3.5.4
 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme
 matplotlib
 sphinx_gallery
+--extra-index-url https://download.pytorch.org/whl/nightly/cpu
 torchdata

From 2f43ca27d01d8b08de4839e107408d0b093635bc Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Thu, 4 Aug 2022 20:32:59 +0000
Subject: [PATCH 12/28] torchdata nightly try again

---
 docs/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index c53aeab9a6..b6f8b18927 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -3,5 +3,6 @@ sphinx==3.5.4
 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme
 matplotlib
 sphinx_gallery
+
 --extra-index-url https://download.pytorch.org/whl/nightly/cpu
 torchdata

From bdc73a5b8f88a253711aac236e53d7aba1626f3d Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Thu, 4 Aug 2022 20:46:57 +0000
Subject: [PATCH 13/28] torchdata nightly try again 2

---
 docs/requirements.txt | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index b6f8b18927..7a850cdebd 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,8 +1,7 @@
+--extra-index-url https://download.pytorch.org/whl/nightly/cpu
 Jinja2<3.1.0
 sphinx==3.5.4
 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme
 matplotlib
 sphinx_gallery
-
---extra-index-url https://download.pytorch.org/whl/nightly/cpu
 torchdata

From b431da9469f7d04fc7e839d5a35e92067ae87ff5 Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Thu, 4 Aug 2022 20:57:34 +0000
Subject: [PATCH 14/28] try replacing extra-index-url with index-url

---
 docs/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index 7a850cdebd..8dc1ade75c 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,4 +1,4 @@
---extra-index-url https://download.pytorch.org/whl/nightly/cpu
+--index-url https://download.pytorch.org/whl/nightly/cpu
 Jinja2<3.1.0
 sphinx==3.5.4
 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme

From 5f2cfbbccbf43d1dd418e59f9c5fe1f672fa5e56 Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Thu, 4 Aug 2022 21:03:33 +0000
Subject: [PATCH 15/28] add extra-index-url to be pypi

---
 docs/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index 8dc1ade75c..7382ec9cc5 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,4 +1,4 @@
---index-url https://download.pytorch.org/whl/nightly/cpu
+--index-url https://download.pytorch.org/whl/nightly/cpu --extra-index-url=https://pypi.org/simple
 Jinja2<3.1.0
 sphinx==3.5.4
 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme

From b133e1f69fd6b0658d6030ff003630ea84a49e6f Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Fri, 5 Aug 2022 14:54:38 +0000
Subject: [PATCH 16/28] adding torchdata dependency to config.yml

---
 .circleci/config.yml  | 1 +
 docs/requirements.txt | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 93fe09acad..3707643318 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -553,6 +553,7 @@ jobs:
             set -x
             conda install -y make python=${PYTHON_VERSION}
             pip install $(ls ~/workspace/torchtext*.whl) --pre -f "https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/cpu/torch_${UPLOAD_CHANNEL}.html"
+            pip install --pre torchdata --extra-index-url "https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/cpu"
       - run:
           name: Build docs
           command: |
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 7382ec9cc5..82de9e49f4 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,7 +1,5 @@
---index-url https://download.pytorch.org/whl/nightly/cpu --extra-index-url=https://pypi.org/simple
 Jinja2<3.1.0
 sphinx==3.5.4
 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme
 matplotlib
 sphinx_gallery
-torchdata

From 9e72d3e23e2c404b0c929d468cc737825c7cc7b4 Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Fri, 5 Aug 2022 11:28:35 -0400
Subject: [PATCH 17/28] updating config.yml.in

---
 .circleci/config.yml.in | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in
index 5ecd5e95bf..b27ccbc2a2 100644
--- a/.circleci/config.yml.in
+++ b/.circleci/config.yml.in
@@ -553,6 +553,7 @@ jobs:
             set -x
             conda install -y make python=${PYTHON_VERSION}
             pip install $(ls ~/workspace/torchtext*.whl) --pre -f "https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/cpu/torch_${UPLOAD_CHANNEL}.html"
+            pip install --pre torchdata --extra-index-url "https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/cpu"
       - run:
           name: Build docs
           command: |

From 015bd752af13a25dfe65c23692c0bff0c6575665 Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Fri, 5 Aug 2022 17:36:10 -0400
Subject: [PATCH 18/28] Revert "updating config.yml.in"

This reverts commit 9e72d3e23e2c404b0c929d468cc737825c7cc7b4.
---
 .circleci/config.yml.in | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in
index b27ccbc2a2..5ecd5e95bf 100644
--- a/.circleci/config.yml.in
+++ b/.circleci/config.yml.in
@@ -553,7 +553,6 @@ jobs:
             set -x
             conda install -y make python=${PYTHON_VERSION}
             pip install $(ls ~/workspace/torchtext*.whl) --pre -f "https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/cpu/torch_${UPLOAD_CHANNEL}.html"
-            pip install --pre torchdata --extra-index-url "https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/cpu"
       - run:
           name: Build docs
           command: |

From 056509209d194828795a1771c1134e1f67d3f37d Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Fri, 5 Aug 2022 17:36:25 -0400
Subject: [PATCH 19/28] Revert "adding torchdata dependency to config.yml"

This reverts commit b133e1f69fd6b0658d6030ff003630ea84a49e6f.
---
 .circleci/config.yml  | 1 -
 docs/requirements.txt | 2 ++
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 3707643318..93fe09acad 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -553,7 +553,6 @@ jobs:
             set -x
             conda install -y make python=${PYTHON_VERSION}
             pip install $(ls ~/workspace/torchtext*.whl) --pre -f "https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/cpu/torch_${UPLOAD_CHANNEL}.html"
-            pip install --pre torchdata --extra-index-url "https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/cpu"
       - run:
           name: Build docs
           command: |
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 82de9e49f4..7382ec9cc5 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,5 +1,7 @@
+--index-url https://download.pytorch.org/whl/nightly/cpu --extra-index-url=https://pypi.org/simple
 Jinja2<3.1.0
 sphinx==3.5.4
 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme
 matplotlib
 sphinx_gallery
+torchdata

From 5c9e33ce0a934beb209e73f65abf10a5e26960ce Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Fri, 5 Aug 2022 17:36:29 -0400
Subject: [PATCH 20/28] Revert "add extra-index-url to be pypi"

This reverts commit 5f2cfbbccbf43d1dd418e59f9c5fe1f672fa5e56.
---
 docs/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index 7382ec9cc5..8dc1ade75c 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,4 +1,4 @@
---index-url https://download.pytorch.org/whl/nightly/cpu --extra-index-url=https://pypi.org/simple
+--index-url https://download.pytorch.org/whl/nightly/cpu
 Jinja2<3.1.0
 sphinx==3.5.4
 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme

From bd7b9760d4592bb814eeaf148400fb404a47a52a Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Fri, 5 Aug 2022 17:36:34 -0400
Subject: [PATCH 21/28] Revert "try replacing extra-index-url with index-url"

This reverts commit b431da9469f7d04fc7e839d5a35e92067ae87ff5.
---
 docs/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index 8dc1ade75c..7a850cdebd 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,4 +1,4 @@
---index-url https://download.pytorch.org/whl/nightly/cpu
+--extra-index-url https://download.pytorch.org/whl/nightly/cpu
 Jinja2<3.1.0
 sphinx==3.5.4
 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme

From 92ae288795da411f54ac634804cef965e6d57107 Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Fri, 5 Aug 2022 17:36:38 -0400
Subject: [PATCH 22/28] Revert "torchdata nightly try again 2"

This reverts commit bdc73a5b8f88a253711aac236e53d7aba1626f3d.
---
 docs/requirements.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index 7a850cdebd..b6f8b18927 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,7 +1,8 @@
---extra-index-url https://download.pytorch.org/whl/nightly/cpu
 Jinja2<3.1.0
 sphinx==3.5.4
 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme
 matplotlib
 sphinx_gallery
+
+--extra-index-url https://download.pytorch.org/whl/nightly/cpu
 torchdata

From 36a7f004dd593236e14b10ecd44d542f1f8f2cd8 Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Fri, 5 Aug 2022 17:36:41 -0400
Subject: [PATCH 23/28] Revert "torchdata nightly try again"

This reverts commit 2f43ca27d01d8b08de4839e107408d0b093635bc.
---
 docs/requirements.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index b6f8b18927..c53aeab9a6 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -3,6 +3,5 @@ sphinx==3.5.4
 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme
 matplotlib
 sphinx_gallery
-
 --extra-index-url https://download.pytorch.org/whl/nightly/cpu
 torchdata

From 875895891dde6cfef6ea568880b50c7f9c430b33 Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Fri, 5 Aug 2022 17:36:44 -0400
Subject: [PATCH 24/28] Revert "torchdata nightly build dependency"

This reverts commit 67e2e50d594e139b91129bf4ac00cb794fd89e05.
---
 docs/requirements.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index c53aeab9a6..5a4c972b7a 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -3,5 +3,4 @@ sphinx==3.5.4
 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme
 matplotlib
 sphinx_gallery
---extra-index-url https://download.pytorch.org/whl/nightly/cpu
 torchdata

From fd4c6d7d9ee335e90246389d8e1173029fc27c7d Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Fri, 5 Aug 2022 17:36:48 -0400
Subject: [PATCH 25/28] Revert "adding torchdata dependency for docs build"

This reverts commit e50a5a075d831b13acdc00e51e6706e461b8118f.
---
 docs/requirements.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index 5a4c972b7a..82de9e49f4 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -3,4 +3,3 @@ sphinx==3.5.4
 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme
 matplotlib
 sphinx_gallery
-torchdata

From 669b211a710b50adc364715f6d14d959ffea8b74 Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Fri, 5 Aug 2022 17:36:52 -0400
Subject: [PATCH 26/28] Revert "update index.rst"

This reverts commit bd129b7b2fe9096b4b04c6e2aae08c16bb0e34ac.
---
 docs/source/index.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/index.rst b/docs/source/index.rst
index 4a2a2cccb1..7de6b45d02 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -51,7 +51,7 @@ Getting Started
    :caption: Getting Started
 
    tutorials/sst2_classification_non_distributed
-   tutorials/t5_tutorial
+   tutorials/t5_demo
 
 
 .. automodule:: torchtext

From 35527a43797b95c25f217cebab24b4f870b5ae9f Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Fri, 5 Aug 2022 17:36:54 -0400
Subject: [PATCH 27/28] Revert "renaming tutorial and removing hard-coded
 outputs"

This reverts commit 9b11269dc129ea6415f3c053b79031b398a8caa1.
---
 .../tutorials/{t5_tutorial.py => t5_demo.py}  | 201 +++++++++++++++++-
 1 file changed, 195 insertions(+), 6 deletions(-)
 rename examples/tutorials/{t5_tutorial.py => t5_demo.py} (64%)

diff --git a/examples/tutorials/t5_tutorial.py b/examples/tutorials/t5_demo.py
similarity index 64%
rename from examples/tutorials/t5_tutorial.py
rename to examples/tutorials/t5_demo.py
index 77f240c4ee..f639c406ad 100644
--- a/examples/tutorials/t5_tutorial.py
+++ b/examples/tutorials/t5_demo.py
@@ -86,8 +86,8 @@
 t5_base = T5_BASE_GENERATION
 transform = t5_base.transform()
 model = t5_base.get_model()
-model = model.eval()
-model = model.to(DEVICE)
+model.eval()
+model.to(DEVICE)
 
 
 #######################################################################
@@ -303,7 +303,7 @@ def apply_prefix(task, x):
 
 from torchtext.datasets import IMDB
 
-imdb_batch_size = 5
+imdb_batch_size = 3
 imdb_datapipe = IMDB(split="test")
 task = "sst2 sentence"
 labels = {"neg": "negative", "pos": "positive"}
@@ -338,7 +338,7 @@ def process_labels(labels, x):
 multi_dataloader = DataLoader(multi_datapipe, batch_size=None)
 
 #######################################################################
-# Summaries
+# Generate Summaries
 # ------------------
 #
 # We can put all of the components together the generate summaries on the first batch of articles in the CNNDM test set
@@ -361,7 +361,74 @@ def process_labels(labels, x):
 
 
 #######################################################################
-# Sentiment Classifications
+# Summarization Output
+# --------------------
+#
+# ::
+#
+#    Example 1:
+#
+#    prediction: the Palestinians become the 123rd member of the international criminal
+#    court . the accession was marked by a ceremony at the Hague, where the court is based .
+#    the ICC opened a preliminary examination into the situation in the occupied
+#    Palestinian territory .
+#
+#    target: Membership gives the ICC jurisdiction over alleged crimes committed in
+#    Palestinian territories since last June . Israel and the United States opposed the
+#    move, which could open the door to war crimes investigations against Israelis .
+#
+#
+#    Example 2:
+#
+#    prediction: a stray pooch has used up at least three of her own after being hit by a
+#    car and buried in a field . the dog managed to stagger to a nearby farm, dirt-covered
+#    and emaciated, where she was found . she suffered a dislocated jaw, leg injuries and a
+#    caved-in sinus cavity -- and still requires surgery to help her breathe .
+#
+#    target: Theia, a bully breed mix, was apparently hit by a car, whacked with a hammer
+#    and buried in a field . "She's a true miracle dog and she deserves a good life," says
+#    Sara Mellado, who is looking for a home for Theia .
+#
+#
+#    Example 3:
+#
+#    prediction: mohammad Javad Zarif arrived in Iran on a sunny friday morning . he has gone
+#    a long way to bring Iran in from the cold and allow it to rejoin the international
+#    community . but there are some facts about him that are less well-known .
+#
+#    target: Mohammad Javad Zarif has spent more time with John Kerry than any other
+#    foreign minister . He once participated in a takeover of the Iranian Consulate in San
+#    Francisco . The Iranian foreign minister tweets in English .
+#
+#
+#    Example 4:
+#
+#    prediction: five americans were monitored for three weeks after being exposed to Ebola in
+#    west africa . one of the five had a heart-related issue and has been discharged but hasn't
+#    left the area . they are clinicians for Partners in Health, a Boston-based aid group .
+#
+#    target: 17 Americans were exposed to the Ebola virus while in Sierra Leone in March .
+#    Another person was diagnosed with the disease and taken to hospital in Maryland .
+#    National Institutes of Health says the patient is in fair condition after weeks of
+#    treatment .
+#
+#
+#    Example 5:
+#
+#    prediction: the student was identified during an investigation by campus police and
+#    the office of student affairs . he admitted to placing the noose on the tree early
+#    Wednesday morning . the incident is one of several recent racist events to affect
+#    college students .
+#
+#    target: Student is no longer on Duke University campus and will face disciplinary
+#    review . School officials identified student during investigation and the person
+#    admitted to hanging the noose, Duke says . The noose, made of rope, was discovered on
+#    campus about 2 a.m.
+#
+
+
+#######################################################################
+# Generate Sentiment Classifications
 # ----------------------------------
 #
 # Similarly, we can now use the model to generate sentiment classifications on the first batch of reviews from the IMDB test set
@@ -385,7 +452,77 @@ def process_labels(labels, x):
 
 
 #######################################################################
-# Translations
+# Sentiment Output
+# ----------------
+#
+# ::
+#
+#    Example 1:
+#
+#    input_text: sst2 sentence: I love sci-fi and am willing to put up with a lot. Sci-fi
+#    movies/TV are usually underfunded, under-appreciated and misunderstood. I tried to like
+#    this, I really did, but it is to good TV sci-fi as Babylon 5 is to Star Trek (the original).
+#    Silly prosthetics, cheap cardboard sets, stilted dialogues, CG that doesn't match the
+#    background, and painfully one-dimensional characters cannot be overcome with a 'sci-fi'
+#    setting. (I'm sure there are those of you out there who think Babylon 5 is good sci-fi TV.
+#    It's not. It's clichéd and uninspiring.) While US viewers might like emotion and character
+#    development, sci-fi is a genre that does not take itself seriously (cf. Star Trek). It may
+#    treat important issues, yet not as a serious philosophy. It's really difficult to care about
+#    the characters here as they are not simply foolish, just missing a spark of life. Their
+#    actions and reactions are wooden and predictable, often painful to watch. The makers of Earth
+#    KNOW it's rubbish as they have to always say "Gene Roddenberry's Earth..." otherwise people
+#    would not continue watching. Roddenberry's ashes must be turning in their orbit as this dull,
+#    cheap, poorly edited (watching it without advert breaks really brings this home) trudging
+#    Trabant of a show lumbers into space. Spoiler. So, kill off a main character. And then bring
+#    him back as another actor. Jeeez. Dallas all over again.
+#
+#    prediction: negative
+#
+#    target: negative
+#
+#
+#    Example 2:
+#
+#    input_text: sst2 sentence: Worth the entertainment value of a rental, especially if you like
+#    action movies. This one features the usual car chases, fights with the great Van Damme kick
+#    style, shooting battles with the 40 shell load shotgun, and even terrorist style bombs. All
+#    of this is entertaining and competently handled but there is nothing that really blows you
+#    away if you've seen your share before.<br /><br />The plot is made interesting by the
+#    inclusion of a rabbit, which is clever but hardly profound. Many of the characters are
+#    heavily stereotyped -- the angry veterans, the terrified illegal aliens, the crooked cops,
+#    the indifferent feds, the bitchy tough lady station head, the crooked politician, the fat
+#    federale who looks like he was typecast as the Mexican in a Hollywood movie from the 1940s.
+#    All passably acted but again nothing special.<br /><br />I thought the main villains were
+#    pretty well done and fairly well acted. By the end of the movie you certainly knew who the
+#    good guys were and weren't. There was an emotional lift as the really bad ones got their just
+#    deserts. Very simplistic, but then you weren't expecting Hamlet, right? The only thing I found
+#    really annoying was the constant cuts to VDs daughter during the last fight scene.<br /><br />
+#    Not bad. Not good. Passable 4.
+#
+#    prediction: negative
+#
+#    target: negative
+#
+#
+#    Example 3:
+#
+#    input_text: sst2 sentence: its a totally average film with a few semi-alright action sequences
+#    that make the plot seem a little better and remind the viewer of the classic van dam films.
+#    parts of the plot don't make sense and seem to be added in to use up time. the end plot is that
+#    of a very basic type that doesn't leave the viewer guessing and any twists are obvious from the
+#    beginning. the end scene with the flask backs don't make sense as they are added in and seem to
+#    have little relevance to the history of van dam's character. not really worth watching again,
+#    bit disappointed in the end production, even though it is apparent it was shot on a low budget
+#    certain shots and sections in the film are of poor directed quality.
+#
+#    prediction: negative
+#
+#    target: negative
+#
+
+
+#######################################################################
+# Generate Translations
 # ---------------------
 #
 # Finally, we can also use the model to generate English to German translations on the first batch of examples from the Multi30k
@@ -406,3 +543,55 @@ def process_labels(labels, x):
     print(f"input_text: {input_text[i]}\n")
     print(f"prediction: {output_text[i]}\n")
     print(f"target: {target[i]}\n\n")
+
+
+#######################################################################
+# Translation Output
+# ------------------
+#
+# ::
+#
+#    Example 1:
+#
+#    input_text: translate English to German: A man in an orange hat starring at something.
+#
+#    prediction: Ein Mann in einem orangen Hut, der an etwas schaut.
+#
+#    target: Ein Mann mit einem orangefarbenen Hut, der etwas anstarrt.
+#
+#
+#    Example 2:
+#
+#    input_text: translate English to German: A Boston Terrier is running on lush green grass in front of a white fence.
+#
+#    prediction: Ein Boston Terrier läuft auf üppigem grünem Gras vor einem weißen Zaun.
+#
+#    target: Ein Boston Terrier läuft über saftig-grünes Gras vor einem weißen Zaun.
+#
+#
+#    Example 3:
+#
+#    input_text: translate English to German: A girl in karate uniform breaking a stick with a front kick.
+#
+#    prediction: Ein Mädchen in Karate-Uniform bricht einen Stöck mit einem Frontkick.
+#
+#    target: Ein Mädchen in einem Karateanzug bricht ein Brett mit einem Tritt.
+#
+#
+#    Example 4:
+#
+#    input_text: translate English to German: Five people wearing winter jackets and helmets stand in the snow, with snowmobiles in the background.
+#
+#    prediction: Fünf Menschen mit Winterjacken und Helmen stehen im Schnee, mit Schneemobilen im Hintergrund.
+#
+#    target: Fünf Leute in Winterjacken und mit Helmen stehen im Schnee mit Schneemobilen im Hintergrund.
+#
+#
+#    Example 5:
+#
+#    input_text: translate English to German: People are fixing the roof of a house.
+#
+#    prediction: Die Leute fixieren das Dach eines Hauses.
+#
+#    target: Leute Reparieren das Dach eines Hauses.
+#

From 8bd65dd37997c6bb755d7d8c62c77f4498174ac0 Mon Sep 17 00:00:00 2001
From: pmabbo13 <pabbo@fb.com>
Date: Mon, 8 Aug 2022 09:56:52 -0400
Subject: [PATCH 28/28] correcting typos

---
 examples/tutorials/t5_demo.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/examples/tutorials/t5_demo.py b/examples/tutorials/t5_demo.py
index f639c406ad..fbdb932ad1 100644
--- a/examples/tutorials/t5_demo.py
+++ b/examples/tutorials/t5_demo.py
@@ -76,8 +76,9 @@
 # -----------------
 #
 # torchtext provides SOTA pre-trained models that can be used directly for NLP tasks or fine-tuned on downstream tasks. Below
-# we use the pre-trained T5 model with standard base configuration to perform text summarization. For additional details on
-# available pre-trained models, please refer to documentation at https://pytorch.org/text/main/models.html
+# we use the pre-trained T5 model with standard base configuration to perform text summarization, sentiment classification, and
+# translation. For additional details on available pre-trained models, please refer to documentation at
+# https://pytorch.org/text/main/models.html
 #
 #
 from torchtext.prototype.models import T5_BASE_GENERATION
@@ -341,7 +342,7 @@ def process_labels(labels, x):
 # Generate Summaries
 # ------------------
 #
-# We can put all of the components together the generate summaries on the first batch of articles in the CNNDM test set
+# We can put all of the components together to generate summaries on the first batch of articles in the CNNDM test set
 # using a beam size of 3.
 #
 
@@ -431,7 +432,7 @@ def process_labels(labels, x):
 # Generate Sentiment Classifications
 # ----------------------------------
 #
-# Similarly, we can now use the model to generate sentiment classifications on the first batch of reviews from the IMDB test set
+# Similarly, we can use the model to generate sentiment classifications on the first batch of reviews from the IMDB test set
 # using a beam size of 1.
 #