From 5be85f36e6ad57746299be6dbcbc4e5fd9898bc6 Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Wed, 3 Aug 2022 18:30:02 -0400 Subject: [PATCH 01/28] demo t5 model on sentiment classification and translation --- examples/tutorials/cnndm_summarization.py | 350 ++++++++++++++++++---- 1 file changed, 286 insertions(+), 64 deletions(-) diff --git a/examples/tutorials/cnndm_summarization.py b/examples/tutorials/cnndm_summarization.py index 604d962350..9f6f2dd86d 100644 --- a/examples/tutorials/cnndm_summarization.py +++ b/examples/tutorials/cnndm_summarization.py @@ -1,6 +1,6 @@ """ -CNNDM Text Summarization with T5-Base model -======================================================= +T5-BASE MODEL FOR SUMMARIZATION, SENTIMENT CLASSIFICATION, AND TRANSLATION +========================================================================== **Author**: `Pendo Abbo `__ @@ -10,12 +10,13 @@ # Overview # -------- # -# This tutorial demonstrates how to use a pre-trained T5 Model for text summarization on the CNN-DailyMail dataset. -# We will demonstrate how to use the torchtext library to: +# This tutorial demonstrates how to use a pre-trained T5 Model for summarization, sentiment classification, and +# translation tasks. We will demonstrate how to use the torchtext library to: # # 1. Build a text pre-processing pipeline for a T5 model -# 2. Read in the CNNDM dataset and pre-process the text -# 3. Instantiate a pre-trained T5 model with base configuration, and perform text summarization on input text +# 2. Instantiate a pre-trained T5 model with base configuration +# 3. Read in the CNNDM, IMDB, and Multi30k datasets and pre-process their texts in preparation for the model +# 4. Perform text summarization, sentiment classification, and translation # # @@ -69,60 +70,6 @@ # transform = T5_BASE_GENERATION.transform() # -####################################################################### -# Dataset -# ------- -# torchtext provides several standard NLP datasets. For a complete list, refer to the documentation at https://pytorch.org/text/stable/datasets.html. -# These datasets are built using composable torchdata datapipes and hence support standard flow-control and mapping/transformation -# using user defined functions and transforms. Below, we demonstrate how to pre-process the CNNDM dataset to include the prefix necessary -# for the model to identify the task it is performing. -# -# The CNNDM dataset has a train, validation, and test split. Below we demo on the test split. -# -# .. note:: -# Using datapipes is still currently subject to a few caveats. If you wish -# to extend this example to include shuffling, multi-processing, or -# distributed learning, please see :ref:`this note ` -# for further instructions. - -from functools import partial - -from torch.utils.data import DataLoader -from torchtext.datasets.cnndm import CNNDM - -batch_size = 5 -test_datapipe = CNNDM(split="test") -task = "summarize" - - -def apply_prefix(task, x): - return f"{task}: " + x[0], x[1] - - -test_datapipe = test_datapipe.map(partial(apply_prefix, task)) -test_datapipe = test_datapipe.batch(batch_size) -test_datapipe = test_datapipe.rows2columnar(["article", "abstract"]) -test_dataloader = DataLoader(test_datapipe, batch_size=None) - -####################################################################### -# Alternately we can also use batched API (i.e apply the prefix on the whole batch) -# -# :: -# -# def batch_prefix(task, x): -# return { -# "article": [f'{task}: ' + y for y in x["article"]], -# "abstract": x["abstract"] -# } -# -# batch_size = 5 -# test_datapipe = CNNDM(split="test") -# task = 'summarize' -# -# test_datapipe = test_datapipe.batch(batch_size).rows2columnar(["article", "abstract"]) -# test_datapipe = test_datapipe.map(partial(batch_prefix, task)) -# test_dataloader = DataLoader(test_datapipe, batch_size=None) -# ###################################################################### # Model Preparation @@ -286,15 +233,119 @@ def generate(encoder_tokens: Tensor, eos_idx: int, model: T5Model, beam_size: in return decoder_tokens +####################################################################### +# Datasets +# -------- +# torchtext provides several standard NLP datasets. For a complete list, refer to the documentation +# at https://pytorch.org/text/stable/datasets.html. These datasets are built using composable torchdata +# datapipes and hence support standard flow-control and mapping/transformation using user defined +# functions and transforms. +# +# Below, we demonstrate how to pre-process the CNNDM dataset to include the prefix necessary for the +# model to indentify the task it is performing. The CNNDM dataset has a train, validation, and test +# split. Below we demo on the test split. +# +# The T5 model uses the prefix "summarize" for text summarization. For more information on task +# prefixes, please visit Appendix D of the T5 Paper at https://arxiv.org/pdf/1910.10683.pdf +# +# .. note:: +# Using datapipes is still currently subject to a few caveats. If you wish +# to extend this example to include shuffling, multi-processing, or +# distributed learning, please see :ref:`this note ` +# for further instructions. + +from functools import partial + +from torch.utils.data import DataLoader +from torchtext.datasets.cnndm import CNNDM + +batch_size = 5 +cnndm_datapipe = CNNDM(split="test") +task = "summarize" + + +def apply_prefix(task, x): + return f"{task}: " + x[0], x[1] + + +cnndm_datapipe = cnndm_datapipe.map(partial(apply_prefix, task)) +cnndm_datapipe = cnndm_datapipe.batch(batch_size) +cnndm_datapipe = cnndm_datapipe.rows2columnar(["article", "abstract"]) +cnndm_dataloader = DataLoader(cnndm_datapipe, batch_size=None) + +####################################################################### +# Alternately we can also use batched API (i.e apply the prefix on the whole batch) +# +# :: +# +# def batch_prefix(task, x): +# return { +# "article": [f'{task}: ' + y for y in x["article"]], +# "abstract": x["abstract"] +# } +# +# batch_size = 5 +# cnndm_datapipe = CNNDM(split="test") +# task = 'summarize' +# +# cnndm_datapipe = cnndm_datapipe.batch(batch_size).rows2columnar(["article", "abstract"]) +# cnndm_datapipe = cnndm_datapipe.map(partial(batch_prefix, task)) +# cnndm_dataloader = DataLoader(cnndm_datapipe, batch_size=None) +# + +####################################################################### +# We can also load the IMDB dataset, which will be used to demonstrate sentiment classification using the T5 model. +# This dataset has a train and test split. Below we demo on the test split. +# +# The T5 model was trained on the SST2 dataset (also available in torchtext) for sentiment classification using the +# prefix "sst2 sentence". Therefore, we will use this prefix to perform sentiment classification on the IMDB dataset. +# + +from torchtext.datasets import IMDB + +batch_size = 3 +imdb_datapipe = IMDB(split="test") +task = "sst2 sentence" +labels = {"neg": "negative", "pos": "positive"} + + +def process_labels(labels, x): + return x[1], labels[x[0]] + + +imdb_datapipe = imdb_datapipe.map(partial(process_labels, labels)) +imdb_datapipe = imdb_datapipe.map(partial(apply_prefix, task)) +imdb_datapipe = imdb_datapipe.batch(batch_size) +imdb_datapipe = imdb_datapipe.rows2columnar(["text", "label"]) +imdb_dataloader = DataLoader(imdb_datapipe, batch_size=None) + +####################################################################### +# Finally, we can also load the Multi30k dataset to demonstrate English to German translation using the T5 model. +# This dataset has a train, validation, and test split. Below we demo on the test split. +# +# The T5 model uses the prefix "translate English to German" for this task. + +from torchtext.datasets import Multi30k + +batch_size = 5 +language_pair = ("en", "de") +multi_datapipe = Multi30k(split="test", language_pair=language_pair) +task = "translate English to German" + +multi_datapipe = multi_datapipe.map(partial(apply_prefix, task)) +multi_datapipe = multi_datapipe.batch(batch_size) +multi_datapipe = multi_datapipe.rows2columnar(["english", "german"]) +multi_dataloader = DataLoader(multi_datapipe, batch_size=None) + ####################################################################### # Generate Summaries # ------------------ # -# Finally we put all of the components together to generate summaries on the first batch of articles in the CNNDM test set +# We can put all of the components together the generate summaries on the first batch of articles in the CNNDM test set # using a beam size of 3. # -batch = next(iter(test_dataloader)) +batch = next(iter(cnndm_dataloader)) input_text = batch["article"] model_input = transform(input_text) target = batch["abstract"] @@ -311,8 +362,8 @@ def generate(encoder_tokens: Tensor, eos_idx: int, model: T5Model, beam_size: in ####################################################################### -# Output -# ------ +# Summarization Output +# -------------------- # # :: # @@ -374,3 +425,174 @@ def generate(encoder_tokens: Tensor, eos_idx: int, model: T5Model, beam_size: in # review . School officials identified student during investigation and the person # admitted to hanging the noose, Duke says . The noose, made of rope, was discovered on # campus about 2 a.m. +# + + +####################################################################### +# Generate Sentiment Classifications +# ---------------------------------- +# +# Similarly, we can now use the model to generate sentiment classifications on the first batch of reviews from the IMDB test set +# using a beam size of 1. +# + +batch = next(iter(imdb_dataloader)) +input_text = batch["text"] +model_input = transform(input_text) +target = batch["label"] +beam_size = 1 + +model_output = generate(model=model, encoder_tokens=model_input, eos_idx=eos_idx, beam_size=beam_size) +output_text = transform.decode(model_output.tolist()) + +for i in range(batch_size): + + print(f"Example {i+1}:\n") + print(f"input_text: {input_text[i]}\n") + print(f"prediction: {output_text[i]}\n") + print(f"target: {target[i]}\n\n") + +####################################################################### +# Sentiment Output +# ---------------- +# +# :: +# +# Example 1: +# +# input_text: sst2 sentence: I love sci-fi and am willing to put up with a lot. Sci-fi +# movies/TV are usually underfunded, under-appreciated and misunderstood. I tried to like +# this, I really did, but it is to good TV sci-fi as Babylon 5 is to Star Trek (the original). +# Silly prosthetics, cheap cardboard sets, stilted dialogues, CG that doesn't match the +# background, and painfully one-dimensional characters cannot be overcome with a 'sci-fi' +# setting. (I'm sure there are those of you out there who think Babylon 5 is good sci-fi TV. +# It's not. It's clichéd and uninspiring.) While US viewers might like emotion and character +# development, sci-fi is a genre that does not take itself seriously (cf. Star Trek). It may +# treat important issues, yet not as a serious philosophy. It's really difficult to care about +# the characters here as they are not simply foolish, just missing a spark of life. Their +# actions and reactions are wooden and predictable, often painful to watch. The makers of Earth +# KNOW it's rubbish as they have to always say "Gene Roddenberry's Earth..." otherwise people +# would not continue watching. Roddenberry's ashes must be turning in their orbit as this dull, +# cheap, poorly edited (watching it without advert breaks really brings this home) trudging +# Trabant of a show lumbers into space. Spoiler. So, kill off a main character. And then bring +# him back as another actor. Jeeez! Dallas all over again. +# +# prediction: negative +# +# target: negative +# +# +# Example 2: +# +# input_text: sst2 sentence: Worth the entertainment value of a rental, especially if you like +# action movies. This one features the usual car chases, fights with the great Van Damme kick +# style, shooting battles with the 40 shell load shotgun, and even terrorist style bombs. All +# of this is entertaining and competently handled but there is nothing that really blows you +# away if you've seen your share before.

The plot is made interesting by the +# inclusion of a rabbit, which is clever but hardly profound. Many of the characters are +# heavily stereotyped -- the angry veterans, the terrified illegal aliens, the crooked cops, +# the indifferent feds, the bitchy tough lady station head, the crooked politician, the fat +# federale who looks like he was typecast as the Mexican in a Hollywood movie from the 1940s. +# All passably acted but again nothing special.

I thought the main villains were +# pretty well done and fairly well acted. By the end of the movie you certainly knew who the +# good guys were and weren't. There was an emotional lift as the really bad ones got their just +# deserts. Very simplistic, but then you weren't expecting Hamlet, right? The only thing I found +# really annoying was the constant cuts to VDs daughter during the last fight scene.

+# Not bad. Not good. Passable 4. +# +# prediction: negative +# +# target: negative +# +# +# Example 3: +# +# input_text: sst2 sentence: its a totally average film with a few semi-alright action sequences +# that make the plot seem a little better and remind the viewer of the classic van dam films. +# parts of the plot don't make sense and seem to be added in to use up time. the end plot is that +# of a very basic type that doesn't leave the viewer guessing and any twists are obvious from the +# beginning. the end scene with the flask backs don't make sense as they are added in and seem to +# have little relevance to the history of van dam's character. not really worth watching again, +# bit disappointed in the end production, even though it is apparent it was shot on a low budget +# certain shots and sections in the film are of poor directed quality +# +# prediction: negative +# +# target: negative +# + + +####################################################################### +# Generate Translations +# --------------------- +# +# Similarly, we can now use the model to generate sentiment classification on the first batch of reviews from the IMDB test set +# using a beam size of 4. +# + +batch = next(iter(multi_dataloader)) +input_text = batch["english"] +model_input = transform(input_text) +target = batch["german"] +beam_size = 4 + +model_output = generate(model=model, encoder_tokens=model_input, eos_idx=eos_idx, beam_size=beam_size) +output_text = transform.decode(model_output.tolist()) + +for i in range(batch_size): + + print(f"Example {i+1}:\n") + print(f"input_text: {input_text[i]}\n") + print(f"prediction: {output_text[i]}\n") + print(f"target: {target[i]}\n\n") + +####################################################################### +# Translation Output +# ------------------ +# +# :: +# +# Example 1: +# +# input_text: translate English to German: A man in an orange hat starring at something. +# +# prediction: Ein Mann in einem orangen Hut, der an etwas schaut. +# +# target: Ein Mann mit einem orangefarbenen Hut, der etwas anstarrt. +# +# +# Example 2: +# +# input_text: translate English to German: A Boston Terrier is running on lush green grass in front of a white fence. +# +# prediction: Ein Boston Terrier läuft auf üppigem grünem Gras vor einem weißen Zaun. +# +# target: Ein Boston Terrier läuft über saftig-grünes Gras vor einem weißen Zaun. +# +# +# Example 3: +# +# input_text: translate English to German: A girl in karate uniform breaking a stick with a front kick. +# +# prediction: Ein Mädchen in Karate-Uniform bricht einen Stöck mit einem Frontkick. +# +# target: Ein Mädchen in einem Karateanzug bricht ein Brett mit einem Tritt. +# +# +# Example 4: +# +# input_text: translate English to German: Five people wearing winter jackets and helmets stand in the snow, with snowmobiles in the background. +# +# prediction: Fünf Menschen mit Winterjacken und Helmen stehen im Schnee, mit Schneemobilen im Hintergrund. +# +# target: Fünf Leute in Winterjacken und mit Helmen stehen im Schnee mit Schneemobilen im Hintergrund. +# +# +# Example 5: +# +# input_text: translate English to German: People are fixing the roof of a house. +# +# prediction: Die Leute fixieren das Dach eines Hauses. +# +# target: Leute Reparieren das Dach eines Hauses. +# From 0bb1c1c53e31d9773923735f1df2fb8774f2a1cb Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Wed, 3 Aug 2022 18:32:42 -0400 Subject: [PATCH 02/28] renaming tutorial file --- examples/tutorials/{cnndm_summarization.py => t5_demo.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename examples/tutorials/{cnndm_summarization.py => t5_demo.py} (100%) diff --git a/examples/tutorials/cnndm_summarization.py b/examples/tutorials/t5_demo.py similarity index 100% rename from examples/tutorials/cnndm_summarization.py rename to examples/tutorials/t5_demo.py From aa25e6c684cef75b28070804b1e7b012ec0dd72c Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Wed, 3 Aug 2022 19:48:42 -0400 Subject: [PATCH 03/28] update source/index.rst --- docs/source/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 9ebc235d57..7de6b45d02 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -51,7 +51,7 @@ Getting Started :caption: Getting Started tutorials/sst2_classification_non_distributed - tutorials/cnndm_summarization + tutorials/t5_demo .. automodule:: torchtext From bb5f19240d6e985a71c5fdaefbe34ea57f9ac958 Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Wed, 3 Aug 2022 20:02:48 -0400 Subject: [PATCH 04/28] correct title format --- examples/tutorials/t5_demo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/tutorials/t5_demo.py b/examples/tutorials/t5_demo.py index 9f6f2dd86d..16f35efbcd 100644 --- a/examples/tutorials/t5_demo.py +++ b/examples/tutorials/t5_demo.py @@ -1,5 +1,5 @@ """ -T5-BASE MODEL FOR SUMMARIZATION, SENTIMENT CLASSIFICATION, AND TRANSLATION +T5-Base Model for Summarization, Sentiment Classification, and Translation ========================================================================== **Author**: `Pendo Abbo `__ From 31696a77e53f515d91672abe37f99f87561058dc Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Thu, 4 Aug 2022 10:30:47 -0400 Subject: [PATCH 05/28] correct description for generate translations section --- examples/tutorials/t5_demo.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/examples/tutorials/t5_demo.py b/examples/tutorials/t5_demo.py index 16f35efbcd..c02b87338c 100644 --- a/examples/tutorials/t5_demo.py +++ b/examples/tutorials/t5_demo.py @@ -347,15 +347,14 @@ def process_labels(labels, x): batch = next(iter(cnndm_dataloader)) input_text = batch["article"] -model_input = transform(input_text) target = batch["abstract"] beam_size = 3 +model_input = transform(input_text) model_output = generate(model=model, encoder_tokens=model_input, eos_idx=eos_idx, beam_size=beam_size) output_text = transform.decode(model_output.tolist()) for i in range(batch_size): - print(f"Example {i+1}:\n") print(f"prediction: {output_text[i]}\n") print(f"target: {target[i]}\n\n") @@ -438,15 +437,14 @@ def process_labels(labels, x): batch = next(iter(imdb_dataloader)) input_text = batch["text"] -model_input = transform(input_text) target = batch["label"] beam_size = 1 +model_input = transform(input_text) model_output = generate(model=model, encoder_tokens=model_input, eos_idx=eos_idx, beam_size=beam_size) output_text = transform.decode(model_output.tolist()) for i in range(batch_size): - print(f"Example {i+1}:\n") print(f"input_text: {input_text[i]}\n") print(f"prediction: {output_text[i]}\n") @@ -526,21 +524,20 @@ def process_labels(labels, x): # Generate Translations # --------------------- # -# Similarly, we can now use the model to generate sentiment classification on the first batch of reviews from the IMDB test set -# using a beam size of 4. +# Finally, we can also use the model to generate English to German translations on the first batch of examples from the Multi30k +# test set using a beam size of 4. # batch = next(iter(multi_dataloader)) input_text = batch["english"] -model_input = transform(input_text) target = batch["german"] beam_size = 4 +model_input = transform(input_text) model_output = generate(model=model, encoder_tokens=model_input, eos_idx=eos_idx, beam_size=beam_size) output_text = transform.decode(model_output.tolist()) for i in range(batch_size): - print(f"Example {i+1}:\n") print(f"input_text: {input_text[i]}\n") print(f"prediction: {output_text[i]}\n") From 0b3c95edff42fae5fe6485c91c819929006ba20f Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Thu, 4 Aug 2022 11:49:02 -0400 Subject: [PATCH 06/28] specifying batch_size variable names --- examples/tutorials/t5_demo.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/examples/tutorials/t5_demo.py b/examples/tutorials/t5_demo.py index c02b87338c..9860da465e 100644 --- a/examples/tutorials/t5_demo.py +++ b/examples/tutorials/t5_demo.py @@ -259,7 +259,7 @@ def generate(encoder_tokens: Tensor, eos_idx: int, model: T5Model, beam_size: in from torch.utils.data import DataLoader from torchtext.datasets.cnndm import CNNDM -batch_size = 5 +cnndm_batch_size = 5 cnndm_datapipe = CNNDM(split="test") task = "summarize" @@ -269,7 +269,7 @@ def apply_prefix(task, x): cnndm_datapipe = cnndm_datapipe.map(partial(apply_prefix, task)) -cnndm_datapipe = cnndm_datapipe.batch(batch_size) +cnndm_datapipe = cnndm_datapipe.batch(cnndm_batch_size) cnndm_datapipe = cnndm_datapipe.rows2columnar(["article", "abstract"]) cnndm_dataloader = DataLoader(cnndm_datapipe, batch_size=None) @@ -284,11 +284,11 @@ def apply_prefix(task, x): # "abstract": x["abstract"] # } # -# batch_size = 5 +# cnndm_batch_size = 5 # cnndm_datapipe = CNNDM(split="test") # task = 'summarize' # -# cnndm_datapipe = cnndm_datapipe.batch(batch_size).rows2columnar(["article", "abstract"]) +# cnndm_datapipe = cnndm_datapipe.batch(cnndm_batch_size).rows2columnar(["article", "abstract"]) # cnndm_datapipe = cnndm_datapipe.map(partial(batch_prefix, task)) # cnndm_dataloader = DataLoader(cnndm_datapipe, batch_size=None) # @@ -303,7 +303,7 @@ def apply_prefix(task, x): from torchtext.datasets import IMDB -batch_size = 3 +imdb_batch_size = 3 imdb_datapipe = IMDB(split="test") task = "sst2 sentence" labels = {"neg": "negative", "pos": "positive"} @@ -315,7 +315,7 @@ def process_labels(labels, x): imdb_datapipe = imdb_datapipe.map(partial(process_labels, labels)) imdb_datapipe = imdb_datapipe.map(partial(apply_prefix, task)) -imdb_datapipe = imdb_datapipe.batch(batch_size) +imdb_datapipe = imdb_datapipe.batch(imdb_batch_size) imdb_datapipe = imdb_datapipe.rows2columnar(["text", "label"]) imdb_dataloader = DataLoader(imdb_datapipe, batch_size=None) @@ -327,13 +327,13 @@ def process_labels(labels, x): from torchtext.datasets import Multi30k -batch_size = 5 +multi_batch_size = 5 language_pair = ("en", "de") multi_datapipe = Multi30k(split="test", language_pair=language_pair) task = "translate English to German" multi_datapipe = multi_datapipe.map(partial(apply_prefix, task)) -multi_datapipe = multi_datapipe.batch(batch_size) +multi_datapipe = multi_datapipe.batch(multi_batch_size) multi_datapipe = multi_datapipe.rows2columnar(["english", "german"]) multi_dataloader = DataLoader(multi_datapipe, batch_size=None) @@ -354,7 +354,7 @@ def process_labels(labels, x): model_output = generate(model=model, encoder_tokens=model_input, eos_idx=eos_idx, beam_size=beam_size) output_text = transform.decode(model_output.tolist()) -for i in range(batch_size): +for i in range(cnndm_batch_size): print(f"Example {i+1}:\n") print(f"prediction: {output_text[i]}\n") print(f"target: {target[i]}\n\n") @@ -444,12 +444,13 @@ def process_labels(labels, x): model_output = generate(model=model, encoder_tokens=model_input, eos_idx=eos_idx, beam_size=beam_size) output_text = transform.decode(model_output.tolist()) -for i in range(batch_size): +for i in range(imdb_batch_size): print(f"Example {i+1}:\n") print(f"input_text: {input_text[i]}\n") print(f"prediction: {output_text[i]}\n") print(f"target: {target[i]}\n\n") + ####################################################################### # Sentiment Output # ---------------- @@ -512,7 +513,7 @@ def process_labels(labels, x): # beginning. the end scene with the flask backs don't make sense as they are added in and seem to # have little relevance to the history of van dam's character. not really worth watching again, # bit disappointed in the end production, even though it is apparent it was shot on a low budget -# certain shots and sections in the film are of poor directed quality +# certain shots and sections in the film are of poor directed quality. # # prediction: negative # @@ -537,12 +538,13 @@ def process_labels(labels, x): model_output = generate(model=model, encoder_tokens=model_input, eos_idx=eos_idx, beam_size=beam_size) output_text = transform.decode(model_output.tolist()) -for i in range(batch_size): +for i in range(multi_batch_size): print(f"Example {i+1}:\n") print(f"input_text: {input_text[i]}\n") print(f"prediction: {output_text[i]}\n") print(f"target: {target[i]}\n\n") + ####################################################################### # Translation Output # ------------------ From 6538f6fd3a99b0e7cda23dc49770c5d874d7cd81 Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Thu, 4 Aug 2022 11:50:08 -0400 Subject: [PATCH 07/28] fixing format issue with sentiment output --- examples/tutorials/t5_demo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/tutorials/t5_demo.py b/examples/tutorials/t5_demo.py index 9860da465e..f639c406ad 100644 --- a/examples/tutorials/t5_demo.py +++ b/examples/tutorials/t5_demo.py @@ -474,7 +474,7 @@ def process_labels(labels, x): # would not continue watching. Roddenberry's ashes must be turning in their orbit as this dull, # cheap, poorly edited (watching it without advert breaks really brings this home) trudging # Trabant of a show lumbers into space. Spoiler. So, kill off a main character. And then bring -# him back as another actor. Jeeez! Dallas all over again. +# him back as another actor. Jeeez. Dallas all over again. # # prediction: negative # From 9b11269dc129ea6415f3c053b79031b398a8caa1 Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Thu, 4 Aug 2022 19:31:25 +0000 Subject: [PATCH 08/28] renaming tutorial and removing hard-coded outputs --- .../tutorials/{t5_demo.py => t5_tutorial.py} | 201 +----------------- 1 file changed, 6 insertions(+), 195 deletions(-) rename examples/tutorials/{t5_demo.py => t5_tutorial.py} (64%) diff --git a/examples/tutorials/t5_demo.py b/examples/tutorials/t5_tutorial.py similarity index 64% rename from examples/tutorials/t5_demo.py rename to examples/tutorials/t5_tutorial.py index f639c406ad..77f240c4ee 100644 --- a/examples/tutorials/t5_demo.py +++ b/examples/tutorials/t5_tutorial.py @@ -86,8 +86,8 @@ t5_base = T5_BASE_GENERATION transform = t5_base.transform() model = t5_base.get_model() -model.eval() -model.to(DEVICE) +model = model.eval() +model = model.to(DEVICE) ####################################################################### @@ -303,7 +303,7 @@ def apply_prefix(task, x): from torchtext.datasets import IMDB -imdb_batch_size = 3 +imdb_batch_size = 5 imdb_datapipe = IMDB(split="test") task = "sst2 sentence" labels = {"neg": "negative", "pos": "positive"} @@ -338,7 +338,7 @@ def process_labels(labels, x): multi_dataloader = DataLoader(multi_datapipe, batch_size=None) ####################################################################### -# Generate Summaries +# Summaries # ------------------ # # We can put all of the components together the generate summaries on the first batch of articles in the CNNDM test set @@ -361,74 +361,7 @@ def process_labels(labels, x): ####################################################################### -# Summarization Output -# -------------------- -# -# :: -# -# Example 1: -# -# prediction: the Palestinians become the 123rd member of the international criminal -# court . the accession was marked by a ceremony at the Hague, where the court is based . -# the ICC opened a preliminary examination into the situation in the occupied -# Palestinian territory . -# -# target: Membership gives the ICC jurisdiction over alleged crimes committed in -# Palestinian territories since last June . Israel and the United States opposed the -# move, which could open the door to war crimes investigations against Israelis . -# -# -# Example 2: -# -# prediction: a stray pooch has used up at least three of her own after being hit by a -# car and buried in a field . the dog managed to stagger to a nearby farm, dirt-covered -# and emaciated, where she was found . she suffered a dislocated jaw, leg injuries and a -# caved-in sinus cavity -- and still requires surgery to help her breathe . -# -# target: Theia, a bully breed mix, was apparently hit by a car, whacked with a hammer -# and buried in a field . "She's a true miracle dog and she deserves a good life," says -# Sara Mellado, who is looking for a home for Theia . -# -# -# Example 3: -# -# prediction: mohammad Javad Zarif arrived in Iran on a sunny friday morning . he has gone -# a long way to bring Iran in from the cold and allow it to rejoin the international -# community . but there are some facts about him that are less well-known . -# -# target: Mohammad Javad Zarif has spent more time with John Kerry than any other -# foreign minister . He once participated in a takeover of the Iranian Consulate in San -# Francisco . The Iranian foreign minister tweets in English . -# -# -# Example 4: -# -# prediction: five americans were monitored for three weeks after being exposed to Ebola in -# west africa . one of the five had a heart-related issue and has been discharged but hasn't -# left the area . they are clinicians for Partners in Health, a Boston-based aid group . -# -# target: 17 Americans were exposed to the Ebola virus while in Sierra Leone in March . -# Another person was diagnosed with the disease and taken to hospital in Maryland . -# National Institutes of Health says the patient is in fair condition after weeks of -# treatment . -# -# -# Example 5: -# -# prediction: the student was identified during an investigation by campus police and -# the office of student affairs . he admitted to placing the noose on the tree early -# Wednesday morning . the incident is one of several recent racist events to affect -# college students . -# -# target: Student is no longer on Duke University campus and will face disciplinary -# review . School officials identified student during investigation and the person -# admitted to hanging the noose, Duke says . The noose, made of rope, was discovered on -# campus about 2 a.m. -# - - -####################################################################### -# Generate Sentiment Classifications +# Sentiment Classifications # ---------------------------------- # # Similarly, we can now use the model to generate sentiment classifications on the first batch of reviews from the IMDB test set @@ -452,77 +385,7 @@ def process_labels(labels, x): ####################################################################### -# Sentiment Output -# ---------------- -# -# :: -# -# Example 1: -# -# input_text: sst2 sentence: I love sci-fi and am willing to put up with a lot. Sci-fi -# movies/TV are usually underfunded, under-appreciated and misunderstood. I tried to like -# this, I really did, but it is to good TV sci-fi as Babylon 5 is to Star Trek (the original). -# Silly prosthetics, cheap cardboard sets, stilted dialogues, CG that doesn't match the -# background, and painfully one-dimensional characters cannot be overcome with a 'sci-fi' -# setting. (I'm sure there are those of you out there who think Babylon 5 is good sci-fi TV. -# It's not. It's clichéd and uninspiring.) While US viewers might like emotion and character -# development, sci-fi is a genre that does not take itself seriously (cf. Star Trek). It may -# treat important issues, yet not as a serious philosophy. It's really difficult to care about -# the characters here as they are not simply foolish, just missing a spark of life. Their -# actions and reactions are wooden and predictable, often painful to watch. The makers of Earth -# KNOW it's rubbish as they have to always say "Gene Roddenberry's Earth..." otherwise people -# would not continue watching. Roddenberry's ashes must be turning in their orbit as this dull, -# cheap, poorly edited (watching it without advert breaks really brings this home) trudging -# Trabant of a show lumbers into space. Spoiler. So, kill off a main character. And then bring -# him back as another actor. Jeeez. Dallas all over again. -# -# prediction: negative -# -# target: negative -# -# -# Example 2: -# -# input_text: sst2 sentence: Worth the entertainment value of a rental, especially if you like -# action movies. This one features the usual car chases, fights with the great Van Damme kick -# style, shooting battles with the 40 shell load shotgun, and even terrorist style bombs. All -# of this is entertaining and competently handled but there is nothing that really blows you -# away if you've seen your share before.

The plot is made interesting by the -# inclusion of a rabbit, which is clever but hardly profound. Many of the characters are -# heavily stereotyped -- the angry veterans, the terrified illegal aliens, the crooked cops, -# the indifferent feds, the bitchy tough lady station head, the crooked politician, the fat -# federale who looks like he was typecast as the Mexican in a Hollywood movie from the 1940s. -# All passably acted but again nothing special.

I thought the main villains were -# pretty well done and fairly well acted. By the end of the movie you certainly knew who the -# good guys were and weren't. There was an emotional lift as the really bad ones got their just -# deserts. Very simplistic, but then you weren't expecting Hamlet, right? The only thing I found -# really annoying was the constant cuts to VDs daughter during the last fight scene.

-# Not bad. Not good. Passable 4. -# -# prediction: negative -# -# target: negative -# -# -# Example 3: -# -# input_text: sst2 sentence: its a totally average film with a few semi-alright action sequences -# that make the plot seem a little better and remind the viewer of the classic van dam films. -# parts of the plot don't make sense and seem to be added in to use up time. the end plot is that -# of a very basic type that doesn't leave the viewer guessing and any twists are obvious from the -# beginning. the end scene with the flask backs don't make sense as they are added in and seem to -# have little relevance to the history of van dam's character. not really worth watching again, -# bit disappointed in the end production, even though it is apparent it was shot on a low budget -# certain shots and sections in the film are of poor directed quality. -# -# prediction: negative -# -# target: negative -# - - -####################################################################### -# Generate Translations +# Translations # --------------------- # # Finally, we can also use the model to generate English to German translations on the first batch of examples from the Multi30k @@ -543,55 +406,3 @@ def process_labels(labels, x): print(f"input_text: {input_text[i]}\n") print(f"prediction: {output_text[i]}\n") print(f"target: {target[i]}\n\n") - - -####################################################################### -# Translation Output -# ------------------ -# -# :: -# -# Example 1: -# -# input_text: translate English to German: A man in an orange hat starring at something. -# -# prediction: Ein Mann in einem orangen Hut, der an etwas schaut. -# -# target: Ein Mann mit einem orangefarbenen Hut, der etwas anstarrt. -# -# -# Example 2: -# -# input_text: translate English to German: A Boston Terrier is running on lush green grass in front of a white fence. -# -# prediction: Ein Boston Terrier läuft auf üppigem grünem Gras vor einem weißen Zaun. -# -# target: Ein Boston Terrier läuft über saftig-grünes Gras vor einem weißen Zaun. -# -# -# Example 3: -# -# input_text: translate English to German: A girl in karate uniform breaking a stick with a front kick. -# -# prediction: Ein Mädchen in Karate-Uniform bricht einen Stöck mit einem Frontkick. -# -# target: Ein Mädchen in einem Karateanzug bricht ein Brett mit einem Tritt. -# -# -# Example 4: -# -# input_text: translate English to German: Five people wearing winter jackets and helmets stand in the snow, with snowmobiles in the background. -# -# prediction: Fünf Menschen mit Winterjacken und Helmen stehen im Schnee, mit Schneemobilen im Hintergrund. -# -# target: Fünf Leute in Winterjacken und mit Helmen stehen im Schnee mit Schneemobilen im Hintergrund. -# -# -# Example 5: -# -# input_text: translate English to German: People are fixing the roof of a house. -# -# prediction: Die Leute fixieren das Dach eines Hauses. -# -# target: Leute Reparieren das Dach eines Hauses. -# From bd129b7b2fe9096b4b04c6e2aae08c16bb0e34ac Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Thu, 4 Aug 2022 19:32:20 +0000 Subject: [PATCH 09/28] update index.rst --- docs/source/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 7de6b45d02..4a2a2cccb1 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -51,7 +51,7 @@ Getting Started :caption: Getting Started tutorials/sst2_classification_non_distributed - tutorials/t5_demo + tutorials/t5_tutorial .. automodule:: torchtext From e50a5a075d831b13acdc00e51e6706e461b8118f Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Thu, 4 Aug 2022 19:46:19 +0000 Subject: [PATCH 10/28] adding torchdata dependency for docs build --- docs/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/requirements.txt b/docs/requirements.txt index 82de9e49f4..5a4c972b7a 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -3,3 +3,4 @@ sphinx==3.5.4 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme matplotlib sphinx_gallery +torchdata From 67e2e50d594e139b91129bf4ac00cb794fd89e05 Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Thu, 4 Aug 2022 20:20:01 +0000 Subject: [PATCH 11/28] torchdata nightly build dependency --- docs/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/requirements.txt b/docs/requirements.txt index 5a4c972b7a..c53aeab9a6 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -3,4 +3,5 @@ sphinx==3.5.4 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme matplotlib sphinx_gallery +--extra-index-url https://download.pytorch.org/whl/nightly/cpu torchdata From 2f43ca27d01d8b08de4839e107408d0b093635bc Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Thu, 4 Aug 2022 20:32:59 +0000 Subject: [PATCH 12/28] torchdata nightly try again --- docs/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/requirements.txt b/docs/requirements.txt index c53aeab9a6..b6f8b18927 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -3,5 +3,6 @@ sphinx==3.5.4 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme matplotlib sphinx_gallery + --extra-index-url https://download.pytorch.org/whl/nightly/cpu torchdata From bdc73a5b8f88a253711aac236e53d7aba1626f3d Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Thu, 4 Aug 2022 20:46:57 +0000 Subject: [PATCH 13/28] torchdata nightly try again 2 --- docs/requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index b6f8b18927..7a850cdebd 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,8 +1,7 @@ +--extra-index-url https://download.pytorch.org/whl/nightly/cpu Jinja2<3.1.0 sphinx==3.5.4 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme matplotlib sphinx_gallery - ---extra-index-url https://download.pytorch.org/whl/nightly/cpu torchdata From b431da9469f7d04fc7e839d5a35e92067ae87ff5 Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Thu, 4 Aug 2022 20:57:34 +0000 Subject: [PATCH 14/28] try replacing extra-index-url with index-url --- docs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 7a850cdebd..8dc1ade75c 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,4 @@ ---extra-index-url https://download.pytorch.org/whl/nightly/cpu +--index-url https://download.pytorch.org/whl/nightly/cpu Jinja2<3.1.0 sphinx==3.5.4 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme From 5f2cfbbccbf43d1dd418e59f9c5fe1f672fa5e56 Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Thu, 4 Aug 2022 21:03:33 +0000 Subject: [PATCH 15/28] add extra-index-url to be pypi --- docs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 8dc1ade75c..7382ec9cc5 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,4 @@ ---index-url https://download.pytorch.org/whl/nightly/cpu +--index-url https://download.pytorch.org/whl/nightly/cpu --extra-index-url=https://pypi.org/simple Jinja2<3.1.0 sphinx==3.5.4 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme From b133e1f69fd6b0658d6030ff003630ea84a49e6f Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Fri, 5 Aug 2022 14:54:38 +0000 Subject: [PATCH 16/28] adding torchdata dependency to config.yml --- .circleci/config.yml | 1 + docs/requirements.txt | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 93fe09acad..3707643318 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -553,6 +553,7 @@ jobs: set -x conda install -y make python=${PYTHON_VERSION} pip install $(ls ~/workspace/torchtext*.whl) --pre -f "https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/cpu/torch_${UPLOAD_CHANNEL}.html" + pip install --pre torchdata --extra-index-url "https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/cpu" - run: name: Build docs command: | diff --git a/docs/requirements.txt b/docs/requirements.txt index 7382ec9cc5..82de9e49f4 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,7 +1,5 @@ ---index-url https://download.pytorch.org/whl/nightly/cpu --extra-index-url=https://pypi.org/simple Jinja2<3.1.0 sphinx==3.5.4 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme matplotlib sphinx_gallery -torchdata From 9e72d3e23e2c404b0c929d468cc737825c7cc7b4 Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Fri, 5 Aug 2022 11:28:35 -0400 Subject: [PATCH 17/28] updating config.yml.in --- .circleci/config.yml.in | 1 + 1 file changed, 1 insertion(+) diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in index 5ecd5e95bf..b27ccbc2a2 100644 --- a/.circleci/config.yml.in +++ b/.circleci/config.yml.in @@ -553,6 +553,7 @@ jobs: set -x conda install -y make python=${PYTHON_VERSION} pip install $(ls ~/workspace/torchtext*.whl) --pre -f "https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/cpu/torch_${UPLOAD_CHANNEL}.html" + pip install --pre torchdata --extra-index-url "https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/cpu" - run: name: Build docs command: | From 015bd752af13a25dfe65c23692c0bff0c6575665 Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Fri, 5 Aug 2022 17:36:10 -0400 Subject: [PATCH 18/28] Revert "updating config.yml.in" This reverts commit 9e72d3e23e2c404b0c929d468cc737825c7cc7b4. --- .circleci/config.yml.in | 1 - 1 file changed, 1 deletion(-) diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in index b27ccbc2a2..5ecd5e95bf 100644 --- a/.circleci/config.yml.in +++ b/.circleci/config.yml.in @@ -553,7 +553,6 @@ jobs: set -x conda install -y make python=${PYTHON_VERSION} pip install $(ls ~/workspace/torchtext*.whl) --pre -f "https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/cpu/torch_${UPLOAD_CHANNEL}.html" - pip install --pre torchdata --extra-index-url "https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/cpu" - run: name: Build docs command: | From 056509209d194828795a1771c1134e1f67d3f37d Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Fri, 5 Aug 2022 17:36:25 -0400 Subject: [PATCH 19/28] Revert "adding torchdata dependency to config.yml" This reverts commit b133e1f69fd6b0658d6030ff003630ea84a49e6f. --- .circleci/config.yml | 1 - docs/requirements.txt | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 3707643318..93fe09acad 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -553,7 +553,6 @@ jobs: set -x conda install -y make python=${PYTHON_VERSION} pip install $(ls ~/workspace/torchtext*.whl) --pre -f "https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/cpu/torch_${UPLOAD_CHANNEL}.html" - pip install --pre torchdata --extra-index-url "https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/cpu" - run: name: Build docs command: | diff --git a/docs/requirements.txt b/docs/requirements.txt index 82de9e49f4..7382ec9cc5 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,5 +1,7 @@ +--index-url https://download.pytorch.org/whl/nightly/cpu --extra-index-url=https://pypi.org/simple Jinja2<3.1.0 sphinx==3.5.4 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme matplotlib sphinx_gallery +torchdata From 5c9e33ce0a934beb209e73f65abf10a5e26960ce Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Fri, 5 Aug 2022 17:36:29 -0400 Subject: [PATCH 20/28] Revert "add extra-index-url to be pypi" This reverts commit 5f2cfbbccbf43d1dd418e59f9c5fe1f672fa5e56. --- docs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 7382ec9cc5..8dc1ade75c 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,4 @@ ---index-url https://download.pytorch.org/whl/nightly/cpu --extra-index-url=https://pypi.org/simple +--index-url https://download.pytorch.org/whl/nightly/cpu Jinja2<3.1.0 sphinx==3.5.4 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme From bd7b9760d4592bb814eeaf148400fb404a47a52a Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Fri, 5 Aug 2022 17:36:34 -0400 Subject: [PATCH 21/28] Revert "try replacing extra-index-url with index-url" This reverts commit b431da9469f7d04fc7e839d5a35e92067ae87ff5. --- docs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 8dc1ade75c..7a850cdebd 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,4 @@ ---index-url https://download.pytorch.org/whl/nightly/cpu +--extra-index-url https://download.pytorch.org/whl/nightly/cpu Jinja2<3.1.0 sphinx==3.5.4 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme From 92ae288795da411f54ac634804cef965e6d57107 Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Fri, 5 Aug 2022 17:36:38 -0400 Subject: [PATCH 22/28] Revert "torchdata nightly try again 2" This reverts commit bdc73a5b8f88a253711aac236e53d7aba1626f3d. --- docs/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 7a850cdebd..b6f8b18927 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,7 +1,8 @@ ---extra-index-url https://download.pytorch.org/whl/nightly/cpu Jinja2<3.1.0 sphinx==3.5.4 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme matplotlib sphinx_gallery + +--extra-index-url https://download.pytorch.org/whl/nightly/cpu torchdata From 36a7f004dd593236e14b10ecd44d542f1f8f2cd8 Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Fri, 5 Aug 2022 17:36:41 -0400 Subject: [PATCH 23/28] Revert "torchdata nightly try again" This reverts commit 2f43ca27d01d8b08de4839e107408d0b093635bc. --- docs/requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index b6f8b18927..c53aeab9a6 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -3,6 +3,5 @@ sphinx==3.5.4 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme matplotlib sphinx_gallery - --extra-index-url https://download.pytorch.org/whl/nightly/cpu torchdata From 875895891dde6cfef6ea568880b50c7f9c430b33 Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Fri, 5 Aug 2022 17:36:44 -0400 Subject: [PATCH 24/28] Revert "torchdata nightly build dependency" This reverts commit 67e2e50d594e139b91129bf4ac00cb794fd89e05. --- docs/requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index c53aeab9a6..5a4c972b7a 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -3,5 +3,4 @@ sphinx==3.5.4 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme matplotlib sphinx_gallery ---extra-index-url https://download.pytorch.org/whl/nightly/cpu torchdata From fd4c6d7d9ee335e90246389d8e1173029fc27c7d Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Fri, 5 Aug 2022 17:36:48 -0400 Subject: [PATCH 25/28] Revert "adding torchdata dependency for docs build" This reverts commit e50a5a075d831b13acdc00e51e6706e461b8118f. --- docs/requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 5a4c972b7a..82de9e49f4 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -3,4 +3,3 @@ sphinx==3.5.4 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@b4d0005#egg=pytorch_sphinx_theme matplotlib sphinx_gallery -torchdata From 669b211a710b50adc364715f6d14d959ffea8b74 Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Fri, 5 Aug 2022 17:36:52 -0400 Subject: [PATCH 26/28] Revert "update index.rst" This reverts commit bd129b7b2fe9096b4b04c6e2aae08c16bb0e34ac. --- docs/source/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 4a2a2cccb1..7de6b45d02 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -51,7 +51,7 @@ Getting Started :caption: Getting Started tutorials/sst2_classification_non_distributed - tutorials/t5_tutorial + tutorials/t5_demo .. automodule:: torchtext From 35527a43797b95c25f217cebab24b4f870b5ae9f Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Fri, 5 Aug 2022 17:36:54 -0400 Subject: [PATCH 27/28] Revert "renaming tutorial and removing hard-coded outputs" This reverts commit 9b11269dc129ea6415f3c053b79031b398a8caa1. --- .../tutorials/{t5_tutorial.py => t5_demo.py} | 201 +++++++++++++++++- 1 file changed, 195 insertions(+), 6 deletions(-) rename examples/tutorials/{t5_tutorial.py => t5_demo.py} (64%) diff --git a/examples/tutorials/t5_tutorial.py b/examples/tutorials/t5_demo.py similarity index 64% rename from examples/tutorials/t5_tutorial.py rename to examples/tutorials/t5_demo.py index 77f240c4ee..f639c406ad 100644 --- a/examples/tutorials/t5_tutorial.py +++ b/examples/tutorials/t5_demo.py @@ -86,8 +86,8 @@ t5_base = T5_BASE_GENERATION transform = t5_base.transform() model = t5_base.get_model() -model = model.eval() -model = model.to(DEVICE) +model.eval() +model.to(DEVICE) ####################################################################### @@ -303,7 +303,7 @@ def apply_prefix(task, x): from torchtext.datasets import IMDB -imdb_batch_size = 5 +imdb_batch_size = 3 imdb_datapipe = IMDB(split="test") task = "sst2 sentence" labels = {"neg": "negative", "pos": "positive"} @@ -338,7 +338,7 @@ def process_labels(labels, x): multi_dataloader = DataLoader(multi_datapipe, batch_size=None) ####################################################################### -# Summaries +# Generate Summaries # ------------------ # # We can put all of the components together the generate summaries on the first batch of articles in the CNNDM test set @@ -361,7 +361,74 @@ def process_labels(labels, x): ####################################################################### -# Sentiment Classifications +# Summarization Output +# -------------------- +# +# :: +# +# Example 1: +# +# prediction: the Palestinians become the 123rd member of the international criminal +# court . the accession was marked by a ceremony at the Hague, where the court is based . +# the ICC opened a preliminary examination into the situation in the occupied +# Palestinian territory . +# +# target: Membership gives the ICC jurisdiction over alleged crimes committed in +# Palestinian territories since last June . Israel and the United States opposed the +# move, which could open the door to war crimes investigations against Israelis . +# +# +# Example 2: +# +# prediction: a stray pooch has used up at least three of her own after being hit by a +# car and buried in a field . the dog managed to stagger to a nearby farm, dirt-covered +# and emaciated, where she was found . she suffered a dislocated jaw, leg injuries and a +# caved-in sinus cavity -- and still requires surgery to help her breathe . +# +# target: Theia, a bully breed mix, was apparently hit by a car, whacked with a hammer +# and buried in a field . "She's a true miracle dog and she deserves a good life," says +# Sara Mellado, who is looking for a home for Theia . +# +# +# Example 3: +# +# prediction: mohammad Javad Zarif arrived in Iran on a sunny friday morning . he has gone +# a long way to bring Iran in from the cold and allow it to rejoin the international +# community . but there are some facts about him that are less well-known . +# +# target: Mohammad Javad Zarif has spent more time with John Kerry than any other +# foreign minister . He once participated in a takeover of the Iranian Consulate in San +# Francisco . The Iranian foreign minister tweets in English . +# +# +# Example 4: +# +# prediction: five americans were monitored for three weeks after being exposed to Ebola in +# west africa . one of the five had a heart-related issue and has been discharged but hasn't +# left the area . they are clinicians for Partners in Health, a Boston-based aid group . +# +# target: 17 Americans were exposed to the Ebola virus while in Sierra Leone in March . +# Another person was diagnosed with the disease and taken to hospital in Maryland . +# National Institutes of Health says the patient is in fair condition after weeks of +# treatment . +# +# +# Example 5: +# +# prediction: the student was identified during an investigation by campus police and +# the office of student affairs . he admitted to placing the noose on the tree early +# Wednesday morning . the incident is one of several recent racist events to affect +# college students . +# +# target: Student is no longer on Duke University campus and will face disciplinary +# review . School officials identified student during investigation and the person +# admitted to hanging the noose, Duke says . The noose, made of rope, was discovered on +# campus about 2 a.m. +# + + +####################################################################### +# Generate Sentiment Classifications # ---------------------------------- # # Similarly, we can now use the model to generate sentiment classifications on the first batch of reviews from the IMDB test set @@ -385,7 +452,77 @@ def process_labels(labels, x): ####################################################################### -# Translations +# Sentiment Output +# ---------------- +# +# :: +# +# Example 1: +# +# input_text: sst2 sentence: I love sci-fi and am willing to put up with a lot. Sci-fi +# movies/TV are usually underfunded, under-appreciated and misunderstood. I tried to like +# this, I really did, but it is to good TV sci-fi as Babylon 5 is to Star Trek (the original). +# Silly prosthetics, cheap cardboard sets, stilted dialogues, CG that doesn't match the +# background, and painfully one-dimensional characters cannot be overcome with a 'sci-fi' +# setting. (I'm sure there are those of you out there who think Babylon 5 is good sci-fi TV. +# It's not. It's clichéd and uninspiring.) While US viewers might like emotion and character +# development, sci-fi is a genre that does not take itself seriously (cf. Star Trek). It may +# treat important issues, yet not as a serious philosophy. It's really difficult to care about +# the characters here as they are not simply foolish, just missing a spark of life. Their +# actions and reactions are wooden and predictable, often painful to watch. The makers of Earth +# KNOW it's rubbish as they have to always say "Gene Roddenberry's Earth..." otherwise people +# would not continue watching. Roddenberry's ashes must be turning in their orbit as this dull, +# cheap, poorly edited (watching it without advert breaks really brings this home) trudging +# Trabant of a show lumbers into space. Spoiler. So, kill off a main character. And then bring +# him back as another actor. Jeeez. Dallas all over again. +# +# prediction: negative +# +# target: negative +# +# +# Example 2: +# +# input_text: sst2 sentence: Worth the entertainment value of a rental, especially if you like +# action movies. This one features the usual car chases, fights with the great Van Damme kick +# style, shooting battles with the 40 shell load shotgun, and even terrorist style bombs. All +# of this is entertaining and competently handled but there is nothing that really blows you +# away if you've seen your share before.

The plot is made interesting by the +# inclusion of a rabbit, which is clever but hardly profound. Many of the characters are +# heavily stereotyped -- the angry veterans, the terrified illegal aliens, the crooked cops, +# the indifferent feds, the bitchy tough lady station head, the crooked politician, the fat +# federale who looks like he was typecast as the Mexican in a Hollywood movie from the 1940s. +# All passably acted but again nothing special.

I thought the main villains were +# pretty well done and fairly well acted. By the end of the movie you certainly knew who the +# good guys were and weren't. There was an emotional lift as the really bad ones got their just +# deserts. Very simplistic, but then you weren't expecting Hamlet, right? The only thing I found +# really annoying was the constant cuts to VDs daughter during the last fight scene.

+# Not bad. Not good. Passable 4. +# +# prediction: negative +# +# target: negative +# +# +# Example 3: +# +# input_text: sst2 sentence: its a totally average film with a few semi-alright action sequences +# that make the plot seem a little better and remind the viewer of the classic van dam films. +# parts of the plot don't make sense and seem to be added in to use up time. the end plot is that +# of a very basic type that doesn't leave the viewer guessing and any twists are obvious from the +# beginning. the end scene with the flask backs don't make sense as they are added in and seem to +# have little relevance to the history of van dam's character. not really worth watching again, +# bit disappointed in the end production, even though it is apparent it was shot on a low budget +# certain shots and sections in the film are of poor directed quality. +# +# prediction: negative +# +# target: negative +# + + +####################################################################### +# Generate Translations # --------------------- # # Finally, we can also use the model to generate English to German translations on the first batch of examples from the Multi30k @@ -406,3 +543,55 @@ def process_labels(labels, x): print(f"input_text: {input_text[i]}\n") print(f"prediction: {output_text[i]}\n") print(f"target: {target[i]}\n\n") + + +####################################################################### +# Translation Output +# ------------------ +# +# :: +# +# Example 1: +# +# input_text: translate English to German: A man in an orange hat starring at something. +# +# prediction: Ein Mann in einem orangen Hut, der an etwas schaut. +# +# target: Ein Mann mit einem orangefarbenen Hut, der etwas anstarrt. +# +# +# Example 2: +# +# input_text: translate English to German: A Boston Terrier is running on lush green grass in front of a white fence. +# +# prediction: Ein Boston Terrier läuft auf üppigem grünem Gras vor einem weißen Zaun. +# +# target: Ein Boston Terrier läuft über saftig-grünes Gras vor einem weißen Zaun. +# +# +# Example 3: +# +# input_text: translate English to German: A girl in karate uniform breaking a stick with a front kick. +# +# prediction: Ein Mädchen in Karate-Uniform bricht einen Stöck mit einem Frontkick. +# +# target: Ein Mädchen in einem Karateanzug bricht ein Brett mit einem Tritt. +# +# +# Example 4: +# +# input_text: translate English to German: Five people wearing winter jackets and helmets stand in the snow, with snowmobiles in the background. +# +# prediction: Fünf Menschen mit Winterjacken und Helmen stehen im Schnee, mit Schneemobilen im Hintergrund. +# +# target: Fünf Leute in Winterjacken und mit Helmen stehen im Schnee mit Schneemobilen im Hintergrund. +# +# +# Example 5: +# +# input_text: translate English to German: People are fixing the roof of a house. +# +# prediction: Die Leute fixieren das Dach eines Hauses. +# +# target: Leute Reparieren das Dach eines Hauses. +# From 8bd65dd37997c6bb755d7d8c62c77f4498174ac0 Mon Sep 17 00:00:00 2001 From: pmabbo13 Date: Mon, 8 Aug 2022 09:56:52 -0400 Subject: [PATCH 28/28] correcting typos --- examples/tutorials/t5_demo.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/examples/tutorials/t5_demo.py b/examples/tutorials/t5_demo.py index f639c406ad..fbdb932ad1 100644 --- a/examples/tutorials/t5_demo.py +++ b/examples/tutorials/t5_demo.py @@ -76,8 +76,9 @@ # ----------------- # # torchtext provides SOTA pre-trained models that can be used directly for NLP tasks or fine-tuned on downstream tasks. Below -# we use the pre-trained T5 model with standard base configuration to perform text summarization. For additional details on -# available pre-trained models, please refer to documentation at https://pytorch.org/text/main/models.html +# we use the pre-trained T5 model with standard base configuration to perform text summarization, sentiment classification, and +# translation. For additional details on available pre-trained models, please refer to documentation at +# https://pytorch.org/text/main/models.html # # from torchtext.prototype.models import T5_BASE_GENERATION @@ -341,7 +342,7 @@ def process_labels(labels, x): # Generate Summaries # ------------------ # -# We can put all of the components together the generate summaries on the first batch of articles in the CNNDM test set +# We can put all of the components together to generate summaries on the first batch of articles in the CNNDM test set # using a beam size of 3. # @@ -431,7 +432,7 @@ def process_labels(labels, x): # Generate Sentiment Classifications # ---------------------------------- # -# Similarly, we can now use the model to generate sentiment classifications on the first batch of reviews from the IMDB test set +# Similarly, we can use the model to generate sentiment classifications on the first batch of reviews from the IMDB test set # using a beam size of 1. #