From ee188661fe9c2aacd18e23342ba376e41924e042 Mon Sep 17 00:00:00 2001
From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com>
Date: Mon, 5 Apr 2021 15:28:21 -0700
Subject: [PATCH] Added defusedxml to parse untrusted XML data

---
 requirements.txt                         | 3 +++
 torchtext/data/datasets_utils.py         | 5 ++++-
 torchtext/legacy/datasets/translation.py | 5 ++++-
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index c9c761c82d..beb1e7155c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -28,3 +28,6 @@ pytest-pythonpath
 # Coverage statistics
 pytest-cov
 codecov
+
+# To parse untrusted XML data
+defusedxml
\ No newline at end of file
diff --git a/torchtext/data/datasets_utils.py b/torchtext/data/datasets_utils.py
index 1baef44e70..b17df76354 100644
--- a/torchtext/data/datasets_utils.py
+++ b/torchtext/data/datasets_utils.py
@@ -11,7 +11,10 @@
     unicode_csv_reader,
 )
 import codecs
-import xml.etree.ElementTree as ET
+try:
+    import defusedxml.ElementTree as ET
+except ImportError:
+    import xml.etree.ElementTree as ET
 """
 These functions and classes are meant solely for use in torchtext.datasets and not
 for public consumption yet.
diff --git a/torchtext/legacy/datasets/translation.py b/torchtext/legacy/datasets/translation.py
index 275e7cea90..6e6bfeb36e 100644
--- a/torchtext/legacy/datasets/translation.py
+++ b/torchtext/legacy/datasets/translation.py
@@ -1,5 +1,8 @@
 import os
-import xml.etree.ElementTree as ET
+try:
+    import defusedxml.ElementTree as ET
+except ImportError:
+    import xml.etree.ElementTree as ET
 import glob
 import io
 import codecs