From e8561c4d936082722c6c6c73a6d67f6d2656de14 Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Tue, 30 Mar 2021 13:12:12 -0700 Subject: [PATCH 1/6] Fixed Bandit High/Medium severity security issues --- .circleci/regenerate.py | 2 +- .circleci/utils/test_sort_yaml.py | 2 +- requirements.txt | 3 +++ torchtext/data/datasets_utils.py | 2 +- torchtext/legacy/datasets/translation.py | 2 +- 5 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.circleci/regenerate.py b/.circleci/regenerate.py index f963469c58..981474a268 100755 --- a/.circleci/regenerate.py +++ b/.circleci/regenerate.py @@ -184,7 +184,7 @@ def unittest_workflows(indentation=6): env = jinja2.Environment( loader=jinja2.FileSystemLoader(d), lstrip_blocks=True, - autoescape=False, + autoescape=True, ) with open(os.path.join(d, 'config.yml'), 'w') as f: diff --git a/.circleci/utils/test_sort_yaml.py b/.circleci/utils/test_sort_yaml.py index dc6db481dd..44ed29af6d 100755 --- a/.circleci/utils/test_sort_yaml.py +++ b/.circleci/utils/test_sort_yaml.py @@ -11,4 +11,4 @@ import sys import yaml -sys.stdout.write(yaml.dump(yaml.load(sys.stdin, Loader=yaml.FullLoader), sort_keys=True)) +sys.stdout.write(yaml.dump(yaml.safe_load(sys.stdin, Loader=yaml.FullLoader), sort_keys=True)) diff --git a/requirements.txt b/requirements.txt index c9c761c82d..beb1e7155c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,3 +28,6 @@ pytest-pythonpath # Coverage statistics pytest-cov codecov + +# To parse untrusted XML data +defusedxml \ No newline at end of file diff --git a/torchtext/data/datasets_utils.py b/torchtext/data/datasets_utils.py index 1baef44e70..1e93f11b3c 100644 --- a/torchtext/data/datasets_utils.py +++ b/torchtext/data/datasets_utils.py @@ -11,7 +11,7 @@ unicode_csv_reader, ) import codecs -import xml.etree.ElementTree as ET +import defusedxml.ElementTree as ET """ These functions and classes are meant solely for use in torchtext.datasets and not for public 
consumption yet. diff --git a/torchtext/legacy/datasets/translation.py b/torchtext/legacy/datasets/translation.py index 275e7cea90..d856e9e264 100644 --- a/torchtext/legacy/datasets/translation.py +++ b/torchtext/legacy/datasets/translation.py @@ -1,5 +1,5 @@ import os -import xml.etree.ElementTree as ET +import defusedxml.ElementTree as ET import glob import io import codecs From 6f8a4372671a58a24144c0aa18c079b76e16b9a1 Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Tue, 30 Mar 2021 13:29:01 -0700 Subject: [PATCH 2/6] Added defusedxml to conda pip requirements --- .circleci/unittest/linux/scripts/environment.yml | 1 + .circleci/unittest/windows/scripts/environment.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/.circleci/unittest/linux/scripts/environment.yml b/.circleci/unittest/linux/scripts/environment.yml index e1e9f2cda8..0c5e7ff9f5 100644 --- a/.circleci/unittest/linux/scripts/environment.yml +++ b/.circleci/unittest/linux/scripts/environment.yml @@ -18,5 +18,6 @@ dependencies: - sphinx - sphinx-rtd-theme - tqdm + - defusedxml - https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-3.0.0/de_core_news_sm-3.0.0.tar.gz#egg=de_core_news_sm==3.0.0 - https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz#egg=en_core_web_sm==3.0.0 diff --git a/.circleci/unittest/windows/scripts/environment.yml b/.circleci/unittest/windows/scripts/environment.yml index 9716f09114..0a653a48ce 100644 --- a/.circleci/unittest/windows/scripts/environment.yml +++ b/.circleci/unittest/windows/scripts/environment.yml @@ -20,5 +20,6 @@ dependencies: - tqdm - certifi - future + - defusedxml - https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-3.0.0/de_core_news_sm-3.0.0.tar.gz#egg=de_core_news_sm==3.0.0 - https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz#egg=en_core_web_sm==3.0.0 
From eefe55be47c21b90d6807c2843cd7cb355f2df2d Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Tue, 30 Mar 2021 16:39:12 -0700 Subject: [PATCH 3/6] Added Bandit and CodeQL integration --- .github/workflows/bandit.yml | 23 +++++++++++++++++++ .github/workflows/codeql.yml | 43 ++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 .github/workflows/bandit.yml create mode 100644 .github/workflows/codeql.yml diff --git a/.github/workflows/bandit.yml b/.github/workflows/bandit.yml new file mode 100644 index 0000000000..93bae80f9b --- /dev/null +++ b/.github/workflows/bandit.yml @@ -0,0 +1,23 @@ +# GitHub Actions Bandit Workflow + +name: Bandit + +on: + pull_request: + branches: [ master ] + + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + + # Task will fail if any high-severity issues are found + # Ignoring submodules + - name: Run Bandit Security Analysis + run: | + python -m pip install bandit + python -m bandit -r . 
-x ./third_party -lll diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000000..b983ccaab7 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,43 @@ +# GitHub Actions CodeQL Workflow + +name: CodeQL + +on: + pull_request: + branches: [ master ] + + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + + - name: Initialize CodeQL + uses: github/codeql-action/init@v1 + with: + languages: python, cpp + + - name: Install Ninja + run: | + sudo apt-get update -y + sudo apt-get install -y ninja-build + + - name: Update submodules + run: git submodule update --init --recursive + + - name: Install Torch + run: | + python -m pip install cmake + python -m pip install torch==1.8.1+cpu -f https://download.pytorch.org/whl/torch_stable.html + sudo ln -s /usr/bin/ninja /usr/bin/ninja-build + + - name: Build TorchText + run: python setup.py develop --user + + # If any code scanning alerts are found, they will be under Security -> CodeQL + # Link: https://github.com/pytorch/text/security/code-scanning + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v1 From eba1ee3be98bb57cabe1333fd7214148c310f9a1 Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Thu, 1 Apr 2021 13:53:31 -0700 Subject: [PATCH 4/6] Fixed regenerate jinja2 bug --- .circleci/regenerate.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.circleci/regenerate.py b/.circleci/regenerate.py index 981474a268..d3fd6131f7 100755 --- a/.circleci/regenerate.py +++ b/.circleci/regenerate.py @@ -15,6 +15,7 @@ """ import jinja2 +from jinja2 import select_autoescape import yaml import os.path @@ -184,7 +185,7 @@ def unittest_workflows(indentation=6): env = jinja2.Environment( loader=jinja2.FileSystemLoader(d), lstrip_blocks=True, - autoescape=True, + autoescape=select_autoescape(enabled_extensions=('html', 'xml')), ) with
open(os.path.join(d, 'config.yml'), 'w') as f: From 2fdec269ba4e28d3826a1aa9c8f4e8ad15d247cc Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Thu, 1 Apr 2021 14:03:49 -0700 Subject: [PATCH 5/6] Removed defusedxml as a hard requirement --- .circleci/unittest/linux/scripts/environment.yml | 1 - .circleci/unittest/windows/scripts/environment.yml | 1 - torchtext/data/datasets_utils.py | 5 ++++- torchtext/legacy/datasets/translation.py | 5 ++++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/.circleci/unittest/linux/scripts/environment.yml b/.circleci/unittest/linux/scripts/environment.yml index 0c5e7ff9f5..e1e9f2cda8 100644 --- a/.circleci/unittest/linux/scripts/environment.yml +++ b/.circleci/unittest/linux/scripts/environment.yml @@ -18,6 +18,5 @@ dependencies: - sphinx - sphinx-rtd-theme - tqdm - - defusedxml - https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-3.0.0/de_core_news_sm-3.0.0.tar.gz#egg=de_core_news_sm==3.0.0 - https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz#egg=en_core_web_sm==3.0.0 diff --git a/.circleci/unittest/windows/scripts/environment.yml b/.circleci/unittest/windows/scripts/environment.yml index 0a653a48ce..9716f09114 100644 --- a/.circleci/unittest/windows/scripts/environment.yml +++ b/.circleci/unittest/windows/scripts/environment.yml @@ -20,6 +20,5 @@ dependencies: - tqdm - certifi - future - - defusedxml - https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-3.0.0/de_core_news_sm-3.0.0.tar.gz#egg=de_core_news_sm==3.0.0 - https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz#egg=en_core_web_sm==3.0.0 diff --git a/torchtext/data/datasets_utils.py b/torchtext/data/datasets_utils.py index 1e93f11b3c..b17df76354 100644 --- a/torchtext/data/datasets_utils.py +++ b/torchtext/data/datasets_utils.py @@ -11,7 +11,10 @@ 
unicode_csv_reader, ) import codecs -import defusedxml.ElementTree as ET +try: + import defusedxml.ElementTree as ET +except ImportError: + import xml.etree.ElementTree as ET """ These functions and classes are meant solely for use in torchtext.datasets and not for public consumption yet. diff --git a/torchtext/legacy/datasets/translation.py b/torchtext/legacy/datasets/translation.py index d856e9e264..6e6bfeb36e 100644 --- a/torchtext/legacy/datasets/translation.py +++ b/torchtext/legacy/datasets/translation.py @@ -1,5 +1,8 @@ import os -import defusedxml.ElementTree as ET +try: + import defusedxml.ElementTree as ET +except ImportError: + import xml.etree.ElementTree as ET import glob import io import codecs From fd524732d4fca6b34906211f1a14f774bbbf2972 Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Mon, 5 Apr 2021 15:40:49 -0700 Subject: [PATCH 6/6] Removed specific bandit changes --- .circleci/regenerate.py | 3 +-- .circleci/utils/test_sort_yaml.py | 2 +- requirements.txt | 5 +---- torchtext/data/datasets_utils.py | 5 +---- torchtext/legacy/datasets/translation.py | 5 +---- 5 files changed, 5 insertions(+), 15 deletions(-) diff --git a/.circleci/regenerate.py b/.circleci/regenerate.py index d3fd6131f7..f963469c58 100755 --- a/.circleci/regenerate.py +++ b/.circleci/regenerate.py @@ -15,7 +15,6 @@ """ import jinja2 -from jinja2 import select_autoescape import yaml import os.path @@ -185,7 +184,7 @@ def unittest_workflows(indentation=6): env = jinja2.Environment( loader=jinja2.FileSystemLoader(d), lstrip_blocks=True, - autoescape=select_autoescape(enabled_extensions=('html', 'xml')), + autoescape=False, ) with open(os.path.join(d, 'config.yml'), 'w') as f: diff --git a/.circleci/utils/test_sort_yaml.py b/.circleci/utils/test_sort_yaml.py index 44ed29af6d..dc6db481dd 100755 --- a/.circleci/utils/test_sort_yaml.py +++ b/.circleci/utils/test_sort_yaml.py @@ -11,4 +11,4 @@ import sys import yaml 
-sys.stdout.write(yaml.dump(yaml.safe_load(sys.stdin, Loader=yaml.FullLoader), sort_keys=True)) +sys.stdout.write(yaml.dump(yaml.load(sys.stdin, Loader=yaml.FullLoader), sort_keys=True)) diff --git a/requirements.txt b/requirements.txt index beb1e7155c..1814cc7521 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,7 +27,4 @@ pytest-pythonpath # Coverage statistics pytest-cov -codecov - -# To parse untrusted XML data -defusedxml \ No newline at end of file +codecov \ No newline at end of file diff --git a/torchtext/data/datasets_utils.py b/torchtext/data/datasets_utils.py index b17df76354..1baef44e70 100644 --- a/torchtext/data/datasets_utils.py +++ b/torchtext/data/datasets_utils.py @@ -11,10 +11,7 @@ unicode_csv_reader, ) import codecs -try: - import defusedxml.ElementTree as ET -except ImportError: - import xml.etree.ElementTree as ET +import xml.etree.ElementTree as ET """ These functions and classes are meant solely for use in torchtext.datasets and not for public consumption yet. diff --git a/torchtext/legacy/datasets/translation.py b/torchtext/legacy/datasets/translation.py index 6e6bfeb36e..275e7cea90 100644 --- a/torchtext/legacy/datasets/translation.py +++ b/torchtext/legacy/datasets/translation.py @@ -1,8 +1,5 @@ import os -try: - import defusedxml.ElementTree as ET -except ImportError: - import xml.etree.ElementTree as ET +import xml.etree.ElementTree as ET import glob import io import codecs