diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 4282504cc398..ce995428bf9e 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -88,7 +88,7 @@ jobs: run: ./dev/lint-java - name: Python run: | - pip install flake8 sphinx numpy + pip install -r ./dev/requirements-pinned.txt ./dev/lint-python - name: License run: ./dev/check-license @@ -147,8 +147,8 @@ jobs: sudo apt-get install -y libcurl4-openssl-dev pandoc - name: Install packages run: | - pip install sphinx mkdocs numpy - gem install jekyll jekyll-redirect-from rouge + pip install -r ./dev/requirements-pinned.txt + gem install jekyll:4.0.0 jekyll-redirect-from:0.16.0 rouge:3.15.0 sudo Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')" - name: Run jekyll build run: | diff --git a/dev/README.md b/dev/README.md index 2b0f3d8ee892..dd5ce18574e9 100644 --- a/dev/README.md +++ b/dev/README.md @@ -1,5 +1,20 @@ # Spark Developer Scripts + This directory contains scripts useful to developers when packaging, testing, or committing to Spark. Many of these scripts require Apache credentials to work correctly. + +## Managing Python-based Development Requirements + +* For local development, use `requirements.txt`. +* For continuous integration and release engineering, use `requirements-pinned.txt`. +* `requirements-pinned.txt` is generated automatically from `requirements.txt`, so don't update it by hand. + + To update `requirements-pinned.txt`, use pip-tools: + + ```sh + pip install pip-tools + cd dev/ + pip-compile requirements.txt --output-file requirements-pinned.txt + ``` diff --git a/dev/create-release/do-release-docker.sh b/dev/create-release/do-release-docker.sh index 8f53f4a4e13a..351d04607c4b 100755 --- a/dev/create-release/do-release-docker.sh +++ b/dev/create-release/do-release-docker.sh @@ -28,6 +28,7 @@ set -e SELF=$(cd $(dirname $0) && pwd) +SPARK_ROOT="$SELF/../.." . "$SELF/release-util.sh" function usage { @@ -91,6 +92,8 @@ for f in "$SELF"/*; do fi done +cp "$SPARK_ROOT/dev/requirements-pinned.txt" "$WORKDIR/dev-requirements-pinned.txt" + GPG_KEY_FILE="$WORKDIR/gpg.key" fcreate_secure "$GPG_KEY_FILE" $GPG --export-secret-key --armor --pinentry-mode loopback --passphrase "$GPG_PASSPHRASE" "$GPG_KEY" > "$GPG_KEY_FILE" diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile index 44d602415b26..51d7fe1271a6 100644 --- a/dev/create-release/spark-rm/Dockerfile +++ b/dev/create-release/spark-rm/Dockerfile @@ -33,7 +33,7 @@ ENV DEBCONF_NONINTERACTIVE_SEEN true # These arguments are just for reuse and not really meant to be customized. ARG APT_INSTALL="apt-get install --no-install-recommends -y" -ARG PIP_PKGS="sphinx==2.3.1 mkdocs==1.0.4 numpy==1.18.1" +COPY ./dev-requirements-pinned.txt /dev-requirements-pinned.txt ARG GEM_PKGS="jekyll:4.0.0 jekyll-redirect-from:0.16.0 rouge:3.15.0" # Install extra needed repos and refresh. @@ -67,7 +67,7 @@ RUN apt-get clean && apt-get update && $APT_INSTALL gnupg ca-certificates && \ update-alternatives --install /usr/bin/python python /usr/bin/python2.7 1 && \ update-alternatives --install /usr/bin/python python /usr/bin/python3.6 2 && \ update-alternatives --set python /usr/bin/python3.6 && \ - pip3 install $PIP_PKGS && \ + pip3 install -r dev-requirements-pinned.txt && \ # Install R packages and dependencies used when building. # R depends on pandoc*, libssl (which are installed above). $APT_INSTALL r-base r-base-dev && \ diff --git a/dev/requirements-pinned.txt b/dev/requirements-pinned.txt new file mode 100644 index 000000000000..ed76569af415 --- /dev/null +++ b/dev/requirements-pinned.txt @@ -0,0 +1,66 @@ +# +# This file is autogenerated by pip-compile +# To update, run: +# +# pip-compile --output-file=requirements-pinned.txt requirements.txt +# +alabaster==0.7.12 # via sphinx +babel==2.8.0 # via sphinx +certifi==2019.11.28 # via requests +cffi==1.14.0 # via cryptography +chardet==3.0.4 # via requests +click==7.1.1 # via mkdocs, pip-tools +cryptography==2.8 # via oauthlib +defusedxml==0.6.0 # via jira +deprecated==1.2.7 # via pygithub +docutils==0.16 # via sphinx +entrypoints==0.3 # via flake8 +flake8==3.7.9 # via -r requirements.txt +future==0.18.2 # via lunr +idna==2.9 # via requests +imagesize==1.2.0 # via sphinx +jinja2==2.11.1 # via mkdocs, sphinx +jira==2.0.0 # via -r requirements.txt +livereload==2.6.1 # via mkdocs +lunr[languages]==0.5.6 # via mkdocs +markdown==3.2.1 # via mkdocs +markupsafe==1.1.1 # via jinja2 +mccabe==0.6.1 # via flake8 +mkdocs==1.1 # via -r requirements.txt +nltk==3.4.5 # via lunr +numpy==1.18.2 # via -r requirements.txt, pandas, pyarrow +oauthlib[signedtoken]==3.1.0 # via jira, requests-oauthlib +packaging==20.3 # via sphinx +pandas==1.0.3 # via -r requirements.txt +pbr==5.4.4 # via jira +pip-tools==4.5.1 # via -r requirements.txt +pyarrow==0.16.0 # via -r requirements.txt +pycodestyle==2.5.0 # via -r requirements.txt, flake8 +pycparser==2.20 # via cffi +pyflakes==2.1.1 # via flake8 +pygithub==1.47 # via -r requirements.txt +pygments==2.6.1 # via sphinx +pyjwt==1.7.1 # via oauthlib, pygithub +pyparsing==2.4.6 # via packaging +python-dateutil==2.8.1 # via pandas +pytz==2019.3 # via babel, pandas +pyyaml==5.3.1 # via mkdocs +requests-oauthlib==1.3.0 # via jira +requests-toolbelt==0.9.1 # via jira +requests==2.23.0 # via jira, pygithub, requests-oauthlib, requests-toolbelt, sphinx +six==1.14.0 # via cryptography, jira, livereload, lunr, nltk, packaging, pip-tools, pyarrow, python-dateutil +snowballstemmer==2.0.0 # via sphinx +sphinx==2.4.4 # via -r requirements.txt +sphinxcontrib-applehelp==1.0.2 # via sphinx +sphinxcontrib-devhelp==1.0.2 # via sphinx +sphinxcontrib-htmlhelp==1.0.3 # via sphinx +sphinxcontrib-jsmath==1.0.1 # via sphinx +sphinxcontrib-qthelp==1.0.3 # via sphinx +sphinxcontrib-serializinghtml==1.1.4 # via sphinx +tornado==6.0.4 # via livereload, mkdocs +unidecode==1.1.1 # via -r requirements.txt +urllib3==1.25.8 # via requests +wrapt==1.12.1 # via deprecated + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/dev/requirements.txt b/dev/requirements.txt index baea9213dbc9..6341fbbd7b80 100644 --- a/dev/requirements.txt +++ b/dev/requirements.txt @@ -1,5 +1,21 @@ -flake8==3.5.0 -jira==1.0.3 -PyGithub==1.26.0 -Unidecode==0.04.19 +# testing +flake8 +pycodestyle + +# dev/release tools +jira +pip-tools +PyGithub +Unidecode + +# docs +mkdocs sphinx + +# PySpark +# It's difficult to compile dependencies by directly referencing +# python/setup.py due to a limitation of pip-tools. +# See: https://github.com/jazzband/pip-tools/issues/908 +numpy +pandas +pyarrow diff --git a/docs/README.md b/docs/README.md index 22039871cf63..27570cf6ec61 100644 --- a/docs/README.md +++ b/docs/README.md @@ -58,7 +58,7 @@ Note: Other versions of roxygen2 might work in SparkR documentation generation b To generate API docs for any language, you'll need to install these libraries: ```sh -$ sudo pip install sphinx mkdocs numpy +pip install -r ./dev/requirements.txt ``` ## Generating the Documentation HTML