diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index 009ebe90ddf5..458272c11af6 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -9,175 +9,6 @@ on:
- master
jobs:
- # TODO(SPARK-32248): Recover JDK 11 builds
- # Build: build Spark and run the tests for specified modules.
- build:
- name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
- runs-on: ubuntu-latest
- strategy:
- fail-fast: false
- matrix:
- java:
- - 1.8
- hadoop:
- - hadoop3.2
- hive:
- - hive2.3
- # TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now.
- # Kinesis tests depend on the external Amazon Kinesis service.
- # Note that the modules below are from sparktestsupport/modules.py.
- modules:
- - |-
- core, unsafe, kvstore, avro,
- network-common, network-shuffle, repl, launcher,
- examples, sketch, graphx
- - |-
- catalyst, hive-thriftserver
- - |-
- streaming, sql-kafka-0-10, streaming-kafka-0-10,
- mllib-local, mllib,
- yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl
- - |-
- pyspark-sql, pyspark-mllib, pyspark-resource
- - |-
- pyspark-core, pyspark-streaming, pyspark-ml
- - |-
- sparkr
- # Here, we split the Hive and SQL tests into the slow ones and the rest.
- included-tags: [""]
- excluded-tags: [""]
- comment: [""]
- include:
- # Hive tests
- - modules: hive
- java: 1.8
- hadoop: hadoop3.2
- hive: hive2.3
- included-tags: org.apache.spark.tags.SlowHiveTest
- comment: "- slow tests"
- - modules: hive
- java: 1.8
- hadoop: hadoop3.2
- hive: hive2.3
- excluded-tags: org.apache.spark.tags.SlowHiveTest
- comment: "- other tests"
- # SQL tests
- - modules: sql
- java: 1.8
- hadoop: hadoop3.2
- hive: hive2.3
- included-tags: org.apache.spark.tags.ExtendedSQLTest
- comment: "- slow tests"
- - modules: sql
- java: 1.8
- hadoop: hadoop3.2
- hive: hive2.3
- excluded-tags: org.apache.spark.tags.ExtendedSQLTest
- comment: "- other tests"
- env:
- MODULES_TO_TEST: ${{ matrix.modules }}
- EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
- INCLUDED_TAGS: ${{ matrix.included-tags }}
- HADOOP_PROFILE: ${{ matrix.hadoop }}
- HIVE_PROFILE: ${{ matrix.hive }}
- # GitHub Actions' default miniconda to use in the pip packaging test.
- CONDA_PREFIX: /usr/share/miniconda
- GITHUB_PREV_SHA: ${{ github.event.before }}
- steps:
- - name: Checkout Spark repository
- uses: actions/checkout@v2
- # Fetch the full history in order to detect changed files.
- with:
- fetch-depth: 0
- # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
- - name: Cache Scala, SBT, Maven and Zinc
- uses: actions/cache@v1
- with:
- path: build
- key: build-${{ hashFiles('**/pom.xml') }}
- restore-keys: |
- build-
- - name: Cache Maven local repository
- uses: actions/cache@v2
- with:
- path: ~/.m2/repository
- key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{ hashFiles('**/pom.xml') }}
- restore-keys: |
- ${{ matrix.java }}-${{ matrix.hadoop }}-maven-
- - name: Cache Ivy local repository
- uses: actions/cache@v2
- with:
- path: ~/.ivy2/cache
- key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{ hashFiles('**/pom.xml') }}-${{ hashFiles('**/plugins.sbt') }}
- restore-keys: |
- ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-
- - name: Install JDK ${{ matrix.java }}
- uses: actions/setup-java@v1
- with:
- java-version: ${{ matrix.java }}
- # PySpark
- - name: Install PyPy3
- # Note that the order of Python installations here matters because the default
- # python3 is overridden by pypy3.
- uses: actions/setup-python@v2
- if: contains(matrix.modules, 'pyspark')
- with:
- python-version: pypy3
- architecture: x64
- - name: Install Python 3.6
- uses: actions/setup-python@v2
- if: contains(matrix.modules, 'pyspark')
- with:
- python-version: 3.6
- architecture: x64
- - name: Install Python 3.8
- uses: actions/setup-python@v2
- # We should install one Python version higher than 3 for SQL and Yarn because:
- # - The SQL component also has Python-related tests, for example, IntegratedUDFTestUtils.
- # - Yarn has a Python-specific test too, for example, YarnClusterSuite.
- if: contains(matrix.modules, 'yarn') || contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
- with:
- python-version: 3.8
- architecture: x64
- - name: Install Python packages (Python 3.6 and PyPy3)
- if: contains(matrix.modules, 'pyspark')
- # PyArrow is not supported in PyPy yet, see ARROW-2651.
- # TODO(SPARK-32247): scipy installation with PyPy fails for an unknown reason.
- run: |
- python3.6 -m pip install numpy pyarrow pandas scipy
- python3.6 -m pip list
- pypy3 -m pip install numpy pandas
- pypy3 -m pip list
- - name: Install Python packages (Python 3.8)
- if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
- run: |
- python3.8 -m pip install numpy pyarrow pandas scipy
- python3.8 -m pip list
- # SparkR
- - name: Install R 4.0
- if: contains(matrix.modules, 'sparkr')
- run: |
- sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' >> /etc/apt/sources.list"
- curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add
- sudo apt-get update
- sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev
- - name: Install R packages
- if: contains(matrix.modules, 'sparkr')
- run: |
- # qpdf is required to reduce the size of PDFs and make the CRAN check pass. See SPARK-32497.
- sudo apt-get install -y libcurl4-openssl-dev qpdf
- sudo Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'devtools', 'e1071', 'survival', 'arrow', 'roxygen2'), repos='https://cloud.r-project.org/')"
- # Show installed packages in R.
- sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]'
- # Run the tests.
- - name: "Run tests: ${{ matrix.modules }}"
- run: |
- # Hive tests become flaky when run in parallel, as they are too resource-intensive.
- if [[ "$MODULES_TO_TEST" == "hive" ]]; then export SERIAL_SBT_TESTS=1; fi
- mkdir -p ~/.m2
- ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
- rm -rf ~/.m2/repository/org/apache/spark
-
# Static analysis and documentation build
lint:
name: Linters, licenses, dependencies and documentation generation
@@ -185,89 +16,9 @@ jobs:
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
- - name: Cache Maven local repository
- uses: actions/cache@v2
- with:
- path: ~/.m2/repository
- key: docs-maven-repo-${{ hashFiles('**/pom.xml') }}
- restore-keys: |
- docs-maven-
- - name: Install JDK 1.8
- uses: actions/setup-java@v1
- with:
- java-version: 1.8
- - name: Install Python 3.6
- uses: actions/setup-python@v2
- with:
- python-version: 3.6
- architecture: x64
- - name: Install Python linter dependencies
- run: |
- # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
- # See also https://github.com/sphinx-doc/sphinx/issues/7551.
- pip3 install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme
- - name: Install R 4.0
- run: |
- sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' >> /etc/apt/sources.list"
- curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add
- sudo apt-get update
- sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev
- - name: Install R linter dependencies and SparkR
- run: |
- sudo apt-get install -y libcurl4-openssl-dev
- sudo Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')"
- sudo Rscript -e "devtools::install_github('jimhester/lintr@v2.0.0')"
- ./R/install-dev.sh
- - name: Install Ruby 2.7 for documentation generation
- uses: actions/setup-ruby@v1
- with:
- ruby-version: 2.7
- - name: Install dependencies for documentation generation
- run: |
- sudo apt-get install -y libcurl4-openssl-dev pandoc
- # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
- # See also https://github.com/sphinx-doc/sphinx/issues/7551.
- pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme
- gem install jekyll jekyll-redirect-from rouge
- sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')"
- - name: Scala linter
- run: ./dev/lint-scala
- - name: Java linter
- run: ./dev/lint-java
- - name: Python linter
- run: ./dev/lint-python
- - name: R linter
- run: ./dev/lint-r
- - name: License test
- run: ./dev/check-license
- - name: Dependencies test
- run: ./dev/test-dependencies.sh
- - name: Run documentation build
- run: |
- cd docs
- jekyll build
-
- java11:
- name: Java 11 build
- runs-on: ubuntu-latest
- steps:
- - name: Checkout Spark repository
- uses: actions/checkout@v2
- - name: Cache Maven local repository
- uses: actions/cache@v2
- with:
- path: ~/.m2/repository
- key: java11-maven-${{ hashFiles('**/pom.xml') }}
- restore-keys: |
- java11-maven-
- - name: Install Java 11
- uses: actions/setup-java@v1
- with:
- java-version: 11
- - name: Build with Maven
- run: |
- export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
- export MAVEN_CLI_OPTS="--no-transfer-progress"
- mkdir -p ~/.m2
- ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 install
- rm -rf ~/.m2/repository/org/apache/spark
+ - name: Lint Code Base
+ uses: docker://github/super-linter:v3
+ env:
+ DEFAULT_BRANCH: master
+ VALIDATE_MD: true
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/docs/monitoring.md b/docs/monitoring.md
index 247957d087fa..2ab7b30a1dca 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -573,7 +573,6 @@ can be identified by their `[attempt-id]`. In the API listed below, when running
?planDescription=[true (default) | false] enables/disables Physical planDescription on demand when Physical Plan size is high.
?offset=[offset]&length=[len] lists queries in the given range.
-
/applications/[app-id]/sql/[execution-id]?details=[true (default) | false] lists/hides metric details in addition to given query details.
?planDescription=[true (default) | false] enables/disables Physical planDescription on demand for the given query when Physical Plan size is high.
-
/applications/[app-id]/environment