1 | 1 | name: master |
2 | 2 |
3 | 3 | on: |
4 | | - push: |
5 | | - branches: |
6 | | - - master |
7 | 4 | pull_request: |
8 | 5 | branches: |
9 | 6 | - master |
10 | 7 |
11 | 8 | jobs: |
| 9 | + # TODO(SPARK-32248): Recover JDK 11 builds |
| 10 | + # Build: build Spark and run the tests for the specified modules. |
12 | 11 | build: |
13 | | - |
| 12 | + name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})" |
14 | 13 | runs-on: ubuntu-latest |
15 | 14 | strategy: |
| 15 | + fail-fast: false |
16 | 16 | matrix: |
17 | | - java: [ '1.8', '11' ] |
18 | | - hadoop: [ 'hadoop-2.7', 'hadoop-3.2' ] |
19 | | - hive: [ 'hive-1.2', 'hive-2.3' ] |
20 | | - exclude: |
21 | | - - java: '11' |
22 | | - hive: 'hive-1.2' |
23 | | - - hadoop: 'hadoop-3.2' |
24 | | - hive: 'hive-1.2' |
25 | | - name: Build Spark - JDK${{ matrix.java }}/${{ matrix.hadoop }}/${{ matrix.hive }} |
26 | | - |
| 17 | + java: |
| 18 | + - 1.8 |
| 19 | + hadoop: |
| 20 | + - hadoop3.2 |
| 21 | + hive: |
| 22 | + - hive2.3 |
| 23 | + # TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now. |
| 24 | + # Kinesis tests depend on the external Amazon Kinesis service. |
| 25 | + # Note that the modules below are from sparktestsupport/modules.py. |
| 26 | + modules: |
| 27 | + - |- |
| 28 | + core, unsafe, kvstore, avro, |
| 29 | + network_common, network_shuffle, repl, launcher, |
| 30 | + examples, sketch, graphx |
| 31 | + - |- |
| 32 | + catalyst, hive-thriftserver |
| 33 | + - |- |
| 34 | + streaming, sql-kafka-0-10, streaming-kafka-0-10, |
| 35 | + mllib-local, mllib, |
| 36 | + yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl |
| 37 | + - |- |
| 38 | + pyspark-sql, pyspark-mllib, pyspark-resource |
| 39 | + - |- |
| 40 | + pyspark-core, pyspark-streaming, pyspark-ml |
| 41 | + - |- |
| 42 | + sparkr |
| 43 | + # Here, we split the Hive and SQL tests into the slow ones and the rest. |
| 44 | + included-tags: [""] |
| 45 | + excluded-tags: [""] |
| 46 | + comment: [""] |
| 47 | + include: |
| 48 | + # Hive tests |
| 49 | + - modules: hive |
| 50 | + java: 1.8 |
| 51 | + hadoop: hadoop3.2 |
| 52 | + hive: hive2.3 |
| 53 | + included-tags: org.apache.spark.tags.SlowHiveTest |
| 54 | + comment: "- slow tests" |
| 55 | + - modules: hive |
| 56 | + java: 1.8 |
| 57 | + hadoop: hadoop3.2 |
| 58 | + hive: hive2.3 |
| 59 | + excluded-tags: org.apache.spark.tags.SlowHiveTest |
| 60 | + comment: "- other tests" |
| 61 | + # SQL tests |
| 62 | + - modules: sql |
| 63 | + java: 1.8 |
| 64 | + hadoop: hadoop3.2 |
| 65 | + hive: hive2.3 |
| 66 | + included-tags: org.apache.spark.tags.ExtendedSQLTest |
| 67 | + comment: "- slow tests" |
| 68 | + - modules: sql |
| 69 | + java: 1.8 |
| 70 | + hadoop: hadoop3.2 |
| 71 | + hive: hive2.3 |
| 72 | + excluded-tags: org.apache.spark.tags.ExtendedSQLTest |
| 73 | + comment: "- other tests" |
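The `include` entries above lean on standard GitHub Actions matrix semantics: an entry whose values do not match any combination generated from the base lists is appended as an extra standalone job, which is how the four slow/other Hive and SQL jobs are produced. A minimal sketch of the mechanism (values are illustrative, not from this workflow):

```yaml
# The base lists expand to a single job (modules: core). The include
# entry sets modules: hive, which matches no generated combination, so
# it is appended as a second, standalone job with its own tag filter.
strategy:
  fail-fast: false
  matrix:
    modules: [core]
    included-tags: [""]
    include:
      - modules: hive
        included-tags: org.apache.spark.tags.SlowHiveTest
```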
| 74 | + env: |
| 75 | + TEST_ONLY_MODULES: ${{ matrix.modules }} |
| 76 | + TEST_ONLY_EXCLUDED_TAGS: ${{ matrix.excluded-tags }} |
| 77 | + TEST_ONLY_INCLUDED_TAGS: ${{ matrix.included-tags }} |
| 78 | + HADOOP_PROFILE: ${{ matrix.hadoop }} |
| 79 | + HIVE_PROFILE: ${{ matrix.hive }} |
| 80 | + # GitHub Actions' default miniconda, used in the pip packaging test. |
| 81 | + CONDA_PREFIX: /usr/share/miniconda |
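Each generated job reaches `./dev/run-tests` purely through these job-level variables; the steps themselves are identical across all combinations. A self-contained sketch of the pattern (workflow and job names are hypothetical; only `TEST_ONLY_MODULES` is taken from above):

```yaml
# Matrix values are interpolated into the job-level env once per
# generated job, so each step only ever sees its own combination.
on: push
jobs:
  print-modules:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        modules: [core, sql]
    env:
      TEST_ONLY_MODULES: ${{ matrix.modules }}
    steps:
      - run: echo "Testing modules: $TEST_ONLY_MODULES"
```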
27 | 82 | steps: |
28 | | - - uses: actions/checkout@master |
29 | | - # We split caches because GitHub Action Cache has a 400MB-size limit. |
30 | | - - uses: actions/cache@v1 |
| 83 | + - name: Checkout Spark repository |
| 84 | + uses: actions/checkout@v2 |
| 85 | + # Cache local repositories. Note that the GitHub Actions cache has a 2 GB limit. |
| 86 | + - name: Cache Scala, SBT, Maven and Zinc |
| 87 | + uses: actions/cache@v1 |
31 | 88 | with: |
32 | 89 | path: build |
33 | 90 | key: build-${{ hashFiles('**/pom.xml') }} |
34 | 91 | restore-keys: | |
35 | 92 | build- |
36 | | - - uses: actions/cache@v1 |
37 | | - with: |
38 | | - path: ~/.m2/repository/com |
39 | | - key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-com-${{ hashFiles('**/pom.xml') }} |
40 | | - restore-keys: | |
41 | | - ${{ matrix.java }}-${{ matrix.hadoop }}-maven-com- |
42 | | - - uses: actions/cache@v1 |
| 93 | + - name: Cache Maven local repository |
| 94 | + uses: actions/cache@v2 |
43 | 95 | with: |
44 | | - path: ~/.m2/repository/org |
45 | | - key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-org-${{ hashFiles('**/pom.xml') }} |
46 | | - restore-keys: | |
47 | | - ${{ matrix.java }}-${{ matrix.hadoop }}-maven-org- |
48 | | - - uses: actions/cache@v1 |
49 | | - with: |
50 | | - path: ~/.m2/repository/net |
51 | | - key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-net-${{ hashFiles('**/pom.xml') }} |
| 96 | + path: ~/.m2/repository |
| 97 | + key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{ hashFiles('**/pom.xml') }} |
52 | 98 | restore-keys: | |
53 | | - ${{ matrix.java }}-${{ matrix.hadoop }}-maven-net- |
54 | | - - uses: actions/cache@v1 |
| 99 | + ${{ matrix.java }}-${{ matrix.hadoop }}-maven- |
| 100 | + - name: Cache Ivy local repository |
| 101 | + uses: actions/cache@v2 |
55 | 102 | with: |
56 | | - path: ~/.m2/repository/io |
57 | | - key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-io-${{ hashFiles('**/pom.xml') }} |
| 103 | + path: ~/.ivy2/cache |
| 104 | + key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{ hashFiles('**/pom.xml') }}-${{ hashFiles('**/plugins.sbt') }} |
58 | 105 | restore-keys: | |
59 | | - ${{ matrix.java }}-${{ matrix.hadoop }}-maven-io- |
60 | | - - name: Set up JDK ${{ matrix.java }} |
| 106 | + ${{ matrix.java }}-${{ matrix.hadoop }}-ivy- |
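All of the cache steps here follow the same key/restore-keys pattern: the exact key misses whenever the hashed files change, and `restore-keys` then restores the most recent cache whose key starts with the given prefix, so a build rarely starts from an empty local repository. A stripped-down sketch (path and prefix illustrative):

```yaml
# On an exact-key miss (e.g. pom.xml changed), fall back to the newest
# cache whose key starts with "maven-"; at the end of the job a fresh
# cache is saved under the new exact key.
- uses: actions/cache@v2
  with:
    path: ~/.m2/repository
    key: maven-${{ hashFiles('**/pom.xml') }}
    restore-keys: |
      maven-
```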
| 107 | + - name: Install JDK ${{ matrix.java }} |
61 | 108 | uses: actions/setup-java@v1 |
62 | 109 | with: |
63 | 110 | java-version: ${{ matrix.java }} |
64 | | - - name: Build with Maven |
65 | | - run: | |
66 | | - export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" |
67 | | - export MAVEN_CLI_OPTS="--no-transfer-progress" |
68 | | - mkdir -p ~/.m2 |
69 | | - ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -P${{ matrix.hive }} -Phive-thriftserver -P${{ matrix.hadoop }} -Phadoop-cloud -Djava.version=${{ matrix.java }} install |
70 | | - rm -rf ~/.m2/repository/org/apache/spark |
71 | | -
72 | | -
73 | | - lint: |
74 | | - runs-on: ubuntu-latest |
75 | | - name: Linters (Java/Scala/Python), licenses, dependencies |
76 | | - steps: |
77 | | - - uses: actions/checkout@master |
78 | | - - uses: actions/setup-java@v1 |
| 111 | + # PySpark |
| 112 | + - name: Install PyPy3 |
| 113 | + # The SQL component also has Python-related tests, for example, IntegratedUDFTestUtils. |
| 114 | + # Note that the order of Python installations here matters because the default |
| 115 | + # python3 is overridden by pypy3. |
| 116 | + uses: actions/setup-python@v2 |
| 117 | + if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) |
79 | 118 | with: |
80 | | - java-version: '11' |
81 | | - - uses: actions/setup-python@v1 |
| 119 | + python-version: pypy3 |
| 120 | + architecture: x64 |
| 121 | + - name: Install Python 2.7 |
| 122 | + uses: actions/setup-python@v2 |
| 123 | + if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) |
82 | 124 | with: |
83 | | - python-version: '3.x' |
84 | | - architecture: 'x64' |
85 | | - - name: Scala |
86 | | - run: ./dev/lint-scala |
87 | | - - name: Java |
88 | | - run: ./dev/lint-java |
89 | | - - name: Python |
90 | | - run: | |
91 | | - pip install flake8 sphinx numpy |
92 | | - ./dev/lint-python |
93 | | - - name: License |
94 | | - run: ./dev/check-license |
95 | | - - name: Dependencies |
96 | | - run: ./dev/test-dependencies.sh |
97 | | - |
98 | | - lintr: |
99 | | - runs-on: ubuntu-latest |
100 | | - name: Linter (R) |
101 | | - steps: |
102 | | - - uses: actions/checkout@master |
103 | | - - uses: actions/setup-java@v1 |
| 125 | + python-version: 2.7 |
| 126 | + architecture: x64 |
| 127 | + - name: Install Python 3.6 |
| 128 | + uses: actions/setup-python@v2 |
| 129 | + if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) |
104 | 130 | with: |
105 | | - java-version: '11' |
106 | | - - uses: r-lib/actions/setup-r@v1 |
| 131 | + python-version: 3.6 |
| 132 | + architecture: x64 |
| 133 | + - name: Install Python packages |
| 134 | + if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) |
| 135 | + # PyArrow is not supported in PyPy yet, see ARROW-2651. |
| 136 | + # TODO(SPARK-32247): scipy installation with PyPy fails for an unknown reason. |
| 137 | + run: | |
| 138 | + python3 -m pip install numpy pyarrow pandas scipy |
| 139 | + python3 -m pip list |
| 140 | + python2 -m pip install numpy pyarrow pandas scipy |
| 141 | + python2 -m pip list |
| 142 | + pypy3 -m pip install numpy pandas |
| 143 | + pypy3 -m pip list |
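The `if:` guard repeated on the Python steps above deserves a note: `contains()` does plain substring matching, so the `!contains(matrix.modules, 'sql-')` clause is what keeps jobs like `sql-kafka-0-10` from installing the Python toolchain while the bare `sql` jobs still do. Illustrative evaluations against module strings from the matrix:

```yaml
# if: contains(matrix.modules, 'pyspark')
#     || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
#
#   "pyspark-sql, pyspark-mllib, pyspark-resource"  -> 'pyspark' found       -> run
#   "sql"                                           -> 'sql' yes, 'sql-' no  -> run
#   "streaming, sql-kafka-0-10, ..."                -> 'sql-' found          -> skip
#   "catalyst, hive-thriftserver"                   -> no match              -> skip
```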
| 144 | + # SparkR |
| 145 | + - name: Install R 3.6 |
| 146 | + uses: r-lib/actions/setup-r@v1 |
| 147 | + if: contains(matrix.modules, 'sparkr') |
107 | 148 | with: |
108 | | - r-version: '3.6.2' |
109 | | - - name: Install lib |
| 149 | + r-version: 3.6 |
| 150 | + - name: Install R packages |
| 151 | + if: contains(matrix.modules, 'sparkr') |
110 | 152 | run: | |
111 | 153 | sudo apt-get install -y libcurl4-openssl-dev |
112 | | - - name: install R packages |
| 154 | + sudo Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'devtools', 'e1071', 'survival', 'arrow', 'roxygen2'), repos='https://cloud.r-project.org/')" |
| 155 | + # Show installed packages in R. |
| 156 | + sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]' |
| 157 | + # Run the tests. |
| 158 | + - name: "Run tests: ${{ matrix.modules }}" |
113 | 159 | run: | |
114 | | - sudo Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')" |
115 | | - sudo Rscript -e "devtools::install_github('jimhester/[email protected]')" |
116 | | - - name: package and install SparkR |
117 | | - run: ./R/install-dev.sh |
118 | | - - name: lint-r |
119 | | - run: ./dev/lint-r |
| 160 | + # Hive tests become flaky when run in parallel, as they are too resource-intensive. |
| 161 | + if [[ "$TEST_ONLY_MODULES" == "hive" ]]; then export SERIAL_SBT_TESTS=1; fi |
| 162 | + mkdir -p ~/.m2 |
| 163 | + ./dev/run-tests --parallelism 2 |
| 164 | + rm -rf ~/.m2/repository/org/apache/spark |
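Since this step consumes only environment variables, any one matrix job can be spelled out as a standalone step. A hypothetical expansion of the "hive - slow tests" combination, with the serial-SBT guard already resolved:

```yaml
# Hypothetical flattened step; values copied from the include entry above.
- name: "Run tests: hive - slow tests"
  env:
    TEST_ONLY_MODULES: hive
    TEST_ONLY_INCLUDED_TAGS: org.apache.spark.tags.SlowHiveTest
    HADOOP_PROFILE: hadoop3.2
    HIVE_PROFILE: hive2.3
  run: |
    # modules == hive, so the guard above enables serial SBT tests.
    export SERIAL_SBT_TESTS=1
    ./dev/run-tests --parallelism 2
```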
120 | 165 |
121 | | - docs: |
| 166 | + # Static analysis and documentation build |
| 167 | + lint: |
| 168 | + name: Linters, licenses, dependencies and documentation generation |
122 | 169 | runs-on: ubuntu-latest |
123 | | - name: Generate documents |
124 | 170 | steps: |
125 | | - - uses: actions/checkout@master |
126 | | - - uses: actions/cache@v1 |
| 171 | + - name: Checkout Spark repository |
| 172 | + uses: actions/checkout@v2 |
| 173 | + - name: Cache Maven local repository |
| 174 | + uses: actions/cache@v2 |
127 | 175 | with: |
128 | 176 | path: ~/.m2/repository |
129 | 177 | key: docs-maven-repo-${{ hashFiles('**/pom.xml') }} |
130 | 178 | restore-keys: | |
131 | | - docs-maven-repo- |
132 | | - - uses: actions/setup-java@v1 |
| 179 | + docs-maven- |
| 180 | + - name: Install JDK 1.8 |
| 181 | + uses: actions/setup-java@v1 |
133 | 182 | with: |
134 | | - java-version: '1.8' |
135 | | - - uses: actions/setup-python@v1 |
| 183 | + java-version: 1.8 |
| 184 | + - name: Install Python 3.6 |
| 185 | + uses: actions/setup-python@v2 |
136 | 186 | with: |
137 | | - python-version: '3.x' |
138 | | - architecture: 'x64' |
139 | | - - uses: actions/setup-ruby@v1 |
| 187 | + python-version: 3.6 |
| 188 | + architecture: x64 |
| 189 | + - name: Install Python linter dependencies |
| 190 | + run: | |
| 191 | + pip3 install flake8 sphinx numpy |
| 192 | + - name: Install R 3.6 |
| 193 | + uses: r-lib/actions/setup-r@v1 |
140 | 194 | with: |
141 | | - ruby-version: '2.7' |
142 | | - - uses: r-lib/actions/setup-r@v1 |
| 195 | + r-version: 3.6 |
| 196 | + - name: Install R linter dependencies and SparkR |
| 197 | + run: | |
| 198 | + sudo apt-get install -y libcurl4-openssl-dev |
| 199 | + sudo Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')" |
| 200 | + sudo Rscript -e "devtools::install_github('jimhester/[email protected]')" |
| 201 | + ./R/install-dev.sh |
| 202 | + - name: Install Ruby 2.7 for documentation generation |
| 203 | + uses: actions/setup-ruby@v1 |
143 | 204 | with: |
144 | | - r-version: '3.6.2' |
145 | | - - name: Install lib and pandoc |
| 205 | + ruby-version: 2.7 |
| 206 | + - name: Install dependencies for documentation generation |
146 | 207 | run: | |
147 | 208 | sudo apt-get install -y libcurl4-openssl-dev pandoc |
148 | | - - name: Install packages |
149 | | - run: | |
150 | 209 | pip install sphinx mkdocs numpy |
151 | 210 | gem install jekyll jekyll-redirect-from rouge |
152 | | - sudo Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')" |
153 | | - - name: Run jekyll build |
| 211 | + sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')" |
| 212 | + - name: Scala linter |
| 213 | + run: ./dev/lint-scala |
| 214 | + - name: Java linter |
| 215 | + run: ./dev/lint-java |
| 216 | + - name: Python linter |
| 217 | + run: ./dev/lint-python |
| 218 | + - name: R linter |
| 219 | + run: ./dev/lint-r |
| 220 | + - name: License test |
| 221 | + run: ./dev/check-license |
| 222 | + - name: Dependencies test |
| 223 | + run: ./dev/test-dependencies.sh |
| 224 | + - name: Run documentation build |
154 | 225 | run: | |
155 | 226 | cd docs |
156 | 227 | jekyll build |