Skip to content

Commit 06e2b2d

Browse files
authored
Add Hive integration tests (#207)
* Add Hive for CI
* Add Hive integration tests
* Add missing licenses
* Fix
* Remove Arrow
* Add catalog
* Update test suite
* Whitespace
1 parent 7deb739 commit 06e2b2d

File tree

10 files changed

+466
-353
lines changed

10 files changed

+466
-353
lines changed

dev/Dockerfile

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,8 @@ WORKDIR ${SPARK_HOME}
3838

3939
ENV SPARK_VERSION=3.4.2
4040
ENV ICEBERG_SPARK_RUNTIME_VERSION=3.4_2.12
41-
ENV ICEBERG_VERSION=1.4.0
42-
ENV AWS_SDK_VERSION=2.20.18
43-
ENV PYICEBERG_VERSION=0.4.0
41+
ENV ICEBERG_VERSION=1.4.2
42+
ENV PYICEBERG_VERSION=0.5.1
4443

4544
RUN curl --retry 3 -s -C - https://dlcdn.apache.org/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop3.tgz -o spark-${SPARK_VERSION}-bin-hadoop3.tgz \
4645
&& tar xzf spark-${SPARK_VERSION}-bin-hadoop3.tgz --directory /opt/spark --strip-components 1 \
@@ -51,8 +50,7 @@ RUN curl -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runt
5150
&& mv iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar /opt/spark/jars
5251

5352
# Download AWS bundle
54-
RUN curl -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-aws-bundle/${ICEBERG_VERSION}/iceberg-aws-bundle-${ICEBERG_VERSION}.jar -Lo iceberg-aws-bundle-${ICEBERG_VERSION}.jar \
55-
&& mv iceberg-aws-bundle-${ICEBERG_VERSION}.jar /opt/spark/jars
53+
RUN curl -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-aws-bundle/${ICEBERG_VERSION}/iceberg-aws-bundle-${ICEBERG_VERSION}.jar -Lo /opt/spark/jars/iceberg-aws-bundle-${ICEBERG_VERSION}.jar
5654

5755
COPY spark-defaults.conf /opt/spark/conf
5856
ENV PATH="/opt/spark/sbin:/opt/spark/bin:${PATH}"
@@ -62,7 +60,7 @@ RUN chmod u+x /opt/spark/sbin/* && \
6260

6361
RUN pip3 install -q ipython
6462

65-
RUN pip3 install "pyiceberg[s3fs]==${PYICEBERG_VERSION}"
63+
RUN pip3 install "pyiceberg[s3fs,hive]==${PYICEBERG_VERSION}"
6664

6765
COPY entrypoint.sh .
6866
COPY provision.py .

dev/docker-compose-integration.yml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ services:
2525
iceberg_net:
2626
depends_on:
2727
- rest
28+
- hive
2829
- minio
2930
volumes:
3031
- ./warehouse:/home/iceberg/warehouse
@@ -37,6 +38,7 @@ services:
3738
- 8080:8080
3839
links:
3940
- rest:rest
41+
- hive:hive
4042
- minio:minio
4143
rest:
4244
image: tabulario/iceberg-rest
@@ -85,5 +87,17 @@ services:
8587
/usr/bin/mc policy set public minio/warehouse;
8688
tail -f /dev/null
8789
"
90+
hive:
91+
build: hive/
92+
container_name: hive
93+
hostname: hive
94+
networks:
95+
iceberg_net:
96+
ports:
97+
- 9083:9083
98+
environment:
99+
SERVICE_NAME: "metastore"
100+
SERVICE_OPTS: "-Dmetastore.warehouse.dir=s3a://warehouse/hive/"
101+
88102
networks:
89103
iceberg_net:

dev/hive/Dockerfile

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one or more
2+
# contributor license agreements. See the NOTICE file distributed with
3+
# this work for additional information regarding copyright ownership.
4+
# The ASF licenses this file to You under the Apache License, Version 2.0
5+
# (the "License"); you may not use this file except in compliance with
6+
# the License. You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
FROM openjdk:8-jre-slim AS build
17+
# Build stage: only downloads the S3A jars, so curl is never installed in the Hive image.
18+
RUN apt-get update -qq && apt-get -qq -y install curl
19+
# AWSSDK_VERSION is the aws-java-sdk-bundle (AWS SDK v1) release fetched alongside hadoop-aws below.
20+
ENV AWSSDK_VERSION=1.11.271
21+
ENV HADOOP_VERSION=3.1.0
22+
# --fail aborts the build on an HTTP error instead of saving the error page as a .jar.
23+
RUN curl --fail https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/${AWSSDK_VERSION}/aws-java-sdk-bundle-${AWSSDK_VERSION}.jar -Lo /tmp/aws-java-sdk-bundle-${AWSSDK_VERSION}.jar
24+
RUN curl --fail https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/${HADOOP_VERSION}/hadoop-aws-${HADOOP_VERSION}.jar -Lo /tmp/hadoop-aws-${HADOOP_VERSION}.jar
25+
26+
# Runtime stage: the apache/hive image plus the two downloaded jars and core-site.xml.
27+
FROM apache/hive:3.1.3
28+
# ENV values do not carry over from the build stage, so the versions are declared again.
29+
ENV AWSSDK_VERSION=1.11.271
30+
ENV HADOOP_VERSION=3.1.0
31+
32+
COPY --from=build /tmp/hadoop-aws-${HADOOP_VERSION}.jar /opt/hive/lib/hadoop-aws-${HADOOP_VERSION}.jar
33+
COPY --from=build /tmp/aws-java-sdk-bundle-${AWSSDK_VERSION}.jar /opt/hive/lib/aws-java-sdk-bundle-${AWSSDK_VERSION}.jar
34+
COPY core-site.xml /opt/hadoop/etc/hadoop/core-site.xml

dev/hive/core-site.xml

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
<?xml version="1.0"?>
2+
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3+
<!--
4+
Licensed to the Apache Software Foundation (ASF) under one or more
5+
contributor license agreements. See the NOTICE file distributed with
6+
this work for additional information regarding copyright ownership.
7+
The ASF licenses this file to You under the Apache License, Version 2.0
8+
(the "License"); you may not use this file except in compliance with
9+
the License. You may obtain a copy of the License at
10+
11+
http://www.apache.org/licenses/LICENSE-2.0
12+
13+
Unless required by applicable law or agreed to in writing, software
14+
distributed under the License is distributed on an "AS IS" BASIS,
15+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
See the License for the specific language governing permissions and
17+
limitations under the License.
18+
-->
19+
<!-- Hadoop S3A settings for the Hive image: plain-HTTP, path-style access to http://minio:9000 with fixed credentials. -->
20+
<configuration>
21+
<property>
22+
<name>fs.defaultFS</name>
23+
<value>s3a://warehouse/hive</value>
24+
</property>
25+
<property>
26+
<name>fs.s3a.impl</name>
27+
<value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
28+
</property>
29+
<property>
30+
<name>fs.s3a.fast.upload</name>
31+
<value>true</value>
32+
</property>
33+
<property>
34+
<name>fs.s3a.endpoint</name>
35+
<value>http://minio:9000</value>
36+
</property>
37+
<property>
38+
<name>fs.s3a.access.key</name>
39+
<value>admin</value>
40+
</property>
41+
<property>
42+
<name>fs.s3a.secret.key</name>
43+
<value>password</value>
44+
</property>
45+
<property>
46+
<name>fs.s3a.connection.ssl.enabled</name>
47+
<value>false</value>
48+
</property>
49+
<property>
50+
<name>fs.s3a.path.style.access</name>
51+
<value>true</value>
52+
</property>
53+
</configuration>

0 commit comments

Comments
 (0)