From 4ade1128871fd43a066559bb2f51b51459870262 Mon Sep 17 00:00:00 2001 From: Yong Date: Sat, 1 Nov 2025 17:17:08 -0500 Subject: [PATCH 1/6] Refactor: improve and clean up Dockerfiles --- getting-started/spark/notebooks/Dockerfile | 4 +- .../v3.5/getting-started/notebooks/Dockerfile | 10 ++-- .../notebooks/SparkPolaris.ipynb | 1 - plugins/spark/v3.5/regtests/Dockerfile | 32 ++++++------- regtests/Dockerfile | 47 +++++++++---------- site/docker/Dockerfile | 21 +++------ 6 files changed, 50 insertions(+), 65 deletions(-) diff --git a/getting-started/spark/notebooks/Dockerfile b/getting-started/spark/notebooks/Dockerfile index 32ee4067b6..bb57ee65ff 100644 --- a/getting-started/spark/notebooks/Dockerfile +++ b/getting-started/spark/notebooks/Dockerfile @@ -19,8 +19,8 @@ FROM docker.io/apache/spark:3.5.6-java17 -ENV PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages" -ENV PYSPARK_PYTHON=/home/spark/venv/bin/python +ENV PYSPARK_PYTHON=/home/spark/venv/bin/python \ + PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages" USER root diff --git a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile index f5e052b2a3..0a480044be 100644 --- a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile +++ b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile @@ -19,8 +19,11 @@ FROM docker.io/apache/spark:3.5.6-java17 -ENV PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages" -ENV PYSPARK_PYTHON=/home/spark/venv/bin/python +ARG CURRENT_SCALA_VERSION=2.12 + +ENV CURRENT_SCALA_VERSION=${CURRENT_SCALA_VERSION} \ + PYSPARK_PYTHON=/home/spark/venv/bin/python \ + PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages" USER root @@ -36,8 +39,7 @@ WORKDIR /home/spark COPY --chown=spark client /home/spark/client COPY --chown=spark regtests/requirements.txt /tmp COPY --chown=spark regtests/notebook_requirements.txt /tmp -COPY --chown=spark plugins/spark/v3.5/spark/build/2.12/libs /home/spark/polaris_libs - +COPY --chown=spark plugins/spark/v3.5/spark/build/${CURRENT_SCALA_VERSION}/libs/*bundle.jar /opt/spark/jars/ RUN python3 -m venv /home/spark/venv && \ . /home/spark/venv/bin/activate && \ diff --git a/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb b/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb index 8e2be2918e..cd4a61d571 100644 --- a/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb +++ b/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb @@ -265,7 +265,6 @@ "from pyspark.sql import SparkSession\n", "\n", "spark = (SparkSession.builder\n", - " .config(\"spark.jars\", \"../polaris_libs/polaris-spark-3.5_2.12-1.2.0-incubating-SNAPSHOT-bundle.jar\") # TODO: add a way to automatically discover the Jar\n", " .config(\"spark.jars.packages\", \"org.apache.iceberg:iceberg-aws-bundle:1.10.0,io.delta:delta-spark_2.12:3.2.1\")\n", " .config(\"spark.sql.catalog.spark_catalog\", \"org.apache.spark.sql.delta.catalog.DeltaCatalog\")\n", " .config('spark.sql.iceberg.vectorization.enabled', 'false')\n", diff --git a/plugins/spark/v3.5/regtests/Dockerfile b/plugins/spark/v3.5/regtests/Dockerfile index db84d3eb1f..37bee03280 100755 --- a/plugins/spark/v3.5/regtests/Dockerfile +++ b/plugins/spark/v3.5/regtests/Dockerfile @@ -18,31 +18,29 @@ # FROM docker.io/apache/spark:3.5.6-java17 -ARG POLARIS_HOST=polaris -ENV POLARIS_HOST=$POLARIS_HOST -ENV SPARK_HOME=/opt/spark -ENV CURRENT_SCALA_VERSION='2.12' -ENV LANGUAGE='en_US:en' + +ARG POLARIS_HOST=polaris \ + CURRENT_SCALA_VERSION=2.12 + +ENV POLARIS_HOST=${POLARIS_HOST} \ + CURRENT_SCALA_VERSION=${CURRENT_SCALA_VERSION} \ USER root -RUN apt update -RUN apt-get install -y diffutils wget curl -RUN mkdir -p /home/spark && \ - chown -R spark /home/spark && \ - mkdir -p /tmp/polaris-regtests && \ - chown -R spark /tmp/polaris-regtests -RUN mkdir /opt/spark/conf && chmod -R 777 /opt/spark/conf -USER spark +RUN apt-get update && \ + apt-get install -y --no-install-recommends diffutils wget curl && \ + rm -rf /var/lib/apt/lists/* && \ + mkdir -p /home/spark /tmp/polaris-regtests /opt/spark/conf && \ + chown -R spark:spark /home/spark /tmp/polaris-regtests && \ + chmod -R 777 /opt/spark/conf WORKDIR /home/spark/polaris -COPY --chown=spark ./v3.5 /home/spark/polaris/v3.5 +COPY --chown=spark:spark ./v3.5 /home/spark/polaris/v3.5 # /home/spark/regtests might not be writable in all situations, see https://github.com/apache/polaris/pull/205 -USER root -RUN chmod -R go+rwx /home/spark/polaris -RUN chmod -R 777 ./v3.5/regtests +RUN chmod -R 777 /home/spark/polaris/v3.5/regtests + USER spark ENTRYPOINT ["./v3.5/regtests/run.sh"] diff --git a/regtests/Dockerfile b/regtests/Dockerfile index 88fa13ddaf..183701a5cf 100644 --- a/regtests/Dockerfile +++ b/regtests/Dockerfile @@ -18,45 +18,40 @@ # FROM docker.io/apache/spark:3.5.6-java17-python3 + ARG POLARIS_HOST=polaris -ENV POLARIS_HOST=$POLARIS_HOST -ENV SPARK_HOME=/opt/spark -ENV LANGUAGE='en_US:en' + +ENV POLARIS_HOST=${POLARIS_HOST} \ + PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip" USER root -RUN apt update -RUN apt-get install -y diffutils wget curl python3.10-venv jq -RUN mkdir -p /home/spark && \ - chown -R spark /home/spark && \ - mkdir -p /tmp/polaris-regtests && \ - chown -R spark /tmp/polaris-regtests -RUN mkdir /opt/spark/conf && chmod -R 777 /opt/spark/conf + +RUN apt-get update && \ + apt-get install -y --no-install-recommends diffutils wget curl python3.10-venv jq && \ + rm -rf /var/lib/apt/lists/* && \ + mkdir -p /home/spark /tmp/polaris-regtests /opt/spark/conf && \ + chown -R spark:spark /home/spark /tmp/polaris-regtests && \ + chmod -R 777 /opt/spark/conf + +COPY --chown=spark:spark ./regtests/setup.sh ./regtests/pyspark-setup.sh ./regtests/requirements.txt /home/spark/polaris/regtests/ +COPY --chown=spark:spark ./client/python /home/spark/polaris/client/python +COPY --chown=spark:spark ./polaris /home/spark/polaris/polaris +COPY --chown=spark:spark ./spec /home/spark/polaris/spec +COPY --chown=spark:spark ./regtests /home/spark/polaris/regtests + +# /home/spark/regtests might not be writable in all situations, see https://github.com/apache/polaris/pull/205 +RUN chmod -R go+rwx /home/spark/polaris USER spark -ENV PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" -# Copy and run setup.sh separately so that test sources can change, but the setup script run is still cached WORKDIR /home/spark/polaris -COPY --chown=spark ./regtests/setup.sh /home/spark/polaris/regtests/setup.sh -COPY --chown=spark ./regtests/pyspark-setup.sh /home/spark/polaris/regtests/pyspark-setup.sh -COPY --chown=spark ./client/python /home/spark/polaris/client/python -COPY --chown=spark ./polaris /home/spark/polaris/polaris -COPY --chown=spark ./spec /home/spark/polaris/spec -COPY --chown=spark ./regtests/requirements.txt /tmp/ RUN python3 -m venv /home/spark/polaris/polaris-venv && \ . /home/spark/polaris/polaris-venv/bin/activate && \ - pip install -r /tmp/requirements.txt && \ + pip install -r /home/spark/polaris/regtests/requirements.txt && \ cd /home/spark/polaris/client/python && \ poetry install && \ deactivate && \ /home/spark/polaris/regtests/setup.sh -COPY --chown=spark ./regtests /home/spark/polaris/regtests - -# /home/spark/regtests might not be writable in all situations, see https://github.com/apache/polaris/pull/205 -USER root -RUN chmod -R go+rwx /home/spark/polaris -USER spark - ENTRYPOINT ["./regtests/run.sh"] diff --git a/site/docker/Dockerfile b/site/docker/Dockerfile index 714b1ccf53..65ed42fa51 100644 --- a/site/docker/Dockerfile +++ b/site/docker/Dockerfile @@ -21,23 +21,14 @@ FROM ubuntu:24.04 AS hugo ENV LANGUAGE='en_US:en' -RUN apt-get update -RUN apt-get install --yes golang hugo asciidoctor npm curl -RUN apt-get clean -# http-server is used when building the static site to manually check it locally -# (via `site/bin/create-static-site.sh --local` at http://localhost:8080/) -RUN npm install --global http-server - -# these dependencies are needed to build the static site -#RUN npm install --global autoprefixer postcss postcss-cli http-server - -RUN mkdir /polaris -RUN mkdir /polaris/site -RUN mkdir /polaris/site/resources +RUN apt-get update && \ + apt-get install -y --no-install-recommends golang hugo asciidoctor npm curl && \ + npm install --global http-server && \ + rm -rf /var/lib/apt/lists/* && \ + mkdir -p /polaris/site/resources COPY _run_in_docker.sh /hugo/run -EXPOSE 1313 -EXPOSE 8080 +EXPOSE 1313 8080 ENTRYPOINT ["/hugo/run"] From a1cc02a3d98ebe171af4b831b2983f72fd734b06 Mon Sep 17 00:00:00 2001 From: Yong Date: Sat, 1 Nov 2025 17:27:45 -0500 Subject: [PATCH 2/6] Refactor: improve and clean up Dockerfiles --- site/docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/docker/Dockerfile b/site/docker/Dockerfile index 65ed42fa51..9c11793146 100644 --- a/site/docker/Dockerfile +++ b/site/docker/Dockerfile @@ -22,7 +22,7 @@ FROM ubuntu:24.04 AS hugo ENV LANGUAGE='en_US:en' RUN apt-get update && \ - apt-get install -y --no-install-recommends golang hugo asciidoctor npm curl && \ + apt-get install -y --no-install-recommends golang hugo asciidoctor npm curl git && \ npm install --global http-server && \ rm -rf /var/lib/apt/lists/* && \ mkdir -p /polaris/site/resources From 21874e8493c9b198df5833e5e96780047e8e835f Mon Sep 17 00:00:00 2001 From: Yong Date: Sat, 1 Nov 2025 17:36:16 -0500 Subject: [PATCH 3/6] Refactor: improve and clean up Dockerfiles --- plugins/spark/v3.5/regtests/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/spark/v3.5/regtests/Dockerfile b/plugins/spark/v3.5/regtests/Dockerfile index 37bee03280..70c5c6ce64 100755 --- a/plugins/spark/v3.5/regtests/Dockerfile +++ b/plugins/spark/v3.5/regtests/Dockerfile @@ -23,7 +23,7 @@ ARG POLARIS_HOST=polaris \ CURRENT_SCALA_VERSION=2.12 ENV POLARIS_HOST=${POLARIS_HOST} \ - CURRENT_SCALA_VERSION=${CURRENT_SCALA_VERSION} \ + CURRENT_SCALA_VERSION=${CURRENT_SCALA_VERSION} USER root From 68e14a90a67082bc656e71667fbafc458f38ffc5 Mon Sep 17 00:00:00 2001 From: Yong Date: Sat, 1 Nov 2025 20:00:52 -0500 Subject: [PATCH 4/6] Refactor: improve and clean up Dockerfiles --- plugins/spark/v3.5/getting-started/notebooks/Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile index 0a480044be..821baab120 100644 --- a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile +++ b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile @@ -21,8 +21,7 @@ FROM docker.io/apache/spark:3.5.6-java17 ARG CURRENT_SCALA_VERSION=2.12 -ENV CURRENT_SCALA_VERSION=${CURRENT_SCALA_VERSION} \ - PYSPARK_PYTHON=/home/spark/venv/bin/python \ +ENV PYSPARK_PYTHON=/home/spark/venv/bin/python \ PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages" USER root From 34d66f51ffc736bd3d5e045e792474fd9e2db2b2 Mon Sep 17 00:00:00 2001 From: Yong Date: Sun, 2 Nov 2025 13:43:14 -0600 Subject: [PATCH 5/6] Refactor: improve and clean up Dockerfiles --- runtime/admin/src/main/docker/Dockerfile.jvm | 24 +++++++------- runtime/server/src/main/docker/Dockerfile.jvm | 32 +++++++++---------- 2 files changed, 29 insertions(+), 27 deletions(-) diff --git a/runtime/admin/src/main/docker/Dockerfile.jvm b/runtime/admin/src/main/docker/Dockerfile.jvm index c6aa9739ec..e48631c1b3 100644 --- a/runtime/admin/src/main/docker/Dockerfile.jvm +++ b/runtime/admin/src/main/docker/Dockerfile.jvm @@ -18,23 +18,25 @@ # FROM registry.access.redhat.com/ubi9/openjdk-21-runtime:1.23-6.1761164966 -LABEL org.opencontainers.image.source=https://github.com/apache/polaris -LABEL org.opencontainers.image.description="Apache Polaris (incubating) Admin Tool" -LABEL org.opencontainers.image.licenses=Apache-2.0 +LABEL org.opencontainers.image.source=https://github.com/apache/polaris \ + org.opencontainers.image.description="Apache Polaris (incubating) Admin Tool" \ + org.opencontainers.image.licenses=Apache-2.0 -ENV LANGUAGE='en_US:en' +ENV LANGUAGE='en_US:en' \ + USER=polaris \ + UID=10000 \ + HOME=/home/polaris USER root -RUN groupadd --gid 10001 polaris \ - && useradd --uid 10000 --gid polaris polaris \ - && chown -R polaris:polaris /opt/jboss/container \ - && chown -R polaris:polaris /deployments + +RUN groupadd --gid 10001 polaris && \ + useradd --uid 10000 --gid polaris -m polaris && \ + mkdir -p /deployments && \ + chown -R polaris:polaris /deployments /opt/jboss/container USER polaris + WORKDIR /home/polaris -ENV USER=polaris -ENV UID=10000 -ENV HOME=/home/polaris # We make four distinct layers so if there are application changes the library layers can be re-used COPY --chown=polaris:polaris build/quarkus-app/lib/ /deployments/lib/ diff --git a/runtime/server/src/main/docker/Dockerfile.jvm b/runtime/server/src/main/docker/Dockerfile.jvm index 84ebee136f..a805f7b973 100644 --- a/runtime/server/src/main/docker/Dockerfile.jvm +++ b/runtime/server/src/main/docker/Dockerfile.jvm @@ -18,23 +18,27 @@ # FROM registry.access.redhat.com/ubi9/openjdk-21-runtime:1.23-6.1761164966 -LABEL org.opencontainers.image.source=https://github.com/apache/polaris -LABEL org.opencontainers.image.description="Apache Polaris (incubating)" -LABEL org.opencontainers.image.licenses=Apache-2.0 +LABEL org.opencontainers.image.source=https://github.com/apache/polaris \ + org.opencontainers.image.description="Apache Polaris (incubating)" \ + org.opencontainers.image.licenses=Apache-2.0 -ENV LANGUAGE='en_US:en' +ENV LANGUAGE='en_US:en' \ + USER=polaris \ + UID=10000 \ + HOME=/home/polaris \ + AB_JOLOKIA_OFF="" \ + JAVA_APP_JAR="/deployments/quarkus-run.jar" USER root -RUN groupadd --gid 10001 polaris \ - && useradd --uid 10000 --gid polaris polaris \ - && chown -R polaris:polaris /opt/jboss/container \ - && chown -R polaris:polaris /deployments + +RUN groupadd --gid 10001 polaris && \ + useradd --uid 10000 --gid polaris polaris && \ + chown -R polaris:polaris /opt/jboss/container && \ + chown -R polaris:polaris /deployments USER polaris + WORKDIR /home/polaris -ENV USER=polaris -ENV UID=10000 -ENV HOME=/home/polaris # We make four distinct layers so if there are application changes the library layers can be re-used COPY --chown=polaris:polaris build/quarkus-app/lib/ /deployments/lib/ @@ -45,8 +49,4 @@ COPY --chown=polaris:polaris distribution/LICENSE /deployments/ COPY --chown=polaris:polaris distribution/NOTICE /deployments/ COPY --chown=polaris:polaris distribution/DISCLAIMER /deployments/ -EXPOSE 8181 -EXPOSE 8182 - -ENV AB_JOLOKIA_OFF="" -ENV JAVA_APP_JAR="/deployments/quarkus-run.jar" +EXPOSE 8181 8182 \ No newline at end of file From c9fcf504875e7281b778c3cdc05289fa93252e31 Mon Sep 17 00:00:00 2001 From: Yong Date: Mon, 3 Nov 2025 19:19:25 -0600 Subject: [PATCH 6/6] Refactor: improve and clean up Dockerfiles --- plugins/spark/v3.5/getting-started/notebooks/Dockerfile | 4 +--- .../spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb | 2 ++ plugins/spark/v3.5/regtests/Dockerfile | 2 +- site/docker/Dockerfile | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile index 821baab120..392d79e0a4 100644 --- a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile +++ b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile @@ -19,8 +19,6 @@ FROM docker.io/apache/spark:3.5.6-java17 -ARG CURRENT_SCALA_VERSION=2.12 - ENV PYSPARK_PYTHON=/home/spark/venv/bin/python \ PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages" @@ -38,7 +36,7 @@ WORKDIR /home/spark COPY --chown=spark client /home/spark/client COPY --chown=spark regtests/requirements.txt /tmp COPY --chown=spark regtests/notebook_requirements.txt /tmp -COPY --chown=spark plugins/spark/v3.5/spark/build/${CURRENT_SCALA_VERSION}/libs/*bundle.jar /opt/spark/jars/ +COPY --chown=spark plugins/spark/v3.5/spark/build/2.12/libs/*bundle.jar /opt/spark/jars/ RUN python3 -m venv /home/spark/venv && \ . /home/spark/venv/bin/activate && \ diff --git a/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb b/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb index cd4a61d571..e9d9e06d28 100644 --- a/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb +++ b/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb @@ -265,6 +265,8 @@ "from pyspark.sql import SparkSession\n", "\n", "spark = (SparkSession.builder\n", + " # This jar is now automatically discovered, thus no longer needed\n", + " #.config(\"spark.jars\", \"../polaris_libs/polaris-spark-3.5_2.12-1.2.0-incubating-SNAPSHOT-bundle.jar\")\n", " .config(\"spark.jars.packages\", \"org.apache.iceberg:iceberg-aws-bundle:1.10.0,io.delta:delta-spark_2.12:3.2.1\")\n", " .config(\"spark.sql.catalog.spark_catalog\", \"org.apache.spark.sql.delta.catalog.DeltaCatalog\")\n", " .config('spark.sql.iceberg.vectorization.enabled', 'false')\n", diff --git a/plugins/spark/v3.5/regtests/Dockerfile b/plugins/spark/v3.5/regtests/Dockerfile index 70c5c6ce64..5c4c480f20 100755 --- a/plugins/spark/v3.5/regtests/Dockerfile +++ b/plugins/spark/v3.5/regtests/Dockerfile @@ -38,7 +38,7 @@ WORKDIR /home/spark/polaris COPY --chown=spark:spark ./v3.5 /home/spark/polaris/v3.5 -# /home/spark/regtests might not be writable in all situations, see https://github.com/apache/polaris/pull/205 +# /home/spark/.../regtests might not be writable in all situations, see https://github.com/apache/polaris/pull/205 RUN chmod -R 777 /home/spark/polaris/v3.5/regtests USER spark diff --git a/site/docker/Dockerfile b/site/docker/Dockerfile index 9c11793146..297c9aa7d3 100644 --- a/site/docker/Dockerfile +++ b/site/docker/Dockerfile @@ -22,9 +22,9 @@ FROM ubuntu:24.04 AS hugo ENV LANGUAGE='en_US:en' RUN apt-get update && \ - apt-get install -y --no-install-recommends golang hugo asciidoctor npm curl git && \ - npm install --global http-server && \ + apt-get install --yes --no-install-recommends golang hugo asciidoctor npm curl git && \ rm -rf /var/lib/apt/lists/* && \ + npm install --global http-server && \ mkdir -p /polaris/site/resources COPY _run_in_docker.sh /hugo/run