diff --git a/getting-started/spark/notebooks/Dockerfile b/getting-started/spark/notebooks/Dockerfile index 32ee4067b6..bb57ee65ff 100644 --- a/getting-started/spark/notebooks/Dockerfile +++ b/getting-started/spark/notebooks/Dockerfile @@ -19,8 +19,8 @@ FROM docker.io/apache/spark:3.5.6-java17 -ENV PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages" -ENV PYSPARK_PYTHON=/home/spark/venv/bin/python +ENV PYSPARK_PYTHON=/home/spark/venv/bin/python \ + PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages" USER root diff --git a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile index f5e052b2a3..392d79e0a4 100644 --- a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile +++ b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile @@ -19,8 +19,8 @@ FROM docker.io/apache/spark:3.5.6-java17 -ENV PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages" -ENV PYSPARK_PYTHON=/home/spark/venv/bin/python +ENV PYSPARK_PYTHON=/home/spark/venv/bin/python \ + PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages" USER root @@ -36,8 +36,7 @@ WORKDIR /home/spark COPY --chown=spark client /home/spark/client COPY --chown=spark regtests/requirements.txt /tmp COPY --chown=spark regtests/notebook_requirements.txt /tmp -COPY --chown=spark plugins/spark/v3.5/spark/build/2.12/libs /home/spark/polaris_libs - +COPY --chown=spark plugins/spark/v3.5/spark/build/2.12/libs/*bundle.jar /opt/spark/jars/ RUN python3 -m venv /home/spark/venv && \ .
/home/spark/venv/bin/activate && \ diff --git a/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb b/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb index 8e2be2918e..e9d9e06d28 100644 --- a/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb +++ b/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb @@ -265,7 +265,8 @@ "from pyspark.sql import SparkSession\n", "\n", "spark = (SparkSession.builder\n", - " .config(\"spark.jars\", \"../polaris_libs/polaris-spark-3.5_2.12-1.2.0-incubating-SNAPSHOT-bundle.jar\") # TODO: add a way to automatically discover the Jar\n", + " # This jar is now automatically discovered, thus no longer needed\n", + " #.config(\"spark.jars\", \"../polaris_libs/polaris-spark-3.5_2.12-1.2.0-incubating-SNAPSHOT-bundle.jar\")\n", " .config(\"spark.jars.packages\", \"org.apache.iceberg:iceberg-aws-bundle:1.10.0,io.delta:delta-spark_2.12:3.2.1\")\n", " .config(\"spark.sql.catalog.spark_catalog\", \"org.apache.spark.sql.delta.catalog.DeltaCatalog\")\n", " .config('spark.sql.iceberg.vectorization.enabled', 'false')\n", diff --git a/plugins/spark/v3.5/regtests/Dockerfile b/plugins/spark/v3.5/regtests/Dockerfile index db84d3eb1f..5c4c480f20 100755 --- a/plugins/spark/v3.5/regtests/Dockerfile +++ b/plugins/spark/v3.5/regtests/Dockerfile @@ -18,31 +18,29 @@ # FROM docker.io/apache/spark:3.5.6-java17 -ARG POLARIS_HOST=polaris -ENV POLARIS_HOST=$POLARIS_HOST -ENV SPARK_HOME=/opt/spark -ENV CURRENT_SCALA_VERSION='2.12' -ENV LANGUAGE='en_US:en' + +ARG POLARIS_HOST=polaris \ + CURRENT_SCALA_VERSION=2.12 + +ENV POLARIS_HOST=${POLARIS_HOST} \ + CURRENT_SCALA_VERSION=${CURRENT_SCALA_VERSION} USER root -RUN apt update -RUN apt-get install -y diffutils wget curl -RUN mkdir -p /home/spark && \ - chown -R spark /home/spark && \ - mkdir -p /tmp/polaris-regtests && \ - chown -R spark /tmp/polaris-regtests -RUN mkdir /opt/spark/conf && chmod -R 777 /opt/spark/conf -USER spark +RUN apt-get update && \ + apt-get install
-y --no-install-recommends diffutils wget curl && \ + rm -rf /var/lib/apt/lists/* && \ + mkdir -p /home/spark /tmp/polaris-regtests /opt/spark/conf && \ + chown -R spark:spark /home/spark /tmp/polaris-regtests && \ + chmod -R 777 /opt/spark/conf WORKDIR /home/spark/polaris -COPY --chown=spark ./v3.5 /home/spark/polaris/v3.5 +COPY --chown=spark:spark ./v3.5 /home/spark/polaris/v3.5 + +# /home/spark/.../regtests might not be writable in all situations, see https://github.com/apache/polaris/pull/205 +RUN chmod -R 777 /home/spark/polaris/v3.5/regtests -# /home/spark/regtests might not be writable in all situations, see https://github.com/apache/polaris/pull/205 -USER root -RUN chmod -R go+rwx /home/spark/polaris -RUN chmod -R 777 ./v3.5/regtests USER spark ENTRYPOINT ["./v3.5/regtests/run.sh"] diff --git a/regtests/Dockerfile b/regtests/Dockerfile index 88fa13ddaf..183701a5cf 100644 --- a/regtests/Dockerfile +++ b/regtests/Dockerfile @@ -18,45 +18,43 @@ # FROM docker.io/apache/spark:3.5.6-java17-python3 + ARG POLARIS_HOST=polaris -ENV POLARIS_HOST=$POLARIS_HOST -ENV SPARK_HOME=/opt/spark -ENV LANGUAGE='en_US:en' + +ENV POLARIS_HOST=${POLARIS_HOST} \ + PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip" USER root -RUN apt update -RUN apt-get install -y diffutils wget curl python3.10-venv jq -RUN mkdir -p /home/spark && \ - chown -R spark /home/spark && \ - mkdir -p /tmp/polaris-regtests && \ - chown -R spark /tmp/polaris-regtests -RUN mkdir /opt/spark/conf && chmod -R 777 /opt/spark/conf + +RUN apt-get update && \ + apt-get install -y --no-install-recommends diffutils wget curl python3.10-venv jq && \ + rm -rf /var/lib/apt/lists/* && \ + mkdir -p /home/spark /tmp/polaris-regtests /opt/spark/conf && \ + chown -R spark:spark /home/spark /tmp/polaris-regtests && \ + chmod -R 777 /opt/spark/conf
USER spark -ENV PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" -# Copy and run setup.sh separately so that test sources can change, but the setup script run is still cached +# Copy only the setup inputs before running setup.sh so that test sources can change without invalidating the cached setup layer WORKDIR /home/spark/polaris -COPY --chown=spark ./regtests/setup.sh /home/spark/polaris/regtests/setup.sh -COPY --chown=spark ./regtests/pyspark-setup.sh /home/spark/polaris/regtests/pyspark-setup.sh -COPY --chown=spark ./client/python /home/spark/polaris/client/python -COPY --chown=spark ./polaris /home/spark/polaris/polaris -COPY --chown=spark ./spec /home/spark/polaris/spec -COPY --chown=spark ./regtests/requirements.txt /tmp/ +COPY --chown=spark:spark ./regtests/setup.sh ./regtests/pyspark-setup.sh ./regtests/requirements.txt /home/spark/polaris/regtests/ +COPY --chown=spark:spark ./client/python /home/spark/polaris/client/python +COPY --chown=spark:spark ./polaris /home/spark/polaris/polaris +COPY --chown=spark:spark ./spec /home/spark/polaris/spec RUN python3 -m venv /home/spark/polaris/polaris-venv && \ .
/home/spark/polaris/polaris-venv/bin/activate && \ - pip install -r /tmp/requirements.txt && \ + pip install --no-cache-dir -r /home/spark/polaris/regtests/requirements.txt && \ cd /home/spark/polaris/client/python && \ poetry install && \ deactivate && \ /home/spark/polaris/regtests/setup.sh -COPY --chown=spark ./regtests /home/spark/polaris/regtests +COPY --chown=spark:spark ./regtests /home/spark/polaris/regtests -# /home/spark/regtests might not be writable in all situations, see https://github.com/apache/polaris/pull/205 +# /home/spark/polaris/regtests might not be writable in all situations, see https://github.com/apache/polaris/pull/205 USER root RUN chmod -R go+rwx /home/spark/polaris USER spark ENTRYPOINT ["./regtests/run.sh"] diff --git a/runtime/admin/src/main/docker/Dockerfile.jvm b/runtime/admin/src/main/docker/Dockerfile.jvm index c6aa9739ec..e48631c1b3 100644 --- a/runtime/admin/src/main/docker/Dockerfile.jvm +++ b/runtime/admin/src/main/docker/Dockerfile.jvm @@ -18,23 +18,25 @@ # FROM registry.access.redhat.com/ubi9/openjdk-21-runtime:1.23-6.1761164966 -LABEL org.opencontainers.image.source=https://github.com/apache/polaris -LABEL org.opencontainers.image.description="Apache Polaris (incubating) Admin Tool" -LABEL org.opencontainers.image.licenses=Apache-2.0 +LABEL org.opencontainers.image.source=https://github.com/apache/polaris \ + org.opencontainers.image.description="Apache Polaris (incubating) Admin Tool" \ + org.opencontainers.image.licenses=Apache-2.0 -ENV LANGUAGE='en_US:en' +ENV LANGUAGE='en_US:en' \ + USER=polaris \ + UID=10000 \ + HOME=/home/polaris USER root -RUN groupadd --gid 10001 polaris \ - && useradd --uid 10000 --gid polaris polaris \ - && chown -R polaris:polaris /opt/jboss/container \ - && chown -R polaris:polaris /deployments + +RUN groupadd --gid 10001 polaris && \ + useradd --uid 10000 --gid polaris -m polaris && \ + mkdir -p /deployments && \ + chown -R polaris:polaris /deployments /opt/jboss/container USER polaris + WORKDIR /home/polaris -ENV USER=polaris -ENV UID=10000 -ENV HOME=/home/polaris # We make four distinct layers so if there are application changes the library layers can be re-used COPY
--chown=polaris:polaris build/quarkus-app/lib/ /deployments/lib/ diff --git a/runtime/server/src/main/docker/Dockerfile.jvm b/runtime/server/src/main/docker/Dockerfile.jvm index 84ebee136f..a805f7b973 100644 --- a/runtime/server/src/main/docker/Dockerfile.jvm +++ b/runtime/server/src/main/docker/Dockerfile.jvm @@ -18,23 +18,27 @@ # FROM registry.access.redhat.com/ubi9/openjdk-21-runtime:1.23-6.1761164966 -LABEL org.opencontainers.image.source=https://github.com/apache/polaris -LABEL org.opencontainers.image.description="Apache Polaris (incubating)" -LABEL org.opencontainers.image.licenses=Apache-2.0 +LABEL org.opencontainers.image.source=https://github.com/apache/polaris \ + org.opencontainers.image.description="Apache Polaris (incubating)" \ + org.opencontainers.image.licenses=Apache-2.0 -ENV LANGUAGE='en_US:en' +ENV LANGUAGE='en_US:en' \ + USER=polaris \ + UID=10000 \ + HOME=/home/polaris \ + AB_JOLOKIA_OFF="" \ + JAVA_APP_JAR="/deployments/quarkus-run.jar" USER root -RUN groupadd --gid 10001 polaris \ - && useradd --uid 10000 --gid polaris polaris \ - && chown -R polaris:polaris /opt/jboss/container \ - && chown -R polaris:polaris /deployments + +RUN groupadd --gid 10001 polaris && \ + useradd --uid 10000 --gid polaris polaris && \ + chown -R polaris:polaris /opt/jboss/container && \ + chown -R polaris:polaris /deployments USER polaris + WORKDIR /home/polaris -ENV USER=polaris -ENV UID=10000 -ENV HOME=/home/polaris # We make four distinct layers so if there are application changes the library layers can be re-used COPY --chown=polaris:polaris build/quarkus-app/lib/ /deployments/lib/ @@ -45,8 +49,4 @@ COPY --chown=polaris:polaris distribution/LICENSE /deployments/ COPY --chown=polaris:polaris distribution/NOTICE /deployments/ COPY --chown=polaris:polaris distribution/DISCLAIMER /deployments/ -EXPOSE 8181 -EXPOSE 8182 - -ENV AB_JOLOKIA_OFF="" -ENV JAVA_APP_JAR="/deployments/quarkus-run.jar" +EXPOSE 8181 8182 diff --git
a/site/docker/Dockerfile b/site/docker/Dockerfile index 714b1ccf53..297c9aa7d3 100644 --- a/site/docker/Dockerfile +++ b/site/docker/Dockerfile @@ -21,23 +21,14 @@ FROM ubuntu:24.04 AS hugo ENV LANGUAGE='en_US:en' -RUN apt-get update -RUN apt-get install --yes golang hugo asciidoctor npm curl -RUN apt-get clean -# http-server is used when building the static site to manually check it locally -# (via `site/bin/create-static-site.sh --local` at http://localhost:8080/) -RUN npm install --global http-server - -# these dependencies are needed to build the static site -#RUN npm install --global autoprefixer postcss postcss-cli http-server - -RUN mkdir /polaris -RUN mkdir /polaris/site -RUN mkdir /polaris/site/resources +RUN apt-get update && \ + apt-get install --yes --no-install-recommends golang hugo asciidoctor npm curl git && \ + rm -rf /var/lib/apt/lists/* && \ + npm install --global http-server && \ + mkdir -p /polaris/site/resources COPY _run_in_docker.sh /hugo/run -EXPOSE 1313 -EXPOSE 8080 +EXPOSE 1313 8080 ENTRYPOINT ["/hugo/run"]