Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ ARCH_SHORT=amd64

# Default repository to pull and push images from
REPO=ghcr.io/apache/arrow-java-dev
ARROW_REPO=apache/arrow-dev
ARROW_REPO=ghcr.io/apache/arrow-dev

# The setup attempts to generate coredumps by default. To disable coredump
# generation, set it to 0
Expand All @@ -53,5 +53,4 @@ MAVEN=3.9.9
# Versions for various dependencies used to build artifacts
# Keep in sync with apache/arrow
ARROW_REPO_ROOT=./arrow
PYTHON=3.9
VCPKG="f7423ee180c4b7f40d43402c2feb3859161ef625" # 2024.06.15 Release
VCPKG="4334d8b4c8916018600212ab4dd4bbdc343065d1" # 2025.09.17 Release
26 changes: 16 additions & 10 deletions .github/workflows/rc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ jobs:
fail-fast: false
matrix:
platform:
- { runs_on: macos-13, arch: "x86_64"}
- { runs_on: macos-15-intel, arch: "x86_64"}
- { runs_on: macos-14, arch: "aarch_64" }
env:
MACOSX_DEPLOYMENT_TARGET: "14.0"
Expand Down Expand Up @@ -222,7 +222,7 @@ jobs:
brew uninstall llvm || :

# We can remove this when we drop support for
# macos-13. because macos-14 or later uses /opt/homebrew/
# macos-15-intel, because macos-14 or later on arm64 uses /opt/homebrew/,
# not /usr/local/.
#
# Ensure updating python@XXX with the "--overwrite" option.
Expand Down Expand Up @@ -298,7 +298,7 @@ jobs:
fail-fast: false
matrix:
platform:
- runs_on: windows-2019
- runs_on: windows-2022
arch: "x86_64"
steps:
- name: Download source archive
Expand All @@ -309,13 +309,19 @@ jobs:
shell: bash
run: |
tar -xf apache-arrow-java-*.tar.gz --strip-components=1
- name: Download the latest Apache Arrow C++
if: github.event_name != 'schedule'
shell: bash
run: |
ci/scripts/download_cpp.sh
# We always use the main branch of apache/arrow for now because we
# need https://github.com/apache/arrow/pull/47749 in
# apache/arrow-java. We can revert this workaround once Apache
# Arrow 22.0.0, which includes that change, is released.
#
# - name: Download the latest Apache Arrow C++
# if: github.event_name != 'schedule'
# shell: bash
# run: |
# ci/scripts/download_cpp.sh
- name: Checkout Apache Arrow C++
if: github.event_name == 'schedule'
# if: github.event_name == 'schedule'
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
repository: apache/arrow
Expand Down Expand Up @@ -354,7 +360,7 @@ jobs:
- name: Build
shell: cmd
run: |
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
REM For ORC
set TZDIR=/c/msys64/usr/share/zoneinfo
bash -c "ci/scripts/jni_windows_build.sh . arrow build jni"
Expand Down
16 changes: 1 addition & 15 deletions ci/docker/vcpkg-jni.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,24 +18,10 @@
ARG base
FROM ${base}

# Install the libraries required by Gandiva to run
# Use enable llvm[enable-rtti] in the vcpkg.json to avoid link problems in Gandiva
RUN vcpkg install \
--clean-after-build \
--x-install-root=${VCPKG_ROOT}/installed \
--x-manifest-root=/arrow/ci/vcpkg \
--x-feature=dev \
--x-feature=flight \
--x-feature=gcs \
--x-feature=json \
--x-feature=parquet \
--x-feature=gandiva \
--x-feature=s3

# Install Java
# We need Java for JNI headers, but we don't invoke Maven in this build.
ARG java=11
RUN yum install -y java-$java-openjdk-devel && yum clean all
RUN dnf install -y java-$java-openjdk-devel && dnf clean all

# For ci/scripts/{cpp,java}_*.sh
ENV ARROW_HOME=/tmp/local \
Expand Down
2 changes: 1 addition & 1 deletion ci/scripts/jni_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ cmake \
-DProtobuf_USE_STATIC_LIBS=ON \
-GNinja \
"${EXTRA_CMAKE_OPTIONS[@]}"
cmake --build "${build_dir}"
cmake --build "${build_dir}" --verbose
if [ "${ARROW_JAVA_BUILD_TESTS}" = "ON" ]; then
ctest \
--output-on-failure \
Expand Down
67 changes: 10 additions & 57 deletions ci/scripts/jni_macos_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,72 +59,24 @@ fi

github_actions_group_begin "Building Arrow C++ libraries"
install_dir="${build_dir}/cpp-install"
: "${ARROW_ACERO:=ON}"
export ARROW_ACERO
: "${ARROW_BUILD_TESTS:=OFF}"
export ARROW_BUILD_TESTS
: "${ARROW_DATASET:=ON}"
export ARROW_DATASET
: "${ARROW_GANDIVA:=ON}"
export ARROW_GANDIVA
: "${ARROW_ORC:=ON}"
export ARROW_ORC
: "${ARROW_PARQUET:=ON}"
: "${ARROW_S3:=ON}"
: "${CMAKE_BUILD_TYPE:=Release}"
: "${CMAKE_UNITY_BUILD:=ON}"

export ARROW_TEST_DATA="${arrow_dir}/testing/data"
export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data"
export ARROW_BUILD_TESTS=OFF

export ARROW_DATASET=ON
export ARROW_GANDIVA=ON
export ARROW_ORC=ON
export ARROW_PARQUET=ON

export AWS_EC2_METADATA_DISABLED=TRUE

cmake \
-S "${arrow_dir}/cpp" \
-B "${build_dir}/cpp" \
-DARROW_ACERO="${ARROW_ACERO}" \
-DARROW_BUILD_SHARED=OFF \
-DARROW_BUILD_TESTS="${ARROW_BUILD_TESTS}" \
-DARROW_CSV="${ARROW_DATASET}" \
-DARROW_DATASET="${ARROW_DATASET}" \
-DARROW_SUBSTRAIT="${ARROW_DATASET}" \
-DARROW_DEPENDENCY_USE_SHARED=OFF \
-DARROW_GANDIVA="${ARROW_GANDIVA}" \
-DARROW_GANDIVA_STATIC_LIBSTDCPP=ON \
-DARROW_JSON="${ARROW_DATASET}" \
-DARROW_ORC="${ARROW_ORC}" \
-DARROW_PARQUET="${ARROW_PARQUET}" \
-DARROW_S3="${ARROW_S3}" \
-DARROW_USE_CCACHE="${ARROW_USE_CCACHE}" \
-DAWSSDK_SOURCE=BUNDLED \
-DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}" \
-DCMAKE_INSTALL_PREFIX="${install_dir}" \
-DCMAKE_UNITY_BUILD="${CMAKE_UNITY_BUILD}" \
-DGTest_SOURCE=BUNDLED \
-DPARQUET_BUILD_EXAMPLES=OFF \
-DPARQUET_BUILD_EXECUTABLES=OFF \
-DPARQUET_REQUIRE_ENCRYPTION=OFF \
-Dre2_SOURCE=BUNDLED \
-GNinja
--preset=ninja-release-jni-macos \
-DCMAKE_INSTALL_PREFIX="${install_dir}"
cmake --build "${build_dir}/cpp" --target install
github_actions_group_end

if [ "${ARROW_RUN_TESTS:-}" == "ON" ]; then
github_actions_group_begin "Running Arrow C++ libraries tests"
# MinIO is required
exclude_tests="arrow-s3fs-test"
# unstable
exclude_tests="${exclude_tests}|arrow-acero-asof-join-node-test"
exclude_tests="${exclude_tests}|arrow-acero-hash-join-node-test"
ctest \
--exclude-regex "${exclude_tests}" \
--label-regex unittest \
--output-on-failure \
--parallel "$(sysctl -n hw.ncpu)" \
--test-dir "${build_dir}/cpp" \
--timeout 300
github_actions_group_end
fi

export JAVA_JNI_CMAKE_ARGS="-DProtobuf_ROOT=${build_dir}/cpp/protobuf_ep-install"
"${source_dir}/ci/scripts/jni_build.sh" \
"${source_dir}" \
Expand All @@ -142,6 +94,7 @@ github_actions_group_begin "Checking shared dependencies for libraries"
pushd "${dist_dir}"
archery linking check-dependencies \
--allow CoreFoundation \
--allow Network \
--allow Security \
--allow libSystem \
--allow libarrow_cdata_jni \
Expand Down
97 changes: 12 additions & 85 deletions ci/scripts/jni_manylinux_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -53,105 +53,32 @@ if [ "${ARROW_USE_CCACHE}" == "ON" ]; then
fi

github_actions_group_begin "Building Arrow C++ libraries"
devtoolset_version="$(rpm -qa "devtoolset-*-gcc" --queryformat '%{VERSION}' | grep -o "^[0-9]*")"
devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}"
: "${ARROW_ACERO:=ON}"
export ARROW_ACERO
: "${ARROW_BUILD_TESTS:=OFF}"
export ARROW_BUILD_TESTS
: "${ARROW_DATASET:=ON}"
export ARROW_DATASET
: "${ARROW_GANDIVA:=ON}"
export ARROW_GANDIVA
: "${ARROW_GCS:=ON}"
: "${ARROW_JEMALLOC:=OFF}"
: "${ARROW_MIMALLOC:=ON}"
: "${ARROW_RPATH_ORIGIN:=ON}"
: "${ARROW_ORC:=ON}"
export ARROW_ORC
: "${ARROW_PARQUET:=ON}"
: "${ARROW_S3:=ON}"
: "${CMAKE_BUILD_TYPE:=release}"
: "${CMAKE_UNITY_BUILD:=ON}"

: "${VCPKG_ROOT:=/opt/vcpkg}"
: "${VCPKG_FEATURE_FLAGS:=-manifests}"
: "${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-linux-static-${CMAKE_BUILD_TYPE}}}"
: "${GANDIVA_CXX_FLAGS:=-isystem;${devtoolset_include_cpp};-isystem;${devtoolset_include_cpp}/x86_64-redhat-linux;-lpthread}"
: "${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-linux-static-release}}"
export VCPKG_TARGET_TRIPLET

export ARROW_BUILD_TESTS=OFF

export ARROW_DATASET=ON
export ARROW_GANDIVA=ON
export ARROW_ORC=ON
export ARROW_PARQUET=ON

export ARROW_TEST_DATA="${arrow_dir}/testing/data"
export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data"
export AWS_EC2_METADATA_DISABLED=TRUE

install_dir="${build_dir}/cpp-install"

cmake \
-S "${arrow_dir}/cpp" \
-B "${build_dir}/cpp" \
-DARROW_ACERO="${ARROW_ACERO}" \
-DARROW_BUILD_SHARED=OFF \
-DARROW_BUILD_TESTS="${ARROW_BUILD_TESTS}" \
-DARROW_CSV="${ARROW_DATASET}" \
-DARROW_DATASET="${ARROW_DATASET}" \
-DARROW_SUBSTRAIT="${ARROW_DATASET}" \
-DARROW_DEPENDENCY_SOURCE="VCPKG" \
-DARROW_DEPENDENCY_USE_SHARED=OFF \
-DARROW_GANDIVA_PC_CXX_FLAGS="${GANDIVA_CXX_FLAGS}" \
-DARROW_GANDIVA="${ARROW_GANDIVA}" \
-DARROW_GCS="${ARROW_GCS}" \
-DARROW_JEMALLOC="${ARROW_JEMALLOC}" \
-DARROW_JSON="${ARROW_DATASET}" \
-DARROW_MIMALLOC="${ARROW_MIMALLOC}" \
-DARROW_ORC="${ARROW_ORC}" \
-DARROW_PARQUET="${ARROW_PARQUET}" \
-DARROW_RPATH_ORIGIN="${ARROW_RPATH_ORIGIN}" \
-DARROW_S3="${ARROW_S3}" \
-DARROW_USE_CCACHE="${ARROW_USE_CCACHE}" \
-DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}" \
-DCMAKE_INSTALL_PREFIX="${install_dir}" \
-DCMAKE_UNITY_BUILD="${CMAKE_UNITY_BUILD}" \
-DGTest_SOURCE=BUNDLED \
-DORC_SOURCE=BUNDLED \
-DORC_PROTOBUF_EXECUTABLE="${VCPKG_ROOT}/installed/${VCPKG_TARGET_TRIPLET}/tools/protobuf/protoc" \
-DPARQUET_BUILD_EXAMPLES=OFF \
-DPARQUET_BUILD_EXECUTABLES=OFF \
-DPARQUET_REQUIRE_ENCRYPTION=OFF \
-DVCPKG_MANIFEST_MODE=OFF \
-DVCPKG_TARGET_TRIPLET="${VCPKG_TARGET_TRIPLET}" \
-GNinja
--preset=ninja-release-jni-linux \
-DCMAKE_INSTALL_PREFIX="${install_dir}"
cmake --build "${build_dir}/cpp"
cmake --install "${build_dir}/cpp"
github_actions_group_end

if [ "${ARROW_RUN_TESTS:-OFF}" = "ON" ]; then
github_actions_group_begin "Running Arrow C++ libraries tests"
# MinIO is required
exclude_tests="arrow-s3fs-test"
case $(arch) in
aarch64)
# GCS testbench is crashed on aarch64:
# ImportError: ../grpc/_cython/cygrpc.cpython-38-aarch64-linux-gnu.so:
# undefined symbol: vtable for std::__cxx11::basic_ostringstream<
# char, std::char_traits<char>, std::allocator<char> >
exclude_tests="${exclude_tests}|arrow-gcsfs-test"
;;
esac
# unstable
exclude_tests="${exclude_tests}|arrow-acero-asof-join-node-test"
exclude_tests="${exclude_tests}|arrow-acero-hash-join-node-test"
# external dependency
exclude_tests="${exclude_tests}|arrow-gcsfs-test"
# strptime
exclude_tests="${exclude_tests}|arrow-utility-test"
ctest \
--exclude-regex "${exclude_tests}" \
--label-regex unittest \
--output-on-failure \
--parallel "$(nproc)" \
--test-dir "${build_dir}/cpp" \
--timeout 300
github_actions_group_end
fi

JAVA_JNI_CMAKE_ARGS="-DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake"
JAVA_JNI_CMAKE_ARGS="${JAVA_JNI_CMAKE_ARGS} -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET}"
export JAVA_JNI_CMAKE_ARGS
Expand Down
2 changes: 1 addition & 1 deletion ci/scripts/jni_windows_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ cmake \
-B "${build_dir}/cpp" \
-DARROW_ACERO="${ARROW_ACERO}" \
-DARROW_BUILD_SHARED=OFF \
-DARROW_BUILD_TESTS=ON \
-DARROW_BUILD_TESTS="${ARROW_BUILD_TESTS}" \
-DARROW_CSV="${ARROW_DATASET}" \
-DARROW_DATASET="${ARROW_DATASET}" \
-DARROW_SUBSTRAIT="${ARROW_DATASET}" \
Expand Down
2 changes: 1 addition & 1 deletion compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ services:
cache_from:
- ${REPO}:${ARCH}-vcpkg-jni-${VCPKG}
args:
base: ${ARROW_REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-2014-vcpkg-${VCPKG}
base: ${ARROW_REPO}:${ARCH}-cpp-jni-${VCPKG}
volumes:
- .:/arrow-java:delegated
- ${ARROW_REPO_ROOT}:/arrow:delegated
Expand Down
5 changes: 4 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ under the License.
<parent>
<groupId>org.apache</groupId>
<artifactId>apache</artifactId>
<version>34</version>
<version>35</version>
</parent>

<groupId>org.apache.arrow</groupId>
Expand Down Expand Up @@ -91,6 +91,7 @@ under the License.
</issueManagement>

<properties>
<project.build.outputTimestamp>1695310533</project.build.outputTimestamp>
<target.gen.source.path>${project.build.directory}/generated-sources</target.gen.source.path>
<dep.junit.platform.version>1.9.0</dep.junit.platform.version>
<dep.junit.jupiter.version>5.12.2</dep.junit.jupiter.version>
Expand Down Expand Up @@ -123,6 +124,8 @@ under the License.
<!--
Downgrade maven-jar-plugin until https://github.com/codehaus-plexus/plexus-archiver/issues/332
is addressed
maven-jar-plugin 4.0.0-beta-2-SNAPSHOT upgraded to plexus-archiver 4.10.2, which fixes the issue.
We have to wait for a new maven-jar-plugin release and a new Apache POM release that provides it.
-->
<version.maven-jar-plugin>3.2.2</version.maven-jar-plugin>
</properties>
Expand Down
Loading