From fe2bdedb1a8e32b2f8b2dab6a38ceb5068aae602 Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Tue, 12 Dec 2023 13:19:17 -0800 Subject: [PATCH 1/4] CI: Run NumPy on SPR using SDE --- .github/workflows/build-numpy.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/.github/workflows/build-numpy.yml b/.github/workflows/build-numpy.yml index 4cff8ed8..aac2adcd 100644 --- a/.github/workflows/build-numpy.yml +++ b/.github/workflows/build-numpy.yml @@ -89,6 +89,12 @@ jobs: sudo apt update sudo apt -y install g++-12 gcc-12 git + - name: Install Intel SDE + run: | + curl -o /tmp/sde.tar.xz https://downloadmirror.intel.com/788820/sde-external-9.27.0-2023-09-13-lin.tar.xz.sig + mkdir /tmp/sde && tar -xvf /tmp/sde.tar.xz -C /tmp/sde/ + sudo mv /tmp/sde/* /opt/sde && sudo ln -s /opt/sde/sde64 /usr/bin/sde + - name: Checkout NumPy main uses: actions/checkout@v3 with: @@ -123,3 +129,13 @@ jobs: CC: gcc-12 run: | spin build -- -Dallow-noblas=true -Dcpu-baseline=avx512_spr + + - name: SIMD tests (SPR) + run: | + export NUMPY_SITE=$(realpath build-install/usr/lib/python*/site-packages/) + export PYTHONPATH="$PYTHONPATH:$NUMPY_SITE" + cd build-install && + sde -spr -- python -c "import numpy; numpy.show_config()" && + sde -spr -- python -m pytest $NUMPY_SITE/numpy/_core/tests/test_umath* \ + $NUMPY_SITE/numpy/_core/tests/test_ufunc.py \ + $NUMPY_SITE/numpy/linalg/tests/test_* From c010378de67794da42b5e636e595b8b95d334ac6 Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Tue, 12 Dec 2023 13:25:26 -0800 Subject: [PATCH 2/4] Typo --- .github/workflows/build-numpy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-numpy.yml b/.github/workflows/build-numpy.yml index aac2adcd..46d7f674 100644 --- a/.github/workflows/build-numpy.yml +++ b/.github/workflows/build-numpy.yml @@ -91,7 +91,7 @@ jobs: - name: Install Intel SDE run: | - curl -o /tmp/sde.tar.xz 
https://downloadmirror.intel.com/788820/sde-external-9.27.0-2023-09-13-lin.tar.xz.sig + curl -o /tmp/sde.tar.xz https://downloadmirror.intel.com/788820/sde-external-9.27.0-2023-09-13-lin.tar.xz mkdir /tmp/sde && tar -xvf /tmp/sde.tar.xz -C /tmp/sde/ sudo mv /tmp/sde/* /opt/sde && sudo ln -s /opt/sde/sde64 /usr/bin/sde From be0ad85ede57119e2175ba20b41b7c3044e84191 Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Tue, 12 Dec 2023 13:48:12 -0800 Subject: [PATCH 3/4] Add the right working directory --- .github/workflows/build-numpy.yml | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-numpy.yml b/.github/workflows/build-numpy.yml index 46d7f674..f992586a 100644 --- a/.github/workflows/build-numpy.yml +++ b/.github/workflows/build-numpy.yml @@ -130,12 +130,20 @@ jobs: run: | spin build -- -Dallow-noblas=true -Dcpu-baseline=avx512_spr - - name: SIMD tests (SPR) + - name: Run tests on TGL + working-directory: ${{ github.workspace }}/numpy + run: | + export NUMPY_SITE=$(realpath build-install/usr/lib/python*/site-packages/) + export PYTHONPATH="$PYTHONPATH:$NUMPY_SITE" + cd build-install && + sde -tgl -- python -c "import numpy; numpy.show_config()" && + sde -tgl -- python -m pytest $NUMPY_SITE/numpy/_core/tests/test_multiarray.py + + - name: Run tests on SPR + working-directory: ${{ github.workspace }}/numpy run: | export NUMPY_SITE=$(realpath build-install/usr/lib/python*/site-packages/) export PYTHONPATH="$PYTHONPATH:$NUMPY_SITE" cd build-install && sde -spr -- python -c "import numpy; numpy.show_config()" && - sde -spr -- python -m pytest $NUMPY_SITE/numpy/_core/tests/test_umath* \ - $NUMPY_SITE/numpy/_core/tests/test_ufunc.py \ - $NUMPY_SITE/numpy/linalg/tests/test_* + sde -spr -- python -m pytest $NUMPY_SITE/numpy/_core/tests/test_multiarray.py From c32dbe57d2c33a2dd44edfb855afeb641e5b9a56 Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Tue, 12 Dec 2023 15:36:16 -0800 Subject: [PATCH 4/4] 
Bug fix in replace_nan_with_inf for float16 --- src/avx512-16bit-qsort.hpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/avx512-16bit-qsort.hpp b/src/avx512-16bit-qsort.hpp index a71281f4..be806f5f 100644 --- a/src/avx512-16bit-qsort.hpp +++ b/src/avx512-16bit-qsort.hpp @@ -499,8 +499,10 @@ replace_nan_with_inf<zmm_vector<float16>>(uint16_t *arr, arrsize_t arrsize) { arrsize_t nan_count = 0; __mmask16 loadmask = 0xFFFF; - while (arrsize > 0) { - if (arrsize < 16) { loadmask = (0x0001 << arrsize) - 0x0001; } + for (arrsize_t ii = 0; ii < arrsize; ii = ii + zmm_vector<float16>::numlanes / 2) { + if (arrsize - ii < 16) { + loadmask = (0x0001 << (arrsize-ii)) - 0x0001; + } __m256i in_zmm = _mm256_maskz_loadu_epi16(loadmask, arr); __m512 in_zmm_asfloat = _mm512_cvtph_ps(in_zmm); __mmask16 nanmask = _mm512_cmp_ps_mask( @@ -508,7 +510,6 @@ replace_nan_with_inf<zmm_vector<float16>>(uint16_t *arr, arrsize_t arrsize) nan_count += _mm_popcnt_u32((int32_t)nanmask); _mm256_mask_storeu_epi16(arr, nanmask, YMM_MAX_HALF); arr += 16; - arrsize -= 16; } return nan_count; }