From 88da28e9fad7b416f59ec2720a56a381ffc2ab51 Mon Sep 17 00:00:00 2001 From: thomas Date: Mon, 18 Dec 2023 16:52:48 +0100 Subject: [PATCH 1/3] Added sort built on Thrust --- cmake/sac-core-ext.txt | 1 + src/CMakeLists.txt | 89 ++++++++++++++++++++++++++++++++- src/algorithms/README.txt | 7 +++ src/algorithms/Sort.sac | 30 +++++++++++ src/algorithms/src/sort_cpu.cpp | 9 ++++ src/algorithms/src/sort_gpu.cu | 9 ++++ 6 files changed, 143 insertions(+), 2 deletions(-) create mode 100644 src/algorithms/README.txt create mode 100644 src/algorithms/Sort.sac create mode 100644 src/algorithms/src/sort_cpu.cpp create mode 100644 src/algorithms/src/sort_gpu.cu diff --git a/cmake/sac-core-ext.txt b/cmake/sac-core-ext.txt index 12d7c18a..573112a9 100644 --- a/cmake/sac-core-ext.txt +++ b/cmake/sac-core-ext.txt @@ -82,3 +82,4 @@ auxiliary/Interval.sac Ext auxiliary/Hiding.sac Ext auxiliary/C99Benchmarking.sac Ext auxiliary/Benchmarking.sac Ext +algorithms/Sort.sac Core diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 445b4cc7..f3fc3271 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -315,6 +315,16 @@ SET (C_DEPS_SRC auxiliary/src/C99Benchmarking/bench.c ) +# C++ files relatively to thes CMakeLists.txt. +SET (CXX_DEPS_SRC + algorithms/src/sort_cpu.cpp +) + +# Cuda files relatively to thes CMakeLists.txt. +SET (CUDA_DEPS_SRC + algorithms/src/sort_gpu.cu +) + # Read the list of sac sources from sac-core-ext.txt PARSE_CORE_EXT_CONFIG ( "${CMAKE_SOURCE_DIR}/cmake/sac-core-ext.txt" @@ -375,6 +385,72 @@ FOREACH (name ${C_DEPS_SRC}) ) ENDFOREACH (name) +# For every C++ source, compile an object file maintaining the right location +# in the binary dir so that sac files can pick it up. +FOREACH (name ${CXX_DEPS_SRC}) + SET (src "${CMAKE_CURRENT_SOURCE_DIR}/${name}") + + GET_FILENAME_COMPONENT (dir ${name} DIRECTORY) + + GET_FILENAME_COMPONENT (dst ${name} NAME_WE) + SET (dst "${CMAKE_CURRENT_BINARY_DIR}/${dir}/${dst}${OBJEXT}") + + # Make sure that we put the object file in the same location where + # the source file was. + FILE (MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${dir}") + + MESSAGE(STATUS "Compiling ${dst}") + ADD_CUSTOM_COMMAND ( + OUTPUT "${dst}" + MAIN_DEPENDENCY "${src}" + IMPLICIT_DEPENDS C "${src}" + COMMAND + ${CMAKE_CXX_COMPILER} -I${CMAKE_CURRENT_SOURCE_DIR}/${dir} + -I${CMAKE_CURRENT_BINARY_DIR}/${dir} + -O3 + -march=native + -mtune=native + -fPIC + -c "${src}" + -o "${dst}" + WORKING_DIRECTORY + "${CMAKE_CURRENT_BINARY_DIR}/${dir}" + COMMENT "Generating ${dst} for target `${TARGET}'" + ) +ENDFOREACH (name) + +# TODO only compile when option cuda is enabled +# For every cuda source, compile an object file maintaining the right location +# in the binary dir so that sac files can pick it up. +FOREACH (name ${CUDA_DEPS_SRC}) + SET (src "${CMAKE_CURRENT_SOURCE_DIR}/${name}") + + GET_FILENAME_COMPONENT (dir ${name} DIRECTORY) + + GET_FILENAME_COMPONENT (dst ${name} NAME_WE) + SET (dst "${CMAKE_CURRENT_BINARY_DIR}/${dir}/${dst}${OBJEXT}") + + MESSAGE(STATUS "Compiling ${dst}") + # Make sure that we put the object file in the same location where + # the source file was. + FILE (MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${dir}") + + ADD_CUSTOM_COMMAND ( + OUTPUT "${dst}" + MAIN_DEPENDENCY "${src}" + IMPLICIT_DEPENDS C "${src}" + COMMAND + nvcc -I${CMAKE_CURRENT_SOURCE_DIR}/${dir} + -I${CMAKE_CURRENT_BINARY_DIR}/${dir} + -O3 + --compiler-options -fPIC + -c "${src}" + -o "${dst}" + WORKING_DIRECTORY + "${CMAKE_CURRENT_BINARY_DIR}/${dir}" + COMMENT "Generating ${dst} for target `${TARGET}'" + ) +ENDFOREACH (name) # Make a directory for sac2c output FILE (MAKE_DIRECTORY "${DLL_BUILD_DIR}/${TARGET_ENV}/${SBI}") @@ -438,7 +514,12 @@ FOREACH (name ${SAC_SRC}) ADD_CUSTOM_COMMAND ( OUTPUT "${mod}" "${tree}" COMMAND - ${SAC2C} -v0 -linksetsize ${LINKSETSIZE} ${NOTREE_FLAG} -o ${DLL_BUILD_DIR} "${src}" + ${SAC2C} -v0 + -linksetsize ${LINKSETSIZE} + -Xl -lstdc++ + ${NOTREE_FLAG} + -o ${DLL_BUILD_DIR} + "${src}" WORKING_DIRECTORY "${dir}" MAIN_DEPENDENCY "${src}" @@ -515,7 +596,11 @@ FOREACH (name ${XSAC_SRC}) ADD_CUSTOM_COMMAND ( OUTPUT "${mod}" "${tree}" COMMAND - ${SAC2C} -v0 -linksetsize ${LINKSETSIZE} -o ${DLL_BUILD_DIR} "${dir}/${dst}.sac" + ${SAC2C} -v0 + -linksetsize ${LINKSETSIZE} + -Xl -lstdc++ + -o ${DLL_BUILD_DIR} + "${dir}/${dst}.sac" WORKING_DIRECTORY "${dir}" MAIN_DEPENDENCY "${dir}/${dst}.sac" diff --git a/src/algorithms/README.txt b/src/algorithms/README.txt new file mode 100644 index 00000000..5ac42cb6 --- /dev/null +++ b/src/algorithms/README.txt @@ -0,0 +1,7 @@ +This directory is for functions that are not practical to implement in SaC. +This can happen for two reasons: + +1. The optimal algorithm for computing the function depends on the backend. +2. Too difficult. + +Currently only a sorting function is included, for reason 1. diff --git a/src/algorithms/Sort.sac b/src/algorithms/Sort.sac new file mode 100644 index 00000000..2b3c1c4a --- /dev/null +++ b/src/algorithms/Sort.sac @@ -0,0 +1,30 @@ +module Sort; +export {Sort}; + +int[n] Sort(double[n] keys) +{ + /* TODO inefficient for the GPU backend as iota(n) will be created + on the device, send back to the host, and then back to the device + again. Still 10x faster on a RTX 1650 than on a Ryzen 4600H. */ + iota = with { + ([0] <= [i] < [n]): i; + }: genarray([n], 0); + return TrueSort(keys, iota, n); +} + +/* indices must be initialised to iota(n) */ +external int[n] TrueSort(double[n] keys, int[n] indices, int n); + #pragma linkname "MySortDouble" + #pragma linksign [2, 1, 2, 3] +#if defined(SAC_TARGET_cuda) + #pragma linkobj "src/sort_gpu.o" + #pragma gpumem [0, 1, 2] +#elif defined(SAC_TARGET_default_sbi) + #pragma linkobj "src/sort_cpu.o" +#elif defined(SAC_TARGET_mt_pth) + /* TODO: make a multithreaded version work here. Thrust clashes + with our private heap manager. */ + #pragma linkobj "src/sort_cpu.o" +#else + #pragma linkobj "src/sort_cpu.o" +#endif diff --git a/src/algorithms/src/sort_cpu.cpp b/src/algorithms/src/sort_cpu.cpp new file mode 100644 index 00000000..55bc6adf --- /dev/null +++ b/src/algorithms/src/sort_cpu.cpp @@ -0,0 +1,9 @@ +#include +#include + +extern "C" { + void MySortDouble(double *keys, int *indices, int n) + { + thrust::sort_by_key(thrust::host, keys, keys + n, indices); + } +} diff --git a/src/algorithms/src/sort_gpu.cu b/src/algorithms/src/sort_gpu.cu new file mode 100644 index 00000000..0d60c1c3 --- /dev/null +++ b/src/algorithms/src/sort_gpu.cu @@ -0,0 +1,9 @@ +#include +#include + +extern "C" { + void MySortDouble(double *keys, int *indices, int n) + { + thrust::sort_by_key(thrust::device, keys, keys + n, indices); + } +} From 6723d444d29913b9abe5ecb38c9392f915ece1bd Mon Sep 17 00:00:00 2001 From: thomas Date: Tue, 19 Dec 2023 09:44:14 +0100 Subject: [PATCH 2/3] Add thrust as submodule --- .gitmodules | 3 +++ src/CMakeLists.txt | 12 +++++++----- src/algorithms/cccl | 1 + 3 files changed, 11 insertions(+), 5 deletions(-) create mode 160000 src/algorithms/cccl diff --git a/.gitmodules b/.gitmodules index 3e0e3ab1..f215fcd9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "cmake-common"] path = cmake-common url = https://github.com/SacBase/cmake-common.git +[submodule "src/algorithms/cccl"] + path = src/algorithms/cccl + url = https://github.com/NVIDIA/cccl/ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f3fc3271..ecf16838 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -407,6 +407,7 @@ FOREACH (name ${CXX_DEPS_SRC}) COMMAND ${CMAKE_CXX_COMPILER} -I${CMAKE_CURRENT_SOURCE_DIR}/${dir} -I${CMAKE_CURRENT_BINARY_DIR}/${dir} + -I${CMAKE_CURRENT_SOURCE_DIR}/algorithms/cccl/thrust -O3 -march=native -mtune=native @@ -441,11 +442,12 @@ FOREACH (name ${CUDA_DEPS_SRC}) IMPLICIT_DEPENDS C "${src}" COMMAND nvcc -I${CMAKE_CURRENT_SOURCE_DIR}/${dir} - -I${CMAKE_CURRENT_BINARY_DIR}/${dir} - -O3 - --compiler-options -fPIC - -c "${src}" - -o "${dst}" + -I${CMAKE_CURRENT_BINARY_DIR}/${dir} + -I${CMAKE_CURRENT_SOURCE_DIR}/algorithms/cccl/thrust + -O3 + --compiler-options -fPIC + -c "${src}" + -o "${dst}" WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${dir}" COMMENT "Generating ${dst} for target `${TARGET}'" diff --git a/src/algorithms/cccl b/src/algorithms/cccl new file mode 160000 index 00000000..21658458 --- /dev/null +++ b/src/algorithms/cccl @@ -0,0 +1 @@ +Subproject commit 21658458223d288dcff9cba9d4104d52290c0a96 From 70981664e1b40180f0f8a0e4a09ddbcb97bc32d8 Mon Sep 17 00:00:00 2001 From: Thomas Koopman Date: Tue, 19 Dec 2023 13:27:32 +0100 Subject: [PATCH 3/3] Make build dependent on cuda option --- src/CMakeLists.txt | 63 +++++++++++++++++---------------- src/algorithms/src/sort_cpu.cpp | 1 + 2 files changed, 34 insertions(+), 30 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ecf16838..142a0759 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -408,11 +408,13 @@ FOREACH (name ${CXX_DEPS_SRC}) ${CMAKE_CXX_COMPILER} -I${CMAKE_CURRENT_SOURCE_DIR}/${dir} -I${CMAKE_CURRENT_BINARY_DIR}/${dir} -I${CMAKE_CURRENT_SOURCE_DIR}/algorithms/cccl/thrust + -I${CMAKE_CURRENT_SOURCE_DIR}/algorithms/cccl/libcudacxx/include + -I${CMAKE_CURRENT_SOURCE_DIR}/algorithms/cccl/cub -O3 -march=native -mtune=native -fPIC - -c "${src}" + -c "${src}" -o "${dst}" WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${dir}" @@ -420,39 +422,40 @@ FOREACH (name ${CXX_DEPS_SRC}) ) ENDFOREACH (name) -# TODO only compile when option cuda is enabled -# For every cuda source, compile an object file maintaining the right location -# in the binary dir so that sac files can pick it up. -FOREACH (name ${CUDA_DEPS_SRC}) - SET (src "${CMAKE_CURRENT_SOURCE_DIR}/${name}") +IF ("${TARGET}" MATCHES "^cuda.*") + FOREACH (name ${CUDA_DEPS_SRC}) + SET (src "${CMAKE_CURRENT_SOURCE_DIR}/${name}") - GET_FILENAME_COMPONENT (dir ${name} DIRECTORY) + GET_FILENAME_COMPONENT (dir ${name} DIRECTORY) - GET_FILENAME_COMPONENT (dst ${name} NAME_WE) - SET (dst "${CMAKE_CURRENT_BINARY_DIR}/${dir}/${dst}${OBJEXT}") + GET_FILENAME_COMPONENT (dst ${name} NAME_WE) + SET (dst "${CMAKE_CURRENT_BINARY_DIR}/${dir}/${dst}${OBJEXT}") - MESSAGE(STATUS "Compiling ${dst}") - # Make sure that we put the object file in the same location where - # the source file was. - FILE (MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${dir}") + MESSAGE(STATUS "Compiling ${dst}") + # Make sure that we put the object file in the same location where + # the source file was. + FILE (MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${dir}") - ADD_CUSTOM_COMMAND ( - OUTPUT "${dst}" - MAIN_DEPENDENCY "${src}" - IMPLICIT_DEPENDS C "${src}" - COMMAND - nvcc -I${CMAKE_CURRENT_SOURCE_DIR}/${dir} - -I${CMAKE_CURRENT_BINARY_DIR}/${dir} - -I${CMAKE_CURRENT_SOURCE_DIR}/algorithms/cccl/thrust - -O3 - --compiler-options -fPIC - -c "${src}" - -o "${dst}" - WORKING_DIRECTORY - "${CMAKE_CURRENT_BINARY_DIR}/${dir}" - COMMENT "Generating ${dst} for target `${TARGET}'" - ) -ENDFOREACH (name) + ADD_CUSTOM_COMMAND ( + OUTPUT "${dst}" + MAIN_DEPENDENCY "${src}" + IMPLICIT_DEPENDS C "${src}" + COMMAND + nvcc -I${CMAKE_CURRENT_SOURCE_DIR}/${dir} + -I${CMAKE_CURRENT_BINARY_DIR}/${dir} + -I${CMAKE_CURRENT_SOURCE_DIR}/algorithms/cccl/thrust + -I${CMAKE_CURRENT_SOURCE_DIR}/algorithms/cccl/libcudacxx/include + -I${CMAKE_CURRENT_SOURCE_DIR}/algorithms/cccl/cub + -O3 + --compiler-options -fPIC + -c "${src}" + -o "${dst}" + WORKING_DIRECTORY + "${CMAKE_CURRENT_BINARY_DIR}/${dir}" + COMMENT "Generating ${dst} for target `${TARGET}'" + ) + ENDFOREACH (name) +ENDIF () # Make a directory for sac2c output FILE (MAKE_DIRECTORY "${DLL_BUILD_DIR}/${TARGET_ENV}/${SBI}") diff --git a/src/algorithms/src/sort_cpu.cpp b/src/algorithms/src/sort_cpu.cpp index 55bc6adf..43f5dbd2 100644 --- a/src/algorithms/src/sort_cpu.cpp +++ b/src/algorithms/src/sort_cpu.cpp @@ -1,3 +1,4 @@ +#define THRUST_DEVICE_SYSTEM THRUST_DEVICE_SYSTEM_CPP #include #include