diff --git a/.gitmodules b/.gitmodules index 3e0e3ab1..f215fcd9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "cmake-common"] path = cmake-common url = https://github.com/SacBase/cmake-common.git +[submodule "src/algorithms/cccl"] + path = src/algorithms/cccl + url = https://github.com/NVIDIA/cccl/ diff --git a/cmake/sac-core-ext.txt b/cmake/sac-core-ext.txt index 12d7c18a..573112a9 100644 --- a/cmake/sac-core-ext.txt +++ b/cmake/sac-core-ext.txt @@ -82,3 +82,4 @@ auxiliary/Interval.sac Ext auxiliary/Hiding.sac Ext auxiliary/C99Benchmarking.sac Ext auxiliary/Benchmarking.sac Ext +algorithms/Sort.sac Core diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 445b4cc7..142a0759 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -315,6 +315,16 @@ SET (C_DEPS_SRC auxiliary/src/C99Benchmarking/bench.c ) +# C++ files relatively to thes CMakeLists.txt. +SET (CXX_DEPS_SRC + algorithms/src/sort_cpu.cpp +) + +# Cuda files relatively to thes CMakeLists.txt. +SET (CUDA_DEPS_SRC + algorithms/src/sort_gpu.cu +) + # Read the list of sac sources from sac-core-ext.txt PARSE_CORE_EXT_CONFIG ( "${CMAKE_SOURCE_DIR}/cmake/sac-core-ext.txt" @@ -375,6 +385,77 @@ FOREACH (name ${C_DEPS_SRC}) ) ENDFOREACH (name) +# For every C++ source, compile an object file maintaining the right location +# in the binary dir so that sac files can pick it up. +FOREACH (name ${CXX_DEPS_SRC}) + SET (src "${CMAKE_CURRENT_SOURCE_DIR}/${name}") + + GET_FILENAME_COMPONENT (dir ${name} DIRECTORY) + + GET_FILENAME_COMPONENT (dst ${name} NAME_WE) + SET (dst "${CMAKE_CURRENT_BINARY_DIR}/${dir}/${dst}${OBJEXT}") + + # Make sure that we put the object file in the same location where + # the source file was. + FILE (MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${dir}") + + MESSAGE(STATUS "Compiling ${dst}") + ADD_CUSTOM_COMMAND ( + OUTPUT "${dst}" + MAIN_DEPENDENCY "${src}" + IMPLICIT_DEPENDS C "${src}" + COMMAND + ${CMAKE_CXX_COMPILER} -I${CMAKE_CURRENT_SOURCE_DIR}/${dir} + -I${CMAKE_CURRENT_BINARY_DIR}/${dir} + -I${CMAKE_CURRENT_SOURCE_DIR}/algorithms/cccl/thrust + -I${CMAKE_CURRENT_SOURCE_DIR}/algorithms/cccl/libcudacxx/include + -I${CMAKE_CURRENT_SOURCE_DIR}/algorithms/cccl/cub + -O3 + -march=native + -mtune=native + -fPIC + -c "${src}" + -o "${dst}" + WORKING_DIRECTORY + "${CMAKE_CURRENT_BINARY_DIR}/${dir}" + COMMENT "Generating ${dst} for target `${TARGET}'" + ) +ENDFOREACH (name) + +IF ("${TARGET}" MATCHES "^cuda.*") + FOREACH (name ${CUDA_DEPS_SRC}) + SET (src "${CMAKE_CURRENT_SOURCE_DIR}/${name}") + + GET_FILENAME_COMPONENT (dir ${name} DIRECTORY) + + GET_FILENAME_COMPONENT (dst ${name} NAME_WE) + SET (dst "${CMAKE_CURRENT_BINARY_DIR}/${dir}/${dst}${OBJEXT}") + + MESSAGE(STATUS "Compiling ${dst}") + # Make sure that we put the object file in the same location where + # the source file was. + FILE (MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${dir}") + + ADD_CUSTOM_COMMAND ( + OUTPUT "${dst}" + MAIN_DEPENDENCY "${src}" + IMPLICIT_DEPENDS C "${src}" + COMMAND + nvcc -I${CMAKE_CURRENT_SOURCE_DIR}/${dir} + -I${CMAKE_CURRENT_BINARY_DIR}/${dir} + -I${CMAKE_CURRENT_SOURCE_DIR}/algorithms/cccl/thrust + -I${CMAKE_CURRENT_SOURCE_DIR}/algorithms/cccl/libcudacxx/include + -I${CMAKE_CURRENT_SOURCE_DIR}/algorithms/cccl/cub + -O3 + --compiler-options -fPIC + -c "${src}" + -o "${dst}" + WORKING_DIRECTORY + "${CMAKE_CURRENT_BINARY_DIR}/${dir}" + COMMENT "Generating ${dst} for target `${TARGET}'" + ) + ENDFOREACH (name) +ENDIF () # Make a directory for sac2c output FILE (MAKE_DIRECTORY "${DLL_BUILD_DIR}/${TARGET_ENV}/${SBI}") @@ -438,7 +519,12 @@ FOREACH (name ${SAC_SRC}) ADD_CUSTOM_COMMAND ( OUTPUT "${mod}" "${tree}" COMMAND - ${SAC2C} -v0 -linksetsize ${LINKSETSIZE} ${NOTREE_FLAG} -o ${DLL_BUILD_DIR} "${src}" + ${SAC2C} -v0 + -linksetsize ${LINKSETSIZE} + -Xl -lstdc++ + ${NOTREE_FLAG} + -o ${DLL_BUILD_DIR} + "${src}" WORKING_DIRECTORY "${dir}" MAIN_DEPENDENCY "${src}" @@ -515,7 +601,11 @@ FOREACH (name ${XSAC_SRC}) ADD_CUSTOM_COMMAND ( OUTPUT "${mod}" "${tree}" COMMAND - ${SAC2C} -v0 -linksetsize ${LINKSETSIZE} -o ${DLL_BUILD_DIR} "${dir}/${dst}.sac" + ${SAC2C} -v0 + -linksetsize ${LINKSETSIZE} + -Xl -lstdc++ + -o ${DLL_BUILD_DIR} + "${dir}/${dst}.sac" WORKING_DIRECTORY "${dir}" MAIN_DEPENDENCY "${dir}/${dst}.sac" diff --git a/src/algorithms/README.txt b/src/algorithms/README.txt new file mode 100644 index 00000000..5ac42cb6 --- /dev/null +++ b/src/algorithms/README.txt @@ -0,0 +1,7 @@ +This directory is for functions that are not practical to implement in SaC. +This can happen for two reasons: + +1. The optimal algorithm for computing the function depends on the backend. +2. Too difficult. + +Currently only a sorting function is included, for reason 1. diff --git a/src/algorithms/Sort.sac b/src/algorithms/Sort.sac new file mode 100644 index 00000000..2b3c1c4a --- /dev/null +++ b/src/algorithms/Sort.sac @@ -0,0 +1,30 @@ +module Sort; +export {Sort}; + +int[n] Sort(double[n] keys) +{ + /* TODO inefficient for the GPU backend as iota(n) will be created + on the device, send back to the host, and then back to the device + again. Still 10x faster on a RTX 1650 than on a Ryzen 4600H. */ + iota = with { + ([0] <= [i] < [n]): i; + }: genarray([n], 0); + return TrueSort(keys, iota, n); +} + +/* indices must be initialised to iota(n) */ +external int[n] TrueSort(double[n] keys, int[n] indices, int n); + #pragma linkname "MySortDouble" + #pragma linksign [2, 1, 2, 3] +#if defined(SAC_TARGET_cuda) + #pragma linkobj "src/sort_gpu.o" + #pragma gpumem [0, 1, 2] +#elif defined(SAC_TARGET_default_sbi) + #pragma linkobj "src/sort_cpu.o" +#elif defined(SAC_TARGET_mt_pth) + /* TODO: make a multithreaded version work here. Thrust clashes + with our private heap manager. */ + #pragma linkobj "src/sort_cpu.o" +#else + #pragma linkobj "src/sort_cpu.o" +#endif diff --git a/src/algorithms/cccl b/src/algorithms/cccl new file mode 160000 index 00000000..21658458 --- /dev/null +++ b/src/algorithms/cccl @@ -0,0 +1 @@ +Subproject commit 21658458223d288dcff9cba9d4104d52290c0a96 diff --git a/src/algorithms/src/sort_cpu.cpp b/src/algorithms/src/sort_cpu.cpp new file mode 100644 index 00000000..43f5dbd2 --- /dev/null +++ b/src/algorithms/src/sort_cpu.cpp @@ -0,0 +1,10 @@ +#define THRUST_DEVICE_SYSTEM THRUST_DEVICE_SYSTEM_CPP +#include +#include + +extern "C" { + void MySortDouble(double *keys, int *indices, int n) + { + thrust::sort_by_key(thrust::host, keys, keys + n, indices); + } +} diff --git a/src/algorithms/src/sort_gpu.cu b/src/algorithms/src/sort_gpu.cu new file mode 100644 index 00000000..0d60c1c3 --- /dev/null +++ b/src/algorithms/src/sort_gpu.cu @@ -0,0 +1,9 @@ +#include +#include + +extern "C" { + void MySortDouble(double *keys, int *indices, int n) + { + thrust::sort_by_key(thrust::device, keys, keys + n, indices); + } +}