Skip to content
This repository was archived by the owner on Mar 20, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions coreneuron/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,15 @@ target_include_directories(coreneuron SYSTEM
target_include_directories(coreneuron SYSTEM
PRIVATE ${CORENEURON_PROJECT_SOURCE_DIR}/external/CLI11/include)

if(CORENRN_ENABLE_GPU)
# nrnran123.cpp possibly-temporarily uses Boost.Pool in GPU builds if it's available.
find_package(Boost QUIET)
if(Boost_FOUND)
target_include_directories(coreneuron SYSTEM PRIVATE ${Boost_INCLUDE_DIRS})
target_compile_definitions(coreneuron PRIVATE CORENEURON_USE_BOOST_POOL)
endif()
endif()

set_target_properties(
coreneuron scopmath
PROPERTIES ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib
Expand Down
55 changes: 49 additions & 6 deletions coreneuron/utils/randoms/nrnran123.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@
#include <memory>
#include <mutex>

#ifdef CORENEURON_USE_BOOST_POOL
#include <boost/pool/pool_alloc.hpp>
#include <unordered_map>
#endif

// In a GPU build this file will be compiled by NVCC as CUDA code
// In a CPU build this file will be compiled by a C++ compiler as C++ code
#ifdef __CUDACC__
Expand All @@ -24,6 +29,48 @@
#endif

namespace {
#ifdef CORENEURON_USE_BOOST_POOL
/** Tag type for use with boost::fast_pool_allocator that forwards to
* coreneuron::[de]allocate_unified(). Using a Random123-specific type here
* makes sure that allocations do not come from the same global pool as other
* usage of boost pools for objects with sizeof == sizeof(nrnran123_State).
*
* The messy m_block_sizes map is just because `deallocate_unified` uses sized
* deallocations, but the Boost pool allocators don't. Because this is hidden
* behind the pool mechanism, these methods are not called very often and the
* overhead is minimal.
*/
struct random123_allocate_unified {
using size_type = std::size_t;
using difference_type = std::size_t;
static char* malloc(const size_type bytes) {
std::lock_guard<std::mutex> const lock{m_mutex};
static_cast<void>(lock);
auto* buffer = coreneuron::allocate_unified(bytes);
m_block_sizes[buffer] = bytes;
return reinterpret_cast<char*>(buffer);
}
static void free(char* const block) {
std::lock_guard<std::mutex> const lock{m_mutex};
static_cast<void>(lock);
auto const iter = m_block_sizes.find(block);
assert(iter != m_block_sizes.end());
auto const size = iter->second;
m_block_sizes.erase(iter);
return coreneuron::deallocate_unified(block, size);
}
static std::mutex m_mutex;
static std::unordered_map<void*, std::size_t> m_block_sizes;
};

std::mutex random123_allocate_unified::m_mutex{};
std::unordered_map<void*, std::size_t> random123_allocate_unified::m_block_sizes{};

using random123_allocator =
boost::fast_pool_allocator<coreneuron::nrnran123_State, random123_allocate_unified>;
#else
using random123_allocator = coreneuron::unified_allocator<nrnran123_State>;
#endif
/* Global data structure per process. Using a unique_ptr here causes [minor]
* problems because its destructor can be called very late during application
* shutdown. If the destructor calls cudaFree and the CUDA runtime has already
Expand Down Expand Up @@ -212,9 +259,7 @@ nrnran123_State* nrnran123_newstream3(uint32_t id1,
#endif
nrnran123_State* s{nullptr};
if (use_unified_memory) {
s = coreneuron::allocate_unique<nrnran123_State>(
coreneuron::unified_allocator<nrnran123_State>{})
.release();
s = coreneuron::allocate_unique<nrnran123_State>(random123_allocator{}).release();
} else {
s = new nrnran123_State{};
}
Expand Down Expand Up @@ -244,9 +289,7 @@ void nrnran123_deletestream(nrnran123_State* s, bool use_unified_memory) {
--g_instance_count;
}
if (use_unified_memory) {
std::unique_ptr<nrnran123_State,
coreneuron::alloc_deleter<coreneuron::unified_allocator<nrnran123_State>>>
_{s};
std::unique_ptr<nrnran123_State, coreneuron::alloc_deleter<random123_allocator>> _{s};
} else {
delete s;
}
Expand Down