Call cudaSetDevice in OpenMP mode.

olupton · olupton · commit 7f060adbf26d · 2021-12-15T10:10:30.000+01:00
diff --git a/coreneuron/gpu/nrn_acc_manager.cpp b/coreneuron/gpu/nrn_acc_manager.cpp
@@ -28,6 +28,10 @@
 #include <pat_api.h>
 #endif
 
+#if defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && defined(_OPENMP)
+#include <cuda_runtime_api.h>
+#endif
+
 namespace coreneuron {
 extern InterleaveInfo* interleave_info;
 void copy_ivoc_vect_to_device(const IvocVect& iv, IvocVect& div);
@@ -60,6 +64,12 @@ void cnrn_target_set_default_device(int device_num) {
 #elif defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \
     defined(_OPENMP)
     omp_set_default_device(device_num);
+    // With NVHPC 21.9, setting only the default OpenMP device does not seem to
+    // be enough: there were errors on some nodes when a GPU other than the 0th
+    // was used. These seemed to be related to the NMODL instance structs, which
+    // are allocated using cudaMallocManaged.
+    auto const cuda_code = cudaSetDevice(device_num);
+    assert(cuda_code == cudaSuccess);
 #else
     throw std::runtime_error(
         "cnrn_target_set_default_device() not implemented without OpenACC/OpenMP and gpu build");