Skip to content
This repository was archived by the owner on Mar 20, 2023. It is now read-only.

Commit 30845dd

Browse files
committed
Enable fast_imem on GPU.
Updates mod2c/nmodl submodule commits to include relevant fixes, BlueBrain/mod2c#64 and BlueBrain/nmodl#681. Closes #197.
1 parent 9b18271 commit 30845dd

File tree

7 files changed

+64
-4
lines changed

7 files changed

+64
-4
lines changed

coreneuron/gpu/nrn_acc_manager.cpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,23 @@ void setup_nrnthreads_on_device(NrnThread* threads, int nthreads) {
291291
acc_memcpy_to_device(&(d_nt->_shadow_d), &d_shadow_ptr, sizeof(double*));
292292
}
293293

294+
/* Fast membrane current calculation struct */
295+
if (nt->nrn_fast_imem) {
296+
auto* d_fast_imem = reinterpret_cast<NrnFastImem*>(
297+
acc_copyin(nt->nrn_fast_imem, sizeof(NrnFastImem)));
298+
acc_memcpy_to_device(&(d_nt->nrn_fast_imem), &d_fast_imem, sizeof(NrnFastImem*));
299+
{
300+
auto* d_ptr = reinterpret_cast<double*>(
301+
acc_copyin(nt->nrn_fast_imem->nrn_sav_rhs, nt->end * sizeof(double)));
302+
acc_memcpy_to_device(&(d_fast_imem->nrn_sav_rhs), &d_ptr, sizeof(double*));
303+
}
304+
{
305+
auto* d_ptr = reinterpret_cast<double*>(
306+
acc_copyin(nt->nrn_fast_imem->nrn_sav_d, nt->end * sizeof(double)));
307+
acc_memcpy_to_device(&(d_fast_imem->nrn_sav_d), &d_ptr, sizeof(double*));
308+
}
309+
}
310+
294311
if (nt->n_pntproc) {
295312
/* copy Point_processes array and fix the pointer to execute net_receive blocks on GPU
296313
*/
@@ -659,6 +676,11 @@ void update_nrnthreads_on_host(NrnThread* threads, int nthreads) {
659676
acc_update_self(nt->_shadow_d, pcnt * sizeof(double));
660677
}
661678

679+
if (nt->nrn_fast_imem) {
680+
acc_update_self(nt->nrn_fast_imem->nrn_sav_rhs, nt->end * sizeof(double));
681+
acc_update_self(nt->nrn_fast_imem->nrn_sav_d, nt->end * sizeof(double));
682+
}
683+
662684
if (nt->n_pntproc) {
663685
acc_update_self(nt->pntprocs, nt->n_pntproc * sizeof(Point_process));
664686
}
@@ -748,6 +770,11 @@ void update_nrnthreads_on_device(NrnThread* threads, int nthreads) {
748770
acc_update_device(nt->_shadow_d, pcnt * sizeof(double));
749771
}
750772

773+
if (nt->nrn_fast_imem) {
774+
acc_update_device(nt->nrn_fast_imem->nrn_sav_rhs, nt->end * sizeof(double));
775+
acc_update_device(nt->nrn_fast_imem->nrn_sav_d, nt->end * sizeof(double));
776+
}
777+
751778
if (nt->n_pntproc) {
752779
acc_update_device(nt->pntprocs, nt->n_pntproc * sizeof(Point_process));
753780
}
@@ -787,6 +814,19 @@ void update_voltage_from_gpu(NrnThread* nt) {
787814
}
788815
}
789816

817+
/**
818+
* @brief Copy fast_imem vectors from GPU to CPU.
819+
*
820+
*/
821+
void update_fast_imem_from_gpu(NrnThread* nt) {
822+
if (nt->compute_gpu && nt->end > 0 && nt->nrn_fast_imem) {
823+
int num_fast_imem = nt->end;
824+
double* fast_imem_d = nt->nrn_fast_imem->nrn_sav_d;
825+
double* fast_imem_rhs = nt->nrn_fast_imem->nrn_sav_rhs;
826+
#pragma acc update host(fast_imem_d [0:num_fast_imem], fast_imem_rhs [0:num_fast_imem])
827+
}
828+
}
829+
790830
/**
791831
* Copy weights from GPU to CPU
792832
*
@@ -940,6 +980,11 @@ void delete_nrnthreads_on_device(NrnThread* threads, int nthreads) {
940980
acc_delete(nt->pntprocs, nt->n_pntproc * sizeof(Point_process));
941981
}
942982

983+
if (nt->nrn_fast_imem) {
984+
acc_delete(nt->nrn_fast_imem->nrn_sav_d, nt->end * sizeof(double));
985+
acc_delete(nt->nrn_fast_imem->nrn_sav_rhs, nt->end * sizeof(double));
986+
}
987+
943988
if (nt->shadow_rhs_cnt) {
944989
int pcnt = nrn_soa_padded_size(nt->shadow_rhs_cnt, 0);
945990
acc_delete(nt->_shadow_d, pcnt * sizeof(double));

coreneuron/gpu/nrn_acc_manager.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ void update_net_receive_buffer(NrnThread* _nt);
2929
void realloc_net_receive_buffer(NrnThread* nt, Memb_list* ml);
3030
void update_net_send_buffer_on_host(NrnThread* nt, NetSendBuffer_t* nsb);
3131
void update_voltage_from_gpu(NrnThread* nt);
32+
void update_fast_imem_from_gpu(NrnThread* nt);
3233
void update_weights_from_gpu(NrnThread* threads, int nthreads);
3334
void init_gpu();
3435

coreneuron/sim/fadvance_core.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,9 @@ void nrncore2nrn_send_values(NrnThread* nth) {
298298
// Currently we are updating voltages if there is any trajectory
299299
// requested by NEURON.
300300
update_voltage_from_gpu(nth);
301+
// \todo Check if this information has been requested by the user for
302+
// this NrnThread object.
303+
update_fast_imem_from_gpu(nth);
301304

302305
if (tr->varrays) { // full trajectories into Vector data
303306
double** va = tr->varrays;

coreneuron/sim/fast_imem.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ void nrn_calc_fast_imem(NrnThread* nt) {
5050

5151
double* fast_imem_d = nt->nrn_fast_imem->nrn_sav_d;
5252
double* fast_imem_rhs = nt->nrn_fast_imem->nrn_sav_rhs;
53+
#pragma acc parallel loop present(vec_rhs, \
54+
vec_area, \
55+
fast_imem_d, \
56+
fast_imem_rhs) if (nt->compute_gpu) async(nt->stream_id)
5357
for (int i = i1; i < i3; ++i) {
5458
fast_imem_rhs[i] = (fast_imem_d[i] * vec_rhs[i] + fast_imem_rhs[i]) * vec_area[i] * 0.01;
5559
}

coreneuron/sim/treeset_core.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,14 @@ static void nrn_rhs(NrnThread* _nt) {
4444
}
4545

4646
if (_nt->nrn_fast_imem) {
47+
double* fast_imem_d = _nt->nrn_fast_imem->nrn_sav_d;
48+
double* fast_imem_rhs = _nt->nrn_fast_imem->nrn_sav_rhs;
49+
#pragma acc parallel loop present(fast_imem_d [i1:i3], \
50+
fast_imem_rhs [i1:i3]) if (_nt->compute_gpu) \
51+
async(_nt->stream_id)
4752
for (int i = i1; i < i3; ++i) {
48-
_nt->nrn_fast_imem->nrn_sav_rhs[i] = 0.;
49-
_nt->nrn_fast_imem->nrn_sav_d[i] = 0.;
53+
fast_imem_d[i] = 0.;
54+
fast_imem_rhs[i] = 0.;
5055
}
5156
}
5257

@@ -71,6 +76,7 @@ static void nrn_rhs(NrnThread* _nt) {
7176
so here we transform so it only has membrane current contribution
7277
*/
7378
double* p = _nt->nrn_fast_imem->nrn_sav_rhs;
79+
#pragma acc parallel loop present(p, vec_rhs) if (_nt->compute_gpu) async(_nt->stream_id)
7480
for (int i = i1; i < i3; ++i) {
7581
p[i] -= vec_rhs[i];
7682
}
@@ -144,6 +150,7 @@ static void nrn_lhs(NrnThread* _nt) {
144150
so here we transform so it only has membrane current contribution
145151
*/
146152
double* p = _nt->nrn_fast_imem->nrn_sav_d;
153+
#pragma acc parallel loop present(p, vec_d) if (_nt->compute_gpu) async(_nt->stream_id)
147154
for (int i = i1; i < i3; ++i) {
148155
p[i] += vec_d[i];
149156
}

external/mod2c

0 commit comments

Comments
 (0)