diff --git a/ompi/communicator/comm.c b/ompi/communicator/comm.c index 072d5a49fb3..f1e78e078bd 100644 --- a/ompi/communicator/comm.c +++ b/ompi/communicator/comm.c @@ -692,7 +692,7 @@ ompi_comm_split_type(ompi_communicator_t *comm, int my_rsize; int mode; int rsize; - int i, loc; + int i, loc, found; int inter; int *results=NULL, *sorted=NULL; int *rresults=NULL, *rsorted=NULL; @@ -711,7 +711,51 @@ ompi_comm_split_type(ompi_communicator_t *comm, /* --------------------------------------------------------- */ /* sort according to participation and rank. Gather information from everyone */ - myinfo[0] = (split_type == MPI_COMM_TYPE_SHARED) ? 1 : 0; + /* allowed splitting types: + CLUSTER + CU + HOST + BOARD + NODE + NUMA + SOCKET + L3CACHE + L2CACHE + L1CACHE + CORE + HWTHREAD + Even though HWTHREAD/CORE etc. is overkill they are here for consistency. + They will most likely return a communicator which is equal to MPI_COMM_SELF + Unless oversubscribing. + */ + myinfo[0] = 0; // default to no type splitting (also if non-recognized split-type) + switch ( split_type ) { + case OMPI_COMM_TYPE_HWTHREAD: + myinfo[0] = 1; break; + case OMPI_COMM_TYPE_CORE: + myinfo[0] = 2; break; + case OMPI_COMM_TYPE_L1CACHE: + myinfo[0] = 3; break; + case OMPI_COMM_TYPE_L2CACHE: + myinfo[0] = 4; break; + case OMPI_COMM_TYPE_L3CACHE: + myinfo[0] = 5; break; + case OMPI_COMM_TYPE_SOCKET: + myinfo[0] = 6; break; + case OMPI_COMM_TYPE_NUMA: + myinfo[0] = 7; break; + //case MPI_COMM_TYPE_SHARED: // the standard implemented type + case OMPI_COMM_TYPE_NODE: + myinfo[0] = 8; break; + case OMPI_COMM_TYPE_BOARD: + myinfo[0] = 9; break; + case OMPI_COMM_TYPE_HOST: + myinfo[0] = 10; break; + case OMPI_COMM_TYPE_CU: + myinfo[0] = 11; break; + case OMPI_COMM_TYPE_CLUSTER: + myinfo[0] = 12; break; + } myinfo[1] = key; size = ompi_comm_size ( comm ); @@ -731,13 +775,65 @@ ompi_comm_split_type(ompi_communicator_t *comm, if ( OMPI_SUCCESS != rc ) { goto exit; } - + + /* check that all processors have been 
called with the same value */ + for ( i=0; i < size; i++) { + if ( results[2*i] != myinfo[0] ) { + rc = OMPI_ERR_BAD_PARAM; + goto exit; + } + } + /* how many are participating and on my node? */ for ( my_size = 0, i=0; i < size; i++) { - if ( results[(2*i)+0] == 1) { + if ( results[2*i] == 1 ) { + if (OPAL_PROC_ON_LOCAL_HWTHREAD(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { + my_size++; + } + } else if ( results[2*i] == 2 ) { + if (OPAL_PROC_ON_LOCAL_CORE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { + my_size++; + } + } else if ( results[2*i] == 3 ) { + if (OPAL_PROC_ON_LOCAL_L1CACHE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { + my_size++; + } + } else if ( results[2*i] == 4 ) { + if (OPAL_PROC_ON_LOCAL_L2CACHE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { + my_size++; + } + } else if ( results[2*i] == 5 ) { + if (OPAL_PROC_ON_LOCAL_L3CACHE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { + my_size++; + } + } else if ( results[2*i] == 6 ) { + if (OPAL_PROC_ON_LOCAL_SOCKET(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { + my_size++; + } + } else if ( results[2*i] == 7 ) { + if (OPAL_PROC_ON_LOCAL_NUMA(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { + my_size++; + } + } else if ( results[2*i] == 8 ) { if (OPAL_PROC_ON_LOCAL_NODE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { my_size++; } + } else if ( results[2*i] == 9 ) { + if (OPAL_PROC_ON_LOCAL_BOARD(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { + my_size++; + } + } else if ( results[2*i] == 10 ) { + if (OPAL_PROC_ON_LOCAL_HOST(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { + my_size++; + } + } else if ( results[2*i] == 11 ) { + if (OPAL_PROC_ON_LOCAL_CU(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { + my_size++; + } + } else if ( results[2*i] == 12 ) { + if 
(OPAL_PROC_ON_LOCAL_CLUSTER(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { + my_size++; + } } } @@ -755,13 +851,63 @@ ompi_comm_split_type(ompi_communicator_t *comm, /* ok we can now fill this info */ for( loc = 0, i = 0; i < size; i++ ) { - if ( results[(2*i)+0] == 1) { + found = 0; + if ( results[2*i] == 1 ) { + if (OPAL_PROC_ON_LOCAL_HWTHREAD(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { + found = 1; + } + } else if ( results[2*i] == 2 ) { + if (OPAL_PROC_ON_LOCAL_CORE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { + found = 1; + } + } else if ( results[2*i] == 3 ) { + if (OPAL_PROC_ON_LOCAL_L1CACHE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { + found = 1; + } + } else if ( results[2*i] == 4 ) { + if (OPAL_PROC_ON_LOCAL_L2CACHE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { + found = 1; + } + } else if ( results[2*i] == 5 ) { + if (OPAL_PROC_ON_LOCAL_L3CACHE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { + found = 1; + } + } else if ( results[2*i] == 6 ) { + if (OPAL_PROC_ON_LOCAL_SOCKET(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { + found = 1; + } + } else if ( results[2*i] == 7 ) { + if (OPAL_PROC_ON_LOCAL_NUMA(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { + found = 1; + } + } else if ( results[2*i] == 8 ) { if (OPAL_PROC_ON_LOCAL_NODE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { - sorted[(2*loc)+0] = i; /* copy org rank */ - sorted[(2*loc)+1] = results[(2*i)+1]; /* copy key */ - loc++; + found = 1; + } + } else if ( results[2*i] == 9 ) { + if (OPAL_PROC_ON_LOCAL_BOARD(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { + found = 1; + } + } else if ( results[2*i] == 10 ) { + if (OPAL_PROC_ON_LOCAL_HOST(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { + found = 1; + } + } else if ( results[2*i] == 11 ) { + if 
(OPAL_PROC_ON_LOCAL_CU(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { + found = 1; + } + } else if ( results[2*i] == 12 ) { + if (OPAL_PROC_ON_LOCAL_CLUSTER(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) { + found = 1; } } + + /* we have found and occupied the index (i) */ + if ( found == 1 ) { + sorted[2*loc ] = i; /* copy org rank */ + sorted[2*loc+1] = results[2*i+1]; /* copy key */ + loc++; + } } /* the new array needs to be sorted so that it is in 'key' order */ @@ -800,10 +946,54 @@ ompi_comm_split_type(ompi_communicator_t *comm, /* how many are participating and on my node? */ for ( my_rsize = 0, i=0; i < rsize; i++) { - if ( rresults[(2*i)+0] == 1) { + if ( rresults[2*i] == 1 ) { + if (OPAL_PROC_ON_LOCAL_HWTHREAD(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { + my_rsize++; + } + } else if ( rresults[2*i] == 2 ) { + if (OPAL_PROC_ON_LOCAL_CORE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { + my_rsize++; + } + } else if ( rresults[2*i] == 3 ) { + if (OPAL_PROC_ON_LOCAL_L1CACHE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { + my_rsize++; + } + } else if ( rresults[2*i] == 4 ) { + if (OPAL_PROC_ON_LOCAL_L2CACHE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { + my_rsize++; + } + } else if ( rresults[2*i] == 5 ) { + if (OPAL_PROC_ON_LOCAL_L3CACHE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { + my_rsize++; + } + } else if ( rresults[2*i] == 6 ) { + if (OPAL_PROC_ON_LOCAL_SOCKET(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { + my_rsize++; + } + } else if ( rresults[2*i] == 7 ) { + if (OPAL_PROC_ON_LOCAL_NUMA(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { + my_rsize++; + } + } else if ( rresults[2*i] == 8 ) { if (OPAL_PROC_ON_LOCAL_NODE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { my_rsize++; } + } else if ( rresults[2*i] == 9 ) { + if 
(OPAL_PROC_ON_LOCAL_BOARD(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { + my_rsize++; + } + } else if ( rresults[2*i] == 10 ) { + if (OPAL_PROC_ON_LOCAL_HOST(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { + my_rsize++; + } + } else if ( rresults[2*i] == 11 ) { + if (OPAL_PROC_ON_LOCAL_CU(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { + my_rsize++; + } + } else if ( rresults[2*i] == 12 ) { + if (OPAL_PROC_ON_LOCAL_CLUSTER(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { + my_rsize++; + } } } @@ -816,12 +1006,61 @@ ompi_comm_split_type(ompi_communicator_t *comm, /* ok we can now fill this info */ for( loc = 0, i = 0; i < rsize; i++ ) { - if ( rresults[(2*i)+0] == 1) { + found = 0; + if ( rresults[2*i] == 1 ) { + if (OPAL_PROC_ON_LOCAL_HWTHREAD(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { + found = 1; + } + } else if ( rresults[2*i] == 2 ) { + if (OPAL_PROC_ON_LOCAL_CORE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { + found = 1; + } + } else if ( rresults[2*i] == 3 ) { + if (OPAL_PROC_ON_LOCAL_L1CACHE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { + found = 1; + } + } else if ( rresults[2*i] == 4 ) { + if (OPAL_PROC_ON_LOCAL_L2CACHE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { + found = 1; + } + } else if ( rresults[2*i] == 5 ) { + if (OPAL_PROC_ON_LOCAL_L3CACHE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { + found = 1; + } + } else if ( rresults[2*i] == 6 ) { + if (OPAL_PROC_ON_LOCAL_SOCKET(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { + found = 1; + } + } else if ( rresults[2*i] == 7 ) { + if (OPAL_PROC_ON_LOCAL_NUMA(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { + found = 1; + } + } else if ( rresults[2*i] == 8 ) { if (OPAL_PROC_ON_LOCAL_NODE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { 
- rsorted[(2*loc)+0] = i; /* org rank */ - rsorted[(2*loc)+1] = rresults[(2*i)+1]; /* key */ - loc++; + found = 1; + } + } else if ( rresults[2*i] == 9 ) { + if (OPAL_PROC_ON_LOCAL_BOARD(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { + found = 1; + } + } else if ( rresults[2*i] == 10 ) { + if (OPAL_PROC_ON_LOCAL_HOST(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { + found = 1; } + } else if ( rresults[2*i] == 11 ) { + if (OPAL_PROC_ON_LOCAL_CU(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { + found = 1; + } + } else if ( rresults[2*i] == 12 ) { + if (OPAL_PROC_ON_LOCAL_CLUSTER(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) { + found = 1; + } + } + + if ( found == 1 ) { + rsorted[2*loc ] = i; /* org rank */ + rsorted[2*loc+1] = rresults[2*i+1]; /* key */ + loc++; } } diff --git a/ompi/include/mpi.h.in b/ompi/include/mpi.h.in index a629ce0216f..be899a593e4 100644 --- a/ompi/include/mpi.h.in +++ b/ompi/include/mpi.h.in @@ -667,8 +667,20 @@ enum { * (see also mpif-common.h.fin). 
*/ enum { - MPI_COMM_TYPE_SHARED + OMPI_COMM_TYPE_HWTHREAD, + OMPI_COMM_TYPE_CORE, + OMPI_COMM_TYPE_L1CACHE, + OMPI_COMM_TYPE_L2CACHE, + OMPI_COMM_TYPE_L3CACHE, + OMPI_COMM_TYPE_SOCKET, + OMPI_COMM_TYPE_NUMA, + OMPI_COMM_TYPE_NODE, + OMPI_COMM_TYPE_BOARD, + OMPI_COMM_TYPE_HOST, + OMPI_COMM_TYPE_CU, + OMPI_COMM_TYPE_CLUSTER }; +#define MPI_COMM_TYPE_SHARED OMPI_COMM_TYPE_NODE /* * MPIT Verbosity Levels diff --git a/ompi/include/mpif-values.pl b/ompi/include/mpif-values.pl index 32a8a07e9df..975aa3fc2cc 100755 --- a/ompi/include/mpif-values.pl +++ b/ompi/include/mpif-values.pl @@ -354,7 +354,19 @@ sub write_file { $constants->{MPI_COMBINER_RESIZED} = 17; $constants->{MPI_COMBINER_HINDEXED_BLOCK} = 18; -$constants->{MPI_COMM_TYPE_SHARED} = 0; +$constants->{OMPI_COMM_TYPE_HWTHREAD} = 0; +$constants->{OMPI_COMM_TYPE_CORE} = 1; +$constants->{OMPI_COMM_TYPE_L1CACHE} = 2; +$constants->{OMPI_COMM_TYPE_L2CACHE} = 3; +$constants->{OMPI_COMM_TYPE_L3CACHE} = 4; +$constants->{OMPI_COMM_TYPE_SOCKET} = 5; +$constants->{OMPI_COMM_TYPE_NUMA} = 6; +$constants->{OMPI_COMM_TYPE_NODE} = 7; +$constants->{MPI_COMM_TYPE_SHARED} = 7; +$constants->{OMPI_COMM_TYPE_BOARD} = 8; +$constants->{OMPI_COMM_TYPE_HOST} = 9; +$constants->{OMPI_COMM_TYPE_CU} = 10; +$constants->{OMPI_COMM_TYPE_CLUSTER} = 11; #---------------------------------------------------------------------------- diff --git a/ompi/mpi/c/comm_split_type.c b/ompi/mpi/c/comm_split_type.c index e9776f24c27..1fdece79a0f 100644 --- a/ompi/mpi/c/comm_split_type.c +++ b/ompi/mpi/c/comm_split_type.c @@ -61,7 +61,19 @@ int MPI_Comm_split_type(MPI_Comm comm, int split_type, int key, FUNC_NAME); } - if ( MPI_COMM_TYPE_SHARED != split_type && + if ( MPI_COMM_TYPE_SHARED != split_type && // Same as OMPI_COMM_TYPE_NODE + OMPI_COMM_TYPE_CLUSTER != split_type && + OMPI_COMM_TYPE_CU != split_type && + OMPI_COMM_TYPE_HOST != split_type && + OMPI_COMM_TYPE_BOARD != split_type && + OMPI_COMM_TYPE_NODE != split_type && // Same as MPI_COMM_TYPE_SHARED + 
OMPI_COMM_TYPE_NUMA != split_type && + OMPI_COMM_TYPE_SOCKET != split_type && + OMPI_COMM_TYPE_L3CACHE != split_type && + OMPI_COMM_TYPE_L2CACHE != split_type && + OMPI_COMM_TYPE_L1CACHE != split_type && + OMPI_COMM_TYPE_CORE != split_type && + OMPI_COMM_TYPE_HWTHREAD != split_type && MPI_UNDEFINED != split_type ) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); diff --git a/ompi/mpi/man/man3/MPI_Comm_split_type.3in b/ompi/mpi/man/man3/MPI_Comm_split_type.3in index fe0ed84996d..acac9c7309d 100644 --- a/ompi/mpi/man/man3/MPI_Comm_split_type.3in +++ b/ompi/mpi/man/man3/MPI_Comm_split_type.3in @@ -62,10 +62,53 @@ value MPI_UNDEFINED, in which case newcomm returns MPI_COMM_NULL. .SH SPLIT TYPES .ft R - .TP 1i -MPI_COMM_TYPE_SHARED +MPI_COMM_TYPE_SHARED|OMPI_COMM_TYPE_NODE This type splits the communicator into subcommunicators, each of which can create a shared memory region. +.ft R +.TP 1i +OMPI_COMM_TYPE_HWTHREAD +This type splits the communicator into subcommunicators, each of which belongs to the same hardware thread. +.ft R +.TP 1i +OMPI_COMM_TYPE_CORE +This type splits the communicator into subcommunicators, each of which belongs to the same core/processing unit. +.ft R +.TP 1i +OMPI_COMM_TYPE_L1CACHE +This type splits the communicator into subcommunicators, each of which belongs to the same L1 cache. +.ft R +.TP 1i +OMPI_COMM_TYPE_L2CACHE +This type splits the communicator into subcommunicators, each of which belongs to the same L2 cache. +.ft R +.TP 1i +OMPI_COMM_TYPE_L3CACHE +This type splits the communicator into subcommunicators, each of which belongs to the same L3 cache. +.ft R +.TP 1i +OMPI_COMM_TYPE_SOCKET +This type splits the communicator into subcommunicators, each of which belongs to the same socket. +.ft R +.TP 1i +OMPI_COMM_TYPE_NUMA +This type splits the communicator into subcommunicators, each of which belongs to the same NUMA-node. 
+.ft R +.TP 1i +OMPI_COMM_TYPE_BOARD +This type splits the communicator into subcommunicators, each of which belongs to the same board. +.ft R +.TP 1i +OMPI_COMM_TYPE_HOST +This type splits the communicator into subcommunicators, each of which belongs to the same host. +.ft R +.TP 1i +OMPI_COMM_TYPE_CU +This type splits the communicator into subcommunicators, each of which belongs to the same computational unit. +.ft R +.TP 1i +OMPI_COMM_TYPE_CLUSTER +This type splits the communicator into subcommunicators, each of which belongs to the same cluster. .SH NOTES .ft R @@ -79,6 +122,8 @@ Multiple calls to MPI_Comm_split_type can be used to overcome the requirement th Note that keys need not be unique. It is MPI_Comm_split_type's responsibility to sort processes in ascending order according to this key, and to break ties in a consistent way. If all the keys are specified in the same way, then all the processes in a given color will have the relative rank order as they did in their parent group. (In general, they will have different ranks.) .sp Essentially, making the key value zero for all processes of a given split_type means that one needn't really pay attention to the rank-order of the processes in the new communicator. +.sp +The split types prefixed with OMPI_ instead of MPI_ are specific to Open MPI and are not part of the MPI standard. .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument.