Skip to content

Commit d43a118

Browse files
authored
Merge pull request #3487 from ggouaillardet/topic/v2.x/a2aw_zeros
v2.x: fix MPI_Alltoallw() with zero size messages
2 parents 1b13bf3 + 608726a commit d43a118

27 files changed

+407
-668
lines changed

ompi/mca/coll/base/coll_base_alltoallv.c

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
* Copyright (c) 2013 Los Alamos National Security, LLC. All Rights
1515
* reserved.
1616
* Copyright (c) 2013 FUJITSU LIMITED. All rights reserved.
17-
* Copyright (c) 2014-2016 Research Organization for Information Science
17+
* Copyright (c) 2014-2017 Research Organization for Information Science
1818
* and Technology (RIST). All rights reserved.
1919
* $COPYRIGHT$
2020
*
@@ -45,17 +45,16 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts
4545
int i, j, size, rank, err=MPI_SUCCESS;
4646
ompi_request_t *req;
4747
char *allocated_buffer, *tmp_buffer;
48-
size_t max_size, rdtype_size;
49-
OPAL_PTRDIFF_TYPE ext, gap;
48+
size_t max_size;
49+
OPAL_PTRDIFF_TYPE ext, gap = 0;
5050

5151
/* Initialize. */
5252

5353
size = ompi_comm_size(comm);
5454
rank = ompi_comm_rank(comm);
55-
ompi_datatype_type_size(rdtype, &rdtype_size);
5655

5756
/* If only one process, we're done. */
58-
if (1 == size || 0 == rdtype_size) {
57+
if (1 == size) {
5958
return MPI_SUCCESS;
6059
}
6160

@@ -67,6 +66,10 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts
6766
}
6867
/* The gap will always be the same as we are working on the same datatype */
6968

69+
if (OPAL_UNLIKELY(0 == max_size)) {
70+
return MPI_SUCCESS;
71+
}
72+
7073
/* Allocate a temporary buffer */
7174
allocated_buffer = calloc (max_size, 1);
7275
if (NULL == allocated_buffer) {
@@ -78,7 +81,7 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts
7881
/* in-place alltoallv slow algorithm (but works) */
7982
for (i = 0 ; i < size ; ++i) {
8083
for (j = i+1 ; j < size ; ++j) {
81-
if (i == rank && rcounts[j]) {
84+
if (i == rank && 0 != rcounts[j]) {
8285
/* Copy the data into the temporary buffer */
8386
err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[j],
8487
tmp_buffer, (char *) rbuf + rdisps[j] * ext);
@@ -93,7 +96,7 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts
9396
j, MCA_COLL_BASE_TAG_ALLTOALLV, MCA_PML_BASE_SEND_STANDARD,
9497
comm));
9598
if (MPI_SUCCESS != err) { goto error_hndl; }
96-
} else if (j == rank && rcounts[i]) {
99+
} else if (j == rank && 0 != rcounts[i]) {
97100
/* Copy the data into the temporary buffer */
98101
err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[i],
99102
tmp_buffer, (char *) rbuf + rdisps[i] * ext);

ompi/mca/coll/libnbc/Makefile.am

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,8 @@
1212
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
1313
# Copyright (c) 2013 Los Alamos National Security, LLC. All rights
1414
# reserved.
15+
# Copyright (c) 2017 Research Organization for Information Science
16+
# and Technology (RIST). All rights reserved.
1517
# $COPYRIGHT$
1618
#
1719
# Additional copyrights may follow
@@ -37,7 +39,6 @@ sources = \
3739
nbc_ialltoallw.c \
3840
nbc_ibarrier.c \
3941
nbc_ibcast.c \
40-
nbc_ibcast_inter.c \
4142
nbc_iexscan.c \
4243
nbc_igather.c \
4344
nbc_igatherv.c \

ompi/mca/coll/libnbc/nbc.c

Lines changed: 20 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
* rights reserved.
1111
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
1212
* reserved.
13-
* Copyright (c) 2015-2016 Research Organization for Information Science
13+
* Copyright (c) 2015-2017 Research Organization for Information Science
1414
* and Technology (RIST). All rights reserved.
1515
*
1616
* Author(s): Torsten Hoefler <[email protected]>
@@ -709,6 +709,25 @@ int NBC_Start(NBC_Handle *handle, NBC_Schedule *schedule) {
709709
return OMPI_SUCCESS;
710710
}
711711

712+
/*
 * Build a request for an already-constructed schedule and start it.
 *
 * Obtains a handle with NBC_Init_handle(), records tmpbuf in the handle,
 * starts the schedule with NBC_Start(), and on success stores the handle
 * (cast to ompi_request_t *) in *request.
 *
 * Returns OMPI_SUCCESS on success; otherwise the error code returned by
 * NBC_Init_handle() or NBC_Start(). On failure *request is left untouched
 * and schedule is NOT released here, so the caller remains responsible
 * for releasing it.
 *
 * NOTE(review): tmpbuf is only stored in handle->tmpbuf; presumably it is
 * freed together with the handle. It is not touched when NBC_Init_handle()
 * fails -- confirm who owns it on that path.
 */
int NBC_Schedule_request(NBC_Schedule *schedule, ompi_communicator_t *comm, ompi_coll_libnbc_module_t *module, ompi_request_t **request, void *tmpbuf) {
713+
int res;
714+
NBC_Handle *handle;
715+
/* Get a handle for this communicator/module; bail out on failure. */
res = NBC_Init_handle (comm, &handle, module);
716+
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
717+
return res;
718+
}
719+
/* Attach the caller-supplied temporary buffer (may be NULL) to the handle. */
handle->tmpbuf = tmpbuf;
720+
721+
res = NBC_Start (handle, schedule);
722+
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
723+
/* Start failed: give the handle back before reporting the error. */
NBC_Return_handle (handle);
724+
return res;
725+
}
726+
727+
/* Success: the handle itself is what the caller sees as the request. */
*request = (ompi_request_t *) handle;
728+
return OMPI_SUCCESS;
729+
}
730+
712731
#ifdef NBC_CACHE_SCHEDULE
713732
void NBC_SchedCache_args_delete_key_dummy(void *k) {
714733
/* do nothing because the key and the data element are identical :-)

ompi/mca/coll/libnbc/nbc_iallgather.c

Lines changed: 2 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,6 @@ int ompi_coll_libnbc_iallgather(const void* sendbuf, int sendcount, MPI_Datatype
5050
#ifdef NBC_CACHE_SCHEDULE
5151
NBC_Allgather_args *args, *found, search;
5252
#endif
53-
NBC_Handle *handle;
5453
ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
5554

5655
NBC_IN_PLACE(sendbuf, recvbuf, inplace);
@@ -147,20 +146,12 @@ int ompi_coll_libnbc_iallgather(const void* sendbuf, int sendcount, MPI_Datatype
147146
}
148147
#endif
149148

150-
res = NBC_Init_handle (comm, &handle, libnbc_module);
149+
res = NBC_Schedule_request(schedule, comm, libnbc_module, request, NULL);
151150
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
152151
OBJ_RELEASE(schedule);
153152
return res;
154153
}
155154

156-
res = NBC_Start (handle, schedule);
157-
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
158-
OMPI_COLL_LIBNBC_REQUEST_RETURN(handle);
159-
return res;
160-
}
161-
162-
*request = (ompi_request_t *) handle;
163-
164155
return OMPI_SUCCESS;
165156
}
166157

@@ -172,7 +163,6 @@ int ompi_coll_libnbc_iallgather_inter(const void* sendbuf, int sendcount, MPI_Da
172163
MPI_Aint rcvext;
173164
NBC_Schedule *schedule;
174165
char *rbuf;
175-
NBC_Handle *handle;
176166
ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
177167

178168
res = ompi_datatype_type_extent(recvtype, &rcvext);
@@ -213,19 +203,11 @@ int ompi_coll_libnbc_iallgather_inter(const void* sendbuf, int sendcount, MPI_Da
213203
return res;
214204
}
215205

216-
res = NBC_Init_handle (comm, &handle, libnbc_module);
206+
res = NBC_Schedule_request(schedule, comm, libnbc_module, request, NULL);
217207
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
218208
OBJ_RELEASE(schedule);
219209
return res;
220210
}
221211

222-
res = NBC_Start (handle, schedule);
223-
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
224-
OMPI_COLL_LIBNBC_REQUEST_RETURN(handle);
225-
return res;
226-
}
227-
228-
*request = (ompi_request_t *) handle;
229-
230212
return OMPI_SUCCESS;
231213
}

ompi/mca/coll/libnbc/nbc_iallgatherv.c

Lines changed: 3 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
* Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
1212
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
1313
* reserved.
14-
* Copyright (c) 2014-2016 Research Organization for Information Science
14+
* Copyright (c) 2014-2017 Research Organization for Information Science
1515
* and Technology (RIST). All rights reserved.
1616
*
1717
*/
@@ -37,7 +37,6 @@ int ompi_coll_libnbc_iallgatherv(const void* sendbuf, int sendcount, MPI_Datatyp
3737
MPI_Aint rcvext;
3838
NBC_Schedule *schedule;
3939
char *rbuf, *sbuf, inplace;
40-
NBC_Handle *handle;
4140
ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
4241

4342
NBC_IN_PLACE(sendbuf, recvbuf, inplace);
@@ -96,20 +95,12 @@ int ompi_coll_libnbc_iallgatherv(const void* sendbuf, int sendcount, MPI_Datatyp
9695
return res;
9796
}
9897

99-
res = NBC_Init_handle (comm, &handle, libnbc_module);
98+
res = NBC_Schedule_request (schedule, comm, libnbc_module, request, NULL);
10099
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
101100
OBJ_RELEASE(schedule);
102101
return res;
103102
}
104103

105-
res = NBC_Start (handle, schedule);
106-
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
107-
NBC_Return_handle (handle);
108-
return res;
109-
}
110-
111-
*request = (ompi_request_t *) handle;
112-
113104
return OMPI_SUCCESS;
114105
}
115106

@@ -120,7 +111,6 @@ int ompi_coll_libnbc_iallgatherv_inter(const void* sendbuf, int sendcount, MPI_D
120111
int res, rsize;
121112
MPI_Aint rcvext;
122113
NBC_Schedule *schedule;
123-
NBC_Handle *handle;
124114
ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
125115

126116
rsize = ompi_comm_remote_size (comm);
@@ -165,19 +155,11 @@ int ompi_coll_libnbc_iallgatherv_inter(const void* sendbuf, int sendcount, MPI_D
165155
return res;
166156
}
167157

168-
res = NBC_Init_handle (comm, &handle, libnbc_module);
158+
res = NBC_Schedule_request(schedule, comm, libnbc_module, request, NULL);
169159
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
170160
OBJ_RELEASE(schedule);
171161
return res;
172162
}
173163

174-
res = NBC_Start (handle, schedule);
175-
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
176-
NBC_Return_handle (handle);
177-
return res;
178-
}
179-
180-
*request = (ompi_request_t *) handle;
181-
182164
return OMPI_SUCCESS;
183165
}

0 commit comments

Comments
 (0)