Skip to content

Commit 84b4e7f

Browse files
committed
Fix sync image race condition.
Sync images works by setting up asynchronous receivers for each image in the sync set. Next all images check the image status of all other images participating in the sync. Then each image sends a zero int or the stopped image special code to all other images in the sync set. At last all images wait for the asynchronous receivers to get their data. The race here was, that an image could be in the waiting phase while the stopped image had not set its status correctly yet. The waiting image did not return then, because it never got the stopped image code from the stopped image. To solve this two changes had to be made: 1. caf_finalize() now calls sync_image_internal () 2. After waiting sync_image_internal() checks the status of the image, that send its data, again. sync_image() has be renamed to sync_image_internal(). A flag was added to distinguish calls to sync_image_internal() from caf_finalize and regular sync image calls. The latter shall report an error on failure, while the former keeps silent. This commit fixes the timeout of syncimage_status.f90 mentioned in #298.
1 parent 0696306 commit 84b4e7f

File tree

2 files changed

+33
-13
lines changed

2 files changed

+33
-13
lines changed

src/mpi/mpi_caf.c

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,11 @@ PREFIX (init) (int *argc, char ***argv)
421421
/* MPI_Barrier(CAF_COMM_WORLD); */
422422
}
423423

424+
/* Forward declaration of sync_images. */
425+
426+
void
427+
sync_images_internal (int count, int images[], int *stat, char *errmsg,
428+
int errmsg_len, bool internal);
424429

425430
/* Finalize coarray program. */
426431

@@ -431,10 +436,11 @@ _gfortran_caf_finalize (void)
431436
PREFIX (finalize) (void)
432437
#endif
433438
{
439+
int empty[0];
434440
*img_status = STAT_STOPPED_IMAGE; /* GFC_STAT_STOPPED_IMAGE = 6000 */
435441
MPI_Win_sync(*stat_tok);
436442

437-
MPI_Barrier(CAF_COMM_WORLD);
443+
sync_images_internal (-1, empty, NULL, NULL, 0, true);
438444

439445
while (caf_static_list != NULL)
440446
{
@@ -2987,6 +2993,13 @@ PREFIX(is_present) (caf_token_t token, int image_index, caf_reference_t *refs)
29872993
void
29882994
PREFIX (sync_images) (int count, int images[], int *stat, char *errmsg,
29892995
int errmsg_len)
2996+
{
2997+
sync_images_internal (count, images, stat, errmsg, errmsg_len, false);
2998+
}
2999+
3000+
void
3001+
sync_images_internal (int count, int images[], int *stat, char *errmsg,
3002+
int errmsg_len, bool internal)
29903003
{
29913004
int ierr = 0, i = 0, remote_stat = 0, j = 0;
29923005
MPI_Status s;
@@ -3049,16 +3062,10 @@ PREFIX (sync_images) (int count, int images[], int *stat, char *errmsg,
30493062
CAF_COMM_WORLD, &handlers[i]);
30503063
for(i = 0; i < count; ++i)
30513064
{
3052-
# ifdef CAF_MPI_LOCK_UNLOCK
3053-
MPI_Win_lock (MPI_LOCK_SHARED, images[i] - 1, 0, *stat_tok);
3054-
# endif // CAF_MPI_LOCK_UNLOCK
3065+
CAF_Win_lock (MPI_LOCK_SHARED, images[i] - 1, *stat_tok);
30553066
ierr = MPI_Get (&remote_stat, 1, MPI_INT,
30563067
images[i] - 1, 0, 1, MPI_INT, *stat_tok);
3057-
# ifdef CAF_MPI_LOCK_UNLOCK
3058-
MPI_Win_unlock (images[i] - 1, *stat_tok);
3059-
# else // CAF_MPI_LOCK_UNLOCK
3060-
MPI_Win_flush (images[i] - 1, *stat_tok);
3061-
# endif // CAF_MPI_LOCK_UNLOCK
3068+
CAF_Win_unlock (images[i] - 1, *stat_tok);
30623069
if(remote_stat != 0)
30633070
{
30643071
ierr = STAT_STOPPED_IMAGE;
@@ -3071,12 +3078,15 @@ PREFIX (sync_images) (int count, int images[], int *stat, char *errmsg,
30713078
}
30723079
if (ierr == 0)
30733080
{
3081+
int zero = 0;
30743082
int done_count = 0;
30753083
for(i = 0; i < count; ++i)
30763084
{
30773085
if (arrived[images[i] - 1] != STAT_STOPPED_IMAGE)
3078-
/* Only send, when no stopped images have been found. */
3079-
ierr = MPI_Send (&caf_this_image, 1, MPI_INT, images[i] - 1, 0,
3086+
/* Only send, when no stopped images have been found.
3087+
Do not send our id, because on very large clusters one id
3088+
and he STAT_STOPPED_IMAGE code may be the same. */
3089+
ierr = MPI_Send (&zero, 1, MPI_INT, images[i] - 1, 0,
30803090
CAF_COMM_WORLD);
30813091
else
30823092
ierr = STAT_STOPPED_IMAGE;
@@ -3089,6 +3099,17 @@ PREFIX (sync_images) (int count, int images[], int *stat, char *errmsg,
30893099
++done_count;
30903100
if (i != MPI_UNDEFINED && arrived[i] == STAT_STOPPED_IMAGE)
30913101
ierr = STAT_STOPPED_IMAGE;
3102+
else if (i != MPI_UNDEFINED)
3103+
{
3104+
/* Check that after a late stopped image has set its status
3105+
that status is not stopped. */
3106+
CAF_Win_lock (MPI_LOCK_SHARED, images[i] - 1, *stat_tok);
3107+
ierr = MPI_Get (&remote_stat, 1, MPI_INT,
3108+
images[i] - 1, 0, 1, MPI_INT, *stat_tok);
3109+
CAF_Win_unlock (images[i] - 1, *stat_tok);
3110+
if (remote_stat != 0)
3111+
ierr = STAT_STOPPED_IMAGE;
3112+
}
30923113
else if (ierr != MPI_SUCCESS)
30933114
break;
30943115
}
@@ -3117,7 +3138,7 @@ PREFIX (sync_images) (int count, int images[], int *stat, char *errmsg,
31173138
if (errmsg_len > len)
31183139
memset (&errmsg[len], ' ', errmsg_len-len);
31193140
}
3120-
else
3141+
else if (!internal)
31213142
caf_runtime_error (msg);
31223143
}
31233144
}

src/tests/unit/sync/syncimages_status.f90

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ program sync_images_stat
1010
me = this_image()
1111

1212
if (me /= 1 ) then
13-
call sleep(1)
1413
sync images(*,STAT=stat_var)
1514
if ( stat_var /= STAT_STOPPED_IMAGE) then
1615
print *, "Error:stat_var /= STAT_STOPPED_IMAGE: ", me

0 commit comments

Comments
 (0)