From 51b01cdfeb3c1ece568100a8bed2f5e16e9c37ed Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Wed, 10 May 2023 14:57:59 -0400 Subject: [PATCH] Use acquire/release barrier to synchronize completion of request and test/wait Before completing the request we should make sure that all other pieces are visible, the status. The release barrier should synchronize with an acquire barrier before reading the status in test/wait. This also removes a full memory barrier at the beginning of test functions that does not seem to serve any purpose. Signed-off-by: Joseph Schuchart --- ompi/request/req_test.c | 13 +++++++++---- ompi/request/req_wait.c | 11 +++++++++++ ompi/request/request.h | 2 ++ 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/ompi/request/req_test.c b/ompi/request/req_test.c index cd04645a0c0..b28ade8a67a 100644 --- a/ompi/request/req_test.c +++ b/ompi/request/req_test.c @@ -37,7 +37,6 @@ int ompi_request_default_test(ompi_request_t ** rptr, recheck_request_status: #endif - opal_atomic_mb(); if( request->req_state == OMPI_REQUEST_INACTIVE ) { *completed = true; if (MPI_STATUS_IGNORE != status) { @@ -55,6 +54,8 @@ int ompi_request_default_test(ompi_request_t ** rptr, ompi_grequest_invoke_query(request, &request->req_status); } if (MPI_STATUS_IGNORE != status) { + /* make sure we get the correct status */ + opal_atomic_rmb(); OMPI_COPY_STATUS(status, request->req_status, false); } if( request->req_persistent ) { @@ -108,7 +109,6 @@ int ompi_request_default_test_any( ompi_request_t **rptr; ompi_request_t *request; - opal_atomic_mb(); rptr = requests; for (i = 0; i < count; i++, rptr++) { request = *rptr; @@ -129,6 +129,8 @@ int ompi_request_default_test_any( ompi_grequest_invoke_query(request, &request->req_status); } if (MPI_STATUS_IGNORE != status) { + /* make sure we get the correct status */ + opal_atomic_rmb(); OMPI_COPY_STATUS(status, request->req_status, false); } @@ -186,7 +188,6 @@ int ompi_request_default_test_all( ompi_request_t *request; int do_it_once = 0; - opal_atomic_mb(); for (i = 0; i < count; i++) { request = requests[i]; @@ -231,6 +232,8 @@ int ompi_request_default_test_all( rc = MPI_SUCCESS; if (MPI_STATUSES_IGNORE != statuses) { + /* make sure we get the correct statuses */ + opal_atomic_rmb(); /* fill out completion status and free request if required */ for( i = 0; i < count; i++, rptr++ ) { request = *rptr; @@ -318,7 +321,6 @@ int ompi_request_default_test_some( ompi_request_t **rptr; ompi_request_t *request; - opal_atomic_mb(); rptr = requests; for (i = 0; i < count; i++, rptr++) { request = *rptr; @@ -357,6 +359,9 @@ int ompi_request_default_test_some( return OMPI_SUCCESS; } + /* make sure we get the correct statuses */ + opal_atomic_rmb(); + /* fill out completion status and free request if required */ for( i = 0; i < num_requests_done; i++) { request = requests[indices[i]]; diff --git a/ompi/request/req_wait.c b/ompi/request/req_wait.c index 98a89faba70..d66eccf9f79 100644 --- a/ompi/request/req_wait.c +++ b/ompi/request/req_wait.c @@ -39,6 +39,9 @@ int ompi_request_default_wait( ompi_request_wait_completion(req); + /* make sure we get the correct status */ + opal_atomic_rmb(); + #if OPAL_ENABLE_FT_MPI /* Special case for MPI_ANY_SOURCE */ if( MPI_ERR_PROC_FAILED_PENDING == req->req_status.MPI_ERROR ) { @@ -198,6 +201,8 @@ int ompi_request_default_wait_any(size_t count, rc = ompi_grequest_invoke_query(request, &request->req_status); } if (MPI_STATUS_IGNORE != status) { + /* make sure we get the correct status */ + opal_atomic_rmb(); OMPI_COPY_STATUS(status, request->req_status, false); } rc = request->req_status.MPI_ERROR; @@ -302,6 +307,9 @@ int ompi_request_default_wait_all( size_t count, finish: rptr = requests; if (MPI_STATUSES_IGNORE != statuses) { + /* make sure we get the correct status */ + opal_atomic_rmb(); + /* fill out status and free request if required */ for( i = 0; i < count; i++, rptr++ ) { void *_tmp_ptr = &sync; @@ -574,6 +582,9 @@ int ompi_request_default_wait_some(size_t count, *outcount = num_requests_done; + /* make sure we get the correct status */ + opal_atomic_rmb(); + for (size_t i = 0; i < num_requests_done; i++) { request = requests[indices[i]]; #if OPAL_ENABLE_FT_MPI diff --git a/ompi/request/request.h b/ompi/request/request.h index 3d076cfeb7f..e2e62a2d7ad 100644 --- a/ompi/request/request.h +++ b/ompi/request/request.h @@ -527,6 +527,8 @@ static inline int ompi_request_complete(ompi_request_t* request, bool with_signa if (0 == rc) { if (OPAL_LIKELY(with_signal)) { + /* make sure everything in the request is visible before we mark it complete */ + opal_atomic_wmb(); ompi_wait_sync_t *tmp_sync = (ompi_wait_sync_t *) OPAL_ATOMIC_SWAP_PTR(&request->req_complete, REQUEST_COMPLETED);