From 6dcd42be28ac1a8dac887bf5e6c9ffb9b99f9511 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Wed, 17 Dec 2014 23:12:33 -0500 Subject: [PATCH 1/2] Prevent deadlocks on recursive calls (deleting communicators with attributes from an attribute callback). (cherry picked from commit 4d55ae838d5eff2818707da7ba60ffd640144360) --- ompi/attribute/attribute.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/ompi/attribute/attribute.c b/ompi/attribute/attribute.c index 2088defb5f..86c016f375 100644 --- a/ompi/attribute/attribute.c +++ b/ompi/attribute/attribute.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -247,6 +247,8 @@ */ #define DELETE_ATTR_CALLBACKS(type, attribute, keyval_obj, object, err) \ +do { \ + OPAL_THREAD_UNLOCK(&attribute_lock); \ if (0 != (keyval_obj->attr_flag & OMPI_KEYVAL_F77)) { \ MPI_Fint f_key = OMPI_INT_2_FINT(key); \ MPI_Fint f_err; \ @@ -278,12 +280,16 @@ ((ompi_##type##_t *)object, \ key, attr_val, \ keyval_obj->extra_state.c_ptr); \ - } + } \ + OPAL_THREAD_LOCK(&attribute_lock); \ +} while (0) /* See the big, long comment above from DELETE_ATTR_CALLBACKS -- most of that text applies here, too. */ #define COPY_ATTR_CALLBACKS(type, old_object, keyval_obj, in_attr, new_object, out_attr, err) \ +do { \ + OPAL_THREAD_UNLOCK(&attribute_lock); \ if (0 != (keyval_obj->attr_flag & OMPI_KEYVAL_F77)) { \ MPI_Fint f_key = OMPI_INT_2_FINT(key); \ MPI_Fint f_err; \ @@ -329,7 +335,9 @@ in, &out, &flag, (ompi_##type##_t *)(new_object))) == MPI_SUCCESS) { \ out_attr->av_value = out; \ } \ - } + } \ + OPAL_THREAD_LOCK(&attribute_lock); \ +} while (0) /* @@ -402,12 +410,9 @@ static int attr_sequence; static unsigned int int_pos = 12345; /* - * We used to have multiple locks for semi-fine-grained locking. But - * the code got complex, and we had to spend time looking for subtle - * bugs. Craziness -- MPI attributes are *not* high performance, so - * just use a One Big Lock approach: there is *no* concurrent access. - * If you have the lock, you can do whatever you want and no data will - * change/disapear from underneath you. + * MPI attributes are *not* high performance, so just use a One Big Lock + * approach. However, this lock is released before a user provided callback is + * triggered and acquired right after, allowing for recursive behaviors. */ static opal_mutex_t attribute_lock; From 8d89d9280958fb30d5a2e6368b41495e26e2a485 Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Fri, 19 Dec 2014 11:45:58 -0800 Subject: [PATCH 2/2] man: update a bunch of attribute-related man pages Per discussion starting http://www.open-mpi.org/community/lists/users/2014/12/26018.php, at least note that OMPI does not allow adding or deleting attributes in an attribute copy or delete callback (or any of its children) on the same object on which the callback was invoked. (cherry picked from commit 9144517ad420deb6bad1cb50c41ae74c35548ad0) --- ompi/mpi/man/man3/MPI_Attr_delete.3in | 15 ++++++++++++--- ompi/mpi/man/man3/MPI_Attr_get.3in | 6 +++--- ompi/mpi/man/man3/MPI_Attr_put.3in | 6 +++--- ompi/mpi/man/man3/MPI_Comm_delete_attr.3in | 10 +++++++++- ompi/mpi/man/man3/MPI_Comm_dup.3in | 13 ++++++++++++- ompi/mpi/man/man3/MPI_Comm_dup_with_info.3in | 8 +++++++- ompi/mpi/man/man3/MPI_Comm_free.3in | 15 ++++++++++++++- ompi/mpi/man/man3/MPI_Comm_idup.3in | 8 +++++++- ompi/mpi/man/man3/MPI_Type_delete_attr.3in | 11 ++++++++++- ompi/mpi/man/man3/MPI_Type_dup.3in | 11 ++++++++++- ompi/mpi/man/man3/MPI_Win_delete_attr.3in | 9 ++++++++- 11 files changed, 95 insertions(+), 17 deletions(-) diff --git a/ompi/mpi/man/man3/MPI_Attr_delete.3in b/ompi/mpi/man/man3/MPI_Attr_delete.3in index ce83a91dcb..058d316398 100644 --- a/ompi/mpi/man/man3/MPI_Attr_delete.3in +++ b/ompi/mpi/man/man3/MPI_Attr_delete.3in @@ -39,14 +39,23 @@ IERROR Fortran only: Error status (integer). .SH DESCRIPTION -This deprecated routine is not available in C++. -.sp -Note that use of this routine is \fIdeprecated\fP as of MPI-2. Please use MPI_Comm_delete_attr. +Note that use of this routine is \fIdeprecated\fP as of MPI-2, and +was \fIdeleted\fP in MPI-3. Please use MPI_Comm_delete_attr. This +function does not have a C++ or mpi_f08 binding. .sp Delete attribute from cache by key. This function invokes the attribute delete function delete_fn specified when the keyval was created. The call will fail if the delete_fn function returns an error code other than MPI_SUCCESS. Whenever a communicator is replicated using the function MPI_Comm_dup, all callback copy functions for attributes that are currently set are invoked (in arbitrary order). Whenever a communicator is deleted using the function MPI_Comm_free, all callback delete functions for attributes that are currently set are invoked. + +.SH NOTES +Note that it is not defined by the MPI standard what happens if the +delete_fn callback invokes other MPI functions. In Open MPI, it is +not valid for delete_fn callbacks (or any of their children) to add or +delete attributes on the same object on which the delete_fn callback +is being invoked. + + .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp diff --git a/ompi/mpi/man/man3/MPI_Attr_get.3in b/ompi/mpi/man/man3/MPI_Attr_get.3in index fc2e09fdec..3edb4148b9 100644 --- a/ompi/mpi/man/man3/MPI_Attr_get.3in +++ b/ompi/mpi/man/man3/MPI_Attr_get.3in @@ -49,9 +49,9 @@ Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Note that use of this routine is \fIdeprecated\fP as of MPI-2. Please use MPI_Comm_get_attr instead. -.sp -This deprecated routine is not available in C++. +Note that use of this routine is \fIdeprecated\fP as of MPI-2, and +was \fIdeleted\fP in MPI-3. Please use MPI_Comm_create_attr. This +function does not have a C++ or mpi_f08 binding. .sp Retrieves attribute value by key. The call is erroneous if there is no key with value keyval. On the other hand, the call is correct if the key value exists, but no attribute is attached on comm for that key; in such case, the call returns flag = false. In particular MPI_KEYVAL_INVALID is an erroneous key value. diff --git a/ompi/mpi/man/man3/MPI_Attr_put.3in b/ompi/mpi/man/man3/MPI_Attr_put.3in index 8e8830fdd5..e78ba8df94 100644 --- a/ompi/mpi/man/man3/MPI_Attr_put.3in +++ b/ompi/mpi/man/man3/MPI_Attr_put.3in @@ -43,9 +43,9 @@ Fortran only: Error status (integer). .SH DESCRIPTION .ft R -Note that use of this routine is \fIdeprecated\fP as of MPI-2. Please use MPI_Comm_set_attr instead. -.sp -This deprecated routine is not available in C++. +Note that use of this routine is \fIdeprecated\fP as of MPI-2, and +was \fIdeleted\fP in MPI-3. Please use MPI_Comm_set_attr. This +function does not have a C++ or mpi_f08 binding. .sp MPI_Attr_put stores the stipulated attribute value attribute_val for subsequent retrieval by MPI_Attr_get. If the value is already present, then the outcome is as if MPI_Attr_delete was first called to delete the previous value (and the callback function delete_fn was executed), and a new value was next stored. The call is erroneous if there is no key with value keyval; in particular MPI_KEYVAL_INVALID is an erroneous key value. The call will fail if the delete_fn function returned an error code other than MPI_SUCCESS. diff --git a/ompi/mpi/man/man3/MPI_Comm_delete_attr.3in b/ompi/mpi/man/man3/MPI_Comm_delete_attr.3in index 5e4bf2e526..bb51b04d77 100644 --- a/ompi/mpi/man/man3/MPI_Comm_delete_attr.3in +++ b/ompi/mpi/man/man3/MPI_Comm_delete_attr.3in @@ -1,5 +1,5 @@ .\" -*- nroff -*- -.\" Copyright 2010 Cisco Systems, Inc. All rights reserved. +.\" Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines .\" $COPYRIGHT$ @@ -55,6 +55,14 @@ Whenever a communicator is replicated using the function MPI_Comm_dup, all callb This function is the same as MPI_Attr_delete but is needed to match the communicator-specific functions introduced in the MPI-2 standard. The use of MPI_Attr_delete is deprecated. +.SH NOTES +Note that it is not defined by the MPI standard what happens if the +delete_fn callback invokes other MPI functions. In Open MPI, it is +not valid for delete_fn callbacks (or any of their children) to add or +delete attributes on the same object on which the delete_fn callback +is being invoked. + + .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp diff --git a/ompi/mpi/man/man3/MPI_Comm_dup.3in b/ompi/mpi/man/man3/MPI_Comm_dup.3in index 7598939620..b410dc651d 100644 --- a/ompi/mpi/man/man3/MPI_Comm_dup.3in +++ b/ompi/mpi/man/man3/MPI_Comm_dup.3in @@ -1,5 +1,5 @@ .\" -*- nroff -*- -.\" Copyright 2010 Cisco Systems, Inc. All rights reserved. +.\" Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation .\" $COPYRIGHT$ @@ -57,6 +57,12 @@ library call with a duplicate communication space that has the same properties a .sp This call applies to both intra- and intercommunicators. +Note that it is not defined by the MPI standard what happens if the +attribute copy callback invokes other MPI functions. In Open MPI, it +is not valid for attribute copy callbacks (or any of their children) +to add or delete attributes on the same object on which the attribute +copy callback is being invoked. + .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp @@ -64,3 +70,8 @@ Before the error value is returned, the current MPI error handler is called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +.SH SEE ALSO +.ft R +.nf +MPI_Comm_dup_with_info +MPI_Comm_idup diff --git a/ompi/mpi/man/man3/MPI_Comm_dup_with_info.3in b/ompi/mpi/man/man3/MPI_Comm_dup_with_info.3in index 7b006f054b..fdae7e7d8f 100644 --- a/ompi/mpi/man/man3/MPI_Comm_dup_with_info.3in +++ b/ompi/mpi/man/man3/MPI_Comm_dup_with_info.3in @@ -1,6 +1,6 @@ .\" -*- nroff -*- .\" Copyright 2013 Los Alamos National Security, LLC. All rights reserved. -.\" Copyright 2010 Cisco Systems, Inc. All rights reserved. +.\" Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation .\" $COPYRIGHT$ @@ -56,6 +56,12 @@ library call with a duplicate communication space that has the same properties a .sp This call applies to both intra- and intercommunicators. +Note that it is not defined by the MPI standard what happens if the +attribute copy callback invokes other MPI functions. In Open MPI, it +is not valid for attribute copy callbacks (or any of their children) +to add or delete attributes on the same object on which the attribute +copy callback is being invoked. + .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. .sp diff --git a/ompi/mpi/man/man3/MPI_Comm_free.3in b/ompi/mpi/man/man3/MPI_Comm_free.3in index 44ca9a7105..472cacb9ad 100644 --- a/ompi/mpi/man/man3/MPI_Comm_free.3in +++ b/ompi/mpi/man/man3/MPI_Comm_free.3in @@ -1,5 +1,5 @@ .\" -*- nroff -*- -.\" Copyright 2010 Cisco Systems, Inc. All rights reserved. +.\" Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation .\" $COPYRIGHT$ @@ -44,9 +44,22 @@ Fortran only: Error status (integer). .ft R This operation marks the communicator object for deallocation. The handle is set to MPI_COMM_NULL. Any pending operations that use this communicator will complete normally; the object is actually deallocated only if there are no other active references to it. This call applies to intracommunicators and intercommunicators. Upon actual deallocation, the delete callback functions for all cached attributes (see Section 5.7 in the MPI-1 Standard, "Caching") are called in arbitrary order. + +.SH NOTES +Note that it is not defined by the MPI standard what happens if the +delete_fn callback invokes other MPI functions. In Open MPI, it is +not valid for delete_fn callbacks (or any of their children) to add or +delete attributes on the same object on which the delete_fn callback +is being invoked. + + .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp Before the error value is returned, the current MPI error handler is called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error. +.SH SEE ALSO +.ft R +.nf +MPI_Comm_delete_attr diff --git a/ompi/mpi/man/man3/MPI_Comm_idup.3in b/ompi/mpi/man/man3/MPI_Comm_idup.3in index 5ba6e4d159..8dc5ee3acd 100644 --- a/ompi/mpi/man/man3/MPI_Comm_idup.3in +++ b/ompi/mpi/man/man3/MPI_Comm_idup.3in @@ -1,6 +1,6 @@ .\" -*- nroff -*- .\" Copyright 2013 Los Alamos National Security, LLC. All rights reserved. -.\" Copyright 2010 Cisco Systems, Inc. All rights reserved. +.\" Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation .\" $COPYRIGHT$ @@ -56,6 +56,12 @@ library call with a duplicate communication space that has the same properties a .sp This call applies to both intra- and intercommunicators. +Note that it is not defined by the MPI standard what happens if the +attribute copy callback invokes other MPI functions. In Open MPI, it +is not valid for attribute copy callbacks (or any of their children) +to add or delete attributes on the same object on which the attribute +copy callback is being invoked. + .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. .sp diff --git a/ompi/mpi/man/man3/MPI_Type_delete_attr.3in b/ompi/mpi/man/man3/MPI_Type_delete_attr.3in index 98e321e0c1..fef9d77ccd 100644 --- a/ompi/mpi/man/man3/MPI_Type_delete_attr.3in +++ b/ompi/mpi/man/man3/MPI_Type_delete_attr.3in @@ -1,5 +1,5 @@ .\" -*- nroff -*- -.\" Copyright 2010 Cisco Systems, Inc. All rights reserved. +.\" Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines .\" $COPYRIGHT$ @@ -50,6 +50,15 @@ Fortran only: Error status (integer). .ft R MPI_Type_delete_attr deletes a datatype-caching attribute value associated with a key. This routines partially replaces MPI_Attr_delete, which is now deprecated. + +.SH NOTES +Note that it is not defined by the MPI standard what happens if the +delete_fn callback invokes other MPI functions. In Open MPI, it is +not valid for delete_fn callbacks (or any of their children) to add or +delete attributes on the same object on which the delete_fn callback +is being invoked. + + .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp diff --git a/ompi/mpi/man/man3/MPI_Type_dup.3in b/ompi/mpi/man/man3/MPI_Type_dup.3in index 1a9dfb679a..25290deeca 100644 --- a/ompi/mpi/man/man3/MPI_Type_dup.3in +++ b/ompi/mpi/man/man3/MPI_Type_dup.3in @@ -1,5 +1,5 @@ .\" -*- nroff -*- -.\" Copyright 2010 Cisco Systems, Inc. All rights reserved. +.\" Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines .\" $COPYRIGHT$ @@ -47,6 +47,15 @@ Fortran only: Error status (integer). .ft R MPI_Type_dup is a type constructor that duplicates the existing type with associated key values. For each key value, the respective copy callback function determines the attribute value associated with this key in the new communicator. One particular action that a copy callback may take is to delete the attribute from the new data type. Returns in \fInewtype\fP a new data type with exactly the same properties as \fItype\fP, as well as any copied cached information. The new data type has identical upper bound and lower bound and yields the same net result when fully decoded with the functions described in Section 8.6 of the MPI-2 standard. \fInewtype\fP has the same committed state as the old \fItype\fP. + +.SH NOTES +Note that it is not defined by the MPI standard what happens if the +attribute copy callback invokes other MPI functions. In Open MPI, it +is not valid for attribute copy callbacks (or any of their children) +to add or delete attributes on the same object on which the attribute +copy callback is being invoked. + + .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp diff --git a/ompi/mpi/man/man3/MPI_Win_delete_attr.3in b/ompi/mpi/man/man3/MPI_Win_delete_attr.3in index 743d0b5760..8777c1549b 100644 --- a/ompi/mpi/man/man3/MPI_Win_delete_attr.3in +++ b/ompi/mpi/man/man3/MPI_Win_delete_attr.3in @@ -1,5 +1,5 @@ .\" -*- nroff -*- -.\" Copyright 2010 Cisco Systems, Inc. All rights reserved. +.\" Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. .\" Copyright 2006-2008 Sun Microsystems, Inc. .\" Copyright (c) 1996 Thinking Machines Corporation .\" $COPYRIGHT$ @@ -46,6 +46,13 @@ Key value (integer). IERROR Fortran only: Error status (integer). +.SH NOTES +Note that it is not defined by the MPI standard what happens if the +delete_fn callback invokes other MPI functions. In Open MPI, it is +not valid for delete_fn callbacks (or any of their children) to add or +delete attributes on the same object on which the delete_fn callback +is being invoked. + .SH ERRORS Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object. .sp