From 01f5d6896d95f9104d1293fda259c7c89307a16e Mon Sep 17 00:00:00 2001 From: Wei Zhang Date: Tue, 22 Dec 2020 21:25:07 +0000 Subject: [PATCH] btl/ofi: fix memory leaks in error handling path Currently, mca_btl_ofi_put (get, aop, afop, acswap) allocates a mca_btl_ofi_rdma_completion_t object and uses it as the context for fi_write/fi_read/fi_atomic/fi_fetch_atomic/fi_compare_atomic. In the normal code path, this completion object is released when the completion entry is processed. However, when an error occurs while calling fi_write/fi_read/fi_atomic/fi_fetch_atomic/fi_compare_atomic, there will be no completion entry from libfabric, so the completion object's memory is leaked. This patch addresses the issue by calling opal_free_list_return() in the error handling code path. Signed-off-by: Wei Zhang --- opal/mca/btl/ofi/btl_ofi_atomics.c | 6 ++++++ opal/mca/btl/ofi/btl_ofi_rdma.c | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/opal/mca/btl/ofi/btl_ofi_atomics.c b/opal/mca/btl/ofi/btl_ofi_atomics.c index 34fa9cc4776..f9b2447130b 100644 --- a/opal/mca/btl/ofi/btl_ofi_atomics.c +++ b/opal/mca/btl/ofi/btl_ofi_atomics.c @@ -73,8 +73,10 @@ int mca_btl_ofi_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end fi_datatype, fi_op, &comp->comp_ctx); if (rc == -FI_EAGAIN) { + opal_free_list_return(comp->base.my_list, (opal_free_list_item_t*) comp); return OPAL_ERR_OUT_OF_RESOURCE; } else if (rc < 0) { + opal_free_list_return(comp->base.my_list, (opal_free_list_item_t*) comp); BTL_ERROR(("fi_fetch_atomic failed with rc=%d (%s)", rc, fi_strerror(-rc))); MCA_BTL_OFI_ABORT(); } @@ -125,8 +127,10 @@ int mca_btl_ofi_aop (struct mca_btl_base_module_t *btl, mca_btl_base_endpoint_t fi_datatype, fi_op, &comp->comp_ctx); if (rc == -FI_EAGAIN) { + opal_free_list_return(comp->base.my_list, (opal_free_list_item_t*) comp); return OPAL_ERR_OUT_OF_RESOURCE; } else if (rc < 0) { + opal_free_list_return(comp->base.my_list, (opal_free_list_item_t*) comp); BTL_ERROR(("fi_atomic failed with 
rc=%d (%s)", rc, fi_strerror(-rc))); MCA_BTL_OFI_ABORT(); } @@ -181,8 +185,10 @@ int mca_btl_ofi_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_e &comp->comp_ctx); if (rc == -FI_EAGAIN) { + opal_free_list_return(comp->base.my_list, (opal_free_list_item_t*) comp); return OPAL_ERR_OUT_OF_RESOURCE; } else if (rc < 0) { + opal_free_list_return(comp->base.my_list, (opal_free_list_item_t*) comp); BTL_ERROR(("fi_compare_atomic failed with rc=%d (%s)", rc, fi_strerror(-rc))); MCA_BTL_OFI_ABORT(); } diff --git a/opal/mca/btl/ofi/btl_ofi_rdma.c b/opal/mca/btl/ofi/btl_ofi_rdma.c index 0ecbd887bc1..f2728e36dc0 100644 --- a/opal/mca/btl/ofi/btl_ofi_rdma.c +++ b/opal/mca/btl/ofi/btl_ofi_rdma.c @@ -88,10 +88,12 @@ int mca_btl_ofi_get (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi &comp->comp_ctx); /* completion context */ if (-FI_EAGAIN == rc) { + opal_free_list_return(comp->base.my_list, (opal_free_list_item_t*) comp); return OPAL_ERR_OUT_OF_RESOURCE; } if (0 != rc) { + opal_free_list_return(comp->base.my_list, (opal_free_list_item_t*) comp); BTL_ERROR(("fi_read failed with %d:%s", rc, fi_strerror(-rc))); MCA_BTL_OFI_ABORT(); } @@ -133,10 +135,12 @@ int mca_btl_ofi_put (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi &comp->comp_ctx); /* completion context */ if (-FI_EAGAIN == rc) { + opal_free_list_return(comp->base.my_list, (opal_free_list_item_t*) comp); return OPAL_ERR_OUT_OF_RESOURCE; } if (0 != rc) { + opal_free_list_return(comp->base.my_list, (opal_free_list_item_t*) comp); BTL_ERROR(("fi_write failed with %d:%s", rc, fi_strerror(-rc))); MCA_BTL_OFI_ABORT(); }