diff --git a/AUTHORS b/AUTHORS index e88845b1b7..9137fa722e 100644 --- a/AUTHORS +++ b/AUTHORS @@ -43,6 +43,7 @@ gef@icl.utk.edu Graham Fagg UTK gilles@rist.or.jp Gilles Gouaillardet RIST gingery@lanl.gov Ginger Young LANL gleb@voltaire.com Gleb Natapov Voltaire +gpaulsen@us.ibm.com Geoffrey Paulsen IBM gshipman@lanl.gov Galen Shipman LANL gwatson@lanl.gov Greg Watson LANL herault@icl.utk.edu Thomas Herault INRIA diff --git a/config/opal_configure_options.m4 b/config/opal_configure_options.m4 index 023fbdbba0..3759c1e0fd 100644 --- a/config/opal_configure_options.m4 +++ b/config/opal_configure_options.m4 @@ -302,7 +302,8 @@ else OPAL_ENABLE_DLOPEN_SUPPORT=1 AC_MSG_RESULT([yes]) fi - +AC_DEFINE_UNQUOTED(OPAL_ENABLE_DLOPEN_SUPPORT, $OPAL_ENABLE_DLOPEN_SUPPORT, + [Whether we want to enable dlopen support]) # # Heterogeneous support diff --git a/contrib/platform/lanl/cray_xe6/debug-common b/contrib/platform/lanl/cray_xe6/debug-common index e7e6bcd99d..6bb806362d 100644 --- a/contrib/platform/lanl/cray_xe6/debug-common +++ b/contrib/platform/lanl/cray_xe6/debug-common @@ -1,4 +1,3 @@ -enable_dlopen=no enable_mem_profile=no enable_binaries=yes enable_heterogeneous=no @@ -33,8 +32,5 @@ enable_mca_direct=pml-ob1 # enable development headers with_devel_headers=yes -# enable ptmalloc (enables lazy deregistration) -with_memory_manager=linux - # disable valgrind with_valgrind=no diff --git a/contrib/platform/lanl/cray_xe6/optimized-common b/contrib/platform/lanl/cray_xe6/optimized-common index 26877aa41b..937bf6b598 100644 --- a/contrib/platform/lanl/cray_xe6/optimized-common +++ b/contrib/platform/lanl/cray_xe6/optimized-common @@ -1,5 +1,3 @@ -enable_dlopen=no - enable_mem_profile=no enable_binaries=yes @@ -40,8 +38,5 @@ enable_mca_direct=pml-ob1 # enable development headers with_devel_headers=yes -# enable ptmalloc (enables lazy deregistration) -with_memory_manager=linux - # disable valgrind with_valgrind=no diff --git a/contrib/platform/lanl/toss/toss-common 
b/contrib/platform/lanl/toss/toss-common index 639672a3a5..ba000be23f 100644 --- a/contrib/platform/lanl/toss/toss-common +++ b/contrib/platform/lanl/toss/toss-common @@ -1,7 +1,6 @@ # (c) 2013 Los Alamos National Security, LLC. All rights reserved. # Open MPI common configuration for TOSS/TOSS2 v1.7.x/1.8.x -enable_dlopen=no enable_binaries=yes enable_heterogeneous=no enable_shared=yes diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index 46012d93ae..9d4e0ec197 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -92,15 +92,13 @@ #include "ompi/dpm/dpm.h" #include "ompi/mpiext/mpiext.h" -#if defined(MEMORY_LINUX_PTMALLOC2) && MEMORY_LINUX_PTMALLOC2 -#include "opal/mca/memory/linux/memory_linux.h" +#include "opal/mca/memory/base/base.h" /* So this sucks, but with OPAL in its own library that is brought in implicity from libmpi, there are times when the malloc initialize hook in the memory component doesn't work. So we have to do it from here, since any MPI code is going to call MPI_Init... */ OPAL_DECLSPEC void (*__malloc_initialize_hook) (void) = - opal_memory_linux_malloc_init_hook; -#endif /* defined(MEMORY_LINUX_PTMALLOC2) && MEMORY_LINUX_PTMALLOC2 */ + opal_memory_base_malloc_init_hook; /* This is required for the boundaries of the hash tables used to store * the F90 types returned by the MPI_Type_create_f90_XXX functions. diff --git a/opal/class/opal_fifo.h b/opal/class/opal_fifo.h index 604601dc35..ad9cbdbcbb 100644 --- a/opal/class/opal_fifo.h +++ b/opal/class/opal_fifo.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007 Voltaire All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved. - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * reseved. 
* $COPYRIGHT$ * @@ -101,7 +101,8 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo, if (&fifo->opal_fifo_ghost == tail.data.item) { /* update the head */ - fifo->opal_fifo_head.data.item = item; + opal_counted_pointer_t head = {.value = fifo->opal_fifo_head.value}; + opal_update_counted_pointer (&fifo->opal_fifo_head, head, item); } else { /* update previous item */ tail.data.item->opal_list_next = item; diff --git a/opal/mca/memory/base/base.h b/opal/mca/memory/base/base.h index 2316b6c4f1..63e5b56803 100644 --- a/opal/mca/memory/base/base.h +++ b/opal/mca/memory/base/base.h @@ -32,5 +32,7 @@ BEGIN_C_DECLS */ OPAL_DECLSPEC extern mca_base_framework_t opal_memory_base_framework; +OPAL_DECLSPEC void opal_memory_base_malloc_init_hook (void); + END_C_DECLS #endif /* OPAL_BASE_MEMORY_H */ diff --git a/opal/mca/memory/base/memory_base_open.c b/opal/mca/memory/base/memory_base_open.c index b27b1ae61d..2ab4c00eb2 100644 --- a/opal/mca/memory/base/memory_base_open.c +++ b/opal/mca/memory/base/memory_base_open.c @@ -13,6 +13,8 @@ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -43,20 +45,22 @@ static int empty_process(void) return OPAL_SUCCESS; } +static int empty_query (int *priority) +{ + *priority = 0; + return OPAL_SUCCESS; +} /* * Local variables */ static opal_memory_base_component_2_0_0_t empty_component = { - /* Don't care about the version info */ - { 0, }, - /* Don't care about the data */ - { 0, }, /* Empty / safe functions to call if no memory componet is selected */ - empty_process, - opal_memory_base_component_register_empty, - opal_memory_base_component_deregister_empty, - opal_memory_base_component_set_alignment_empty, + .memoryc_query = empty_query, + .memoryc_process = empty_process, + .memoryc_register = opal_memory_base_component_register_empty, + .memoryc_deregister = opal_memory_base_component_deregister_empty, + .memoryc_set_alignment = opal_memory_base_component_set_alignment_empty, }; @@ -65,7 +69,30 @@ static opal_memory_base_component_2_0_0_t empty_component = { */ opal_memory_base_component_2_0_0_t *opal_memory = &empty_component; +#if MEMORY_LINUX_PTMALLOC2 +/* + * Note that this is a minor abstraction violation (that has actually + * existed for quite a long time -- it used to be up in + * ompi_mpi_init(); yoinks!): we're including a component's header + * file here. This is an unfortunate necessity: the linux/ptmalloc2 + * system hooks in pre-main, and has to be initialized before any + * component or module has even been created. Sad panda. + */ +#include "opal/mca/memory/linux/memory_linux.h" +#endif +void opal_memory_base_malloc_init_hook (void) +{ +#if MEMORY_LINUX_PTMALLOC2 + /* See above comment about linux/ptmalloc2 about why this + abstraction violation is here. 
*/ + opal_memory->memoryc_init_hook = opal_memory_linux_malloc_init_hook; +#endif + + if (opal_memory->memoryc_init_hook) { + opal_memory->memoryc_init_hook (); + } +} /* * Function for finding and opening either all MCA components, or the one @@ -73,23 +100,45 @@ opal_memory_base_component_2_0_0_t *opal_memory = &empty_component; */ static int opal_memory_base_open(mca_base_open_flag_t flags) { + mca_base_component_list_item_t *item, *next; + opal_memory_base_component_2_0_0_t *tmp; + int priority, highest_priority = 0; int ret; - /* Open up all available components */ + /* select the highest-priority component whose query succeeds */ + OPAL_LIST_FOREACH(item, &opal_memory_base_framework.framework_components, mca_base_component_list_item_t) { + tmp = (opal_memory_base_component_2_0_0_t *) item->cli_component; + + ret = tmp->memoryc_query (&priority); +#if MEMORY_LINUX_PTMALLOC2 + /* See above comment about linux/ptmalloc2 about why this + abstraction violation is here. */ + if (0 == strcmp (tmp->memoryc_version.mca_component_name, "linux")) { + /* if ptmalloc is enabled always use it */ + priority = 1000000; + } +#endif + if (OPAL_SUCCESS != ret || priority < highest_priority) { + continue; + } + + highest_priority = priority; + opal_memory = tmp; + } + + OPAL_LIST_FOREACH_SAFE(item, next, &opal_memory_base_framework.framework_components, mca_base_component_list_item_t) { + if ((void *) opal_memory != (void *) item->cli_component) { + mca_base_component_unload (item->cli_component, opal_memory_base_framework.framework_output); + opal_list_remove_item (&opal_memory_base_framework.framework_components, &item->super); + } + } + + /* open remaining component */ ret = mca_base_framework_components_open (&opal_memory_base_framework, flags); if (ret != OPAL_SUCCESS) { return ret; } - /* can only be zero or one */ - if (opal_list_get_size(&opal_memory_base_framework.framework_components) == 1) { - mca_base_component_list_item_t *item; - item = (mca_base_component_list_item_t*) - 
opal_list_get_first(&opal_memory_base_framework.framework_components); - opal_memory = (opal_memory_base_component_2_0_0_t*) - item->cli_component; - } - /* All done */ return OPAL_SUCCESS; } diff --git a/opal/mca/memory/linux/configure.m4 b/opal/mca/memory/linux/configure.m4 index e44786e1eb..9a37d38a4d 100644 --- a/opal/mca/memory/linux/configure.m4 +++ b/opal/mca/memory/linux/configure.m4 @@ -52,17 +52,14 @@ AC_DEFUN([MCA_opal_memory_linux_CONFIG],[ ;; esac + # Must specifically request this component AS_IF([test "$with_memory_manager" = "linux"], [memory_linux_ptmalloc2_happy=yes memory_linux_ummu_happy=yes memory_linux_requested=1], [memory_linux_requested=0 - AS_IF([test -z "$with_memory_manager" || test "$with_memory_manager" = "yes"], - [memory_linux_ptmalloc2_happy=yes - memory_linux_ummu_happy=yes], - [memory_linux_ptmalloc2_happy=no - memory_linux_ummu_happy=no])]) - + memory_linux_ptmalloc2_happy=no + memory_linux_ummu_happy=no]) ###################################################################### # if memory hook available diff --git a/opal/mca/memory/linux/hooks.c b/opal/mca/memory/linux/hooks.c index 910d8e6e20..db608869b2 100644 --- a/opal/mca/memory/linux/hooks.c +++ b/opal/mca/memory/linux/hooks.c @@ -744,6 +744,10 @@ void opal_memory_linux_malloc_init_hook(void) check_result_t r1, lp, lpp; bool want_rcache = false, found_driver = false; + if (!opal_memory_linux_opened) { + return; + } + /* First, check for a FAKEROOT environment. If we're in a fakeroot, then access() (and likely others) have been replaced and are not safe to call here in this pre-main environment. 
So diff --git a/opal/mca/memory/linux/memory_linux.h b/opal/mca/memory/linux/memory_linux.h index 22685c1478..9334f3ae82 100644 --- a/opal/mca/memory/linux/memory_linux.h +++ b/opal/mca/memory/linux/memory_linux.h @@ -83,6 +83,8 @@ OPAL_DECLSPEC void opal_memory_linux_malloc_init_hook(void); OPAL_DECLSPEC void opal_memory_linux_malloc_set_alignment(int use_memalign, size_t memalign_threshold); #endif /* MEMORY_LINUX_MALLOC_ALIGN_ENABLED */ +extern bool opal_memory_linux_opened; + END_C_DECLS #endif diff --git a/opal/mca/memory/linux/memory_linux_component.c b/opal/mca/memory/linux/memory_linux_component.c index d883f10f2a..323ab19134 100644 --- a/opal/mca/memory/linux/memory_linux_component.c +++ b/opal/mca/memory/linux/memory_linux_component.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -60,6 +60,7 @@ static int linux_open(void); static int linux_close(void); static int linux_register(void); +static int linux_query(int *); #if MEMORY_LINUX_UMMUNOTIFY static bool ummunotify_opened = false; @@ -69,6 +70,9 @@ static bool ptmalloc2_opened = false; #endif bool opal_memory_linux_disable = false; +static int mca_memory_linux_priority; + +bool opal_memory_linux_opened = false; opal_memory_linux_component_t mca_memory_linux_component = { /* First, the opal_memory_base_component_2_0_0_t */ @@ -96,6 +100,8 @@ opal_memory_linux_component_t mca_memory_linux_component = { /* Memory framework functions. These function pointer values are replaced by memory_linux_ummunotify.c at run time if we end up using ummunotify support. 
*/ + .memoryc_init_hook = opal_memory_linux_malloc_init_hook, + .memoryc_query = linux_query, .memoryc_register = opal_memory_base_component_register_empty, .memoryc_deregister = opal_memory_base_component_deregister_empty, #if MEMORY_LINUX_MALLOC_ALIGN_ENABLED @@ -243,11 +249,25 @@ static int linux_register(void) if (0 > ret) { return ret; } + + mca_memory_linux_priority = 50; + ret = mca_base_component_var_register (&mca_memory_linux_component.super.memoryc_version, + "priority", "Priority of the linux memory hook component", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_CONSTANT, &mca_memory_linux_priority); + if (0 > ret) { + return ret; + } #endif /* MEMORY_LINUX_MALLOC_ALIGN_ENABLED */ return (0 > ret) ? ret : OPAL_SUCCESS; } +static int linux_query (int *priority) +{ + *priority = mca_memory_linux_priority; + return OPAL_SUCCESS; +} static int linux_open(void) { @@ -318,6 +338,7 @@ static int linux_open(void) __malloc_hook = _opal_memory_linux_malloc_align_hook; } #endif /* MEMORY_LINUX_MALLOC_ALIGN_ENABLED */ + opal_memory_linux_opened = true; return OPAL_SUCCESS; } diff --git a/opal/mca/memory/malloc_solaris/memory_malloc_solaris_component.c b/opal/mca/memory/malloc_solaris/memory_malloc_solaris_component.c index 15fa63468d..035d14ade4 100644 --- a/opal/mca/memory/malloc_solaris/memory_malloc_solaris_component.c +++ b/opal/mca/memory/malloc_solaris/memory_malloc_solaris_component.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007-2011 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2009-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -46,6 +46,7 @@ int __munmap(caddr_t addr, size_t len); #endif static int opal_memory_malloc_open(void); +static int opal_memory_malloc_query(int *); const opal_memory_base_component_2_0_0_t mca_memory_malloc_solaris_component = { /* First, the mca_component_t struct containing meta information @@ -68,6 +69,7 @@ const opal_memory_base_component_2_0_0_t mca_memory_malloc_solaris_component = { /* This component doesn't need these functions, but need to provide safe/empty register/deregister functions to call */ + .memoryc_query = opal_memory_malloc_query, .memoryc_register = opal_memory_base_component_register_empty, .memoryc_deregister = opal_memory_base_component_deregister_empty, .memoryc_set_alignment = opal_memory_base_component_set_alignment_empty, @@ -93,6 +95,11 @@ opal_memory_malloc_open(void) return OPAL_SUCCESS; } +static int opal_memory_malloc_query (int *priority) +{ + *priority = 79; + return OPAL_SUCCESS; +} /* * Three ways to call munmap. Prefered is to call __munmap, which diff --git a/opal/mca/memory/memory.h b/opal/mca/memory/memory.h index ab9ac49d78..b5968fd264 100644 --- a/opal/mca/memory/memory.h +++ b/opal/mca/memory/memory.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -78,6 +78,12 @@ BEGIN_C_DECLS */ typedef int (*opal_memory_base_component_process_fn_t)(void); +/** + * Prototype for a function that is invoked when the memory base is + * trying to select a component. This functionality is required. 
+ */ +typedef int (*opal_memory_base_component_query_fn_t)(int *priority); + /** * Prototype for a function that is invoked when Open MPI starts to * "care" about a specific memory region. That is, Open MPI declares @@ -119,6 +125,11 @@ typedef int (*opal_memory_base_component_deregister_fn_t)(void *base, typedef void (*opal_memory_base_component_set_alignment_fn_t)(int use_memalign, size_t memalign_threshold); +/** + * Function to be called when initializing malloc hooks + */ +typedef void (*opal_memory_base_component_init_hook_fn_t)(void); + /** * Structure for memory components. */ @@ -128,6 +139,12 @@ typedef struct opal_memory_base_component_2_0_0_t { /** MCA base data */ mca_base_component_data_t memoryc_data; + opal_memory_base_component_query_fn_t memoryc_query; + + /** This function will be called when the malloc hooks are + * initialized. It may be NULL if no hooks are needed. */ + opal_memory_base_component_init_hook_fn_t memoryc_init_hook; + /** Function to call when something has changed, as indicated by opal_memory_changed(). Will be ignored if the component does not provide an opal_memory_changed() macro that returns diff --git a/opal/mca/memory/patcher/Makefile.am b/opal/mca/memory/patcher/Makefile.am new file mode 100644 index 0000000000..ce4172617f --- /dev/null +++ b/opal/mca/memory/patcher/Makefile.am @@ -0,0 +1,32 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. 
+# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This component is only ever built statically (i.e., slurped into +# libopen-pal) -- it is never built as a DSO. +noinst_LTLIBRARIES = libmca_memory_patcher.la +libmca_memory_patcher_la_SOURCES = \ + memory_patcher.h \ + memory_patcher_component.c +libmca_memory_patcher_la_LDFLAGS = \ + -module -avoid-version $(memory_patcher_LDFLAGS) +libmca_memory_patcher_la_LIBADD = $(memory_patcher_LIBS) diff --git a/opal/mca/memory/patcher/configure.m4 b/opal/mca/memory/patcher/configure.m4 new file mode 100644 index 0000000000..4e2949fd63 --- /dev/null +++ b/opal/mca/memory/patcher/configure.m4 @@ -0,0 +1,90 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +AC_DEFUN([MCA_opal_memory_patcher_PRIORITY], [41]) + +AC_DEFUN([MCA_opal_memory_patcher_COMPILE_MODE], [ + AC_MSG_CHECKING([for MCA component $2:$3 compile mode]) + $4="static" + AC_MSG_RESULT([$$4]) +]) + + +# MCA_memory_patcher_CONFIG(action-if-can-compile, +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_opal_memory_patcher_CONFIG],[ + AC_CONFIG_FILES([opal/mca/memory/patcher/Makefile]) + + OPAL_VAR_SCOPE_PUSH([memory_patcher_have___curbrk memory_patcher_have___mmap memory_patcher_have___syscall memory_patcher_have___mmap_prototype memory_patcher_have___syscall_prototype]) + + memory_patcher_have___curbrk=0 + memory_patcher_have___mmap=0 + memory_patcher_have___mmap_prototype=0 + memory_patcher_have___syscall=0 + memory_patcher_have___syscall_prototype=0 + + AC_MSG_CHECKING([for __curbrk symbol]) + AC_LINK_IFELSE([AC_LANG_PROGRAM([extern char *__curbrk;],[char *tmp = __curbrk;])], + [AC_MSG_RESULT([yes]) + memory_patcher_have___curbrk=1], + [AC_MSG_RESULT([no])]) + AC_DEFINE_UNQUOTED([OPAL_MEMORY_PATCHER_HAVE___CURBRK], [$memory_patcher_have___curbrk], + [Whether the glibc __curbrk exists]) + + AC_MSG_CHECKING([whether __mmap prototype exists]) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([#include ],[char *tmp = __mmap (NULL, 0, 0, 0, 0, 0);])], + [AC_MSG_RESULT([yes]) + memory_patcher_have___mmap_prototype=1], + [AC_MSG_RESULT([no])]) + AC_DEFINE_UNQUOTED([OPAL_MEMORY_PATCHER_HAVE___MMAP_PROTO], [$memory_patcher_have___mmap_prototype], + [Whether the internal __mmap call has a prototype]) + + AC_MSG_CHECKING([whether __mmap symbol exists]) + AC_LINK_IFELSE([AC_LANG_PROGRAM([void *__mmap ();],[char *tmp = __mmap ();])], + [AC_MSG_RESULT([yes]) + memory_patcher_have___mmap=1], + [AC_MSG_RESULT([no])]) + AC_DEFINE_UNQUOTED([OPAL_MEMORY_PATCHER_HAVE___MMAP], [$memory_patcher_have___mmap], + [Whether the internal __mmap call exists]) + + 
AC_MSG_CHECKING([whether __syscall prototype exists]) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([#include ],[char *tmp = __syscall (SYS_mmap, NULL);])], + [AC_MSG_RESULT([yes]) + memory_patcher_have___syscall_prototype=1], + [AC_MSG_RESULT([no])]) + AC_DEFINE_UNQUOTED([OPAL_MEMORY_PATCHER_HAVE___SYSCALL_PROTO], [$memory_patcher_have___syscall_prototype], + [Whether the internal __syscall call has a prototype]) + + AC_MSG_CHECKING([whether __syscall symbol exists]) + AC_LINK_IFELSE([AC_LANG_PROGRAM([void *__syscall ();],[char *tmp = __syscall ();])], + [AC_MSG_RESULT([yes]) + memory_patcher_have___syscall=1], + [AC_MSG_RESULT([no])]) + AC_DEFINE_UNQUOTED([OPAL_MEMORY_PATCHER_HAVE___SYSCALL], [$memory_patcher_have___syscall], + [Whether the internal __syscall call exists]) + + [$1] + + OPAL_VAR_SCOPE_POP +]) diff --git a/opal/mca/memory/patcher/memory_patcher.h b/opal/mca/memory/patcher/memory_patcher.h new file mode 100644 index 0000000000..1909443c54 --- /dev/null +++ b/opal/mca/memory/patcher/memory_patcher.h @@ -0,0 +1,27 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#if !defined(OPAL_MEMORY_PATCHER_H) +#define OPAL_MEMORY_PATCHER_H + +#include "opal_config.h" + +#include "opal/mca/memory/memory.h" +#include "opal/mca/patcher/patcher.h" + +typedef struct opal_memory_patcher_component_t { + opal_memory_base_component_2_0_0_t super; +} opal_memory_patcher_component_t; + +extern opal_memory_patcher_component_t mca_memory_patcher_component; + +#endif /* !defined(OPAL_MEMORY_PATCHER_H) */ diff --git a/opal/mca/memory/patcher/memory_patcher_component.c b/opal/mca/memory/patcher/memory_patcher_component.c new file mode 100644 index 0000000000..2e0577596f --- /dev/null +++ b/opal/mca/memory/patcher/memory_patcher_component.c @@ -0,0 +1,442 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. 
+ * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "memory_patcher.h" + +#include "opal/util/output.h" +#include "opal/util/show_help.h" +#include "opal/mca/memory/base/empty.h" +#include "opal/mca/memory/base/base.h" +#include "opal/memoryhooks/memory.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "memory_patcher.h" +#undef opal_memory_changed + +static int patcher_open(void); +static int patcher_close(void); +static int patcher_register(void); +static int patcher_query (int *); + +static int mca_memory_patcher_priority; + +opal_memory_patcher_component_t mca_memory_patcher_component = { + .super = { + .memoryc_version = { + OPAL_MEMORY_BASE_VERSION_2_0_0, + + /* Component name and version */ + .mca_component_name = "patcher", + MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, + OPAL_RELEASE_VERSION), + + /* Component open and close functions */ + .mca_open_component = patcher_open, + .mca_close_component = patcher_close, + .mca_register_component_params = patcher_register, + }, + .memoryc_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + + /* Memory framework functions. */ + .memoryc_query = patcher_query, + .memoryc_register = opal_memory_base_component_register_empty, + .memoryc_deregister = opal_memory_base_component_deregister_empty, + .memoryc_set_alignment = opal_memory_base_component_set_alignment_empty, + }, + + /* Component-specific data, filled in later (compiler will 0/NULL + it out) */ +}; + +#if OPAL_MEMORY_PATCHER_HAVE___SYSCALL_PROTO && OPAL_MEMORY_PATCHER_HAVE___SYSCALL +/* calling __syscall is preferred on some systems when some arguments may be 64-bit. 
it also + * has the benefit of having an off_t return type */ +#define memory_patcher_syscall __syscall +#else +#define memory_patcher_syscall syscall +#endif + +/* + * The following block of code is #if 0'ed out because we do not need + * to intercept mmap() any more (mmap() only deals with memory + * protection; it does not invalidate any rcache entries for a given + * region). But if we do someday, this is the code that we'll need. + * It's a little non-trivial, so we might as well keep it (and #if 0 + * it out). + */ +#if 0 + +#if OPAL_MEMORY_PATCHER_HAVE___MMAP && !OPAL_MEMORY_PATCHER_HAVE___MMAP_PROTO +/* prototype for Apple's internal mmap function */ +void *__mmap (void *start, size_t length, int prot, int flags, int fd, off_t offset); +#endif + +static void *(*original_mmap)(void *, size_t, int, int, int, off_t); + +static void *intercept_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) +{ + OPAL_PATCHER_BEGIN; + void *result = 0; + + if (prot == PROT_NONE) { + opal_mem_hooks_release_hook (start, length, true); + } + + if (!original_mmap) { +#if OPAL_MEMORY_PATCHER_HAVE___MMAP + /* the darwin syscall returns an int not a long so call the underlying __mmap function */ + result = __mmap (start, length, prot, flags, fd, offset); +#else + result = (void*)(intptr_t) memory_patcher_syscall(SYS_mmap, start, length, prot, flags, fd, offset); +#endif + + // I thought we had some issue in the past with the above line for IA32, + // like maybe syscall() wouldn't handle that many arguments. But just now + // I used gcc -m32 and it worked on a recent system. But there's a possibility + // that older ia32 systems may need some other code to make the above syscall. 
+ } else { + result = original_mmap (start, length, prot, flags, fd, offset); + } + + OPAL_PATCHER_END; + return result; +} + +#endif + +static int (*original_munmap) (void *, size_t); + +static int intercept_munmap(void *start, size_t length) +{ + OPAL_PATCHER_BEGIN; + int result = 0; + + /* could be in a malloc implementation */ + opal_mem_hooks_release_hook (start, length, true); + + if (!original_munmap) { + result = memory_patcher_syscall(SYS_munmap, start, length); + } else { + result = original_munmap (start, length); + } + + OPAL_PATCHER_END; + return result; +} + +#if defined (SYS_mremap) + +/* on linux this function has an optional extra argument but ... can not be used here because it + * causes issues when intercepting a 4-argument mremap call */ +static void *(*original_mremap) (void *, size_t, size_t, int, void *); + +static void *intercept_mremap (void *start, size_t oldlen, size_t newlen, int flags, void *new_address) +{ + OPAL_PATCHER_BEGIN; + void *result = MAP_FAILED; + + if (MAP_FAILED != start && oldlen > 0) { + opal_mem_hooks_release_hook (start, oldlen, true); + } + + if (!(flags & MREMAP_FIXED)) { + new_address = NULL; + } + + if (!original_mremap) { + result = (void *)(intptr_t) memory_patcher_syscall (SYS_mremap, start, oldlen, newlen, flags, new_address); + } else { + result = original_mremap (start, oldlen, newlen, flags, new_address); + } + + OPAL_PATCHER_END; + return result; +} + +#endif + +#if defined (SYS_madvise) + +static int (*original_madvise) (void *, size_t, int); + +static int intercept_madvise (void *start, size_t length, int advice) +{ + OPAL_PATCHER_BEGIN; + int result = 0; + + if (advice == MADV_DONTNEED || +#ifdef MADV_REMOVE + advice == MADV_REMOVE || +#endif + advice == POSIX_MADV_DONTNEED) + { + opal_mem_hooks_release_hook (start, length, false); + } + + if (!original_madvise) { + result = memory_patcher_syscall(SYS_madvise, start, length, advice); + } else { + result = original_madvise (start, length, advice); + } + 
+ OPAL_PATCHER_END; + return result; +} + +#endif + +#if defined SYS_brk + +#if OPAL_MEMORY_PATCHER_HAVE___CURBRK +extern void *__curbrk; /* defined in libc; declare only, do not define a second copy */ +#endif + +static int (*original_brk) (void *); + +static int intercept_brk (void *addr) +{ + OPAL_PATCHER_BEGIN; + int result = 0; + void *old_addr, *new_addr; + +#if OPAL_MEMORY_PATCHER_HAVE___CURBRK + old_addr = __curbrk; +#else + old_addr = sbrk (0); +#endif + + if (!original_brk) { + /* get the current_addr */ + new_addr = (void *) (intptr_t) memory_patcher_syscall(SYS_brk, addr); + +#if OPAL_MEMORY_PATCHER_HAVE___CURBRK + /* + * Note: if we were using glibc brk/sbrk, their __curbrk would get + * updated, but since we're going straight to the syscall, we have + * to update __curbrk or else glibc won't see it. + */ + __curbrk = new_addr; +#endif + } else { + result = original_brk (addr); +#if OPAL_MEMORY_PATCHER_HAVE___CURBRK + new_addr = __curbrk; +#else + new_addr = sbrk (0); +#endif + } + + if (new_addr < addr) { + errno = ENOMEM; + result = -1; + } else if (new_addr < old_addr) { + opal_mem_hooks_release_hook (new_addr, (intptr_t) old_addr - (intptr_t) new_addr, true); + } + OPAL_PATCHER_END; + return result; +} + +#endif + +#if defined(SYS_shmdt) && defined(__linux__) + +#include +#include +#include + +static size_t memory_patcher_get_shm_seg_size (const void *shmaddr) +{ + unsigned long start_addr, end_addr; + char *ptr, *newline; + char buffer[1024]; + size_t seg_size = 0; + int fd; + + seg_size = 0; + + fd = open ("/proc/self/maps", O_RDONLY); + if (fd < 0) { + return 0; + } + + for (size_t read_offset = 0 ; ; ) { + ssize_t nread = read(fd, buffer + read_offset, sizeof(buffer) - 1 - read_offset); + if (nread <= 0) { + if (nread < 0 && errno == EINTR) { /* retry only a real interrupted read; errno is stale at EOF */ + continue; + } + + break; + } else { + buffer[nread + read_offset] = '\0'; + } + + ptr = buffer; + while ( (newline = strchr(ptr, '\n')) != NULL ) { + /* 00400000-0040b000 r-xp ... 
\n */ + int ret = sscanf(ptr, "%lx-%lx ", &start_addr, &end_addr); + if (ret != 2) { + ptr = newline + 1; /* skip the unparsable line; without this the loop never advances */ + continue; + } + if (start_addr == (uintptr_t)shmaddr) { + seg_size = end_addr - start_addr; + goto out_close; + } + + newline = strchr(ptr, '\n'); + if (newline == NULL) { + break; + } + + ptr = newline + 1; + } + + read_offset = strlen(ptr); + memmove(buffer, ptr, read_offset); + } + + out_close: + close(fd); + return seg_size; +} + +static int (*original_shmdt) (const void *); + +static int intercept_shmdt (const void *shmaddr) +{ + OPAL_PATCHER_BEGIN; + int result; + + opal_mem_hooks_release_hook (shmaddr, memory_patcher_get_shm_seg_size (shmaddr), false); + + if (original_shmdt) { + result = original_shmdt (shmaddr); + } else { + result = memory_patcher_syscall (SYS_shmdt, shmaddr); + } + + OPAL_PATCHER_END; + return result; +} +#endif + +static int patcher_register (void) +{ + mca_memory_patcher_priority = 80; + mca_base_component_var_register (&mca_memory_patcher_component.super.memoryc_version, + "priority", "Priority of the patcher memory hook component", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_CONSTANT, &mca_memory_patcher_priority); + + return OPAL_SUCCESS; +} + +static int patcher_query (int *priority) +{ + if (opal_patcher->patch_symbol) { + *priority = mca_memory_patcher_priority; + } else { + *priority = -1; + } + return OPAL_SUCCESS; +} + +static int patcher_open (void) +{ + static int was_executed_already = 0; + int rc; + + if (was_executed_already) { + return OPAL_SUCCESS; + } + + was_executed_already = 1; + + /* set memory hooks support level */ + opal_mem_hooks_set_support (OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT); + +#if 0 + /* See above block to see why mmap() functionality is #if 0'ed + out */ + rc = opal_patcher->patch_symbol ("mmap", (uintptr_t) intercept_mmap, (uintptr_t *) &original_mmap); + if (OPAL_SUCCESS != rc) { + return rc; + } +#endif + + rc = opal_patcher->patch_symbol ("munmap", 
/*
 * Close function for the memory patcher component (presumably installed as
 * the component's close callback -- the component structure is not visible
 * from here, confirm).
 *
 * Performs no work and always returns OPAL_SUCCESS.
 */
static int patcher_close(void)
{
    /* Note that we don't need to unpatch any symbols here; the
       patcher framework will take care of all of that for us. */
    return OPAL_SUCCESS;
}
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# main library setup +noinst_LTLIBRARIES = libmca_patcher.la +libmca_patcher_la_SOURCES = + +# local files +headers = patcher.h +libmca_patcher_la_SOURCES += $(headers) + +# Conditionally install the header files +if WANT_INSTALL_HEADERS +opaldir = $(opalincludedir)/$(subdir) +nobase_opal_HEADERS = $(headers) +endif + +include base/Makefile.am + +distclean-local: + rm -f base/static-components.h diff --git a/opal/mca/patcher/base/Makefile.am b/opal/mca/patcher/base/Makefile.am new file mode 100644 index 0000000000..441e1c645d --- /dev/null +++ b/opal/mca/patcher/base/Makefile.am @@ -0,0 +1,25 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +headers += base/base.h + +libmca_patcher_la_SOURCES += base/patcher_base_frame.c \ + base/patcher_base_patch.c diff --git a/opal/mca/patcher/base/base.h b/opal/mca/patcher/base/base.h new file mode 100644 index 0000000000..65b48fc00c --- /dev/null +++ b/opal/mca/patcher/base/base.h @@ -0,0 +1,83 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. 
/**
 * Translate a function address into the address of its code.
 *
 * On PowerPC builds where _CALL_ELF is not 2, addr is treated as a pointer
 * to a two-word record {text, toc} and the text word is returned (0 when
 * addr itself is 0). On every other build addr is returned unchanged.
 *
 * NOTE(review): the {text, toc} record looks like an ELFv1 function
 * descriptor -- confirm against the PowerPC ABI documentation.
 *
 * @param addr  function address as seen by C code
 * @return address of the function's first instruction
 */
static inline uintptr_t mca_patcher_base_addr_text (uintptr_t addr)
{
#if (defined(__PPC64__) || defined(__powerpc64__) || defined(__PPC__)) && _CALL_ELF != 2
    struct odp_t {
        uintptr_t text;
        uintptr_t toc;
    };
    const struct odp_t *descriptor = (const struct odp_t *) addr;

    if (!descriptor) {
        return 0;
    }

    return descriptor->text;
#else
    return addr;
#endif
}
/*
 * Build the 32-bit instruction word for "addis RT, RS, UI":
 * primary opcode 15 in the top six bits, RT shifted to bit 21, RS shifted to
 * bit 16 and the low 16 bits of UI as the immediate field.
 */
static unsigned int addis(unsigned int RT, unsigned int RS, unsigned int UI)
{
    const unsigned int opcode = 15u << 26;
    const unsigned int immediate = UI & 0xffff;

    return opcode + (RT << 21) + (RS << 16) + immediate;
}
/*
 * Change the protection of every page overlapping [addr, addr + length).
 *
 * The range is widened to page boundaries: base is addr rounded down and
 * bound is addr + length rounded up to the page size reported by
 * opal_getpagesize(). A failing mprotect() is reported with perror() and
 * otherwise ignored (best effort; the function returns nothing).
 *
 * On __PPC__ builds the pages are changed one at a time. The loop is bounded
 * by the rounded-up end address. The previous condition compared against
 * addr + length *after* length had been replaced by bound - base, which for
 * an unaligned addr lies beyond bound and therefore changed the protection
 * of one page past the requested range. A range covering zero pages now
 * results in no mprotect() call on PPC (elsewhere it is a zero-length call).
 *
 * @param addr    first byte of the range
 * @param length  number of bytes in the range
 * @param prot    PROT_* flags passed to mprotect()
 */
static void ModifyMemoryProtection (uintptr_t addr, size_t length, int prot)
{
    long page_size = opal_getpagesize ();
    uintptr_t base = (addr & ~(page_size-1));
    uintptr_t bound = ((addr + length + page_size-1) & ~(page_size-1));

#if defined(__PPC__)
    /* NTH: is a loop necessary here? */
    for ( ; base < bound ; base += page_size) {
        if (mprotect ((void *) base, page_size, prot)) {
            perror ("MemHook: mprotect failed");
        }
    }
#else
    if (mprotect ((void *) base, bound - base, prot)) {
        perror ("MemHook: mprotect failed");
    }
#endif
}
+ } + // generate code to restore TOC + register unsigned long toc asm("r2"); + hook_patch->patch_orig = (uintptr_t) nop_addr; + hook_patch->patch_data_size = PatchLoadImm((uintptr_t)hook_patch->patch_data, 2, toc); + + /* put the hook patch on the patch list so it will be undone on finalize */ + opal_list_append (&module->patch_list, &hook_patch->super); + + mca_base_patcher_patch_apply_binary (hook_patch); +#endif + + return OPAL_SUCCESS; +} diff --git a/opal/mca/patcher/linux/Makefile.am b/opal/mca/patcher/linux/Makefile.am new file mode 100644 index 0000000000..a0facb5ce7 --- /dev/null +++ b/opal/mca/patcher/linux/Makefile.am @@ -0,0 +1,47 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +if MCA_BUILD_opal_patcher_linux_DSO +component_noinst = +component_install = mca_patcher_linux.la +else +component_noinst = libmca_patcher_linux.la +component_install = +endif + +linux_SOURCES = \ + patcher_linux.h \ + patcher_linux_module.c \ + patcher_linux_component.c + +mcacomponentdir = $(opallibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_patcher_linux_la_SOURCES = $(linux_SOURCES) +nodist_mca_patcher_linux_la_SOURCES = $(linux_nodist_SOURCES) +mca_patcher_linux_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_patcher_linux_la_SOURCES = $(linux_SOURCES) +nodist_libmca_patcher_linux_la_SOURCES = $(linux_nodist_SOURCES) +libmca_patcher_linux_la_LIBADD = $(patcher_linux_LIBS) +libmca_patcher_linux_la_LDFLAGS = -module -avoid-version diff --git a/opal/mca/patcher/linux/configure.m4 b/opal/mca/patcher/linux/configure.m4 new file mode 100644 index 0000000000..6cfef2ca1a --- /dev/null +++ b/opal/mca/patcher/linux/configure.m4 @@ -0,0 +1,53 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_patcher_linux_CONFIG(action-if-can-compile, +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_opal_patcher_linux_CONFIG],[ + AC_CONFIG_FILES([opal/mca/patcher/linux/Makefile]) + + OPAL_VAR_SCOPE_PUSH([opal_patcher_linux_CPPFLAGS_save]) + + opal_patcher_linux_happy=no + if test $OPAL_ENABLE_DLOPEN_SUPPORT = 1 ; then + # Only enable on Linux for now. In the future this component might + # be modified to work on FreeBSD. + case $host in + *-linux*) + opal_patcher_linux_happy=yes; + ;; + esac + + if test $opal_patcher_linux_happy = yes ; then + OPAL_CHECK_PACKAGE([patcher_linux], [dlfcn.h], [dl], [dl_iterate_phdr], [], [], [], + [],[opal_patcher_linux_happy=no]) + AC_CHECK_HEADERS([elf.h],[],[opal_patcher_linux_happy=no]) + AC_CHECK_HEADERS([sys/auxv.h]) + fi + fi + + AS_IF([test $opal_patcher_linux_happy = yes], [$1], [$2]) + OPAL_VAR_SCOPE_POP +]) diff --git a/opal/mca/patcher/linux/patcher_linux.h b/opal/mca/patcher/linux/patcher_linux.h new file mode 100644 index 0000000000..de4e85b35d --- /dev/null +++ b/opal/mca/patcher/linux/patcher_linux.h @@ -0,0 +1,45 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#if !defined(OPAL_PATCHER_LINUX_H) +#define OPAL_PATCHER_LINUX_H + +#include "opal_config.h" + +#include "opal/mca/patcher/base/base.h" +#include "opal/mca/patcher/patcher.h" + +#include "opal/class/opal_list.h" +#include "opal/threads/mutex.h" + +struct mca_patcher_linux_patch_got_t { + opal_list_item_t super; + void **got_entry; + void *got_orig; +}; + +typedef struct mca_patcher_linux_patch_got_t mca_patcher_linux_patch_got_t; + +OBJ_CLASS_DECLARATION(mca_patcher_linux_patch_got_t); + +struct mca_patcher_linux_patch_t { + mca_patcher_base_patch_t super; + opal_list_t patch_got_list; +}; + +typedef struct mca_patcher_linux_patch_t mca_patcher_linux_patch_t; + +OBJ_CLASS_DECLARATION(mca_patcher_linux_patch_t); + +extern mca_patcher_base_module_t mca_patcher_linux_module; +extern mca_patcher_base_component_t mca_patcher_linux_component; + +#endif /* !defined(OPAL_PATCHER_LINUX_H) */ diff --git a/opal/mca/patcher/linux/patcher_linux_component.c b/opal/mca/patcher/linux/patcher_linux_component.c new file mode 100644 index 0000000000..445ee23d6a --- /dev/null +++ b/opal/mca/patcher/linux/patcher_linux_component.c @@ -0,0 +1,43 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "patcher_linux.h" + +static int mca_patcher_linux_priority; + +static int mca_patcher_linux_register (void) +{ + mca_patcher_linux_priority = 13; + mca_base_component_var_register (&mca_patcher_linux_component.patcherc_version, + "priority", "Priority of the linux binary patcher component", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_CONSTANT, &mca_patcher_linux_priority); + + return OPAL_SUCCESS; +} + +static int mca_patcher_linux_query (mca_base_module_t **module, int *priority) +{ + *module = &mca_patcher_linux_module.super; + *priority = mca_patcher_linux_priority; + return OPAL_SUCCESS; +} + +mca_patcher_base_component_t mca_patcher_linux_component = { + .patcherc_version = { + OPAL_PATCHER_BASE_VERSION_1_0_0, + .mca_component_name = "linux", + MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, + OPAL_RELEASE_VERSION), + .mca_query_component = mca_patcher_linux_query, + .mca_register_component_params = mca_patcher_linux_register, + }, +}; diff --git a/opal/mca/patcher/linux/patcher_linux_module.c b/opal/mca/patcher/linux/patcher_linux_module.c new file mode 100644 index 0000000000..90cd497a10 --- /dev/null +++ b/opal/mca/patcher/linux/patcher_linux_module.c @@ -0,0 +1,466 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. 
/*
 * Find the PT_DYNAMIC entry in a program header table.
 *
 * Examines phnum entries starting at phdr, stepping phent bytes from one
 * entry to the next, and returns the first entry whose p_type is PT_DYNAMIC.
 * Returns NULL when none of the examined entries has that type.
 */
static const ElfW(Phdr) *
mca_patcher_linux_get_phdr_dynamic(const ElfW(Phdr) *phdr, uint16_t phnum, int phent)
{
    const char *cursor = (const char *) phdr;

    for (uint16_t remaining = phnum; remaining > 0; --remaining) {
        const ElfW(Phdr) *candidate = (const ElfW(Phdr) *) cursor;

        if (PT_DYNAMIC == candidate->p_type) {
            return candidate;
        }

        /* entries are phent bytes apart, which need not equal sizeof(*phdr) */
        cursor += phent;
    }

    return NULL;
}
ElfW(Dyn)* +mca_patcher_linux_get_dynentry(ElfW(Addr) base, const ElfW(Phdr) *pdyn, int64_t type) +#else +static const ElfW(Dyn)* +mca_patcher_linux_get_dynentry(ElfW(Addr) base, const ElfW(Phdr) *pdyn, int32_t type) +#endif +{ + for (ElfW(Dyn) *dyn = (ElfW(Dyn)*)(base + pdyn->p_vaddr); dyn->d_tag; ++dyn) { + if (dyn->d_tag == type) { + return dyn; + } + } + return NULL; +} + +static void mca_patcher_linux_get_jmprel(ElfW(Addr) base, const ElfW(Phdr) *pdyn, + mca_patcher_linux_elf_jmprel_t *table) +{ + const ElfW(Dyn) *dyn; + + dyn = mca_patcher_linux_get_dynentry(base, pdyn, DT_JMPREL); + table->tab = (dyn == NULL) ? NULL : (ElfW(Rela)*)dyn->d_un.d_ptr; + dyn = mca_patcher_linux_get_dynentry(base, pdyn, DT_PLTRELSZ); + table->size = (dyn == NULL) ? 0 : dyn->d_un.d_val; +} + +static void mca_patcher_linux_get_symtab(ElfW(Addr) base, const ElfW(Phdr) *pdyn, + mca_patcher_linux_elf_symtab_t *table) +{ + const ElfW(Dyn) *dyn; + + dyn = mca_patcher_linux_get_dynentry(base, pdyn, DT_SYMTAB); + table->tab = (dyn == NULL) ? NULL : (ElfW(Sym)*)dyn->d_un.d_ptr; + dyn = mca_patcher_linux_get_dynentry(base, pdyn, DT_SYMENT); + table->entsz = (dyn == NULL) ? 0 : dyn->d_un.d_val; +} + +static void mca_patcher_linux_get_strtab(ElfW(Addr) base, const ElfW(Phdr) *pdyn, + mca_patcher_linux_elf_strtab_t *table) +{ + const ElfW(Dyn) *dyn; + + dyn = mca_patcher_linux_get_dynentry(base, pdyn, DT_STRTAB); + table->tab = (dyn == NULL) ? NULL : (char *)dyn->d_un.d_ptr; + dyn = mca_patcher_linux_get_dynentry(base, pdyn, DT_STRSZ); + table->size = (dyn == NULL) ? 
0 : dyn->d_un.d_val; +} + +static void * mca_patcher_linux_get_got_entry (ElfW(Addr) base, const ElfW(Phdr) *phdr, int16_t phnum, + int phent, const char *symbol) +{ + mca_patcher_linux_elf_jmprel_t jmprel; + mca_patcher_linux_elf_symtab_t symtab; + mca_patcher_linux_elf_strtab_t strtab; + ElfW(Rela) *rela, *relaend; + const ElfW(Phdr) *dphdr; + const char *relsymname; + uint32_t relsymidx; + + dphdr = mca_patcher_linux_get_phdr_dynamic (phdr, phnum, phent); + + mca_patcher_linux_get_jmprel (base, dphdr, &jmprel); + mca_patcher_linux_get_symtab (base, dphdr, &symtab); + mca_patcher_linux_get_strtab (base, dphdr, &strtab); + + relaend = (ElfW(Rela) *)((char *)jmprel.tab + jmprel.size); + for (rela = jmprel.tab; rela < relaend; ++rela) { +#if SIZEOF_VOID_P == 8 + relsymidx = ELF64_R_SYM(rela->r_info); +#else + relsymidx = ELF32_R_SYM(rela->r_info); +#endif + relsymname = strtab.tab + symtab.tab[relsymidx].st_name; + if (!strcmp(symbol, relsymname)) { + return (void *)(base + rela->r_offset); + } + } + return NULL; +} + +static int mca_patcher_linux_get_aux_phent (void) +{ +#if !defined(HAVE_SYS_AUXV_H) +#define MCA_PATCHER_LINUX_AUXV_BUF_LEN 16 + static const char *proc_auxv_filename = "/proc/self/auxv"; + static int phent = 0; +#if SIZEOF_VOID_P == 8 + Elf64_auxv_t buffer[MCA_PATCHER_LINUX_AUXV_BUF_LEN]; +#else + Elf32_auxv_t buffer[MCA_PATCHER_LINUX_AUXV_BUF_LEN]; +#endif + unsigned count; + ssize_t nread; + int fd; + + /* Can avoid lock here - worst case we'll read the file more than once */ + if (phent == 0) { + fd = open(proc_auxv_filename, O_RDONLY); + if (fd < 0) { + opal_output_verbose (MCA_BASE_VERBOSE_ERROR, opal_patcher_base_framework.framework_output, + "failed to open '%s' for reading: %s", proc_auxv_filename, + strerror (errno)); + return OPAL_ERROR; + } + + /* Use small buffer on the stack, avoid using malloc() */ + do { + nread = read(fd, buffer, sizeof(buffer)); + if (nread < 0) { + opal_output_verbose (MCA_BASE_VERBOSE_ERROR, 
/*
 * Install or remove ctx->patch in the GOT of one loaded object.
 *
 * Looks up the GOT slot for ctx->patch->super.patch_symbol in the object
 * described by (base, phdr, phnum, phent). An object that has no such slot
 * is not an error: OPAL_SUCCESS is returned and nothing is changed.
 *
 * When ctx->remove is false and the slot does not already hold the patch
 * value, the current slot contents are recorded in a new
 * mca_patcher_linux_patch_got_t appended to ctx->patch->patch_got_list and
 * the slot is overwritten with the patch value.
 *
 * When ctx->remove is true and the slot holds the patch value, the matching
 * record is looked up in patch_got_list, the saved value is written back and
 * the record is removed from the list and released.
 *
 * phname is not used by this function.
 *
 * Returns OPAL_SUCCESS, OPAL_ERR_NOT_SUPPORTED when the page holding the
 * slot cannot be made writable, or OPAL_ERR_OUT_OF_RESOURCE when the
 * bookkeeping record cannot be allocated.
 */
static int
mca_patcher_linux_modify_got (ElfW(Addr) base, const ElfW(Phdr) *phdr, const char *phname,
                              int16_t phnum, int phent, mca_patcher_linux_dl_iter_context_t *ctx)
{
    long page_size = opal_getpagesize ();
    void **entry;
    void *page;
    int ret;

    entry = mca_patcher_linux_get_got_entry (base, phdr, phnum, phent, ctx->patch->super.patch_symbol);
    if (entry == NULL) {
        return OPAL_SUCCESS;
    }

    /* make the page containing the slot writable; this function does not
     * put the previous protection back afterwards */
    page = (void *)((intptr_t)entry & ~(page_size - 1));
    ret = mprotect(page, page_size, PROT_READ|PROT_WRITE);
    if (ret < 0) {
        opal_output_verbose (MCA_BASE_VERBOSE_ERROR, opal_patcher_base_framework.framework_output,
                             "failed to modify GOT page %p to rw: %s", page, strerror (errno));
        return OPAL_ERR_NOT_SUPPORTED;
    }

    if (!ctx->remove) {
        /* skip slots that already point at the replacement */
        if (*entry != (void *) ctx->patch->super.patch_value) {
            mca_patcher_linux_patch_got_t *patch_got = OBJ_NEW(mca_patcher_linux_patch_got_t);
            if (NULL == patch_got) {
                return OPAL_ERR_OUT_OF_RESOURCE;
            }

            opal_output_verbose (MCA_BASE_VERBOSE_TRACE, opal_patcher_base_framework.framework_output,
                                 "patch %p (%s): modifying got entry %p. original value %p. new value %p\n", (void *)ctx->patch,
                                 ctx->patch->super.patch_symbol, (void *) entry, *entry, (void *) ctx->patch->super.patch_value);

            /* remember slot address and old contents so the removal path can undo this */
            patch_got->got_entry = entry;
            patch_got->got_orig = *entry;

            opal_list_append (&ctx->patch->patch_got_list, &patch_got->super);

            *entry = (void *) ctx->patch->super.patch_value;
        }
    } else {
        /* only slots that still hold the replacement are restored */
        if (*entry == (void *) ctx->patch->super.patch_value) {
            /* find the appropriate entry and restore the original value */
            mca_patcher_linux_patch_got_t *patch_got;
            OPAL_LIST_FOREACH_REV(patch_got, &ctx->patch->patch_got_list, mca_patcher_linux_patch_got_t) {
                if (patch_got->got_entry == entry) {
                    opal_output_verbose (MCA_BASE_VERBOSE_TRACE, opal_patcher_base_framework.framework_output,
                                         "restoring got entry %p with original value %p\n", (void *) entry, patch_got->got_orig);
                    *entry = patch_got->got_orig;
                    opal_list_remove_item (&ctx->patch->patch_got_list, &patch_got->super);
                    OBJ_RELEASE(patch_got);
                    /* the list was just modified; stop iterating */
                    break;
                }
            }
        }
    }

    return OPAL_SUCCESS;
}
modify ELF data structures. + * Worst case the same symbol will be written more than once. + */ + (void)dl_iterate_phdr(mca_patcher_linux_phdr_iterator, &ctx); + if (ctx.status == OPAL_SUCCESS) { + opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_patcher_base_framework.framework_output, + "modified '%s' to %" PRIxPTR , ctx.patch->super.patch_symbol, + ctx.patch->super.patch_value); + } + + return ctx.status; +} + +static int mca_patcher_linux_remove_patch (mca_patcher_linux_patch_t *patch) +{ + mca_patcher_linux_dl_iter_context_t ctx = { + .patch = patch, + .remove = true, + .status = OPAL_SUCCESS, + }; + + /* Avoid locks here because we don't modify ELF data structures. + * Worst case the same symbol will be written more than once. + */ + (void)dl_iterate_phdr(mca_patcher_linux_phdr_iterator, &ctx); + if (ctx.status == OPAL_SUCCESS) { + opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_patcher_base_framework.framework_output, + "modified '%s' to 0x%lx", ctx.patch->super.patch_symbol, ctx.patch->super.patch_value); + } + + return ctx.status; +} + +static void *mca_patcher_linux_dlopen(const char *filename, int flag) +{ + OPAL_PATCHER_BEGIN; + mca_patcher_linux_patch_t *patch; + void *handle; + + assert (orig_dlopen); + handle = orig_dlopen (filename, flag); + if (handle != NULL) { + /* + * Every time a new object is loaded, we must update its relocations + * with our list of patches (including dlopen itself). This code is less + * efficient and will modify all existing objects every time, but good + * enough. 
+ */ + opal_mutex_lock (&mca_patcher_linux_module.patch_list_mutex); + OPAL_LIST_FOREACH(patch, &mca_patcher_linux_module.patch_list, mca_patcher_linux_patch_t) { + opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_patcher_base_framework.framework_output, + "in dlopen(), re-applying '%s' to %p", patch->super.patch_symbol, (void *) patch->super.patch_value); + /* ignore hook binary patches */ + if (!patch->super.patch_data_size) { + mca_patcher_linux_apply_patch (patch); + } + } + opal_mutex_unlock (&mca_patcher_linux_module.patch_list_mutex); + } + + OPAL_PATCHER_END; + return handle; +} + +static intptr_t mca_patcher_linux_get_orig (const char *symbol, void *replacement) +{ + const char *error; + void *func_ptr; + + func_ptr = dlsym(RTLD_DEFAULT, symbol); + if (func_ptr == replacement) { + (void)dlerror(); + func_ptr = dlsym(RTLD_NEXT, symbol); + if (func_ptr == NULL) { + error = dlerror(); + opal_output_verbose (MCA_BASE_VERBOSE_ERROR, opal_patcher_base_framework.framework_output, + "could not find address of original %s(): %s", symbol, error ? 
error : "Unknown error"); + } + } + + opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_patcher_base_framework.framework_output, + "original %s() is at %p", symbol, func_ptr); + + return (intptr_t) func_ptr; +} + +static int mca_patcher_linux_patch_symbol (const char *symbol_name, uintptr_t replacement, uintptr_t *orig) +{ + mca_patcher_linux_patch_t *patch = OBJ_NEW(mca_patcher_linux_patch_t); + int rc; + + if (OPAL_UNLIKELY(NULL == patch)) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + patch->super.patch_symbol = strdup (symbol_name); + if (NULL == patch->super.patch_symbol) { + OBJ_RELEASE(patch); + return OPAL_ERR_OUT_OF_RESOURCE; + } + + patch->super.patch_value = mca_patcher_base_addr_text (replacement); + patch->super.patch_restore = (mca_patcher_base_restore_fn_t) mca_patcher_linux_remove_patch; + + /* Take lock first to handle a possible race where dlopen() is called + * from another thread and we may end up not patching it. + */ + opal_mutex_lock (&mca_patcher_linux_module.patch_list_mutex); + do { + rc = mca_patcher_base_patch_hook (&mca_patcher_linux_module, patch->super.patch_value); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(patch); + break; + } + + rc = mca_patcher_linux_apply_patch (patch); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(patch); + break; + } + + *orig = mca_patcher_linux_get_orig (patch->super.patch_symbol, (void *) replacement); + + opal_list_append (&mca_patcher_linux_module.patch_list, &patch->super.super); + } while (0); + opal_mutex_unlock (&mca_patcher_linux_module.patch_list_mutex); + + return rc; +} + +/* called with lock held */ +static int mca_patcher_linux_install_dlopen (void) +{ + return mca_patcher_linux_patch_symbol ("dlopen", (uintptr_t) mca_patcher_linux_dlopen, + (uintptr_t *) &orig_dlopen); +} + +static int mca_patcher_linux_init (void) +{ + return mca_patcher_linux_install_dlopen (); +} + +mca_patcher_base_module_t mca_patcher_linux_module = { + .patch_init = mca_patcher_linux_init, + .patch_symbol = 
mca_patcher_linux_patch_symbol, +}; diff --git a/opal/mca/patcher/overwrite/Makefile.am b/opal/mca/patcher/overwrite/Makefile.am new file mode 100644 index 0000000000..e9e4a31718 --- /dev/null +++ b/opal/mca/patcher/overwrite/Makefile.am @@ -0,0 +1,47 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +if MCA_BUILD_opal_patcher_overwrite_DSO +component_noinst = +component_install = mca_patcher_overwrite.la +else +component_noinst = libmca_patcher_overwrite.la +component_install = +endif + +overwrite_SOURCES = \ + patcher_overwrite.h \ + patcher_overwrite_module.c \ + patcher_overwrite_component.c + +mcacomponentdir = $(opallibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_patcher_overwrite_la_SOURCES = $(overwrite_SOURCES) +nodist_mca_patcher_overwrite_la_SOURCES = $(overwrite_nodist_SOURCES) +mca_patcher_overwrite_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_patcher_overwrite_la_SOURCES = $(overwrite_SOURCES) +nodist_libmca_patcher_overwrite_la_SOURCES = $(overwrite_nodist_SOURCES) +libmca_patcher_overwrite_la_LIBADD = $(patcher_overwrite_LIBS) +libmca_patcher_overwrite_la_LDFLAGS = -module -avoid-version diff --git a/opal/mca/patcher/overwrite/configure.m4 b/opal/mca/patcher/overwrite/configure.m4 new file mode 100644 index 0000000000..02394cc958 --- /dev/null +++ b/opal/mca/patcher/overwrite/configure.m4 @@ -0,0 +1,41 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_patcher_overwrite_CONFIG(action-if-can-compile, +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_opal_patcher_overwrite_CONFIG],[ + AC_CONFIG_FILES([opal/mca/patcher/overwrite/Makefile]) + + opal_patcher_overwrite_happy=no + if test $OPAL_ENABLE_DLOPEN_SUPPORT = 1; then + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ +#if !defined(__i386__) && !defined(__x86_64__) && !defined(__ia64__) && !defined(__PPC__) +#error "platform not supported" +#endif +]],[])],[opal_patcher_overwrite_happy=yes],[]) + fi + + AS_IF([test $opal_patcher_overwrite_happy = yes], [$1], [$2]) +]) diff --git a/opal/mca/patcher/overwrite/patcher_overwrite.h b/opal/mca/patcher/overwrite/patcher_overwrite.h new file mode 100644 index 0000000000..9c2ad58dfd --- /dev/null +++ b/opal/mca/patcher/overwrite/patcher_overwrite.h @@ -0,0 +1,32 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** + * @file pather_overwrite.h + * + * This component works by overwritting the first couple instructions in + * the target function with a jump instruction to the hook function. The + * hook function will be expected to implement the functionality of the + * hooked function when using this module. + * + * Note: This component only supports x86, x86_64, ia64, and powerpc/power. 
+ */ + +#if !defined(OPAL_PATCHER_OVERWRITE_H) +#define OPAL_PATCHER_OVERWRITE_H + +#include "opal_config.h" +#include "opal/mca/patcher/patcher.h" +#include "opal/class/opal_list.h" + +extern mca_patcher_base_module_t mca_patcher_overwrite_module; +extern mca_patcher_base_component_t mca_patcher_overwrite_component; + +#endif /* !defined(OPAL_PATCHER_OVERWRITE_H) */ diff --git a/opal/mca/patcher/overwrite/patcher_overwrite_component.c b/opal/mca/patcher/overwrite/patcher_overwrite_component.c new file mode 100644 index 0000000000..5211d4deae --- /dev/null +++ b/opal/mca/patcher/overwrite/patcher_overwrite_component.c @@ -0,0 +1,45 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "patcher_overwrite.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" + +static int mca_patcher_overwrite_priority; + +static int mca_patcher_overwrite_register (void) +{ + mca_patcher_overwrite_priority = 37; + mca_base_component_var_register (&mca_patcher_overwrite_component.patcherc_version, + "priority", "Priority of the overwrite binary patcher component", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_CONSTANT, &mca_patcher_overwrite_priority); + + return OPAL_SUCCESS; +} + +static int mca_patcher_overwrite_query (mca_base_module_t **module, int *priority) +{ + *module = &mca_patcher_overwrite_module.super; + *priority = mca_patcher_overwrite_priority; + return OPAL_SUCCESS; +} + +mca_patcher_base_component_t mca_patcher_overwrite_component = { + .patcherc_version = { + OPAL_PATCHER_BASE_VERSION_1_0_0, + .mca_component_name = "overwrite", + MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, + OPAL_RELEASE_VERSION), + .mca_query_component = mca_patcher_overwrite_query, + .mca_register_component_params = 
/* Set bit p (0..7) of the byte at d to v; callers pass v as 0 or 1. */
#define INSERT_BIT(d,p,v) do {                                              \
        assert(((p) >= 0) && ((p) < 8));                                    \
        *(d) = (unsigned char) ((*(d) & ~(1 << (p))) | ((v) << (p)));       \
    } while (0)

/*
 * Append the low 41 bits of instr_slot, most significant bit first, to the
 * bit stream addressed by (*dst, *dst_bitpos). The cursor moves from bit 7
 * down to bit 0 of a byte and then on to bit 7 of the following byte; both
 * cursor parts are updated for the caller.
 */
static inline void
copy_instr_slot(unsigned char **dst, int *dst_bitpos, unsigned long instr_slot)
{
    int shift = 41;

    while (shift-- > 0) {
        INSERT_BIT(*dst, *dst_bitpos, (instr_slot >> shift) & 1);

        if (0 != *dst_bitpos) {
            *dst_bitpos -= 1;
        } else {
            *dst += 1;
            *dst_bitpos = 7;
        }
    }
}

/*
 * Pack three 41-bit instruction slots and a 5-bit template into the 16 bytes
 * at dst, as the bit concatenation i2:i1:i0:template (3 * 41 + 5 = 128 bits).
 * Every bit of the 16 bytes is written.
 */
static void make_ia64_bundle (unsigned char *dst,
                              unsigned long i2,
                              unsigned long i1,
                              unsigned long i0,
                              unsigned template)
{
    const unsigned long slots[3] = { i2, i1, i0 };
    int dst_bitpos = 7;

    for (int s = 0 ; s < 3 ; ++s) {
        copy_instr_slot(&dst, &dst_bitpos, slots[s]);
    }

    /* 123 bits written so far: 15 full bytes plus 3 bits of the last one */
    assert(dst_bitpos == 4);

    for (int shift = 4 ; shift >= 0 ; --shift, --dst_bitpos) {
        INSERT_BIT(dst, dst_bitpos, (template >> shift) & 1);
    }
}
defined(__ia64__) */ + +static int mca_patcher_overwrite_apply_patch (mca_patcher_base_patch_t *patch) +{ + uintptr_t func_new_addr = patch->patch_value; + + { +#if defined(__i386__) + patch->patch_data_size = 5; + *(unsigned char *)(patch->patch_data+0) = 0xe9; + *(unsigned int *) (patch->patch_data+1) = (unsigned int)(func_new_addr - patch->patch_orig - 5); +#elif defined(__x86_64__) + patch->patch_data_size = 13; + *(unsigned short*)(patch->patch_data + 0) = 0xbb49; + *(unsigned long* )(patch->patch_data + 2) = (unsigned long) func_new_addr; + *(unsigned char*) (patch->patch_data +10) = 0x41; + *(unsigned char*) (patch->patch_data +11) = 0xff; + *(unsigned char*) (patch->patch_data +12) = 0xe3; +#elif defined(__ia64__) + { +/* + * target64 = IP + ((i << 59 | imm39 << 20 | imm20) << 4) + * imm64 = i << 63 | imm41 << 22 | ic << 21 | imm5c << 16 | imm9d << 7 | imm7b + */ + unsigned char buf[16]; + unsigned long long imm64 = func_new_addr - func_old_addr - 16; + register unsigned long long glb_ptr __asm__("r1"); + unsigned long long nop = + (0x0ULL<<37) | /* O */ + (0x0ULL<<36) | /* i */ + (0x0ULL<<33) | /* x3 */ + (0x1ULL<<27) | /* x6 */ + (0x0ULL<< 6) | /* imm20 */ + (0x0ULL<< 0); /* qp */ + unsigned long long brl = + (0xcULL << 37) | + (((imm64>>63)&0x1ULL) << 36) | + (0x0ULL << 35) | + (0x0ULL << 33) | + (((imm64>>4)&0xFFFFFULL) << 13) | + (0x0ULL << 6) | + (0x0ULL << 0); + unsigned long long movl = + (0x6ULL << 37) | + (((glb_ptr>>63)&0x1ULL) << 36) | + (((glb_ptr>> 7)&0x1FFULL) << 27) | + (((glb_ptr>>16)&0x1FULL) << 22) | + (((glb_ptr>>21)&0x1ULL) << 21) | + (0ULL << 20) | + (((glb_ptr>> 0)&0x7FULL) << 13) | + (1ULL << 6) | + (0x0ULL << 0); + + patch->data_size = 32; + + make_ia64_bundle(buf, movl, (glb_ptr>>22)&0x1FFFFFFFFFFULL, nop, 5); + for (int i = 0 ; i < 16 ; ++i) { + patch->patch_data[16-i-1] = buf[i]; + } + + make_ia64_bundle(buf, brl, ((imm64>>24)&0x7FFFFFFFFFULL)<<2, nop, 5); + for (int i = 0 ; i < 16 ; ++i) { + patch->patch_data[32-i-1] = buf[i]; + 
// PowerPC instruction encoders used when generating patch code.
// Reference: "PowerPC User Instruction Set Architecture"
// Each helper returns the 32-bit instruction word: the primary opcode sits in
// the top six bits and the operand fields are added in below it.

// addis RT, RS, UI (opcode 15); only the low 16 bits of UI are used
static unsigned int addis(unsigned int RT, unsigned int RS, unsigned int UI)
{
    const unsigned int opcode = 15u << 26;
    const unsigned int imm16 = UI & 0xffffu;

    return opcode + (RT << 21) + (RS << 16) + imm16;
}

// ori RT, RS, UI (opcode 24); the source register occupies the upper field
static unsigned int ori(unsigned int RT, unsigned int RS, unsigned int UI)
{
    const unsigned int opcode = 24u << 26;
    const unsigned int imm16 = UI & 0xffffu;

    return opcode + (RS << 21) + (RT << 16) + imm16;
}

// oris RT, RS, UI (opcode 25); same layout as ori
static unsigned int oris(unsigned int RT, unsigned int RS, unsigned int UI)
{
    const unsigned int opcode = 25u << 26;
    const unsigned int imm16 = UI & 0xffffu;

    return opcode + (RS << 21) + (RT << 16) + imm16;
}

// mtspr SPR, RS (opcode 31, extended opcode 467); the two 5-bit halves of
// the SPR number are stored swapped
static unsigned int mtspr(unsigned int SPR, unsigned int RS)
{
    const unsigned int opcode = 31u << 26;
    const unsigned int spr_low = (SPR & 0x1fu) << 16;
    const unsigned int spr_high = (SPR >> 5) << 11;

    return opcode + (RS << 21) + spr_low + spr_high + (467u << 1);
}

// bcctr BO, BI, BH (opcode 19, extended opcode 528)
static unsigned int bcctr(unsigned int BO, unsigned int BI, unsigned int BH)
{
    const unsigned int opcode = 19u << 26;

    return opcode + (BO << 21) + (BI << 16) + (BH << 11) + (528u << 1);
}

// rldicr RT, RS, SH, MB (opcode 30); the 6-bit shift and mask values are
// each split into a 5-bit field plus one extra bit
static unsigned int rldicr(unsigned int RT, unsigned int RS, unsigned int SH, unsigned int MB)
{
    const unsigned int opcode = 30u << 26;
    const unsigned int shift_bits = ((SH & 0x1fu) << 11) + ((SH >> 5) << 1);
    const unsigned int mask_bits = ((MB & 0x1fu) << 6) + ((MB >> 5) << 5);

    return opcode + (RS << 21) + (RT << 16) + shift_bits + mask_bits + (1u << 2);
}

// Write at addr the instructions that load the immediate `value` into
// register `reg`, and return the number of bytes written: five instructions
// (20 bytes) when built for __PPC64__, two instructions (8 bytes) otherwise.
static int
PatchLoadImm(uintptr_t addr, unsigned int reg, size_t value)
{
    unsigned int *insn = (unsigned int *) addr;
    int count = 0;

#if defined(__PPC64__)
    insn[count++] = addis (reg, 0,   (value >> 48));
    insn[count++] = ori   (reg, reg, (value >> 32));
    insn[count++] = rldicr(reg, reg, 32, 31);
    insn[count++] = oris  (reg, reg, (value >> 16));
    insn[count++] = ori   (reg, reg, (value >>  0));
#else
    insn[count++] = addis (reg, 0,   (value >> 16));
    insn[count++] = ori   (reg, reg, (value >>  0));
#endif

    return 4 * count;
}
hook_addr; + int offset, rc; + + // get system function address + sys_addr = mca_patcher_base_addr_text(patch->patch_orig); + hook_addr = mca_patcher_base_addr_text(patch->patch_value); + +// Patch for hook function: +#if (defined(__PPC64__) || defined(__powerpc64__) || defined(__PPC__)) + rc = mca_patcher_base_patch_hook (&mca_patcher_overwrite_module, hook_addr); + if (OPAL_SUCCESS != rc) { + return rc; + } + +#if _CALL_ELF == 2 + sys_addr += 8; + hook_addr += 8; +#endif /* _CALL_ELF == 2*/ +#endif + + // Patch for system function: + // generate patch code + // r11 is a volatile register according to PowerPC EABI + const unsigned int gr = 11; + offset = PatchLoadImm ((uintptr_t) patch->patch_data, gr, hook_addr); + *(unsigned int *) (patch->patch_data + offset + 0) = mtspr (9, gr); // 9 = CTR + *(unsigned int *) (patch->patch_data + offset + 4) = bcctr (20, 0, 0);// 20 = always + patch->patch_data_size = offset + 8; + patch->patch_orig = sys_addr; + + mca_base_patcher_patch_apply_binary (patch); + + return OPAL_SUCCESS; +} + +#endif + +static int mca_patcher_overwrite_patch_address (uintptr_t sys_addr, uintptr_t hook_addr) +{ + mca_patcher_base_patch_t *patch; + int rc; + + patch = OBJ_NEW(mca_patcher_base_patch_t); + if (OPAL_UNLIKELY(NULL == patch)) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + patch->patch_orig = sys_addr; + patch->patch_value = hook_addr; + + opal_mutex_lock (&mca_patcher_overwrite_module.patch_list_mutex); + do { + rc = mca_patcher_overwrite_apply_patch (patch); + if (OPAL_SUCCESS != rc) { + break; + } + + opal_list_append (&mca_patcher_overwrite_module.patch_list, &patch->super); + } while (0); + + opal_mutex_unlock (&mca_patcher_overwrite_module.patch_list_mutex); + + return OPAL_SUCCESS; +} + +static int mca_patcher_overwrite_patch_symbol (const char *func_symbol_name, uintptr_t func_new_addr, + uintptr_t *func_old_addr) +{ + void *sym_addr; + char *error; + uintptr_t old_addr; + + /* NTH: might want to update opal/mca/dl to handle lookups 
in the default + * handle. */ + sym_addr = dlsym (RTLD_NEXT, func_symbol_name); + if (NULL == sym_addr) { + sym_addr = dlsym(RTLD_DEFAULT, func_symbol_name); + if ( (sym_addr == NULL) && ((error = dlerror()) != NULL) ) { + opal_output(0, "error locating symbol %s to patch. %s", func_symbol_name, + error); + return OPAL_ERR_NOT_FOUND; + } + } + + old_addr = (unsigned long)sym_addr; + +#if defined(__ia64__) + /* On IA64 addresses are all indirect */ + func_new_addr = *(unsigned long *)func_new_addr; + old_addr = *(unsigned long *) old_addr; +#endif + + if (func_old_addr) { + /* we will be overwritting part of the original function. do not return + * its address */ + *func_old_addr = 0; + } + + return mca_patcher_overwrite_patch_address (old_addr, func_new_addr); +} + +mca_patcher_base_module_t mca_patcher_overwrite_module = { + .patch_symbol = mca_patcher_overwrite_patch_symbol, + .patch_address = mca_patcher_overwrite_patch_address, +}; diff --git a/opal/mca/patcher/patcher.h b/opal/mca/patcher/patcher.h new file mode 100644 index 0000000000..25af9b1376 --- /dev/null +++ b/opal/mca/patcher/patcher.h @@ -0,0 +1,121 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OPAL_MCA_PATCHER_PATCHER_H +#define OPAL_MCA_PATCHER_PATCHER_H + +#include "opal_config.h" + +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "opal/class/opal_list.h" + +/* Any function being patched in as a hook must use SYMBOLPATCH_BEGIN at the top, + * and SYMBOLPATCH_END before it returns (this is just for PPC). 
*/ + +#if (defined(__PPC64__) || defined(__powerpc64__) || defined(__PPC__)) && defined(OPAL_GCC_INLINE_ASSEMBLY) + +/* special processing for ppc64 to save and restore TOC (r2) + * Reference: "64-bit PowerPC ELF Application Binary Interface Supplement 1.9" */ +#define OPAL_PATCHER_BEGIN \ + unsigned long toc_save; \ + asm volatile ("std 2, %0" : "=m" (toc_save)); \ + asm volatile ("nop; nop; nop; nop; nop"); +#define OPAL_PATCHER_END \ + asm volatile ("ld 2, %0" : : "m" (toc_save)); + +#else /* !__PPC64__ */ + +#define OPAL_PATCHER_BEGIN +#define OPAL_PATCHER_END + +#endif + +/** + * Make any calls to the named function redirect to a new function + * + * @param[in] func_symbol_name function to hook + * @param[in] func_new_addr function pointer of hook + * @param[out] func_old_addr address of func_symbol_name + * + * This function redirects all calls to the function func_symbol_name to + * the function pointer func_new_addr. If it is possible for the hook + * function to call the original function the patcher module will return + * the old function's address in func_old_addr. + */ +typedef int (*mca_patcher_base_patch_symbol_fn_t)(const char *func_symbol_name, uintptr_t func_new_addr, + uintptr_t *func_old_addr); + +/** + * Make any calls to a function redirect to a new function + * + * @param[in] func_addr address of the function to hook + * @param[in] func_new_addr function pointer of hook + * + * This function redirects all calls to the function at func_addr to + * the function pointer func_new_addr. + */ +typedef int (*mca_patcher_base_patch_address_fn_t)(uintptr_t func_addr, uintptr_t func_new_addr); + +/** + * Set up the patcher module + */ +typedef int (*mca_patcher_base_init_fn_t) (void); + +/** + * Finalize the patcher module + */ +typedef int (*mca_patcher_base_fini_fn_t) (void); + +/** + * Structure for patcher modules. 
+ */ +typedef struct mca_patcher_base_module_t { + mca_base_module_t super; + /** list of patches */ + opal_list_t patch_list; + /** lock for patch list */ + opal_mutex_t patch_list_mutex; + /** function to call if the patcher module is used. can + * be NULL. */ + mca_patcher_base_init_fn_t patch_init; + /** function to call when patcher is unloaded. this function + * MUST clean up all active patches. can be NULL. */ + mca_patcher_base_fini_fn_t patch_fini; + /** hook a symbol. may be NULL */ + mca_patcher_base_patch_symbol_fn_t patch_symbol; + /** hook a function pointer. may be NULL */ + mca_patcher_base_patch_address_fn_t patch_address; +} mca_patcher_base_module_t; + + +OPAL_DECLSPEC extern mca_patcher_base_module_t *opal_patcher; + +/** + * Structure for patcher components. + */ +typedef struct mca_patcher_base_component_1_0_0_t { + /** MCA base component */ + mca_base_component_t patcherc_version; + /** MCA base data */ + mca_base_component_data_t patcherc_data; +} mca_patcher_base_component_1_0_0_t; + +typedef mca_patcher_base_component_1_0_0_t mca_patcher_base_component_t; + +/* + * Macro for use in components that are of type patcher + */ +#define OPAL_PATCHER_BASE_VERSION_1_0_0 \ + OPAL_MCA_BASE_VERSION_2_1_0("patcher", 1, 0, 0) + +#endif /* OPAL_MCA_PATCHER_PATCHER_H */ diff --git a/opal/runtime/opal_finalize.c b/opal/runtime/opal_finalize.c index cd27dd0854..0e89013fc6 100644 --- a/opal/runtime/opal_finalize.c +++ b/opal/runtime/opal_finalize.c @@ -41,6 +41,7 @@ #include "opal/mca/memchecker/base/base.h" #include "opal/mca/memcpy/base/base.h" #include "opal/mca/memory/base/base.h" +#include "opal/mca/patcher/base/base.h" #include "opal/mca/backtrace/base/base.h" #include "opal/mca/sec/base/base.h" #include "opal/mca/timer/base/base.h" @@ -147,6 +148,7 @@ opal_finalize(void) hooks to the bowels of the mem_free code can still occur any time between now and end of application (even post main()!) 
*/ (void) mca_base_framework_close(&opal_memory_base_framework); + (void) mca_base_framework_close(&opal_patcher_base_framework); /* close the memcpy framework */ (void) mca_base_framework_close(&opal_memcpy_base_framework); diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index befe821f35..81ddf2aa23 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -45,6 +45,7 @@ #include "opal/datatype/opal_datatype.h" #include "opal/mca/installdirs/base/base.h" #include "opal/mca/memory/base/base.h" +#include "opal/mca/patcher/base/base.h" #include "opal/mca/memcpy/base/base.h" #include "opal/mca/hwloc/base/base.h" #include "opal/mca/sec/base/base.h" @@ -418,6 +419,14 @@ opal_init(int* pargc, char*** pargv) goto return_error; } + if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_patcher_base_framework, 0))) { + error = "opal_patcher_base_open"; + goto return_error; + } + + /* select a patcher module. if a patcher module can not be found it is not an error. */ + (void) opal_patcher_base_select (); + /* open the memory manager components. Memory hooks may be triggered before this (any time after mem_free_init(), actually). This is a hook available for memory manager hooks diff --git a/opal/util/Makefile.am b/opal/util/Makefile.am index f80cbd50ec..5c4cb2945e 100644 --- a/opal/util/Makefile.am +++ b/opal/util/Makefile.am @@ -12,6 +12,8 @@ # Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 NVIDIA Corporation. All rights reserved. # Copyright (c) 2013 Intel, Inc. All rights reserved +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. # $COPYRIGHT$ # # Additional copyrights may follow