From b13f38490ad42bd67bdbe5fb5faa60b36233733b Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 21 Dec 2021 16:57:02 +0100 Subject: [PATCH 1/4] Revert "Avoid function-local static variables. (#1)" This reverts commit fadddd6bf1f6030aeb34012973060ec023b43417, reversing changes made to e5a8b02aaf17b9aac8024d5fb65f74a7d8d23ba5. --- .../Core/products/GeneralBlockPanelKernel.h | 29 +++++-------------- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 086e6c8f6..40171eb83 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -90,21 +90,6 @@ struct CacheSizes { std::ptrdiff_t m_l3; }; -// In C++17 this could be an inline variable, see for example -// https://stackoverflow.com/questions/38043442/how-do-inline-variables-work -template -struct CacheSizeGlobalHelper { - static CacheSizes s_cacheSizes; -}; -#ifdef _OPENMP -#pragma omp declare target -#endif -template -CacheSizes CacheSizeGlobalHelper::s_cacheSizes; -#ifdef _OPENMP -#pragma omp end declare target -#endif - /** \internal */ EIGEN_DEVICE_FUNC inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff_t* l2, std::ptrdiff_t* l3) @@ -133,22 +118,22 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff eigen_internal_assert(false); } #else // EIGEN_CUDA_ARCH + static CacheSizes m_cacheSizes; - auto& cacheSizes = CacheSizeGlobalHelper::s_cacheSizes; if(action==SetAction) { // set the cpu cache size and cache all block sizes from a global cache size in byte eigen_internal_assert(l1!=0 && l2!=0); - cacheSizes.m_l1 = *l1; - cacheSizes.m_l2 = *l2; - cacheSizes.m_l3 = *l3; + m_cacheSizes.m_l1 = *l1; + m_cacheSizes.m_l2 = *l2; + m_cacheSizes.m_l3 = *l3; } else if(action==GetAction) { eigen_internal_assert(l1!=0 && l2!=0); - *l1 = cacheSizes.m_l1; - *l2 = cacheSizes.m_l2; - *l3 = cacheSizes.m_l3; + *l1 = m_cacheSizes.m_l1; + *l2 = m_cacheSizes.m_l2; + *l3 = m_cacheSizes.m_l3; } else { From 70ea71e51bb83edad1f6f212ff67aae9bfdd4922 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 21 Dec 2021 17:05:01 +0100 Subject: [PATCH 2/4] Use static cache sizes if OpenMP is enabled. --- Eigen/src/Core/products/GeneralBlockPanelKernel.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 40171eb83..763e27a35 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -94,10 +94,10 @@ struct CacheSizes { EIGEN_DEVICE_FUNC inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff_t* l2, std::ptrdiff_t* l3) { - #ifdef EIGEN_CUDA_ARCH + #if defined(EIGEN_CUDA_ARCH) || defined(_OPENMP) if (action==GetAction) { - #if EIGEN_CUDA_ARCH >= 700 + #if EIGEN_CUDA_ARCH >= 700 || defined(_OPENMP) // Volta, Turing, or newer // - the L1 cache is configurable at runtime, with a minimum of 32 KB/SM // - the L2 cache depends on the actual card, with a minimum of 64 KB/SM From d64df869da97c6a8cdd4b1958153c7df89b73794 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 21 Dec 2021 17:39:02 +0100 Subject: [PATCH 3/4] Restrict OpenMP fix to NVIDIA compilers. --- Eigen/src/Core/products/GeneralBlockPanelKernel.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 763e27a35..3ad8ea5bd 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -94,10 +94,10 @@ struct CacheSizes { EIGEN_DEVICE_FUNC inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff_t* l2, std::ptrdiff_t* l3) { - #if defined(EIGEN_CUDA_ARCH) || defined(_OPENMP) + #if defined(EIGEN_CUDA_ARCH) || (defined(__NVCOMPILER) && defined(_OPENMP)) if (action==GetAction) { - #if EIGEN_CUDA_ARCH >= 700 || defined(_OPENMP) + #if EIGEN_CUDA_ARCH >= 700 || (defined(__NVCOMPILER) && defined(_OPENMP)) // Volta, Turing, or newer // - the L1 cache is configurable at runtime, with a minimum of 32 KB/SM // - the L2 cache depends on the actual card, with a minimum of 64 KB/SM From 68ac74327dff80ec6347c7edf821e590e6b328d9 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 21 Dec 2021 20:00:00 +0100 Subject: [PATCH 4/4] Enable workaround for OpenACC too. This seems to be important because of compiling with -cuda now. --- Eigen/src/Core/products/GeneralBlockPanelKernel.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 3ad8ea5bd..14545361a 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -94,10 +94,10 @@ struct CacheSizes { EIGEN_DEVICE_FUNC inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff_t* l2, std::ptrdiff_t* l3) { - #if defined(EIGEN_CUDA_ARCH) || (defined(__NVCOMPILER) && defined(_OPENMP)) + #if defined(EIGEN_CUDA_ARCH) || (defined(__NVCOMPILER) && (defined(_OPENMP) || defined(_OPENACC))) if (action==GetAction) { - #if EIGEN_CUDA_ARCH >= 700 || (defined(__NVCOMPILER) && defined(_OPENMP)) + #if EIGEN_CUDA_ARCH >= 700 || (defined(__NVCOMPILER) && (defined(_OPENMP) || defined(_OPENACC))) // Volta, Turing, or newer // - the L1 cache is configurable at runtime, with a minimum of 32 KB/SM // - the L2 cache depends on the actual card, with a minimum of 64 KB/SM