diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 086e6c8f6..14545361a 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -90,29 +90,14 @@ struct CacheSizes { std::ptrdiff_t m_l3; }; -// In C++17 this could be an inline variable, see for example -// https://stackoverflow.com/questions/38043442/how-do-inline-variables-work -template -struct CacheSizeGlobalHelper { - static CacheSizes s_cacheSizes; -}; -#ifdef _OPENMP -#pragma omp declare target -#endif -template -CacheSizes CacheSizeGlobalHelper::s_cacheSizes; -#ifdef _OPENMP -#pragma omp end declare target -#endif - /** \internal */ EIGEN_DEVICE_FUNC inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff_t* l2, std::ptrdiff_t* l3) { - #ifdef EIGEN_CUDA_ARCH + #if defined(EIGEN_CUDA_ARCH) || (defined(__NVCOMPILER) && (defined(_OPENMP) || defined(_OPENACC))) if (action==GetAction) { - #if EIGEN_CUDA_ARCH >= 700 + #if EIGEN_CUDA_ARCH >= 700 || (defined(__NVCOMPILER) && (defined(_OPENMP) || defined(_OPENACC))) // Volta, Turing, or newer // - the L1 cache is configurable at runtime, with a minimum of 32 KB/SM // - the L2 cache depends on the actual card, with a minimum of 64 KB/SM @@ -133,22 +118,22 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff eigen_internal_assert(false); } #else // EIGEN_CUDA_ARCH + static CacheSizes m_cacheSizes; - auto& cacheSizes = CacheSizeGlobalHelper::s_cacheSizes; if(action==SetAction) { // set the cpu cache size and cache all block sizes from a global cache size in byte eigen_internal_assert(l1!=0 && l2!=0); - cacheSizes.m_l1 = *l1; - cacheSizes.m_l2 = *l2; - cacheSizes.m_l3 = *l3; + m_cacheSizes.m_l1 = *l1; + m_cacheSizes.m_l2 = *l2; + m_cacheSizes.m_l3 = *l3; } else if(action==GetAction) { eigen_internal_assert(l1!=0 && l2!=0); - *l1 = cacheSizes.m_l1; - *l2 = cacheSizes.m_l2; - *l3 = cacheSizes.m_l3; + *l1 = m_cacheSizes.m_l1; + *l2 = m_cacheSizes.m_l2; + *l3 = m_cacheSizes.m_l3; } else {