Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 9 additions & 24 deletions Eigen/src/Core/products/GeneralBlockPanelKernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,29 +90,14 @@ struct CacheSizes {
std::ptrdiff_t m_l3;
};

// In C++17 this could be an inline variable, see for example
// https://stackoverflow.com/questions/38043442/how-do-inline-variables-work
/** \internal
  * Holder for a single static CacheSizes object, s_cacheSizes.
  *
  * The template parameter is not referenced anywhere inside the struct.
  * NOTE(review): presumably the struct is a template so that the static member
  * can be defined in a header (pre-C++17 stand-in for an inline variable) -- confirm.
  */
template <typename Unused>
struct CacheSizeGlobalHelper
{
  static CacheSizes s_cacheSizes;
};
// Out-of-class definition of the static member CacheSizeGlobalHelper<T>::s_cacheSizes.
// When _OPENMP is defined, the definition is bracketed by the OpenMP
// `declare target` / `end declare target` pragmas; otherwise it is a plain definition.
// NOTE(review): presumably the pragmas make the variable usable from OpenMP
// offloaded (device) code -- confirm against the OpenMP specification.
#ifdef _OPENMP
#pragma omp declare target
#endif
template <typename T>
CacheSizes CacheSizeGlobalHelper<T>::s_cacheSizes;
#ifdef _OPENMP
#pragma omp end declare target
#endif

/** \internal */
EIGEN_DEVICE_FUNC
inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff_t* l2, std::ptrdiff_t* l3)
{
#ifdef EIGEN_CUDA_ARCH
#if defined(EIGEN_CUDA_ARCH) || (defined(__NVCOMPILER) && (defined(_OPENMP) || defined(_OPENACC)))
if (action==GetAction)
{
#if EIGEN_CUDA_ARCH >= 700
#if EIGEN_CUDA_ARCH >= 700 || (defined(__NVCOMPILER) && (defined(_OPENMP) || defined(_OPENACC)))
// Volta, Turing, or newer
// - the L1 cache is configurable at runtime, with a minimum of 32 KB/SM
// - the L2 cache depends on the actual card, with a minimum of 64 KB/SM
Expand All @@ -133,22 +118,22 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff
eigen_internal_assert(false);
}
#else // EIGEN_CUDA_ARCH
static CacheSizes m_cacheSizes;

auto& cacheSizes = CacheSizeGlobalHelper<void>::s_cacheSizes;
if(action==SetAction)
{
// set the cpu cache size and cache all block sizes from a global cache size in byte
eigen_internal_assert(l1!=0 && l2!=0);
cacheSizes.m_l1 = *l1;
cacheSizes.m_l2 = *l2;
cacheSizes.m_l3 = *l3;
m_cacheSizes.m_l1 = *l1;
m_cacheSizes.m_l2 = *l2;
m_cacheSizes.m_l3 = *l3;
}
else if(action==GetAction)
{
eigen_internal_assert(l1!=0 && l2!=0);
*l1 = cacheSizes.m_l1;
*l2 = cacheSizes.m_l2;
*l3 = cacheSizes.m_l3;
*l1 = m_cacheSizes.m_l1;
*l2 = m_cacheSizes.m_l2;
*l3 = m_cacheSizes.m_l3;
}
else
{
Expand Down