Remove compute 3.0 #406

Merged: 8 commits, merged May 13, 2019

8 changes: 4 additions & 4 deletions CMake/hoomd/HOOMDCUDASetup.cmake
@@ -53,9 +53,9 @@ if (ENABLE_CUDA)

     # setup nvcc to build for all CUDA architectures. Allow user to modify the list if desired
     if (CUDA_VERSION VERSION_GREATER 8.99)
-        set(CUDA_ARCH_LIST 30 35 50 60 70 CACHE STRING "List of target sm_ architectures to compile CUDA code for. Separate with semicolons.")
+        set(CUDA_ARCH_LIST 35 50 60 70 CACHE STRING "List of target sm_ architectures to compile CUDA code for. Separate with semicolons.")
     elseif (CUDA_VERSION VERSION_GREATER 7.99)
-        set(CUDA_ARCH_LIST 30 35 50 60 CACHE STRING "List of target sm_ architectures to compile CUDA code for. Separate with semicolons.")
+        set(CUDA_ARCH_LIST 35 50 60 CACHE STRING "List of target sm_ architectures to compile CUDA code for. Separate with semicolons.")
     endif()

     foreach(_cuda_arch ${CUDA_ARCH_LIST})
@@ -69,8 +69,8 @@ if (ENABLE_CUDA)
     list(GET _cuda_arch_list_sorted -1 _cuda_max_arch)
     add_definitions(-DCUDA_ARCH=${_cuda_min_arch})

-    if (_cuda_min_arch LESS 30)
-        message(SEND_ERROR "HOOMD requires compute 3.0 or newer")
+    if (_cuda_min_arch LESS 35)
+        message(SEND_ERROR "HOOMD requires compute 3.5 or newer")
     endif ()

     # only generate ptx code for the maximum supported CUDA_ARCH (saves on file size)
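
Because CUDA_ARCH_LIST is a CACHE variable, users who only run on newer cards can still narrow the list at configure time. An illustrative invocation (the source-directory path is an assumption, not from this PR):

    cmake -DCUDA_ARCH_LIST="60;70" /path/to/hoomd-blue

Each entry is presumably turned into a matching nvcc -gencode flag by the foreach over CUDA_ARCH_LIST shown above (the loop body is elided in this diff), and _cuda_min_arch is the smallest entry, which is what the compute 3.5 floor after the loop checks against.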
149 changes: 9 additions & 140 deletions hoomd/TextureTools.h
@@ -10,161 +10,30 @@

 /*! \file TextureTools.h
     \brief Utilities for working with textures

-    TextureTools.h exists to aid in defining Scalar textures which may be either float or double. It aims to simplify
-    code that reads from these textures so that the amount of conditional code is simplified to be entirely within
-    this header.
+    TextureTools.h previously existed to aid in defining Scalar textures which may be either float or double.

-    Planning for the future (__ldg), the fetch methods will also take in a pointer to the memory. That way, the initial
-    work done to convert the texture loads over to the single/double will also make it easy to change over to __ldg
-    in a single spot.
+    Now, it only provides a __ldg() overload for double4.
 */

 #include "HOOMDMath.h"

 #ifdef NVCC

-//! Fetch an unsigned int from texture memory.
+//! Fetch a double4 value from texture memory.
 /*! This function should be called whenever a CUDA kernel wants to retrieve a
-    unsigned int value from texture memory.
+    double4 value from read only memory.

-    \param ptr Pointer to bound memory
-    \param tex_ref Texture in which the desired values are stored.
-    \param ii Index at which to look.
+    \param ptr Pointer to read
 */
-__device__ inline unsigned int texFetchUint(const unsigned int *ptr, texture<unsigned int, 1> tex_ref, unsigned int ii)
+__device__ inline double4 __ldg(const double4 *ptr)
     {
-    #if __CUDA_ARCH__ >= 350
-    return __ldg(ptr+ii);
-    #else
-    return tex1Dfetch(tex_ref, ii);
-    #endif
-    }
-
-#ifdef SINGLE_PRECISION
-
-typedef texture<Scalar, 1, cudaReadModeElementType> scalar_tex_t;
-typedef texture<Scalar2, 1, cudaReadModeElementType> scalar2_tex_t;
-typedef texture<Scalar4, 1, cudaReadModeElementType> scalar4_tex_t;
-
-//! Fetch a Scalar value from texture memory.
-/*! This function should be called whenever a CUDA kernel wants to retrieve a
-    Scalar value from texture memory.
-
-    \param ptr Pointer to bound memory
-    \param tex_ref Texture in which the desired values are stored.
-    \param ii Index at which to look.
-*/
-__device__ inline Scalar texFetchScalar(const Scalar *ptr, texture<Scalar, 1> tex_ref, unsigned int ii)
-    {
-    #if __CUDA_ARCH__ >= 350
-    return __ldg(ptr+ii);
-    #else
-    return tex1Dfetch(tex_ref, ii);
-    #endif
-    }
-
-//! Fetch a Scalar2 value from texture memory.
-/*! This function should be called whenever a CUDA kernel wants to retrieve a
-    Scalar2 value from texture memory.
-
-    \param ptr Pointer to bound memory
-    \param tex_ref Texture in which the desired values are stored.
-    \param ii Index at which to look.
-*/
-__device__ inline Scalar2 texFetchScalar2(const Scalar2 *ptr, texture<Scalar2, 1> tex_ref, unsigned int ii)
-    {
-    #if __CUDA_ARCH__ >= 350
-    return __ldg(ptr+ii);
-    #else
-    return tex1Dfetch(tex_ref, ii);
-    #endif
-    }
-
-//! Fetch a Scalar4 value from texture memory.
-/*! This function should called whenever a CUDA kernel wants to retrieve a
-    Scalar4 value from texture memory.
-
-    \param ptr Pointer to bound memory
-    \param tex_ref Texture in which the desired values are stored.
-    \param ii Index at which to look.
-*/
-__device__ inline Scalar4 texFetchScalar4(const Scalar4 *ptr, texture<Scalar4, 1> tex_ref, unsigned int ii)
-    {
-    #if __CUDA_ARCH__ >= 350
-    return __ldg(ptr+ii);
-    #else
-    return tex1Dfetch(tex_ref, ii);
-    #endif
-    }
-
-#else
-typedef texture<int2, 1, cudaReadModeElementType> scalar_tex_t;
-typedef texture<int4, 1, cudaReadModeElementType> scalar2_tex_t;
-typedef texture<int4, 1, cudaReadModeElementType> scalar4_tex_t;
-
-//! Fetch a Scalar value from texture memory.
-/*! This function should be called whenever a CUDA kernel wants to retrieve a
-    Scalar value from texture memory.
-
-    \param ptr Pointer to bound memory
-    \param tex_ref Texture in which the desired values are stored.
-    \param ii Index at which to look.
-*/
-__device__ inline Scalar texFetchScalar(const Scalar *ptr, texture<int2, 1> tex_ref, unsigned int ii)
-    {
-    #if __CUDA_ARCH__ >= 350
-    return __ldg(ptr+ii);
-    #else
-    int2 val = tex1Dfetch(tex_ref, ii);
-    return Scalar(__hiloint2double(val.y, val.x));
-    #endif
-    }
-
-//! Fetch a Scalar2 value from texture memory.
-/*! This function should be called whenever a CUDA kernel wants to retrieve a
-    Scalar2 value from texture memory.
-
-    \param ptr Pointer to bound memory
-    \param tex_ref Texture in which the desired values are stored.
-    \param ii Index at which to look.
-*/
-__device__ inline Scalar2 texFetchScalar2(const Scalar2* ptr, texture<int4, 1> tex_ref, unsigned int ii)
-    {
-    #if __CUDA_ARCH__ >= 350
-    return __ldg(ptr+ii);
-    #else
-    int4 val = tex1Dfetch(tex_ref, ii);
-    return make_scalar2(__hiloint2double(val.y, val.x),
-                        __hiloint2double(val.w, val.z));
-    #endif
-    }
-
-//! Fetch a Scalar4 value from texture memory.
-/*! This function should be called whenever a CUDA kernel wants to retrieve a
-    Scalar4 value from texture memory.
-
-    \param ptr Pointer to bound memory
-    \param tex_ref Texture in which the desired values are stored.
-    \param ii Index at which to look.
-*/
-__device__ inline Scalar4 texFetchScalar4(const Scalar4 *ptr, texture<int4, 1> tex_ref, unsigned int ii)
-    {
-    unsigned int idx = 2*ii;
-    #if __CUDA_ARCH__ >= 350
-    int4 part1 = __ldg(((int4 *)ptr)+idx);;
-    int4 part2 = __ldg(((int4 *)ptr)+idx+1);;
-    #else
-    int4 part1 = tex1Dfetch(tex_ref, idx);
-    int4 part2 = tex1Dfetch(tex_ref, idx+1);
-    #endif
-    return make_scalar4(__hiloint2double(part1.y, part1.x),
+    int4 part1 = __ldg(((int4 *)ptr));;
+    int4 part2 = __ldg(((int4 *)ptr)+1);;
+    return make_double4(__hiloint2double(part1.y, part1.x),
                         __hiloint2double(part1.w, part1.z),
                         __hiloint2double(part2.y, part2.x),
                         __hiloint2double(part2.w, part2.z));
     }
-#endif
 #endif



 #endif // __HOOMD_MATH_H__
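
This overload is needed because CUDA's intrinsic headers provide native __ldg() overloads for the built-in vector types up to 16 bytes, but none for the 32-byte double4; the header therefore emulates one with two 16-byte int4 loads reassembled via __hiloint2double(). A minimal sketch of a kernel using it (the kernel and buffer names are illustrative, not from this PR):

    // Hypothetical kernel: copy double4 data through the read-only cache.
    __global__ void copy_double4(const double4 *d_in, double4 *d_out, unsigned int N)
        {
        unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < N)
            d_out[i] = __ldg(d_in + i);  // resolves to the overload added above
        }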
3 changes: 1 addition & 2 deletions hoomd/cgcmm/CGCMMAngleForceComputeGPU.cc
@@ -133,8 +133,7 @@ void CGCMMAngleForceComputeGPU::computeForces(unsigned int timestep)
                                    d_CGCMMsr.data,
                                    d_CGCMMepow.data,
                                    m_CGCMMAngle_data->getNTypes(),
-                                   m_tuner->getParam(),
-                                   m_exec_conf->getComputeCapability());
+                                   m_tuner->getParam());

     if(m_exec_conf->isCUDAErrorCheckingEnabled())
         CHECK_CUDA_ERROR();
34 changes: 4 additions & 30 deletions hoomd/cgcmm/CGCMMAngleForceGPU.cu
@@ -16,15 +16,6 @@
     \brief Defines GPU kernel code for calculating the CGCMM angle forces. Used by CGCMMAngleForceComputeGPU.
 */

-//! Texture for reading angle parameters
-scalar2_tex_t angle_params_tex;
-
-//! Texture for reading angle CGCMM S-R parameters
-scalar2_tex_t angle_CGCMMsr_tex; // MISSING EPSILON!!! sigma=.x, rcut=.y
-
-//! Texture for reading angle CGCMM Epsilon-pow/pref parameters
-scalar4_tex_t angle_CGCMMepow_tex; // now with EPSILON=.x, pow1=.y, pow2=.z, pref=.w
-
 //! Kernel for calculating CGCMM angle forces on the GPU
 /*! \param d_force Device memory to write computed forces
     \param d_virial Device memory to write computed virials
@@ -124,7 +115,7 @@ extern "C" __global__ void gpu_compute_CGCMM_angle_forces_kernel(Scalar4* d_force,
         dac = box.minImage(dac);

         // get the angle parameters (MEM TRANSFER: 8 bytes)
-        Scalar2 params = texFetchScalar2(d_params, angle_params_tex, cur_angle_type);
+        Scalar2 params = __ldg(d_params + cur_angle_type);
         Scalar K = params.x;
         Scalar t_0 = params.y;

@@ -154,14 +145,14 @@ extern "C" __global__ void gpu_compute_CGCMM_angle_forces_kernel(Scalar4* d_force,
             vac[i] = Scalar(0.0);

         // get the angle E-S-R parameters (MEM TRANSFER: 12 bytes)
-        const Scalar2 cgSR = texFetchScalar2(d_CGCMMsr, angle_CGCMMsr_tex, cur_angle_type);
+        const Scalar2 cgSR = __ldg(d_CGCMMsr + cur_angle_type);

         Scalar cgsigma = cgSR.x;
         Scalar cgrcut = cgSR.y;

         if (rac < cgrcut)
             {
-            const Scalar4 cgEPOW = texFetchScalar4(d_CGCMMepow, angle_CGCMMepow_tex, cur_angle_type);
+            const Scalar4 cgEPOW = __ldg(d_CGCMMepow + cur_angle_type);

             // get the angle pow/pref parameters (MEM TRANSFER: 12 bytes)
             Scalar cgeps = cgEPOW.x;
@@ -282,8 +273,7 @@ cudaError_t gpu_compute_CGCMM_angle_forces(Scalar4* d_force,
                                            Scalar2 *d_CGCMMsr,
                                            Scalar4 *d_CGCMMepow,
                                            unsigned int n_angle_types,
-                                           int block_size,
-                                           const unsigned int compute_capability)
+                                           int block_size)
     {
     assert(d_params);
     assert(d_CGCMMsr);
@@ -306,22 +296,6 @@ cudaError_t gpu_compute_CGCMM_angle_forces(Scalar4* d_force,
     dim3 grid( (int)ceil((double)N / (double)run_block_size), 1, 1);
     dim3 threads(run_block_size, 1, 1);

-    // bind the textures on pre sm 35 arches
-    if (compute_capability < 350)
-        {
-        cudaError_t error = cudaBindTexture(0, angle_params_tex, d_params, sizeof(Scalar2) * n_angle_types);
-        if (error != cudaSuccess)
-            return error;
-
-        error = cudaBindTexture(0, angle_CGCMMsr_tex, d_CGCMMsr, sizeof(Scalar2) * n_angle_types);
-        if (error != cudaSuccess)
-            return error;
-
-        error = cudaBindTexture(0, angle_CGCMMepow_tex, d_CGCMMepow, sizeof(Scalar4) * n_angle_types);
-        if (error != cudaSuccess)
-            return error;
-        }
-
     // run the kernel
     gpu_compute_CGCMM_angle_forces_kernel<<< grid, threads>>>(d_force,
                                                               d_virial,
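
The kernel-side substitution in this file is mechanical: every texFetchScalarN(ptr, tex, i) pair becomes a direct __ldg(ptr + i), and the matching cudaBindTexture() block on the host disappears. A minimal self-contained sketch of the new read pattern (kernel and buffer names are illustrative, assuming the Scalar2 typedef from HOOMDMath.h):

    // Illustrative kernel: each thread reads its type's parameters through
    // the read-only cache; no texture declaration or host-side bind is needed.
    __global__ void read_angle_params(const Scalar2 *d_params,
                                      const unsigned int *d_type,
                                      Scalar *d_out,
                                      unsigned int N)
        {
        unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i >= N)
            return;
        // was: texFetchScalar2(d_params, angle_params_tex, d_type[i])
        Scalar2 params = __ldg(d_params + __ldg(d_type + i));
        d_out[i] = params.x;  // e.g. the spring constant K
        }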
3 changes: 1 addition & 2 deletions hoomd/cgcmm/CGCMMAngleForceGPU.cuh
@@ -30,7 +30,6 @@ cudaError_t gpu_compute_CGCMM_angle_forces(Scalar4* d_force,
                                            Scalar2 *d_CGCMMsr,
                                            Scalar4 *d_CGCMMepow,
                                            unsigned int n_angle_types,
-                                           int block_size,
-                                           const unsigned int compute_capability);
+                                           int block_size);

 #endif
4 changes: 1 addition & 3 deletions hoomd/cgcmm/CGCMMForceComputeGPU.cc
@@ -165,9 +165,7 @@ void CGCMMForceComputeGPU::computeForces(unsigned int timestep)
                          this->m_nlist->getNListArray().getPitch(),
                          m_pdata->getNTypes(),
                          m_r_cut * m_r_cut,
-                         m_block_size,
-                         m_exec_conf->getComputeCapability()/10,
-                         m_exec_conf->dev_prop.maxTexture1DLinear);
+                         m_block_size);
     if (m_exec_conf->isCUDAErrorCheckingEnabled())
         CHECK_CUDA_ERROR();