Remove compute 3.0 #406

Merged: 8 commits, merged May 13, 2019

8 changes: 4 additions & 4 deletions CMake/hoomd/HOOMDCUDASetup.cmake
@@ -53,9 +53,9 @@ if (ENABLE_CUDA)

     # setup nvcc to build for all CUDA architectures. Allow user to modify the list if desired
     if (CUDA_VERSION VERSION_GREATER 8.99)
-        set(CUDA_ARCH_LIST 30 35 50 60 70 CACHE STRING "List of target sm_ architectures to compile CUDA code for. Separate with semicolons.")
+        set(CUDA_ARCH_LIST 35 50 60 70 CACHE STRING "List of target sm_ architectures to compile CUDA code for. Separate with semicolons.")
     elseif (CUDA_VERSION VERSION_GREATER 7.99)
-        set(CUDA_ARCH_LIST 30 35 50 60 CACHE STRING "List of target sm_ architectures to compile CUDA code for. Separate with semicolons.")
+        set(CUDA_ARCH_LIST 35 50 60 CACHE STRING "List of target sm_ architectures to compile CUDA code for. Separate with semicolons.")
     endif()

     foreach(_cuda_arch ${CUDA_ARCH_LIST})
@@ -69,8 +69,8 @@ if (ENABLE_CUDA)
     list(GET _cuda_arch_list_sorted -1 _cuda_max_arch)
     add_definitions(-DCUDA_ARCH=${_cuda_min_arch})

-    if (_cuda_min_arch LESS 30)
-        message(SEND_ERROR "HOOMD requires compute 3.0 or newer")
+    if (_cuda_min_arch LESS 35)
+        message(SEND_ERROR "HOOMD requires compute 3.5 or newer")
     endif ()

     # only generate ptx code for the maximum supported CUDA_ARCH (saves on file size)
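
Because CUDA_ARCH_LIST is a CACHE variable, users who only run on newer cards can still narrow the list at configure time. An illustrative invocation (the source-directory path is an assumption, not from this PR):

    cmake -DCUDA_ARCH_LIST="60;70" /path/to/hoomd-blue

Each entry is presumably turned into a matching nvcc -gencode flag by the foreach over CUDA_ARCH_LIST shown above (the loop body is elided in this diff), and _cuda_min_arch is the smallest entry, which is what the compute 3.5 floor after the loop checks against.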
149 changes: 9 additions & 140 deletions hoomd/TextureTools.h
@@ -10,161 +10,30 @@

 /*! \file TextureTools.h
     \brief Utilities for working with textures

-    TextureTools.h exists to aid in defining Scalar textures which may be either float or double. It aims to simplify
-    code that reads from these textures so that the amount of conditional code is simplified to be entirely within
-    this header.
+    TextureTools.h previously existed to aid in defining Scalar textures which may be either float or double.

-    Planning for the future (__ldg), the fetch methods will also take in a pointer to the memory. That way, the initial
-    work done to convert the texture loads over to the single/double will also make it easy to change over to __ldg
-    in a single spot.
+    Now, it only provides a __ldg() overload for double4.
 */

 #include "HOOMDMath.h"

 #ifdef NVCC

-//! Fetch an unsigned int from texture memory.
+//! Fetch a double4 value from texture memory.
 /*! This function should be called whenever a CUDA kernel wants to retrieve a
-    unsigned int value from texture memory.
+    double4 value from read only memory.

-    \param ptr Pointer to bound memory
-    \param tex_ref Texture in which the desired values are stored.
-    \param ii Index at which to look.
+    \param ptr Pointer to read
 */
-__device__ inline unsigned int texFetchUint(const unsigned int *ptr, texture<unsigned int, 1> tex_ref, unsigned int ii)
+__device__ inline double4 __ldg(const double4 *ptr)
     {
-    #if __CUDA_ARCH__ >= 350
-    return __ldg(ptr+ii);
-    #else
-    return tex1Dfetch(tex_ref, ii);
-    #endif
-    }
-
-#ifdef SINGLE_PRECISION
-
-typedef texture<Scalar, 1, cudaReadModeElementType> scalar_tex_t;
-typedef texture<Scalar2, 1, cudaReadModeElementType> scalar2_tex_t;
-typedef texture<Scalar4, 1, cudaReadModeElementType> scalar4_tex_t;
-
-//! Fetch a Scalar value from texture memory.
-/*! This function should be called whenever a CUDA kernel wants to retrieve a
-    Scalar value from texture memory.
-
-    \param ptr Pointer to bound memory
-    \param tex_ref Texture in which the desired values are stored.
-    \param ii Index at which to look.
-*/
-__device__ inline Scalar texFetchScalar(const Scalar *ptr, texture<Scalar, 1> tex_ref, unsigned int ii)
-    {
-    #if __CUDA_ARCH__ >= 350
-    return __ldg(ptr+ii);
-    #else
-    return tex1Dfetch(tex_ref, ii);
-    #endif
-    }
-
-//! Fetch a Scalar2 value from texture memory.
-/*! This function should be called whenever a CUDA kernel wants to retrieve a
-    Scalar2 value from texture memory.
-
-    \param ptr Pointer to bound memory
-    \param tex_ref Texture in which the desired values are stored.
-    \param ii Index at which to look.
-*/
-__device__ inline Scalar2 texFetchScalar2(const Scalar2 *ptr, texture<Scalar2, 1> tex_ref, unsigned int ii)
-    {
-    #if __CUDA_ARCH__ >= 350
-    return __ldg(ptr+ii);
-    #else
-    return tex1Dfetch(tex_ref, ii);
-    #endif
-    }
-
-//! Fetch a Scalar4 value from texture memory.
-/*! This function should called whenever a CUDA kernel wants to retrieve a
-    Scalar4 value from texture memory.
-
-    \param ptr Pointer to bound memory
-    \param tex_ref Texture in which the desired values are stored.
-    \param ii Index at which to look.
-*/
-__device__ inline Scalar4 texFetchScalar4(const Scalar4 *ptr, texture<Scalar4, 1> tex_ref, unsigned int ii)
-    {
-    #if __CUDA_ARCH__ >= 350
-    return __ldg(ptr+ii);
-    #else
-    return tex1Dfetch(tex_ref, ii);
-    #endif
-    }
-
-#else
-typedef texture<int2, 1, cudaReadModeElementType> scalar_tex_t;
-typedef texture<int4, 1, cudaReadModeElementType> scalar2_tex_t;
-typedef texture<int4, 1, cudaReadModeElementType> scalar4_tex_t;
-
-//! Fetch a Scalar value from texture memory.
-/*! This function should be called whenever a CUDA kernel wants to retrieve a
-    Scalar value from texture memory.
-
-    \param ptr Pointer to bound memory
-    \param tex_ref Texture in which the desired values are stored.
-    \param ii Index at which to look.
-*/
-__device__ inline Scalar texFetchScalar(const Scalar *ptr, texture<int2, 1> tex_ref, unsigned int ii)
-    {
-    #if __CUDA_ARCH__ >= 350
-    return __ldg(ptr+ii);
-    #else
-    int2 val = tex1Dfetch(tex_ref, ii);
-    return Scalar(__hiloint2double(val.y, val.x));
-    #endif
-    }
-
-//! Fetch a Scalar2 value from texture memory.
-/*! This function should be called whenever a CUDA kernel wants to retrieve a
-    Scalar2 value from texture memory.
-
-    \param ptr Pointer to bound memory
-    \param tex_ref Texture in which the desired values are stored.
-    \param ii Index at which to look.
-*/
-__device__ inline Scalar2 texFetchScalar2(const Scalar2* ptr, texture<int4, 1> tex_ref, unsigned int ii)
-    {
-    #if __CUDA_ARCH__ >= 350
-    return __ldg(ptr+ii);
-    #else
-    int4 val = tex1Dfetch(tex_ref, ii);
-    return make_scalar2(__hiloint2double(val.y, val.x),
-                        __hiloint2double(val.w, val.z));
-    #endif
-    }
-
-//! Fetch a Scalar4 value from texture memory.
-/*! This function should be called whenever a CUDA kernel wants to retrieve a
-    Scalar4 value from texture memory.
-
-    \param ptr Pointer to bound memory
-    \param tex_ref Texture in which the desired values are stored.
-    \param ii Index at which to look.
-*/
-__device__ inline Scalar4 texFetchScalar4(const Scalar4 *ptr, texture<int4, 1> tex_ref, unsigned int ii)
-    {
-    unsigned int idx = 2*ii;
-    #if __CUDA_ARCH__ >= 350
-    int4 part1 = __ldg(((int4 *)ptr)+idx);;
-    int4 part2 = __ldg(((int4 *)ptr)+idx+1);;
-    #else
-    int4 part1 = tex1Dfetch(tex_ref, idx);
-    int4 part2 = tex1Dfetch(tex_ref, idx+1);
-    #endif
-    return make_scalar4(__hiloint2double(part1.y, part1.x),
+    int4 part1 = __ldg(((int4 *)ptr));;
+    int4 part2 = __ldg(((int4 *)ptr)+1);;
+    return make_double4(__hiloint2double(part1.y, part1.x),
                         __hiloint2double(part1.w, part1.z),
                         __hiloint2double(part2.y, part2.x),
                         __hiloint2double(part2.w, part2.z));
     }
-#endif
 #endif



 #endif // __HOOMD_MATH_H__
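
This overload is needed because CUDA's intrinsic headers provide native __ldg() overloads for the built-in vector types up to 16 bytes, but none for the 32-byte double4; the header therefore emulates one with two 16-byte int4 loads reassembled via __hiloint2double(). A minimal sketch of a kernel using it (the kernel and buffer names are illustrative, not from this PR):

    // Hypothetical kernel: copy double4 data through the read-only cache.
    __global__ void copy_double4(const double4 *d_in, double4 *d_out, unsigned int N)
        {
        unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < N)
            d_out[i] = __ldg(d_in + i);  // resolves to the overload added above
        }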
3 changes: 1 addition & 2 deletions hoomd/cgcmm/CGCMMAngleForceComputeGPU.cc
@@ -133,8 +133,7 @@ void CGCMMAngleForceComputeGPU::computeForces(unsigned int timestep)
                                    d_CGCMMsr.data,
                                    d_CGCMMepow.data,
                                    m_CGCMMAngle_data->getNTypes(),
-                                   m_tuner->getParam(),
-                                   m_exec_conf->getComputeCapability());
+                                   m_tuner->getParam());

     if(m_exec_conf->isCUDAErrorCheckingEnabled())
         CHECK_CUDA_ERROR();
34 changes: 4 additions & 30 deletions hoomd/cgcmm/CGCMMAngleForceGPU.cu
@@ -16,15 +16,6 @@
     \brief Defines GPU kernel code for calculating the CGCMM angle forces. Used by CGCMMAngleForceComputeGPU.
 */

-//! Texture for reading angle parameters
-scalar2_tex_t angle_params_tex;
-
-//! Texture for reading angle CGCMM S-R parameters
-scalar2_tex_t angle_CGCMMsr_tex; // MISSING EPSILON!!! sigma=.x, rcut=.y
-
-//! Texture for reading angle CGCMM Epsilon-pow/pref parameters
-scalar4_tex_t angle_CGCMMepow_tex; // now with EPSILON=.x, pow1=.y, pow2=.z, pref=.w
-
 //! Kernel for calculating CGCMM angle forces on the GPU
 /*! \param d_force Device memory to write computed forces
     \param d_virial Device memory to write computed virials
@@ -124,7 +115,7 @@ extern "C" __global__ void gpu_compute_CGCMM_angle_forces_kernel(Scalar4* d_force,
         dac = box.minImage(dac);

         // get the angle parameters (MEM TRANSFER: 8 bytes)
-        Scalar2 params = texFetchScalar2(d_params, angle_params_tex, cur_angle_type);
+        Scalar2 params = __ldg(d_params + cur_angle_type);
         Scalar K = params.x;
         Scalar t_0 = params.y;

@@ -154,14 +145,14 @@ extern "C" __global__ void gpu_compute_CGCMM_angle_forces_kernel(Scalar4* d_force,
             vac[i] = Scalar(0.0);

         // get the angle E-S-R parameters (MEM TRANSFER: 12 bytes)
-        const Scalar2 cgSR = texFetchScalar2(d_CGCMMsr, angle_CGCMMsr_tex, cur_angle_type);
+        const Scalar2 cgSR = __ldg(d_CGCMMsr + cur_angle_type);

         Scalar cgsigma = cgSR.x;
         Scalar cgrcut = cgSR.y;

         if (rac < cgrcut)
             {
-            const Scalar4 cgEPOW = texFetchScalar4(d_CGCMMepow, angle_CGCMMepow_tex, cur_angle_type);
+            const Scalar4 cgEPOW = __ldg(d_CGCMMepow + cur_angle_type);

             // get the angle pow/pref parameters (MEM TRANSFER: 12 bytes)
             Scalar cgeps = cgEPOW.x;
@@ -282,8 +273,7 @@ cudaError_t gpu_compute_CGCMM_angle_forces(Scalar4* d_force,
                                            Scalar2 *d_CGCMMsr,
                                            Scalar4 *d_CGCMMepow,
                                            unsigned int n_angle_types,
-                                           int block_size,
-                                           const unsigned int compute_capability)
+                                           int block_size)
     {
     assert(d_params);
     assert(d_CGCMMsr);
@@ -306,22 +296,6 @@ cudaError_t gpu_compute_CGCMM_angle_forces(Scalar4* d_force,
     dim3 grid( (int)ceil((double)N / (double)run_block_size), 1, 1);
     dim3 threads(run_block_size, 1, 1);

-    // bind the textures on pre sm 35 arches
-    if (compute_capability < 350)
-        {
-        cudaError_t error = cudaBindTexture(0, angle_params_tex, d_params, sizeof(Scalar2) * n_angle_types);
-        if (error != cudaSuccess)
-            return error;
-
-        error = cudaBindTexture(0, angle_CGCMMsr_tex, d_CGCMMsr, sizeof(Scalar2) * n_angle_types);
-        if (error != cudaSuccess)
-            return error;
-
-        error = cudaBindTexture(0, angle_CGCMMepow_tex, d_CGCMMepow, sizeof(Scalar4) * n_angle_types);
-        if (error != cudaSuccess)
-            return error;
-        }
-
     // run the kernel
     gpu_compute_CGCMM_angle_forces_kernel<<< grid, threads>>>(d_force,
                                                               d_virial,
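
The kernel-side substitution in this file is mechanical: every texFetchScalarN(ptr, tex, i) pair becomes a direct __ldg(ptr + i), and the matching cudaBindTexture() block on the host disappears. A minimal self-contained sketch of the new read pattern (kernel and buffer names are illustrative, assuming the Scalar2 typedef from HOOMDMath.h):

    // Illustrative kernel: each thread reads its type's parameters through
    // the read-only cache; no texture declaration or host-side bind is needed.
    __global__ void read_angle_params(const Scalar2 *d_params,
                                      const unsigned int *d_type,
                                      Scalar *d_out,
                                      unsigned int N)
        {
        unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i >= N)
            return;
        // was: texFetchScalar2(d_params, angle_params_tex, d_type[i])
        Scalar2 params = __ldg(d_params + __ldg(d_type + i));
        d_out[i] = params.x;  // e.g. the spring constant K
        }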
3 changes: 1 addition & 2 deletions hoomd/cgcmm/CGCMMAngleForceGPU.cuh
@@ -30,7 +30,6 @@ cudaError_t gpu_compute_CGCMM_angle_forces(Scalar4* d_force,
                                            Scalar2 *d_CGCMMsr,
                                            Scalar4 *d_CGCMMepow,
                                            unsigned int n_angle_types,
-                                           int block_size,
-                                           const unsigned int compute_capability);
+                                           int block_size);

 #endif
4 changes: 1 addition & 3 deletions hoomd/cgcmm/CGCMMForceComputeGPU.cc
@@ -165,9 +165,7 @@ void CGCMMForceComputeGPU::computeForces(unsigned int timestep)
                          this->m_nlist->getNListArray().getPitch(),
                          m_pdata->getNTypes(),
                          m_r_cut * m_r_cut,
-                         m_block_size,
-                         m_exec_conf->getComputeCapability()/10,
-                         m_exec_conf->dev_prop.maxTexture1DLinear);
+                         m_block_size);
     if (m_exec_conf->isCUDAErrorCheckingEnabled())
         CHECK_CUDA_ERROR();