#include <ComputeBondedCUDAKernel.h>

Classes
struct	BondedVirial

Public Types
enum	{ energyIndex_BOND =0, energyIndex_ANGLE, energyIndex_DIHEDRAL, energyIndex_IMPROPER, energyIndex_ELECT, energyIndex_LJ, energyIndex_ELECT_SLOW, energyIndex_CROSSTERM, normalVirialIndex_XX, normalVirialIndex_XY, normalVirialIndex_XZ, normalVirialIndex_YX, normalVirialIndex_YY, normalVirialIndex_YZ, normalVirialIndex_ZX, normalVirialIndex_ZY, normalVirialIndex_ZZ, nbondVirialIndex_XX, nbondVirialIndex_XY, nbondVirialIndex_XZ, nbondVirialIndex_YX, nbondVirialIndex_YY, nbondVirialIndex_YZ, nbondVirialIndex_ZX, nbondVirialIndex_ZY, nbondVirialIndex_ZZ, slowVirialIndex_XX, slowVirialIndex_XY, slowVirialIndex_XZ, slowVirialIndex_YX, slowVirialIndex_YY, slowVirialIndex_YZ, slowVirialIndex_ZX, slowVirialIndex_ZY, slowVirialIndex_ZZ, amdDiheVirialIndex_XX, amdDiheVirialIndex_XY, amdDiheVirialIndex_XZ, amdDiheVirialIndex_YX, amdDiheVirialIndex_YY, amdDiheVirialIndex_YZ, amdDiheVirialIndex_ZX, amdDiheVirialIndex_ZY, amdDiheVirialIndex_ZZ, energies_virials_SIZE }

Public Member Functions
	ComputeBondedCUDAKernel (int deviceID, CudaNonbondedTables &cudaNonbondedTables)

	~ComputeBondedCUDAKernel ()

void	update (const int numBondsIn, const int numAnglesIn, const int numDihedralsIn, const int numImpropersIn, const int numModifiedExclusionsIn, const int numExclusionsIn, const int numCrosstermsIn, const char *h_tupleData, cudaStream_t stream)

void	setupBondValues (int numBondValues, CudaBondValue *h_bondValues)

void	setupAngleValues (int numAngleValues, CudaAngleValue *h_angleValues)

void	setupDihedralValues (int numDihedralValues, CudaDihedralValue *h_dihedralValues)

void	setupImproperValues (int numImproperValues, CudaDihedralValue *h_improperValues)

void	setupCrosstermValues (int numCrosstermValues, CudaCrosstermValue *h_crosstermValues)

int	getForceStride (const int atomStorageSize)

int	getForceSize (const int atomStorageSize)

int	getAllForceSize (const int atomStorageSize, const bool doSlow)

void	bondedForce (const double scale14, const int atomStorageSize, const bool doEnergy, const bool doVirial, const bool doSlow, const float3 lata, const float3 latb, const float3 latc, const float cutoff2, const float r2_delta, const int r2_delta_expc, const float4 *h_xyzq, FORCE_TYPE *h_forces, double *h_energies, cudaStream_t stream)

Static Public Member Functions
static int	warpAlign (const int n)

Detailed Description

Definition at line 54 of file ComputeBondedCUDAKernel.h.

Member Enumeration Documentation

anonymous enum

Enumerator
energyIndex_BOND
energyIndex_ANGLE
energyIndex_DIHEDRAL
energyIndex_IMPROPER
energyIndex_ELECT
energyIndex_LJ
energyIndex_ELECT_SLOW
energyIndex_CROSSTERM
normalVirialIndex_XX
normalVirialIndex_XY
normalVirialIndex_XZ
normalVirialIndex_YX
normalVirialIndex_YY
normalVirialIndex_YZ
normalVirialIndex_ZX
normalVirialIndex_ZY
normalVirialIndex_ZZ
nbondVirialIndex_XX
nbondVirialIndex_XY
nbondVirialIndex_XZ
nbondVirialIndex_YX
nbondVirialIndex_YY
nbondVirialIndex_YZ
nbondVirialIndex_ZX
nbondVirialIndex_ZY
nbondVirialIndex_ZZ
slowVirialIndex_XX
slowVirialIndex_XY
slowVirialIndex_XZ
slowVirialIndex_YX
slowVirialIndex_YY
slowVirialIndex_YZ
slowVirialIndex_ZX
slowVirialIndex_ZY
slowVirialIndex_ZZ
amdDiheVirialIndex_XX
amdDiheVirialIndex_XY
amdDiheVirialIndex_XZ
amdDiheVirialIndex_YX
amdDiheVirialIndex_YY
amdDiheVirialIndex_YZ
amdDiheVirialIndex_ZX
amdDiheVirialIndex_ZY
amdDiheVirialIndex_ZZ
energies_virials_SIZE

Definition at line 58 of file ComputeBondedCUDAKernel.h.

        {energyIndex_BOND=0, energyIndex_ANGLE, energyIndex_DIHEDRAL, energyIndex_IMPROPER,
     energyIndex_ELECT, energyIndex_LJ, energyIndex_ELECT_SLOW, energyIndex_CROSSTERM,
     // The eight energyIndex_* entries above take the values 0..7 (BOND is pinned to 0).
     // Each group below contributes nine consecutive entries, XX through ZZ.
     // normalVirialIndex_* group
     normalVirialIndex_XX, normalVirialIndex_XY, normalVirialIndex_XZ,
     normalVirialIndex_YX, normalVirialIndex_YY, normalVirialIndex_YZ,
     normalVirialIndex_ZX, normalVirialIndex_ZY, normalVirialIndex_ZZ,
     // nbondVirialIndex_* group
     nbondVirialIndex_XX, nbondVirialIndex_XY, nbondVirialIndex_XZ,
     nbondVirialIndex_YX, nbondVirialIndex_YY, nbondVirialIndex_YZ,
     nbondVirialIndex_ZX, nbondVirialIndex_ZY, nbondVirialIndex_ZZ,
     // slowVirialIndex_* group
     slowVirialIndex_XX, slowVirialIndex_XY, slowVirialIndex_XZ,
     slowVirialIndex_YX, slowVirialIndex_YY, slowVirialIndex_YZ,
     slowVirialIndex_ZX, slowVirialIndex_ZY, slowVirialIndex_ZZ,
     // amdDiheVirialIndex_* group
     amdDiheVirialIndex_XX, amdDiheVirialIndex_XY, amdDiheVirialIndex_XZ,
     amdDiheVirialIndex_YX, amdDiheVirialIndex_YY, amdDiheVirialIndex_YZ,
     amdDiheVirialIndex_ZX, amdDiheVirialIndex_ZY, amdDiheVirialIndex_ZZ,
     // Count of the entries above; the constructor and bondedForce() use it as the
     // per-bin length of the energies_virials device array.
     energies_virials_SIZE};

Constructor & Destructor Documentation

ComputeBondedCUDAKernel::ComputeBondedCUDAKernel	(	int	deviceID,
		CudaNonbondedTables &	cudaNonbondedTables
	)

Definition at line 1826 of file ComputeBondedCUDAKernel.cu.

References ATOMIC_BINS, cudaCheck, and energies_virials_SIZE.

                                                                                                        :
 deviceID(deviceID), cudaNonbondedTables(cudaNonbondedTables) {
 
   cudaCheck(cudaSetDevice(deviceID));
 
   tupleData = NULL;
   tupleDataSize = 0;
 
   numBonds = 0;
   numAngles = 0;
   numDihedrals = 0;
   numImpropers = 0;
   numModifiedExclusions = 0;
   numExclusions = 0;
   numCrossterms = 0;
 
   bondValues = NULL;
   angleValues = NULL;
   dihedralValues = NULL;
   improperValues = NULL;
   crosstermValues = NULL;
 
   xyzq = NULL;
   xyzqSize = 0;
 
   forces = NULL;
   forcesSize = 0;
 
   forceList = NULL;
   forceListStarts = NULL;
   forceListNexts = NULL;
   forceListSize = 0;
   forceListStartsSize = 0;
   forceListNextsSize = 0;
   allocate_device<int>(&forceListCounter, 1);
 
   allocate_device<double>(&energies_virials, ATOMIC_BINS * energies_virials_SIZE);
 }

ComputeBondedCUDAKernel::~ComputeBondedCUDAKernel ( )

Definition at line 1868 of file ComputeBondedCUDAKernel.cu.

References cudaCheck.

                                                   {
   cudaCheck(cudaSetDevice(deviceID));
 
   deallocate_device<double>(&energies_virials);
   // deallocate_device<BondedVirial>(&virial);
 
   if (tupleData != NULL) deallocate_device<char>(&tupleData);
   if (xyzq != NULL) deallocate_device<float4>(&xyzq);
   if (forces != NULL) deallocate_device<FORCE_TYPE>(&forces);
 
   if (forceList != NULL) deallocate_device<FORCE_TYPE>(&forceList);
   if (forceListCounter != NULL) deallocate_device<int>(&forceListCounter);
   if (forceListStarts != NULL) deallocate_device<int>(&forceListStarts);
   if (forceListNexts != NULL) deallocate_device<int>(&forceListNexts);
 
   if (bondValues != NULL) deallocate_device<CudaBondValue>(&bondValues);
   if (angleValues != NULL) deallocate_device<CudaAngleValue>(&angleValues);
   if (dihedralValues != NULL) deallocate_device<CudaDihedralValue>(&dihedralValues);
   if (improperValues != NULL) deallocate_device<CudaDihedralValue>(&improperValues);
   if (crosstermValues != NULL) deallocate_device<CudaCrosstermValue>(&crosstermValues);
 }

Member Function Documentation

void ComputeBondedCUDAKernel::bondedForce	(	const double	scale14,
		const int	atomStorageSize,
		const bool	doEnergy,
		const bool	doVirial,
		const bool	doSlow,
		const float3	lata,
		const float3	latb,
		const float3	latc,
		const float	cutoff2,
		const float	r2_delta,
		const int	r2_delta_expc,
		const float4 *	h_xyzq,
		FORCE_TYPE *	h_forces,
		double *	h_energies,
		cudaStream_t	stream
	)

Definition at line 2023 of file ComputeBondedCUDAKernel.cu.

References ATOMIC_BINS, BONDEDFORCESKERNEL_NUM_WARP, CALL, cudaCheck, deviceCUDA, energies_virials_SIZE, getAllForceSize(), getForceSize(), getForceStride(), DeviceCUDA::getMaxNumBlocks(), stream, and WARPSIZE.

                        {
   // Runs one bonded-force evaluation on `stream`:
   //   1. (re)sizes the device buffers and copies h_xyzq to the device,
   //   2. launches bondedForcesKernel and modifiedExclusionForcesKernel in chunks
   //      of at most deviceCUDA->getMaxNumBlocks() blocks,
   //   3. when USE_BONDED_FORCE_ATOMIC_STORE is not defined, launches
   //      gatherBondedForcesKernel over the force lists,
   //   4. issues copy_DtoH for the forces and, if doEnergy or doVirial is set,
   //      for the energies/virials.
   // No explicit synchronization call appears in this function.
   // NOTE(review): presumably the caller waits on `stream` before reading the
   // host buffers - confirm.
 
   // Device storage is sized for the doSlow == true layout, while only
   // forceCopySize elements are copied back at the end.
   int forceStorageSize = getAllForceSize(atomStorageSize, true);
   int forceCopySize = getAllForceSize(atomStorageSize, doSlow);
   int forceStride = getForceStride(atomStorageSize);
 
   // Offsets into `forces` of the second (nbond) and third (slow) force arrays
   int forceSize = getForceSize(atomStorageSize);
   int startNbond = forceSize;
   int startSlow = 2*forceSize;
 
   // Re-allocate coordinate and force arrays if necessary
   reallocate_device<float4>(&xyzq, &xyzqSize, atomStorageSize, 1.4f);
   reallocate_device<FORCE_TYPE>(&forces, &forcesSize, forceStorageSize, 1.4f);
 
 #if !defined(USE_BONDED_FORCE_ATOMIC_STORE)
   // List-based accumulation: forceList / forceListNexts are sized from the
   // number of stores each tuple kind performs (table below), times 3.
   // NOTE(review): the factor 3 is presumably one entry per x/y/z component - confirm.
   //                       function               stores
   // numBonds              bondForce              2
   // numAngles             angleForce             3
   // numDihedrals          diheForce              4
   // numImpropers          diheForce              4
   // numExclusions         exclusionForce         2
   // numCrossterms         crosstermForce         8
   // numModifiedExclusions modifiedExclusionForce 4
   int listSize = 3 * (numBonds * 2 + numAngles * 3 + numDihedrals * 4 + numImpropers * 4 + numExclusions * 2 + numCrossterms * 8 + numModifiedExclusions * 4);
   reallocate_device<FORCE_TYPE>(&forceList, &forceListSize, listSize, 1.4f);
   reallocate_device<int>(&forceListNexts, &forceListNextsSize, listSize, 1.4f);
   // forceListStarts holds three consecutive segments of atomStorageSize ints;
   // the second and third are addressed through the Nbond / Slow pointers below.
   reallocate_device<int>(&forceListStarts, &forceListStartsSize, 3 * atomStorageSize, 1.4f);
   int* forceListStartsNbond = forceListStarts + atomStorageSize;
   int* forceListStartsSlow = forceListStarts + 2 * atomStorageSize;
 
   clear_device_array<int>(forceListCounter, 1, stream);
   // cudaMemsetAsync fills bytes, so a value of -1 writes 0xFF into every byte
   // and each int in forceListStarts reads back as -1.
   cudaCheck(cudaMemsetAsync(forceListStarts, -1, sizeof(int) * 3 * atomStorageSize, stream));
 #else
   // Atomic-store build: no force lists, the kernels receive NULL start pointers
   int* forceListStartsNbond = NULL;
   int* forceListStartsSlow = NULL;
 #endif
 
   // Copy coordinates to device
   copy_HtoD<float4>(h_xyzq, xyzq, atomStorageSize, stream);
 
   // Clear force array
 #if defined(USE_BONDED_FORCE_ATOMIC_STORE)
   clear_device_array<FORCE_TYPE>(forces, forceCopySize, stream);
 #endif
   // The accumulators are only cleared when they will be read back below
   if (doEnergy || doVirial) {
     clear_device_array<double>(energies_virials, ATOMIC_BINS * energies_virials_SIZE, stream);
   }
 
   // 1 - scale14, narrowed to float; also decides doElect further down
   float one_scale14 = (float)(1.0 - scale14);
 
   // If doSlow = false, these exclusions are not calculated
   int numExclusionsDoSlow = doSlow ? numExclusions : 0;
 
   int nthread = BONDEDFORCESKERNEL_NUM_WARP * WARPSIZE;
 
   // Thread blocks needed per tuple kind (ceiling division by the block size)
   int numBondsTB     = (numBonds + nthread - 1)/nthread;
   int numAnglesTB    = (numAngles + nthread - 1)/nthread;
   int numDihedralsTB = (numDihedrals + nthread - 1)/nthread;
   int numImpropersTB = (numImpropers + nthread - 1)/nthread;
   int numExclusionsTB= (numExclusionsDoSlow + nthread - 1)/nthread;
   int numCrosstermsTB= (numCrossterms + nthread - 1)/nthread;
 
   int nblock = numBondsTB + numAnglesTB + numDihedralsTB + numImpropersTB + 
   numExclusionsTB + numCrosstermsTB;
   int shmem_size = 0;
 
   // printf("%d %d %d %d %d %d nblock %d\n",
   //   numBonds, numAngles, numDihedrals, numImpropers, numModifiedExclusions, numExclusions, nblock);
 
   int max_nblock = deviceCUDA->getMaxNumBlocks();
 
   // Launch in chunks of at most max_nblock blocks; `start` is the index of
   // the first block of the current chunk and is passed to the kernel.
   int start = 0;
   while (start < nblock)
   {
     int nleft = nblock - start;
     int nblock_use = min(max_nblock, nleft);
 
 
   // NOTE(review): NONBONDEDTABLES is defined here but the CALL macros below
   // spell the table arguments out explicitly, and no #undef for it is visible
   // in this function - it looks unused; confirm before removing.
 #ifdef NAMD_HIP
 #define NONBONDEDTABLES cudaNonbondedTables.get_r2_table(), cudaNonbondedTables.getExclusionTable()
 #else
 #define NONBONDEDTABLES cudaNonbondedTables.get_r2_table(), cudaNonbondedTables.getExclusionTable(), \
 cudaNonbondedTables.get_r2_table_tex(), cudaNonbondedTables.getExclusionTableTex()
 #endif
 
   // CALL launches bondedForcesKernel for one (DOENERGY, DOVIRIAL) template
   // combination. The two variants differ only in that the non-HIP one also
   // passes the *_tex table accessors.
 #ifdef NAMD_HIP
 #define CALL(DOENERGY, DOVIRIAL) \
   bondedForcesKernel<FORCE_TYPE, DOENERGY, DOVIRIAL> <<< nblock_use, nthread, shmem_size, stream >>> \
     (start, numBonds, bonds, bondValues, \
     numAngles, angles, angleValues, \
     numDihedrals, dihedrals, dihedralValues, \
     numImpropers, impropers, improperValues, \
     numExclusionsDoSlow, exclusions, \
     numCrossterms, crossterms, crosstermValues, \
     cutoff2, \
     r2_delta, r2_delta_expc, \
     cudaNonbondedTables.get_r2_table(), cudaNonbondedTables.getExclusionTable() , \
     xyzq, forceStride, \
     lata, latb, latc, \
     forces, &forces[startSlow], \
     forceList, forceListCounter, forceListStarts, forceListStartsSlow, forceListNexts, \
     energies_virials);
 #else
 #define CALL(DOENERGY, DOVIRIAL) \
   bondedForcesKernel<FORCE_TYPE, DOENERGY, DOVIRIAL> <<< nblock_use, nthread, shmem_size, stream >>> \
     (start, numBonds, bonds, bondValues, \
     numAngles, angles, angleValues, \
     numDihedrals, dihedrals, dihedralValues, \
     numImpropers, impropers, improperValues, \
     numExclusionsDoSlow, exclusions, \
     numCrossterms, crossterms, crosstermValues, \
     cutoff2, \
     r2_delta, r2_delta_expc, \
     cudaNonbondedTables.get_r2_table(), cudaNonbondedTables.getExclusionTable() , \
     cudaNonbondedTables.get_r2_table_tex(), cudaNonbondedTables.getExclusionTableTex() , \
     xyzq, forceStride, \
     lata, latb, latc, \
     forces, &forces[startSlow], \
     forceList, forceListCounter, forceListStarts, forceListStartsSlow, forceListNexts, \
     energies_virials);
 #endif
 
     // Map the runtime flags onto the matching template instantiation;
     // exactly one of the four conditions holds.
     if (!doEnergy && !doVirial) CALL(0, 0);
     if (!doEnergy && doVirial)  CALL(0, 1);
     if (doEnergy && !doVirial)  CALL(1, 0);
     if (doEnergy && doVirial)   CALL(1, 1);
 
 #undef CALL
     cudaCheck(cudaGetLastError());
 
     start += nblock_use;
   }
 
   // Second pass: modified exclusions, one thread per tuple (ceiling division)
   nthread = BONDEDFORCESKERNEL_NUM_WARP * WARPSIZE;
   nblock = (numModifiedExclusions + nthread - 1)/nthread;
 
   // DOELECT is switched off only when 1 - scale14 is exactly zero in float
   bool doElect = (one_scale14 == 0.0f) ? false : true;
 
   start = 0;
   while (start < nblock)
   {
     int nleft = nblock - start;
     int nblock_use = min(max_nblock, nleft);
 
   // CALL launches modifiedExclusionForcesKernel for one
   // (DOENERGY, DOVIRIAL, DOELECT) combination; it targets the nbond and slow
   // force arrays and the matching forceListStarts segments.
 #define CALL(DOENERGY, DOVIRIAL, DOELECT) \
   modifiedExclusionForcesKernel<FORCE_TYPE, DOENERGY, DOVIRIAL, DOELECT> \
   <<< nblock_use, nthread, shmem_size, stream >>> (\
     start, numModifiedExclusions, modifiedExclusions, \
     doSlow, one_scale14, cutoff2, \
     cudaNonbondedTables.getVdwCoefTableWidth(), cudaNonbondedTables.getExclusionVdwCoefTable(), \
     cudaNonbondedTables.getExclusionVdwCoefTableTex(), \
     cudaNonbondedTables.getModifiedExclusionForceTableTex(), cudaNonbondedTables.getModifiedExclusionEnergyTableTex(), \
     xyzq, forceStride, lata, latb, latc, \
     &forces[startNbond], &forces[startSlow], \
     forceList, forceListCounter, forceListStartsNbond, forceListStartsSlow, forceListNexts, \
     energies_virials);
 
 
     // Eight mutually exclusive flag combinations, one launch each
     if (!doEnergy && !doVirial && !doElect) CALL(0, 0, 0);
     if (!doEnergy && doVirial && !doElect)  CALL(0, 1, 0);
     if (doEnergy && !doVirial && !doElect)  CALL(1, 0, 0);
     if (doEnergy && doVirial && !doElect)   CALL(1, 1, 0);
 
     if (!doEnergy && !doVirial && doElect)  CALL(0, 0, 1);
     if (!doEnergy && doVirial && doElect)   CALL(0, 1, 1);
     if (doEnergy && !doVirial && doElect)   CALL(1, 0, 1);
     if (doEnergy && doVirial && doElect)    CALL(1, 1, 1);
 
 #undef CALL
     cudaCheck(cudaGetLastError());
 
     start += nblock_use;
   }
 #if !defined(USE_BONDED_FORCE_ATOMIC_STORE)
   // Gather pass over atoms: one launch per force array, each reading its own
   // forceListStarts segment. The slow array is gathered only when doSlow is set.
   nthread = BONDEDFORCESKERNEL_NUM_WARP * WARPSIZE;
   nblock = (atomStorageSize + nthread - 1)/nthread;
 
   start = 0;
   while (start < nblock)
   {
     int nleft = nblock - start;
     int nblock_use = min(max_nblock, nleft);
 
     // cudaCheck(hipDeviceSynchronize());
     // auto t0 = std::chrono::high_resolution_clock::now();
 
     gatherBondedForcesKernel<FORCE_TYPE><<<nblock_use, nthread, 0, stream>>>(
       start, atomStorageSize, forceStride,
       forceList, forceListStarts, forceListNexts,
       forces);
     // NOTE(review): this launch targets &forces[startNbond] unconditionally,
     // while getAllForceSize() sizes `forces` for a single array when there are
     // no exclusions - confirm the kernel writes nothing in that case.
     gatherBondedForcesKernel<FORCE_TYPE><<<nblock_use, nthread, 0, stream>>>(
       start, atomStorageSize, forceStride,
       forceList, forceListStartsNbond, forceListNexts,
       &forces[startNbond]);
     if (doSlow) {
       gatherBondedForcesKernel<FORCE_TYPE><<<nblock_use, nthread, 0, stream>>>(
         start, atomStorageSize, forceStride,
         forceList, forceListStartsSlow, forceListNexts,
         &forces[startSlow]);
     }
     cudaCheck(cudaGetLastError());
 
     // cudaCheck(hipStreamSynchronize(stream));
     // auto t1 = std::chrono::high_resolution_clock::now();
     // std::chrono::duration<double> diff1 = t1 - t0;
     // std::cout << "gatherBondedForcesKernel";
     // std::cout << " " << std::setprecision(3) << diff1.count() * 1e3 << " ms" << std::endl;
 
     start += nblock_use;
   }
 #endif
 
   // Results back to the host, issued on the same stream as the kernels
   copy_DtoH<FORCE_TYPE>(forces, h_forces, forceCopySize, stream);
   if (doEnergy || doVirial) {
     if (ATOMIC_BINS > 1) {
       // Reduce energies_virials[ATOMIC_BINS][energies_virials_SIZE] in-place (results are in energies_virials[0])
       // NOTE(review): unlike the launches above, this one is not followed by
       // cudaCheck(cudaGetLastError()).
       reduceBondedBinsKernel<<<energies_virials_SIZE, ATOMIC_BINS, 0, stream>>>(energies_virials);
     }
     // NOTE(review): the documented signature names this parameter h_energies;
     // presumably the definition in the .cu file calls it h_energies_virials - confirm.
     copy_DtoH<double>(energies_virials, h_energies_virials, energies_virials_SIZE, stream);
   }
 
 }

int ComputeBondedCUDAKernel::getAllForceSize	(	const int	atomStorageSize,
		const bool	doSlow
	)

Definition at line 2003 of file ComputeBondedCUDAKernel.cu.

References getForceSize().

Referenced by bondedForce().

                                                                                          {
 
   int forceSize = getForceSize(atomStorageSize);
 
   if (numModifiedExclusions > 0 || numExclusions > 0) {
     if (doSlow) {
       // All three force arrays [normal, nbond, slow]
       forceSize *= 3;
     } else {
       // Two force arrays [normal, nbond]
       forceSize *= 2;
     }
   }
 
   return forceSize;
 }

int ComputeBondedCUDAKernel::getForceSize ( const int atomStorageSize )

Definition at line 1992 of file ComputeBondedCUDAKernel.cu.

References getForceStride().

Referenced by bondedForce(), and getAllForceSize().

                                                                    {
 #ifdef USE_STRIDED_FORCE
   return (3*getForceStride(atomStorageSize));
 #else
   return (3*atomStorageSize);
 #endif
 }

int ComputeBondedCUDAKernel::getForceStride ( const int atomStorageSize )

Definition at line 1980 of file ComputeBondedCUDAKernel.cu.

References FORCE_TYPE.

Referenced by bondedForce(), and getForceSize().

                                                                      {
 #ifdef USE_STRIDED_FORCE
   // Align stride to 256 bytes
   return ((atomStorageSize*sizeof(FORCE_TYPE) - 1)/256 + 1)*256/sizeof(FORCE_TYPE);
 #else
   return 1;
 #endif
 }

void ComputeBondedCUDAKernel::setupAngleValues	(	int	numAngleValues,
		CudaAngleValue *	h_angleValues
	)

Definition at line 1895 of file ComputeBondedCUDAKernel.cu.

                                                                                                 {
   allocate_device<CudaAngleValue>(&angleValues, numAngleValues);
   copy_HtoD_sync<CudaAngleValue>(h_angleValues, angleValues, numAngleValues);
 }

void ComputeBondedCUDAKernel::setupBondValues	(	int	numBondValues,
		CudaBondValue *	h_bondValues
	)

Definition at line 1890 of file ComputeBondedCUDAKernel.cu.

                                                                                             {
   allocate_device<CudaBondValue>(&bondValues, numBondValues);
   copy_HtoD_sync<CudaBondValue>(h_bondValues, bondValues, numBondValues);
 }

void ComputeBondedCUDAKernel::setupCrosstermValues	(	int	numCrosstermValues,
		CudaCrosstermValue *	h_crosstermValues
	)

Definition at line 1910 of file ComputeBondedCUDAKernel.cu.

                                                                                                                 {
   allocate_device<CudaCrosstermValue>(&crosstermValues, numCrosstermValues);
   copy_HtoD_sync<CudaCrosstermValue>(h_crosstermValues, crosstermValues, numCrosstermValues);
 }

void ComputeBondedCUDAKernel::setupDihedralValues	(	int	numDihedralValues,
		CudaDihedralValue *	h_dihedralValues
	)

Definition at line 1900 of file ComputeBondedCUDAKernel.cu.

                                                                                                             {
   allocate_device<CudaDihedralValue>(&dihedralValues, numDihedralValues);
   copy_HtoD_sync<CudaDihedralValue>(h_dihedralValues, dihedralValues, numDihedralValues);
 }

void ComputeBondedCUDAKernel::setupImproperValues	(	int	numImproperValues,
		CudaDihedralValue *	h_improperValues
	)

Definition at line 1905 of file ComputeBondedCUDAKernel.cu.

                                                                                                             {
   allocate_device<CudaDihedralValue>(&improperValues, numImproperValues);
   copy_HtoD_sync<CudaDihedralValue>(h_improperValues, improperValues, numImproperValues);
 }

void ComputeBondedCUDAKernel::update	(	const int	numBondsIn,
		const int	numAnglesIn,
		const int	numDihedralsIn,
		const int	numImpropersIn,
		const int	numModifiedExclusionsIn,
		const int	numExclusionsIn,
		const int	numCrosstermsIn,
		const char *	h_tupleData,
		cudaStream_t	stream
	)

Definition at line 1918 of file ComputeBondedCUDAKernel.cu.

References stream, and warpAlign().

                        {
 
   numBonds              = numBondsIn;
   numAngles             = numAnglesIn;
   numDihedrals          = numDihedralsIn;
   numImpropers          = numImpropersIn;
   numModifiedExclusions = numModifiedExclusionsIn;
   numExclusions         = numExclusionsIn;
   numCrossterms         = numCrosstermsIn;
 
   int numBondsWA     = warpAlign(numBonds);
   int numAnglesWA    = warpAlign(numAngles);
   int numDihedralsWA = warpAlign(numDihedrals);
   int numImpropersWA = warpAlign(numImpropers);
   int numModifiedExclusionsWA = warpAlign(numModifiedExclusions);
   int numExclusionsWA         = warpAlign(numExclusions);
   int numCrosstermsWA         = warpAlign(numCrossterms);
 
   int sizeTot = numBondsWA*sizeof(CudaBond) + numAnglesWA*sizeof(CudaAngle) + 
   numDihedralsWA*sizeof(CudaDihedral) + numImpropersWA*sizeof(CudaDihedral) +
   numModifiedExclusionsWA*sizeof(CudaExclusion) + numExclusionsWA*sizeof(CudaExclusion) + 
   numCrosstermsWA*sizeof(CudaCrossterm);
 
   reallocate_device<char>(&tupleData, &tupleDataSize, sizeTot, 1.4f);
   copy_HtoD<char>(h_tupleData, tupleData, sizeTot, stream);
 
   // Setup pointers
   int pos = 0;
   bonds = (CudaBond *)&tupleData[pos];
   pos += numBondsWA*sizeof(CudaBond);
 
   angles = (CudaAngle* )&tupleData[pos];
   pos += numAnglesWA*sizeof(CudaAngle);
 
   dihedrals = (CudaDihedral* )&tupleData[pos];
   pos += numDihedralsWA*sizeof(CudaDihedral);
 
   impropers = (CudaDihedral* )&tupleData[pos];
   pos += numImpropersWA*sizeof(CudaDihedral);
 
   modifiedExclusions = (CudaExclusion* )&tupleData[pos];
   pos += numModifiedExclusionsWA*sizeof(CudaExclusion);
 
   exclusions = (CudaExclusion* )&tupleData[pos];
   pos += numExclusionsWA*sizeof(CudaExclusion);
 
   crossterms = (CudaCrossterm* )&tupleData[pos];
   pos += numCrosstermsWA*sizeof(CudaCrossterm);
 }

static int ComputeBondedCUDAKernel::warpAlign ( const int n )

inline static

Definition at line 161 of file ComputeBondedCUDAKernel.h.

References WARPSIZE.

Referenced by update().

{return ((n + WARPSIZE - 1)/WARPSIZE)*WARPSIZE;}

WARPSIZE

#define WARPSIZE

Definition: CudaUtils.h:10

The documentation for this class was generated from the following files:

ComputeBondedCUDAKernel.h
ComputeBondedCUDAKernel.cu

Classes

Public Types

Public Member Functions

Static Public Member Functions

Detailed Description

Member Enumeration Documentation

Constructor & Destructor Documentation

Member Function Documentation