ComputeBondedCUDAKernel Class Reference

#include <ComputeBondedCUDAKernel.h>

List of all members.

Public Types

 energyIndex_BOND = 0
 energyIndex_ANGLE
 energyIndex_DIHEDRAL
 energyIndex_IMPROPER
 energyIndex_ELECT
 energyIndex_LJ
 energyIndex_ELECT_SLOW
 energyIndex_CROSSTERM
 normalVirialIndex_XX
 normalVirialIndex_XY
 normalVirialIndex_XZ
 normalVirialIndex_YX
 normalVirialIndex_YY
 normalVirialIndex_YZ
 normalVirialIndex_ZX
 normalVirialIndex_ZY
 normalVirialIndex_ZZ
 nbondVirialIndex_XX
 nbondVirialIndex_XY
 nbondVirialIndex_XZ
 nbondVirialIndex_YX
 nbondVirialIndex_YY
 nbondVirialIndex_YZ
 nbondVirialIndex_ZX
 nbondVirialIndex_ZY
 nbondVirialIndex_ZZ
 slowVirialIndex_XX
 slowVirialIndex_XY
 slowVirialIndex_XZ
 slowVirialIndex_YX
 slowVirialIndex_YY
 slowVirialIndex_YZ
 slowVirialIndex_ZX
 slowVirialIndex_ZY
 slowVirialIndex_ZZ
 amdDiheVirialIndex_XX
 amdDiheVirialIndex_XY
 amdDiheVirialIndex_XZ
 amdDiheVirialIndex_YX
 amdDiheVirialIndex_YY
 amdDiheVirialIndex_YZ
 amdDiheVirialIndex_ZX
 amdDiheVirialIndex_ZY
 amdDiheVirialIndex_ZZ
 energies_virials_SIZE
enum  {
  energyIndex_BOND = 0, energyIndex_ANGLE, energyIndex_DIHEDRAL, energyIndex_IMPROPER,
  energyIndex_ELECT, energyIndex_LJ, energyIndex_ELECT_SLOW, energyIndex_CROSSTERM,
  normalVirialIndex_XX, normalVirialIndex_XY, normalVirialIndex_XZ, normalVirialIndex_YX,
  normalVirialIndex_YY, normalVirialIndex_YZ, normalVirialIndex_ZX, normalVirialIndex_ZY,
  normalVirialIndex_ZZ, nbondVirialIndex_XX, nbondVirialIndex_XY, nbondVirialIndex_XZ,
  nbondVirialIndex_YX, nbondVirialIndex_YY, nbondVirialIndex_YZ, nbondVirialIndex_ZX,
  nbondVirialIndex_ZY, nbondVirialIndex_ZZ, slowVirialIndex_XX, slowVirialIndex_XY,
  slowVirialIndex_XZ, slowVirialIndex_YX, slowVirialIndex_YY, slowVirialIndex_YZ,
  slowVirialIndex_ZX, slowVirialIndex_ZY, slowVirialIndex_ZZ, amdDiheVirialIndex_XX,
  amdDiheVirialIndex_XY, amdDiheVirialIndex_XZ, amdDiheVirialIndex_YX, amdDiheVirialIndex_YY,
  amdDiheVirialIndex_YZ, amdDiheVirialIndex_ZX, amdDiheVirialIndex_ZY, amdDiheVirialIndex_ZZ,
  energies_virials_SIZE
}

Public Member Functions

 ComputeBondedCUDAKernel (int deviceID, CudaNonbondedTables &cudaNonbondedTables)
 ~ComputeBondedCUDAKernel ()
void update (const int numBondsIn, const int numAnglesIn, const int numDihedralsIn, const int numImpropersIn, const int numModifiedExclusionsIn, const int numExclusionsIn, const int numCrosstermsIn, const char *h_tupleData, cudaStream_t stream)
void setupBondValues (int numBondValues, CudaBondValue *h_bondValues)
void setupAngleValues (int numAngleValues, CudaAngleValue *h_angleValues)
void setupDihedralValues (int numDihedralValues, CudaDihedralValue *h_dihedralValues)
void setupImproperValues (int numImproperValues, CudaDihedralValue *h_improperValues)
void setupCrosstermValues (int numCrosstermValues, CudaCrosstermValue *h_crosstermValues)
int getForceStride (const int atomStorageSize)
int getForceSize (const int atomStorageSize)
int getAllForceSize (const int atomStorageSize, const bool doSlow)
void bondedForce (const double scale14, const int atomStorageSize, const bool doEnergy, const bool doVirial, const bool doSlow, const float3 lata, const float3 latb, const float3 latc, const float cutoff2, const float r2_delta, const int r2_delta_expc, const float4 *h_xyzq, FORCE_TYPE *h_forces, double *h_energies, cudaStream_t stream)

Static Public Member Functions

static int warpAlign (const int n)

Classes

struct  BondedVirial


Detailed Description

Definition at line 46 of file ComputeBondedCUDAKernel.h.


Member Enumeration Documentation

anonymous enum

Enumerator:
energyIndex_BOND 
energyIndex_ANGLE 
energyIndex_DIHEDRAL 
energyIndex_IMPROPER 
energyIndex_ELECT 
energyIndex_LJ 
energyIndex_ELECT_SLOW 
energyIndex_CROSSTERM 
normalVirialIndex_XX 
normalVirialIndex_XY 
normalVirialIndex_XZ 
normalVirialIndex_YX 
normalVirialIndex_YY 
normalVirialIndex_YZ 
normalVirialIndex_ZX 
normalVirialIndex_ZY 
normalVirialIndex_ZZ 
nbondVirialIndex_XX 
nbondVirialIndex_XY 
nbondVirialIndex_XZ 
nbondVirialIndex_YX 
nbondVirialIndex_YY 
nbondVirialIndex_YZ 
nbondVirialIndex_ZX 
nbondVirialIndex_ZY 
nbondVirialIndex_ZZ 
slowVirialIndex_XX 
slowVirialIndex_XY 
slowVirialIndex_XZ 
slowVirialIndex_YX 
slowVirialIndex_YY 
slowVirialIndex_YZ 
slowVirialIndex_ZX 
slowVirialIndex_ZY 
slowVirialIndex_ZZ 
amdDiheVirialIndex_XX 
amdDiheVirialIndex_XY 
amdDiheVirialIndex_XZ 
amdDiheVirialIndex_YX 
amdDiheVirialIndex_YY 
amdDiheVirialIndex_YZ 
amdDiheVirialIndex_ZX 
amdDiheVirialIndex_ZY 
amdDiheVirialIndex_ZZ 
energies_virials_SIZE 

Definition at line 50 of file ComputeBondedCUDAKernel.h.


Constructor & Destructor Documentation

ComputeBondedCUDAKernel::ComputeBondedCUDAKernel ( int  deviceID,
CudaNonbondedTables cudaNonbondedTables 
)

Definition at line 1582 of file ComputeBondedCUDAKernel.cu.

References cudaCheck, and energies_virials_SIZE.

01582                                                                                                        :
01583 deviceID(deviceID), cudaNonbondedTables(cudaNonbondedTables) {
01584 
01585   cudaCheck(cudaSetDevice(deviceID));
01586 
01587   tupleData = NULL;
01588   tupleDataSize = 0;
01589 
01590   numBonds = 0;
01591   numAngles = 0;
01592   numDihedrals = 0;
01593   numImpropers = 0;
01594   numModifiedExclusions = 0;
01595   numExclusions = 0;
01596   numCrossterms = 0;
01597 
01598   bondValues = NULL;
01599   angleValues = NULL;
01600   dihedralValues = NULL;
01601   improperValues = NULL;
01602   crosstermValues = NULL;
01603 
01604   xyzq = NULL;
01605   xyzqSize = 0;
01606 
01607   forces = NULL;
01608   forcesSize = 0;
01609 
01610   allocate_device<double>(&energies_virials, energies_virials_SIZE);
01611 }

ComputeBondedCUDAKernel::~ComputeBondedCUDAKernel (  ) 

Definition at line 1616 of file ComputeBondedCUDAKernel.cu.

References cudaCheck.

01616                                                   {
01617   cudaCheck(cudaSetDevice(deviceID));
01618 
01619   deallocate_device<double>(&energies_virials);
01620   // deallocate_device<BondedVirial>(&virial);
01621 
01622   if (tupleData != NULL) deallocate_device<char>(&tupleData);
01623   if (xyzq != NULL) deallocate_device<float4>(&xyzq);
01624   if (forces != NULL) deallocate_device<FORCE_TYPE>(&forces);
01625 
01626   if (bondValues != NULL) deallocate_device<CudaBondValue>(&bondValues);
01627   if (angleValues != NULL) deallocate_device<CudaAngleValue>(&angleValues);
01628   if (dihedralValues != NULL) deallocate_device<CudaDihedralValue>(&dihedralValues);
01629   if (improperValues != NULL) deallocate_device<CudaDihedralValue>(&improperValues);
01630   if (crosstermValues != NULL) deallocate_device<CudaCrosstermValue>(&crosstermValues);
01631 }


Member Function Documentation

void ComputeBondedCUDAKernel::bondedForce ( const double  scale14,
const int  atomStorageSize,
const bool  doEnergy,
const bool  doVirial,
const bool  doSlow,
const float3  lata,
const float3  latb,
const float3  latc,
const float  cutoff2,
const float  r2_delta,
const int  r2_delta_expc,
const float4 *  h_xyzq,
FORCE_TYPE *  h_forces,
double *  h_energies,
cudaStream_t  stream 
)

Definition at line 1766 of file ComputeBondedCUDAKernel.cu.

References BONDEDFORCESKERNEL_NUM_WARP, CALL, cudaCheck, deviceCUDA, energies_virials_SIZE, getAllForceSize(), getForceSize(), getForceStride(), DeviceCUDA::getMaxNumBlocks(), and WARPSIZE.

01773                        {
01774 
01775   int forceStorageSize = getAllForceSize(atomStorageSize, true);
01776   int forceCopySize = getAllForceSize(atomStorageSize, doSlow);
01777   int forceStride = getForceStride(atomStorageSize);
01778 
01779   int forceSize = getForceSize(atomStorageSize);
01780   int startNbond = forceSize;
01781   int startSlow = 2*forceSize;
01782 
01783   // Re-allocate coordinate and force arrays if neccessary
01784   reallocate_device<float4>(&xyzq, &xyzqSize, atomStorageSize, 1.4f);
01785   reallocate_device<FORCE_TYPE>(&forces, &forcesSize, forceStorageSize, 1.4f);
01786 
01787   // Copy coordinates to device
01788   copy_HtoD<float4>(h_xyzq, xyzq, atomStorageSize, stream);
01789 
01790   // Clear force array
01791   clear_device_array<FORCE_TYPE>(forces, forceCopySize, stream);
01792   if (doEnergy || doVirial) {
01793     clear_device_array<double>(energies_virials, energies_virials_SIZE, stream);
01794   }
01795 
01796   float one_scale14 = (float)(1.0 - scale14);
01797 
01798   // If doSlow = false, these exclusions are not calculated
01799   int numExclusionsDoSlow = doSlow ? numExclusions : 0;
01800 
01801   int nthread = BONDEDFORCESKERNEL_NUM_WARP * WARPSIZE;
01802 
01803   int numBondsTB     = (numBonds + nthread - 1)/nthread;
01804   int numAnglesTB    = (numAngles + nthread - 1)/nthread;
01805   int numDihedralsTB = (numDihedrals + nthread - 1)/nthread;
01806   int numImpropersTB = (numImpropers + nthread - 1)/nthread;
01807   int numExclusionsTB= (numExclusionsDoSlow + nthread - 1)/nthread;
01808   int numCrosstermsTB= (numCrossterms + nthread - 1)/nthread;
01809 
01810   int nblock = numBondsTB + numAnglesTB + numDihedralsTB + numImpropersTB + 
01811   numExclusionsTB + numCrosstermsTB;
01812   int shmem_size = 0;
01813 
01814   // printf("%d %d %d %d %d %d nblock %d\n",
01815   //   numBonds, numAngles, numDihedrals, numImpropers, numModifiedExclusions, numExclusions, nblock);
01816 
01817   int max_nblock = deviceCUDA->getMaxNumBlocks();
01818 
01819   int start = 0;
01820   while (start < nblock)
01821   {
01822     int nleft = nblock - start;
01823     int nblock_use = min(max_nblock, nleft);
01824 
01825 #define CALL(DOENERGY, DOVIRIAL) \
01826   bondedForcesKernel<FORCE_TYPE, DOENERGY, DOVIRIAL> \
01827   <<< nblock_use, nthread, shmem_size, stream >>> \
01828   (start, numBonds, bonds, bondValues, \
01829     numAngles, angles, angleValues, \
01830     numDihedrals, dihedrals, dihedralValues, \
01831     numImpropers, impropers, improperValues, \
01832     numExclusionsDoSlow, exclusions, \
01833     numCrossterms, crossterms, crosstermValues, \
01834     cutoff2, \
01835     r2_delta, r2_delta_expc, \
01836     cudaNonbondedTables.get_r2_table(), cudaNonbondedTables.getExclusionTable(), \
01837     cudaNonbondedTables.get_r2_table_tex(), cudaNonbondedTables.getExclusionTableTex(), \
01838     xyzq, forceStride, \
01839     lata, latb, latc, \
01840     forces, &forces[startSlow], energies_virials);
01841 
01842     if (!doEnergy && !doVirial) CALL(0, 0);
01843     if (!doEnergy && doVirial)  CALL(0, 1);
01844     if (doEnergy && !doVirial)  CALL(1, 0);
01845     if (doEnergy && doVirial)   CALL(1, 1);
01846 
01847 #undef CALL
01848     cudaCheck(cudaGetLastError());
01849 
01850     start += nblock_use;
01851   }
01852 
01853   nthread = BONDEDFORCESKERNEL_NUM_WARP * WARPSIZE;
01854   nblock = (numModifiedExclusions + nthread - 1)/nthread;
01855 
01856   bool doElect = (one_scale14 == 0.0f) ? false : true;
01857 
01858   start = 0;
01859   while (start < nblock)
01860   {
01861     int nleft = nblock - start;
01862     int nblock_use = min(max_nblock, nleft);
01863 
01864 #define CALL(DOENERGY, DOVIRIAL, DOELECT) \
01865   modifiedExclusionForcesKernel<FORCE_TYPE, DOENERGY, DOVIRIAL, DOELECT> \
01866   <<< nblock_use, nthread, shmem_size, stream >>> \
01867   (start, numModifiedExclusions, modifiedExclusions, \
01868     doSlow, one_scale14, cutoff2, \
01869     cudaNonbondedTables.getVdwCoefTableWidth(), cudaNonbondedTables.getExclusionVdwCoefTable(), \
01870     cudaNonbondedTables.getExclusionVdwCoefTableTex(), \
01871     cudaNonbondedTables.getModifiedExclusionForceTableTex(), cudaNonbondedTables.getModifiedExclusionEnergyTableTex(), \
01872     xyzq, forceStride, lata, latb, latc, \
01873     &forces[startNbond], &forces[startSlow], energies_virials);
01874 
01875     if (!doEnergy && !doVirial && !doElect) CALL(0, 0, 0);
01876     if (!doEnergy && doVirial && !doElect)  CALL(0, 1, 0);
01877     if (doEnergy && !doVirial && !doElect)  CALL(1, 0, 0);
01878     if (doEnergy && doVirial && !doElect)   CALL(1, 1, 0);
01879 
01880     if (!doEnergy && !doVirial && doElect)  CALL(0, 0, 1);
01881     if (!doEnergy && doVirial && doElect)   CALL(0, 1, 1);
01882     if (doEnergy && !doVirial && doElect)   CALL(1, 0, 1);
01883     if (doEnergy && doVirial && doElect)    CALL(1, 1, 1);
01884 
01885 #undef CALL
01886     cudaCheck(cudaGetLastError());
01887 
01888     start += nblock_use;
01889   }
01890 
01891   copy_DtoH<FORCE_TYPE>(forces, h_forces, forceCopySize, stream);
01892   if (doEnergy || doVirial) {
01893     copy_DtoH<double>(energies_virials, h_energies_virials, energies_virials_SIZE, stream);
01894   }
01895 
01896 }

int ComputeBondedCUDAKernel::getAllForceSize ( const int  atomStorageSize,
const bool  doSlow 
)

Definition at line 1746 of file ComputeBondedCUDAKernel.cu.

References getForceSize().

Referenced by bondedForce().

01746                                                                                          {
01747 
01748   int forceSize = getForceSize(atomStorageSize);
01749 
01750   if (numModifiedExclusions > 0 || numExclusions > 0) {
01751     if (doSlow) {
01752       // All three force arrays [normal, nbond, slow]
01753       forceSize *= 3;
01754     } else {
01755       // Two force arrays [normal, nbond]
01756       forceSize *= 2;
01757     }
01758   }
01759 
01760   return forceSize;
01761 }

int ComputeBondedCUDAKernel::getForceSize ( const int  atomStorageSize  ) 

Definition at line 1735 of file ComputeBondedCUDAKernel.cu.

References getForceStride().

Referenced by bondedForce(), and getAllForceSize().

01735                                                                    {
01736 #ifdef USE_STRIDED_FORCE
01737   return (3*getForceStride(atomStorageSize));
01738 #else
01739   return (3*atomStorageSize);
01740 #endif
01741 }

int ComputeBondedCUDAKernel::getForceStride ( const int  atomStorageSize  ) 

Definition at line 1723 of file ComputeBondedCUDAKernel.cu.

References FORCE_TYPE.

Referenced by bondedForce(), and getForceSize().

01723                                                                      {
01724 #ifdef USE_STRIDED_FORCE
01725   // Align stride to 256 bytes
01726   return ((atomStorageSize*sizeof(FORCE_TYPE) - 1)/256 + 1)*256/sizeof(FORCE_TYPE);
01727 #else
01728   return 1;
01729 #endif
01730 }

void ComputeBondedCUDAKernel::setupAngleValues ( int  numAngleValues,
CudaAngleValue h_angleValues 
)

Definition at line 1638 of file ComputeBondedCUDAKernel.cu.

01638                                                                                                 {
01639   allocate_device<CudaAngleValue>(&angleValues, numAngleValues);
01640   copy_HtoD_sync<CudaAngleValue>(h_angleValues, angleValues, numAngleValues);
01641 }

void ComputeBondedCUDAKernel::setupBondValues ( int  numBondValues,
CudaBondValue h_bondValues 
)

Definition at line 1633 of file ComputeBondedCUDAKernel.cu.

01633                                                                                             {
01634   allocate_device<CudaBondValue>(&bondValues, numBondValues);
01635   copy_HtoD_sync<CudaBondValue>(h_bondValues, bondValues, numBondValues);
01636 }

void ComputeBondedCUDAKernel::setupCrosstermValues ( int  numCrosstermValues,
CudaCrosstermValue h_crosstermValues 
)

Definition at line 1653 of file ComputeBondedCUDAKernel.cu.

01653                                                                                                                 {
01654   allocate_device<CudaCrosstermValue>(&crosstermValues, numCrosstermValues);
01655   copy_HtoD_sync<CudaCrosstermValue>(h_crosstermValues, crosstermValues, numCrosstermValues);
01656 }

void ComputeBondedCUDAKernel::setupDihedralValues ( int  numDihedralValues,
CudaDihedralValue h_dihedralValues 
)

Definition at line 1643 of file ComputeBondedCUDAKernel.cu.

01643                                                                                                             {
01644   allocate_device<CudaDihedralValue>(&dihedralValues, numDihedralValues);
01645   copy_HtoD_sync<CudaDihedralValue>(h_dihedralValues, dihedralValues, numDihedralValues);
01646 }

void ComputeBondedCUDAKernel::setupImproperValues ( int  numImproperValues,
CudaDihedralValue h_improperValues 
)

Definition at line 1648 of file ComputeBondedCUDAKernel.cu.

01648                                                                                                             {
01649   allocate_device<CudaDihedralValue>(&improperValues, numImproperValues);
01650   copy_HtoD_sync<CudaDihedralValue>(h_improperValues, improperValues, numImproperValues);
01651 }

void ComputeBondedCUDAKernel::update ( const int  numBondsIn,
const int  numAnglesIn,
const int  numDihedralsIn,
const int  numImpropersIn,
const int  numModifiedExclusionsIn,
const int  numExclusionsIn,
const int  numCrosstermsIn,
const char *  h_tupleData,
cudaStream_t  stream 
)

Definition at line 1661 of file ComputeBondedCUDAKernel.cu.

References warpAlign().

01670                        {
01671 
01672   numBonds              = numBondsIn;
01673   numAngles             = numAnglesIn;
01674   numDihedrals          = numDihedralsIn;
01675   numImpropers          = numImpropersIn;
01676   numModifiedExclusions = numModifiedExclusionsIn;
01677   numExclusions         = numExclusionsIn;
01678   numCrossterms         = numCrosstermsIn;
01679 
01680   int numBondsWA     = warpAlign(numBonds);
01681   int numAnglesWA    = warpAlign(numAngles);
01682   int numDihedralsWA = warpAlign(numDihedrals);
01683   int numImpropersWA = warpAlign(numImpropers);
01684   int numModifiedExclusionsWA = warpAlign(numModifiedExclusions);
01685   int numExclusionsWA         = warpAlign(numExclusions);
01686   int numCrosstermsWA         = warpAlign(numCrossterms);
01687 
01688   int sizeTot = numBondsWA*sizeof(CudaBond) + numAnglesWA*sizeof(CudaAngle) + 
01689   numDihedralsWA*sizeof(CudaDihedral) + numImpropersWA*sizeof(CudaDihedral) +
01690   numModifiedExclusionsWA*sizeof(CudaExclusion) + numExclusionsWA*sizeof(CudaExclusion) + 
01691   numCrosstermsWA*sizeof(CudaCrossterm);
01692 
01693   reallocate_device<char>(&tupleData, &tupleDataSize, sizeTot, 1.4f);
01694   copy_HtoD<char>(h_tupleData, tupleData, sizeTot, stream);
01695 
01696   // Setup pointers
01697   int pos = 0;
01698   bonds = (CudaBond *)&tupleData[pos];
01699   pos += numBondsWA*sizeof(CudaBond);
01700 
01701   angles = (CudaAngle* )&tupleData[pos];
01702   pos += numAnglesWA*sizeof(CudaAngle);
01703 
01704   dihedrals = (CudaDihedral* )&tupleData[pos];
01705   pos += numDihedralsWA*sizeof(CudaDihedral);
01706 
01707   impropers = (CudaDihedral* )&tupleData[pos];
01708   pos += numImpropersWA*sizeof(CudaDihedral);
01709 
01710   modifiedExclusions = (CudaExclusion* )&tupleData[pos];
01711   pos += numModifiedExclusionsWA*sizeof(CudaExclusion);
01712 
01713   exclusions = (CudaExclusion* )&tupleData[pos];
01714   pos += numExclusionsWA*sizeof(CudaExclusion);
01715 
01716   crossterms = (CudaCrossterm* )&tupleData[pos];
01717   pos += numCrosstermsWA*sizeof(CudaCrossterm);
01718 }

static int ComputeBondedCUDAKernel::warpAlign ( const int  n  )  [inline, static]

Definition at line 145 of file ComputeBondedCUDAKernel.h.

References WARPSIZE.

Referenced by update().

00145 {return ((n + WARPSIZE - 1)/WARPSIZE)*WARPSIZE;} 


The documentation for this class was generated from the following files:
Generated on Sun Sep 24 01:17:16 2017 for NAMD by  doxygen 1.4.7