ComputeBondedCUDAKernel.cu File Reference

#include <math.h>
#include <cuda.h>
#include <cub/cub.cuh>
#include "ComputeBondedCUDAKernel.h"
#include "DeviceCUDA.h"


Defines

#define BONDEDFORCESKERNEL_NUM_WARP   4
#define CALL(DOENERGY, DOVIRIAL)
#define CALL(DOENERGY, DOVIRIAL, DOELECT)

Functions

__device__ long long int lliroundf (float f)
__device__ unsigned long long int llitoulli (long long int l)
template<typename T>
__forceinline__ __device__ void convertForces (const float x, const float y, const float z, T &Tx, T &Ty, T &Tz)
template<>
__forceinline__ __device__ void convertForces< long long int > (const float x, const float y, const float z, long long int &Tx, long long int &Ty, long long int &Tz)
template<typename T>
__forceinline__ __device__ void calcComponentForces (float fij, const float dx, const float dy, const float dz, T &fxij, T &fyij, T &fzij)
template<>
__forceinline__ __device__ void calcComponentForces< long long int > (float fij, const float dx, const float dy, const float dz, long long int &fxij, long long int &fyij, long long int &fzij)
template<typename T>
__forceinline__ __device__ void calcComponentForces (float fij1, const float dx1, const float dy1, const float dz1, float fij2, const float dx2, const float dy2, const float dz2, T &fxij, T &fyij, T &fzij)
template<>
__forceinline__ __device__ void calcComponentForces< long long int > (float fij1, const float dx1, const float dy1, const float dz1, float fij2, const float dx2, const float dy2, const float dz2, long long int &fxij, long long int &fyij, long long int &fzij)
template<typename T>
__forceinline__ __device__ void storeForces (const T fx, const T fy, const T fz, const int ind, const int stride, T *force)
template<>
__forceinline__ __device__ void storeForces< long long int > (const long long int fx, const long long int fy, const long long int fz, const int ind, const int stride, long long int *force)
template<typename T, bool doEnergy, bool doVirial>
__device__ void bondForce (const int index, const CudaBond *__restrict__ bondList, const CudaBondValue *__restrict__ bondValues, const float4 *__restrict__ xyzq, const int stride, const float3 lata, const float3 latb, const float3 latc, T *__restrict__ force, double &energy, ComputeBondedCUDAKernel::BondedVirial< double > &virial)
template<typename T, bool doEnergy, bool doVirial, bool doElect>
__device__ void modifiedExclusionForce (const int index, const CudaExclusion *__restrict__ exclusionList, const bool doSlow, const float one_scale14, const int vdwCoefTableWidth, cudaTextureObject_t vdwCoefTableTex, cudaTextureObject_t forceTableTex, cudaTextureObject_t energyTableTex, const float4 *__restrict__ xyzq, const int stride, const float3 lata, const float3 latb, const float3 latc, const float cutoff2, double &energyVdw, T *__restrict__ forceNbond, double &energyNbond, T *__restrict__ forceSlow, double &energySlow, ComputeBondedCUDAKernel::BondedVirial< double > &virialNbond, ComputeBondedCUDAKernel::BondedVirial< double > &virialSlow)
template<typename T, bool doEnergy, bool doVirial>
__device__ void exclusionForce (const int index, const CudaExclusion *__restrict__ exclusionList, const float r2_delta, const int r2_delta_expc, cudaTextureObject_t r2_table_tex, cudaTextureObject_t exclusionTableTex, const float4 *__restrict__ xyzq, const int stride, const float3 lata, const float3 latb, const float3 latc, const float cutoff2, T *__restrict__ forceSlow, double &energySlow, ComputeBondedCUDAKernel::BondedVirial< double > &virialSlow)
template<typename T, bool doEnergy, bool doVirial>
__device__ void angleForce (const int index, const CudaAngle *__restrict__ angleList, const CudaAngleValue *__restrict__ angleValues, const float4 *__restrict__ xyzq, const int stride, const float3 lata, const float3 latb, const float3 latc, T *__restrict__ force, double &energy, ComputeBondedCUDAKernel::BondedVirial< double > &virial)
template<bool doEnergy>
__forceinline__ __device__ void diheComp (const CudaDihedralValue *dihedralValues, int ic, const float sin_phi, const float cos_phi, const float scale, float &df, double &e)
template<typename T, bool doEnergy, bool doVirial>
__device__ void diheForce (const int index, const CudaDihedral *__restrict__ diheList, const CudaDihedralValue *__restrict__ dihedralValues, const float4 *__restrict__ xyzq, const int stride, const float3 lata, const float3 latb, const float3 latc, T *__restrict__ force, double &energy, ComputeBondedCUDAKernel::BondedVirial< double > &virial)
__device__ __forceinline__ float3 cross (const float3 v1, const float3 v2)
__device__ __forceinline__ float dot (const float3 v1, const float3 v2)
template<typename T, bool doEnergy, bool doVirial>
__device__ void crosstermForce (const int index, const CudaCrossterm *__restrict__ crosstermList, const CudaCrosstermValue *__restrict__ crosstermValues, const float4 *__restrict__ xyzq, const int stride, const float3 lata, const float3 latb, const float3 latc, T *__restrict__ force, double &energy, ComputeBondedCUDAKernel::BondedVirial< double > &virial)
template<typename T, bool doEnergy, bool doVirial>
__global__ void bondedForcesKernel (const int start, const int numBonds, const CudaBond *__restrict__ bonds, const CudaBondValue *__restrict__ bondValues, const int numAngles, const CudaAngle *__restrict__ angles, const CudaAngleValue *__restrict__ angleValues, const int numDihedrals, const CudaDihedral *__restrict__ dihedrals, const CudaDihedralValue *__restrict__ dihedralValues, const int numImpropers, const CudaDihedral *__restrict__ impropers, const CudaDihedralValue *__restrict__ improperValues, const int numExclusions, const CudaExclusion *__restrict__ exclusions, const int numCrossterms, const CudaCrossterm *__restrict__ crossterms, const CudaCrosstermValue *__restrict__ crosstermValues, const float cutoff2, const float r2_delta, const int r2_delta_expc, const float *__restrict__ r2_table, const float4 *__restrict__ exclusionTable, cudaTextureObject_t r2_table_tex, cudaTextureObject_t exclusionTableTex, const float4 *__restrict__ xyzq, const int stride, const float3 lata, const float3 latb, const float3 latc, T *__restrict__ force, T *__restrict__ forceSlow, double *__restrict__ energies_virials)
template<typename T, bool doEnergy, bool doVirial, bool doElect>
__global__ void modifiedExclusionForcesKernel (const int start, const int numModifiedExclusions, const CudaExclusion *__restrict__ modifiedExclusions, const bool doSlow, const float one_scale14, const float cutoff2, const int vdwCoefTableWidth, const float2 *__restrict__ vdwCoefTable, cudaTextureObject_t vdwCoefTableTex, cudaTextureObject_t modifiedExclusionForceTableTex, cudaTextureObject_t modifiedExclusionEnergyTableTex, const float4 *__restrict__ xyzq, const int stride, const float3 lata, const float3 latb, const float3 latc, T *__restrict__ forceNbond, T *__restrict__ forceSlow, double *__restrict__ energies_virials)

Variables

__thread DeviceCUDA * deviceCUDA


Define Documentation

#define BONDEDFORCESKERNEL_NUM_WARP   4

Definition at line 1023 of file ComputeBondedCUDAKernel.cu.

Referenced by ComputeBondedCUDAKernel::bondedForce(), bondedForcesKernel(), and modifiedExclusionForcesKernel().

#define CALL ( DOENERGY, DOVIRIAL, DOELECT )

Value:

modifiedExclusionForcesKernel<FORCE_TYPE, DOENERGY, DOVIRIAL, DOELECT> \
  <<< nblock_use, nthread, shmem_size, stream >>> \
  (start, numModifiedExclusions, modifiedExclusions, \
    doSlow, one_scale14, cutoff2, \
    cudaNonbondedTables.getVdwCoefTableWidth(), cudaNonbondedTables.getExclusionVdwCoefTable(), \
    cudaNonbondedTables.getExclusionVdwCoefTableTex(), \
    cudaNonbondedTables.getModifiedExclusionForceTableTex(), cudaNonbondedTables.getModifiedExclusionEnergyTableTex(), \
    xyzq, forceStride, lata, latb, latc, \
    &forces[startNbond], &forces[startSlow], energies_virials);

#define CALL ( DOENERGY, DOVIRIAL )

Value:

bondedForcesKernel<FORCE_TYPE, DOENERGY, DOVIRIAL> \
  <<< nblock_use, nthread, shmem_size, stream >>> \
  (start, numBonds, bonds, bondValues, \
    numAngles, angles, angleValues, \
    numDihedrals, dihedrals, dihedralValues, \
    numImpropers, impropers, improperValues, \
    numExclusionsDoSlow, exclusions, \
    numCrossterms, crossterms, crosstermValues, \
    cutoff2, \
    r2_delta, r2_delta_expc, \
    cudaNonbondedTables.get_r2_table(), cudaNonbondedTables.getExclusionTable(), \
    cudaNonbondedTables.get_r2_table_tex(), cudaNonbondedTables.getExclusionTableTex(), \
    xyzq, forceStride, \
    lata, latb, latc, \
    forces, &forces[startSlow], energies_virials);

Referenced by ComputeBondedCUDAKernel::bondedForce(), cuda_nonbonded_forces(), and CudaComputeNonbondedKernel::nonbondedForce().
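
Both CALL macros expand to a single templated kernel launch, so the host code can instantiate one compiled kernel per combination of the boolean template parameters. A minimal dispatch sketch for the two-argument form, assuming host-side runtime flags doEnergy and doVirial (the launch variables nblock_use, nthread, shmem_size, and stream are set up by the caller and are not shown); the three-argument form adds one more nesting level for DOELECT:

    // Hedged sketch: nested runtime branches select one compile-time
    // instantiation of bondedForcesKernel via the CALL macro.
    if (doEnergy) {
      if (doVirial) { CALL(true, true); } else { CALL(true, false); }
    } else {
      if (doVirial) { CALL(false, true); } else { CALL(false, false); }
    }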


Function Documentation

template<typename T, bool doEnergy, bool doVirial>
__device__ void angleForce ( const int  index,
const CudaAngle *__restrict__  angleList,
const CudaAngleValue *__restrict__  angleValues,
const float4 *__restrict__  xyzq,
const int  stride,
const float3  lata,
const float3  latb,
const float3  latc,
T *__restrict__  force,
double &  energy,
ComputeBondedCUDAKernel::BondedVirial< double > &  virial 
)

Definition at line 431 of file ComputeBondedCUDAKernel.cu.

References f, force_to_float, CudaAngle::i, CudaAngle::ioffsetXYZ, CudaAngle::itype, CudaAngle::j, CudaAngleValue::k, CudaAngle::k, CudaAngleValue::k_ub, CudaAngle::koffsetXYZ, CudaAngleValue::normal, CudaAngleValue::r_ub, CudaAngle::scale, and CudaAngleValue::theta0.

00443     {
00444 
00445   CudaAngle al = angleList[index];
00446 
00447   float ishx = al.ioffsetXYZ.x*lata.x + al.ioffsetXYZ.y*latb.x + al.ioffsetXYZ.z*latc.x;
00448   float ishy = al.ioffsetXYZ.x*lata.y + al.ioffsetXYZ.y*latb.y + al.ioffsetXYZ.z*latc.y;
00449   float ishz = al.ioffsetXYZ.x*lata.z + al.ioffsetXYZ.y*latb.z + al.ioffsetXYZ.z*latc.z;
00450 
00451   float kshx = al.koffsetXYZ.x*lata.x + al.koffsetXYZ.y*latb.x + al.koffsetXYZ.z*latc.x;
00452   float kshy = al.koffsetXYZ.x*lata.y + al.koffsetXYZ.y*latb.y + al.koffsetXYZ.z*latc.y;
00453   float kshz = al.koffsetXYZ.x*lata.z + al.koffsetXYZ.y*latb.z + al.koffsetXYZ.z*latc.z;
00454 
00455   float xij = xyzq[al.i].x + ishx - xyzq[al.j].x;
00456   float yij = xyzq[al.i].y + ishy - xyzq[al.j].y;
00457   float zij = xyzq[al.i].z + ishz - xyzq[al.j].z;
00458 
00459   float xkj = xyzq[al.k].x + kshx - xyzq[al.j].x;
00460   float ykj = xyzq[al.k].y + kshy - xyzq[al.j].y;
00461   float zkj = xyzq[al.k].z + kshz - xyzq[al.j].z;
00462 
00463   float rij_inv = rsqrtf(xij*xij + yij*yij + zij*zij);
00464   float rkj_inv = rsqrtf(xkj*xkj + ykj*ykj + zkj*zkj);
00465 
00466   float xijr = xij*rij_inv;
00467   float yijr = yij*rij_inv;
00468   float zijr = zij*rij_inv;
00469   float xkjr = xkj*rkj_inv;
00470   float ykjr = ykj*rkj_inv;
00471   float zkjr = zkj*rkj_inv;
00472   float cos_theta = xijr*xkjr + yijr*ykjr + zijr*zkjr;
00473 
00474   CudaAngleValue angleValue = angleValues[al.itype];
00475   angleValue.k *= al.scale;
00476 
00477   float diff;
00478   if (angleValue.normal == 1) {
00479     // Restrict values of cos_theta to the interval [-0.999 ... 0.999]
00480     cos_theta = min(0.999f, max(-0.999f, cos_theta));
00481     float theta = acosf(cos_theta);
00482     diff = theta - angleValue.theta0;
00483   } else {
00484     diff = cos_theta - angleValue.theta0;
00485   }
00486 
00487   if (doEnergy) {
00488     energy += (double)(angleValue.k * diff * diff);
00489   }
00490   if (angleValue.normal == 1) {
00491     float inv_sin_theta = rsqrtf(1.0f - cos_theta*cos_theta);
00492     if (inv_sin_theta > 1.0e6) {
00493       diff = (diff < 0.0f) ? 1.0f : -1.0f;
00494     } else {
00495       diff *= -inv_sin_theta;
00496     }
00497   }
00498   diff *= -2.0f*angleValue.k;
00499 
00500   float dtxi = rij_inv*(xkjr - cos_theta*xijr);
00501   float dtxj = rkj_inv*(xijr - cos_theta*xkjr);
00502   float dtyi = rij_inv*(ykjr - cos_theta*yijr);
00503   float dtyj = rkj_inv*(yijr - cos_theta*ykjr);
00504   float dtzi = rij_inv*(zkjr - cos_theta*zijr);
00505   float dtzj = rkj_inv*(zijr - cos_theta*zkjr);
00506 
00507   T T_dtxi, T_dtyi, T_dtzi;
00508   T T_dtxj, T_dtyj, T_dtzj;
00509   calcComponentForces<T>(diff, dtxi, dtyi, dtzi, T_dtxi, T_dtyi, T_dtzi);
00510   calcComponentForces<T>(diff, dtxj, dtyj, dtzj, T_dtxj, T_dtyj, T_dtzj);
00511   T T_dtxk = -T_dtxi - T_dtxj;
00512   T T_dtyk = -T_dtyi - T_dtyj;
00513   T T_dtzk = -T_dtzi - T_dtzj;
00514   storeForces<T>(T_dtxk, T_dtyk, T_dtzk, al.j, stride, force);
00515 
00516   if (angleValue.k_ub) {
00517     float xik = xij - xkj;
00518     float yik = yij - ykj;
00519     float zik = zij - zkj;
00520     float rik_inv = rsqrtf(xik*xik + yik*yik + zik*zik);
00521     float rik = 1.0f/rik_inv;
00522     float diff_ub = rik - angleValue.r_ub;
00523     if (doEnergy) {
00524       energy += (double)(angleValue.k_ub * diff_ub * diff_ub);
00525     }
00526     diff_ub *= -2.0f*angleValue.k_ub*rik_inv;
00527     T T_dxik, T_dyik, T_dzik;
00528     calcComponentForces<T>(diff_ub, xik, yik, zik, T_dxik, T_dyik, T_dzik);
00529     T_dtxi += T_dxik;
00530     T_dtyi += T_dyik;
00531     T_dtzi += T_dzik;
00532     T_dtxj -= T_dxik;
00533     T_dtyj -= T_dyik;
00534     T_dtzj -= T_dzik;
00535   }
00536 
00537   storeForces<T>(T_dtxi, T_dtyi, T_dtzi, al.i, stride, force);
00538   storeForces<T>(T_dtxj, T_dtyj, T_dtzj, al.k, stride, force);
00539 
00540   // Store virial
00541   if (doVirial) {
00542 #ifdef WRITE_FULL_VIRIALS
00543     float fxi = ((float)T_dtxi)*force_to_float;
00544     float fyi = ((float)T_dtyi)*force_to_float;
00545     float fzi = ((float)T_dtzi)*force_to_float;
00546     float fxk = ((float)T_dtxj)*force_to_float;
00547     float fyk = ((float)T_dtyj)*force_to_float;
00548     float fzk = ((float)T_dtzj)*force_to_float;
00549     virial.xx = (fxi*xij) + (fxk*xkj);
00550     virial.xy = (fxi*yij) + (fxk*ykj);
00551     virial.xz = (fxi*zij) + (fxk*zkj);
00552     virial.yx = (fyi*xij) + (fyk*xkj);
00553     virial.yy = (fyi*yij) + (fyk*ykj);
00554     virial.yz = (fyi*zij) + (fyk*zkj);
00555     virial.zx = (fzi*xij) + (fzk*xkj);
00556     virial.zy = (fzi*yij) + (fzk*ykj);
00557     virial.zz = (fzi*zij) + (fzk*zkj);
00558 #endif
00559   }
00560 }
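
For reference, the listing above evaluates the CHARMM-style angle term with an optional Urey-Bradley correction. In the code's notation (k already multiplied by al.scale), the energy is

    E = k\,(\theta - \theta_0)^2 + k_{ub}\,(r_{ik} - r_{ub})^2

where the cosine-based variant (angleValue.normal == 0) replaces (\theta - \theta_0) with (\cos\theta - \theta_0). The forces stored for atoms i, j, and k are the negative gradients of this expression.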

template<typename T, bool doEnergy, bool doVirial>
__global__ void bondedForcesKernel ( const int  start,
const int  numBonds,
const CudaBond *__restrict__  bonds,
const CudaBondValue *__restrict__  bondValues,
const int  numAngles,
const CudaAngle *__restrict__  angles,
const CudaAngleValue *__restrict__  angleValues,
const int  numDihedrals,
const CudaDihedral *__restrict__  dihedrals,
const CudaDihedralValue *__restrict__  dihedralValues,
const int  numImpropers,
const CudaDihedral *__restrict__  impropers,
const CudaDihedralValue *__restrict__  improperValues,
const int  numExclusions,
const CudaExclusion *__restrict__  exclusions,
const int  numCrossterms,
const CudaCrossterm *__restrict__  crossterms,
const CudaCrosstermValue *__restrict__  crosstermValues,
const float  cutoff2,
const float  r2_delta,
const int  r2_delta_expc,
const float *__restrict__  r2_table,
const float4 *__restrict__  exclusionTable,
cudaTextureObject_t  r2_table_tex,
cudaTextureObject_t  exclusionTableTex,
const float4 *__restrict__  xyzq,
const int  stride,
const float3  lata,
const float3  latb,
const float3  latc,
T *__restrict__  force,
T *__restrict__  forceSlow,
double *__restrict__  energies_virials 
)

Definition at line 1028 of file ComputeBondedCUDAKernel.cu.

References ComputeBondedCUDAKernel::amdDiheVirialIndex_XX, BLOCK_SYNC, BONDEDFORCESKERNEL_NUM_WARP, double_to_virial, ComputeBondedCUDAKernel::energyIndex_ANGLE, ComputeBondedCUDAKernel::energyIndex_BOND, ComputeBondedCUDAKernel::energyIndex_CROSSTERM, ComputeBondedCUDAKernel::energyIndex_DIHEDRAL, ComputeBondedCUDAKernel::energyIndex_ELECT_SLOW, ComputeBondedCUDAKernel::energyIndex_IMPROPER, llitoulli(), ComputeBondedCUDAKernel::normalVirialIndex_XX, ComputeBondedCUDAKernel::slowVirialIndex_XX, USE_FP_VIRIAL, WARP_FULL_MASK, WARP_SHUFFLE_XOR, WARPSIZE, ComputeBondedCUDAKernel::BondedVirial< T >::xx, ComputeBondedCUDAKernel::BondedVirial< T >::xy, ComputeBondedCUDAKernel::BondedVirial< T >::xz, ComputeBondedCUDAKernel::BondedVirial< T >::yx, ComputeBondedCUDAKernel::BondedVirial< T >::yy, ComputeBondedCUDAKernel::BondedVirial< T >::yz, ComputeBondedCUDAKernel::BondedVirial< T >::zx, ComputeBondedCUDAKernel::BondedVirial< T >::zy, and ComputeBondedCUDAKernel::BondedVirial< T >::zz.

01067                                          {
01068 
01069   // Thread-block index
01070   int indexTB = start + blockIdx.x;
01071 
01072   const int numBondsTB     = (numBonds + blockDim.x - 1)/blockDim.x;
01073   const int numAnglesTB    = (numAngles + blockDim.x - 1)/blockDim.x;
01074   const int numDihedralsTB = (numDihedrals + blockDim.x - 1)/blockDim.x;
01075   const int numImpropersTB = (numImpropers + blockDim.x - 1)/blockDim.x;
01076   const int numExclusionsTB= (numExclusions + blockDim.x - 1)/blockDim.x;
01077   const int numCrosstermsTB= (numCrossterms + blockDim.x - 1)/blockDim.x;
01078 
01079   // Each thread computes a single bonded interaction.
01080   // Each thread block computes a single bonded type.
01081   double energy;
01082   int energyIndex;
01083 
01084   if (doEnergy) {
01085     energy = 0.0;
01086     energyIndex = -1;
01087   }
01088 
01089 #ifdef WRITE_FULL_VIRIALS
01090   ComputeBondedCUDAKernel::BondedVirial<double> virial;
01091   int virialIndex;
01092   if (doVirial) {
01093     virial.xx = 0.0;
01094     virial.xy = 0.0;
01095     virial.xz = 0.0;
01096     virial.yx = 0.0;
01097     virial.yy = 0.0;
01098     virial.yz = 0.0;
01099     virial.zx = 0.0;
01100     virial.zy = 0.0;
01101     virial.zz = 0.0;
01102     virialIndex = ComputeBondedCUDAKernel::normalVirialIndex_XX;
01103   }
01104 #endif
01105 
01106   if (indexTB < numBondsTB) {
01107     int i = threadIdx.x + indexTB*blockDim.x;
01108     if (doEnergy) {
01109       energyIndex = ComputeBondedCUDAKernel::energyIndex_BOND;
01110     }
01111     if (i < numBonds) {
01112       bondForce<T, doEnergy, doVirial>
01113       (i, bonds, bondValues, xyzq,
01114         stride, lata, latb, latc,
01115         force, energy, virial);
01116     }
01117     goto reduce;
01118   }
01119   indexTB -= numBondsTB;
01120 
01121   if (indexTB < numAnglesTB) {
01122     int i = threadIdx.x + indexTB*blockDim.x;
01123     if (doEnergy) {
01124       energyIndex = ComputeBondedCUDAKernel::energyIndex_ANGLE;
01125     }
01126     if (i < numAngles) {
01127       angleForce<T, doEnergy, doVirial>
01128       (i, angles, angleValues, xyzq, stride,
01129         lata, latb, latc,
01130         force, energy, virial);
01131     }
01132     goto reduce;
01133   }
01134   indexTB -= numAnglesTB;
01135 
01136   if (indexTB < numDihedralsTB) {
01137     int i = threadIdx.x + indexTB*blockDim.x;
01138     if (doEnergy) {
01139       energyIndex = ComputeBondedCUDAKernel::energyIndex_DIHEDRAL;
01140     }
01141     if (doVirial) {
01142       virialIndex = ComputeBondedCUDAKernel::amdDiheVirialIndex_XX;
01143     }
01144     if (i < numDihedrals) {
01145       diheForce<T, doEnergy, doVirial>
01146       (i, dihedrals, dihedralValues, xyzq, stride,
01147         lata, latb, latc,
01148         force, energy, virial);
01149     }
01150     goto reduce;
01151   }
01152   indexTB -= numDihedralsTB;
01153 
01154   if (indexTB < numImpropersTB) {
01155     int i = threadIdx.x + indexTB*blockDim.x;
01156     if (doEnergy) {
01157       energyIndex = ComputeBondedCUDAKernel::energyIndex_IMPROPER;
01158     }
01159     if (i < numImpropers) {
01160       diheForce<T, doEnergy, doVirial>
01161       (i, impropers, improperValues, xyzq, stride,
01162         lata, latb, latc,
01163         force, energy, virial);
01164     }
01165     goto reduce;
01166   }
01167   indexTB -= numImpropersTB;
01168 
01169   if (indexTB < numExclusionsTB) {
01170     int i = threadIdx.x + indexTB*blockDim.x;
01171     if (doEnergy) {
01172       energyIndex = ComputeBondedCUDAKernel::energyIndex_ELECT_SLOW;
01173     }
01174     if (doVirial) {
01175       virialIndex = ComputeBondedCUDAKernel::slowVirialIndex_XX;
01176     }
01177     if (i < numExclusions) {
01178       exclusionForce<T, doEnergy, doVirial>
01179       (i, exclusions, r2_delta, r2_delta_expc,
01180 #if __CUDA_ARCH__ >= 350
01181         r2_table, exclusionTable,
01182 #else
01183         r2_table_tex, exclusionTableTex,
01184 #endif
01185         xyzq, stride, lata, latb, latc, cutoff2,
01186         forceSlow, energy, virial);
01187     }
01188     goto reduce;
01189   }
01190   indexTB -= numExclusionsTB;
01191 
01192   if (indexTB < numCrosstermsTB) {
01193     int i = threadIdx.x + indexTB*blockDim.x;
01194     if (doEnergy) {
01195       energyIndex = ComputeBondedCUDAKernel::energyIndex_CROSSTERM;
01196     }
01197     if (doVirial) {
01198       virialIndex = ComputeBondedCUDAKernel::amdDiheVirialIndex_XX;
01199     }
01200     if (i < numCrossterms) {
01201       crosstermForce<T, doEnergy, doVirial>
01202       (i, crossterms, crosstermValues,
01203         xyzq, stride, lata, latb, latc,
01204         force, energy, virial);
01205     }
01206     goto reduce;
01207   }
01208   // indexTB -= numCrosstermsTB;
01209 
01210   reduce:
01211 
01212   // Write energies to global memory
01213   if (doEnergy) {
01214     // energyIndex is constant within thread-block
01215     __shared__ double shEnergy[BONDEDFORCESKERNEL_NUM_WARP];
01216 #pragma unroll
01217     for (int i=16;i >= 1;i/=2) {
01218       energy += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energy, i, 32);
01219     }
01220     int laneID = (threadIdx.x & (WARPSIZE - 1));
01221     int warpID = threadIdx.x / WARPSIZE;
01222     if (laneID == 0) {
01223       shEnergy[warpID] = energy;
01224     }
01225     BLOCK_SYNC;
01226     if (warpID == 0) {
01227       energy = (laneID < BONDEDFORCESKERNEL_NUM_WARP) ? shEnergy[laneID] : 0.0;
01228 #pragma unroll
01229       for (int i=16;i >= 1;i/=2) {
01230         energy += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energy, i, 32);
01231       }
01232       if (laneID == 0) {
01233         atomicAdd(&energies_virials[energyIndex], energy);
01234       }
01235     }
01236   }
01237 
01238   // Write virials to global memory
01239 #ifdef WRITE_FULL_VIRIALS
01240   if (doVirial) {
01241 #pragma unroll
01242     for (int i=16;i >= 1;i/=2) {
01243       virial.xx += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virial.xx, i, 32);
01244       virial.xy += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virial.xy, i, 32);
01245       virial.xz += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virial.xz, i, 32);
01246       virial.yx += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virial.yx, i, 32);
01247       virial.yy += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virial.yy, i, 32);
01248       virial.yz += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virial.yz, i, 32);
01249       virial.zx += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virial.zx, i, 32);
01250       virial.zy += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virial.zy, i, 32);
01251       virial.zz += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virial.zz, i, 32);
01252     }
01253     __shared__ ComputeBondedCUDAKernel::BondedVirial<double> shVirial[BONDEDFORCESKERNEL_NUM_WARP];
01254     int laneID = (threadIdx.x & (WARPSIZE - 1));
01255     int warpID = threadIdx.x / WARPSIZE;
01256     if (laneID == 0) {
01257       shVirial[warpID] = virial;
01258     }
01259     BLOCK_SYNC;
01260 
01261     if (warpID == 0) {
01262       virial.xx = 0.0;
01263       virial.xy = 0.0;
01264       virial.xz = 0.0;
01265       virial.yx = 0.0;
01266       virial.yy = 0.0;
01267       virial.yz = 0.0;
01268       virial.zx = 0.0;
01269       virial.zy = 0.0;
01270       virial.zz = 0.0;
01271       if (laneID < BONDEDFORCESKERNEL_NUM_WARP) virial = shVirial[laneID];
01272 #pragma unroll
01273       for (int i=16;i >= 1;i/=2) {
01274         virial.xx += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virial.xx, i, 32);
01275         virial.xy += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virial.xy, i, 32);
01276         virial.xz += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virial.xz, i, 32);
01277         virial.yx += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virial.yx, i, 32);
01278         virial.yy += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virial.yy, i, 32);
01279         virial.yz += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virial.yz, i, 32);
01280         virial.zx += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virial.zx, i, 32);
01281         virial.zy += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virial.zy, i, 32);
01282         virial.zz += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virial.zz, i, 32);
01283       }   
01284 
01285       if (laneID == 0) {
01286 #ifdef USE_FP_VIRIAL
01287         atomicAdd((unsigned long long int *)&energies_virials[virialIndex + 0], llitoulli(virial.xx*double_to_virial));
01288         atomicAdd((unsigned long long int *)&energies_virials[virialIndex + 1], llitoulli(virial.xy*double_to_virial));
01289         atomicAdd((unsigned long long int *)&energies_virials[virialIndex + 2], llitoulli(virial.xz*double_to_virial));
01290         atomicAdd((unsigned long long int *)&energies_virials[virialIndex + 3], llitoulli(virial.yx*double_to_virial));
01291         atomicAdd((unsigned long long int *)&energies_virials[virialIndex + 4], llitoulli(virial.yy*double_to_virial));
01292         atomicAdd((unsigned long long int *)&energies_virials[virialIndex + 5], llitoulli(virial.yz*double_to_virial));
01293         atomicAdd((unsigned long long int *)&energies_virials[virialIndex + 6], llitoulli(virial.zx*double_to_virial));
01294         atomicAdd((unsigned long long int *)&energies_virials[virialIndex + 7], llitoulli(virial.zy*double_to_virial));
01295         atomicAdd((unsigned long long int *)&energies_virials[virialIndex + 8], llitoulli(virial.zz*double_to_virial));
01296 #else
01297         atomicAdd(&energies_virials[virialIndex + 0], virial.xx);
01298         atomicAdd(&energies_virials[virialIndex + 1], virial.xy);
01299         atomicAdd(&energies_virials[virialIndex + 2], virial.xz);
01300         atomicAdd(&energies_virials[virialIndex + 3], virial.yx);
01301         atomicAdd(&energies_virials[virialIndex + 4], virial.yy);
01302         atomicAdd(&energies_virials[virialIndex + 5], virial.yz);
01303         atomicAdd(&energies_virials[virialIndex + 6], virial.zx);
01304         atomicAdd(&energies_virials[virialIndex + 7], virial.zy);
01305         atomicAdd(&energies_virials[virialIndex + 8], virial.zz);
01306 #endif
01307       }
01308     }
01309   }
01310 #endif
01311 
01312 }
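
The ceil-divisions at the top of the kernel imply that each thread block processes one tile of one bonded term type, with tiles for the six types concatenated along the grid. A hedged host-side sketch of the matching launch geometry (variable names hypothetical):

    // Hedged sketch: grid size consistent with the kernel's tile indexing.
    int nthread = WARPSIZE*BONDEDFORCESKERNEL_NUM_WARP;   // 32*4 = 128 threads
    int nblock  = (numBonds      + nthread - 1)/nthread
                + (numAngles     + nthread - 1)/nthread
                + (numDihedrals  + nthread - 1)/nthread
                + (numImpropers  + nthread - 1)/nthread
                + (numExclusions + nthread - 1)/nthread
                + (numCrossterms + nthread - 1)/nthread;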

template<typename T, bool doEnergy, bool doVirial>
__device__ void bondForce ( const int  index,
const CudaBond *__restrict__  bondList,
const CudaBondValue *__restrict__  bondValues,
const float4 *__restrict__  xyzq,
const int  stride,
const float3  lata,
const float3  latb,
const float3  latc,
T *__restrict__  force,
double &  energy,
ComputeBondedCUDAKernel::BondedVirial< double > &  virial 
)

Definition at line 141 of file ComputeBondedCUDAKernel.cu.

References CudaBond::i, CudaBond::ioffsetXYZ, CudaBond::itype, CudaBond::j, CudaBondValue::k, CudaBond::scale, and CudaBondValue::x0.

00154     {
00155 
00156   CudaBond bl = bondList[index];
00157   CudaBondValue bondValue = bondValues[bl.itype];
00158   if (bondValue.x0 == 0.0f) return;
00159 
00160   float shx = bl.ioffsetXYZ.x*lata.x + bl.ioffsetXYZ.y*latb.x + bl.ioffsetXYZ.z*latc.x;
00161   float shy = bl.ioffsetXYZ.x*lata.y + bl.ioffsetXYZ.y*latb.y + bl.ioffsetXYZ.z*latc.y;
00162   float shz = bl.ioffsetXYZ.x*lata.z + bl.ioffsetXYZ.y*latb.z + bl.ioffsetXYZ.z*latc.z;
00163 
00164   float4 xyzqi = xyzq[bl.i];
00165   float4 xyzqj = xyzq[bl.j];
00166 
00167   float xij = xyzqi.x + shx - xyzqj.x;
00168   float yij = xyzqi.y + shy - xyzqj.y;
00169   float zij = xyzqi.z + shz - xyzqj.z;
00170 
00171   float r = sqrtf(xij*xij + yij*yij + zij*zij);
00172 
00173   float db = r - bondValue.x0;
00174   float fij = db * bondValue.k * bl.scale;
00175  
00176   if (doEnergy) {
00177     energy += (double)(fij*db);
00178   }
00179   fij *= -2.0f/r;
00180   
00181   T T_fxij, T_fyij, T_fzij;
00182   calcComponentForces<T>(fij, xij, yij, zij, T_fxij, T_fyij, T_fzij);
00183   
00184   // Store forces
00185   storeForces<T>(T_fxij, T_fyij, T_fzij, bl.i, stride, force);
00186   storeForces<T>(-T_fxij, -T_fyij, -T_fzij, bl.j, stride, force);
00187 
00188   // Store virial
00189   if (doVirial) {
00190 #ifdef WRITE_FULL_VIRIALS
00191     float fxij = fij*xij;
00192     float fyij = fij*yij;
00193     float fzij = fij*zij;
00194     virial.xx = (fxij*xij);
00195     virial.xy = (fxij*yij);
00196     virial.xz = (fxij*zij);
00197     virial.yx = (fyij*xij);
00198     virial.yy = (fyij*yij);
00199     virial.yz = (fyij*zij);
00200     virial.zx = (fzij*xij);
00201     virial.zy = (fzij*yij);
00202     virial.zz = (fzij*zij);
00203 #endif
00204   }
00205 }
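
In the notation of the listing (s = bl.scale, k = bondValue.k, x_0 = bondValue.x0), the harmonic bond energy and the force applied to atom i (the opposite sign is applied to atom j) are

    E = s\,k\,(r - x_0)^2, \qquad \mathbf{F}_i = -\frac{2\,s\,k\,(r - x_0)}{r}\,\mathbf{r}_{ij}

which matches the fij*db energy accumulation and the fij *= -2.0f/r force prefactor above; the early return skips placeholder entries with x_0 == 0.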

template<typename T>
__forceinline__ __device__ void calcComponentForces ( float  fij1,
const float  dx1,
const float  dy1,
const float  dz1,
float  fij2,
const float  dx2,
const float  dy2,
const float  dz2,
T &  fxij,
T &  fyij,
T &  fzij 
)

Definition at line 82 of file ComputeBondedCUDAKernel.cu.

00087                              {
00088 
00089   fxij = (T)(fij1*dx1 + fij2*dx2);
00090   fyij = (T)(fij1*dy1 + fij2*dy2);
00091   fzij = (T)(fij1*dz1 + fij2*dz2);
00092 }

template<typename T>
__forceinline__ __device__ void calcComponentForces ( float  fij,
const float  dx,
const float  dy,
const float  dz,
T &  fxij,
T &  fyij,
T &  fzij 
)

Definition at line 53 of file ComputeBondedCUDAKernel.cu.

00056                              {
00057 
00058   fxij = (T)(fij*dx);
00059   fyij = (T)(fij*dy);
00060   fzij = (T)(fij*dz);
00061 
00062 }

template<>
__forceinline__ __device__ void calcComponentForces< long long int > ( float  fij1,
const float  dx1,
const float  dy1,
const float  dz1,
float  fij2,
const float  dx2,
const float  dy2,
const float  dz2,
long long int &  fxij,
long long int &  fyij,
long long int &  fzij 
)

template<>
__forceinline__ __device__ void calcComponentForces< long long int > ( float  fij,
const float  dx,
const float  dy,
const float  dz,
long long int &  fxij,
long long int &  fyij,
long long int &  fzij 
)
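
The long long int specializations above are declared on this page without bodies. A plausible sketch of the single-force variant, assuming NAMD's fixed-point force accumulation with a float-to-fixed scaling constant (here called float_to_force, the counterpart of the force_to_float constant referenced elsewhere on this page) and the lliroundf() helper documented below; this is an illustration, not the actual definition:

    template <>
    __forceinline__ __device__
    void calcComponentForces<long long int>(float fij,
      const float dx, const float dy, const float dz,
      long long int &fxij, long long int &fyij, long long int &fzij) {
      // Scale to 64-bit fixed point so forces can later be combined
      // with deterministic integer atomics.
      fxij = lliroundf(fij*dx*float_to_force);
      fyij = lliroundf(fij*dy*float_to_force);
      fzij = lliroundf(fij*dz*float_to_force);
    }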

template<typename T>
__forceinline__ __device__ void convertForces ( const float  x,
const float  y,
const float  z,
T &  Tx,
T &  Ty,
T &  Tz 
)

Definition at line 33 of file ComputeBondedCUDAKernel.cu.

00034                        {
00035 
00036   Tx = (T)(x);
00037   Ty = (T)(y);
00038   Tz = (T)(z);
00039 }

template<>
__forceinline__ __device__ void convertForces< long long int > ( const float  x,
const float  y,
const float  z,
long long int &  Tx,
long long int &  Ty,
long long int &  Tz 
)

__device__ __forceinline__ float3 cross ( const float3  v1,
const float3  v2 
)

Definition at line 750 of file ComputeBondedCUDAKernel.cu.

Referenced by Sequencer::addRotDragToPosition(), ImproperElem::computeForce(), DihedralElem::computeForce(), CrosstermElem::computeForce(), crosstermForce(), ComputeStir::doForce(), ComputeConsTorque::doForce(), GridforceFullMainGrid::initialize(), ComputeMsmMgr::initialize(), proc_dihedralgrad(), proc_getdihedral(), Lattice::set(), settle1(), Sequencer::submitReductions(), and Lattice::volume().

00750                                                                           {
00751  return make_float3(
00752   v1.y*v2.z-v2.y*v1.z,
00753   v2.x*v1.z-v1.x*v2.z,
00754   v1.x*v2.y-v2.x*v1.y
00755   );
00756 }

template<typename T, bool doEnergy, bool doVirial>
__device__ void crosstermForce ( const int  index,
const CudaCrossterm *__restrict__  crosstermList,
const CudaCrosstermValue *__restrict__  crosstermValues,
const float4 *__restrict__  xyzq,
const int  stride,
const float3  lata,
const float3  latb,
const float3  latc,
T *__restrict__  force,
double &  energy,
ComputeBondedCUDAKernel::BondedVirial< double > &  virial 
)

Definition at line 766 of file ComputeBondedCUDAKernel.cu.

References A, B, CudaCrosstermValue::c, cross(), CudaCrosstermValue::dim, dot(), f, CudaCrossterm::i1, CudaCrossterm::i2, CudaCrossterm::i3, CudaCrossterm::i4, CudaCrossterm::i5, CudaCrossterm::i6, CudaCrossterm::i7, CudaCrossterm::i8, CudaCrossterm::itype, M_PI, CudaCrossterm::offset12XYZ, CudaCrossterm::offset23XYZ, CudaCrossterm::offset34XYZ, CudaCrossterm::offset56XYZ, CudaCrossterm::offset67XYZ, CudaCrossterm::offset78XYZ, and CudaCrossterm::scale.

00779     {
00780 
00781   CudaCrossterm cl = crosstermList[index];
00782 
00783   // ----------------------------------------------------------------------------
00784   // Angle between 1 - 2 - 3 - 4
00785   //
00786   // 1 - 2
00787   float3 sh12 = make_float3(
00788     cl.offset12XYZ.x*lata.x + cl.offset12XYZ.y*latb.x + cl.offset12XYZ.z*latc.x,
00789     cl.offset12XYZ.x*lata.y + cl.offset12XYZ.y*latb.y + cl.offset12XYZ.z*latc.y,
00790     cl.offset12XYZ.x*lata.z + cl.offset12XYZ.y*latb.z + cl.offset12XYZ.z*latc.z);
00791 
00792   float4 xyzq1 = xyzq[cl.i1];
00793   float4 xyzq2 = xyzq[cl.i2];
00794 
00795   float3 r12 = make_float3(
00796     xyzq1.x + sh12.x - xyzq2.x,
00797     xyzq1.y + sh12.y - xyzq2.y,
00798     xyzq1.z + sh12.z - xyzq2.z);
00799 
00800   // 2 - 3
00801   float3 sh23 = make_float3(
00802     cl.offset23XYZ.x*lata.x + cl.offset23XYZ.y*latb.x + cl.offset23XYZ.z*latc.x,
00803     cl.offset23XYZ.x*lata.y + cl.offset23XYZ.y*latb.y + cl.offset23XYZ.z*latc.y,
00804     cl.offset23XYZ.x*lata.z + cl.offset23XYZ.y*latb.z + cl.offset23XYZ.z*latc.z);
00805 
00806   float4 xyzq3 = xyzq[cl.i3];
00807 
00808   float3 r23 = make_float3(
00809     xyzq2.x + sh23.x - xyzq3.x,
00810     xyzq2.y + sh23.y - xyzq3.y,
00811     xyzq2.z + sh23.z - xyzq3.z);
00812 
00813   // 3 - 4
00814   float3 sh34 = make_float3(
00815     cl.offset34XYZ.x*lata.x + cl.offset34XYZ.y*latb.x + cl.offset34XYZ.z*latc.x,
00816     cl.offset34XYZ.x*lata.y + cl.offset34XYZ.y*latb.y + cl.offset34XYZ.z*latc.y,
00817     cl.offset34XYZ.x*lata.z + cl.offset34XYZ.y*latb.z + cl.offset34XYZ.z*latc.z);
00818 
00819   float4 xyzq4 = xyzq[cl.i4];
00820 
00821   float3 r34 = make_float3(
00822     xyzq3.x + sh34.x - xyzq4.x,
00823     xyzq3.y + sh34.y - xyzq4.y,
00824     xyzq3.z + sh34.z - xyzq4.z);
00825 
00826   // Calculate the cross products
00827   float3 A = cross(r12, r23);
00828   float3 B = cross(r23, r34);
00829   float3 C = cross(r23, A);
00830 
00831   // Calculate the inverse distances
00832   float inv_rA = rsqrtf(dot(A, A));
00833   float inv_rB = rsqrtf(dot(B, B));
00834   float inv_rC = rsqrtf(dot(C, C));
00835 
00836   //  Calculate the sin and cos
00837   float cos_phi = dot(A, B)*(inv_rA*inv_rB);
00838   float sin_phi = dot(C, B)*(inv_rC*inv_rB);
00839 
00840   float phi = -atan2f(sin_phi,cos_phi);
00841 
00842   // ----------------------------------------------------------------------------
00843   // Angle between 5 - 6 - 7 - 8
00844   //
00845 
00846   // 5 - 6
00847   float3 sh56 = make_float3(
00848     cl.offset56XYZ.x*lata.x + cl.offset56XYZ.y*latb.x + cl.offset56XYZ.z*latc.x,
00849     cl.offset56XYZ.x*lata.y + cl.offset56XYZ.y*latb.y + cl.offset56XYZ.z*latc.y,
00850     cl.offset56XYZ.x*lata.z + cl.offset56XYZ.y*latb.z + cl.offset56XYZ.z*latc.z);
00851 
00852   float4 xyzq5 = xyzq[cl.i5];
00853   float4 xyzq6 = xyzq[cl.i6];
00854 
00855   float3 r56 = make_float3(
00856     xyzq5.x + sh56.x - xyzq6.x,
00857     xyzq5.y + sh56.y - xyzq6.y,
00858     xyzq5.z + sh56.z - xyzq6.z);
00859 
00860   // 6 - 7
00861   float3 sh67 = make_float3(
00862     cl.offset67XYZ.x*lata.x + cl.offset67XYZ.y*latb.x + cl.offset67XYZ.z*latc.x,
00863     cl.offset67XYZ.x*lata.y + cl.offset67XYZ.y*latb.y + cl.offset67XYZ.z*latc.y,
00864     cl.offset67XYZ.x*lata.z + cl.offset67XYZ.y*latb.z + cl.offset67XYZ.z*latc.z);
00865 
00866   float4 xyzq7 = xyzq[cl.i7];
00867 
00868   float3 r67 = make_float3(
00869     xyzq6.x + sh67.x - xyzq7.x,
00870     xyzq6.y + sh67.y - xyzq7.y,
00871     xyzq6.z + sh67.z - xyzq7.z);
00872 
00873   // 7 - 8
00874   float3 sh78 = make_float3(
00875     cl.offset78XYZ.x*lata.x + cl.offset78XYZ.y*latb.x + cl.offset78XYZ.z*latc.x,
00876     cl.offset78XYZ.x*lata.y + cl.offset78XYZ.y*latb.y + cl.offset78XYZ.z*latc.y,
00877     cl.offset78XYZ.x*lata.z + cl.offset78XYZ.y*latb.z + cl.offset78XYZ.z*latc.z);
00878 
00879   float4 xyzq8 = xyzq[cl.i8];
00880 
00881   float3 r78 = make_float3(
00882     xyzq7.x + sh78.x - xyzq8.x,
00883     xyzq7.y + sh78.y - xyzq8.y,
00884     xyzq7.z + sh78.z - xyzq8.z);
00885 
00886   // Calculate the cross products
00887   float3 D = cross(r56, r67);
00888   float3 E = cross(r67, r78);
00889   float3 F = cross(r67, D);
00890   
00891   // Calculate the inverse distances
00892   float inv_rD = rsqrtf(dot(D, D));
00893   float inv_rE = rsqrtf(dot(E, E));
00894   float inv_rF = rsqrtf(dot(F, F));
00895 
00896   //  Calculate the sin and cos
00897   float cos_psi = dot(D, E)*(inv_rD*inv_rE);
00898   float sin_psi = dot(F, E)*(inv_rF*inv_rE);
00899 
00900   float psi = -atan2f(sin_psi,cos_psi);
00901 
00902   // ----------------------------------------------------------------------------
00903   // Calculate the energy
00904 
00905   const float inv_h = (float)( (CudaCrosstermValue::dim) / (2.0*M_PI) );
00906 
00907   // Shift angles
00908   phi = fmod(phi + (float)M_PI, 2.0f*(float)M_PI);
00909   psi = fmod(psi + (float)M_PI, 2.0f*(float)M_PI);
00910 
00911   // distance measured in grid points between angle and smallest value
00912   float phi_h = phi * inv_h;
00913   float psi_h = psi * inv_h;
00914 
00915   // find smallest numbered grid point in stencil
00916   int iphi = (int)floor(phi_h);
00917   int ipsi = (int)floor(psi_h);
00918 
00919   const CudaCrosstermValue& cp = crosstermValues[cl.itype];
00920 
00921   // Load coefficients
00922   float4 c[4];
00923   c[0] = cp.c[iphi][ipsi][0];
00924   c[1] = cp.c[iphi][ipsi][1];
00925   c[2] = cp.c[iphi][ipsi][2];
00926   c[3] = cp.c[iphi][ipsi][3];
00927 
00928   float dphi = phi_h - (float)iphi;
00929   float dpsi = psi_h - (float)ipsi;
00930 
00931   if (doEnergy) {
00932     float energyf =          c[3].x + dphi*( c[3].y + dphi*( c[3].z + dphi*c[3].w ) );
00933     energyf = energyf*dpsi + c[2].x + dphi*( c[2].y + dphi*( c[2].z + dphi*c[2].w ) );
00934     energyf = energyf*dpsi + c[1].x + dphi*( c[1].y + dphi*( c[1].z + dphi*c[1].w ) );
00935     energyf = energyf*dpsi + c[0].x + dphi*( c[0].y + dphi*( c[0].z + dphi*c[0].w ) );
00936     energy += energyf*cl.scale;
00937   }
00938 
00939   float dEdphi =         3.0f*(c[0].w + dpsi*( c[1].w + dpsi*( c[2].w + dpsi*c[3].w ) ));
00940   dEdphi = dEdphi*dphi + 2.0f*(c[0].z + dpsi*( c[1].z + dpsi*( c[2].z + dpsi*c[3].z ) ));
00941   dEdphi = dEdphi*dphi +      (c[0].y + dpsi*( c[1].y + dpsi*( c[2].y + dpsi*c[3].y ) ));  // 13 muls
00942   dEdphi *= cl.scale*inv_h;
00943 
00944   float dEdpsi =         3.0f*(c[3].x + dphi*( c[3].y + dphi*( c[3].z + dphi*c[3].w ) ));
00945   dEdpsi = dEdpsi*dpsi + 2.0f*(c[2].x + dphi*( c[2].y + dphi*( c[2].z + dphi*c[2].w ) ));
00946   dEdpsi = dEdpsi*dpsi +      (c[1].x + dphi*( c[1].y + dphi*( c[1].z + dphi*c[1].w ) ));  // 13 muls
00947   dEdpsi *= cl.scale*inv_h;
00948 
00949   // float normCross1 = dot(A, A);
00950   float square_r23 = dot(r23, r23);
00951   float norm_r23 = sqrtf(square_r23);
00952   float inv_square_r23 = 1.0f/square_r23;
00953   float ff1 = dEdphi*norm_r23*inv_rA*inv_rA;
00954   float ff2 = -dot(r12, r23)*inv_square_r23;
00955   float ff3 = -dot(r34, r23)*inv_square_r23;
00956   float ff4 = -dEdphi*norm_r23*inv_rB*inv_rB;
00957 
00958   float3 f1 = make_float3(ff1*A.x, ff1*A.y, ff1*A.z);
00959   float3 f4 = make_float3(ff4*B.x, ff4*B.y, ff4*B.z);
00960   float3 t1 = make_float3( ff2*f1.x - ff3*f4.x, ff2*f1.y - ff3*f4.y, ff2*f1.z - ff3*f4.z );
00961   float3 f2 = make_float3(  t1.x - f1.x,  t1.y - f1.y,  t1.z - f1.z);
00962   float3 f3 = make_float3( -t1.x - f4.x, -t1.y - f4.y, -t1.z - f4.z);
00963 
00964   T T_f1x, T_f1y, T_f1z;
00965   T T_f2x, T_f2y, T_f2z;
00966   T T_f3x, T_f3y, T_f3z;
00967   T T_f4x, T_f4y, T_f4z;
00968   convertForces<T>(f1.x, f1.y, f1.z, T_f1x, T_f1y, T_f1z);
00969   convertForces<T>(f2.x, f2.y, f2.z, T_f2x, T_f2y, T_f2z);
00970   convertForces<T>(f3.x, f3.y, f3.z, T_f3x, T_f3y, T_f3z);
00971   convertForces<T>(f4.x, f4.y, f4.z, T_f4x, T_f4y, T_f4z);
00972   storeForces<T>(T_f1x, T_f1y, T_f1z, cl.i1, stride, force);
00973   storeForces<T>(T_f2x, T_f2y, T_f2z, cl.i2, stride, force);
00974   storeForces<T>(T_f3x, T_f3y, T_f3z, cl.i3, stride, force);
00975   storeForces<T>(T_f4x, T_f4y, T_f4z, cl.i4, stride, force);
00976 
00977   float square_r67 = dot(r67, r67);
00978   float norm_r67 = sqrtf(square_r67);
00979   float inv_square_r67 = 1.0f/(square_r67);
00980   ff1 = dEdpsi*norm_r67*inv_rD*inv_rD;
00981   ff2 = -dot(r56, r67)*inv_square_r67;
00982   ff3 = -dot(r78, r67)*inv_square_r67;
00983   ff4 = -dEdpsi*norm_r67*inv_rE*inv_rE;
00984   
00985   float3 f5 = make_float3( ff1*D.x, ff1*D.y, ff1*D.z );
00986   float3 f8 = make_float3( ff4*E.x, ff4*E.y, ff4*E.z );
00987   float3 t2 = make_float3( ff2*f5.x - ff3*f8.x, ff2*f5.y - ff3*f8.y, ff2*f5.z - ff3*f8.z );
00988   float3 f6 = make_float3( t2.x - f5.x,  t2.y - f5.y,  t2.z - f5.z );
00989   float3 f7 = make_float3(-t2.x - f8.x, -t2.y - f8.y, -t2.z - f8.z );
00990 
00991   T T_f5x, T_f5y, T_f5z;
00992   T T_f6x, T_f6y, T_f6z;
00993   T T_f7x, T_f7y, T_f7z;
00994   T T_f8x, T_f8y, T_f8z;
00995   convertForces<T>(f5.x, f5.y, f5.z, T_f5x, T_f5y, T_f5z);
00996   convertForces<T>(f6.x, f6.y, f6.z, T_f6x, T_f6y, T_f6z);
00997   convertForces<T>(f7.x, f7.y, f7.z, T_f7x, T_f7y, T_f7z);
00998   convertForces<T>(f8.x, f8.y, f8.z, T_f8x, T_f8y, T_f8z);
00999   storeForces<T>(T_f5x, T_f5y, T_f5z, cl.i5, stride, force);
01000   storeForces<T>(T_f6x, T_f6y, T_f6z, cl.i6, stride, force);
01001   storeForces<T>(T_f7x, T_f7y, T_f7z, cl.i7, stride, force);
01002   storeForces<T>(T_f8x, T_f8y, T_f8z, cl.i8, stride, force);
01003 
01004   // Store virial
01005   if (doVirial) {
01006 #ifdef WRITE_FULL_VIRIALS
01007     float3 s12 = make_float3( f1.x + f2.x, f1.y + f2.y, f1.z + f2.z );
01008     float3 s56 = make_float3( f5.x + f6.x, f5.y + f6.y, f5.z + f6.z );
01009     virial.xx = f1.x*r12.x + s12.x*r23.x - f4.x*r34.x + f5.x*r56.x + s56.x*r67.x - f8.x*r78.x;
01010     virial.xy = f1.x*r12.y + s12.x*r23.y - f4.x*r34.y + f5.x*r56.y + s56.x*r67.y - f8.x*r78.y;
01011     virial.xz = f1.x*r12.z + s12.x*r23.z - f4.x*r34.z + f5.x*r56.z + s56.x*r67.z - f8.x*r78.z;
01012     virial.yx = f1.y*r12.x + s12.y*r23.x - f4.y*r34.x + f5.y*r56.x + s56.y*r67.x - f8.y*r78.x;
01013     virial.yy = f1.y*r12.y + s12.y*r23.y - f4.y*r34.y + f5.y*r56.y + s56.y*r67.y - f8.y*r78.y;
01014     virial.yz = f1.y*r12.z + s12.y*r23.z - f4.y*r34.z + f5.y*r56.z + s56.y*r67.z - f8.y*r78.z;
01015     virial.zx = f1.z*r12.x + s12.z*r23.x - f4.z*r34.x + f5.z*r56.x + s56.z*r67.x - f8.z*r78.x;
01016     virial.zy = f1.z*r12.y + s12.z*r23.y - f4.z*r34.y + f5.z*r56.y + s56.z*r67.y - f8.z*r78.y;
01017     virial.zz = f1.z*r12.z + s12.z*r23.z - f4.z*r34.z + f5.z*r56.z + s56.z*r67.z - f8.z*r78.z;
01018 #endif
01019   }
01020 
01021 }
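
The coefficient evaluation above is a Horner-form bicubic patch: with \Delta\phi = phi_h - iphi and \Delta\psi = psi_h - ipsi, the CMAP energy is

    E = s \sum_{i=0}^{3} \sum_{j=0}^{3} c_{ij}\,\Delta\phi^{\,i}\,\Delta\psi^{\,j}

where s = cl.scale and c[j].{x,y,z,w} hold the coefficients for \Delta\phi^{0..3} at \Delta\psi^{\,j}. The dEdphi and dEdpsi passes evaluate the analytic partial derivatives of the same polynomial, scaled by inv_h to convert from grid units back to radians.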

template<bool doEnergy>
__forceinline__ __device__ void diheComp ( const CudaDihedralValue *  dihedralValues,
int  ic,
const float  sin_phi,
const float  cos_phi,
const float  scale,
float &  df,
double &  e 
)

Definition at line 569 of file ComputeBondedCUDAKernel.cu.

References CudaDihedralValue::delta, CudaDihedralValue::k, M_PI, CudaDihedralValue::n, and x.

00570                                                                                      {
00571 
00572   df = 0.0f;
00573   if (doEnergy) e = 0.0;
00574 
00575   float phi = atan2f(sin_phi, cos_phi);
00576 
00577   bool lrep = true;
00578   while (lrep) {
00579     CudaDihedralValue dihedralValue = dihedralValues[ic];
00580     dihedralValue.k *= scale;
00581 
00582     // Last dihedral has n low bit set to 0
00583     lrep = (dihedralValue.n & 1);
00584     dihedralValue.n >>= 1;
00585     if (dihedralValue.n) {
00586       float nf = dihedralValue.n;
00587       float x = nf*phi - dihedralValue.delta;
00588       if (doEnergy) {
00589         float sin_x, cos_x;
00590         sincosf(x, &sin_x, &cos_x);
00591         e += (double)(dihedralValue.k*(1.0f + cos_x));
00592         df += (double)(nf*dihedralValue.k*sin_x);
00593       } else {
00594         float sin_x = sinf(x);
00595         df += (double)(nf*dihedralValue.k*sin_x);
00596       }
00597     } else {
00598       float diff = phi - dihedralValue.delta;
00599       if (diff < -(float)(M_PI)) diff += (float)(2.0*M_PI);
00600       if (diff >  (float)(M_PI)) diff -= (float)(2.0*M_PI);
00601       if (doEnergy) {
00602         e += (double)(dihedralValue.k*diff*diff);
00603       }
00604       df -= 2.0f*dihedralValue.k*diff;
00605     }
00606     ic++;
00607   }
00608 
00609 }
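
Each pass through the loop adds one term of the dihedral series; in the code's notation (k already multiplied by scale), for periodicity n > 0

    E = k\,[1 + \cos(n\phi - \delta)]

and for the n == 0 (harmonic, improper-style) case

    E = k\,(\phi - \delta)^2

with \phi - \delta wrapped into (-\pi, \pi]. In both cases df accumulates -dE/d\phi, which diheForce() then converts into Cartesian forces.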

template<typename T, bool doEnergy, bool doVirial>
__device__ void diheForce ( const int  index,
const CudaDihedral *__restrict__  diheList,
const CudaDihedralValue *__restrict__  dihedralValues,
const float4 *__restrict__  xyzq,
const int  stride,
const float3  lata,
const float3  latb,
const float3  latc,
T *__restrict__  force,
double &  energy,
ComputeBondedCUDAKernel::BondedVirial< double > &  virial 
)

Definition at line 613 of file ComputeBondedCUDAKernel.cu.

References force_to_float, CudaDihedral::i, CudaDihedral::ioffsetXYZ, CudaDihedral::itype, CudaDihedral::j, CudaDihedral::joffsetXYZ, CudaDihedral::k, CudaDihedral::l, CudaDihedral::loffsetXYZ, and CudaDihedral::scale.

00625     {
00626 
00627   CudaDihedral dl = diheList[index];
00628 
00629   float ishx = dl.ioffsetXYZ.x*lata.x + dl.ioffsetXYZ.y*latb.x + dl.ioffsetXYZ.z*latc.x;
00630   float ishy = dl.ioffsetXYZ.x*lata.y + dl.ioffsetXYZ.y*latb.y + dl.ioffsetXYZ.z*latc.y;
00631   float ishz = dl.ioffsetXYZ.x*lata.z + dl.ioffsetXYZ.y*latb.z + dl.ioffsetXYZ.z*latc.z;
00632 
00633   float jshx = dl.joffsetXYZ.x*lata.x + dl.joffsetXYZ.y*latb.x + dl.joffsetXYZ.z*latc.x;
00634   float jshy = dl.joffsetXYZ.x*lata.y + dl.joffsetXYZ.y*latb.y + dl.joffsetXYZ.z*latc.y;
00635   float jshz = dl.joffsetXYZ.x*lata.z + dl.joffsetXYZ.y*latb.z + dl.joffsetXYZ.z*latc.z;
00636 
00637   float lshx = dl.loffsetXYZ.x*lata.x + dl.loffsetXYZ.y*latb.x + dl.loffsetXYZ.z*latc.x;
00638   float lshy = dl.loffsetXYZ.x*lata.y + dl.loffsetXYZ.y*latb.y + dl.loffsetXYZ.z*latc.y;
00639   float lshz = dl.loffsetXYZ.x*lata.z + dl.loffsetXYZ.y*latb.z + dl.loffsetXYZ.z*latc.z;
00640 
00641   float xij = xyzq[dl.i].x + ishx - xyzq[dl.j].x;
00642   float yij = xyzq[dl.i].y + ishy - xyzq[dl.j].y;
00643   float zij = xyzq[dl.i].z + ishz - xyzq[dl.j].z;
00644 
00645   float xjk = xyzq[dl.j].x + jshx - xyzq[dl.k].x;
00646   float yjk = xyzq[dl.j].y + jshy - xyzq[dl.k].y;
00647   float zjk = xyzq[dl.j].z + jshz - xyzq[dl.k].z;
00648 
00649   float xlk = xyzq[dl.l].x + lshx - xyzq[dl.k].x;
00650   float ylk = xyzq[dl.l].y + lshy - xyzq[dl.k].y;
00651   float zlk = xyzq[dl.l].z + lshz - xyzq[dl.k].z;
00652 
00653   // A=F^G, B=H^G.
00654   float ax = yij*zjk - zij*yjk;
00655   float ay = zij*xjk - xij*zjk;
00656   float az = xij*yjk - yij*xjk;
00657   float bx = ylk*zjk - zlk*yjk;
00658   float by = zlk*xjk - xlk*zjk;
00659   float bz = xlk*yjk - ylk*xjk;
00660 
00661   float ra2 = ax*ax + ay*ay + az*az;
00662   float rb2 = bx*bx + by*by + bz*bz;
00663   float rg = sqrtf(xjk*xjk + yjk*yjk + zjk*zjk);
00664 
00665   //    if((ra2 <= rxmin2) .or. (rb2 <= rxmin2) .or. (rg <= rxmin)) then
00666   //          nlinear = nlinear + 1
00667   //       endif
00668 
00669   float rgr = 1.0f / rg;
00670   float ra2r = 1.0f / ra2;
00671   float rb2r = 1.0f / rb2;
00672   float rabr = sqrtf(ra2r*rb2r);
00673 
00674   float cos_phi = (ax*bx + ay*by + az*bz)*rabr;
00675   //
00676   // Note that sin(phi).G/|G|=B^A/(|A|.|B|)
00677   // which can be simplified to sin(phi)=|G|H.A/(|A|.|B|)
00678   float sin_phi = rg*rabr*(ax*xlk + ay*ylk + az*zlk);
00679   //
00680   //     Energy and derivative contributions.
00681 
00682   float df;
00683   double e;
00684   diheComp<doEnergy>(dihedralValues, dl.itype, sin_phi, cos_phi, dl.scale, df, e);
00685 
00686   if (doEnergy) energy += e;
00687 
00688   //
00689   //     Compute derivatives wrt Cartesian coordinates.
00690   //
00691   // GAA=dE/dphi.|G|/A^2, GBB=dE/dphi.|G|/B^2, FG=F.G, HG=H.G
00692   //  FGA=dE/dphi*F.G/(|G|A^2), HGB=dE/dphi*H.G/(|G|B^2)
00693 
00694   float fg = xij*xjk + yij*yjk + zij*zjk;
00695   float hg = xlk*xjk + ylk*yjk + zlk*zjk;
00696   ra2r *= df;
00697   rb2r *= df;
00698   float fga = fg*ra2r*rgr;
00699   float hgb = hg*rb2r*rgr;
00700   float gaa = ra2r*rg;
00701   float gbb = rb2r*rg;
00702   // DFi=dE/dFi, DGi=dE/dGi, DHi=dE/dHi.
00703 
00704   // Store forces
00705   T T_fix, T_fiy, T_fiz;
00706   calcComponentForces<T>(-gaa, ax, ay, az, T_fix, T_fiy, T_fiz);
00707   storeForces<T>(T_fix, T_fiy, T_fiz, dl.i, stride, force);
00708 
00709   T dgx, dgy, dgz;
00710   calcComponentForces<T>(fga, ax, ay, az, -hgb, bx, by, bz, dgx, dgy, dgz);
00711   T T_fjx = dgx - T_fix;
00712   T T_fjy = dgy - T_fiy;
00713   T T_fjz = dgz - T_fiz;
00714   storeForces<T>(T_fjx, T_fjy, T_fjz, dl.j, stride, force);
00715 
00716   T dhx, dhy, dhz;
00717   calcComponentForces<T>(gbb, bx, by, bz, dhx, dhy, dhz);
00718   T T_fkx = -dhx - dgx;
00719   T T_fky = -dhy - dgy;
00720   T T_fkz = -dhz - dgz;
00721   storeForces<T>(T_fkx, T_fky, T_fkz, dl.k, stride, force);
00722   storeForces<T>(dhx, dhy, dhz, dl.l, stride, force);
00723 
00724   // Store virial
00725   if (doVirial) {
00726 #ifdef WRITE_FULL_VIRIALS
00727     float fix = ((float)T_fix)*force_to_float;
00728     float fiy = ((float)T_fiy)*force_to_float;
00729     float fiz = ((float)T_fiz)*force_to_float;
00730     float fjx = ((float)dgx)*force_to_float;
00731     float fjy = ((float)dgy)*force_to_float;
00732     float fjz = ((float)dgz)*force_to_float;
00733     float flx = ((float)dhx)*force_to_float;
00734     float fly = ((float)dhy)*force_to_float;
00735     float flz = ((float)dhz)*force_to_float;
00736     virial.xx = (fix*xij) + (fjx*xjk) + (flx*xlk);
00737     virial.xy = (fix*yij) + (fjx*yjk) + (flx*ylk);
00738     virial.xz = (fix*zij) + (fjx*zjk) + (flx*zlk);
00739     virial.yx = (fiy*xij) + (fjy*xjk) + (fly*xlk);
00740     virial.yy = (fiy*yij) + (fjy*yjk) + (fly*ylk);
00741     virial.yz = (fiy*zij) + (fjy*zjk) + (fly*zlk);
00742     virial.zx = (fiz*xij) + (fjz*xjk) + (flz*xlk);
00743     virial.zy = (fiz*yij) + (fjz*yjk) + (flz*ylk);
00744     virial.zz = (fiz*zij) + (fjz*zjk) + (flz*zlk);
00745 #endif
00746   }
00747 
00748 }

__device__ __forceinline__ float dot ( const float3  v1,
const float3  v2 
)

Definition at line 758 of file ComputeBondedCUDAKernel.cu.

Referenced by crosstermForce(), and ARestraint::GetDihe().

00758                                                                        {
00759   return (v1.x * v2.x + v1.y * v2.y + v1.z * v2.z);
00760 }

template<typename T, bool doEnergy, bool doVirial>
__device__ void exclusionForce ( const int  index,
const CudaExclusion *__restrict__  exclusionList,
const float  r2_delta,
const int  r2_delta_expc,
cudaTextureObject_t  r2_table_tex,
cudaTextureObject_t  exclusionTableTex,
const float4 *__restrict__  xyzq,
const int  stride,
const float3  lata,
const float3  latb,
const float3  latc,
const float  cutoff2,
T *__restrict__  forceSlow,
double &  energySlow,
ComputeBondedCUDAKernel::BondedVirial< double > &  virialSlow 
)

Definition at line 341 of file ComputeBondedCUDAKernel.cu.

References __ldg, diffa, f, CudaExclusion::i, CudaExclusion::ioffsetXYZ, CudaExclusion::j, and table_i.

00362     {
00363 
00364   CudaExclusion bl = exclusionList[index];
00365 
00366   float shx = bl.ioffsetXYZ.x*lata.x + bl.ioffsetXYZ.y*latb.x + bl.ioffsetXYZ.z*latc.x;
00367   float shy = bl.ioffsetXYZ.x*lata.y + bl.ioffsetXYZ.y*latb.y + bl.ioffsetXYZ.z*latc.y;
00368   float shz = bl.ioffsetXYZ.x*lata.z + bl.ioffsetXYZ.y*latb.z + bl.ioffsetXYZ.z*latc.z;
00369 
00370   float4 xyzqi = xyzq[bl.i];
00371   float4 xyzqj = xyzq[bl.j];
00372 
00373   float xij = xyzqi.x + shx - xyzqj.x;
00374   float yij = xyzqi.y + shy - xyzqj.y;
00375   float zij = xyzqi.z + shz - xyzqj.z;
00376 
00377   float r2 = xij*xij + yij*yij + zij*zij;
00378   if (r2 < cutoff2) {
00379     r2 += r2_delta;
00380 
00381     union { float f; int i; } r2i;
00382     r2i.f = r2;
00383     int table_i = (r2i.i >> 17) + r2_delta_expc;  // table_i >= 0
00384 
00385 #if __CUDA_ARCH__ >= 350
00386     float r2_table_val = __ldg(&r2_table[table_i]);
00387 #else
00388     float r2_table_val = tex1D<float>(r2_table_tex, table_i);
00389 #endif
00390     float diffa = r2 - r2_table_val;
00391     float qq = xyzqi.w * xyzqj.w;
00392 
00393 #if __CUDA_ARCH__ >= 350
00394     float4 slow = __ldg(&exclusionTable[table_i]);
00395 #else
00396     float4 slow = tex1D<float4>(exclusionTableTex, table_i);
00397 #endif
00398 
00399     if (doEnergy) {
00400       energySlow += (double)(qq*(((diffa * (1.0f/6.0f)*slow.x + 0.25f*slow.y ) * diffa + 0.5f*slow.z ) * diffa + slow.w));
00401     }
00402 
00403     float fSlow = -qq*((diffa * slow.x + slow.y) * diffa + slow.z);
00404 
00405     T T_fxij, T_fyij, T_fzij;
00406     calcComponentForces<T>(fSlow, xij, yij, zij, T_fxij, T_fyij, T_fzij);
00407     storeForces<T>(T_fxij, T_fyij, T_fzij, bl.i, stride, forceSlow);
00408     storeForces<T>(-T_fxij, -T_fyij, -T_fzij, bl.j, stride, forceSlow);
00409 
00410     // Store virial
00411     if (doVirial) {
00412 #ifdef WRITE_FULL_VIRIALS
00413       float fxij = fSlow*xij;
00414       float fyij = fSlow*yij;
00415       float fzij = fSlow*zij;
00416       virialSlow.xx = (fxij*xij);
00417       virialSlow.xy = (fxij*yij);
00418       virialSlow.xz = (fxij*zij);
00419       virialSlow.yx = (fyij*xij);
00420       virialSlow.yy = (fyij*yij);
00421       virialSlow.yz = (fyij*zij);
00422       virialSlow.zx = (fzij*xij);
00423       virialSlow.zy = (fzij*yij);
00424       virialSlow.zz = (fzij*zij);
00425 #endif
00426     }
00427   }
00428 }
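
The bit manipulation on r2i.i keeps the exponent and leading mantissa bits of r², giving a table index with piecewise-exponential spacing; the table entry is then combined with a cubic interpolation in \Delta = r^2 - r^2_{table}. With (a, b, c, d) = (slow.x, slow.y, slow.z, slow.w) and q = q_i q_j, the listing evaluates

    E = q\left(\tfrac{1}{6}a\Delta^3 + \tfrac{1}{4}b\Delta^2 + \tfrac{1}{2}c\Delta + d\right), \qquad
    fSlow = -q\,(a\Delta^2 + b\Delta + c) = -2\,\frac{dE}{d\Delta}

so the stored vector fSlow·r_ij equals -(1/r)\,(dE/dr)\,\mathbf{r}_{ij}, the usual pairwise force.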

__device__ long long int lliroundf ( float  f  )  [inline]

Definition at line 17 of file ComputeBondedCUDAKernel.cu.

00018 {
00019     long long int l;
00020     asm("cvt.rni.s64.f32  %0, %1;" : "=l"(l) : "f"(f));
00021     return l;
00022 }
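
The inline PTX performs a round-to-nearest-even float-to-signed-64-bit conversion. A hedged equivalent using a standard CUDA intrinsic, shown only for clarity (the helper name is hypothetical):

    __device__ long long int lliroundf_alt(float f) {
      // __float2ll_rn emits the same cvt.rni.s64.f32 conversion.
      return __float2ll_rn(f);
    }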

__device__ unsigned long long int llitoulli ( long long int  l  )  [inline]

Definition at line 24 of file ComputeBondedCUDAKernel.cu.

Referenced by bondedForcesKernel(), and modifiedExclusionForcesKernel().

00025 {
00026     unsigned long long int u;
00027     asm("mov.b64    %0, %1;" : "=l"(u) : "l"(l));
00028     return u;
00029 }

template<typename T, bool doEnergy, bool doVirial, bool doElect>
__device__ void modifiedExclusionForce ( const int  index,
const CudaExclusion *__restrict__  exclusionList,
const bool  doSlow,
const float  one_scale14,
const int  vdwCoefTableWidth,
cudaTextureObject_t  vdwCoefTableTex,
cudaTextureObject_t  forceTableTex,
cudaTextureObject_t  energyTableTex,
const float4 *__restrict__  xyzq,
const int  stride,
const float3  lata,
const float3  latb,
const float3  latc,
const float  cutoff2,
double &  energyVdw,
T *__restrict__  forceNbond,
double &  energyNbond,
T *__restrict__  forceSlow,
double &  energySlow,
ComputeBondedCUDAKernel::BondedVirial< double > &  virialNbond,
ComputeBondedCUDAKernel::BondedVirial< double > &  virialSlow 
)

Definition at line 211 of file ComputeBondedCUDAKernel.cu.

References __ldg, CudaExclusion::i, CudaExclusion::ioffsetXYZ, CudaExclusion::j, CudaExclusion::vdwtypei, CudaExclusion::vdwtypej, float2::x, ComputeBondedCUDAKernel::BondedVirial< T >::xx, ComputeBondedCUDAKernel::BondedVirial< T >::xy, ComputeBondedCUDAKernel::BondedVirial< T >::xz, float2::y, ComputeBondedCUDAKernel::BondedVirial< T >::yx, ComputeBondedCUDAKernel::BondedVirial< T >::yy, ComputeBondedCUDAKernel::BondedVirial< T >::yz, ComputeBondedCUDAKernel::BondedVirial< T >::zx, ComputeBondedCUDAKernel::BondedVirial< T >::zy, and ComputeBondedCUDAKernel::BondedVirial< T >::zz.

00237     {
00238 
00239   CudaExclusion bl = exclusionList[index];
00240 
00241   float shx = bl.ioffsetXYZ.x*lata.x + bl.ioffsetXYZ.y*latb.x + bl.ioffsetXYZ.z*latc.x;
00242   float shy = bl.ioffsetXYZ.x*lata.y + bl.ioffsetXYZ.y*latb.y + bl.ioffsetXYZ.z*latc.y;
00243   float shz = bl.ioffsetXYZ.x*lata.z + bl.ioffsetXYZ.y*latb.z + bl.ioffsetXYZ.z*latc.z;
00244 
00245   float4 xyzqi = xyzq[bl.i];
00246   float4 xyzqj = xyzq[bl.j];
00247 
00248   float xij = xyzqi.x + shx - xyzqj.x;
00249   float yij = xyzqi.y + shy - xyzqj.y;
00250   float zij = xyzqi.z + shz - xyzqj.z;
00251 
00252   float r2 = xij*xij + yij*yij + zij*zij;
00253   if (r2 < cutoff2) {
00254 
00255     float rinv = rsqrtf(r2);
00256 
00257     float qq;
00258     if (doElect) qq = one_scale14 * xyzqi.w * xyzqj.w;
00259 
00260     int vdwIndex = bl.vdwtypei + bl.vdwtypej*vdwCoefTableWidth;
00261 #if __CUDA_ARCH__ >= 350
00262     float2 ljab = __ldg(&vdwCoefTable[vdwIndex]);
00263 #else
00264     float2 ljab = tex1D<float2>(vdwCoefTableTex, vdwIndex);
00265 #endif
00266 
00267     float4 fi = tex1D<float4>(forceTableTex, rinv);
00268     float4 ei;
00269     if (doEnergy) {
00270       ei = tex1D<float4>(energyTableTex, rinv);
00271       energyVdw   += (double)(ljab.x * ei.z + ljab.y * ei.y);
00272       if (doElect) {
00273         energyNbond += qq * ei.x;
00274         if (doSlow) energySlow  += qq * ei.w;
00275       }
00276     }
00277 
00278     float fNbond;
00279     if (doElect) {
00280       fNbond = -(ljab.x * fi.z + ljab.y * fi.y + qq * fi.x);
00281     } else {
00282       fNbond = -(ljab.x * fi.z + ljab.y * fi.y);
00283     }
00284     T T_fxij, T_fyij, T_fzij;
00285     calcComponentForces<T>(fNbond, xij, yij, zij, T_fxij, T_fyij, T_fzij);
00286     storeForces<T>(T_fxij, T_fyij, T_fzij, bl.i, stride, forceNbond);
00287     storeForces<T>(-T_fxij, -T_fyij, -T_fzij, bl.j, stride, forceNbond);
00288 
00289     float fSlow;
00290     if (doSlow && doElect) {
00291       fSlow = -qq * fi.w;
00292       T T_fxij, T_fyij, T_fzij;
00293       calcComponentForces<T>(fSlow, xij, yij, zij, T_fxij, T_fyij, T_fzij);
00294       storeForces<T>(T_fxij, T_fyij, T_fzij, bl.i, stride, forceSlow);
00295       storeForces<T>(-T_fxij, -T_fyij, -T_fzij, bl.j, stride, forceSlow);
00296     }
00297 
00298     // Store nonbonded virial
00299     if (doVirial) {
00300 #ifdef WRITE_FULL_VIRIALS
00301       float fxij = fNbond*xij;
00302       float fyij = fNbond*yij;
00303       float fzij = fNbond*zij;
00304       virialNbond.xx = (fxij*xij);
00305       virialNbond.xy = (fxij*yij);
00306       virialNbond.xz = (fxij*zij);
00307       virialNbond.yx = (fyij*xij);
00308       virialNbond.yy = (fyij*yij);
00309       virialNbond.yz = (fyij*zij);
00310       virialNbond.zx = (fzij*xij);
00311       virialNbond.zy = (fzij*yij);
00312       virialNbond.zz = (fzij*zij);
00313 #endif
00314     }
00315 
00316     // Store slow-force virial
00317     if (doVirial && doSlow && doElect) {
00318 #ifdef WRITE_FULL_VIRIALS
00319       float fxij = fSlow*xij;
00320       float fyij = fSlow*yij;
00321       float fzij = fSlow*zij;
00322       virialSlow.xx = (fxij*xij);
00323       virialSlow.xy = (fxij*yij);
00324       virialSlow.xz = (fxij*zij);
00325       virialSlow.yx = (fyij*xij);
00326       virialSlow.yy = (fyij*yij);
00327       virialSlow.yz = (fyij*zij);
00328       virialSlow.zx = (fzij*xij);
00329       virialSlow.zy = (fzij*yij);
00330       virialSlow.zz = (fzij*zij);
00331 #endif
00332     }
00333 
00334   }
00335 }
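The function body follows a fixed sequence: build the Cartesian image shift of atom i from the lattice vectors, form the minimum-image separation, reject pairs beyond the cutoff before paying for rsqrtf, then drive the force and energy lookups with 1/r as the texture coordinate. A standalone sketch of the shift-and-distance step (the helper name pairDist2 is ours):

    // Squared minimum-image distance; 'off' is the lattice-image offset of atom i,
    // as in CudaExclusion::ioffsetXYZ above.
    __device__ float pairDist2(float4 xi, float4 xj, float3 off,
                               float3 lata, float3 latb, float3 latc) {
      float shx = off.x*lata.x + off.y*latb.x + off.z*latc.x;  // Cartesian shift
      float shy = off.x*lata.y + off.y*latb.y + off.z*latc.y;
      float shz = off.x*lata.z + off.y*latb.z + off.z*latc.z;
      float xij = xi.x + shx - xj.x;
      float yij = xi.y + shy - xj.y;
      float zij = xi.z + shz - xj.z;
      return xij*xij + yij*yij + zij*zij;
    }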

template<typename T, bool doEnergy, bool doVirial, bool doElect>
__global__ void modifiedExclusionForcesKernel ( const int  start,
const int  numModifiedExclusions,
const CudaExclusion *__restrict__  modifiedExclusions,
const bool  doSlow,
const float  one_scale14,
const float  cutoff2,
const int  vdwCoefTableWidth,
const float2 *__restrict__  vdwCoefTable,
cudaTextureObject_t  vdwCoefTableTex,
cudaTextureObject_t  modifiedExclusionForceTableTex,
cudaTextureObject_t  modifiedExclusionEnergyTableTex,
const float4 *__restrict__  xyzq,
const int  stride,
const float3  lata,
const float3  latb,
const float3  latc,
T *__restrict__  forceNbond,
T *__restrict__  forceSlow,
double *__restrict__  energies_virials 
)

Definition at line 1315 of file ComputeBondedCUDAKernel.cu.

References BLOCK_SYNC, BONDEDFORCESKERNEL_NUM_WARP, double_to_virial, ComputeBondedCUDAKernel::energyIndex_ELECT, ComputeBondedCUDAKernel::energyIndex_ELECT_SLOW, ComputeBondedCUDAKernel::energyIndex_LJ, llitoulli(), ComputeBondedCUDAKernel::nbondVirialIndex_XX, ComputeBondedCUDAKernel::nbondVirialIndex_XY, ComputeBondedCUDAKernel::nbondVirialIndex_XZ, ComputeBondedCUDAKernel::nbondVirialIndex_YX, ComputeBondedCUDAKernel::nbondVirialIndex_YY, ComputeBondedCUDAKernel::nbondVirialIndex_YZ, ComputeBondedCUDAKernel::nbondVirialIndex_ZX, ComputeBondedCUDAKernel::nbondVirialIndex_ZY, ComputeBondedCUDAKernel::nbondVirialIndex_ZZ, ComputeBondedCUDAKernel::slowVirialIndex_XX, ComputeBondedCUDAKernel::slowVirialIndex_XY, ComputeBondedCUDAKernel::slowVirialIndex_XZ, ComputeBondedCUDAKernel::slowVirialIndex_YX, ComputeBondedCUDAKernel::slowVirialIndex_YY, ComputeBondedCUDAKernel::slowVirialIndex_YZ, ComputeBondedCUDAKernel::slowVirialIndex_ZX, ComputeBondedCUDAKernel::slowVirialIndex_ZY, ComputeBondedCUDAKernel::slowVirialIndex_ZZ, USE_FP_VIRIAL, WARP_FULL_MASK, WARP_SHUFFLE_XOR, WARPSIZE, ComputeBondedCUDAKernel::BondedVirial< T >::xx, ComputeBondedCUDAKernel::BondedVirial< T >::xy, ComputeBondedCUDAKernel::BondedVirial< T >::xz, ComputeBondedCUDAKernel::BondedVirial< T >::yx, ComputeBondedCUDAKernel::BondedVirial< T >::yy, ComputeBondedCUDAKernel::BondedVirial< T >::yz, ComputeBondedCUDAKernel::BondedVirial< T >::zx, ComputeBondedCUDAKernel::BondedVirial< T >::zy, and ComputeBondedCUDAKernel::BondedVirial< T >::zz.

01334     {
01335 
01336   // Global index of the modified exclusion handled by this thread
01337   int i = threadIdx.x + (start + blockIdx.x)*blockDim.x;
01338 
01339   double energyVdw, energyNbond, energySlow;
01340   if (doEnergy) {
01341     energyVdw = 0.0;
01342     if (doElect) {
01343       energyNbond = 0.0;
01344       energySlow = 0.0;
01345     }
01346   }
01347 
01348 #ifdef WRITE_FULL_VIRIALS
01349   ComputeBondedCUDAKernel::BondedVirial<double> virialNbond;
01350   ComputeBondedCUDAKernel::BondedVirial<double> virialSlow;
01351   if (doVirial) {
01352     virialNbond.xx = 0.0;
01353     virialNbond.xy = 0.0;
01354     virialNbond.xz = 0.0;
01355     virialNbond.yx = 0.0;
01356     virialNbond.yy = 0.0;
01357     virialNbond.yz = 0.0;
01358     virialNbond.zx = 0.0;
01359     virialNbond.zy = 0.0;
01360     virialNbond.zz = 0.0;
01361     if (doElect) {
01362       virialSlow.xx = 0.0;
01363       virialSlow.xy = 0.0;
01364       virialSlow.xz = 0.0;
01365       virialSlow.yx = 0.0;
01366       virialSlow.yy = 0.0;
01367       virialSlow.yz = 0.0;
01368       virialSlow.zx = 0.0;
01369       virialSlow.zy = 0.0;
01370       virialSlow.zz = 0.0;
01371     }
01372   }
01373 #endif
01374 
01375   if (i < numModifiedExclusions)
01376   {
01377     modifiedExclusionForce<T, doEnergy, doVirial, doElect>
01378     (i, modifiedExclusions, doSlow, one_scale14, vdwCoefTableWidth,
01379 #if __CUDA_ARCH__ >= 350
01380       vdwCoefTable,
01381 #else
01382       vdwCoefTableTex,
01383 #endif
01384       modifiedExclusionForceTableTex, modifiedExclusionEnergyTableTex,
01385       xyzq, stride, lata, latb, latc, cutoff2,
01386       energyVdw, forceNbond, energyNbond,
01387       forceSlow, energySlow, virialNbond, virialSlow);
01388   }
01389 
01390   // Write energies to global memory
01391   if (doEnergy) {
01392     __shared__ double shEnergyVdw[BONDEDFORCESKERNEL_NUM_WARP];
01393     __shared__ double shEnergyNbond[(doElect) ? BONDEDFORCESKERNEL_NUM_WARP : 1];
01394     __shared__ double shEnergySlow[(doElect) ? BONDEDFORCESKERNEL_NUM_WARP : 1];
01395 #pragma unroll
01396     for (int i=16;i >= 1;i/=2) {
01397       energyVdw   += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energyVdw, i, 32);
01398       if (doElect) {
01399         energyNbond += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energyNbond, i, 32);
01400         energySlow  += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energySlow, i, 32);
01401       }
01402     }
01403     int laneID = (threadIdx.x & (WARPSIZE - 1));
01404     int warpID = threadIdx.x / WARPSIZE;
01405     if (laneID == 0) {
01406       shEnergyVdw[warpID]   = energyVdw;
01407       if (doElect) {
01408         shEnergyNbond[warpID] = energyNbond;
01409         shEnergySlow[warpID]  = energySlow;
01410       }
01411     }
01412     BLOCK_SYNC;
01413     if (warpID == 0) {
01414       energyVdw   = (laneID < BONDEDFORCESKERNEL_NUM_WARP) ? shEnergyVdw[laneID] : 0.0;
01415       if (doElect) {
01416         energyNbond = (laneID < BONDEDFORCESKERNEL_NUM_WARP) ? shEnergyNbond[laneID] : 0.0;
01417         energySlow  = (laneID < BONDEDFORCESKERNEL_NUM_WARP) ? shEnergySlow[laneID] : 0.0;
01418       }
01419 #pragma unroll
01420       for (int i=16;i >= 1;i/=2) {
01421         energyVdw   += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energyVdw, i, 32);
01422         if (doElect) {
01423           energyNbond += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energyNbond, i, 32);
01424           energySlow  += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energySlow, i, 32);
01425         }
01426       }
01427       if (laneID == 0) {
01428         atomicAdd(&energies_virials[ComputeBondedCUDAKernel::energyIndex_LJ],         energyVdw);
01429         if (doElect) {
01430           atomicAdd(&energies_virials[ComputeBondedCUDAKernel::energyIndex_ELECT],      energyNbond);
01431           atomicAdd(&energies_virials[ComputeBondedCUDAKernel::energyIndex_ELECT_SLOW], energySlow);
01432         }
01433       }
01434     }
01435   }
01436 
01437   // Write virials to global memory
01438 #ifdef WRITE_FULL_VIRIALS
01439   if (doVirial) {
01440 #pragma unroll
01441     for (int i=16;i >= 1;i/=2) {
01442       virialNbond.xx += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialNbond.xx, i, 32);
01443       virialNbond.xy += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialNbond.xy, i, 32);
01444       virialNbond.xz += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialNbond.xz, i, 32);
01445       virialNbond.yx += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialNbond.yx, i, 32);
01446       virialNbond.yy += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialNbond.yy, i, 32);
01447       virialNbond.yz += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialNbond.yz, i, 32);
01448       virialNbond.zx += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialNbond.zx, i, 32);
01449       virialNbond.zy += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialNbond.zy, i, 32);
01450       virialNbond.zz += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialNbond.zz, i, 32);
01451       if (doElect && doSlow) {
01452         virialSlow.xx += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialSlow.xx, i, 32);
01453         virialSlow.xy += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialSlow.xy, i, 32);
01454         virialSlow.xz += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialSlow.xz, i, 32);
01455         virialSlow.yx += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialSlow.yx, i, 32);
01456         virialSlow.yy += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialSlow.yy, i, 32);
01457         virialSlow.yz += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialSlow.yz, i, 32);
01458         virialSlow.zx += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialSlow.zx, i, 32);
01459         virialSlow.zy += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialSlow.zy, i, 32);
01460         virialSlow.zz += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialSlow.zz, i, 32);
01461       }
01462     }
01463     __shared__ ComputeBondedCUDAKernel::BondedVirial<double> shVirialNbond[BONDEDFORCESKERNEL_NUM_WARP];
01464     __shared__ ComputeBondedCUDAKernel::BondedVirial<double> shVirialSlow[(doElect) ? BONDEDFORCESKERNEL_NUM_WARP : 1];
01465     int laneID = (threadIdx.x & (WARPSIZE - 1));
01466     int warpID = threadIdx.x / WARPSIZE;
01467     if (laneID == 0) {
01468       shVirialNbond[warpID] = virialNbond;
01469       if (doElect) shVirialSlow[warpID] = virialSlow;  // shVirialSlow has only one element when doElect is false
01470     }
01471     BLOCK_SYNC;
01472 
01473     virialNbond.xx = 0.0;
01474     virialNbond.xy = 0.0;
01475     virialNbond.xz = 0.0;
01476     virialNbond.yx = 0.0;
01477     virialNbond.yy = 0.0;
01478     virialNbond.yz = 0.0;
01479     virialNbond.zx = 0.0;
01480     virialNbond.zy = 0.0;
01481     virialNbond.zz = 0.0;
01482     if (doElect && doSlow) {
01483       virialSlow.xx = 0.0;
01484       virialSlow.xy = 0.0;
01485       virialSlow.xz = 0.0;
01486       virialSlow.yx = 0.0;
01487       virialSlow.yy = 0.0;
01488       virialSlow.yz = 0.0;
01489       virialSlow.zx = 0.0;
01490       virialSlow.zy = 0.0;
01491       virialSlow.zz = 0.0;
01492     }
01493 
01494     if (warpID == 0) {
01495       if (laneID < BONDEDFORCESKERNEL_NUM_WARP) {
01496         virialNbond = shVirialNbond[laneID];
01497         if (doElect) virialSlow = shVirialSlow[laneID];  // virialSlow is only valid when doElect is true
01498       }
01499 #pragma unroll
01500       for (int i=16;i >= 1;i/=2) {
01501         virialNbond.xx += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialNbond.xx, i, 32);
01502         virialNbond.xy += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialNbond.xy, i, 32);
01503         virialNbond.xz += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialNbond.xz, i, 32);
01504         virialNbond.yx += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialNbond.yx, i, 32);
01505         virialNbond.yy += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialNbond.yy, i, 32);
01506         virialNbond.yz += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialNbond.yz, i, 32);
01507         virialNbond.zx += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialNbond.zx, i, 32);
01508         virialNbond.zy += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialNbond.zy, i, 32);
01509         virialNbond.zz += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialNbond.zz, i, 32);
01510         if (doElect && doSlow) {
01511           virialSlow.xx += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialSlow.xx, i, 32);
01512           virialSlow.xy += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialSlow.xy, i, 32);
01513           virialSlow.xz += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialSlow.xz, i, 32);
01514           virialSlow.yx += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialSlow.yx, i, 32);
01515           virialSlow.yy += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialSlow.yy, i, 32);
01516           virialSlow.yz += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialSlow.yz, i, 32);
01517           virialSlow.zx += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialSlow.zx, i, 32);
01518           virialSlow.zy += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialSlow.zy, i, 32);
01519           virialSlow.zz += WARP_SHUFFLE_XOR(WARP_FULL_MASK, virialSlow.zz, i, 32);
01520         }
01521       }
01522 
01523       if (laneID == 0)
01524       {
01525 #ifdef USE_FP_VIRIAL
01526         atomicAdd((unsigned long long int *)&energies_virials[ComputeBondedCUDAKernel::nbondVirialIndex_XX], llitoulli(virialNbond.xx*double_to_virial));
01527         atomicAdd((unsigned long long int *)&energies_virials[ComputeBondedCUDAKernel::nbondVirialIndex_XY], llitoulli(virialNbond.xy*double_to_virial));
01528         atomicAdd((unsigned long long int *)&energies_virials[ComputeBondedCUDAKernel::nbondVirialIndex_XZ], llitoulli(virialNbond.xz*double_to_virial));
01529         atomicAdd((unsigned long long int *)&energies_virials[ComputeBondedCUDAKernel::nbondVirialIndex_YX], llitoulli(virialNbond.yx*double_to_virial));
01530         atomicAdd((unsigned long long int *)&energies_virials[ComputeBondedCUDAKernel::nbondVirialIndex_YY], llitoulli(virialNbond.yy*double_to_virial));
01531         atomicAdd((unsigned long long int *)&energies_virials[ComputeBondedCUDAKernel::nbondVirialIndex_YZ], llitoulli(virialNbond.yz*double_to_virial));
01532         atomicAdd((unsigned long long int *)&energies_virials[ComputeBondedCUDAKernel::nbondVirialIndex_ZX], llitoulli(virialNbond.zx*double_to_virial));
01533         atomicAdd((unsigned long long int *)&energies_virials[ComputeBondedCUDAKernel::nbondVirialIndex_ZY], llitoulli(virialNbond.zy*double_to_virial));
01534         atomicAdd((unsigned long long int *)&energies_virials[ComputeBondedCUDAKernel::nbondVirialIndex_ZZ], llitoulli(virialNbond.zz*double_to_virial));
01535         if (doElect && doSlow) {
01536           atomicAdd((unsigned long long int *)&energies_virials[ComputeBondedCUDAKernel::slowVirialIndex_XX], llitoulli(virialSlow.xx*double_to_virial));
01537           atomicAdd((unsigned long long int *)&energies_virials[ComputeBondedCUDAKernel::slowVirialIndex_XY], llitoulli(virialSlow.xy*double_to_virial));
01538           atomicAdd((unsigned long long int *)&energies_virials[ComputeBondedCUDAKernel::slowVirialIndex_XZ], llitoulli(virialSlow.xz*double_to_virial));
01539           atomicAdd((unsigned long long int *)&energies_virials[ComputeBondedCUDAKernel::slowVirialIndex_YX], llitoulli(virialSlow.yx*double_to_virial));
01540           atomicAdd((unsigned long long int *)&energies_virials[ComputeBondedCUDAKernel::slowVirialIndex_YY], llitoulli(virialSlow.yy*double_to_virial));
01541           atomicAdd((unsigned long long int *)&energies_virials[ComputeBondedCUDAKernel::slowVirialIndex_YZ], llitoulli(virialSlow.yz*double_to_virial));
01542           atomicAdd((unsigned long long int *)&energies_virials[ComputeBondedCUDAKernel::slowVirialIndex_ZX], llitoulli(virialSlow.zx*double_to_virial));
01543           atomicAdd((unsigned long long int *)&energies_virials[ComputeBondedCUDAKernel::slowVirialIndex_ZY], llitoulli(virialSlow.zy*double_to_virial));
01544           atomicAdd((unsigned long long int *)&energies_virials[ComputeBondedCUDAKernel::slowVirialIndex_ZZ], llitoulli(virialSlow.zz*double_to_virial));
01545         }
01546 #else
01547         atomicAdd(&energies_virials[ComputeBondedCUDAKernel::nbondVirialIndex_XX], virialNbond.xx);
01548         atomicAdd(&energies_virials[ComputeBondedCUDAKernel::nbondVirialIndex_XY], virialNbond.xy);
01549         atomicAdd(&energies_virials[ComputeBondedCUDAKernel::nbondVirialIndex_XZ], virialNbond.xz);
01550         atomicAdd(&energies_virials[ComputeBondedCUDAKernel::nbondVirialIndex_YX], virialNbond.yx);
01551         atomicAdd(&energies_virials[ComputeBondedCUDAKernel::nbondVirialIndex_YY], virialNbond.yy);
01552         atomicAdd(&energies_virials[ComputeBondedCUDAKernel::nbondVirialIndex_YZ], virialNbond.yz);
01553         atomicAdd(&energies_virials[ComputeBondedCUDAKernel::nbondVirialIndex_ZX], virialNbond.zx);
01554         atomicAdd(&energies_virials[ComputeBondedCUDAKernel::nbondVirialIndex_ZY], virialNbond.zy);
01555         atomicAdd(&energies_virials[ComputeBondedCUDAKernel::nbondVirialIndex_ZZ], virialNbond.zz);
01556         if (doElect && doSlow) {
01557           atomicAdd(&energies_virials[ComputeBondedCUDAKernel::slowVirialIndex_XX], virialSlow.xx);
01558           atomicAdd(&energies_virials[ComputeBondedCUDAKernel::slowVirialIndex_XY], virialSlow.xy);
01559           atomicAdd(&energies_virials[ComputeBondedCUDAKernel::slowVirialIndex_XZ], virialSlow.xz);
01560           atomicAdd(&energies_virials[ComputeBondedCUDAKernel::slowVirialIndex_YX], virialSlow.yx);
01561           atomicAdd(&energies_virials[ComputeBondedCUDAKernel::slowVirialIndex_YY], virialSlow.yy);
01562           atomicAdd(&energies_virials[ComputeBondedCUDAKernel::slowVirialIndex_YZ], virialSlow.yz);
01563           atomicAdd(&energies_virials[ComputeBondedCUDAKernel::slowVirialIndex_ZX], virialSlow.zx);
01564           atomicAdd(&energies_virials[ComputeBondedCUDAKernel::slowVirialIndex_ZY], virialSlow.zy);
01565           atomicAdd(&energies_virials[ComputeBondedCUDAKernel::slowVirialIndex_ZZ], virialSlow.zz);
01566         }
01567 #endif
01568       }
01569     }
01570   }
01571 #endif
01572 
01573 }
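The energy and virial writes above use the same two-stage block reduction: an intra-warp butterfly with XOR shuffles (offsets 16, 8, 4, 2, 1), per-warp partial sums parked in shared memory, and a final pass by warp 0 followed by one atomicAdd per block. A standalone sketch of the pattern, assuming WARP_SHUFFLE_XOR wraps __shfl_xor_sync and WARPSIZE is 32:

    #ifndef BONDEDFORCESKERNEL_NUM_WARP
    #define BONDEDFORCESKERNEL_NUM_WARP 4  // value defined in this file
    #endif

    __device__ double blockReduceSum(double v) {
      __shared__ double sh[BONDEDFORCESKERNEL_NUM_WARP];
      for (int i = 16; i >= 1; i /= 2)            // stage 1: butterfly within the warp
        v += __shfl_xor_sync(0xffffffff, v, i, 32);
      int lane = threadIdx.x & 31;
      int warp = threadIdx.x / 32;
      if (lane == 0) sh[warp] = v;                // one partial sum per warp
      __syncthreads();
      if (warp == 0) {                            // stage 2: warp 0 folds the partials
        v = (lane < BONDEDFORCESKERNEL_NUM_WARP) ? sh[lane] : 0.0;
        for (int i = 16; i >= 1; i /= 2)
          v += __shfl_xor_sync(0xffffffff, v, i, 32);
      }
      return v;  // the block sum is valid on lane 0 of warp 0
    }

Reducing within the block first and issuing a single atomicAdd per block keeps global-memory contention proportional to the grid size rather than the thread count.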

template<typename T>
__forceinline__ __device__ void storeForces ( const T  fx,
const T  fy,
const T  fz,
const int  ind,
const int  stride,
T *  force 
)

Definition at line 118 of file ComputeBondedCUDAKernel.cu.

00120                {
00121   // The generic version cannot be used; only the specializations actually store forces
00122 }

template<>
__forceinline__ __device__ void storeForces< long long int > ( const long long int  fx,
const long long int  fy,
const long long int  fz,
const int  ind,
const int  stride,
long long int *  force 
)
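The body of this specialization is not reproduced in this listing. Given the fixed-point accumulation machinery above, a plausible sketch (ours, not the verbatim source) accumulates the three components atomically into a strided x/y/z layout via llitoulli():

    // Hedged sketch: atomic fixed-point accumulation into a strided force array.
    __device__ __forceinline__ void storeForcesLLI(
        long long int fx, long long int fy, long long int fz,
        int ind, int stride, long long int *force) {
      atomicAdd((unsigned long long int *)&force[ind           ], llitoulli(fx));
      atomicAdd((unsigned long long int *)&force[ind +   stride], llitoulli(fy));
      atomicAdd((unsigned long long int *)&force[ind + 2*stride], llitoulli(fz));
    }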


Variable Documentation

__thread DeviceCUDA* deviceCUDA

Definition at line 18 of file DeviceCUDA.C.

Referenced by CudaComputeNonbonded::assignPatches(), ComputeNonbondedCUDA::assignPatches(), ComputeBondedCUDAKernel::bondedForce(), build_cuda_exclusions(), build_cuda_force_table(), CudaTileListKernel::buildTileLists(), ComputePmeMgr::chargeGridSubmitted(), ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputeMgr::createComputes(), ComputeCUDAMgr::createCudaComputeNonbonded(), cuda_check_local_calc(), cuda_check_remote_calc(), cuda_check_remote_progress(), cuda_initialize(), ComputeNonbondedCUDA::doWork(), ComputeCUDAMgr::getCudaComputeNonbonded(), ComputePmeMgr::initialize(), ComputeCUDAMgr::initialize(), ComputePmeMgr::initialize_computes(), ComputePmeCUDAMgr::initialize_pencils(), ComputePmeMgr::initialize_pencils(), ComputePmeCUDAMgr::isPmeDevice(), CudaComputeNonbondedKernel::nonbondedForce(), ComputeNonbondedCUDA::recvYieldDevice(), CudaComputeNonbondedKernel::reduceVirialEnergy(), ComputeNonbondedCUDA::requirePatch(), and ComputePmeCUDAMgr::setupPencils().

