version 1.1 | version 1.2 |
---|---|
| |
const TileList* __restrict__ tileLists, | const TileList* __restrict__ tileLists, |
const int* __restrict__ tileJatomStart, | const int* __restrict__ tileJatomStart, |
const PatchPairRecord* __restrict__ patchPairs, | const PatchPairRecord* __restrict__ patchPairs, |
const float latticeX, const float latticeY, const float latticeZ, | const float3 lata, const float3 latb, const float3 latc, |
const float4* __restrict__ xyzq, | const float4* __restrict__ xyzq, |
const float cutoff2, | const float cutoff2, |
const GBISParam<phase> param, | const GBISParam<phase> param, |
| |
int iatomSize = PPStmp.iatomSize; | int iatomSize = PPStmp.iatomSize; |
int jatomSize = PPStmp.jatomSize; | int jatomSize = PPStmp.jatomSize; |
| |
float shx = tmp.offsetXYZ.x*latticeX; | float shx = tmp.offsetXYZ.x*lata.x + tmp.offsetXYZ.y*latb.x + tmp.offsetXYZ.z*latc.x; |
float shy = tmp.offsetXYZ.y*latticeY; | float shy = tmp.offsetXYZ.x*lata.y + tmp.offsetXYZ.y*latb.y + tmp.offsetXYZ.z*latc.y; |
float shz = tmp.offsetXYZ.z*latticeZ; | float shz = tmp.offsetXYZ.x*lata.z + tmp.offsetXYZ.y*latb.z + tmp.offsetXYZ.z*latc.z; |
| |
// Thread lane index within the warp (0...WARPSIZE-1) | // Thread lane index within the warp (0...WARPSIZE-1) |
const int wid = threadIdx.x % WARPSIZE; | const int wid = threadIdx.x % WARPSIZE; |
| |
// Phase 1 | // Phase 1 |
// | // |
void CudaComputeGBISKernel::GBISphase1(CudaTileListKernel& tlKernel, const int atomStorageSize, | void CudaComputeGBISKernel::GBISphase1(CudaTileListKernel& tlKernel, const int atomStorageSize, |
const float latticeX, const float latticeY, const float latticeZ, const float a_cut, float* h_psiSum, | const float3 lata, const float3 latb, const float3 latc, const float a_cut, float* h_psiSum, |
cudaStream_t stream) { | cudaStream_t stream) { |
| |
reallocate_device<float>(&psiSum, &psiSumSize, atomStorageSize, 1.2f); | reallocate_device<float>(&psiSum, &psiSumSize, atomStorageSize, 1.2f); |
| |
| |
GBIS_Kernel<false, false, 1> <<< nblock, nthread, 0, stream >>> | GBIS_Kernel<false, false, 1> <<< nblock, nthread, 0, stream >>> |
(tlKernel.getNumTileListsGBIS(), tlKernel.getTileListsGBIS(), tlKernel.getTileJatomStartGBIS(), | (tlKernel.getNumTileListsGBIS(), tlKernel.getTileListsGBIS(), tlKernel.getTileJatomStartGBIS(), |
tlKernel.getPatchPairs(), latticeX, latticeY, latticeZ, tlKernel.get_xyzq(), cutoff2, | tlKernel.getPatchPairs(), lata, latb, latc, tlKernel.get_xyzq(), cutoff2, |
param, intRad0, intRadS, NULL, psiSum, NULL, NULL); | param, intRad0, intRadS, NULL, psiSum, NULL, NULL); |
| |
cudaCheck(cudaGetLastError()); | cudaCheck(cudaGetLastError()); |
| |
// | // |
void CudaComputeGBISKernel::GBISphase2(CudaTileListKernel& tlKernel, const int atomStorageSize, | void CudaComputeGBISKernel::GBISphase2(CudaTileListKernel& tlKernel, const int atomStorageSize, |
const bool doEnergy, const bool doSlow, | const bool doEnergy, const bool doSlow, |
const float latticeX, const float latticeY, const float latticeZ, | const float3 lata, const float3 latb, const float3 latc, |
const float r_cut, const float scaling, const float kappa, const float smoothDist, | const float r_cut, const float scaling, const float kappa, const float smoothDist, |
const float epsilon_p, const float epsilon_s, | const float epsilon_p, const float epsilon_s, |
float4* d_forces, float* h_dEdaSum, cudaStream_t stream) { | float4* d_forces, float* h_dEdaSum, cudaStream_t stream) { |
| |
#define CALL(DOENERGY, DOSLOW) GBIS_Kernel<DOENERGY, DOSLOW, 2> \ | #define CALL(DOENERGY, DOSLOW) GBIS_Kernel<DOENERGY, DOSLOW, 2> \ |
<<< nblock, nthread, 0, stream >>> \ | <<< nblock, nthread, 0, stream >>> \ |
(tlKernel.getNumTileListsGBIS(), tlKernel.getTileListsGBIS(), tlKernel.getTileJatomStartGBIS(), \ | (tlKernel.getNumTileListsGBIS(), tlKernel.getTileListsGBIS(), tlKernel.getTileJatomStartGBIS(), \ |
tlKernel.getPatchPairs(), latticeX, latticeY, latticeZ, tlKernel.get_xyzq(), param.r_cut2, \ | tlKernel.getPatchPairs(), lata, latb, latc, tlKernel.get_xyzq(), param.r_cut2, \ |
param, bornRad, NULL, NULL, dEdaSum, d_forces, tlKernel.getTileListVirialEnergy()) | param, bornRad, NULL, NULL, dEdaSum, d_forces, tlKernel.getTileListVirialEnergy()) |
| |
if (!doEnergy && !doSlow) CALL(false, false); | if (!doEnergy && !doSlow) CALL(false, false); |
| |
// Phase 3 | // Phase 3 |
// | // |
void CudaComputeGBISKernel::GBISphase3(CudaTileListKernel& tlKernel, const int atomStorageSize, | void CudaComputeGBISKernel::GBISphase3(CudaTileListKernel& tlKernel, const int atomStorageSize, |
const float latticeX, const float latticeY, const float latticeZ, const float a_cut, | const float3 lata, const float3 latb, const float3 latc, const float a_cut, |
float4* d_forces, cudaStream_t stream) { | float4* d_forces, cudaStream_t stream) { |
| |
int nwarp = GBISKERNEL_NUM_WARP; | int nwarp = GBISKERNEL_NUM_WARP; |
| |
| |
GBIS_Kernel<false, false, 3> <<< nblock, nthread, 0, stream >>> | GBIS_Kernel<false, false, 3> <<< nblock, nthread, 0, stream >>> |
(tlKernel.getNumTileListsGBIS(), tlKernel.getTileListsGBIS(), tlKernel.getTileJatomStartGBIS(), | (tlKernel.getNumTileListsGBIS(), tlKernel.getTileListsGBIS(), tlKernel.getTileJatomStartGBIS(), |
tlKernel.getPatchPairs(), latticeX, latticeY, latticeZ, tlKernel.get_xyzq(), cutoff2, | tlKernel.getPatchPairs(), lata, latb, latc, tlKernel.get_xyzq(), cutoff2, |
param, intRad0, intRadS, dHdrPrefix, NULL, d_forces, NULL); | param, intRad0, intRadS, dHdrPrefix, NULL, d_forces, NULL); |
| |
cudaCheck(cudaGetLastError()); | cudaCheck(cudaGetLastError()); |