#include "ComputeNonbondedCUDAKernel.h"#include <stdio.h>Go to the source code of this file.
Defines | |
| #define | SET_EXCL(EXCL, BASE, DIFF) (EXCL)[((BASE)+(DIFF))>>5] |= (1<<(((BASE)+(DIFF))&31)) |
| #define | BLOCK_SIZE 64 |
| #define | SHARED_SIZE 16 |
| #define | jpqs jpqu.d |
| #define | japs japu.d |
| #define | myPatchPair pp.pp |
| #define | FORCE_INNER_LOOP(IPQ, IAP) |
Functions | |
| void | cuda_bind_exclusions (const unsigned int *t, int n) |
| void | cuda_bind_force_table (const float4 *t) |
| void | cuda_init () |
| void | cuda_bind_patch_pairs (const patch_pair *pp, int npp, const force_list *fl, int nfl, int atoms_size_p, int force_buffers_size_p, int max_atoms_per_patch_p) |
| void | cuda_bind_atom_params (const atom_param *t) |
| void | cuda_bind_atoms (const atom *a) |
| void | cuda_load_forces (float4 *f, float4 *f_slow, int begin, int count) |
| __global__ void | dev_nonbonded (const patch_pair *patch_pairs, const atom *atoms, const atom_param *atom_params, float4 *force_buffers, float4 *slow_force_buffers, float3 lata, float3 latb, float3 latc, float cutoff2) |
| __global__ void | dev_sum_forces (const force_list *force_lists, const float4 *force_buffers, float4 *forces) |
| void | cuda_nonbonded_forces (float3 lata, float3 latb, float3 latc, float cutoff2, int cbegin, int ccount, int pbegin, int pcount, int doSlow) |
| int | cuda_stream_finished () |
Variables | |
| __constant__ unsigned int | exclusions [MAX_EXCLUSIONS] |
| texture< float4, 1, cudaReadModeElementType > | force_table |
| int | patch_pairs_size |
| patch_pair * | patch_pairs |
| int | force_lists_size |
| force_list * | force_lists |
| int | force_buffers_size |
| float4 * | force_buffers |
| float4 * | slow_force_buffers |
| int | atoms_size |
| atom * | atoms |
| atom_param * | atom_params |
| float4 * | forces |
| float4 * | slow_forces |
| int | patch_pairs_alloc |
| int | force_buffers_alloc |
| int | force_lists_alloc |
| int | atoms_alloc |
| int | max_atoms_per_patch |
| cudaStream_t | stream |
|
|
Definition at line 220 of file ComputeNonbondedCUDAKernel.cu. |
|
|
Referenced by dev_nonbonded(). |
|
|
|
|
|
|
|
|
Referenced by dev_nonbonded(). |
|
|
Definition at line 10 of file ComputeNonbondedCUDAKernel.cu. |
|
|
Definition at line 221 of file ComputeNonbondedCUDAKernel.cu. |
|
|
Definition at line 166 of file ComputeNonbondedCUDAKernel.cu. References atom_params, atoms_size, cuda_errcheck(), and stream. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). 00166 {
00167 cudaMemcpyAsync(atom_params, t, atoms_size * sizeof(atom_param),
00168 cudaMemcpyHostToDevice, stream);
00169 cuda_errcheck("memcpy to atom_params");
00170 }
|
|
|
Definition at line 172 of file ComputeNonbondedCUDAKernel.cu. References atoms, atoms_size, cuda_errcheck(), and stream. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). 00172 {
00173 cuda_errcheck("before memcpy to atoms");
00174 cudaMemcpyAsync(atoms, a, atoms_size * sizeof(atom),
00175 cudaMemcpyHostToDevice, stream);
00176 cuda_errcheck("memcpy to atoms");
00177 }
|
|
||||||||||||
|
Definition at line 13 of file ComputeNonbondedCUDAKernel.cu. References cuda_errcheck(), exclusions, MAX_EXCLUSIONS, and SET_EXCL. Referenced by ComputeNonbondedCUDA::build_exclusions(). 00013 {
00014
00015 #if 0
00016 unsigned int excl[MAX_EXCLUSIONS];
00017
00018 for ( int i=0; i<MAX_EXCLUSIONS; ++i ) { excl[i] = 0; }
00019
00020 SET_EXCL(excl,0,2);
00021 SET_EXCL(excl,0,3);
00022 SET_EXCL(excl,0,4);
00023
00024 cudaMemcpyToSymbol(exclusions, excl, MAX_EXCLUSIONS*sizeof(unsigned int), 0);
00025 #endif
00026
00027 cudaMemcpyToSymbol(exclusions, t, n*sizeof(unsigned int), 0);
00028 cuda_errcheck("memcpy to exclusions");
00029 }
|
|
|
Definition at line 34 of file ComputeNonbondedCUDAKernel.cu. References cuda_errcheck(), force_table, and FORCE_TABLE_SIZE. Referenced by ComputeNonbondedCUDA::build_force_table(). 00034 {
00035 static cudaArray *ct;
00036 if ( ! ct ) {
00037 cudaMallocArray(&ct, &force_table.channelDesc, FORCE_TABLE_SIZE, 1);
00038 cuda_errcheck("allocating force table");
00039 } cudaMemcpyToArray(ct, 0, 0, t, FORCE_TABLE_SIZE*sizeof(float4), cudaMemcpyHostToDevice); // cudaMemcpy(ct, t, FORCE_TABLE_SIZE*sizeof(float4), cudaMemcpyHostToDevice);
00040 cuda_errcheck("memcpy to force table");
00041
00042 force_table.normalized = true;
00043 force_table.addressMode[0] = cudaAddressModeClamp;
00044 force_table.addressMode[1] = cudaAddressModeClamp;
00045 force_table.filterMode = cudaFilterModeLinear;
00046
00047 cudaBindTextureToArray(force_table, ct);
00048 cuda_errcheck("binding force table to texture");
00049 }
|
|
||||||||||||||||||||||||||||||||
|
Definition at line 96 of file ComputeNonbondedCUDAKernel.cu. References atom_params, atoms, atoms_alloc, atoms_size, cuda_errcheck(), force_buffers, force_buffers_alloc, force_buffers_size, force_lists, force_lists_alloc, force_lists_size, forces, max_atoms_per_patch, patch_pairs, patch_pairs_alloc, patch_pairs_size, slow_force_buffers, and slow_forces. Referenced by ComputeNonbondedCUDA::doWork(). 00099 {
00100
00101 patch_pairs_size = npp;
00102 force_buffers_size = force_buffers_size_p;
00103 force_lists_size = nfl;
00104 atoms_size = atoms_size_p;
00105 max_atoms_per_patch = max_atoms_per_patch_p;
00106
00107 #if 0
00108 printf("%d %d %d %d %d %d %d %d\n",
00109 patch_pairs_size , patch_pairs_alloc ,
00110 force_buffers_size , force_buffers_alloc ,
00111 force_lists_size , force_lists_alloc ,
00112 atoms_size , atoms_alloc );
00113 #endif
00114
00115 if ( patch_pairs_size > patch_pairs_alloc ||
00116 force_buffers_size > force_buffers_alloc ||
00117 force_lists_size > force_lists_alloc ||
00118 atoms_size > atoms_alloc ) {
00119
00120 patch_pairs_alloc = (int) (1.2 * patch_pairs_size);
00121 force_buffers_alloc = (int) (1.2 * force_buffers_size);
00122 force_lists_alloc = (int) (1.2 * force_lists_size);
00123 atoms_alloc = (int) (1.2 * atoms_size);
00124
00125 if ( forces ) cudaFree(forces);
00126 if ( slow_forces ) cudaFree(slow_forces);
00127 if ( atom_params ) cudaFree(atom_params);
00128 if ( atoms ) cudaFree(atoms);
00129 if ( force_buffers ) cudaFree(force_buffers);
00130 if ( slow_force_buffers ) cudaFree(slow_force_buffers);
00131 if ( force_lists ) cudaFree(force_lists);
00132 if ( patch_pairs ) cudaFree(patch_pairs);
00133 cuda_errcheck("free everything");
00134
00135 #if 1
00136 int totalmem = patch_pairs_alloc * sizeof(patch_pair) +
00137 force_lists_alloc * sizeof(force_list) +
00138 2 * force_buffers_alloc * sizeof(float4) +
00139 atoms_alloc * sizeof(atom) +
00140 atoms_alloc * sizeof(atom_param) +
00141 2 * atoms_alloc * sizeof(float4);
00142 // printf("allocating %d MB of memory on GPU\n", totalmem >> 20);
00143 #endif
00144
00145 cudaMalloc((void**) &patch_pairs, patch_pairs_alloc * sizeof(patch_pair));
00146 cudaMalloc((void**) &force_lists, force_lists_alloc * sizeof(force_list));
00147 cudaMalloc((void**) &force_buffers, force_buffers_alloc * sizeof(float4));
00148 cudaMalloc((void**) &slow_force_buffers, force_buffers_alloc * sizeof(float4));
00149 cudaMalloc((void**) &atoms, atoms_alloc * sizeof(atom));
00150 cudaMalloc((void**) &atom_params, atoms_alloc * sizeof(atom_param));
00151 cudaMalloc((void**) &forces, atoms_alloc * sizeof(float4));
00152 cudaMalloc((void**) &slow_forces, atoms_alloc * sizeof(float4));
00153 cuda_errcheck("malloc everything");
00154
00155 }
00156
00157 cudaMemcpy(patch_pairs, pp, npp * sizeof(patch_pair),
00158 cudaMemcpyHostToDevice);
00159 cuda_errcheck("memcpy to patch_pairs");
00160
00161 cudaMemcpy(force_lists, fl, nfl * sizeof(force_list),
00162 cudaMemcpyHostToDevice);
00163 cuda_errcheck("memcpy to force_lists");
00164 }
|
|
|
Definition at line 77 of file ComputeNonbondedCUDAKernel.cu. References atom_params, atoms, atoms_alloc, cuda_errcheck(), force_buffers, force_buffers_alloc, force_lists, force_lists_alloc, forces, patch_pairs, patch_pairs_alloc, slow_force_buffers, slow_forces, and stream. Referenced by cuda_initialize(). 00077 {
00078 forces = 0;
00079 slow_forces = 0;
00080 atom_params = 0;
00081 atoms = 0;
00082 force_buffers = 0;
00083 slow_force_buffers = 0;
00084 force_lists = 0;
00085 patch_pairs = 0;
00086
00087 patch_pairs_alloc = 0;
00088 force_buffers_alloc = 0;
00089 force_lists_alloc = 0;
00090 atoms_alloc = 0;
00091
00092 cudaStreamCreate(&stream);
00093 cuda_errcheck("cudaStreamCreate");
00094 }
|
|
||||||||||||||||||||
|
Definition at line 179 of file ComputeNonbondedCUDAKernel.cu. References cuda_errcheck(), forces, slow_forces, and stream. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). 00179 {
00180 // printf("load forces %d %d %d\n",begin,count,atoms_size);
00181 cudaMemcpyAsync(f+begin, forces+begin, count * sizeof(float4),
00182 cudaMemcpyDeviceToHost, stream);
00183 if ( f_slow ) {
00184 cudaMemcpyAsync(f_slow+begin, slow_forces+begin, count * sizeof(float4),
00185 cudaMemcpyDeviceToHost, stream);
00186 }
00187 cuda_errcheck("memcpy from forces");
00188 }
|
|
||||||||||||||||||||||||||||||||||||||||
|
Definition at line 468 of file ComputeNonbondedCUDAKernel.cu. References cuda_errcheck(), dev_nonbonded(), dev_sum_forces(), force_lists, patch_pairs, slow_force_buffers, and stream. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). 00469 {
00470
00471 if ( ccount ) {
00472 // printf("%d %d %d\n",cbegin,ccount,patch_pairs_size);
00473 dev_nonbonded<<< ccount, BLOCK_SIZE, 0, stream
00474 >>>(patch_pairs+cbegin,atoms,atom_params,force_buffers,
00475 (doSlow?slow_force_buffers:0), lata, latb, latc, cutoff2);
00476 cuda_errcheck("dev_nonbonded");
00477 }
00478
00479 if ( pcount ) {
00480 // printf("%d %d %d\n",pbegin,pcount,force_lists_size);
00481 dev_sum_forces<<< pcount, 64, 0, stream
00482 >>>(force_lists+pbegin,force_buffers,forces);
00483 if ( doSlow ) {
00484 dev_sum_forces<<< pcount, 64, 0, stream
00485 >>>(force_lists+pbegin,slow_force_buffers,slow_forces);
00486 }
00487 cuda_errcheck("dev_sum_forces");
00488 }
00489
00490 }
|
|
|
Definition at line 493 of file ComputeNonbondedCUDAKernel.cu. References stream. Referenced by ComputeNonbondedCUDA::doWork(). 00493 {
00494 return ( cudaStreamQuery(stream) == cudaSuccess );
00495 }
|
|
||||||||||||||||||||||||||||||||||||||||
|
Definition at line 223 of file ComputeNonbondedCUDAKernel.cu. References atom_params, atoms, force_buffers, FORCE_INNER_LOOP, j, myPatchPair, and slow_force_buffers. Referenced by cuda_nonbonded_forces(). 00230 {
00231 // call with two blocks per patch_pair
00232 // call with BLOCK_SIZE threads per block
00233 // call with no shared memory
00234
00235 #define jpqs jpqu.d
00236 __shared__ union {
00237 atom d[SHARED_SIZE];
00238 unsigned int i[BLOCK_SIZE];
00239 float f[BLOCK_SIZE];
00240 } jpqu;
00241
00242 #define japs japu.d
00243 __shared__ union {
00244 atom_param d[SHARED_SIZE];
00245 unsigned int i[BLOCK_SIZE];
00246 } japu;
00247
00248 #define myPatchPair pp.pp
00249 __shared__ union { patch_pair pp; unsigned int i[12]; } pp;
00250
00251 if ( threadIdx.x < (sizeof(patch_pair)>>2) ) {
00252 unsigned int tmp = ((unsigned int*)patch_pairs)[
00253 (sizeof(patch_pair)>>2)*blockIdx.x+threadIdx.x];
00254 pp.i[threadIdx.x] = tmp;
00255 }
00256 __syncthreads();
00257
00258 // convert scaled offset with current lattice
00259 if ( threadIdx.x == 0 ) {
00260 float offx = myPatchPair.offset.x * lata.x
00261 + myPatchPair.offset.y * latb.x
00262 + myPatchPair.offset.z * latc.x;
00263 float offy = myPatchPair.offset.x * lata.y
00264 + myPatchPair.offset.y * latb.y
00265 + myPatchPair.offset.z * latc.y;
00266 float offz = myPatchPair.offset.x * lata.z
00267 + myPatchPair.offset.y * latb.z
00268 + myPatchPair.offset.z * latc.z;
00269 myPatchPair.offset.x = offx;
00270 myPatchPair.offset.y = offy;
00271 myPatchPair.offset.z = offz;
00272 }
00273 __syncthreads();
00274
00275 #if 0
00276 // compute records duplicated so this is no longer needed
00277 if ( blockIdx.y ) {
00278
00279 if ( myPatchPair.patch1_force_start == myPatchPair.patch2_force_start ) {
00280 return;
00281 }
00282
00283 } else { // swap patches
00284
00285 if ( threadIdx.x == 0 &&
00286 myPatchPair.patch1_force_start != myPatchPair.patch2_force_start ) {
00287
00288 #undef SWAP
00289 #define SWAP(FIELD1, FIELD2) { \
00290 unsigned int tmp1 = myPatchPair.FIELD1; \
00291 unsigned int tmp2 = myPatchPair.FIELD2; \
00292 if ( tmp1 != tmp2 ) { \
00293 myPatchPair.FIELD1 = tmp2; \
00294 myPatchPair.FIELD2 = tmp1; \
00295 } \
00296 }
00297
00298 SWAP(patch1_size, patch2_size)
00299 SWAP(patch1_force_size, patch2_force_size)
00300 SWAP(patch1_atom_start, patch2_atom_start)
00301 SWAP(patch1_force_start, patch2_force_start)
00302
00303 myPatchPair.offset.x *= -1.f;
00304 myPatchPair.offset.y *= -1.f;
00305 myPatchPair.offset.z *= -1.f;
00306
00307 }
00308
00309 __syncthreads();
00310
00311 }
00312 #endif
00313
00314 for ( int blocki = 0;
00315 blocki < myPatchPair.patch1_force_size;
00316 blocki += BLOCK_SIZE ) {
00317
00318 __syncthreads();
00319
00320 atom ipq;
00321 struct {
00322 float sqrt_epsilon;
00323 float half_sigma;
00324 int index; } iap;
00325
00326 // load patch 1
00327 if ( blocki + threadIdx.x < myPatchPair.patch1_force_size ) {
00328 int i = myPatchPair.patch1_atom_start + blocki + threadIdx.x;
00329 float4 tmpa = ((float4*)atoms)[i];
00330
00331 ipq.position.x = tmpa.x + myPatchPair.offset.x;
00332 ipq.position.y = tmpa.y + myPatchPair.offset.y;
00333 ipq.position.z = tmpa.z + myPatchPair.offset.z;
00334 ipq.charge = tmpa.w;
00335
00336 uint4 tmpap = ((uint4*)atom_params)[i];
00337
00338 jpqu.i[threadIdx.x] = tmpap.x;
00339 iap.sqrt_epsilon = jpqu.f[threadIdx.x];
00340 jpqu.i[threadIdx.x] = tmpap.y;
00341 iap.half_sigma = jpqu.f[threadIdx.x];
00342 iap.index = tmpap.z;
00343 }
00344
00345 float4 ife, ife_slow;
00346 ife.x = 0.f;
00347 ife.y = 0.f;
00348 ife.z = 0.f;
00349 ife.w = 0.f;
00350 ife_slow.x = 0.f;
00351 ife_slow.y = 0.f;
00352 ife_slow.z = 0.f;
00353 ife_slow.w = 0.f;
00354
00355 for ( int blockj = 0;
00356 blockj < myPatchPair.patch2_size;
00357 blockj += SHARED_SIZE ) {
00358
00359 int shared_size = myPatchPair.patch2_size - blockj;
00360 if ( shared_size > SHARED_SIZE ) shared_size = SHARED_SIZE;
00361
00362 // load patch 2
00363 // sync needed because of loop, could avoid with double-buffering
00364 __syncthreads();
00365
00366 if ( threadIdx.x < 4 * shared_size ) {
00367 int j = myPatchPair.patch2_atom_start + blockj;
00368 jpqu.i[threadIdx.x] = ((unsigned int *)(atoms + j))[threadIdx.x];
00369 japu.i[threadIdx.x] = ((unsigned int *)(atom_params + j))[threadIdx.x];
00370 }
00371 __syncthreads();
00372
00373 // calc forces on patch 1
00374 if ( blocki + threadIdx.x < myPatchPair.patch1_force_size ) {
00375
00376 // be careful not to use // comments inside macros!
00377 #define FORCE_INNER_LOOP(IPQ,IAP) \
00378 for ( int j = 0; j < shared_size; ++j ) { \
00379 /* actually calculate force */ \
00380 float tmpx = jpqs[j].position.x - IPQ.position.x; \
00381 float tmpy = jpqs[j].position.y - IPQ.position.y; \
00382 float tmpz = jpqs[j].position.z - IPQ.position.z; \
00383 float r2 = tmpx*tmpx + tmpy*tmpy + tmpz*tmpz; \
00384 if ( r2 < cutoff2 ) { \
00385 float4 fi = tex1D(force_table, 1.f/sqrt(r2)); \
00386 bool excluded = false; \
00387 int indexdiff = (int)(IAP.index) - (int)(japs[j].index); \
00388 if ( abs(indexdiff) <= (int) japs[j].excl_maxdiff ) { \
00389 indexdiff += japs[j].excl_index; \
00390 excluded = ((exclusions[indexdiff>>5] & (1<<(indexdiff&31))) != 0); \
00391 } \
00392 float e = IAP.half_sigma + japs[j].half_sigma; /* sigma */ \
00393 e *= e*e; /* sigma^3 */ \
00394 e *= e; /* sigma^6 */ \
00395 e *= ( e * fi.z + fi.y ); /* s^12 * fi.z - s^6 * fi.y */ \
00396 e *= IAP.sqrt_epsilon * japs[j].sqrt_epsilon; /* full L-J */ \
00397 float e_slow = IPQ.charge * jpqs[j].charge; \
00398 e += e_slow * fi.x; \
00399 e_slow *= fi.w; \
00400 if ( ! excluded ) { \
00401 ife.w += r2 * e; \
00402 ife.x += tmpx * e; \
00403 ife.y += tmpy * e; \
00404 ife.z += tmpz * e; \
00405 ife_slow.w += r2 * e_slow; \
00406 ife_slow.x += tmpx * e_slow; \
00407 ife_slow.y += tmpy * e_slow; \
00408 ife_slow.z += tmpz * e_slow; \
00409 } \
00410 } /* cutoff */ \
00411 }
00412 FORCE_INNER_LOOP(ipq,iap)
00413
00414 } // if
00415 } // blockj loop
00416
00417 if ( blocki + threadIdx.x < myPatchPair.patch1_force_size ) {
00418 int i_out = myPatchPair.patch1_force_start + blocki + threadIdx.x;
00419 force_buffers[i_out] = ife;
00420 if ( slow_force_buffers ) {
00421 slow_force_buffers[i_out] = ife_slow;
00422 }
00423 }
00424
00425 } // blocki loop
00426
00427 }
|
|
||||||||||||||||
|
Definition at line 430 of file ComputeNonbondedCUDAKernel.cu. References force_buffers, force_lists, forces, and j. Referenced by cuda_nonbonded_forces(). 00433 {
00434 // call with one block per patch
00435 // call multiple of 64 threads per block
00436 // call with no shared memory
00437
00438 __shared__ force_list myForceList;
00439
00440 if ( threadIdx.x == 0 ) {
00441 myForceList = force_lists[blockIdx.x];
00442 }
00443 __syncthreads();
00444
00445 for ( int j = threadIdx.x; j < myForceList.patch_size; j += blockDim.x ) {
00446
00447 const float4 *fbuf = force_buffers + myForceList.force_list_start + j;
00448 float4 fout;
00449 fout.x = 0.f;
00450 fout.y = 0.f;
00451 fout.z = 0.f;
00452 fout.w = 0.f;
00453 for ( int i=0; i < myForceList.force_list_size; ++i ) {
00454 float4 f = *fbuf;
00455 fout.x += f.x;
00456 fout.y += f.y;
00457 fout.z += f.z;
00458 fout.w += f.w;
00459 fbuf += myForceList.patch_size;
00460 }
00461
00462 forces[myForceList.force_output_start + j] = fout;
00463
00464 }
00465 }
|
|
|
Definition at line 63 of file ComputeNonbondedCUDAKernel.cu. Referenced by cuda_bind_atom_params(), cuda_bind_patch_pairs(), cuda_init(), and dev_nonbonded(). |
|
|
Definition at line 62 of file ComputeNonbondedCUDAKernel.cu. Referenced by cuda_bind_atoms(), cuda_bind_patch_pairs(), cuda_init(), and dev_nonbonded(). |
|
|
Definition at line 70 of file ComputeNonbondedCUDAKernel.cu. Referenced by cuda_bind_patch_pairs(), and cuda_init(). |
|
|
Definition at line 61 of file ComputeNonbondedCUDAKernel.cu. Referenced by cuda_bind_atom_params(), cuda_bind_atoms(), and cuda_bind_patch_pairs(). |
|
|
Definition at line 8 of file ComputeNonbondedCUDAKernel.cu. Referenced by cuda_bind_exclusions(), and Molecule::get_exclusion(). |
|
|
Definition at line 58 of file ComputeNonbondedCUDAKernel.cu. Referenced by cuda_bind_patch_pairs(), cuda_init(), dev_nonbonded(), and dev_sum_forces(). |
|
|
Definition at line 68 of file ComputeNonbondedCUDAKernel.cu. Referenced by cuda_bind_patch_pairs(), and cuda_init(). |
|
|
Definition at line 57 of file ComputeNonbondedCUDAKernel.cu. Referenced by cuda_bind_patch_pairs(). |
|
|
Definition at line 55 of file ComputeNonbondedCUDAKernel.cu. Referenced by cuda_bind_patch_pairs(), cuda_init(), cuda_nonbonded_forces(), and dev_sum_forces(). |
|
|
Definition at line 69 of file ComputeNonbondedCUDAKernel.cu. Referenced by cuda_bind_patch_pairs(), and cuda_init(). |
|
|
Definition at line 54 of file ComputeNonbondedCUDAKernel.cu. Referenced by cuda_bind_patch_pairs(). |
|
|
Definition at line 32 of file ComputeNonbondedCUDAKernel.cu. Referenced by cuda_bind_force_table(). |
|
|
Definition at line 64 of file ComputeNonbondedCUDAKernel.cu. Referenced by cuda_bind_patch_pairs(), cuda_init(), cuda_load_forces(), and dev_sum_forces(). |
|
|
Definition at line 72 of file ComputeNonbondedCUDAKernel.cu. Referenced by cuda_bind_patch_pairs(). |
|
|
Definition at line 52 of file ComputeNonbondedCUDAKernel.cu. Referenced by cuda_bind_patch_pairs(), cuda_init(), and cuda_nonbonded_forces(). |
|
|
Definition at line 67 of file ComputeNonbondedCUDAKernel.cu. Referenced by cuda_bind_patch_pairs(), and cuda_init(). |
|
|
Definition at line 51 of file ComputeNonbondedCUDAKernel.cu. Referenced by cuda_bind_patch_pairs(). |
|
|
Definition at line 59 of file ComputeNonbondedCUDAKernel.cu. Referenced by cuda_bind_patch_pairs(), cuda_init(), cuda_nonbonded_forces(), and dev_nonbonded(). |
|
|
Definition at line 65 of file ComputeNonbondedCUDAKernel.cu. Referenced by cuda_bind_patch_pairs(), cuda_init(), and cuda_load_forces(). |
|
|
Definition at line 75 of file ComputeNonbondedCUDAKernel.cu. Referenced by cuda_bind_atom_params(), cuda_bind_atoms(), cuda_init(), cuda_load_forces(), cuda_nonbonded_forces(), cuda_stream_finished(), ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
1.3.9.1