Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members

ComputeNonbondedCUDAKernel.cu File Reference

#include "ComputeNonbondedCUDAKernel.h"
#include <stdio.h>

Go to the source code of this file.

Defines

#define SET_EXCL(EXCL, BASE, DIFF)   (EXCL)[((BASE)+(DIFF))>>5] |= (1<<(((BASE)+(DIFF))&31))
#define BLOCK_SIZE   64
#define SHARED_SIZE   16
#define jpqs   jpqu.d
#define japs   japu.d
#define myPatchPair   pp.pp
#define FORCE_INNER_LOOP(IPQ, IAP)

Functions

void cuda_bind_exclusions (const unsigned int *t, int n)
void cuda_bind_force_table (const float4 *t)
void cuda_init ()
void cuda_bind_patch_pairs (const patch_pair *pp, int npp, const force_list *fl, int nfl, int atoms_size_p, int force_buffers_size_p, int max_atoms_per_patch_p)
void cuda_bind_atom_params (const atom_param *t)
void cuda_bind_atoms (const atom *a)
void cuda_load_forces (float4 *f, float4 *f_slow, int begin, int count)
__global__ void dev_nonbonded (const patch_pair *patch_pairs, const atom *atoms, const atom_param *atom_params, float4 *force_buffers, float4 *slow_force_buffers, float3 lata, float3 latb, float3 latc, float cutoff2)
__global__ void dev_sum_forces (const force_list *force_lists, const float4 *force_buffers, float4 *forces)
void cuda_nonbonded_forces (float3 lata, float3 latb, float3 latc, float cutoff2, int cbegin, int ccount, int pbegin, int pcount, int doSlow)
int cuda_stream_finished ()

Variables

__constant__ unsigned int exclusions [MAX_EXCLUSIONS]
texture< float4, 1, cudaReadModeElementType > force_table
int patch_pairs_size
patch_pair * patch_pairs
int force_lists_size
force_list * force_lists
int force_buffers_size
float4 * force_buffers
float4 * slow_force_buffers
int atoms_size
atom * atoms
atom_param * atom_params
float4 * forces
float4 * slow_forces
int patch_pairs_alloc
int force_buffers_alloc
int force_lists_alloc
int atoms_alloc
int max_atoms_per_patch
cudaStream_t stream


Define Documentation

#define BLOCK_SIZE   64
 

Definition at line 220 of file ComputeNonbondedCUDAKernel.cu.

#define FORCE_INNER_LOOP IPQ,
IAP   ) 
 

Referenced by dev_nonbonded().

#define japs   japu.d
 

#define jpqs   jpqu.d
 

#define myPatchPair   pp.pp
 

Referenced by dev_nonbonded().

#define SET_EXCL EXCL,
BASE,
DIFF   )     (EXCL)[((BASE)+(DIFF))>>5] |= (1<<(((BASE)+(DIFF))&31))
 

Definition at line 10 of file ComputeNonbondedCUDAKernel.cu.

#define SHARED_SIZE   16
 

Definition at line 221 of file ComputeNonbondedCUDAKernel.cu.


Function Documentation

void cuda_bind_atom_params const atom_param *  t  ) 
 

Definition at line 166 of file ComputeNonbondedCUDAKernel.cu.

References atom_params, atoms_size, cuda_errcheck(), and stream.

Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice().

00166                                                 {
00167   cudaMemcpyAsync(atom_params, t, atoms_size * sizeof(atom_param),
00168                                 cudaMemcpyHostToDevice, stream);
00169   cuda_errcheck("memcpy to atom_params");
00170 }

void cuda_bind_atoms const atom *  a  ) 
 

Definition at line 172 of file ComputeNonbondedCUDAKernel.cu.

References atoms, atoms_size, cuda_errcheck(), and stream.

Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice().

00172                                     {
00173   cuda_errcheck("before memcpy to atoms");
00174   cudaMemcpyAsync(atoms, a, atoms_size * sizeof(atom),
00175                                 cudaMemcpyHostToDevice, stream);
00176   cuda_errcheck("memcpy to atoms");
00177 }

void cuda_bind_exclusions const unsigned int *  t,
int  n
 

Definition at line 13 of file ComputeNonbondedCUDAKernel.cu.

References cuda_errcheck(), exclusions, MAX_EXCLUSIONS, and SET_EXCL.

Referenced by ComputeNonbondedCUDA::build_exclusions().

00013                                                         {
00014 
00015 #if 0
00016   unsigned int excl[MAX_EXCLUSIONS];
00017 
00018   for ( int i=0; i<MAX_EXCLUSIONS; ++i ) { excl[i] = 0; }
00019 
00020   SET_EXCL(excl,0,2);
00021   SET_EXCL(excl,0,3);
00022   SET_EXCL(excl,0,4);
00023 
00024   cudaMemcpyToSymbol(exclusions, excl, MAX_EXCLUSIONS*sizeof(unsigned int), 0);
00025 #endif
00026 
00027   cudaMemcpyToSymbol(exclusions, t, n*sizeof(unsigned int), 0);
00028   cuda_errcheck("memcpy to exclusions");
00029 }

void cuda_bind_force_table const float4 *  t  ) 
 

Definition at line 34 of file ComputeNonbondedCUDAKernel.cu.

References cuda_errcheck(), force_table, and FORCE_TABLE_SIZE.

Referenced by ComputeNonbondedCUDA::build_force_table().

00034                                             {
00035     static cudaArray *ct;
00036     if ( ! ct ) {
00037       cudaMallocArray(&ct, &force_table.channelDesc, FORCE_TABLE_SIZE, 1);
00038       cuda_errcheck("allocating force table");
00039     }     cudaMemcpyToArray(ct, 0, 0, t, FORCE_TABLE_SIZE*sizeof(float4), cudaMemcpyHostToDevice);     // cudaMemcpy(ct, t, FORCE_TABLE_SIZE*sizeof(float4), cudaMemcpyHostToDevice);
00040     cuda_errcheck("memcpy to force table");
00041 
00042     force_table.normalized = true;
00043     force_table.addressMode[0] = cudaAddressModeClamp;
00044     force_table.addressMode[1] = cudaAddressModeClamp;
00045     force_table.filterMode = cudaFilterModeLinear;
00046 
00047     cudaBindTextureToArray(force_table, ct);
00048     cuda_errcheck("binding force table to texture");
00049 }

void cuda_bind_patch_pairs const patch_pair *  pp,
int  npp,
const force_list *  fl,
int  nfl,
int  atoms_size_p,
int  force_buffers_size_p,
int  max_atoms_per_patch_p
 

Definition at line 96 of file ComputeNonbondedCUDAKernel.cu.

References atom_params, atoms, atoms_alloc, atoms_size, cuda_errcheck(), force_buffers, force_buffers_alloc, force_buffers_size, force_lists, force_lists_alloc, force_lists_size, forces, max_atoms_per_patch, patch_pairs, patch_pairs_alloc, patch_pairs_size, slow_force_buffers, and slow_forces.

Referenced by ComputeNonbondedCUDA::doWork().

00099                                                    {
00100 
00101   patch_pairs_size = npp;
00102   force_buffers_size = force_buffers_size_p;
00103   force_lists_size = nfl;
00104   atoms_size = atoms_size_p;
00105   max_atoms_per_patch = max_atoms_per_patch_p;
00106 
00107 #if 0
00108  printf("%d %d %d %d %d %d %d %d\n",
00109       patch_pairs_size , patch_pairs_alloc ,
00110       force_buffers_size , force_buffers_alloc ,
00111       force_lists_size , force_lists_alloc ,
00112       atoms_size , atoms_alloc );
00113 #endif
00114 
00115  if ( patch_pairs_size > patch_pairs_alloc ||
00116       force_buffers_size > force_buffers_alloc ||
00117       force_lists_size > force_lists_alloc ||
00118       atoms_size > atoms_alloc ) {
00119 
00120   patch_pairs_alloc = (int) (1.2 * patch_pairs_size);
00121   force_buffers_alloc = (int) (1.2 * force_buffers_size);
00122   force_lists_alloc = (int) (1.2 * force_lists_size);
00123   atoms_alloc = (int) (1.2 * atoms_size);
00124 
00125   if ( forces ) cudaFree(forces);
00126   if ( slow_forces ) cudaFree(slow_forces);
00127   if ( atom_params ) cudaFree(atom_params);
00128   if ( atoms ) cudaFree(atoms);
00129   if ( force_buffers ) cudaFree(force_buffers);
00130   if ( slow_force_buffers ) cudaFree(slow_force_buffers);
00131   if ( force_lists ) cudaFree(force_lists);
00132   if ( patch_pairs ) cudaFree(patch_pairs);
00133   cuda_errcheck("free everything");
00134 
00135 #if 1
00136   int totalmem = patch_pairs_alloc * sizeof(patch_pair) +
00137                 force_lists_alloc * sizeof(force_list) +
00138                 2 * force_buffers_alloc * sizeof(float4) +
00139                 atoms_alloc * sizeof(atom) +
00140                 atoms_alloc * sizeof(atom_param) +
00141                 2 * atoms_alloc * sizeof(float4);
00142   // printf("allocating %d MB of memory on GPU\n", totalmem >> 20);
00143 #endif
00144 
00145   cudaMalloc((void**) &patch_pairs, patch_pairs_alloc * sizeof(patch_pair));
00146   cudaMalloc((void**) &force_lists, force_lists_alloc * sizeof(force_list));
00147   cudaMalloc((void**) &force_buffers, force_buffers_alloc * sizeof(float4));
00148   cudaMalloc((void**) &slow_force_buffers, force_buffers_alloc * sizeof(float4));
00149   cudaMalloc((void**) &atoms, atoms_alloc * sizeof(atom));
00150   cudaMalloc((void**) &atom_params, atoms_alloc * sizeof(atom_param));
00151   cudaMalloc((void**) &forces, atoms_alloc * sizeof(float4));
00152   cudaMalloc((void**) &slow_forces, atoms_alloc * sizeof(float4));
00153   cuda_errcheck("malloc everything");
00154 
00155  }
00156 
00157   cudaMemcpy(patch_pairs, pp, npp * sizeof(patch_pair),
00158                                 cudaMemcpyHostToDevice);
00159   cuda_errcheck("memcpy to patch_pairs");
00160 
00161   cudaMemcpy(force_lists, fl, nfl * sizeof(force_list),
00162                                 cudaMemcpyHostToDevice);
00163   cuda_errcheck("memcpy to force_lists");
00164 }

void cuda_init  ) 
 

Definition at line 77 of file ComputeNonbondedCUDAKernel.cu.

References atom_params, atoms, atoms_alloc, cuda_errcheck(), force_buffers, force_buffers_alloc, force_lists, force_lists_alloc, forces, patch_pairs, patch_pairs_alloc, slow_force_buffers, slow_forces, and stream.

Referenced by cuda_initialize().

00077                  {
00078   forces = 0;
00079   slow_forces = 0;
00080   atom_params = 0;
00081   atoms = 0;
00082   force_buffers = 0;
00083   slow_force_buffers = 0;
00084   force_lists = 0;
00085   patch_pairs = 0;
00086 
00087   patch_pairs_alloc = 0;
00088   force_buffers_alloc = 0;
00089   force_lists_alloc = 0;
00090   atoms_alloc = 0;
00091 
00092   cudaStreamCreate(&stream);
00093   cuda_errcheck("cudaStreamCreate");
00094 }

void cuda_load_forces float4 *  f,
float4 *  f_slow,
int  begin,
int  count
 

Definition at line 179 of file ComputeNonbondedCUDAKernel.cu.

References cuda_errcheck(), forces, slow_forces, and stream.

Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice().

00179                                                                        {
00180   // printf("load forces %d %d %d\n",begin,count,atoms_size);
00181   cudaMemcpyAsync(f+begin, forces+begin, count * sizeof(float4),
00182                                 cudaMemcpyDeviceToHost, stream);
00183   if ( f_slow ) {
00184     cudaMemcpyAsync(f_slow+begin, slow_forces+begin, count * sizeof(float4),
00185                                 cudaMemcpyDeviceToHost, stream);
00186   }
00187   cuda_errcheck("memcpy from forces");
00188 }

void cuda_nonbonded_forces float3  lata,
float3  latb,
float3  latc,
float  cutoff2,
int  cbegin,
int  ccount,
int  pbegin,
int  pcount,
int  doSlow
 

Definition at line 468 of file ComputeNonbondedCUDAKernel.cu.

References cuda_errcheck(), dev_nonbonded(), dev_sum_forces(), force_lists, patch_pairs, slow_force_buffers, and stream.

Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice().

00469                                                                             {
00470 
00471  if ( ccount ) {
00472   // printf("%d %d %d\n",cbegin,ccount,patch_pairs_size);
00473   dev_nonbonded<<< ccount, BLOCK_SIZE, 0, stream
00474         >>>(patch_pairs+cbegin,atoms,atom_params,force_buffers,
00475              (doSlow?slow_force_buffers:0), lata, latb, latc, cutoff2);
00476   cuda_errcheck("dev_nonbonded");
00477  }
00478 
00479  if ( pcount ) {
00480   // printf("%d %d %d\n",pbegin,pcount,force_lists_size);
00481   dev_sum_forces<<< pcount, 64, 0, stream
00482         >>>(force_lists+pbegin,force_buffers,forces);
00483   if ( doSlow ) {
00484     dev_sum_forces<<< pcount, 64, 0, stream
00485         >>>(force_lists+pbegin,slow_force_buffers,slow_forces);
00486   }
00487   cuda_errcheck("dev_sum_forces");
00488  }
00489 
00490 }

int cuda_stream_finished  ) 
 

Definition at line 493 of file ComputeNonbondedCUDAKernel.cu.

References stream.

Referenced by ComputeNonbondedCUDA::doWork().

00493                            {
00494   return ( cudaStreamQuery(stream) == cudaSuccess );
00495 }

__global__ void dev_nonbonded const patch_pair *  patch_pairs,
const atom *  atoms,
const atom_param *  atom_params,
float4 *  force_buffers,
float4 *  slow_force_buffers,
float3  lata,
float3  latb,
float3  latc,
float  cutoff2
[static]
 

Definition at line 223 of file ComputeNonbondedCUDAKernel.cu.

References atom_params, atoms, force_buffers, FORCE_INNER_LOOP, j, myPatchPair, and slow_force_buffers.

Referenced by cuda_nonbonded_forces().

00230                        {
00231 // call with two blocks per patch_pair
00232 // call with BLOCK_SIZE threads per block
00233 // call with no shared memory
00234 
00235   #define jpqs jpqu.d
00236   __shared__ union {
00237     atom d[SHARED_SIZE];
00238     unsigned int i[BLOCK_SIZE];
00239     float f[BLOCK_SIZE];
00240   } jpqu;
00241 
00242   #define japs japu.d
00243   __shared__ union {
00244     atom_param d[SHARED_SIZE];
00245     unsigned int i[BLOCK_SIZE];
00246   } japu;
00247 
00248   #define myPatchPair pp.pp
00249   __shared__ union { patch_pair pp; unsigned int i[12]; } pp;
00250 
00251   if ( threadIdx.x < (sizeof(patch_pair)>>2) ) {
00252     unsigned int tmp = ((unsigned int*)patch_pairs)[
00253                         (sizeof(patch_pair)>>2)*blockIdx.x+threadIdx.x];
00254     pp.i[threadIdx.x] = tmp;
00255   }
00256   __syncthreads();
00257 
00258   // convert scaled offset with current lattice
00259   if ( threadIdx.x == 0 ) {
00260     float offx = myPatchPair.offset.x * lata.x
00261                + myPatchPair.offset.y * latb.x
00262                + myPatchPair.offset.z * latc.x;
00263     float offy = myPatchPair.offset.x * lata.y
00264                + myPatchPair.offset.y * latb.y
00265                + myPatchPair.offset.z * latc.y;
00266     float offz = myPatchPair.offset.x * lata.z
00267                + myPatchPair.offset.y * latb.z
00268                + myPatchPair.offset.z * latc.z;
00269     myPatchPair.offset.x = offx;
00270     myPatchPair.offset.y = offy;
00271     myPatchPair.offset.z = offz;
00272   }
00273   __syncthreads();
00274 
00275 #if 0
00276 // compute records duplicated so this is no longer needed
00277   if ( blockIdx.y ) {
00278 
00279     if ( myPatchPair.patch1_force_start == myPatchPair.patch2_force_start ) {
00280       return;
00281     }
00282 
00283   } else {  // swap patches
00284 
00285     if ( threadIdx.x == 0 &&
00286          myPatchPair.patch1_force_start != myPatchPair.patch2_force_start ) {
00287 
00288 #undef SWAP
00289 #define SWAP(FIELD1, FIELD2) { \
00290         unsigned int tmp1 = myPatchPair.FIELD1; \
00291         unsigned int tmp2 = myPatchPair.FIELD2; \
00292         if ( tmp1 != tmp2 ) { \
00293           myPatchPair.FIELD1 = tmp2; \
00294           myPatchPair.FIELD2 = tmp1; \
00295         } \
00296       }
00297 
00298       SWAP(patch1_size, patch2_size)
00299       SWAP(patch1_force_size, patch2_force_size)
00300       SWAP(patch1_atom_start, patch2_atom_start)
00301       SWAP(patch1_force_start, patch2_force_start)
00302 
00303       myPatchPair.offset.x *= -1.f;
00304       myPatchPair.offset.y *= -1.f;
00305       myPatchPair.offset.z *= -1.f;
00306 
00307     }
00308 
00309     __syncthreads();
00310 
00311   }
00312 #endif
00313 
00314   for ( int blocki = 0;
00315         blocki < myPatchPair.patch1_force_size;
00316         blocki += BLOCK_SIZE ) {
00317 
00318   __syncthreads();
00319 
00320   atom ipq;
00321   struct {
00322     float sqrt_epsilon;
00323     float half_sigma;
00324     int index; } iap;
00325 
00326   // load patch 1
00327   if ( blocki + threadIdx.x < myPatchPair.patch1_force_size ) {
00328     int i = myPatchPair.patch1_atom_start + blocki + threadIdx.x;
00329     float4 tmpa = ((float4*)atoms)[i];
00330 
00331     ipq.position.x = tmpa.x + myPatchPair.offset.x;
00332     ipq.position.y = tmpa.y + myPatchPair.offset.y;
00333     ipq.position.z = tmpa.z + myPatchPair.offset.z;
00334     ipq.charge = tmpa.w;
00335 
00336     uint4 tmpap = ((uint4*)atom_params)[i];
00337 
00338     jpqu.i[threadIdx.x] = tmpap.x;
00339     iap.sqrt_epsilon = jpqu.f[threadIdx.x];
00340     jpqu.i[threadIdx.x] = tmpap.y;
00341     iap.half_sigma = jpqu.f[threadIdx.x];
00342     iap.index = tmpap.z;
00343   }
00344 
00345   float4 ife, ife_slow;
00346   ife.x = 0.f;
00347   ife.y = 0.f;
00348   ife.z = 0.f;
00349   ife.w = 0.f;
00350   ife_slow.x = 0.f;
00351   ife_slow.y = 0.f;
00352   ife_slow.z = 0.f;
00353   ife_slow.w = 0.f;
00354 
00355   for ( int blockj = 0;
00356         blockj < myPatchPair.patch2_size;
00357         blockj += SHARED_SIZE ) {
00358 
00359   int shared_size = myPatchPair.patch2_size - blockj;
00360   if ( shared_size > SHARED_SIZE ) shared_size = SHARED_SIZE;
00361 
00362   // load patch 2
00363   // sync needed because of loop, could avoid with double-buffering
00364   __syncthreads();
00365 
00366   if ( threadIdx.x < 4 * shared_size ) {
00367     int j = myPatchPair.patch2_atom_start + blockj;
00368     jpqu.i[threadIdx.x] = ((unsigned int *)(atoms + j))[threadIdx.x];
00369     japu.i[threadIdx.x] = ((unsigned int *)(atom_params + j))[threadIdx.x];
00370   }
00371   __syncthreads();
00372 
00373   // calc forces on patch 1
00374   if ( blocki + threadIdx.x < myPatchPair.patch1_force_size ) {
00375 
00376 // be careful not to use // comments inside macros!
00377 #define FORCE_INNER_LOOP(IPQ,IAP) \
00378     for ( int j = 0; j < shared_size; ++j ) { \
00379       /* actually calculate force */ \
00380       float tmpx = jpqs[j].position.x - IPQ.position.x; \
00381       float tmpy = jpqs[j].position.y - IPQ.position.y; \
00382       float tmpz = jpqs[j].position.z - IPQ.position.z; \
00383       float r2 = tmpx*tmpx + tmpy*tmpy + tmpz*tmpz; \
00384       if ( r2 < cutoff2 ) { \
00385         float4 fi = tex1D(force_table, 1.f/sqrt(r2)); \
00386         bool excluded = false; \
00387         int indexdiff = (int)(IAP.index) - (int)(japs[j].index); \
00388         if ( abs(indexdiff) <= (int) japs[j].excl_maxdiff ) { \
00389           indexdiff += japs[j].excl_index; \
00390           excluded = ((exclusions[indexdiff>>5] & (1<<(indexdiff&31))) != 0); \
00391         } \
00392         float e = IAP.half_sigma + japs[j].half_sigma;  /* sigma */ \
00393         e *= e*e;  /* sigma^3 */ \
00394         e *= e;  /* sigma^6 */ \
00395         e *= ( e * fi.z + fi.y );  /* s^12 * fi.z - s^6 * fi.y */ \
00396         e *= IAP.sqrt_epsilon * japs[j].sqrt_epsilon;  /* full L-J */ \
00397         float e_slow = IPQ.charge * jpqs[j].charge; \
00398         e += e_slow * fi.x; \
00399         e_slow *= fi.w; \
00400         if ( ! excluded ) { \
00401           ife.w += r2 * e; \
00402           ife.x += tmpx * e; \
00403           ife.y += tmpy * e; \
00404           ife.z += tmpz * e; \
00405           ife_slow.w += r2 * e_slow; \
00406           ife_slow.x += tmpx * e_slow; \
00407           ife_slow.y += tmpy * e_slow; \
00408           ife_slow.z += tmpz * e_slow; \
00409         } \
00410       }  /* cutoff */ \
00411     }
00412     FORCE_INNER_LOOP(ipq,iap)
00413 
00414   } // if
00415   } // blockj loop
00416 
00417   if ( blocki + threadIdx.x < myPatchPair.patch1_force_size ) {
00418     int i_out = myPatchPair.patch1_force_start + blocki + threadIdx.x;
00419     force_buffers[i_out] = ife;
00420     if ( slow_force_buffers ) {
00421       slow_force_buffers[i_out] = ife_slow;
00422     }
00423   }
00424 
00425   } // blocki loop
00426 
00427 }

__global__ void dev_sum_forces const force_list *  force_lists,
const float4 *  force_buffers,
float4 *  forces
[static]
 

Definition at line 430 of file ComputeNonbondedCUDAKernel.cu.

References force_buffers, force_lists, forces, and j.

Referenced by cuda_nonbonded_forces().

00433                         {
00434 // call with one block per patch
00435 // call multiple of 64 threads per block
00436 // call with no shared memory
00437 
00438   __shared__ force_list myForceList;
00439 
00440   if ( threadIdx.x == 0 ) {
00441     myForceList = force_lists[blockIdx.x];
00442   }
00443   __syncthreads();
00444 
00445   for ( int j = threadIdx.x; j < myForceList.patch_size; j += blockDim.x ) {
00446 
00447     const float4 *fbuf = force_buffers + myForceList.force_list_start + j;
00448     float4 fout;
00449     fout.x = 0.f;
00450     fout.y = 0.f;
00451     fout.z = 0.f;
00452     fout.w = 0.f;
00453     for ( int i=0; i < myForceList.force_list_size; ++i ) {
00454       float4 f = *fbuf;
00455       fout.x += f.x;
00456       fout.y += f.y;
00457       fout.z += f.z;
00458       fout.w += f.w;
00459       fbuf += myForceList.patch_size;
00460     }
00461 
00462     forces[myForceList.force_output_start + j] = fout;
00463 
00464   }
00465 }


Variable Documentation

atom_param* atom_params [static]
 

Definition at line 63 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_atom_params(), cuda_bind_patch_pairs(), cuda_init(), and dev_nonbonded().

atom* atoms [static]
 

Definition at line 62 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_atoms(), cuda_bind_patch_pairs(), cuda_init(), and dev_nonbonded().

int atoms_alloc [static]
 

Definition at line 70 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), and cuda_init().

int atoms_size [static]
 

Definition at line 61 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_atom_params(), cuda_bind_atoms(), and cuda_bind_patch_pairs().

__constant__ unsigned int exclusions[MAX_EXCLUSIONS]
 

Definition at line 8 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_exclusions(), and Molecule::get_exclusion().

float4* force_buffers [static]
 

Definition at line 58 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), cuda_init(), dev_nonbonded(), and dev_sum_forces().

int force_buffers_alloc [static]
 

Definition at line 68 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), and cuda_init().

int force_buffers_size [static]
 

Definition at line 57 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs().

force_list* force_lists [static]
 

Definition at line 55 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), cuda_init(), cuda_nonbonded_forces(), and dev_sum_forces().

int force_lists_alloc [static]
 

Definition at line 69 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), and cuda_init().

int force_lists_size [static]
 

Definition at line 54 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs().

texture<float4, 1, cudaReadModeElementType> force_table
 

Definition at line 32 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_force_table().

float4* forces [static]
 

Definition at line 64 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), cuda_init(), cuda_load_forces(), and dev_sum_forces().

int max_atoms_per_patch [static]
 

Definition at line 72 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs().

patch_pair* patch_pairs [static]
 

Definition at line 52 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), cuda_init(), and cuda_nonbonded_forces().

int patch_pairs_alloc [static]
 

Definition at line 67 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), and cuda_init().

int patch_pairs_size [static]
 

Definition at line 51 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs().

float4* slow_force_buffers [static]
 

Definition at line 59 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), cuda_init(), cuda_nonbonded_forces(), and dev_nonbonded().

float4* slow_forces [static]
 

Definition at line 65 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), cuda_init(), and cuda_load_forces().

cudaStream_t stream
 

Definition at line 75 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_atom_params(), cuda_bind_atoms(), cuda_init(), cuda_load_forces(), cuda_nonbonded_forces(), cuda_stream_finished(), ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice().


Generated on Tue Nov 24 04:07:46 2009 for NAMD by  doxygen 1.3.9.1