Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members

ComputeNonbondedCUDAKernel.h File Reference

Go to the source code of this file.

Classes

struct  shared_atom

Defines

#define __align__(X)
#define COPY_ATOM(DEST, SOURCE)
#define COPY_PARAM(DEST, SOURCE)
#define COPY_ATOM_TO_SHARED(ATOM, PARAM, SHARED)
#define COPY_ATOM_FROM_SHARED(ATOM, PARAM, SHARED)
#define MAX_ATOMS_PER_PATCH   384
#define MAX_EXCLUSIONS   2048
#define FORCE_TABLE_SIZE   4096

Functions

void cuda_errcheck (const char *msg)
struct __align__ (16) patch_pair
void cuda_bind_exclusions (const unsigned int *t, int n)
void cuda_bind_force_table (const float4 *t)
void cuda_init ()
void cuda_bind_patch_pairs (const patch_pair *pp, int npp, const force_list *fl, int nfl, int atoms_size_p, int force_buffers_size_p, int max_atoms_per_patch_p)
void cuda_bind_atom_params (const atom_param *t)
void cuda_bind_atoms (const atom *a)
void cuda_load_forces (float4 *f, float4 *f_slow, int begin, int count)
void cuda_nonbonded_forces (float3 lata, float3 latb, float3 latc, float cutoff2, int cbegin, int ccount, int pbegin, int pcount, int doSlow)
int cuda_stream_finished ()


Define Documentation

#define __align__  ) 
 

Definition at line 8 of file ComputeNonbondedCUDAKernel.h.

Referenced by __align__().

#define COPY_ATOM DEST,
SOURCE   ) 
 

Value:

{ \
  DEST.position.x = SOURCE.position.x; \
  DEST.position.y = SOURCE.position.y; \
  DEST.position.z = SOURCE.position.z; \
  DEST.charge = SOURCE.charge; \
  }

Definition at line 54 of file ComputeNonbondedCUDAKernel.h.

#define COPY_ATOM_FROM_SHARED ATOM,
PARAM,
SHARED   ) 
 

Value:

{ \
    COPY_ATOM( ATOM, SHARED ) \
    COPY_PARAM( PARAM, SHARED ) \
  }

Definition at line 74 of file ComputeNonbondedCUDAKernel.h.

#define COPY_ATOM_TO_SHARED ATOM,
PARAM,
SHARED   ) 
 

Value:

{ \
    COPY_ATOM( SHARED, ATOM ) \
    COPY_PARAM( SHARED, PARAM ) \
  }

Definition at line 69 of file ComputeNonbondedCUDAKernel.h.

#define COPY_PARAM DEST,
SOURCE   ) 
 

Value:

{ \
  DEST.sqrt_epsilon = SOURCE.sqrt_epsilon; \
  DEST.half_sigma = SOURCE.half_sigma; \
  DEST.index = SOURCE.index; \
  DEST.excl_index = SOURCE.excl_index; \
  DEST.excl_maxdiff = SOURCE.excl_maxdiff; \
  }

Definition at line 61 of file ComputeNonbondedCUDAKernel.h.

#define FORCE_TABLE_SIZE   4096
 

Definition at line 91 of file ComputeNonbondedCUDAKernel.h.

Referenced by ComputeNonbondedCUDA::build_force_table(), and cuda_bind_force_table().

#define MAX_ATOMS_PER_PATCH   384
 

Definition at line 80 of file ComputeNonbondedCUDAKernel.h.

Referenced by ComputeNonbondedCUDA::doWork().

#define MAX_EXCLUSIONS   2048
 

Definition at line 83 of file ComputeNonbondedCUDAKernel.h.

Referenced by cuda_bind_exclusions().


Function Documentation

struct __align__ 16   ) 
 

Definition at line 11 of file ComputeNonbondedCUDAKernel.h.

References __align__.

00011                                 {  // must be multiple of 16!
00012   float4 offset;
00013   unsigned int patch1_size;
00014   unsigned int patch2_size;
00015   unsigned int patch1_force_size;  // non-fixed atoms at start of list
00016   // unsigned int patch2_force_size;
00017   unsigned int patch1_atom_start;
00018   unsigned int patch2_atom_start;
00019   unsigned int patch1_force_start;
00020   // unsigned int patch2_force_start;  // same as patch1_force_start for self
00021   unsigned int pad1, pad2;
00022 };

void cuda_bind_atom_params const atom_param *  t  ) 
 

Definition at line 166 of file ComputeNonbondedCUDAKernel.cu.

References atom_params, atoms_size, cuda_errcheck(), and stream.

Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice().

00166                                                 {
00167   cudaMemcpyAsync(atom_params, t, atoms_size * sizeof(atom_param),
00168                                 cudaMemcpyHostToDevice, stream);
00169   cuda_errcheck("memcpy to atom_params");
00170 }

void cuda_bind_atoms const atom *  a  ) 
 

Definition at line 172 of file ComputeNonbondedCUDAKernel.cu.

References atoms, atoms_size, cuda_errcheck(), and stream.

Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice().

00172                                     {
00173   cuda_errcheck("before memcpy to atoms");
00174   cudaMemcpyAsync(atoms, a, atoms_size * sizeof(atom),
00175                                 cudaMemcpyHostToDevice, stream);
00176   cuda_errcheck("memcpy to atoms");
00177 }

void cuda_bind_exclusions const unsigned int *  t,
int  n
 

Definition at line 13 of file ComputeNonbondedCUDAKernel.cu.

References cuda_errcheck(), exclusions, MAX_EXCLUSIONS, and SET_EXCL.

Referenced by ComputeNonbondedCUDA::build_exclusions().

00013                                                         {
00014 
00015 #if 0
00016   unsigned int excl[MAX_EXCLUSIONS];
00017 
00018   for ( int i=0; i<MAX_EXCLUSIONS; ++i ) { excl[i] = 0; }
00019 
00020   SET_EXCL(excl,0,2);
00021   SET_EXCL(excl,0,3);
00022   SET_EXCL(excl,0,4);
00023 
00024   cudaMemcpyToSymbol(exclusions, excl, MAX_EXCLUSIONS*sizeof(unsigned int), 0);
00025 #endif
00026 
00027   cudaMemcpyToSymbol(exclusions, t, n*sizeof(unsigned int), 0);
00028   cuda_errcheck("memcpy to exclusions");
00029 }

void cuda_bind_force_table const float4 *  t  ) 
 

Definition at line 34 of file ComputeNonbondedCUDAKernel.cu.

References cuda_errcheck(), force_table, and FORCE_TABLE_SIZE.

Referenced by ComputeNonbondedCUDA::build_force_table().

00034                                             {
00035     static cudaArray *ct;
00036     if ( ! ct ) {
00037       cudaMallocArray(&ct, &force_table.channelDesc, FORCE_TABLE_SIZE, 1);
00038       cuda_errcheck("allocating force table");
00039     }     cudaMemcpyToArray(ct, 0, 0, t, FORCE_TABLE_SIZE*sizeof(float4), cudaMemcpyHostToDevice);     // cudaMemcpy(ct, t, FORCE_TABLE_SIZE*sizeof(float4), cudaMemcpyHostToDevice);
00040     cuda_errcheck("memcpy to force table");
00041 
00042     force_table.normalized = true;
00043     force_table.addressMode[0] = cudaAddressModeClamp;
00044     force_table.addressMode[1] = cudaAddressModeClamp;
00045     force_table.filterMode = cudaFilterModeLinear;
00046 
00047     cudaBindTextureToArray(force_table, ct);
00048     cuda_errcheck("binding force table to texture");
00049 }

void cuda_bind_patch_pairs const patch_pair *  pp,
int  npp,
const force_list *  fl,
int  nfl,
int  atoms_size_p,
int  force_buffers_size_p,
int  max_atoms_per_patch_p
 

Definition at line 96 of file ComputeNonbondedCUDAKernel.cu.

References atom_params, atoms, atoms_alloc, atoms_size, cuda_errcheck(), force_buffers, force_buffers_alloc, force_buffers_size, force_lists, force_lists_alloc, force_lists_size, forces, max_atoms_per_patch, patch_pairs, patch_pairs_alloc, patch_pairs_size, slow_force_buffers, and slow_forces.

Referenced by ComputeNonbondedCUDA::doWork().

00099                                                    {
00100 
00101   patch_pairs_size = npp;
00102   force_buffers_size = force_buffers_size_p;
00103   force_lists_size = nfl;
00104   atoms_size = atoms_size_p;
00105   max_atoms_per_patch = max_atoms_per_patch_p;
00106 
00107 #if 0
00108  printf("%d %d %d %d %d %d %d %d\n",
00109       patch_pairs_size , patch_pairs_alloc ,
00110       force_buffers_size , force_buffers_alloc ,
00111       force_lists_size , force_lists_alloc ,
00112       atoms_size , atoms_alloc );
00113 #endif
00114 
00115  if ( patch_pairs_size > patch_pairs_alloc ||
00116       force_buffers_size > force_buffers_alloc ||
00117       force_lists_size > force_lists_alloc ||
00118       atoms_size > atoms_alloc ) {
00119 
00120   patch_pairs_alloc = (int) (1.2 * patch_pairs_size);
00121   force_buffers_alloc = (int) (1.2 * force_buffers_size);
00122   force_lists_alloc = (int) (1.2 * force_lists_size);
00123   atoms_alloc = (int) (1.2 * atoms_size);
00124 
00125   if ( forces ) cudaFree(forces);
00126   if ( slow_forces ) cudaFree(slow_forces);
00127   if ( atom_params ) cudaFree(atom_params);
00128   if ( atoms ) cudaFree(atoms);
00129   if ( force_buffers ) cudaFree(force_buffers);
00130   if ( slow_force_buffers ) cudaFree(slow_force_buffers);
00131   if ( force_lists ) cudaFree(force_lists);
00132   if ( patch_pairs ) cudaFree(patch_pairs);
00133   cuda_errcheck("free everything");
00134 
00135 #if 1
00136   int totalmem = patch_pairs_alloc * sizeof(patch_pair) +
00137                 force_lists_alloc * sizeof(force_list) +
00138                 2 * force_buffers_alloc * sizeof(float4) +
00139                 atoms_alloc * sizeof(atom) +
00140                 atoms_alloc * sizeof(atom_param) +
00141                 2 * atoms_alloc * sizeof(float4);
00142   // printf("allocating %d MB of memory on GPU\n", totalmem >> 20);
00143 #endif
00144 
00145   cudaMalloc((void**) &patch_pairs, patch_pairs_alloc * sizeof(patch_pair));
00146   cudaMalloc((void**) &force_lists, force_lists_alloc * sizeof(force_list));
00147   cudaMalloc((void**) &force_buffers, force_buffers_alloc * sizeof(float4));
00148   cudaMalloc((void**) &slow_force_buffers, force_buffers_alloc * sizeof(float4));
00149   cudaMalloc((void**) &atoms, atoms_alloc * sizeof(atom));
00150   cudaMalloc((void**) &atom_params, atoms_alloc * sizeof(atom_param));
00151   cudaMalloc((void**) &forces, atoms_alloc * sizeof(float4));
00152   cudaMalloc((void**) &slow_forces, atoms_alloc * sizeof(float4));
00153   cuda_errcheck("malloc everything");
00154 
00155  }
00156 
00157   cudaMemcpy(patch_pairs, pp, npp * sizeof(patch_pair),
00158                                 cudaMemcpyHostToDevice);
00159   cuda_errcheck("memcpy to patch_pairs");
00160 
00161   cudaMemcpy(force_lists, fl, nfl * sizeof(force_list),
00162                                 cudaMemcpyHostToDevice);
00163   cuda_errcheck("memcpy to force_lists");
00164 }

void cuda_errcheck const char *  msg  ) 
 

Definition at line 20 of file ComputeNonbondedCUDA.C.

References NAMD_die().

Referenced by cuda_bind_atom_params(), cuda_bind_atoms(), cuda_bind_exclusions(), cuda_bind_force_table(), cuda_bind_patch_pairs(), cuda_init(), cuda_load_forces(), cuda_nonbonded_forces(), and ComputeNonbondedCUDA::finishWork().

00020                                     {
00021   cudaError_t err;
00022   if ((err = cudaGetLastError()) != cudaSuccess) {
00023     char errmsg[1024];
00024     sprintf(errmsg,"CUDA error %s: %s", msg, cudaGetErrorString(err));
00025     NAMD_die(errmsg);
00026   }
00027 }

void cuda_init  ) 
 

Definition at line 77 of file ComputeNonbondedCUDAKernel.cu.

References atom_params, atoms, atoms_alloc, cuda_errcheck(), force_buffers, force_buffers_alloc, force_lists, force_lists_alloc, forces, patch_pairs, patch_pairs_alloc, slow_force_buffers, slow_forces, and stream.

Referenced by cuda_initialize().

00077                  {
00078   forces = 0;
00079   slow_forces = 0;
00080   atom_params = 0;
00081   atoms = 0;
00082   force_buffers = 0;
00083   slow_force_buffers = 0;
00084   force_lists = 0;
00085   patch_pairs = 0;
00086 
00087   patch_pairs_alloc = 0;
00088   force_buffers_alloc = 0;
00089   force_lists_alloc = 0;
00090   atoms_alloc = 0;
00091 
00092   cudaStreamCreate(&stream);
00093   cuda_errcheck("cudaStreamCreate");
00094 }

void cuda_load_forces float4 *  f,
float4 *  f_slow,
int  begin,
int  count
 

Definition at line 179 of file ComputeNonbondedCUDAKernel.cu.

References cuda_errcheck(), forces, slow_forces, and stream.

Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice().

00179                                                                        {
00180   // printf("load forces %d %d %d\n",begin,count,atoms_size);
00181   cudaMemcpyAsync(f+begin, forces+begin, count * sizeof(float4),
00182                                 cudaMemcpyDeviceToHost, stream);
00183   if ( f_slow ) {
00184     cudaMemcpyAsync(f_slow+begin, slow_forces+begin, count * sizeof(float4),
00185                                 cudaMemcpyDeviceToHost, stream);
00186   }
00187   cuda_errcheck("memcpy from forces");
00188 }

void cuda_nonbonded_forces float3  lata,
float3  latb,
float3  latc,
float  cutoff2,
int  cbegin,
int  ccount,
int  pbegin,
int  pcount,
int  doSlow
 

Definition at line 468 of file ComputeNonbondedCUDAKernel.cu.

References cuda_errcheck(), dev_nonbonded(), dev_sum_forces(), force_lists, patch_pairs, slow_force_buffers, and stream.

Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice().

00469                                                                             {
00470 
00471  if ( ccount ) {
00472   // printf("%d %d %d\n",cbegin,ccount,patch_pairs_size);
00473   dev_nonbonded<<< ccount, BLOCK_SIZE, 0, stream
00474         >>>(patch_pairs+cbegin,atoms,atom_params,force_buffers,
00475              (doSlow?slow_force_buffers:0), lata, latb, latc, cutoff2);
00476   cuda_errcheck("dev_nonbonded");
00477  }
00478 
00479  if ( pcount ) {
00480   // printf("%d %d %d\n",pbegin,pcount,force_lists_size);
00481   dev_sum_forces<<< pcount, 64, 0, stream
00482         >>>(force_lists+pbegin,force_buffers,forces);
00483   if ( doSlow ) {
00484     dev_sum_forces<<< pcount, 64, 0, stream
00485         >>>(force_lists+pbegin,slow_force_buffers,slow_forces);
00486   }
00487   cuda_errcheck("dev_sum_forces");
00488  }
00489 
00490 }

int cuda_stream_finished  ) 
 

Definition at line 493 of file ComputeNonbondedCUDAKernel.cu.

References stream.

Referenced by ComputeNonbondedCUDA::doWork().

00493                            {
00494   return ( cudaStreamQuery(stream) == cudaSuccess );
00495 }


Generated on Tue Nov 24 04:07:46 2009 for NAMD by  doxygen 1.3.9.1