Go to the source code of this file.
Classes | |
| struct | shared_atom |
Defines | |
| #define | __align__(X) |
| #define | COPY_ATOM(DEST, SOURCE) |
| #define | COPY_PARAM(DEST, SOURCE) |
| #define | COPY_ATOM_TO_SHARED(ATOM, PARAM, SHARED) |
| #define | COPY_ATOM_FROM_SHARED(ATOM, PARAM, SHARED) |
| #define | MAX_ATOMS_PER_PATCH 384 |
| #define | MAX_EXCLUSIONS 2048 |
| #define | FORCE_TABLE_SIZE 4096 |
Functions | |
| void | cuda_errcheck (const char *msg) |
| struct | __align__ (16) patch_pair |
| void | cuda_bind_exclusions (const unsigned int *t, int n) |
| void | cuda_bind_force_table (const float4 *t) |
| void | cuda_init () |
| void | cuda_bind_patch_pairs (const patch_pair *pp, int npp, const force_list *fl, int nfl, int atoms_size_p, int force_buffers_size_p, int max_atoms_per_patch_p) |
| void | cuda_bind_atom_params (const atom_param *t) |
| void | cuda_bind_atoms (const atom *a) |
| void | cuda_load_forces (float4 *f, float4 *f_slow, int begin, int count) |
| void | cuda_nonbonded_forces (float3 lata, float3 latb, float3 latc, float cutoff2, int cbegin, int ccount, int pbegin, int pcount, int doSlow) |
| int | cuda_stream_finished () |
|
|
Definition at line 8 of file ComputeNonbondedCUDAKernel.h. Referenced by __align__(). |
|
|
Value: { \
DEST.position.x = SOURCE.position.x; \
DEST.position.y = SOURCE.position.y; \
DEST.position.z = SOURCE.position.z; \
DEST.charge = SOURCE.charge; \
}
Definition at line 54 of file ComputeNonbondedCUDAKernel.h. |
|
|
Value: { \
COPY_ATOM( ATOM, SHARED ) \
COPY_PARAM( PARAM, SHARED ) \
}
Definition at line 74 of file ComputeNonbondedCUDAKernel.h. |
|
|
Value: { \
COPY_ATOM( SHARED, ATOM ) \
COPY_PARAM( SHARED, PARAM ) \
}
Definition at line 69 of file ComputeNonbondedCUDAKernel.h. |
|
|
Value: { \
DEST.sqrt_epsilon = SOURCE.sqrt_epsilon; \
DEST.half_sigma = SOURCE.half_sigma; \
DEST.index = SOURCE.index; \
DEST.excl_index = SOURCE.excl_index; \
DEST.excl_maxdiff = SOURCE.excl_maxdiff; \
}
Definition at line 61 of file ComputeNonbondedCUDAKernel.h. |
|
|
Definition at line 91 of file ComputeNonbondedCUDAKernel.h. Referenced by ComputeNonbondedCUDA::build_force_table(), and cuda_bind_force_table(). |
|
|
Definition at line 80 of file ComputeNonbondedCUDAKernel.h. Referenced by ComputeNonbondedCUDA::doWork(). |
|
|
Definition at line 83 of file ComputeNonbondedCUDAKernel.h. Referenced by cuda_bind_exclusions(). |
|
|
Definition at line 11 of file ComputeNonbondedCUDAKernel.h. References __align__. 00011 { // must be multiple of 16!
00012 float4 offset;
00013 unsigned int patch1_size;
00014 unsigned int patch2_size;
00015 unsigned int patch1_force_size; // non-fixed atoms at start of list
00016 // unsigned int patch2_force_size;
00017 unsigned int patch1_atom_start;
00018 unsigned int patch2_atom_start;
00019 unsigned int patch1_force_start;
00020 // unsigned int patch2_force_start; // same as patch1_force_start for self
00021 unsigned int pad1, pad2;
00022 };
|
|
|
Definition at line 166 of file ComputeNonbondedCUDAKernel.cu. References atom_params, atoms_size, cuda_errcheck(), and stream. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). 00166 {
00167 cudaMemcpyAsync(atom_params, t, atoms_size * sizeof(atom_param),
00168 cudaMemcpyHostToDevice, stream);
00169 cuda_errcheck("memcpy to atom_params");
00170 }
|
|
|
Definition at line 172 of file ComputeNonbondedCUDAKernel.cu. References atoms, atoms_size, cuda_errcheck(), and stream. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). 00172 {
00173 cuda_errcheck("before memcpy to atoms");
00174 cudaMemcpyAsync(atoms, a, atoms_size * sizeof(atom),
00175 cudaMemcpyHostToDevice, stream);
00176 cuda_errcheck("memcpy to atoms");
00177 }
|
|
||||||||||||
|
Definition at line 13 of file ComputeNonbondedCUDAKernel.cu. References cuda_errcheck(), exclusions, MAX_EXCLUSIONS, and SET_EXCL. Referenced by ComputeNonbondedCUDA::build_exclusions(). 00013 {
00014
00015 #if 0
00016 unsigned int excl[MAX_EXCLUSIONS];
00017
00018 for ( int i=0; i<MAX_EXCLUSIONS; ++i ) { excl[i] = 0; }
00019
00020 SET_EXCL(excl,0,2);
00021 SET_EXCL(excl,0,3);
00022 SET_EXCL(excl,0,4);
00023
00024 cudaMemcpyToSymbol(exclusions, excl, MAX_EXCLUSIONS*sizeof(unsigned int), 0);
00025 #endif
00026
00027 cudaMemcpyToSymbol(exclusions, t, n*sizeof(unsigned int), 0);
00028 cuda_errcheck("memcpy to exclusions");
00029 }
|
|
|
Definition at line 34 of file ComputeNonbondedCUDAKernel.cu. References cuda_errcheck(), force_table, and FORCE_TABLE_SIZE. Referenced by ComputeNonbondedCUDA::build_force_table(). 00034 {
00035 static cudaArray *ct;
00036 if ( ! ct ) {
00037 cudaMallocArray(&ct, &force_table.channelDesc, FORCE_TABLE_SIZE, 1);
00038 cuda_errcheck("allocating force table");
00039 } cudaMemcpyToArray(ct, 0, 0, t, FORCE_TABLE_SIZE*sizeof(float4), cudaMemcpyHostToDevice); // cudaMemcpy(ct, t, FORCE_TABLE_SIZE*sizeof(float4), cudaMemcpyHostToDevice);
00040 cuda_errcheck("memcpy to force table");
00041
00042 force_table.normalized = true;
00043 force_table.addressMode[0] = cudaAddressModeClamp;
00044 force_table.addressMode[1] = cudaAddressModeClamp;
00045 force_table.filterMode = cudaFilterModeLinear;
00046
00047 cudaBindTextureToArray(force_table, ct);
00048 cuda_errcheck("binding force table to texture");
00049 }
|
|
||||||||||||||||||||||||||||||||
|
Definition at line 96 of file ComputeNonbondedCUDAKernel.cu. References atom_params, atoms, atoms_alloc, atoms_size, cuda_errcheck(), force_buffers, force_buffers_alloc, force_buffers_size, force_lists, force_lists_alloc, force_lists_size, forces, max_atoms_per_patch, patch_pairs, patch_pairs_alloc, patch_pairs_size, slow_force_buffers, and slow_forces. Referenced by ComputeNonbondedCUDA::doWork(). 00099 {
00100
00101 patch_pairs_size = npp;
00102 force_buffers_size = force_buffers_size_p;
00103 force_lists_size = nfl;
00104 atoms_size = atoms_size_p;
00105 max_atoms_per_patch = max_atoms_per_patch_p;
00106
00107 #if 0
00108 printf("%d %d %d %d %d %d %d %d\n",
00109 patch_pairs_size , patch_pairs_alloc ,
00110 force_buffers_size , force_buffers_alloc ,
00111 force_lists_size , force_lists_alloc ,
00112 atoms_size , atoms_alloc );
00113 #endif
00114
00115 if ( patch_pairs_size > patch_pairs_alloc ||
00116 force_buffers_size > force_buffers_alloc ||
00117 force_lists_size > force_lists_alloc ||
00118 atoms_size > atoms_alloc ) {
00119
00120 patch_pairs_alloc = (int) (1.2 * patch_pairs_size);
00121 force_buffers_alloc = (int) (1.2 * force_buffers_size);
00122 force_lists_alloc = (int) (1.2 * force_lists_size);
00123 atoms_alloc = (int) (1.2 * atoms_size);
00124
00125 if ( forces ) cudaFree(forces);
00126 if ( slow_forces ) cudaFree(slow_forces);
00127 if ( atom_params ) cudaFree(atom_params);
00128 if ( atoms ) cudaFree(atoms);
00129 if ( force_buffers ) cudaFree(force_buffers);
00130 if ( slow_force_buffers ) cudaFree(slow_force_buffers);
00131 if ( force_lists ) cudaFree(force_lists);
00132 if ( patch_pairs ) cudaFree(patch_pairs);
00133 cuda_errcheck("free everything");
00134
00135 #if 1
00136 int totalmem = patch_pairs_alloc * sizeof(patch_pair) +
00137 force_lists_alloc * sizeof(force_list) +
00138 2 * force_buffers_alloc * sizeof(float4) +
00139 atoms_alloc * sizeof(atom) +
00140 atoms_alloc * sizeof(atom_param) +
00141 2 * atoms_alloc * sizeof(float4);
00142 // printf("allocating %d MB of memory on GPU\n", totalmem >> 20);
00143 #endif
00144
00145 cudaMalloc((void**) &patch_pairs, patch_pairs_alloc * sizeof(patch_pair));
00146 cudaMalloc((void**) &force_lists, force_lists_alloc * sizeof(force_list));
00147 cudaMalloc((void**) &force_buffers, force_buffers_alloc * sizeof(float4));
00148 cudaMalloc((void**) &slow_force_buffers, force_buffers_alloc * sizeof(float4));
00149 cudaMalloc((void**) &atoms, atoms_alloc * sizeof(atom));
00150 cudaMalloc((void**) &atom_params, atoms_alloc * sizeof(atom_param));
00151 cudaMalloc((void**) &forces, atoms_alloc * sizeof(float4));
00152 cudaMalloc((void**) &slow_forces, atoms_alloc * sizeof(float4));
00153 cuda_errcheck("malloc everything");
00154
00155 }
00156
00157 cudaMemcpy(patch_pairs, pp, npp * sizeof(patch_pair),
00158 cudaMemcpyHostToDevice);
00159 cuda_errcheck("memcpy to patch_pairs");
00160
00161 cudaMemcpy(force_lists, fl, nfl * sizeof(force_list),
00162 cudaMemcpyHostToDevice);
00163 cuda_errcheck("memcpy to force_lists");
00164 }
|
|
|
Definition at line 20 of file ComputeNonbondedCUDA.C. References NAMD_die(). Referenced by cuda_bind_atom_params(), cuda_bind_atoms(), cuda_bind_exclusions(), cuda_bind_force_table(), cuda_bind_patch_pairs(), cuda_init(), cuda_load_forces(), cuda_nonbonded_forces(), and ComputeNonbondedCUDA::finishWork(). 00020 {
00021 cudaError_t err;
00022 if ((err = cudaGetLastError()) != cudaSuccess) {
00023 char errmsg[1024];
00024 sprintf(errmsg,"CUDA error %s: %s", msg, cudaGetErrorString(err));
00025 NAMD_die(errmsg);
00026 }
00027 }
|
|
|
Definition at line 77 of file ComputeNonbondedCUDAKernel.cu. References atom_params, atoms, atoms_alloc, cuda_errcheck(), force_buffers, force_buffers_alloc, force_lists, force_lists_alloc, forces, patch_pairs, patch_pairs_alloc, slow_force_buffers, slow_forces, and stream. Referenced by cuda_initialize(). 00077 {
00078 forces = 0;
00079 slow_forces = 0;
00080 atom_params = 0;
00081 atoms = 0;
00082 force_buffers = 0;
00083 slow_force_buffers = 0;
00084 force_lists = 0;
00085 patch_pairs = 0;
00086
00087 patch_pairs_alloc = 0;
00088 force_buffers_alloc = 0;
00089 force_lists_alloc = 0;
00090 atoms_alloc = 0;
00091
00092 cudaStreamCreate(&stream);
00093 cuda_errcheck("cudaStreamCreate");
00094 }
|
|
||||||||||||||||||||
|
Definition at line 179 of file ComputeNonbondedCUDAKernel.cu. References cuda_errcheck(), forces, slow_forces, and stream. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). 00179 {
00180 // printf("load forces %d %d %d\n",begin,count,atoms_size);
00181 cudaMemcpyAsync(f+begin, forces+begin, count * sizeof(float4),
00182 cudaMemcpyDeviceToHost, stream);
00183 if ( f_slow ) {
00184 cudaMemcpyAsync(f_slow+begin, slow_forces+begin, count * sizeof(float4),
00185 cudaMemcpyDeviceToHost, stream);
00186 }
00187 cuda_errcheck("memcpy from forces");
00188 }
|
|
||||||||||||||||||||||||||||||||||||||||
|
Definition at line 468 of file ComputeNonbondedCUDAKernel.cu. References cuda_errcheck(), dev_nonbonded(), dev_sum_forces(), force_lists, patch_pairs, slow_force_buffers, and stream. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). 00469 {
00470
00471 if ( ccount ) {
00472 // printf("%d %d %d\n",cbegin,ccount,patch_pairs_size);
00473 dev_nonbonded<<< ccount, BLOCK_SIZE, 0, stream
00474 >>>(patch_pairs+cbegin,atoms,atom_params,force_buffers,
00475 (doSlow?slow_force_buffers:0), lata, latb, latc, cutoff2);
00476 cuda_errcheck("dev_nonbonded");
00477 }
00478
00479 if ( pcount ) {
00480 // printf("%d %d %d\n",pbegin,pcount,force_lists_size);
00481 dev_sum_forces<<< pcount, 64, 0, stream
00482 >>>(force_lists+pbegin,force_buffers,forces);
00483 if ( doSlow ) {
00484 dev_sum_forces<<< pcount, 64, 0, stream
00485 >>>(force_lists+pbegin,slow_force_buffers,slow_forces);
00486 }
00487 cuda_errcheck("dev_sum_forces");
00488 }
00489
00490 }
|
|
|
Definition at line 493 of file ComputeNonbondedCUDAKernel.cu. References stream. Referenced by ComputeNonbondedCUDA::doWork(). 00493 {
00494 return ( cudaStreamQuery(stream) == cudaSuccess );
00495 }
|
1.3.9.1