00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00022 #include <stdio.h>
00023 #include <stdlib.h>
00024 #include <string.h>
00025 #include <cuda.h>
00026
00027 #if CUDART_VERSION < 4000
00028 #error The VMD MDFF feature requires CUDA 4.0 or later
00029 #endif
00030
00031 #include "Inform.h"
00032 #include "utilities.h"
00033 #include "WKFThreads.h"
00034 #include "WKFUtils.h"
00035 #include "CUDAKernels.h"
00036 #include "CUDASpatialSearch.h"
00037 #include "CUDASort.h"
00038
00039 #include "AtomSel.h"
00040 #include "VMDApp.h"
00041 #include "MoleculeList.h"
00042 #include "VolumetricData.h"
00043 #include "VolMapCreate.h"
00044
00045 #include <tcl.h>
00046 #include "TclCommands.h"
00047
00048 #if 1
00049 #define CUERR { cudaError_t err; \
00050 if ((err = cudaGetLastError()) != cudaSuccess) { \
00051 printf("CUDA error: %s, %s line %d\n", cudaGetErrorString(err), __FILE__, __LINE__); \
00052 printf("Thread aborting...\n"); \
00053 return NULL; }}
00054 #else
00055 #define CUERR
00056 #endif
00057
00058
00059
00060
00061
00062 #if 0
00063 #define RESTRICT __restrict__
00064 #else
00065 #define RESTRICT
00066 #endif
00067
00068 #if __CUDA_ARCH__ >= 300
00069 #define MAXTHRSORTHASH 512
00070 #define MINBLOCKSORTHASH 4
00071 #elif __CUDA_ARCH__ >= 200
00072 #define MAXTHRSORTHASH 512
00073 #define MINBLOCKSORTHASH 1
00074 #else
00075 #define MAXTHRSORTHASH 512
00076 #define MINBLOCKSORTHASH 1
00077 #endif
00078
00079
00080
00081
00082
00083
00084
00085
00086 #define GRID_CELL_EMPTY 0xffffffff
00087
00088
00089 __global__ static void
00090
00091 hashAtoms(unsigned int natoms,
00092 const float4 * RESTRICT xyzr,
00093 int3 numcells,
00094 float invgridspacing,
00095 unsigned int * RESTRICT atomIndex,
00096 unsigned int * RESTRICT atomHash) {
00097 unsigned int index = (blockIdx.x * blockDim.x) + threadIdx.x;
00098 if (index >= natoms)
00099 return;
00100
00101 float4 atom = xyzr[index];
00102
00103
00104 int3 cell;
00105 cell.x = min(int(atom.x * invgridspacing), numcells.x-1);
00106 cell.y = min(int(atom.y * invgridspacing), numcells.y-1);
00107 cell.z = min(int(atom.z * invgridspacing), numcells.z-1);
00108
00109 unsigned int hash = (cell.z * numcells.y * numcells.x) +
00110 (cell.y * numcells.x) + cell.x;
00111
00112 atomIndex[index] = index;
00113 atomHash[index] = hash;
00114 }
00115
00116
00117
00118 __global__ static void
00119
00120 sortAtomsColorsGenCellLists(unsigned int natoms,
00121 const float4 * RESTRICT xyzr_d,
00122 const float4 * RESTRICT color_d,
00123 const unsigned int *atomIndex_d,
00124 const unsigned int *atomHash_d,
00125 float4 * RESTRICT sorted_xyzr_d,
00126 float4 * RESTRICT sorted_color_d,
00127 uint2 * RESTRICT cellStartEnd_d) {
00128 extern __shared__ unsigned int hash_s[];
00129 unsigned int index = (blockIdx.x * blockDim.x) + threadIdx.x;
00130 unsigned int hash;
00131
00132 if (index < natoms) {
00133 hash = atomHash_d[index];
00134 hash_s[threadIdx.x+1] = hash;
00135 if (index > 0 && threadIdx.x == 0) {
00136
00137 hash_s[0] = atomHash_d[index-1];
00138 }
00139 }
00140
00141 __syncthreads();
00142
00143 if (index < natoms) {
00144
00145
00146
00147 if (index == 0 || hash != hash_s[threadIdx.x]) {
00148 cellStartEnd_d[hash].x = index;
00149 if (index > 0)
00150 cellStartEnd_d[hash_s[threadIdx.x]].y = index;
00151 }
00152
00153 if (index == natoms - 1) {
00154 cellStartEnd_d[hash].y = index + 1;
00155 }
00156
00157
00158 unsigned int sortedIndex = atomIndex_d[index];
00159 float4 pos = xyzr_d[sortedIndex];
00160 sorted_xyzr_d[index] = pos;
00161
00162
00163 if (color_d != NULL) {
00164 float4 col = color_d[sortedIndex];
00165 sorted_color_d[index] = col;
00166 }
00167 }
00168 }
00169
00170
00171
00172
00173 __global__ static void
00174
00175 sortAtomsGenCellLists(unsigned int natoms,
00176 const float4 * RESTRICT xyzr_d,
00177 const unsigned int *atomIndex_d,
00178 const unsigned int *atomHash_d,
00179 float4 * RESTRICT sorted_xyzr_d,
00180 uint2 * RESTRICT cellStartEnd_d) {
00181 extern __shared__ unsigned int hash_s[];
00182 unsigned int index = (blockIdx.x * blockDim.x) + threadIdx.x;
00183 unsigned int hash;
00184
00185 if (index < natoms) {
00186 hash = atomHash_d[index];
00187 hash_s[threadIdx.x+1] = hash;
00188 if (index > 0 && threadIdx.x == 0) {
00189
00190 hash_s[0] = atomHash_d[index-1];
00191 }
00192 }
00193
00194 __syncthreads();
00195
00196 if (index < natoms) {
00197
00198
00199
00200 if (index == 0 || hash != hash_s[threadIdx.x]) {
00201 cellStartEnd_d[hash].x = index;
00202 if (index > 0)
00203 cellStartEnd_d[hash_s[threadIdx.x]].y = index;
00204 }
00205
00206 if (index == natoms - 1) {
00207 cellStartEnd_d[hash].y = index + 1;
00208 }
00209
00210
00211 unsigned int sortedIndex = atomIndex_d[index];
00212 float4 pos = xyzr_d[sortedIndex];
00213 sorted_xyzr_d[index] = pos;
00214 }
00215 }
00216
00217
00218
00219 int vmd_cuda_build_density_atom_grid(int natoms,
00220 const float4 * xyzr_d,
00221 const float4 * color_d,
00222 float4 * sorted_xyzr_d,
00223 float4 * sorted_color_d,
00224 unsigned int *atomIndex_d,
00225 unsigned int *atomHash_d,
00226 uint2 * cellStartEnd_d,
00227 int3 volsz,
00228 float invgridspacing) {
00229
00230
00231 dim3 hBsz(256, 1, 1);
00232
00233
00234
00235
00236
00237
00238
00239 if (natoms > 16000000)
00240 hBsz.x = 512;
00241
00242 dim3 hGsz(((natoms+hBsz.x-1) / hBsz.x), 1, 1);
00243
00244
00245
00246
00247 hashAtoms<<<hGsz, hBsz>>>(natoms, xyzr_d, volsz, invgridspacing,
00248 atomIndex_d, atomHash_d);
00249
00250
00251
00252 if (dev_radix_sort_by_key(atomHash_d, atomIndex_d, natoms,
00253 (unsigned int *) NULL, (unsigned int *) NULL,
00254 NULL, 0, 0U, 0U) != 0) {
00255
00256
00257
00258
00259 printf("dev_radix_sort_by_key() failed: %s line %d\n", __FILE__, __LINE__);
00260 return -1;
00261 }
00262
00263
00264 int ncells = volsz.x * volsz.y * volsz.z;
00265 cudaMemset(cellStartEnd_d, GRID_CELL_EMPTY, ncells*sizeof(uint2));
00266
00267
00268
00269
00270 unsigned int smemSize = sizeof(unsigned int)*(hBsz.x+1);
00271 sortAtomsColorsGenCellLists<<<hGsz, hBsz, smemSize>>>(
00272 natoms, xyzr_d, color_d, atomIndex_d, atomHash_d,
00273 sorted_xyzr_d, sorted_color_d, cellStartEnd_d);
00274
00275
00276
00277
00278 cudaDeviceSynchronize();
00279 cudaError_t err = cudaGetLastError();
00280 if (err != cudaSuccess) {
00281 printf("CUDA error: %s, %s line %d\n", cudaGetErrorString(err), __FILE__, __LINE__);
00282 return -1;
00283 }
00284
00285 return 0;
00286 }
00287
00288
00289
00290 int vmd_cuda_build_density_atom_grid(int natoms,
00291 const float4 * xyzr_d,
00292 float4 *& sorted_xyzr_d,
00293 uint2 *& cellStartEnd_d,
00294 int3 volsz,
00295 float invgridspacing) {
00296
00297 unsigned int *atomIndex_d = NULL;
00298 unsigned int *atomHash_d = NULL;
00299
00300 cudaMalloc((void**)&sorted_xyzr_d, natoms * sizeof(float4));
00301 cudaMalloc((void**)&atomIndex_d, natoms * sizeof(unsigned int));
00302 cudaMalloc((void**)&atomHash_d, natoms * sizeof(unsigned int));
00303
00304
00305 int ncells = volsz.x * volsz.y * volsz.z;
00306 cudaMalloc((void**)&cellStartEnd_d, ncells * sizeof(uint2));
00307
00308
00309 dim3 hBsz(256, 1, 1);
00310 dim3 hGsz(((natoms+hBsz.x-1) / hBsz.x), 1, 1);
00311
00312
00313 hashAtoms<<<hGsz, hBsz>>>(natoms, xyzr_d, volsz, invgridspacing,
00314 atomIndex_d, atomHash_d);
00315
00316
00317
00318 if (dev_radix_sort_by_key(atomHash_d, atomIndex_d, natoms,
00319 (unsigned int *) NULL, (unsigned int *) NULL,
00320 NULL, 0, 0U, 0U) != 0) {
00321
00322
00323
00324
00325 printf("dev_radix_sort_by_key() failed: %s line %d\n", __FILE__, __LINE__);
00326
00327
00328 cudaFree(sorted_xyzr_d);
00329 cudaFree(atomIndex_d);
00330 cudaFree(atomHash_d);
00331 cudaFree(cellStartEnd_d);
00332
00333 return -1;
00334 }
00335
00336
00337 cudaMemset(cellStartEnd_d, GRID_CELL_EMPTY, ncells*sizeof(uint2));
00338
00339
00340
00341
00342 unsigned int smemSize = sizeof(unsigned int)*(hBsz.x+1);
00343 sortAtomsGenCellLists<<<hGsz, hBsz, smemSize>>>(
00344 natoms, xyzr_d, atomIndex_d, atomHash_d,
00345 sorted_xyzr_d, cellStartEnd_d);
00346
00347
00348
00349
00350 cudaDeviceSynchronize();
00351 cudaError_t err = cudaGetLastError();
00352 if (err != cudaSuccess) {
00353 printf("CUDA error: %s, %s line %d\n", cudaGetErrorString(err), __FILE__, __LINE__);
00354 return -1;
00355 }
00356
00357
00358 cudaFree(atomIndex_d);
00359 cudaFree(atomHash_d);
00360
00361 return 0;
00362 }
00363
00364
00365