#include "common.h"
#include "charm++.h"
#include <cuda_runtime.h>
#include <cuda.h>
#include "WorkDistrib.h"
#include "ComputeMgr.h"
#include "ProxyMgr.h"
#include "ComputeNonbondedCUDA.h"
#include "ComputeNonbondedCUDAKernel.h"
#include "LJTable.h"
#include "ObjectArena.h"
#include "SortAtoms.h"
#include <algorithm>
#include "NamdTypes.h"
Go to the source code of this file.
Classes | |
| struct | exlist_sortop |
| struct | cr_sortop |
Defines | |
| #define | CUDA_EVENT_ID_BASE 100 |
| #define | CUDA_TRACE_REMOTE(START, END) |
| #define | CUDA_TRACE_LOCAL(START, END) |
| #define | REGISTER_DEVICE_EVENTS(DEV) |
| #define | SET_EXCL(EXCL, BASE, DIFF) (EXCL)[((BASE)+(DIFF))>>5] |= (1<<(((BASE)+(DIFF))&31)) |
| #define | PATCH_PAIRS_REF ResizeArray<patch_pair> &patch_pairs(*patch_pairs_ptr); |
| #define | FORCE_LISTS_REF ResizeArray<force_list> &force_lists(*force_lists_ptr); |
| #define | CUDA_POLL(FN, ARG) CcdCallFnAfter(FN,ARG,0.1) |
| #define | GBISP() |
| #define | count_limit 1000000 |
| #define | stream2 stream |
Functions | |
| void | cuda_errcheck (const char *msg) |
| void | cuda_die (const char *msg) |
| void | cuda_getargs (char **argv) |
| int | cuda_device_pe () |
| bool | cuda_device_shared_with_pe (int pe) |
| bool | sortop_bitreverse (int a, int b) |
| void | cuda_register_user_events () |
| void | cuda_initialize () |
| void | send_build_cuda_force_table () |
| void | build_cuda_force_table () |
| void | register_cuda_compute_self (ComputeID c, PatchID pid) |
| void | register_cuda_compute_pair (ComputeID c, PatchID pid[], int t[]) |
| void | unregister_cuda_compute (ComputeID c) |
| void | cuda_check_remote_progress (void *arg, double) |
| void | cuda_check_local_progress (void *arg, double) |
| void | cuda_check_remote_calc (void *arg, double) |
| void | cuda_check_local_calc (void *arg, double) |
Variables | |
| __thread cudaStream_t | stream |
| __thread cudaStream_t | stream2 |
| char * | devicelist |
| __thread int | usedevicelist |
| __thread int | ignoresharing |
| __thread int | mergegrids |
| __thread int | shared_gpu |
| __thread int | first_pe_sharing_gpu |
| __thread int | next_pe_sharing_gpu |
| __thread int | devicePe |
| __thread int | numPesSharingDevice |
| __thread int * | pesSharingDevice |
| __thread int | gpu_is_mine |
| __thread ComputeNonbondedCUDA * | cudaCompute = 0 |
| __thread ComputeMgr * | computeMgr = 0 |
| __thread int2 * | exclusionsByAtom |
| __thread int | atomsChanged = 0 |
| __thread int | computesChanged = 0 |
| __thread int | pairlistsValid = 0 |
| __thread float | pairlistTolerance = 0. |
| __thread int | usePairlists = 0 |
| __thread int | savePairlists = 0 |
| __thread float | plcutoff2 = 0 |
| __thread cudaEvent_t | start_calc |
| __thread cudaEvent_t | end_remote_download |
| __thread cudaEvent_t | end_local_download |
| __thread ResizeArray< patch_pair > * | patch_pairs_ptr |
| __thread ResizeArray< force_list > * | force_lists_ptr |
| __thread int | num_atom_records_allocated |
| __thread atom_param * | atom_params |
| __thread atom * | atoms |
| __thread int | num_virials |
| __thread int | num_virials_allocated |
| __thread float * | virials |
| __thread float * | slow_virials |
| __thread float * | energy_gbis |
| __thread float * | intRad0H |
| __thread float * | intRadSH |
| __thread float * | bornRadH |
| __thread float * | dHdrPrefixH |
| __thread int | cuda_timer_count |
| __thread double | cuda_timer_total |
| __thread double | kernel_time |
| __thread double | remote_submit_time |
| __thread double | local_submit_time |
| __thread int | check_remote_count |
| __thread int | check_local_count |
| __thread int | kernel_launch_state = 0 |
|
|
Definition at line 1020 of file ComputeNonbondedCUDA.C. |
|
|
Definition at line 117 of file ComputeNonbondedCUDA.C. |
|
|
Definition at line 1012 of file ComputeNonbondedCUDA.C. Referenced by cuda_check_local_calc(), cuda_check_local_progress(), cuda_check_remote_calc(), cuda_check_remote_progress(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Value: do { int dev; cudaGetDevice(&dev); traceUserBracketEvent( \
    CUDA_EVENT_ID_BASE + 2 * dev + 1, START, END); } while (0)
Definition at line 121 of file ComputeNonbondedCUDA.C. Referenced by cuda_check_local_progress(). |
|
|
Value: do { int dev; cudaGetDevice(&dev); traceUserBracketEvent( \
    CUDA_EVENT_ID_BASE + 2 * dev, START, END); } while (0)
Definition at line 118 of file ComputeNonbondedCUDA.C. Referenced by cuda_check_remote_progress(). |
|
|
Definition at line 714 of file ComputeNonbondedCUDA.C. |
|
|
Definition at line 1017 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishWork(), ComputeNonbondedCUDA::noWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 713 of file ComputeNonbondedCUDA.C. |
|
|
Value: traceRegisterUserEvent("CUDA device " #DEV " remote", CUDA_EVENT_ID_BASE + 2 * DEV); \
    traceRegisterUserEvent("CUDA device " #DEV " local", CUDA_EVENT_ID_BASE + 2 * DEV + 1);
Referenced by cuda_register_user_events(). |
|
|
Referenced by ComputeNonbondedCUDA::build_exclusions(). |
|
|
Referenced by cuda_init(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 354 of file ComputeNonbondedCUDA.C. References ComputeNonbondedCUDA::build_force_table(), ComputeNonbondedCUDA::build_lj_table(), and devicePe. Referenced by ComputeMgr::recvBuildCudaForceTable(). 00354 {
00355 if ( devicePe != CkMyPe() ) return;
00356 ComputeNonbondedCUDA::build_lj_table();
00357 ComputeNonbondedCUDA::build_force_table();
00358 }
|
|
||||||||||||
|
Definition at line 1650 of file ComputeNonbondedCUDA.C. References computeMgr, cuda_check_local_calc(), CUDA_POLL, end_local_download, next_pe_sharing_gpu, and ComputeMgr::sendYieldDevice(). Referenced by cuda_check_local_calc(), and ComputeNonbondedCUDA::recvYieldDevice(). 01650 {
01651 // in theory we only need end_local_calc, but overlap isn't reliable
01652 // if ( cudaEventQuery(end_local_calc) == cudaSuccess ) {
01653 if ( cudaEventQuery(end_local_download) == cudaSuccess ) {
01654 // CkPrintf("Pe %d yielding to %d after local calc\n", CkMyPe(), next_pe_sharing_gpu);
01655 computeMgr->sendYieldDevice(next_pe_sharing_gpu);
01656 // CkPrintf("Pe %d yielded to %d after local calc\n", CkMyPe(), next_pe_sharing_gpu);
01657 } else {
01658 CUDA_POLL(cuda_check_local_calc, arg);
01659 }
01660 }
|
|
||||||||||||
|
Definition at line 1049 of file ComputeNonbondedCUDA.C. References check_local_count, cuda_check_local_progress(), cuda_errcheck(), CUDA_POLL, CUDA_TRACE_LOCAL, end_local_download, kernel_time, local_submit_time, NAMD_bug(), and NAMD_die(). Referenced by cuda_check_local_progress(), ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). 01049 {
01050
01051 cudaError_t err = cudaEventQuery(end_local_download);
01052 if ( err == cudaSuccess ) {
01053 double wall_time = CkWallTimer();
01054 CUDA_TRACE_LOCAL(local_submit_time,wall_time);
01055 kernel_time = wall_time - kernel_time;
01056 ((ComputeNonbondedCUDA *) arg)->messageFinishWork();
01057 check_local_count = 0;
01058 } else if ( err != cudaErrorNotReady ) {
01059 cuda_errcheck("in cuda_check_local_progress");
01060 NAMD_bug("cuda_errcheck missed error in cuda_check_local_progress");
01061 } else if ( ++check_local_count >= count_limit ) {
01062 char errmsg[256];
01063 sprintf(errmsg,"cuda_check_local_progress polled %d times over %f s on step %d",
01064 check_local_count, CkWallTimer() - local_submit_time,
01065 ((ComputeNonbondedCUDA *) arg)->step);
01066 cuda_errcheck(errmsg);
01067 NAMD_die(errmsg);
01068 } else if ( check_remote_count ) {
01069 NAMD_bug("nonzero check_remote_count in cuda_check_local_progress");
01070 } else {
01071 CUDA_POLL(cuda_check_local_progress, arg);
01072 }
01073 }
|
|
||||||||||||
|
Definition at line 1638 of file ComputeNonbondedCUDA.C. References computeMgr, cuda_check_remote_calc(), CUDA_POLL, end_remote_download, next_pe_sharing_gpu, and ComputeMgr::sendYieldDevice(). Referenced by cuda_check_remote_calc(), and ComputeNonbondedCUDA::recvYieldDevice(). 01638 {
01639 // in theory we only need end_remote_calc, but overlap isn't reliable
01640 // if ( cudaEventQuery(end_remote_calc) == cudaSuccess ) {
01641 if ( cudaEventQuery(end_remote_download) == cudaSuccess ) {
01642 // CkPrintf("Pe %d yielding to %d after remote calc\n", CkMyPe(), next_pe_sharing_gpu);
01643 computeMgr->sendYieldDevice(next_pe_sharing_gpu);
01644 // CkPrintf("Pe %d yielded to %d after remote calc\n", CkMyPe(), next_pe_sharing_gpu);
01645 } else {
01646 CUDA_POLL(cuda_check_remote_calc, arg);
01647 }
01648 }
|
|
||||||||||||
|
Definition at line 1024 of file ComputeNonbondedCUDA.C. References check_remote_count, cuda_check_remote_progress(), cuda_errcheck(), CUDA_POLL, CUDA_TRACE_REMOTE, end_remote_download, local_submit_time, NAMD_bug(), NAMD_die(), and remote_submit_time. Referenced by cuda_check_remote_progress(), and ComputeNonbondedCUDA::recvYieldDevice(). 01024 {
01025
01026 cudaError_t err = cudaEventQuery(end_remote_download);
01027 if ( err == cudaSuccess ) {
01028 local_submit_time = CkWallTimer();
01029 CUDA_TRACE_REMOTE(remote_submit_time,local_submit_time);
01030 ((ComputeNonbondedCUDA *) arg)->messageFinishWork();
01031 check_remote_count = 0;
01032 } else if ( err != cudaErrorNotReady ) {
01033 cuda_errcheck("in cuda_check_remote_progress");
01034 NAMD_bug("cuda_errcheck missed error in cuda_check_remote_progress");
01035 } else if ( ++check_remote_count >= count_limit ) {
01036 char errmsg[256];
01037 sprintf(errmsg,"cuda_check_remote_progress polled %d times over %f s on step %d",
01038 check_remote_count, CkWallTimer() - remote_submit_time,
01039 ((ComputeNonbondedCUDA *) arg)->step);
01040 cuda_errcheck(errmsg);
01041 NAMD_die(errmsg);
01042 } else if ( check_local_count ) {
01043 NAMD_bug("nonzero check_local_count in cuda_check_remote_progress");
01044 } else {
01045 CUDA_POLL(cuda_check_remote_progress, arg);
01046 }
01047 }
|
|
|
Definition at line 93 of file ComputeNonbondedCUDA.C. Referenced by ComputeMgr::createComputes(). 00093 { return devicePe; }
|
|
|
Definition at line 95 of file ComputeNonbondedCUDA.C. References pesSharingDevice. Referenced by ComputeMgr::createComputes(). 00095 {
00096 for ( int i=0; i<numPesSharingDevice; ++i ) {
00097 if ( pesSharingDevice[i] == pe ) return true;
00098 }
00099 return false;
00100 }
|
|
|
Definition at line 54 of file ComputeNonbondedCUDA.C. References CmiPhysicalNodeID, and NAMD_die(). Referenced by cuda_initialize(). 00054 {
00055 char host[128];
00056 #ifdef NOHOSTNAME
00057 sprintf(host,"physical node %d", CmiPhysicalNodeID(CkMyPe()));
00058 #else
00059 gethostname(host, 128); host[127] = 0;
00060 #endif
00061 char devstr[128] = "";
00062 int devnum;
00063 if ( cudaGetDevice(&devnum) == cudaSuccess ) {
00064 sprintf(devstr, " device %d", devnum);
00065 }
00066 char errmsg[1024];
00067 sprintf(errmsg,"CUDA error on Pe %d (%s%s): %s", CkMyPe(), host, devstr, msg);
00068 NAMD_die(errmsg);
00069 }
|
|
|
Definition at line 34 of file ComputeNonbondedCUDA.C. References CmiPhysicalNodeID, and NAMD_die(). Referenced by cuda_bind_atom_params(), cuda_bind_atoms(), cuda_bind_exclusions(), cuda_bind_force_table(), cuda_bind_forces(), cuda_bind_GBIS_bornRad(), cuda_bind_GBIS_dEdaSum(), cuda_bind_GBIS_dHdrPrefix(), cuda_bind_GBIS_energy(), cuda_bind_GBIS_intRad(), cuda_bind_GBIS_psiSum(), cuda_bind_lj_table(), cuda_bind_patch_pairs(), cuda_bind_virials(), cuda_check_local_progress(), cuda_check_remote_progress(), cuda_GBIS_P1(), cuda_GBIS_P2(), cuda_GBIS_P3(), cuda_init(), cuda_initialize(), cuda_nonbonded_forces(), ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishWork(), and ComputeNonbondedCUDA::messageFinishWork(). 00034 {
00035 cudaError_t err;
00036 if ((err = cudaGetLastError()) != cudaSuccess) {
00037 char host[128];
00038 #ifdef NOHOSTNAME
00039 sprintf(host,"physical node %d", CmiPhysicalNodeID(CkMyPe()));
00040 #else
00041 gethostname(host, 128); host[127] = 0;
00042 #endif
00043 char devstr[128] = "";
00044 int devnum;
00045 if ( cudaGetDevice(&devnum) == cudaSuccess ) {
00046 sprintf(devstr, " device %d", devnum);
00047 }
00048 char errmsg[1024];
00049 sprintf(errmsg,"CUDA error %s on Pe %d (%s%s): %s", msg, CkMyPe(), host, devstr, cudaGetErrorString(err));
00050 NAMD_die(errmsg);
00051 }
00052 }
|
|
|
Definition at line 76 of file ComputeNonbondedCUDA.C. References devicelist, ignoresharing, mergegrids, and usedevicelist. Referenced by all_init(). 00076 {
00077 devicelist = 0;
00078 usedevicelist = CmiGetArgStringDesc(argv, "+devices", &devicelist,
00079 "comma-delimited list of CUDA device numbers such as 0,2,1,2");
00080 ignoresharing = CmiGetArgFlag(argv, "+ignoresharing");
00081 mergegrids = CmiGetArgFlag(argv, "+mergegrids");
00082 }
|
|
|
Definition at line 150 of file ComputeNonbondedCUDA.C. References CmiPhysicalNodeID, cuda_die(), cuda_errcheck(), cuda_register_user_events(), devicelist, devicePe, first_pe_sharing_gpu, gpu_is_mine, j, NAMD_bug(), NAMD_die(), next_pe_sharing_gpu, numPesSharingDevice, pesSharingDevice, shared_gpu, and sortop_bitreverse(). Referenced by all_init(). 00150 {
00151
00152 if ( 0 == CkMyPe() ) cuda_register_user_events();
00153
00154 char host[128];
00155 #ifdef NOHOSTNAME
00156 sprintf(host,"physical node %d", CmiPhysicalNodeID(CkMyPe()));
00157 #else
00158 gethostname(host, 128); host[127] = 0;
00159 #endif
00160
00161 int myPhysicalNodeID = CmiPhysicalNodeID(CkMyPe());
00162 int myRankInPhysicalNode;
00163 int numPesOnPhysicalNode;
00164 int *pesOnPhysicalNode;
00165 CmiGetPesOnPhysicalNode(myPhysicalNodeID,
00166 &pesOnPhysicalNode,&numPesOnPhysicalNode);
00167
00168 {
00169 int i;
00170 for ( i=0; i < numPesOnPhysicalNode; ++i ) {
00171 if ( i && (pesOnPhysicalNode[i] <= pesOnPhysicalNode[i-1]) ) {
00172 i = numPesOnPhysicalNode;
00173 break;
00174 }
00175 if ( pesOnPhysicalNode[i] == CkMyPe() ) break;
00176 }
00177 if ( i == numPesOnPhysicalNode || i != CmiPhysicalRank(CkMyPe()) ) {
00178 CkPrintf("Bad result from CmiGetPesOnPhysicalNode!\n");
00179 for ( i=0; i < numPesOnPhysicalNode; ++i ) {
00180 CkPrintf("pe %d physnode rank %d of %d is %d\n", CkMyPe(),
00181 i, numPesOnPhysicalNode, pesOnPhysicalNode[i]);
00182 }
00183 myRankInPhysicalNode = 0;
00184 numPesOnPhysicalNode = 1;
00185 pesOnPhysicalNode = new int[1];
00186 pesOnPhysicalNode[0] = CkMyPe();
00187 } else {
00188 myRankInPhysicalNode = i;
00189 }
00190 }
00191 // CkPrintf("Pe %d ranks %d in physical node\n",CkMyPe(),myRankInPhysicalNode);
00192
00193 int deviceCount = 0;
00194 cudaGetDeviceCount(&deviceCount);
00195 cuda_errcheck("in cudaGetDeviceCount");
00196 if ( deviceCount <= 0 ) {
00197 cuda_die("No CUDA devices found.");
00198 }
00199
00200 int *devices;
00201 int ndevices = 0;
00202 int nexclusive = 0;
00203 if ( usedevicelist ) {
00204 devices = new int[strlen(devicelist)];
00205 int i = 0;
00206 while ( devicelist[i] ) {
00207 ndevices += sscanf(devicelist+i,"%d",devices+ndevices);
00208 while ( devicelist[i] && isdigit(devicelist[i]) ) ++i;
00209 while ( devicelist[i] && ! isdigit(devicelist[i]) ) ++i;
00210 }
00211 } else {
00212 if ( ! CkMyPe() ) {
00213 CkPrintf("Did not find +devices i,j,k,... argument, using all\n");
00214 }
00215 devices = new int[deviceCount];
00216 for ( int i=0; i<deviceCount; ++i ) {
00217 int dev = i % deviceCount;
00218 #if CUDA_VERSION >= 2020
00219 cudaDeviceProp deviceProp;
00220 cudaGetDeviceProperties(&deviceProp, dev);
00221 cuda_errcheck("in cudaGetDeviceProperties");
00222 if ( deviceProp.computeMode != cudaComputeModeProhibited
00223 && (deviceProp.major > 1 || deviceProp.minor >= 1)
00224 && deviceProp.canMapHostMemory
00225 && deviceProp.multiProcessorCount > 2 ) { // exclude weak cards
00226 devices[ndevices++] = dev;
00227 }
00228 if ( deviceProp.computeMode == cudaComputeModeExclusive ) {
00229 ++nexclusive;
00230 }
00231 #else
00232 devices[ndevices++] = dev;
00233 #endif
00234 }
00235 }
00236
00237 if ( ! ndevices ) {
00238 cuda_die("All CUDA devices are in prohibited mode, of compute capability 1.0, or otherwise unusable.");
00239 }
00240
00241 shared_gpu = 0;
00242 gpu_is_mine = 1;
00243 first_pe_sharing_gpu = CkMyPe();
00244 next_pe_sharing_gpu = CkMyPe();
00245
00246 /* if ( (ndevices >= numPesOnPhysicalNode) || (nexclusive == 0) ) */ {
00247
00248 int dev;
00249 if ( numPesOnPhysicalNode > 1 ) {
00250 int myDeviceRank = myRankInPhysicalNode * ndevices / numPesOnPhysicalNode;
00251 dev = devices[myDeviceRank];
00252 devicePe = CkMyPe();
00253 if ( ignoresharing ) {
00254 pesSharingDevice = new int[1];
00255 pesSharingDevice[0] = CkMyPe();
00256 numPesSharingDevice = 1;
00257 } else {
00258 pesSharingDevice = new int[numPesOnPhysicalNode];
00259 devicePe = -1;
00260 numPesSharingDevice = 0;
00261 for ( int i = 0; i < numPesOnPhysicalNode; ++i ) {
00262 if ( i * ndevices / numPesOnPhysicalNode == myDeviceRank ) {
00263 int thisPe = pesOnPhysicalNode[i];
00264 pesSharingDevice[numPesSharingDevice++] = thisPe;
00265 if ( devicePe < 1 ) devicePe = thisPe;
00266 if ( sortop_bitreverse(thisPe,devicePe) ) devicePe = thisPe;
00267 }
00268 }
00269 for ( int j = 0; j < ndevices; ++j ) {
00270 if ( devices[j] == dev && j != myDeviceRank ) shared_gpu = 1;
00271 }
00272 }
00273 if ( shared_gpu && devicePe == CkMyPe() ) {
00274 CkPrintf("Pe %d sharing CUDA device %d\n", CkMyPe(), dev);
00275 }
00276 } else { // in case phys node code is lying
00277 dev = devices[CkMyPe() % ndevices];
00278 devicePe = CkMyPe();
00279 pesSharingDevice = new int[1];
00280 pesSharingDevice[0] = CkMyPe();
00281 numPesSharingDevice = 1;
00282 }
00283
00284 if ( devicePe != CkMyPe() ) {
00285 CkPrintf("Pe %d physical rank %d will use CUDA device of pe %d\n",
00286 CkMyPe(), myRankInPhysicalNode, devicePe);
00287 return;
00288 }
00289
00290 // disable token-passing but don't submit local until remote finished
00291 // if shared_gpu is true, otherwise submit all work immediately
00292 first_pe_sharing_gpu = CkMyPe();
00293 next_pe_sharing_gpu = CkMyPe();
00294
00295 gpu_is_mine = ( first_pe_sharing_gpu == CkMyPe() );
00296
00297 if ( dev >= deviceCount ) {
00298 char buf[256];
00299 sprintf(buf,"Pe %d unable to bind to CUDA device %d on %s because only %d devices are present",
00300 CkMyPe(), dev, host, deviceCount);
00301 NAMD_die(buf);
00302 }
00303
00304 cudaError_t err;
00305 cudaDeviceProp deviceProp;
00306 err = cudaGetDeviceProperties(&deviceProp, dev);
00307 if (err == cudaSuccess) {
00308 CkPrintf("Pe %d physical rank %d binding to CUDA device %d on %s: '%s' Mem: %dMB Rev: %d.%d\n",
00309 CkMyPe(), myRankInPhysicalNode, dev, host,
00310 deviceProp.name, deviceProp.totalGlobalMem / (1024*1024),
00311 deviceProp.major, deviceProp.minor);
00312
00313 err = cudaSetDevice(dev);
00314 }
00315 if ( err != cudaSuccess) {
00316 char errmsg[1024];
00317 sprintf(errmsg,"CUDA error binding to device %d on pe %d: %s",
00318 dev, CkMyPe(), cudaGetErrorString(err));
00319 NAMD_die(errmsg);
00320 }
00321
00322 } // just let CUDA pick a device for us
00323
00324 cudaSetDeviceFlags(cudaDeviceMapHost);
00325 cuda_errcheck("in cudaSetDeviceFlags");
00326
00327 int dev;
00328 if ( cudaGetDevice(&dev) == cudaSuccess ) {
00329 cudaDeviceProp deviceProp;
00330 cudaGetDeviceProperties(&deviceProp, dev);
00331 cuda_errcheck("in cudaGetDeviceProperties");
00332 if ( deviceProp.computeMode == cudaComputeModeProhibited )
00333 cuda_die("device in prohibited mode");
00334 if ( deviceProp.major < 2 && deviceProp.minor < 1 )
00335 cuda_die("device not of compute capability 1.1 or higher");
00336 if ( ! deviceProp.canMapHostMemory )
00337 cuda_die("device cannot map host memory");
00338 }
00339
00340 if ( sizeof(patch_pair) & 15 ) NAMD_bug("sizeof(patch_pair) % 16 != 0");
00341 if ( sizeof(force_list) & 15 ) NAMD_bug("sizeof(force_list) % 16 != 0");
00342 if ( sizeof(atom) & 15 ) NAMD_bug("sizeof(atom) % 16 != 0");
00343 if ( sizeof(atom_param) & 15 ) NAMD_bug("sizeof(atom_param) % 16 != 0");
00344
00345 }
|
|
|
Definition at line 125 of file ComputeNonbondedCUDA.C. References REGISTER_DEVICE_EVENTS. Referenced by cuda_initialize(). 00125 {
00126
00127 #define REGISTER_DEVICE_EVENTS(DEV) \
00128 traceRegisterUserEvent("CUDA device " #DEV " remote", CUDA_EVENT_ID_BASE + 2 * DEV); \
00129 traceRegisterUserEvent("CUDA device " #DEV " local", CUDA_EVENT_ID_BASE + 2 * DEV + 1);
00130
00131 REGISTER_DEVICE_EVENTS(0)
00132 REGISTER_DEVICE_EVENTS(1)
00133 REGISTER_DEVICE_EVENTS(2)
00134 REGISTER_DEVICE_EVENTS(3)
00135 REGISTER_DEVICE_EVENTS(4)
00136 REGISTER_DEVICE_EVENTS(5)
00137 REGISTER_DEVICE_EVENTS(6)
00138 REGISTER_DEVICE_EVENTS(7)
00139 REGISTER_DEVICE_EVENTS(8)
00140 REGISTER_DEVICE_EVENTS(9)
00141 REGISTER_DEVICE_EVENTS(10)
00142 REGISTER_DEVICE_EVENTS(11)
00143 REGISTER_DEVICE_EVENTS(12)
00144 REGISTER_DEVICE_EVENTS(13)
00145 REGISTER_DEVICE_EVENTS(14)
00146 REGISTER_DEVICE_EVENTS(15)
00147
00148 }
|
|
||||||||||||||||
|
||||||||||||
|
Definition at line 640 of file ComputeNonbondedCUDA.C. References ResizeArray< Elem >::add(), ComputeNonbondedCUDA::compute_record::c, cudaCompute, ComputeNonbondedCUDA::localComputeRecords, NAMD_bug(), ComputeNonbondedCUDA::compute_record::offset, ComputeNonbondedCUDA::patchRecords, ComputeNonbondedCUDA::compute_record::pid, ComputeNonbondedCUDA::remoteComputeRecords, and ComputeNonbondedCUDA::requirePatch(). Referenced by ComputeNonbondedSelf::initialize(). 00640 {
00641
00642 if ( ! cudaCompute ) NAMD_bug("register_self called early");
00643
00644 cudaCompute->requirePatch(pid);
00645
00646 ComputeNonbondedCUDA::compute_record cr;
00647 cr.c = c;
00648 cr.pid[0] = pid; cr.pid[1] = pid;
00649 cr.offset = 0.;
00650 if ( cudaCompute->patchRecords[pid].isLocal ) {
00651 cudaCompute->localComputeRecords.add(cr);
00652 } else {
00653 cudaCompute->remoteComputeRecords.add(cr);
00654 }
00655 }
|
|
|
Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved.
Definition at line 350 of file ComputeNonbondedCUDA.C. References computeMgr, and ComputeMgr::sendBuildCudaForceTable(). Referenced by ComputeNonbondedUtil::select(). 00350 {
00351 computeMgr->sendBuildCudaForceTable();
00352 }
|
|
||||||||||||
|
Definition at line 102 of file ComputeNonbondedCUDA.C. Referenced by cuda_initialize(). 00102 {
00103 if ( a == b ) return 0;
00104 for ( int bit = 1; bit; bit *= 2 ) {
00105 if ( (a&bit) != (b&bit) ) return ((a&bit) < (b&bit));
00106 }
00107 return 0;
00108 }
|
|
|
Definition at line 691 of file ComputeNonbondedCUDA.C. References NAMD_bug(). 00691 { // static
00692
00693 NAMD_bug("unregister_compute unimplemented");
00694
00695 }
|
|
|
Definition at line 988 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 989 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 697 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::atomUpdate(), ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::finishWork(). |
|
|
Definition at line 1002 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 1022 of file ComputeNonbondedCUDA.C. Referenced by cuda_check_local_progress(). |
|
|
Definition at line 1021 of file ComputeNonbondedCUDA.C. Referenced by cuda_check_remote_progress(). |
|
|
|
Definition at line 698 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::requirePatch(). |
|
|
Definition at line 1006 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::finishWork(). |
|
|
Definition at line 1007 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::finishWork(). |
|
|
Definition at line 347 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputeNonbondedCUDA::doWork(), register_cuda_compute_pair(), and register_cuda_compute_self(). |
|
|
Definition at line 71 of file ComputeNonbondedCUDA.C. Referenced by cuda_getargs(), and cuda_initialize(). |
|
|
Definition at line 87 of file ComputeNonbondedCUDA.C. Referenced by build_cuda_force_table(), and cuda_initialize(). |
|
|
Definition at line 1004 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 709 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), cuda_check_local_calc(), cuda_check_local_progress(), ComputeNonbondedCUDA::finishWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 708 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), cuda_check_remote_calc(), cuda_check_remote_progress(), ComputeNonbondedCUDA::finishWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 996 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 525 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::build_exclusions(), and ComputeNonbondedCUDA::doWork(). |
|
|
Definition at line 85 of file ComputeNonbondedCUDA.C. Referenced by cuda_initialize(). |
|
|
Definition at line 712 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(). |
|
|
Definition at line 91 of file ComputeNonbondedCUDA.C. Referenced by cuda_initialize(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 73 of file ComputeNonbondedCUDA.C. Referenced by cuda_getargs(). |
|
|
Definition at line 999 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 1000 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 1089 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 1008 of file ComputeNonbondedCUDA.C. Referenced by cuda_check_local_progress(), and ComputeNonbondedCUDA::doWork(). |
|
|
Definition at line 1010 of file ComputeNonbondedCUDA.C. Referenced by cuda_check_local_progress(), and cuda_check_remote_progress(). |
|
|
Definition at line 74 of file ComputeNonbondedCUDA.C. Referenced by cuda_getargs(), and ComputeNonbondedCUDA::finishWork(). |
|
|
Definition at line 86 of file ComputeNonbondedCUDA.C. Referenced by cuda_check_local_calc(), cuda_check_remote_calc(), and cuda_initialize(). |
|
|
Definition at line 986 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(). |
|
|
Definition at line 992 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(). |
|
|
Definition at line 993 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(). |
|
|
Definition at line 88 of file ComputeNonbondedCUDA.C. Referenced by cuda_initialize(). |
|
|
Definition at line 700 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(). |
|
|
Definition at line 701 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(). |
|
|
Definition at line 711 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(). |
|
|
Definition at line 89 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::assignPatches(), cuda_device_shared_with_pe(), and cuda_initialize(). |
|
|
Definition at line 704 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 1009 of file ComputeNonbondedCUDA.C. Referenced by cuda_check_remote_progress(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 703 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(). |
|
|
Definition at line 84 of file ComputeNonbondedCUDA.C. Referenced by cuda_initialize(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 995 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::finishWork(). |
|
|
Definition at line 707 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputeNonbondedCUDA::finishWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 148 of file ComputeNonbondedCUDAKernel.cu. Referenced by cuda_bind_atom_params(), cuda_bind_atoms(), cuda_bind_GBIS_bornRad(), cuda_bind_GBIS_dHdrPrefix(), cuda_bind_GBIS_intRad(), cuda_init(), cuda_nonbonded_forces(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 149 of file ComputeNonbondedCUDAKernel.cu. |
|
|
Definition at line 72 of file ComputeNonbondedCUDA.C. Referenced by cuda_getargs(). |
|
|
Definition at line 702 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(). |
|
|
Definition at line 994 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
1.3.9.1