#include "common.h"
#include "charm++.h"
#include <cuda_runtime.h>
#include <cuda.h>
#include "WorkDistrib.h"
#include "ComputeMgr.h"
#include "ProxyMgr.h"
#include "ComputeNonbondedCUDA.h"
#include "ComputeNonbondedCUDAKernel.h"
#include "LJTable.h"
#include "ObjectArena.h"
#include "SortAtoms.h"
#include <algorithm>
#include "NamdTypes.h"

Go to the source code of this file.
Classes | |
| struct | exlist_sortop |
| struct | cr_sortop |
Defines | |
| #define | CUDA_EVENT_ID_POLL_REMOTE 98 |
| #define | CUDA_TRACE_POLL_REMOTE traceUserEvent(CUDA_EVENT_ID_POLL_REMOTE) |
| #define | CUDA_EVENT_ID_POLL_LOCAL 99 |
| #define | CUDA_TRACE_POLL_LOCAL traceUserEvent(CUDA_EVENT_ID_POLL_LOCAL) |
| #define | CUDA_EVENT_ID_BASE 100 |
| #define | CUDA_TRACE_REMOTE(START, END) |
| #define | CUDA_TRACE_LOCAL(START, END) |
| #define | REGISTER_DEVICE_EVENTS(DEV) |
| #define | SET_EXCL(EXCL, BASE, DIFF) (EXCL)[((BASE)+(DIFF))>>5] |= (1<<(((BASE)+(DIFF))&31)) |
| #define | PATCH_PAIRS_REF ResizeArray<patch_pair> &patch_pairs(*patch_pairs_ptr); |
| #define | FORCE_LISTS_REF ResizeArray<force_list> &force_lists(*force_lists_ptr); |
| #define | CUDA_POLL(FN, ARG) CcdCallFnAfter(FN,ARG,0.1) |
| #define | GBISP() |
| #define | count_limit 1000000 |
| #define | stream2 stream |
Functions | |
| void | cuda_errcheck (const char *msg) |
| void | cuda_die (const char *msg) |
| void | cuda_getargs (char **argv) |
| int | cuda_device_pe () |
| bool | cuda_device_shared_with_pe (int pe) |
| bool | sortop_bitreverse (int a, int b) |
| void | cuda_register_user_events () |
| void | cuda_initialize () |
| void | send_build_cuda_force_table () |
| void | build_cuda_force_table () |
| void | register_cuda_compute_self (ComputeID c, PatchID pid) |
| void | register_cuda_compute_pair (ComputeID c, PatchID pid[], int t[]) |
| void | unregister_cuda_compute (ComputeID c) |
| void | cuda_check_remote_progress (void *arg, double walltime) |
| void | cuda_check_local_progress (void *arg, double walltime) |
| void | cuda_check_remote_calc (void *arg, double) |
| void | cuda_check_local_calc (void *arg, double) |
Variables | |
| __thread cudaStream_t | stream |
| __thread cudaStream_t | stream2 |
| char * | devicelist |
| __thread int | usedevicelist |
| __thread int | ignoresharing |
| __thread int | mergegrids |
| __thread int | shared_gpu |
| __thread int | first_pe_sharing_gpu |
| __thread int | next_pe_sharing_gpu |
| __thread int | devicePe |
| __thread int | numPesSharingDevice |
| __thread int * | pesSharingDevice |
| __thread int | gpu_is_mine |
| __thread ComputeNonbondedCUDA * | cudaCompute = 0 |
| __thread ComputeMgr * | computeMgr = 0 |
| __thread int2 * | exclusionsByAtom |
| __thread int | atomsChanged = 0 |
| __thread int | computesChanged = 0 |
| __thread int | pairlistsValid = 0 |
| __thread float | pairlistTolerance = 0. |
| __thread int | usePairlists = 0 |
| __thread int | savePairlists = 0 |
| __thread float | plcutoff2 = 0 |
| __thread cudaEvent_t | start_calc |
| __thread cudaEvent_t | end_remote_download |
| __thread cudaEvent_t | end_local_download |
| __thread ResizeArray< patch_pair > * | patch_pairs_ptr |
| __thread ResizeArray< force_list > * | force_lists_ptr |
| __thread int | num_atom_records_allocated |
| __thread atom_param * | atom_params |
| __thread atom * | atoms |
| __thread int | num_virials |
| __thread int | num_virials_allocated |
| __thread float * | virials |
| __thread float * | slow_virials |
| __thread float * | energy_gbis |
| __thread float * | intRad0H |
| __thread float * | intRadSH |
| __thread float * | bornRadH |
| __thread float * | dHdrPrefixH |
| __thread int | cuda_timer_count |
| __thread double | cuda_timer_total |
| __thread double | kernel_time |
| __thread double | remote_submit_time |
| __thread double | local_submit_time |
| __thread int | check_remote_count |
| __thread int | check_local_count |
| __thread int | kernel_launch_state = 0 |
|
|
Definition at line 1032 of file ComputeNonbondedCUDA.C. |
|
|
Definition at line 123 of file ComputeNonbondedCUDA.C. |
|
|
Definition at line 120 of file ComputeNonbondedCUDA.C. Referenced by cuda_register_user_events(). |
|
|
Definition at line 117 of file ComputeNonbondedCUDA.C. Referenced by cuda_register_user_events(). |
|
|
Definition at line 1024 of file ComputeNonbondedCUDA.C. Referenced by cuda_check_local_calc(), cuda_check_local_progress(), cuda_check_remote_calc(), cuda_check_remote_progress(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Value: do { int dev; cudaGetDevice(&dev); traceUserBracketEvent( \
    CUDA_EVENT_ID_BASE + 2 * dev + 1, START, END); } while (0)
Definition at line 127 of file ComputeNonbondedCUDA.C. Referenced by cuda_check_local_progress(). |
|
|
Definition at line 121 of file ComputeNonbondedCUDA.C. |
|
|
Definition at line 118 of file ComputeNonbondedCUDA.C. |
|
|
Value: do { int dev; cudaGetDevice(&dev); traceUserBracketEvent( \
    CUDA_EVENT_ID_BASE + 2 * dev, START, END); } while (0)
Definition at line 124 of file ComputeNonbondedCUDA.C. Referenced by cuda_check_remote_progress(). |
|
|
Definition at line 723 of file ComputeNonbondedCUDA.C. |
|
|
Definition at line 1029 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishWork(), ComputeNonbondedCUDA::noWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 722 of file ComputeNonbondedCUDA.C. |
|
|
Value: traceRegisterUserEvent("CUDA device " #DEV " remote", CUDA_EVENT_ID_BASE + 2 * DEV); \
    traceRegisterUserEvent("CUDA device " #DEV " local", CUDA_EVENT_ID_BASE + 2 * DEV + 1);
Referenced by cuda_register_user_events(). |
|
|
Referenced by ComputeNonbondedCUDA::build_exclusions(). |
|
|
Referenced by cuda_init(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 363 of file ComputeNonbondedCUDA.C. References ComputeNonbondedCUDA::build_force_table(), ComputeNonbondedCUDA::build_lj_table(), and devicePe. Referenced by ComputeMgr::recvBuildCudaForceTable(). 00363 {
00364 if ( devicePe != CkMyPe() ) return;
00365 ComputeNonbondedCUDA::build_lj_table();
00366 ComputeNonbondedCUDA::build_force_table();
00367 }
|
|
||||||||||||
|
Definition at line 1666 of file ComputeNonbondedCUDA.C. References computeMgr, cuda_check_local_calc(), CUDA_POLL, end_local_download, next_pe_sharing_gpu, and ComputeMgr::sendYieldDevice(). Referenced by cuda_check_local_calc(), and ComputeNonbondedCUDA::recvYieldDevice(). 01666 {
01667 // in theory we only need end_local_calc, but overlap isn't reliable
01668 // if ( cudaEventQuery(end_local_calc) == cudaSuccess ) {
01669 if ( cudaEventQuery(end_local_download) == cudaSuccess ) {
01670 // CkPrintf("Pe %d yielding to %d after local calc\n", CkMyPe(), next_pe_sharing_gpu);
01671 computeMgr->sendYieldDevice(next_pe_sharing_gpu);
01672 // CkPrintf("Pe %d yielded to %d after local calc\n", CkMyPe(), next_pe_sharing_gpu);
01673 } else {
01674 CUDA_POLL(cuda_check_local_calc, arg);
01675 }
01676 }
|
|
||||||||||||
|
Definition at line 1065 of file ComputeNonbondedCUDA.C. References check_local_count, cuda_check_local_progress(), cuda_errcheck(), CUDA_POLL, CUDA_TRACE_LOCAL, end_local_download, kernel_time, local_submit_time, NAMD_bug(), and NAMD_die(). Referenced by cuda_check_local_progress(), ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). 01065 {
01066
01067 CUDA_TRACE_POLL_LOCAL;
01068 cudaError_t err = cudaEventQuery(end_local_download);
01069 if ( err == cudaSuccess ) {
01070 CUDA_TRACE_LOCAL(local_submit_time,walltime);
01071 kernel_time = walltime - kernel_time;
01072 ((ComputeNonbondedCUDA *) arg)->messageFinishWork();
01073 check_local_count = 0;
01074 } else if ( err != cudaErrorNotReady ) {
01075 cuda_errcheck("in cuda_check_local_progress");
01076 NAMD_bug("cuda_errcheck missed error in cuda_check_local_progress");
01077 } else if ( ++check_local_count >= count_limit ) {
01078 char errmsg[256];
01079 sprintf(errmsg,"cuda_check_local_progress polled %d times over %f s on step %d",
01080 check_local_count, walltime - local_submit_time,
01081 ((ComputeNonbondedCUDA *) arg)->step);
01082 cuda_errcheck(errmsg);
01083 NAMD_die(errmsg);
01084 } else if ( check_remote_count ) {
01085 NAMD_bug("nonzero check_remote_count in cuda_check_local_progress");
01086 } else {
01087 CUDA_POLL(cuda_check_local_progress, arg);
01088 }
01089 }
|
|
||||||||||||
|
Definition at line 1654 of file ComputeNonbondedCUDA.C. References computeMgr, cuda_check_remote_calc(), CUDA_POLL, end_remote_download, next_pe_sharing_gpu, and ComputeMgr::sendYieldDevice(). Referenced by cuda_check_remote_calc(), and ComputeNonbondedCUDA::recvYieldDevice(). 01654 {
01655 // in theory we only need end_remote_calc, but overlap isn't reliable
01656 // if ( cudaEventQuery(end_remote_calc) == cudaSuccess ) {
01657 if ( cudaEventQuery(end_remote_download) == cudaSuccess ) {
01658 // CkPrintf("Pe %d yielding to %d after remote calc\n", CkMyPe(), next_pe_sharing_gpu);
01659 computeMgr->sendYieldDevice(next_pe_sharing_gpu);
01660 // CkPrintf("Pe %d yielded to %d after remote calc\n", CkMyPe(), next_pe_sharing_gpu);
01661 } else {
01662 CUDA_POLL(cuda_check_remote_calc, arg);
01663 }
01664 }
|
|
||||||||||||
|
Definition at line 1036 of file ComputeNonbondedCUDA.C. References check_remote_count, cuda_check_remote_progress(), cuda_errcheck(), CUDA_POLL, CUDA_TRACE_REMOTE, end_remote_download, kernel_time, local_submit_time, NAMD_bug(), NAMD_die(), and remote_submit_time. Referenced by cuda_check_remote_progress(), and ComputeNonbondedCUDA::recvYieldDevice(). 01036 {
01037
01038 CUDA_TRACE_POLL_REMOTE;
01039 cudaError_t err = cudaEventQuery(end_remote_download);
01040 if ( err == cudaSuccess ) {
01041 local_submit_time = walltime;
01042 CUDA_TRACE_REMOTE(remote_submit_time,local_submit_time);
01043 if ( mergegrids ) { // no local
01044 kernel_time = local_submit_time - kernel_time;
01045 }
01046 ((ComputeNonbondedCUDA *) arg)->messageFinishWork();
01047 check_remote_count = 0;
01048 } else if ( err != cudaErrorNotReady ) {
01049 cuda_errcheck("in cuda_check_remote_progress");
01050 NAMD_bug("cuda_errcheck missed error in cuda_check_remote_progress");
01051 } else if ( ++check_remote_count >= count_limit ) {
01052 char errmsg[256];
01053 sprintf(errmsg,"cuda_check_remote_progress polled %d times over %f s on step %d",
01054 check_remote_count, walltime - remote_submit_time,
01055 ((ComputeNonbondedCUDA *) arg)->step);
01056 cuda_errcheck(errmsg);
01057 NAMD_die(errmsg);
01058 } else if ( check_local_count ) {
01059 NAMD_bug("nonzero check_local_count in cuda_check_remote_progress");
01060 } else {
01061 CUDA_POLL(cuda_check_remote_progress, arg);
01062 }
01063 }
|
|
|
Definition at line 93 of file ComputeNonbondedCUDA.C. Referenced by ComputeMgr::createComputes(). 00093 { return devicePe; }
|
|
|
Definition at line 95 of file ComputeNonbondedCUDA.C. References pesSharingDevice. Referenced by ComputeMgr::createComputes(). 00095 {
00096 for ( int i=0; i<numPesSharingDevice; ++i ) {
00097 if ( pesSharingDevice[i] == pe ) return true;
00098 }
00099 return false;
00100 }
|
|
|
Definition at line 54 of file ComputeNonbondedCUDA.C. References CmiPhysicalNodeID, and NAMD_die(). Referenced by cuda_initialize(). 00054 {
00055 char host[128];
00056 #ifdef NOHOSTNAME
00057 sprintf(host,"physical node %d", CmiPhysicalNodeID(CkMyPe()));
00058 #else
00059 gethostname(host, 128); host[127] = 0;
00060 #endif
00061 char devstr[128] = "";
00062 int devnum;
00063 if ( cudaGetDevice(&devnum) == cudaSuccess ) {
00064 sprintf(devstr, " device %d", devnum);
00065 }
00066 char errmsg[1024];
00067 sprintf(errmsg,"CUDA error on Pe %d (%s%s): %s", CkMyPe(), host, devstr, msg);
00068 NAMD_die(errmsg);
00069 }
|
|
|
Definition at line 34 of file ComputeNonbondedCUDA.C. References CmiPhysicalNodeID, and NAMD_die(). Referenced by cuda_bind_atom_params(), cuda_bind_atoms(), cuda_bind_exclusions(), cuda_bind_force_table(), cuda_bind_forces(), cuda_bind_GBIS_bornRad(), cuda_bind_GBIS_dEdaSum(), cuda_bind_GBIS_dHdrPrefix(), cuda_bind_GBIS_energy(), cuda_bind_GBIS_intRad(), cuda_bind_GBIS_psiSum(), cuda_bind_lj_table(), cuda_bind_patch_pairs(), cuda_bind_virials(), cuda_check_local_progress(), cuda_check_remote_progress(), cuda_GBIS_P1(), cuda_GBIS_P2(), cuda_GBIS_P3(), cuda_init(), cuda_initialize(), cuda_nonbonded_forces(), ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishWork(), and ComputeNonbondedCUDA::messageFinishWork(). 00034 {
00035 cudaError_t err;
00036 if ((err = cudaGetLastError()) != cudaSuccess) {
00037 char host[128];
00038 #ifdef NOHOSTNAME
00039 sprintf(host,"physical node %d", CmiPhysicalNodeID(CkMyPe()));
00040 #else
00041 gethostname(host, 128); host[127] = 0;
00042 #endif
00043 char devstr[128] = "";
00044 int devnum;
00045 if ( cudaGetDevice(&devnum) == cudaSuccess ) {
00046 sprintf(devstr, " device %d", devnum);
00047 }
00048 char errmsg[1024];
00049 sprintf(errmsg,"CUDA error %s on Pe %d (%s%s): %s", msg, CkMyPe(), host, devstr, cudaGetErrorString(err));
00050 NAMD_die(errmsg);
00051 }
00052 }
|
|
|
Definition at line 76 of file ComputeNonbondedCUDA.C. References devicelist, ignoresharing, mergegrids, and usedevicelist. Referenced by all_init(). 00076 {
00077 devicelist = 0;
00078 usedevicelist = CmiGetArgStringDesc(argv, "+devices", &devicelist,
00079 "comma-delimited list of CUDA device numbers such as 0,2,1,2");
00080 ignoresharing = CmiGetArgFlag(argv, "+ignoresharing");
00081 mergegrids = CmiGetArgFlag(argv, "+mergegrids");
00082 }
|
|
|
Definition at line 159 of file ComputeNonbondedCUDA.C. References CmiPhysicalNodeID, cuda_die(), cuda_errcheck(), cuda_register_user_events(), devicelist, devicePe, first_pe_sharing_gpu, gpu_is_mine, j, NAMD_bug(), NAMD_die(), next_pe_sharing_gpu, numPesSharingDevice, pesSharingDevice, shared_gpu, and sortop_bitreverse(). Referenced by all_init(). 00159 {
00160
00161 if ( 0 == CkMyPe() ) cuda_register_user_events();
00162
00163 char host[128];
00164 #ifdef NOHOSTNAME
00165 sprintf(host,"physical node %d", CmiPhysicalNodeID(CkMyPe()));
00166 #else
00167 gethostname(host, 128); host[127] = 0;
00168 #endif
00169
00170 int myPhysicalNodeID = CmiPhysicalNodeID(CkMyPe());
00171 int myRankInPhysicalNode;
00172 int numPesOnPhysicalNode;
00173 int *pesOnPhysicalNode;
00174 CmiGetPesOnPhysicalNode(myPhysicalNodeID,
00175 &pesOnPhysicalNode,&numPesOnPhysicalNode);
00176
00177 {
00178 int i;
00179 for ( i=0; i < numPesOnPhysicalNode; ++i ) {
00180 if ( i && (pesOnPhysicalNode[i] <= pesOnPhysicalNode[i-1]) ) {
00181 i = numPesOnPhysicalNode;
00182 break;
00183 }
00184 if ( pesOnPhysicalNode[i] == CkMyPe() ) break;
00185 }
00186 if ( i == numPesOnPhysicalNode || i != CmiPhysicalRank(CkMyPe()) ) {
00187 CkPrintf("Bad result from CmiGetPesOnPhysicalNode!\n");
00188 for ( i=0; i < numPesOnPhysicalNode; ++i ) {
00189 CkPrintf("pe %d physnode rank %d of %d is %d\n", CkMyPe(),
00190 i, numPesOnPhysicalNode, pesOnPhysicalNode[i]);
00191 }
00192 myRankInPhysicalNode = 0;
00193 numPesOnPhysicalNode = 1;
00194 pesOnPhysicalNode = new int[1];
00195 pesOnPhysicalNode[0] = CkMyPe();
00196 } else {
00197 myRankInPhysicalNode = i;
00198 }
00199 }
00200 // CkPrintf("Pe %d ranks %d in physical node\n",CkMyPe(),myRankInPhysicalNode);
00201
00202 int deviceCount = 0;
00203 cudaGetDeviceCount(&deviceCount);
00204 cuda_errcheck("in cudaGetDeviceCount");
00205 if ( deviceCount <= 0 ) {
00206 cuda_die("No CUDA devices found.");
00207 }
00208
00209 int *devices;
00210 int ndevices = 0;
00211 int nexclusive = 0;
00212 if ( usedevicelist ) {
00213 devices = new int[strlen(devicelist)];
00214 int i = 0;
00215 while ( devicelist[i] ) {
00216 ndevices += sscanf(devicelist+i,"%d",devices+ndevices);
00217 while ( devicelist[i] && isdigit(devicelist[i]) ) ++i;
00218 while ( devicelist[i] && ! isdigit(devicelist[i]) ) ++i;
00219 }
00220 } else {
00221 if ( ! CkMyPe() ) {
00222 CkPrintf("Did not find +devices i,j,k,... argument, using all\n");
00223 }
00224 devices = new int[deviceCount];
00225 for ( int i=0; i<deviceCount; ++i ) {
00226 int dev = i % deviceCount;
00227 #if CUDA_VERSION >= 2020
00228 cudaDeviceProp deviceProp;
00229 cudaGetDeviceProperties(&deviceProp, dev);
00230 cuda_errcheck("in cudaGetDeviceProperties");
00231 if ( deviceProp.computeMode != cudaComputeModeProhibited
00232 && (deviceProp.major > 1 || deviceProp.minor >= 1)
00233 && deviceProp.canMapHostMemory
00234 && deviceProp.multiProcessorCount > 2 ) { // exclude weak cards
00235 devices[ndevices++] = dev;
00236 }
00237 if ( deviceProp.computeMode == cudaComputeModeExclusive ) {
00238 ++nexclusive;
00239 }
00240 #else
00241 devices[ndevices++] = dev;
00242 #endif
00243 }
00244 }
00245
00246 if ( ! ndevices ) {
00247 cuda_die("All CUDA devices are in prohibited mode, of compute capability 1.0, unable to map host memory, too small, or otherwise unusable.");
00248 }
00249
00250 shared_gpu = 0;
00251 gpu_is_mine = 1;
00252 first_pe_sharing_gpu = CkMyPe();
00253 next_pe_sharing_gpu = CkMyPe();
00254
00255 /* if ( (ndevices >= numPesOnPhysicalNode) || (nexclusive == 0) ) */ {
00256
00257 int dev;
00258 if ( numPesOnPhysicalNode > 1 ) {
00259 int myDeviceRank = myRankInPhysicalNode * ndevices / numPesOnPhysicalNode;
00260 dev = devices[myDeviceRank];
00261 devicePe = CkMyPe();
00262 if ( ignoresharing ) {
00263 pesSharingDevice = new int[1];
00264 pesSharingDevice[0] = CkMyPe();
00265 numPesSharingDevice = 1;
00266 } else {
00267 pesSharingDevice = new int[numPesOnPhysicalNode];
00268 devicePe = -1;
00269 numPesSharingDevice = 0;
00270 for ( int i = 0; i < numPesOnPhysicalNode; ++i ) {
00271 if ( i * ndevices / numPesOnPhysicalNode == myDeviceRank ) {
00272 int thisPe = pesOnPhysicalNode[i];
00273 pesSharingDevice[numPesSharingDevice++] = thisPe;
00274 if ( devicePe < 1 ) devicePe = thisPe;
00275 if ( sortop_bitreverse(thisPe,devicePe) ) devicePe = thisPe;
00276 }
00277 }
00278 for ( int j = 0; j < ndevices; ++j ) {
00279 if ( devices[j] == dev && j != myDeviceRank ) shared_gpu = 1;
00280 }
00281 }
00282 if ( shared_gpu && devicePe == CkMyPe() ) {
00283 CkPrintf("Pe %d sharing CUDA device %d\n", CkMyPe(), dev);
00284 }
00285 } else { // in case phys node code is lying
00286 dev = devices[CkMyPe() % ndevices];
00287 devicePe = CkMyPe();
00288 pesSharingDevice = new int[1];
00289 pesSharingDevice[0] = CkMyPe();
00290 numPesSharingDevice = 1;
00291 }
00292
00293 if ( devicePe != CkMyPe() ) {
00294 CkPrintf("Pe %d physical rank %d will use CUDA device of pe %d\n",
00295 CkMyPe(), myRankInPhysicalNode, devicePe);
00296 return;
00297 }
00298
00299 // disable token-passing but don't submit local until remote finished
00300 // if shared_gpu is true, otherwise submit all work immediately
00301 first_pe_sharing_gpu = CkMyPe();
00302 next_pe_sharing_gpu = CkMyPe();
00303
00304 gpu_is_mine = ( first_pe_sharing_gpu == CkMyPe() );
00305
00306 if ( dev >= deviceCount ) {
00307 char buf[256];
00308 sprintf(buf,"Pe %d unable to bind to CUDA device %d on %s because only %d devices are present",
00309 CkMyPe(), dev, host, deviceCount);
00310 NAMD_die(buf);
00311 }
00312
00313 cudaError_t err;
00314 cudaDeviceProp deviceProp;
00315 err = cudaGetDeviceProperties(&deviceProp, dev);
00316 if (err == cudaSuccess) {
00317 CkPrintf("Pe %d physical rank %d binding to CUDA device %d on %s: '%s' Mem: %dMB Rev: %d.%d\n",
00318 CkMyPe(), myRankInPhysicalNode, dev, host,
00319 deviceProp.name, deviceProp.totalGlobalMem / (1024*1024),
00320 deviceProp.major, deviceProp.minor);
00321
00322 err = cudaSetDevice(dev);
00323 }
00324 if ( err != cudaSuccess) {
00325 char errmsg[1024];
00326 sprintf(errmsg,"CUDA error binding to device %d on pe %d: %s",
00327 dev, CkMyPe(), cudaGetErrorString(err));
00328 NAMD_die(errmsg);
00329 }
00330
00331 } // just let CUDA pick a device for us
00332
00333 cudaSetDeviceFlags(cudaDeviceMapHost);
00334 cuda_errcheck("in cudaSetDeviceFlags");
00335
00336 int dev;
00337 if ( cudaGetDevice(&dev) == cudaSuccess ) {
00338 cudaDeviceProp deviceProp;
00339 cudaGetDeviceProperties(&deviceProp, dev);
00340 cuda_errcheck("in cudaGetDeviceProperties");
00341 if ( deviceProp.computeMode == cudaComputeModeProhibited )
00342 cuda_die("device in prohibited mode");
00343 if ( deviceProp.major < 2 && deviceProp.minor < 1 )
00344 cuda_die("device not of compute capability 1.1 or higher");
00345 if ( ! deviceProp.canMapHostMemory )
00346 cuda_die("device cannot map host memory");
00347 }
00348
00349 if ( sizeof(patch_pair) & 15 ) NAMD_bug("sizeof(patch_pair) % 16 != 0");
00350 if ( sizeof(force_list) & 15 ) NAMD_bug("sizeof(force_list) % 16 != 0");
00351 if ( sizeof(atom) & 15 ) NAMD_bug("sizeof(atom) % 16 != 0");
00352 if ( sizeof(atom_param) & 15 ) NAMD_bug("sizeof(atom_param) % 16 != 0");
00353
00354 }
|
|
|
Definition at line 131 of file ComputeNonbondedCUDA.C. References CUDA_EVENT_ID_POLL_LOCAL, CUDA_EVENT_ID_POLL_REMOTE, and REGISTER_DEVICE_EVENTS. Referenced by cuda_initialize(). 00131 {
00132
00133 traceRegisterUserEvent("CUDA poll remote", CUDA_EVENT_ID_POLL_REMOTE);
00134 traceRegisterUserEvent("CUDA poll local", CUDA_EVENT_ID_POLL_LOCAL);
00135
00136 #define REGISTER_DEVICE_EVENTS(DEV) \
00137 traceRegisterUserEvent("CUDA device " #DEV " remote", CUDA_EVENT_ID_BASE + 2 * DEV); \
00138 traceRegisterUserEvent("CUDA device " #DEV " local", CUDA_EVENT_ID_BASE + 2 * DEV + 1);
00139
00140 REGISTER_DEVICE_EVENTS(0)
00141 REGISTER_DEVICE_EVENTS(1)
00142 REGISTER_DEVICE_EVENTS(2)
00143 REGISTER_DEVICE_EVENTS(3)
00144 REGISTER_DEVICE_EVENTS(4)
00145 REGISTER_DEVICE_EVENTS(5)
00146 REGISTER_DEVICE_EVENTS(6)
00147 REGISTER_DEVICE_EVENTS(7)
00148 REGISTER_DEVICE_EVENTS(8)
00149 REGISTER_DEVICE_EVENTS(9)
00150 REGISTER_DEVICE_EVENTS(10)
00151 REGISTER_DEVICE_EVENTS(11)
00152 REGISTER_DEVICE_EVENTS(12)
00153 REGISTER_DEVICE_EVENTS(13)
00154 REGISTER_DEVICE_EVENTS(14)
00155 REGISTER_DEVICE_EVENTS(15)
00156
00157 }
|
|
||||||||||||||||
|
||||||||||||
|
Definition at line 649 of file ComputeNonbondedCUDA.C. References ResizeArray< Elem >::add(), ComputeNonbondedCUDA::compute_record::c, cudaCompute, ComputeNonbondedCUDA::localComputeRecords, NAMD_bug(), ComputeNonbondedCUDA::compute_record::offset, ComputeNonbondedCUDA::patchRecords, ComputeNonbondedCUDA::compute_record::pid, ComputeNonbondedCUDA::remoteComputeRecords, and ComputeNonbondedCUDA::requirePatch(). Referenced by ComputeNonbondedSelf::initialize(). 00649 {
00650
00651 if ( ! cudaCompute ) NAMD_bug("register_self called early");
00652
00653 cudaCompute->requirePatch(pid);
00654
00655 ComputeNonbondedCUDA::compute_record cr;
00656 cr.c = c;
00657 cr.pid[0] = pid; cr.pid[1] = pid;
00658 cr.offset = 0.;
00659 if ( cudaCompute->patchRecords[pid].isLocal ) {
00660 cudaCompute->localComputeRecords.add(cr);
00661 } else {
00662 cudaCompute->remoteComputeRecords.add(cr);
00663 }
00664 }
|
|
|
Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved. Definition at line 359 of file ComputeNonbondedCUDA.C. References computeMgr, and ComputeMgr::sendBuildCudaForceTable(). Referenced by ComputeNonbondedUtil::select(). 00359 {
00360 computeMgr->sendBuildCudaForceTable();
00361 }
|
|
||||||||||||
|
Definition at line 102 of file ComputeNonbondedCUDA.C. Referenced by cuda_initialize(). 00102 {
00103 if ( a == b ) return 0;
00104 for ( int bit = 1; bit; bit *= 2 ) {
00105 if ( (a&bit) != (b&bit) ) return ((a&bit) < (b&bit));
00106 }
00107 return 0;
00108 }
|
|
|
Definition at line 700 of file ComputeNonbondedCUDA.C. References NAMD_bug(). 00700 { // static
00701
00702 NAMD_bug("unregister_compute unimplemented");
00703
00704 }
|
|
|
Definition at line 1000 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 1001 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 706 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::atomUpdate(), ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::finishWork(). |
|
|
Definition at line 1014 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 1034 of file ComputeNonbondedCUDA.C. Referenced by cuda_check_local_progress(). |
|
|
Definition at line 1033 of file ComputeNonbondedCUDA.C. Referenced by cuda_check_remote_progress(). |
|
|
|
Definition at line 707 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::requirePatch(). |
|
|
Definition at line 1018 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::finishWork(). |
|
|
Definition at line 1019 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::finishWork(). |
|
|
Definition at line 356 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputeNonbondedCUDA::doWork(), register_cuda_compute_pair(), and register_cuda_compute_self(). |
|
|
Definition at line 71 of file ComputeNonbondedCUDA.C. Referenced by cuda_getargs(), and cuda_initialize(). |
|
|
Definition at line 87 of file ComputeNonbondedCUDA.C. Referenced by build_cuda_force_table(), and cuda_initialize(). |
|
|
Definition at line 1016 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 718 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), cuda_check_local_calc(), cuda_check_local_progress(), ComputeNonbondedCUDA::finishWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 717 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), cuda_check_remote_calc(), cuda_check_remote_progress(), ComputeNonbondedCUDA::finishWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 1008 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 534 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::build_exclusions(), and ComputeNonbondedCUDA::doWork(). |
|
|
Definition at line 85 of file ComputeNonbondedCUDA.C. Referenced by cuda_initialize(). |
|
|
Definition at line 721 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(). |
|
|
Definition at line 91 of file ComputeNonbondedCUDA.C. Referenced by cuda_initialize(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 73 of file ComputeNonbondedCUDA.C. Referenced by cuda_getargs(). |
|
|
Definition at line 1011 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 1012 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 1105 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 1020 of file ComputeNonbondedCUDA.C. Referenced by cuda_check_local_progress(), cuda_check_remote_progress(), and ComputeNonbondedCUDA::doWork(). |
|
|
Definition at line 1022 of file ComputeNonbondedCUDA.C. Referenced by cuda_check_local_progress(), and cuda_check_remote_progress(). |
|
|
Definition at line 74 of file ComputeNonbondedCUDA.C. Referenced by cuda_getargs(), and ComputeNonbondedCUDA::finishWork(). |
|
|
Definition at line 86 of file ComputeNonbondedCUDA.C. Referenced by cuda_check_local_calc(), cuda_check_remote_calc(), and cuda_initialize(). |
|
|
Definition at line 998 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(). |
|
|
Definition at line 1004 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(). |
|
|
Definition at line 1005 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(). |
|
|
Definition at line 88 of file ComputeNonbondedCUDA.C. Referenced by cuda_initialize(). |
|
|
Definition at line 709 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(). |
|
|
Definition at line 710 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(). |
|
|
Definition at line 720 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(). |
|
|
Definition at line 89 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::assignPatches(), cuda_device_shared_with_pe(), and cuda_initialize(). |
|
|
Definition at line 713 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 1021 of file ComputeNonbondedCUDA.C. Referenced by cuda_check_remote_progress(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 712 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(). |
|
|
Definition at line 84 of file ComputeNonbondedCUDA.C. Referenced by cuda_initialize(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 1007 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::finishWork(). |
|
|
Definition at line 716 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputeNonbondedCUDA::finishWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 148 of file ComputeNonbondedCUDAKernel.cu. Referenced by cuda_bind_atom_params(), cuda_bind_atoms(), cuda_bind_GBIS_bornRad(), cuda_bind_GBIS_dHdrPrefix(), cuda_bind_GBIS_intRad(), cuda_init(), cuda_nonbonded_forces(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 149 of file ComputeNonbondedCUDAKernel.cu. |
|
|
Definition at line 72 of file ComputeNonbondedCUDA.C. Referenced by cuda_getargs(). |
|
|
Definition at line 711 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(). |
|
|
Definition at line 1006 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
1.3.9.1