#include "common.h"
#include "charm++.h"
#include <cuda_runtime.h>
#include <cuda.h>
#include "WorkDistrib.h"
#include "ComputeMgr.h"
#include "ComputeNonbondedCUDA.h"
#include "ComputeNonbondedCUDAKernel.h"
#include "ObjectArena.h"

Go to the source code of this file.
|
|
Definition at line 545 of file ComputeNonbondedCUDA.C. Referenced by cuda_check_local_calc(), cuda_check_local_progress(), cuda_check_remote_calc(), cuda_check_remote_progress(), ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Referenced by ComputeNonbondedCUDA::build_exclusions(), and cuda_bind_exclusions(). |
|
|
Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved.

Definition at line 207 of file ComputeNonbondedCUDA.C. References ComputeNonbondedCUDA::build_force_table(). Referenced by ComputeNonbondedUtil::select(). 00207 {
00208 ComputeNonbondedCUDA::build_force_table();
00209 }
|
|
||||||||||||
|
Definition at line 902 of file ComputeNonbondedCUDA.C. References ccd_index_local_calc, computeMgr, cuda_check_local_calc(), CUDA_CONDITION, end_local_download, next_pe_sharing_gpu, and ComputeMgr::sendYieldDevice(). Referenced by cuda_check_local_calc(), and ComputeNonbondedCUDA::recvYieldDevice(). 00902 {
00903 // in theory we only need end_local_calc, but overlap isn't reliable
00904 // if ( cudaEventQuery(end_local_calc) == cudaSuccess ) {
00905 if ( cudaEventQuery(end_local_download) == cudaSuccess ) {
00906 // CkPrintf("Pe %d yielding to %d after local calc\n", CkMyPe(), next_pe_sharing_gpu);
00907 computeMgr->sendYieldDevice(next_pe_sharing_gpu);
00908 // CkPrintf("Pe %d yielded to %d after local calc\n", CkMyPe(), next_pe_sharing_gpu);
00909 } else {
00910 ccd_index_local_calc = CcdCallOnCondition(CUDA_CONDITION, cuda_check_local_calc, arg);
00911 }
00912 }
|
|
||||||||||||
|
Definition at line 556 of file ComputeNonbondedCUDA.C. References ccd_index_local_download, cuda_check_local_progress(), CUDA_CONDITION, end_local_download, kernel_time, and WorkDistrib::messageEnqueueWork(). Referenced by cuda_check_local_progress(), ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). 00556 {
00557 if ( cudaEventQuery(end_local_download) == cudaSuccess ) {
00558 kernel_time += CkWallTimer();
00559 WorkDistrib::messageEnqueueWork((ComputeNonbondedCUDA *) arg);
00560 } else {
00561 ccd_index_local_download = CcdCallOnCondition(CUDA_CONDITION, cuda_check_local_progress, arg);
00562 }
00563 }
|
|
||||||||||||
|
Definition at line 889 of file ComputeNonbondedCUDA.C. References ccd_index_remote_calc, computeMgr, cuda_check_remote_calc(), CUDA_CONDITION, end_remote_download, next_pe_sharing_gpu, and ComputeMgr::sendYieldDevice(). Referenced by cuda_check_remote_calc(), and ComputeNonbondedCUDA::recvYieldDevice(). 00889 {
00890 // in theory we only need end_remote_calc, but overlap isn't reliable
00891 // if ( cudaEventQuery(end_remote_calc) == cudaSuccess ) {
00892 if ( cudaEventQuery(end_remote_download) == cudaSuccess ) {
00893 // CkPrintf("Pe %d yielding to %d after remote calc\n", CkMyPe(), next_pe_sharing_gpu);
00894 computeMgr->sendYieldDevice(next_pe_sharing_gpu);
00895 // CkPrintf("Pe %d yielded to %d after remote calc\n", CkMyPe(), next_pe_sharing_gpu);
00896 } else {
00897 ccd_index_remote_calc = CcdCallOnCondition(CUDA_CONDITION, cuda_check_remote_calc, arg);
00898 }
00899 }
|
|
||||||||||||
|
Definition at line 547 of file ComputeNonbondedCUDA.C. References ccd_index_remote_download, cuda_check_remote_progress(), CUDA_CONDITION, end_remote_download, and WorkDistrib::messageEnqueueWork(). Referenced by cuda_check_remote_progress(), ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). 00547 {
00548 if ( cudaEventQuery(end_remote_download) == cudaSuccess ) {
00549 // ((ComputeNonbondedCUDA *) arg)->finishWork();
00550 WorkDistrib::messageEnqueueWork((ComputeNonbondedCUDA *) arg);
00551 } else {
00552 ccd_index_remote_download = CcdCallOnCondition(CUDA_CONDITION, cuda_check_remote_progress, arg);
00553 }
00554 }
|
|
|
Definition at line 20 of file ComputeNonbondedCUDA.C. References NAMD_die(). Referenced by cuda_bind_atom_params(), cuda_bind_atoms(), cuda_bind_exclusions(), cuda_bind_force_table(), cuda_bind_patch_pairs(), cuda_init(), cuda_load_forces(), cuda_nonbonded_forces(), and ComputeNonbondedCUDA::finishWork(). 00020 {
00021 cudaError_t err;
00022 if ((err = cudaGetLastError()) != cudaSuccess) {
00023 char errmsg[1024];
00024 sprintf(errmsg,"CUDA error %s: %s", msg, cudaGetErrorString(err));
00025 NAMD_die(errmsg);
00026 }
00027 }
|
|
|
Definition at line 33 of file ComputeNonbondedCUDA.C. References devicelist, ignoresharing, and usedevicelist. Referenced by all_init(). 00033 {
00034 devicelist = 0;
00035 usedevicelist = CmiGetArgStringDesc(argv, "+devices", &devicelist,
00036 "comma-delimited list of CUDA device numbers such as 0,2,1,2");
00037 ignoresharing = CmiGetArgFlag(argv, "+ignoresharing");
00038 }
|
|
|
Definition at line 46 of file ComputeNonbondedCUDA.C. References CmiPhysicalNodeID, cuda_init(), devicelist, first_pe_sharing_gpu, gpu_is_mine, NAMD_die(), next_pe_sharing_gpu, and shared_gpu. Referenced by all_init(). 00046 {
00047
00048 char host[128];
00049 #ifdef NOHOSTNAME
00050 sprintf(host,"unknown");
00051 #else
00052 gethostname(host, 128); host[127] = 0;
00053 #endif
00054
00055 int myPhysicalNodeID = CmiPhysicalNodeID(CkMyPe());
00056 int myRankInPhysicalNode;
00057 int numPesOnPhysicalNode;
00058 int *pesOnPhysicalNode;
00059 CmiGetPesOnPhysicalNode(myPhysicalNodeID,
00060 &pesOnPhysicalNode,&numPesOnPhysicalNode);
00061 {
00062 int i;
00063 for ( i=0; i < numPesOnPhysicalNode; ++i ) {
00064 if ( i && (pesOnPhysicalNode[i] <= pesOnPhysicalNode[i-1]) ) {
00065 i = numPesOnPhysicalNode;
00066 break;
00067 }
00068 if ( pesOnPhysicalNode[i] == CkMyPe() ) break;
00069 }
00070 if ( i == numPesOnPhysicalNode || i != CmiPhysicalRank(CkMyPe()) ) {
00071 CkPrintf("Bad result from CmiGetPesOnPhysicalNode!\n");
00072 for ( i=0; i < numPesOnPhysicalNode; ++i ) {
00073 CkPrintf("pe %d physnode rank %d of %d is %d\n", CkMyPe(),
00074 i, numPesOnPhysicalNode, pesOnPhysicalNode[i]);
00075 }
00076 myRankInPhysicalNode = 0;
00077 numPesOnPhysicalNode = 1;
00078 pesOnPhysicalNode = new int[1];
00079 pesOnPhysicalNode[0] = CkMyPe();
00080 } else {
00081 myRankInPhysicalNode = i;
00082 }
00083 }
00084 // CkPrintf("Pe %d ranks %d in physical node\n",CkMyPe(),myRankInPhysicalNode);
00085
00086 int deviceCount = 0;
00087 cudaGetDeviceCount(&deviceCount);
00088 if ( deviceCount <= 0 ) {
00089 NAMD_die("No CUDA devices found.");
00090 }
00091
00092 int *devices;
00093 int ndevices = 0;
00094 int nexclusive = 0;
00095 if ( usedevicelist ) {
00096 devices = new int[strlen(devicelist)];
00097 int i = 0;
00098 while ( devicelist[i] ) {
00099 ndevices += sscanf(devicelist+i,"%d",devices+ndevices);
00100 while ( devicelist[i] && isdigit(devicelist[i]) ) ++i;
00101 while ( devicelist[i] && ! isdigit(devicelist[i]) ) ++i;
00102 }
00103 } else {
00104 if ( ! CkMyPe() ) {
00105 CkPrintf("Did not find +devices i,j,k,... argument, using all\n");
00106 }
00107 devices = new int[deviceCount];
00108 for ( int i=0; i<deviceCount; ++i ) {
00109 int dev = i % deviceCount;
00110 #if CUDA_VERSION >= 2020
00111 cudaDeviceProp deviceProp;
00112 cudaGetDeviceProperties(&deviceProp, dev);
00113 if ( deviceProp.computeMode != cudaComputeModeProhibited
00114 && deviceProp.multiProcessorCount > 2 ) { // exclude weak cards
00115 devices[ndevices++] = dev;
00116 }
00117 if ( deviceProp.computeMode == cudaComputeModeExclusive ) {
00118 ++nexclusive;
00119 }
00120 #else
00121 devices[ndevices++] = dev;
00122 #endif
00123 }
00124 }
00125
00126 if ( ! ndevices ) {
00127 NAMD_die("All CUDA devices are in prohibited mode.");
00128 }
00129
00130 shared_gpu = 0;
00131 gpu_is_mine = 1;
00132 first_pe_sharing_gpu = CkMyPe();
00133 next_pe_sharing_gpu = CkMyPe();
00134
00135 if ( (ndevices >= numPesOnPhysicalNode) || (nexclusive == 0) ) {
00136
00137 int dev;
00138 if ( numPesOnPhysicalNode > 1 ) {
00139 dev = devices[myRankInPhysicalNode % ndevices];
00140 if ( ! ignoresharing ) {
00141 for ( int i = (myRankInPhysicalNode + 1) % numPesOnPhysicalNode;
00142 i != myRankInPhysicalNode;
00143 i = (i + 1) % numPesOnPhysicalNode ) {
00144 if (devices[i % ndevices] == dev) {
00145 shared_gpu = 1;
00146 next_pe_sharing_gpu = pesOnPhysicalNode[i];
00147 break;
00148 }
00149 }
00150 }
00151 if ( shared_gpu ) {
00152 for ( int i = 0; i < numPesOnPhysicalNode; ++i ) {
00153 if (devices[i % ndevices] == dev) {
00154 first_pe_sharing_gpu = pesOnPhysicalNode[i];
00155 break;
00156 }
00157 }
00158 CkPrintf("Pe %d sharing CUDA device %d first %d next %d\n",
00159 CkMyPe(), dev, first_pe_sharing_gpu, next_pe_sharing_gpu);
00160 }
00161 } else { // in case phys node code is lying
00162 dev = devices[CkMyPe() % ndevices];
00163 }
00164
00165 // disable token-passing but don't submit local until remote finished
00166 // if shared_gpu is true, otherwise submit all work immediately
00167 first_pe_sharing_gpu = CkMyPe();
00168 next_pe_sharing_gpu = CkMyPe();
00169
00170 gpu_is_mine = ( first_pe_sharing_gpu == CkMyPe() );
00171
00172 if ( dev >= deviceCount ) {
00173 char buf[256];
00174 sprintf(buf,"Pe %d unable to bind to CUDA device %d on %s because only %d devices are present",
00175 CkMyPe(), dev, host, deviceCount);
00176 NAMD_die(buf);
00177 }
00178
00179 cudaDeviceProp deviceProp;
00180 cudaGetDeviceProperties(&deviceProp, dev);
00181 CkPrintf("Pe %d physical rank %d binding to CUDA device %d on %s: '%s' Mem: %dMB Rev: %d.%d\n",
00182 CkMyPe(), myRankInPhysicalNode, dev, host,
00183 deviceProp.name, deviceProp.totalGlobalMem / (1024*1024),
00184 deviceProp.major, deviceProp.minor);
00185
00186 cudaSetDevice(dev);
00187 cudaError_t err;
00188 if ((err = cudaGetLastError()) != cudaSuccess) {
00189 char errmsg[1024];
00190 sprintf(errmsg,"CUDA error binding to device %d on pe %d: %s",
00191 dev, CkMyPe(), cudaGetErrorString(err));
00192 NAMD_die(errmsg);
00193 }
00194
00195 } // just let CUDA pick a device for us
00196
00197 if ( sizeof(patch_pair) & 15 ) NAMD_die("sizeof(patch_pair) % 16 != 0");
00198 if ( sizeof(force_list) & 15 ) NAMD_die("sizeof(force_list) % 16 != 0");
00199 if ( sizeof(atom) & 15 ) NAMD_die("sizeof(atom) % 16 != 0");
00200 if ( sizeof(atom_param) & 15 ) NAMD_die("sizeof(atom_param) % 16 != 0");
00201
00202 cuda_init();
00203
00204 }
|
|
||||||||||||||||
|
Definition at line 422 of file ComputeNonbondedCUDA.C. References ResizeArray< Elem >::add(), ComputeNonbondedCUDA::compute_record::c, PatchMap::center(), cudaCompute, ComputeNonbondedCUDA::localComputeRecords, NAMD_die(), PatchMap::node(), ComputeNonbondedCUDA::compute_record::offset, ComputeNonbondedCUDA::patchMap, ComputeNonbondedCUDA::compute_record::pid, ComputeNonbondedCUDA::remoteComputeRecords, ComputeNonbondedCUDA::requirePatch(), Vector::x, Vector::y, and Vector::z. Referenced by ComputeNonbondedPair::initialize(). 00422 {
00423
00424 if ( ! cudaCompute ) NAMD_die("register_pair called early");
00425
00426 cudaCompute->requirePatch(pid[0]);
00427 cudaCompute->requirePatch(pid[1]);
00428
00429 ComputeNonbondedCUDA::compute_record cr, cr2;
00430 cr.c = c; cr2.c = c;
00431 cr.pid[0] = pid[0]; cr.pid[1] = pid[1];
00432 cr2.pid[0] = pid[1]; cr2.pid[1] = pid[0];
00433
00434 int t1 = t[0];
00435 int t2 = t[1];
00436 Vector offset = cudaCompute->patchMap->center(pid[0])
00437 - cudaCompute->patchMap->center(pid[1]);
00438 offset.x += (t1%3-1) - (t2%3-1);
00439 offset.y += ((t1/3)%3-1) - ((t2/3)%3-1);
00440 offset.z += (t1/9-1) - (t2/9-1);
00441 cr.offset = offset;
00442 cr2.offset = -1. * offset;
00443
00444 if ( cudaCompute->patchMap->node(pid[0]) == CkMyPe() ) {
00445 cudaCompute->localComputeRecords.add(cr);
00446 } else {
00447 cudaCompute->remoteComputeRecords.add(cr);
00448 }
00449 if ( cudaCompute->patchMap->node(pid[1]) == CkMyPe() ) {
00450 cudaCompute->localComputeRecords.add(cr2);
00451 } else {
00452 cudaCompute->remoteComputeRecords.add(cr2);
00453 }
00454 }
|
|
||||||||||||
|
Definition at line 405 of file ComputeNonbondedCUDA.C. References ResizeArray< Elem >::add(), ComputeNonbondedCUDA::compute_record::c, cudaCompute, ComputeNonbondedCUDA::localComputeRecords, NAMD_die(), PatchMap::node(), ComputeNonbondedCUDA::compute_record::offset, ComputeNonbondedCUDA::patchMap, ComputeNonbondedCUDA::compute_record::pid, ComputeNonbondedCUDA::remoteComputeRecords, and ComputeNonbondedCUDA::requirePatch(). Referenced by ComputeNonbondedSelf::initialize(). 00405 {
00406
00407 if ( ! cudaCompute ) NAMD_die("register_self called early");
00408
00409 cudaCompute->requirePatch(pid);
00410
00411 ComputeNonbondedCUDA::compute_record cr;
00412 cr.c = c;
00413 cr.pid[0] = pid; cr.pid[1] = pid;
00414 cr.offset = 0.;
00415 if ( cudaCompute->patchMap->node(pid) == CkMyPe() ) {
00416 cudaCompute->localComputeRecords.add(cr);
00417 } else {
00418 cudaCompute->remoteComputeRecords.add(cr);
00419 }
00420 }
|
|
|
Definition at line 456 of file ComputeNonbondedCUDA.C. References NAMD_die(). 00456 { // static
00457
00458 NAMD_die("unregister_compute unimplemented");
00459
00460 }
|
|
|
Definition at line 534 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 535 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 462 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::atomUpdate(), ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::finishWork(). |
|
|
Definition at line 901 of file ComputeNonbondedCUDA.C. Referenced by cuda_check_local_calc(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 544 of file ComputeNonbondedCUDA.C. Referenced by cuda_check_local_progress(), ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 888 of file ComputeNonbondedCUDA.C. Referenced by cuda_check_remote_calc(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 543 of file ComputeNonbondedCUDA.C. Referenced by cuda_check_remote_progress(), ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 323 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), cuda_check_local_calc(), and cuda_check_remote_calc(). |
|
|
Definition at line 463 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::requirePatch(). |
|
|
Definition at line 539 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::finishWork(). |
|
|
Definition at line 540 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::finishWork(). |
|
|
Definition at line 322 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputeNonbondedCUDA::doWork(), register_cuda_compute_pair(), and register_cuda_compute_self(). |
|
|
Definition at line 29 of file ComputeNonbondedCUDA.C. Referenced by cuda_getargs(), and cuda_initialize(). |
|
|
Definition at line 469 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 470 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), cuda_check_local_calc(), cuda_check_local_progress(), ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 467 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 468 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), cuda_check_remote_calc(), cuda_check_remote_progress(), ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 325 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::build_exclusions(), and ComputeNonbondedCUDA::doWork(). |
|
|
Definition at line 41 of file ComputeNonbondedCUDA.C. Referenced by cuda_initialize(). |
|
|
Definition at line 531 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(). |
|
|
Definition at line 536 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 44 of file ComputeNonbondedCUDA.C. Referenced by cuda_initialize(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 31 of file ComputeNonbondedCUDA.C. Referenced by cuda_getargs(). |
|
|
Definition at line 579 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(). |
|
|
Definition at line 541 of file ComputeNonbondedCUDA.C. Referenced by cuda_check_local_progress(), and ComputeNonbondedCUDA::doWork(). |
|
|
Definition at line 42 of file ComputeNonbondedCUDA.C. Referenced by cuda_check_local_calc(), cuda_check_remote_calc(), and cuda_initialize(). |
|
|
Definition at line 533 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(). |
|
|
Definition at line 530 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(). |
|
|
Definition at line 40 of file ComputeNonbondedCUDA.C. Referenced by cuda_initialize(). |
|
|
Definition at line 537 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 466 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 465 of file ComputeNonbondedCUDA.C. Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 75 of file ComputeNonbondedCUDAKernel.cu. Referenced by cuda_bind_atom_params(), cuda_bind_atoms(), cuda_init(), cuda_load_forces(), cuda_nonbonded_forces(), cuda_stream_finished(), ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice(). |
|
|
Definition at line 30 of file ComputeNonbondedCUDA.C. Referenced by cuda_getargs(). |
1.3.9.1