version 1.100 | version 1.101 |
---|
| |
// Zero array sizes and pointers | // Zero array sizes and pointers |
init_arrays(); | init_arrays(); |
| |
| atoms_size = 0; |
| atoms = NULL; |
| |
forces_size = 0; | forces_size = 0; |
forces = NULL; | forces = NULL; |
| |
| |
if ( master->slavePes[slaveIndex] != CkMyPe() ) { | if ( master->slavePes[slaveIndex] != CkMyPe() ) { |
NAMD_bug("ComputeNonbondedCUDA slavePes[slaveIndex] != CkMyPe"); | NAMD_bug("ComputeNonbondedCUDA slavePes[slaveIndex] != CkMyPe"); |
} | } |
| deviceID = master->deviceID; |
registerPatches(); | registerPatches(); |
return; | return; |
} | } |
| |
cuda_errcheck("in cudaDeviceGetStreamPriorityRange"); | cuda_errcheck("in cudaDeviceGetStreamPriorityRange"); |
if ( leastPriority != greatestPriority ) { | if ( leastPriority != greatestPriority ) { |
if ( CkMyNode() == 0 ) { | if ( CkMyNode() == 0 ) { |
int dev; | int dev = deviceCUDA->getDeviceID(); |
cudaGetDevice(&dev); | |
cuda_errcheck("cudaGetDevice"); | |
CkPrintf("CUDA device %d stream priority range %d %d\n", dev, leastPriority, greatestPriority); | CkPrintf("CUDA device %d stream priority range %d %d\n", dev, leastPriority, greatestPriority); |
} | } |
if ( deviceCUDA->getMergeGrids() && params->PMEOn && params->PMEOffload ) { | if ( deviceCUDA->getMergeGrids() && params->PMEOn && params->PMEOffload && !params->usePMECUDA) { |
greatestPriority = leastPriority; | greatestPriority = leastPriority; |
} | } |
| if (params->usePMECUDA) greatestPriority = leastPriority; |
cudaStreamCreateWithPriority(&stream,cudaStreamDefault,greatestPriority); | cudaStreamCreateWithPriority(&stream,cudaStreamDefault,greatestPriority); |
cudaStreamCreateWithPriority(&stream2,cudaStreamDefault,leastPriority); | cudaStreamCreateWithPriority(&stream2,cudaStreamDefault,leastPriority); |
} else | } else |
| |
{ | { |
cudaStreamCreate(&stream); | cudaStreamCreate(&stream); |
cuda_errcheck("cudaStreamCreate"); | cuda_errcheck("cudaStreamCreate"); |
int dev; | int dev = deviceCUDA->getDeviceID(); |
cudaGetDevice(&dev); | |
cuda_errcheck("cudaGetDevice"); | |
cudaDeviceProp deviceProp; | cudaDeviceProp deviceProp; |
cudaGetDeviceProperties(&deviceProp, dev); | cudaGetDeviceProperties(&deviceProp, dev); |
cuda_errcheck("cudaGetDeviceProperties"); | cuda_errcheck("cudaGetDeviceProperties"); |
| |
} | } |
cuda_errcheck("cudaStreamCreate"); | cuda_errcheck("cudaStreamCreate"); |
| |
| // Get GPU device ID |
| deviceID = deviceCUDA->getDeviceID(); |
| |
cuda_init(); | cuda_init(); |
if ( max_grid_size < 65535 ) NAMD_bug("bad CUDA max_grid_size"); | if ( max_grid_size < 65535 ) NAMD_bug("bad CUDA max_grid_size"); |
build_exclusions(); | build_exclusions(); |
| |
patch_pairs_ptr = new ResizeArray<patch_pair>; | patch_pairs_ptr = new ResizeArray<patch_pair>; |
patch_pair_num_ptr = new ResizeArray<int>; | patch_pair_num_ptr = new ResizeArray<int>; |
| |
if ( params->PMEOn && params->PMEOffload ) deviceCUDA->setGpuIsMine(0); | if ( params->PMEOn && params->PMEOffload && !params->usePMECUDA) deviceCUDA->setGpuIsMine(0); |
} | } |
| |
| |
| |
static __thread int vdw_types_size; | static __thread int vdw_types_size; |
static __thread int* vdw_types; | static __thread int* vdw_types; |
| |
static __thread int atoms_size; | |
static __thread atom* atoms; | |
| |
static __thread int dummy_size; | static __thread int dummy_size; |
static __thread float* dummy_dev; | static __thread float* dummy_dev; |
| |
| |
vdw_types_size = 0; | vdw_types_size = 0; |
vdw_types = NULL; | vdw_types = NULL; |
| |
atoms_size = 0; | |
atoms = NULL; | |
| |
dummy_size = 0; | dummy_size = 0; |
dummy_dev = NULL; | dummy_dev = NULL; |
| |
| |
} | } |
| |
int ComputeNonbondedCUDA::noWork() { | int ComputeNonbondedCUDA::noWork() { |
//fprintf(stderr, "ComputeNonbondedCUDA::noWork() %d\n",CkMyPe()); | |
| |
SimParameters *simParams = Node::Object()->simParameters; | SimParameters *simParams = Node::Object()->simParameters; |
Flags &flags = master->patchRecords[hostedPatches[0]].p->flags; | Flags &flags = master->patchRecords[hostedPatches[0]].p->flags; |
| |
GBISP("C.N.CUDA[%d]::doWork: seq %d, phase %d, workStarted %d, atomsChanged %d\n", \ | GBISP("C.N.CUDA[%d]::doWork: seq %d, phase %d, workStarted %d, atomsChanged %d\n", \ |
CkMyPe(), sequence(), gbisPhase, workStarted, atomsChanged); | CkMyPe(), sequence(), gbisPhase, workStarted, atomsChanged); |
| |
| // Set GPU device ID |
| cudaCheck(cudaSetDevice(deviceID)); |
| |
ResizeArray<patch_pair> &patch_pairs(*patch_pairs_ptr); | ResizeArray<patch_pair> &patch_pairs(*patch_pairs_ptr); |
ResizeArray<int> &patch_pair_num(*patch_pair_num_ptr); | ResizeArray<int> &patch_pair_num(*patch_pair_num_ptr); |
| |
| |
num_remote_atoms = num_atoms - num_local_atoms; | num_remote_atoms = num_atoms - num_local_atoms; |
reallocate_host<atom_param>(&atom_params, &atom_params_size, num_atoms, 1.2f); | reallocate_host<atom_param>(&atom_params, &atom_params_size, num_atoms, 1.2f); |
reallocate_host<int>(&vdw_types, &vdw_types_size, num_atoms, 1.2f); | reallocate_host<int>(&vdw_types, &vdw_types_size, num_atoms, 1.2f); |
reallocate_host<atom>(&atoms, &atoms_size, num_atoms, 1.2f); | reallocate_host<CudaAtom>(&atoms, &atoms_size, num_atoms, 1.2f); |
reallocate_host<float4>(&forces, &forces_size, num_atoms, 1.2f, cudaHostAllocMapped); | reallocate_host<float4>(&forces, &forces_size, num_atoms, 1.2f, cudaHostAllocMapped); |
reallocate_host<float4>(&slow_forces, &slow_forces_size, num_atoms, 1.2f, cudaHostAllocMapped); | reallocate_host<float4>(&slow_forces, &slow_forces_size, num_atoms, 1.2f, cudaHostAllocMapped); |
if (simParams->GBISOn) { | if (simParams->GBISOn) { |
| |
{ | { |
#if 1 | #if 1 |
const CudaAtom *ac = pr.p->getCudaAtomList(); | const CudaAtom *ac = pr.p->getCudaAtomList(); |
atom *ap = atoms + start; | CudaAtom *ap = atoms + start; |
memcpy(ap, ac, sizeof(atom)*n); | memcpy(ap, ac, sizeof(atom)*n); |
#else | #else |
Vector center = | Vector center = |
| |
} | } |
} | } |
| |
//GBISP("finished active patches\n") | |
| |
//CkPrintf("maxMovement = %f maxTolerance = %f save = %d use = %d\n", | |
// maxAtomMovement, maxPatchTolerance, | |
// flags.savePairlists, flags.usePairlists); | |
| |
savePairlists = 0; | savePairlists = 0; |
usePairlists = 0; | usePairlists = 0; |
if ( flags.savePairlists ) { | if ( flags.savePairlists ) { |
| |
latc.z = lattice.c().z; | latc.z = lattice.c().z; |
SimParameters *simParams = Node::Object()->simParameters; | SimParameters *simParams = Node::Object()->simParameters; |
| |
| // Set GPU device ID |
| cudaSetDevice(deviceID); |
| |
const bool streaming = ! (deviceCUDA->getNoStreaming() || simParams->GBISOn); | const bool streaming = ! (deviceCUDA->getNoStreaming() || simParams->GBISOn); |
| |
double walltime; | double walltime; |
| |
} | } |
} | } |
atomsChanged = 0; | atomsChanged = 0; |
cuda_bind_atoms(atoms); | cuda_bind_atoms((const atom *)atoms); |
cuda_bind_forces(forces, slow_forces); | cuda_bind_forces(forces, slow_forces); |
cuda_bind_virials(virials, force_ready_queue, block_order); | cuda_bind_virials(virials, force_ready_queue, block_order); |
if ( simParams->GBISOn) { | if ( simParams->GBISOn) { |
| |
} | } |
} // phases | } // phases |
} // GBISOn | } // GBISOn |
if ( simParams->PMEOn && simParams->PMEOffload ) break; | if ( simParams->PMEOn && simParams->PMEOffload && !simParams->usePMECUDA) break; |
| |
default: | default: |
GBISP("C.N.CUDA[%d]::recvYieldDevice: case default\n", CkMyPe()) | GBISP("C.N.CUDA[%d]::recvYieldDevice: case default\n", CkMyPe()) |