| version 1.100 | version 1.101 |
|---|
| |
| // Zero array sizes and pointers | // Zero array sizes and pointers |
| init_arrays(); | init_arrays(); |
| | |
| | atoms_size = 0; |
| | atoms = NULL; |
| | |
| forces_size = 0; | forces_size = 0; |
| forces = NULL; | forces = NULL; |
| | |
| |
| if ( master->slavePes[slaveIndex] != CkMyPe() ) { | if ( master->slavePes[slaveIndex] != CkMyPe() ) { |
| NAMD_bug("ComputeNonbondedCUDA slavePes[slaveIndex] != CkMyPe"); | NAMD_bug("ComputeNonbondedCUDA slavePes[slaveIndex] != CkMyPe"); |
| } | } |
| | deviceID = master->deviceID; |
| registerPatches(); | registerPatches(); |
| return; | return; |
| } | } |
| |
| cuda_errcheck("in cudaDeviceGetStreamPriorityRange"); | cuda_errcheck("in cudaDeviceGetStreamPriorityRange"); |
| if ( leastPriority != greatestPriority ) { | if ( leastPriority != greatestPriority ) { |
| if ( CkMyNode() == 0 ) { | if ( CkMyNode() == 0 ) { |
| int dev; | int dev = deviceCUDA->getDeviceID(); |
| cudaGetDevice(&dev); | |
| cuda_errcheck("cudaGetDevice"); | |
| CkPrintf("CUDA device %d stream priority range %d %d\n", dev, leastPriority, greatestPriority); | CkPrintf("CUDA device %d stream priority range %d %d\n", dev, leastPriority, greatestPriority); |
| } | } |
| if ( deviceCUDA->getMergeGrids() && params->PMEOn && params->PMEOffload ) { | if ( deviceCUDA->getMergeGrids() && params->PMEOn && params->PMEOffload && !params->usePMECUDA) { |
| greatestPriority = leastPriority; | greatestPriority = leastPriority; |
| } | } |
| | if (params->usePMECUDA) greatestPriority = leastPriority; |
| cudaStreamCreateWithPriority(&stream,cudaStreamDefault,greatestPriority); | cudaStreamCreateWithPriority(&stream,cudaStreamDefault,greatestPriority); |
| cudaStreamCreateWithPriority(&stream2,cudaStreamDefault,leastPriority); | cudaStreamCreateWithPriority(&stream2,cudaStreamDefault,leastPriority); |
| } else | } else |
| |
| { | { |
| cudaStreamCreate(&stream); | cudaStreamCreate(&stream); |
| cuda_errcheck("cudaStreamCreate"); | cuda_errcheck("cudaStreamCreate"); |
| int dev; | int dev = deviceCUDA->getDeviceID(); |
| cudaGetDevice(&dev); | |
| cuda_errcheck("cudaGetDevice"); | |
| cudaDeviceProp deviceProp; | cudaDeviceProp deviceProp; |
| cudaGetDeviceProperties(&deviceProp, dev); | cudaGetDeviceProperties(&deviceProp, dev); |
| cuda_errcheck("cudaGetDeviceProperties"); | cuda_errcheck("cudaGetDeviceProperties"); |
| |
| } | } |
| cuda_errcheck("cudaStreamCreate"); | cuda_errcheck("cudaStreamCreate"); |
| | |
| | // Get GPU device ID |
| | deviceID = deviceCUDA->getDeviceID(); |
| | |
| cuda_init(); | cuda_init(); |
| if ( max_grid_size < 65535 ) NAMD_bug("bad CUDA max_grid_size"); | if ( max_grid_size < 65535 ) NAMD_bug("bad CUDA max_grid_size"); |
| build_exclusions(); | build_exclusions(); |
| |
| patch_pairs_ptr = new ResizeArray<patch_pair>; | patch_pairs_ptr = new ResizeArray<patch_pair>; |
| patch_pair_num_ptr = new ResizeArray<int>; | patch_pair_num_ptr = new ResizeArray<int>; |
| | |
| if ( params->PMEOn && params->PMEOffload ) deviceCUDA->setGpuIsMine(0); | if ( params->PMEOn && params->PMEOffload && !params->usePMECUDA) deviceCUDA->setGpuIsMine(0); |
| } | } |
| | |
| | |
| |
| static __thread int vdw_types_size; | static __thread int vdw_types_size; |
| static __thread int* vdw_types; | static __thread int* vdw_types; |
| | |
| static __thread int atoms_size; | |
| static __thread atom* atoms; | |
| | |
| static __thread int dummy_size; | static __thread int dummy_size; |
| static __thread float* dummy_dev; | static __thread float* dummy_dev; |
| | |
| |
| vdw_types_size = 0; | vdw_types_size = 0; |
| vdw_types = NULL; | vdw_types = NULL; |
| | |
| atoms_size = 0; | |
| atoms = NULL; | |
| | |
| dummy_size = 0; | dummy_size = 0; |
| dummy_dev = NULL; | dummy_dev = NULL; |
| | |
| |
| } | } |
| | |
| int ComputeNonbondedCUDA::noWork() { | int ComputeNonbondedCUDA::noWork() { |
| //fprintf(stderr, "ComputeNonbondedCUDA::noWork() %d\n",CkMyPe()); | |
| | |
| SimParameters *simParams = Node::Object()->simParameters; | SimParameters *simParams = Node::Object()->simParameters; |
| Flags &flags = master->patchRecords[hostedPatches[0]].p->flags; | Flags &flags = master->patchRecords[hostedPatches[0]].p->flags; |
| |
| GBISP("C.N.CUDA[%d]::doWork: seq %d, phase %d, workStarted %d, atomsChanged %d\n", \ | GBISP("C.N.CUDA[%d]::doWork: seq %d, phase %d, workStarted %d, atomsChanged %d\n", \ |
| CkMyPe(), sequence(), gbisPhase, workStarted, atomsChanged); | CkMyPe(), sequence(), gbisPhase, workStarted, atomsChanged); |
| | |
| | // Set GPU device ID |
| | cudaCheck(cudaSetDevice(deviceID)); |
| | |
| ResizeArray<patch_pair> &patch_pairs(*patch_pairs_ptr); | ResizeArray<patch_pair> &patch_pairs(*patch_pairs_ptr); |
| ResizeArray<int> &patch_pair_num(*patch_pair_num_ptr); | ResizeArray<int> &patch_pair_num(*patch_pair_num_ptr); |
| | |
| |
| num_remote_atoms = num_atoms - num_local_atoms; | num_remote_atoms = num_atoms - num_local_atoms; |
| reallocate_host<atom_param>(&atom_params, &atom_params_size, num_atoms, 1.2f); | reallocate_host<atom_param>(&atom_params, &atom_params_size, num_atoms, 1.2f); |
| reallocate_host<int>(&vdw_types, &vdw_types_size, num_atoms, 1.2f); | reallocate_host<int>(&vdw_types, &vdw_types_size, num_atoms, 1.2f); |
| reallocate_host<atom>(&atoms, &atoms_size, num_atoms, 1.2f); | reallocate_host<CudaAtom>(&atoms, &atoms_size, num_atoms, 1.2f); |
| reallocate_host<float4>(&forces, &forces_size, num_atoms, 1.2f, cudaHostAllocMapped); | reallocate_host<float4>(&forces, &forces_size, num_atoms, 1.2f, cudaHostAllocMapped); |
| reallocate_host<float4>(&slow_forces, &slow_forces_size, num_atoms, 1.2f, cudaHostAllocMapped); | reallocate_host<float4>(&slow_forces, &slow_forces_size, num_atoms, 1.2f, cudaHostAllocMapped); |
| if (simParams->GBISOn) { | if (simParams->GBISOn) { |
| |
| { | { |
| #if 1 | #if 1 |
| const CudaAtom *ac = pr.p->getCudaAtomList(); | const CudaAtom *ac = pr.p->getCudaAtomList(); |
| atom *ap = atoms + start; | CudaAtom *ap = atoms + start; |
| memcpy(ap, ac, sizeof(atom)*n); | memcpy(ap, ac, sizeof(atom)*n); |
| #else | #else |
| Vector center = | Vector center = |
| |
| } | } |
| } | } |
| | |
| //GBISP("finished active patches\n") | |
| | |
| //CkPrintf("maxMovement = %f maxTolerance = %f save = %d use = %d\n", | |
| // maxAtomMovement, maxPatchTolerance, | |
| // flags.savePairlists, flags.usePairlists); | |
| | |
| savePairlists = 0; | savePairlists = 0; |
| usePairlists = 0; | usePairlists = 0; |
| if ( flags.savePairlists ) { | if ( flags.savePairlists ) { |
| |
| latc.z = lattice.c().z; | latc.z = lattice.c().z; |
| SimParameters *simParams = Node::Object()->simParameters; | SimParameters *simParams = Node::Object()->simParameters; |
| | |
| | // Set GPU device ID |
| | cudaSetDevice(deviceID); |
| | |
| const bool streaming = ! (deviceCUDA->getNoStreaming() || simParams->GBISOn); | const bool streaming = ! (deviceCUDA->getNoStreaming() || simParams->GBISOn); |
| | |
| double walltime; | double walltime; |
| |
| } | } |
| } | } |
| atomsChanged = 0; | atomsChanged = 0; |
| cuda_bind_atoms(atoms); | cuda_bind_atoms((const atom *)atoms); |
| cuda_bind_forces(forces, slow_forces); | cuda_bind_forces(forces, slow_forces); |
| cuda_bind_virials(virials, force_ready_queue, block_order); | cuda_bind_virials(virials, force_ready_queue, block_order); |
| if ( simParams->GBISOn) { | if ( simParams->GBISOn) { |
| |
| } | } |
| } // phases | } // phases |
| } // GBISOn | } // GBISOn |
| if ( simParams->PMEOn && simParams->PMEOffload ) break; | if ( simParams->PMEOn && simParams->PMEOffload && !simParams->usePMECUDA) break; |
| | |
| default: | default: |
| GBISP("C.N.CUDA[%d]::recvYieldDevice: case default\n", CkMyPe()) | GBISP("C.N.CUDA[%d]::recvYieldDevice: case default\n", CkMyPe()) |