#define MIN_DEBUG_LEVEL 3
// ...

#ifdef NODEGROUP_FORCE_REGISTER
GroupRestraintsCUDA::GroupRestraintsCUDA(
  const GroupRestraintParam *param,
  bool _mGpuOn,
  int _numDevices,
  int _deviceIndex,
  int grp) {
  // ...
  numDevices = _numDevices;
  deviceIndex = _deviceIndex;
  splitOverDevicesGrp1 = false;
  splitOverDevicesGrp2 = false;
  inv_group1_mass = 0.0;
  inv_group2_mass = 0.0;
  // ...
  numRestrainedGroup1 = group1Index.size();
  numRestrainedGroup2 = group2Index.size();
  totalNumRestrained = numRestrainedGroup1 + numRestrainedGroup2;
  calcGroup1COM = (numRestrainedGroup1 ? true : false);
  groupAtomsSOAIndex.resize(totalNumRestrained);

  // Host and device buffers for restraint energy, force, and group COMs
  allocate_host<double>(&h_resEnergy, 1);
  allocate_host<double3>(&h_diffCOM, 1);
  allocate_host<double3>(&h_group1COM, 1);
  allocate_host<double3>(&h_group2COM, 1);
  allocate_host<double3>(&h_resForce, 1);
  allocate_device<double3>(&d_group1COM, 1);
  allocate_device<double3>(&d_group2COM, 1);
  allocate_device<int>(&d_groupAtomsSOAIndex, totalNumRestrained);
  allocate_device<unsigned int>(&d_tbcatomic, 1);
  cudaCheck(cudaMemset(d_tbcatomic, 0, sizeof(unsigned int)));

  // Buffers used for the multi-GPU (peer) COM reduction
  allocate_device<double3>(&d_peer1COM, sizeof(double3));
  allocate_device<double3>(&d_peer2COM, sizeof(double3));
  allocate_host<double3>(&h_peer1COM, sizeof(double3));
  allocate_host<double3>(&h_peer2COM, sizeof(double3) * numDevices);
  // Compute the total mass of each restrained group, validating every atom index
  double total_mass = 0.0;
  for (int i = 0; i < numRestrainedGroup2; ++i) {
    int index = group2Index[i];
    if (index > -1 && index < totalAtoms) {
      // ... (accumulate this atom's mass into total_mass)
    } else {
      sprintf(err_msg, "Group restraints: Bad atom index for %s!"
        " Atom indices must be within [%d, %d].\n", groupName, 0, totalAtoms - 1);
      NAMD_die(err_msg);
    }
  }
  inv_group2_mass = 1.0 / total_mass;

  // ...
  for (int i = 0; i < numRestrainedGroup1; ++i) {
    int index = group1Index[i];
    if (index > -1 && index < totalAtoms) {
      // ... (accumulate this atom's mass into total_mass)
    } else {
      sprintf(err_msg, "Group restraints: Bad atom index for %s!"
        " Atom indices must be within [%d, %d].\n", groupName, 0, totalAtoms - 1);
      NAMD_die(err_msg);
    }
  }
  inv_group1_mass = 1.0 / total_mass;

  // Reference COM for group 1
  h_group1COM->x = ref.x;
  h_group1COM->y = ref.y;
  h_group1COM->z = ref.z;
}
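// Note: the constructor only stages per-group state. It allocates the host and
// device buffers, derives the inverse total mass of each restrained group, and
// stores the reference COM for group 1 from ref. No kernels are launched here;
// groupAtomsSOAIndex is only sized, and is filled later by updateAtoms() once
// the atom maps and SOA records for this device are available.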
GroupRestraintsCUDA::~GroupRestraintsCUDA() {
  deallocate_host<double>(&h_resEnergy);
  deallocate_host<double3>(&h_resForce);
  deallocate_host<double3>(&h_diffCOM);
  deallocate_host<double3>(&h_group1COM);
  deallocate_host<double3>(&h_group2COM);
  deallocate_device<double3>(&d_group1COM);
  deallocate_device<double3>(&d_group2COM);
  deallocate_device<int>(&d_groupAtomsSOAIndex);
  deallocate_device<unsigned int>(&d_tbcatomic);
  // ...
}
void GroupRestraintsCUDA::updateAtoms(
  std::vector<AtomMap*> &atomMapsList,
  std::vector<CudaLocalRecord> &localRecords,
  const int *h_globalToLocalID) {
  // ...
  numRestrainedGroup1Local = numRestrainedGroup1;
  numRestrainedGroup2Local = numRestrainedGroup2;
  groupAtomsSOAIndex.clear();

  // Map group 1 global atom IDs to SOA indices on this device
  const std::vector<int> &group1Index = resParam->GetGroup1AtomIndex();
  DebugM(3, "[" << CkMyPe() << "]" << " updateAtoms grp1 " << group1Index.size()
    << "\n" << endi);
  if (numRestrainedGroup1 != group1Index.size()) {
    sprintf(err_msg, "Number of atoms in group 1 restraint for '%s' has changed!",
      groupName);
    NAMD_die(err_msg);
  }
  for (int i = 0; i < numRestrainedGroup1; ++i) {
    int gid = group1Index[i];
    DebugM(2, "[" << CkMyPe() << "]" << " updateAtoms grp1 looking for " << gid
      << "\n" << endi);
    // Search the atom maps for this atom's local (patch) ID
    for (int j = 0; j < atomMapsList.size(); ++j) {
      lid = atomMapsList[j]->localID(gid);
      // ...
    }
    // ... (error/multi-GPU handling when the atom is not found locally)
    NAMD_bug("LocalAtomID not found in patchMap");
    // ...
    splitOverDevicesGrp1 = true;
    // ...
    // Translate (patch ID, index within patch) into an index into the SOA buffers
    int soaPid = h_globalToLocalID[lid.pid];
    int soaIndex = localRecords[soaPid].bufferOffset + lid.index;
    DebugM(2, "[" << CkMyPe() << "]" << " updateAtoms grp1 found gid " << gid
      << " as soa " << soaIndex << "\n" << endi);
    // ...
    groupAtomsSOAIndex.push_back(soaIndex);
    // ...
    groupAtomsSOAIndex[i] = soaIndex;
    // ...
  }
  DebugM(3, "[" << CkMyPe() << "]" << " updateAtoms grp1 soa "
    << groupAtomsSOAIndex.size() << "\n" << endi);
  numRestrainedGroup1Local = groupAtomsSOAIndex.size();
  if (numRestrainedGroup1Local > 0)
    std::sort(groupAtomsSOAIndex.begin(),
      groupAtomsSOAIndex.begin() + numRestrainedGroup1Local);
  // Map group 2 global atom IDs to SOA indices on this device
  const std::vector<int> &group2Index = resParam->GetGroup2AtomIndex();
  if (numRestrainedGroup2 != group2Index.size()) {
    sprintf(err_msg, "Number of atoms in group 2 restraint for '%s' has changed!",
      groupName);
    NAMD_die(err_msg);
  }
  DebugM(4, "[" << CkMyPe() << "]" << " updateAtoms grp2 " << group2Index.size()
    << "\n" << endi);
  for (int i = 0; i < numRestrainedGroup2; ++i) {
    int gid = group2Index[i];
    DebugM(2, "[" << CkMyPe() << "]" << " updateAtoms grp2 looking for " << gid
      << "\n" << endi);
    for (int j = 0; j < atomMapsList.size(); ++j) {
      lid = atomMapsList[j]->localID(gid);
      // ...
    }
    // ... (error/multi-GPU handling when the atom is not found locally)
    NAMD_bug("LocalAtomID not found in patchMap");
    // ...
    splitOverDevicesGrp2 = true;
    // ...
    int soaPid = h_globalToLocalID[lid.pid];
    int soaIndex = localRecords[soaPid].bufferOffset + lid.index;
    DebugM(2, "[" << CkMyPe() << "]" << " updateAtoms grp2 found " << gid
      << " as SOA " << soaIndex << "\n" << endi);
    // ...
    groupAtomsSOAIndex.push_back(soaIndex);
    // ...
    groupAtomsSOAIndex[i + numRestrainedGroup1] = soaIndex;
    DebugM(2, "[" << CkMyPe() << "]" << " updateAtoms grp2 SOA soaidx " << soaIndex
      << " size now " << groupAtomsSOAIndex.size() << "\n" << endi);
    // ...
  }
  numRestrainedGroup2Local = groupAtomsSOAIndex.size() - numRestrainedGroup1Local;
  DebugM(3, "[" << CkMyPe() << "]" << " updateAtoms grp2 SOA now "
    << groupAtomsSOAIndex.size()
    << " numRestrainedGroup1Local " << numRestrainedGroup1Local
    << " numRestrainedGroup2Local " << numRestrainedGroup2Local << "\n" << endi);

  if (groupAtomsSOAIndex.size() > 0) {
    // ...
    if (numRestrainedGroup2Local > 0)
      std::sort(groupAtomsSOAIndex.begin() + numRestrainedGroup1Local,
        groupAtomsSOAIndex.end());
    // ...
    copy_HtoD<int>(groupAtomsSOAIndex.data(), d_groupAtomsSOAIndex,
      groupAtomsSOAIndex.size());
  }
}
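// The index translation above is, in outline (a sketch of the chain actually
// used by this function, not a separate API):
//
//   global atom ID  --AtomMap::localID()-->      lid = (patch ID, index in patch)
//   lid.pid         --h_globalToLocalID[...]-->  soaPid (local patch record)
//   SOA index       =  localRecords[soaPid].bufferOffset + lid.index
//
// Group 1 indices occupy the front of groupAtomsSOAIndex and group 2 indices the
// back; each slice is sorted (presumably to give the kernels an ordered,
// coalescing-friendly access pattern) and the combined list is copied to the device.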
void GroupRestraintsCUDA::initPeerCOM(double3** d_peerCOM1G,
  double3** d_peerCOM2G, cudaStream_t stream) {
  DebugM(3, "[" << CkMyPe() << "]" << " initPeerCOM\n" << endi);
  initPeerCOMmgpuG(numDevices, deviceIndex, d_peerCOM1G, d_peer1COM, stream);
  initPeerCOMmgpuG(numDevices, deviceIndex, d_peerCOM2G, d_peer2COM, stream);
}
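// A sketch of the intent, inferred from the calls above rather than from the
// helper's definition: initPeerCOMmgpuG appears to publish this device's
// d_peer1COM / d_peer2COM buffers into the peer-pointer tables
// (d_peerCOM1G / d_peerCOM2G) so that, during the multi-GPU COM reduction in
// doCOM_mgpu(), every device can reach every other device's partial sums.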
void GroupRestraintsCUDA::doCOM_mgpu(
  // ...
  const char3* d_transform,
  // ...
  const double* d_pos_x,
  const double* d_pos_y,
  const double* d_pos_z,
  // ...
  cudaStream_t stream) {
  // ...
  DebugM(3, "[" << CkMyPe() << "]" << " doCOM_mgpu g1count " << numRestrainedGroup1Local
    << " g2count " << numRestrainedGroup2Local << "\n" << endi);

  // Reduce the mass-weighted coordinate sum of group 1 across devices
  if (numRestrainedGroup1Local > 0 && calcGroup1COM)
    computeCOMMgpu(numRestrainedGroup1Local, lat,
      d_mass, d_pos_x, d_pos_y, d_pos_z,
      d_transform, this->d_groupAtomsSOAIndex,
      // ...
      numDevices, deviceIndex,
      // ...
      );
  // ...
  cudaCheck(cudaStreamSynchronize(stream));
  copy_DtoH_sync<double3>(d_peer1COM, h_peer1COM, 1);
  DebugM(3, "gid " << groupRestID << " deviceIndex " << deviceIndex
    << " g1 COM " << h_peer1COM->x * inv_group1_mass
    << ", " << h_peer1COM->y * inv_group1_mass
    << ", " << h_peer1COM->z * inv_group1_mass << "\n" << endi);

  // Reduce the mass-weighted coordinate sum of group 2 across devices
  if (numRestrainedGroup2Local > 0)
    computeCOMMgpu(numRestrainedGroup2Local, lat,
      d_mass, d_pos_x, d_pos_y, d_pos_z,
      // ...
      this->d_groupAtomsSOAIndex + numRestrainedGroup1Local,
      // ...
      this->d_tbcatomic, numDevices, deviceIndex,
      // ...
      );
  // ...
  cudaCheck(cudaStreamSynchronize(stream));
  copy_DtoH_sync<double3>(d_peer2COM, h_peer2COM, 1);
  DebugM(3, " gid " << groupRestID << " deviceIndex " << deviceIndex
    << " g2 COM " << h_peer2COM[0].x * inv_group2_mass
    << ", " << h_peer2COM[0].y * inv_group2_mass
    << ", " << h_peer2COM[0].z * inv_group2_mass << "\n" << endi);
}
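// The scaling in the debug prints above implies that computeCOMMgpu leaves the
// mass-weighted coordinate sum of each group in the peer buffers, so with
// inv_group_mass = 1 / sum_i(m_i),
//
//   COM = (sum_i m_i * r_i) * inv_group_mass
//
// which is why every component is multiplied by inv_group1_mass or
// inv_group2_mass before it is printed.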
void GroupRestraintsCUDA::doForce(
  // ...
  const char3* d_transform,
  // ...
  const double* d_pos_x,
  const double* d_pos_y,
  const double* d_pos_z,
  double* d_f_normal_x,
  double* d_f_normal_y,
  double* d_f_normal_z,
  // ...
  double3** d_peer1COM,
  double3** d_peer2COM,
  cudaStream_t stream) {
  DebugM(3, "[" << CkMyPe() << "]" << " doForce g1 " << numRestrainedGroup1Local
    << " g2 " << numRestrainedGroup2Local << "\n" << endi);
  // ...
  // Restraint between the COMs of group 1 and group 2
  computeGroupRestraint_2Group(
    // ...
    numRestrainedGroup1Local,
    numRestrainedGroup1Local + numRestrainedGroup2Local,
    // ...
    this->inv_group1_mass,
    this->inv_group2_mass,
    this->d_groupAtomsSOAIndex,
    // ...
    );
  // ...
  // Only group 2 defined: restrain its COM against the fixed reference point
  if (numRestrainedGroup2Local > 0)
    computeGroupRestraint_1Group(
      // ...
      numRestrainedGroup2Local,
      // ...
      this->inv_group2_mass,
      this->d_groupAtomsSOAIndex,
      // ...
      );
  // ...
  cudaCheck(cudaStreamSynchronize(stream));
  // Accumulate this group's restraint energy and net force into the shared
  // host-side external-force buffers
  h_extEnergy[0] += h_resEnergy[0];
  // ...
  if (!calcGroup1COM) {
    h_extForce->x += h_resForce->x;
    h_extForce->y += h_resForce->y;
    h_extForce->z += h_resForce->z;
  }
  // ...
  sprintf(msg, "GRES: %9d %14s %14.4f %14.4f %14.4f %19.4f %14.4f %14.4f %14.4f\n",
    timeStep, groupName, h_diffCOM->x, h_diffCOM->y, h_diffCOM->z,
    h_resForce->x, h_resForce->y, h_resForce->z, h_resEnergy[0]);
  // ...
}
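// Each "GRES:" line therefore reports, for one restrained group at one timestep:
// the COM separation vector (h_diffCOM), the restraint force (h_resForce), and
// the restraint energy (h_resEnergy), in the same column order announced by the
// "GRES_TITLE:" header printed by ComputeGroupRestraintsCUDA::doForce() below.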
ComputeGroupRestraintsCUDA::ComputeGroupRestraintsCUDA(
  const int outputFreq,
  // ...
  ) {
  gResOutputFreq = outputFreq;
  // ...
  numDevices = _numDevices;
  deviceIndex = _deviceIndex;
  const std::map<std::string, GroupRestraintParam*> &groupMap = resList.GetGroupResMap();
  // ...
  for (auto it = groupMap.begin(); it != groupMap.end(); ++it) {
    GroupRestraintsCUDA *gResCUDA = new GroupRestraintsCUDA(it->second, mGpuOn,
      numDevices, deviceIndex, restraintsCUDAList.size());
    restraintsCUDAList.push_back(gResCUDA);
    // ...
  }
  // ...
}
ComputeGroupRestraintsCUDA::~ComputeGroupRestraintsCUDA() {
  int numGroup = restraintsCUDAList.size();
  for (int i = 0; i < numGroup; ++i) {
    delete restraintsCUDAList[i];
  }
  restraintsCUDAList.clear();
}
void ComputeGroupRestraintsCUDA::updateAtoms(
  std::vector<AtomMap*> &atomMapsList,
  std::vector<CudaLocalRecord> &localRecords,
  const int *h_globalToLocalID) {
  // ...
  int numGroup = restraintsCUDAList.size();
  // ...
  for (int i = 0; i < numGroup; ++i) {
    restraintsCUDAList[i]->updateAtoms(atomMapsList, localRecords, h_globalToLocalID);
    if (mGpuOn && restraintsCUDAList[i]->numRestrainedGroup1Local
        + restraintsCUDAList[i]->numRestrainedGroup2Local > 0) {
      // ...
    }
  }
}
void ComputeGroupRestraintsCUDA::initPeerCOM(cudaStream_t stream) {
  DebugM(3, "[" << CkMyPe() << "]" << " initPeerCOM\n" << endi);
  int numGroup = restraintsCUDAList.size();
  // ...
  for (int i = 0; i < numGroup; ++i) {
    // ...
  }
}
void ComputeGroupRestraintsCUDA::doCOM_mgpu(
  // ...
  const char3* d_transform,
  // ...
  const double* d_pos_x,
  const double* d_pos_y,
  const double* d_pos_z,
  cudaStream_t stream) {
  int numGroup = restraintsCUDAList.size();
  DebugM(3, "[" << CkMyPe() << "]" << " doCOM_mgpu " << numGroup << "\n" << endi);
  for (int gIdx = 0; gIdx < numGroup; ++gIdx) {
    // ...
    restraintsCUDAList[gIdx]->doCOM_mgpu(
      // ...
      d_mass, d_pos_x, d_pos_y, d_pos_z, gIdx,
      // ...
      );
  }
}
void ComputeGroupRestraintsCUDA::doForce(
  // ...
  const char3* d_transform,
  // ...
  const double* d_pos_x,
  const double* d_pos_y,
  const double* d_pos_z,
  double* d_f_normal_x,
  double* d_f_normal_y,
  double* d_f_normal_z,
  // ...
  cudaStream_t stream) {
  // ...
  const int doOutput = (timeStep % gResOutputFreq) == 0;
  // ...
  int doVirCalc = (doOutput ? 1 : doVirial);
  int numGroup = restraintsCUDAList.size();
  // ...
  h_extEnergy[0] = 0.0;
  // ...
  h_extVirial->xx = 0.0;
  h_extVirial->xy = 0.0;
  h_extVirial->xz = 0.0;
  h_extVirial->yx = 0.0;
  h_extVirial->yy = 0.0;
  h_extVirial->yz = 0.0;
  h_extVirial->zx = 0.0;
  h_extVirial->zy = 0.0;
  h_extVirial->zz = 0.0;
  // ...
  if (timeStep % (100 * gResOutputFreq) == 0) {
    // ...
    sprintf(msg, "\nGRES_TITLE: %3s %14s %14s %14s %14s %19s %14s %14s %14s\n",
      "TS", "GROUP_NAME", "DISTANCE.X", "DISTANCE.Y", "DISTANCE.Z",
      "FORCE.X", "FORCE.Y", "FORCE.Z", "ENERGY");
    // ...
  }
  // ...
  for (int gIdx = 0; gIdx < numGroup; ++gIdx) {
    restraintsCUDAList[gIdx]->doForce(
      timeStep, doEnergy, doVirCalc, doOutput, gIdx, lat, d_transform,
      d_mass, d_pos_x, d_pos_y, d_pos_z,
      d_f_normal_x, d_f_normal_y, d_f_normal_z, d_virial,
      h_extEnergy, h_extForce, h_extVirial,
      cudaMgr->curGrp1COM[gIdx], cudaMgr->curGrp2COM[gIdx], stream);
  }
}
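// Per-step flow at this level, as visible above: zero the shared accumulators
// (h_extEnergy and h_extVirial), re-emit the GRES_TITLE header every
// 100 * gResOutputFreq steps, and then let each GroupRestraintsCUDA instance add
// its own energy, force, and virial contribution. doVirCalc is forced to 1 on
// output steps (doOutput ? 1 : doVirial), so the virial path is also taken on
// every step that prints a GRES line.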