11 #define MIN_DEBUG_LEVEL 3 20 #ifdef NODEGROUP_FORCE_REGISTER 22 ComputeSMDCUDA::ComputeSMDCUDA(
23 std::vector<HomePatch*> &patchList,
24 double springConstant,
25 double transverseSpringConstant,
38 this->patchList = &patchList;
39 this->springConstant = springConstant;
40 this->transverseSpringConstant = transverseSpringConstant;
41 this->velocity = velocity;
42 this->direction = direction;
43 this->outputFrequency = outputFrequency;
44 this->firstTimeStep = firstTimeStep;
45 this->filename = filename;
46 this->isMasterDevice = isMasterDevice;
47 this->numAtoms = numAtoms;
48 this->mGpuOn =_mGpuOn;
49 this->numDevices = numDevices;
50 this->deviceIndex = deviceIndex;
53 smdAtomsGlobalIndex.clear();
55 allocate_host<double3>(&curCOM, 1);
58 smdAtomsSOAIndex.resize(this->numSMDAtoms);
59 allocate_device<unsigned int>(&d_tbcatomic, 1);
60 allocate_device<double3>(&d_curCOM, 1);
61 allocate_device<int>(&d_smdAtomsSOAIndex, this->numSMDAtoms);
65 allocate_device<double3>(&d_peerCOM,
sizeof(double3));
67 allocate_host<double3>(&h_peerCOM,
sizeof(double3));
76 copy_HtoD<double3>(curCOM, d_curCOM, 1);
77 cudaCheck(cudaMemset(d_tbcatomic, 0,
sizeof(
unsigned int)));
81 ComputeSMDCUDA::~ComputeSMDCUDA(){
83 deallocate_host<double3>(&curCOM);
84 deallocate_device<unsigned int>(&d_tbcatomic);
85 deallocate_device<int>(&d_smdAtomsSOAIndex);
89 void ComputeSMDCUDA::parseAtoms(){
92 origCOM.x = origCOM.y = origCOM.z = 0;
94 int numPDBAtoms = smdpdb.num_atoms();
95 if(numPDBAtoms < 1 )
NAMD_die(
"No Atoms found in SMDFile\n");
99 if (numPDBAtoms != this->numAtoms){
100 fprintf(stderr,
"Error, wrong numPDB (%d vs %d)\n",numPDBAtoms, this->numAtoms);
101 NAMD_die(
"The number of atoms in SMDFile must be equal to the total number of atoms in the structure!\n");
105 for(
int i = 0; i < numPDBAtoms; i++){
107 PDBAtom *atom = smdpdb.atom(i);
109 smdAtomsGlobalIndex.push_back(i);
113 origCOM.x += atom->
xcoor()*mass;
114 origCOM.y += atom->
ycoor()*mass;
115 origCOM.z += atom->
zcoor()*mass;
120 inv_group_mass = 1.0 / imass;
121 origCOM.x *= inv_group_mass;
122 origCOM.y *= inv_group_mass;
123 origCOM.z *= inv_group_mass;
126 NAMD_die(
"SMDFile contained no SMD atoms (atoms w/ nonzero occupancy)\n");
128 this->numSMDAtoms = smdAtomsGlobalIndex.size();
131 void ComputeSMDCUDA::updateAtoms(
132 std::vector<AtomMap*> &atomMapsList,
133 std::vector<CudaLocalRecord> &localRecords,
134 const int* h_globalToLocalID) {
135 DebugM(3,
"[" << CkMyPe() <<
"]" <<
" updateAtoms full "<< smdAtomsGlobalIndex.size()<<
" SOA "<< smdAtomsSOAIndex.size()<<
"\n" <<
endi);
136 smdAtomsSOAIndex.clear();
138 smdAtomsSOAtoGlobalLocalMap.clear();
140 for(
int i = 0 ; i < this->numSMDAtoms; i++){
141 int gid = smdAtomsGlobalIndex[i];
144 for(
int j = 0 ; j < atomMapsList.size(); j++){
145 lid = atomMapsList[j]->localID(gid);
146 if( lid.
pid != -1)
break;
153 NAMD_bug(
" LocalAtomID not found in patchMap");
158 int soaPid = h_globalToLocalID[lid.
pid];
159 int soaIndex = localRecords[soaPid].bufferOffset + lid.
index;
160 smdAtomsSOAIndex.push_back(soaIndex);
163 smdAtomsSOAtoGlobalLocalMap[soaIndex]=gid;
167 int numLocalSMDAtoms=smdAtomsSOAIndex.size();
168 DebugM(3,
"[" << CkMyPe() <<
"]" <<
" updateAtoms " << numLocalSMDAtoms <<
"\n" <<
endi);
169 if(numLocalSMDAtoms>0)
175 std::sort(smdAtomsSOAIndex.begin(), smdAtomsSOAIndex.end());
176 copy_HtoD<int>(smdAtomsSOAIndex.data(), d_smdAtomsSOAIndex, numLocalSMDAtoms);
179 void ComputeSMDCUDA::computeCOMMGpu(
181 const float * d_mass,
182 const double* d_pos_x,
183 const double* d_pos_y,
184 const double* d_pos_z,
185 const char3* d_transform,
188 DebugM(3,
"[" << CkMyPe() <<
"]" <<
" computeCOMMGpu " << this->smdAtomsSOAIndex.size() <<
" 1st "<< this->smdAtomsSOAIndex[0]<<
"\n" <<
endi);
190 computeCOMSMDMgpu(this->smdAtomsSOAIndex.size(),lat,
191 d_mass, d_pos_x, d_pos_y, d_pos_z,
192 d_transform, this->d_smdAtomsSOAIndex,
193 d_peerCOM, cudaMgr->curSMDCOM,
194 this->d_tbcatomic, numDevices, deviceIndex, stream);
196 cudaCheck(cudaStreamSynchronize(stream));
197 copy_DtoH_sync<double3>(d_peerCOM, h_peerCOM, 1);
198 DebugM(3,
"deviceIndex "<< deviceIndex <<
" COM " <<h_peerCOM[0].x*inv_group_mass<<
", " 199 <<h_peerCOM[0].y*inv_group_mass<<
", " 200 <<h_peerCOM[0].z*inv_group_mass<<
"\n"<<
endi);
205 void ComputeSMDCUDA::doForce(
210 const double* d_pos_x,
211 const double* d_pos_y,
212 const double* d_pos_z,
213 const char3* d_transform,
214 double* d_f_normal_x,
215 double* d_f_normal_y,
216 double* d_f_normal_z,
224 DebugM(3,
"[" << CkMyPe() <<
"]" <<
" doForce " << this->smdAtomsSOAIndex.size() <<
"\n" <<
endi);
227 bool doOutput = ((timeStep % this->outputFrequency) == 0);
230 doOutput&=cudaMgr->reducerSMDDevice == deviceIndex;
234 cudaCheck(cudaStreamSynchronize(stream));
235 copy_DtoH_sync<double3>(d_peerCOM, h_peerCOM, 1);
236 DebugM(3,
" deviceIndex "<< deviceIndex <<
" com " <<h_peerCOM[0].x*inv_group_mass<<
", " 237 <<h_peerCOM[0].y*inv_group_mass<<
", " 238 <<h_peerCOM[0].z*inv_group_mass<<
"\n"<<
endi);
247 this->inv_group_mass,
248 this->springConstant,
249 this->transverseSpringConstant,
252 doEnergy || doOutput,
264 this->smdAtomsSOAIndex.size(),
265 this->d_smdAtomsSOAIndex,
279 cudaCheck(cudaStreamSynchronize(stream));
281 Vector f(h_extForce->x, h_extForce->y, h_extForce->z);
284 if(cudaMgr->reducerSMDDevice == deviceIndex)
287 outputStep(timeStep, curCOM, h_extForce);
292 void ComputeSMDCUDA::outputStep(
const int timeStep, double3* curCOM, double3* extForce)
294 Vector p(curCOM->x, curCOM->y, curCOM->z);
295 Vector f(extForce->x, extForce->y, extForce->z);
296 if(timeStep % (100*this->outputFrequency) == 0) {
297 iout <<
"SMDTITLE: TS CURRENT_POSITION FORCE\n" <<
endi;
302 void ComputeSMDCUDA::outputCOM(std::string comment)
306 Vector p(curCOM->x, curCOM->y, curCOM->z);
313 void ComputeSMDCUDA::initPeerCOM(double3** d_peerPoolCOM, cudaStream_t stream){
314 DebugM(3,
"[" << CkMyPe() <<
"]" <<
" initPeerCOM\n" <<
endi);
315 initPeerCOMmgpu(numDevices, deviceIndex, d_peerPoolCOM, d_peerCOM, stream);
319 void ComputeSMDCUDA::dump(std::string comment,
322 const double* d_pos_x,
323 const double* d_pos_y,
324 const double* d_pos_z,
330 DebugM(3,
"[" << CkMyPe() <<
"]" <<
" dump " << comment<<
" "<< smdAtomsSOAIndex.size() <<
"\n" <<
endi);
340 allocate_host<double>(&pos_x, numAtoms);
341 allocate_host<double>(&pos_y, numAtoms);
342 allocate_host<double>(&pos_z, numAtoms);
343 allocate_host<float>(&mass, numAtoms);
344 copy_DtoH_sync<double>(d_pos_x, pos_x, numAtoms);
345 copy_DtoH_sync<double>(d_pos_y, pos_y, numAtoms);
346 copy_DtoH_sync<double>(d_pos_z, pos_z, numAtoms);
347 copy_DtoH_sync<float>(d_mass, mass, numAtoms);
349 std::string name(
"smd-");
350 name+=std::to_string(CkMyPe());
352 ofs.open(name, std::ofstream::out | std::ofstream::app);
353 for(
int i=0; i<smdAtomsSOAIndex.size(); i++)
355 int soaid=smdAtomsSOAIndex[i];
356 ofs <<step<<
"- "<< smdAtomsSOAtoGlobalLocalMap[soaid]<<
":"<<pos_x[soaid] <<
","<<pos_y[soaid]<<
","<<pos_z[soaid]<<
","<<mass[soaid]<<
"\n";
362 #endif // NODEGROUP_FORCE_REGISTER
std::ostream & endi(std::ostream &s)
Molecule stores the structural information for the system.
std::atomic< int > reducerSMDDevice
__thread DeviceCUDA * deviceCUDA
void NAMD_bug(const char *err_msg)
static ComputeCUDAMgr * getComputeCUDAMgr()
void NAMD_die(const char *err_msg)
Real atommass(int anum) const