NAMD
ComputePmeCUDA.C
Go to the documentation of this file.
1 #include <numeric>
2 #include <algorithm>
3 #ifdef NAMD_CUDA
4 #include <cuda_runtime.h>
5 #endif
6 #ifdef NAMD_HIP
7 #include <hip/hip_runtime.h>
8 #endif
9 #include "Node.h"
10 #include "SimParameters.h"
11 #include "Priorities.h"
12 #include "ComputeNonbondedUtil.h"
13 #include "ComputePmeCUDA.h"
14 #include "ComputePmeCUDAMgr.h"
15 #include "PmeSolver.h"
16 #include "HomePatch.h"
17 #include "PatchData.h"
18 #include "NamdEventsProfiling.h"
19 #include "TestArray.h"
20 
21 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
22 //
23 // Class creator, multiple patches
24 //
// NOTE(review): this doxygen extraction dropped the signature line (original line 25);
// per the symbol index this is ComputePmeCUDA::ComputePmeCUDA(ComputeID c, PatchIDList &pids).
// Records one patch entry per supplied patch ID and zeroes the cached self-energy
// accumulators (plain, FEP, and both TI derivative terms).
26  setNumPatches(pids.size());
27  patches.resize(getNumPatches());
28  for (int i=0;i < getNumPatches();i++) {
29  patches[i].patchID = pids[i];
30  }
// Self-energy caches start at 0; sendAtoms() treats selfEnergy == 0 as "first step".
31  selfEnergy = 0;
32  selfEnergyFEP = 0;
33  selfEnergyTI1 = 0;
34  selfEnergyTI2 = 0;
35 }
36 
37 //
38 // Class creator, single patch
39 //
// NOTE(review): signature line (original line 40) missing from this extraction —
// presumably ComputePmeCUDA::ComputePmeCUDA(ComputeID c, PatchID pid); confirm in repo.
// Single-patch variant: one patch entry, same zeroed self-energy caches as the
// multi-patch constructor.
41  setNumPatches(1);
42  patches.resize(getNumPatches());
43  patches[0].patchID = pid;
44  selfEnergy = 0;
45  selfEnergyFEP = 0;
46  selfEnergyTI1 = 0;
47  selfEnergyTI2 = 0;
48 }
49 
50 //
51 // Class destructor
52 //
// NOTE(review): destructor signature line (original line 53) missing from this
// extraction — this is the body of ComputePmeCUDA::~ComputePmeCUDA().
// Unregisters every box this compute registered in initialize() (position,
// average-position, force), then releases the reduction object and the Converse lock.
54  for (int i=0;i < getNumPatches();i++) {
// Boxes are NULL if initialize() was never called for this patch, so each
// unregister is guarded.
55  if (patches[i].positionBox != NULL) {
56  PatchMap::Object()->patch(patches[i].patchID)->unregisterPositionPickup(this, &patches[i].positionBox);
57  }
58  if (patches[i].avgPositionBox != NULL) {
59  PatchMap::Object()->patch(patches[i].patchID)->unregisterAvgPositionPickup(this, &patches[i].avgPositionBox);
60  }
61  if (patches[i].forceBox != NULL) {
62  PatchMap::Object()->patch(patches[i].patchID)->unregisterForceDeposit(this, &patches[i].forceBox);
63  }
64  }
65  delete reduction;
66  CmiDestroyLock(lock);
67 }
68 
69 //
70 // Initialize
71 //
// NOTE(review): the signature line (original line 72, presumably
// void ComputePmeCUDA::initialize()) and interior lines 76 and 81 are missing from
// this extraction; line 76 likely fetched SimParameters — confirm against the repo.
// Creates the per-compute lock, locates the local ComputePmeCUDAMgr branch, and
// registers position/force boxes with every owned patch, then maps patches to pencils.
73  lock = CmiCreateLock();
74 
75  // Sanity Check
77  if (simParams->lesOn) NAMD_bug("ComputePmeCUDA::ComputePmeCUDA, lesOn not yet implemented");
78  if (simParams->pairInteractionOn) NAMD_bug("ComputePmeCUDA::ComputePmeCUDA, pairInteractionOn not yet implemented");
79 
80  sendAtomsDone = false;
82 #ifdef NODEGROUP_FORCE_REGISTER
83  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
84  PatchData *patchData = cpdata.ckLocalBranch();
85 #endif
86  // basePriority = PME_PRIORITY;
// patchCounter is the countdown used by storePmeForceMsg() to detect when all
// patches' force messages have arrived.
87  patchCounter = getNumPatches();
88 
89  // Get proxy to ComputePmeCUDAMgr
90  computePmeCUDAMgrProxy = CkpvAccess(BOCclass_group).computePmeCUDAMgr;
91  mgr = computePmeCUDAMgrProxy.ckLocalBranch();
92  if (mgr == NULL)
93  NAMD_bug("ComputePmeCUDA::ComputePmeCUDA, unable to locate local branch of BOC entry computePmeCUDAMgr");
94  pmeGrid = mgr->getPmeGrid();
95 
96  for (int i=0;i < getNumPatches();i++) {
// Guard against double initialization: all box pointers must still be NULL.
97  if (patches[i].positionBox != NULL || patches[i].avgPositionBox != NULL
98  || patches[i].forceBox != NULL || patches[i].patch != NULL)
99  NAMD_bug("ComputePmeCUDA::initialize() called twice or boxes not set to NULL");
100  if (!(patches[i].patch = PatchMap::Object()->patch(patches[i].patchID))) {
101  NAMD_bug("ComputePmeCUDA::initialize() patch not found");
102  }
103  patches[i].positionBox = patches[i].patch->registerPositionPickup(this);
104  patches[i].forceBox = patches[i].patch->registerForceDeposit(this);
105  patches[i].avgPositionBox = patches[i].patch->registerAvgPositionPickup(this);
106  }
107 
108  setupActivePencils();
109 }
110 
// NOTE(review): signature line (original line 111, presumably
// void ComputePmeCUDA::atomUpdate()) missing from this extraction.
// Flags that atoms migrated/changed so downstream consumers can resend full data.
112  atomsChanged = true;
113 }
114 
115 //
116 // Setup, see which pencils overlap with the patches held by this compute
117 //
118 void ComputePmeCUDA::setupActivePencils() {
119  PatchMap *patchMap = PatchMap::Object();
120 
121  for (int i=0;i < getNumPatches();i++) {
122  int homey = -1;
123  int homez = -1;
124  mgr->getHomePencil(patches[i].patchID, homey, homez);
125 
126  patches[i].homePencilY = homey;
127  patches[i].homePencilZ = homez;
128  patches[i].homePencilNode = mgr->getNode(homey,homez);
129  RegisterPatchMsg *msg = new RegisterPatchMsg();
130  msg->i = homey;
131  msg->j = homez;
132  computePmeCUDAMgrProxy[patches[i].homePencilNode].registerPatch(msg);
133  }
134 
135  atomsChanged = true;
136 
137 }
138 
// NOTE(review): signature line (original line 139, presumably
// int ComputePmeCUDA::noWork()) missing from this extraction.
// Returns 0 (work needed) on full-electrostatics steps; otherwise submits an
// empty reduction, skips all boxes, and returns 1 (no work this step).
140 
141  if (patches[0].patch->flags.doFullElectrostatics) return 0;
142 
143  reduction->submit();
144 
145  for (int i=0;i < getNumPatches();i++) {
146  patches[i].positionBox->skip();
147  patches[i].forceBox->skip();
148  // We only need to call skip() once
// NOTE(review): the guard tests patchID == 0 (global patch zero), not i == 0 —
// so only the compute owning patch 0 notifies the manager; presumably intentional
// (one skip() per simulation step across all computes) — verify against the manager.
149  if (patches[i].patchID == 0) computePmeCUDAMgrProxy[patches[i].homePencilNode].skip();
150  }
151 
152  return 1;
153 }
154 
// NOTE(review): signature line (original line 155, presumably
// void ComputePmeCUDA::doWork()) missing from this extraction.
// Two-phase state machine driven by sendAtomsDone: the first invocation of a step
// ships atoms to the PME manager; the second collects the returned forces.
156  NAMD_EVENT_START(1, NamdProfileEvent::COMPUTE_PME_CUDA);
157 
158  if (sendAtomsDone) {
159  // Second part of computation: receive forces from ComputePmeCUDAMgr
160  // basePriority = PME_OFFLOAD_PRIORITY;
161  sendAtomsDone = false;
162  recvForces();
163  } else {
164  // First part of computation: send atoms to ComputePmeCUDAMgr
165  sendAtomsDone = true;
166  // basePriority = COMPUTE_HOME_PRIORITY + PATCH_PRIORITY(patchID);
167  sendAtoms();
168  }
169  NAMD_EVENT_STOP(1, NamdProfileEvent::COMPUTE_PME_CUDA);
170 }
171 
// Packs each owned patch's atoms into a PmeAtomMsg — positions converted to
// fractional lattice coordinates in [0,1), charges pre-scaled by sqrt of the
// Coulomb/scaling factor — plus per-atom alchemical charge factors for up to five
// PME grids, and sends the message to the patch's home-pencil manager. Also
// accumulates the Ewald self-energy terms into the reduction.
// NOTE(review): interior lines 192, 198, and 248 are missing from this doxygen
// extraction; line 198 is the cut-off continuation of the coulomb_sqrt expression
// (presumably "* ComputeNonbondedUtil::dielectric_1 );"), and line 248 likely set
// the message priority (SET_PRIORITY) — confirm against the repository.
172 void ComputePmeCUDA::sendAtoms() {
173 
// Cache lattice origin and reciprocal vectors once; all patches share one lattice.
174  Lattice& lattice = patches[0].patch->lattice;
175  Vector origin = lattice.origin();
176  Vector recip1 = lattice.a_r();
177  Vector recip2 = lattice.b_r();
178  Vector recip3 = lattice.c_r();
179  double ox = origin.x;
180  double oy = origin.y;
181  double oz = origin.z;
182  double r1x = recip1.x;
183  double r1y = recip1.y;
184  double r1z = recip1.z;
185  double r2x = recip2.x;
186  double r2y = recip2.y;
187  double r2z = recip2.z;
188  double r3x = recip3.x;
189  double r3y = recip3.y;
190  double r3z = recip3.z;
191 
193  for (int i=0;i < getNumPatches();i++) {
194  if (patches[i].pmeForceMsg != NULL)
195  NAMD_bug("ComputePmeCUDA::sendAtoms, pmeForceMsg is not empty");
196 
197  const BigReal coulomb_sqrt = sqrt( COULOMB * ComputeNonbondedUtil::scaling
199 
200  bool doMolly = patches[i].patch->flags.doMolly;
201  bool doEnergy = patches[i].patch->flags.doEnergy;
202  bool doVirial = patches[i].patch->flags.doVirial;
203  PatchMap *patchMap = PatchMap::Object();
204 
205  // Send atom patch to pencil(s)
206  // #ifdef NETWORK_PROGRESS
207  // CmiNetworkProgress();
208  // #endif
// With MOLLY, use the averaged positions instead of the instantaneous ones.
210  CompAtom *x = patches[i].positionBox->open();
211  if ( doMolly ) {
212  patches[i].positionBox->close(&x);
213  x = patches[i].avgPositionBox->open();
214  }
215 
216  int numAtoms = patches[i].patch->getNumAtoms();
217 
218  // FIXME:
219  // the computation of self energies makes a bold assumption that charges are never changed
220  // this is not true when the system has some kind of chemical interactions, like reduction-oxidation reaction
221  // we may need an additional flag to determine whether NAMD is simulating a system with varying atomic charges
222  const bool isFirstStep = (patches[0].patch->flags.step == simParams->firstTimestep) || (selfEnergy == 0);
223  if (doEnergy) {
224  if (simParams->alchFepOn) {
225  calcSelfEnergyFEP(numAtoms, x, isFirstStep);
226 // fprintf(stdout, "self lambda1 = %lf ; self lambda2 = %lf\n", selfEnergy, selfEnergy_F);
227  } else if (simParams->alchThermIntOn) {
228  calcSelfEnergyTI(numAtoms, x, isFirstStep);
229  } else {
230  calcSelfEnergy(numAtoms, x, isFirstStep);
231  }
232  }
233 
234  // const Vector ucenter = patches[i].patch->lattice.unscale(patchMap->center(patches[i].patchID));
235  // const BigReal recip11 = patches[i].patch->lattice.a_r().x;
236  // const BigReal recip22 = patches[i].patch->lattice.b_r().y;
237  // const BigReal recip33 = patches[i].patch->lattice.c_r().z;
238 
239 // PmeAtomMsg *msg = new (numAtoms, numAtoms, numAtoms, PRIORITY_SIZE) PmeAtomMsg;
// Varsize message allocation: the charge-factor arrays are only allocated when the
// corresponding alchemical mode is active (size 0 otherwise).
240  PmeAtomMsg *msg;
241  const int alchGrid = simParams->alchOn ? 1 : 0;
242  const int alchDecoupleGrid = simParams->alchDecouple ? 1: 0;
243  const int alchSoftCoreOrTI = (simParams->alchElecLambdaStart > 0 || simParams->alchThermIntOn) ? 1 : 0;
244  msg = new (numAtoms, alchGrid * numAtoms, alchGrid * numAtoms,
245  alchDecoupleGrid * numAtoms, alchDecoupleGrid * numAtoms,
246  alchSoftCoreOrTI * numAtoms, PRIORITY_SIZE) PmeAtomMsg;
247 
249  // NOTE:
250  // patch already contains the centered coordinates and scaled charges
251  // memcpy(msg->atoms, patch->getCudaAtomList(), sizeof(CudaAtom)*numAtoms);
252 
253 
254  msg->numAtoms = numAtoms;
255  // msg->patchIndex = i;
256  msg->i = patches[i].homePencilY;
257  msg->j = patches[i].homePencilZ;
258  msg->compute = this;
259  msg->pe = CkMyPe();
260  msg->doEnergy = doEnergy;
261  msg->doVirial = doVirial;
262  msg->lattice = lattice;
263  msg->simulationStep = patches[0].patch->flags.step;
264  CudaAtom *atoms = msg->atoms;
265  float *atomChargeFactors1 = msg->chargeFactors1; // normal + appearing atoms
266  float *atomChargeFactors2 = msg->chargeFactors2; // normal + disappearing atoms
267  float *atomChargeFactors3 = msg->chargeFactors3; // only appearing atoms
268  float *atomChargeFactors4 = msg->chargeFactors4; // only disappearing atoms
269  float *atomChargeFactors5 = msg->chargeFactors5; // only normal atoms
270  // BigReal miny = 1.0e20;
271  // BigReal minz = 1.0e20;
272  for (int j=0;j < numAtoms;j++) {
273  CudaAtom atom;
274  float factor1 = 1.0f;
275  float factor2 = 1.0f;
276  float factor3 = 1.0f;
277  float factor4 = 1.0f;
278  float factor5 = 1.0f;
279 #if 1
280  BigReal q = x[j].charge;
281  // Convert atom positions to range [0,1)
282  double px = x[j].position.x - ox;
283  double py = x[j].position.y - oy;
284  double pz = x[j].position.z - oz;
285  double wx = px*r1x + py*r1y + pz*r1z;
286  double wy = px*r2x + py*r2y + pz*r2z;
287  double wz = px*r3x + py*r3y + pz*r3z;
288  // double wx = x[j].position.x*recip11;
289  // double wy = x[j].position.y*recip22;
290  // double wz = x[j].position.z*recip33;
// Wrap fractional coordinates into [0,1) centered on the cell; the float
// round-off guard below catches values that land exactly on 1.0f after the cast.
291  wx = (wx - (floor(wx + 0.5) - 0.5));
292  wy = (wy - (floor(wy + 0.5) - 0.5));
293  wz = (wz - (floor(wz + 0.5) - 0.5));
294  // wx = (wx - floor(wx));
295  // wy = (wy - floor(wy));
296  // wz = (wz - floor(wz));
297  // if (wx >= 1.0) wx -= 1.0;
298  // if (wy >= 1.0) wy -= 1.0;
299  // if (wz >= 1.0) wz -= 1.0;
300  atom.x = (float)wx;
301  atom.y = (float)wy;
302  atom.z = (float)wz;
303  if (atom.x >= 1.0f) atom.x -= 1.0f;
304  if (atom.y >= 1.0f) atom.y -= 1.0f;
305  if (atom.z >= 1.0f) atom.z -= 1.0f;
306  atom.q = (float)(q*coulomb_sqrt);
307  // CHC: for multiple grids, charges shouldn't be scaled here!
// Partition codes: 0 = normal, 1 = appearing, 2 = disappearing atoms; the factor
// arrays select which atoms contribute to which PME grid.
308  int part = x[j].partition;
309  if(simParams->alchOn){
310  switch(part){
311  case 0: {
312  factor1 = 1.0f;
313  factor2 = 1.0f;
314  if (simParams->alchDecouple) {
315  factor3 = 0.0f;
316  factor4 = 0.0f;
317  }
318  if (simParams->alchElecLambdaStart || simParams->alchThermIntOn) {
319  factor5 = 1.0f;
320  }
321  break;
322  }
323  case 1: {
324  factor1 = 1.0f;
325  factor2 = 0.0f;
326  if (simParams->alchDecouple) {
327  factor3 = 1.0f;
328  factor4 = 0.0f;
329  }
330  if (simParams->alchElecLambdaStart || simParams->alchThermIntOn) {
331  factor5 = 0.0f;
332  }
333  break;
334  }
335  case 2: {
336  factor1 = 0.0f;
337  factor2 = 1.0f;
338  if (simParams->alchDecouple) {
339  factor3 = 0.0f;
340  factor4 = 1.0f;
341  }
342  if (simParams->alchElecLambdaStart || simParams->alchThermIntOn) {
343  factor5 = 0.0f;
344  }
345  break;
346  }
347  default: NAMD_bug("Invalid partition number"); break;
348  }
349 // atom.q *= elecLambda;
350  atomChargeFactors1[j] = factor1;
351  atomChargeFactors2[j] = factor2;
352  if (simParams->alchDecouple) {
353  atomChargeFactors3[j] = factor3;
354  atomChargeFactors4[j] = factor4;
355  }
356  if (simParams->alchElecLambdaStart || simParams->alchThermIntOn) {
357  atomChargeFactors5[j] = factor5;
358  }
359  }
360  atoms[j] = atom;
361  // miny = std::min(x[j].position.y, miny);
362  // minz = std::min(x[j].position.z, minz);
363 #else
364  atom.x = (float) x[j].position.x;
365  atom.y = (float) x[j].position.y;
366  atom.z = (float) x[j].position.z;
367  atom.q = x[j].charge;
368 #endif
369  }
370 #if defined(NTESTPID)
371  if (NTESTPID == patches[i].patch->getPatchID()) {
372  char fname[128];
373  char remark[128];
374  sprintf(fname, "pme_xyzq_soa_pid%d_step%d.bin", NTESTPID,
375  patches[i].patch->flags.step);
376  sprintf(remark, "SOA PME xyzq, patch %d, step %d", NTESTPID,
377  patches[i].patch->flags.step);
378  TestArray_write<float>(fname, remark, (float *) atoms, 4*numAtoms);
379  }
380 #endif // NTESTPID
381  // Calculate corner with minimum y and z for this patch
382  // double wy = miny*recip22;
383  // double wz = minz*recip33;
384  // msg->miny = (int)((double)pmeGrid.K2*(wy - (floor(wy + 0.5) - 0.5)));
385  // msg->minz = (int)((double)pmeGrid.K3*(wz - (floor(wz + 0.5) - 0.5)));
386 
387  // For local (within shared memory node), get pointer to memory location and do direct memcpy
388  // For global (on different shared memory nodes), go through the Charm++ proxy
389  if (patches[i].homePencilNode == CkMyNode()) {
390  mgr->recvAtoms(msg);
391  } else {
392  computePmeCUDAMgrProxy[patches[i].homePencilNode].recvAtoms(msg);
393  }
394 
// Close whichever box we actually opened above.
395  if ( doMolly )
396  patches[i].avgPositionBox->close(&x);
397  else
398  patches[i].positionBox->close(&x);
399  }
400 
// Fold the cached self-energy terms into the reduction for this step.
401  reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += selfEnergy;
402  if (simParams->alchFepOn) {
403  reduction->item(REDUCTION_ELECT_ENERGY_SLOW_F) += selfEnergyFEP;
404  }
405  if (simParams->alchThermIntOn) {
406  reduction->item(REDUCTION_ELECT_ENERGY_SLOW_TI_1) += selfEnergyTI1;
407  reduction->item(REDUCTION_ELECT_ENERGY_SLOW_TI_2) += selfEnergyTI2;
408  }
409  reduction->submit();
410 }
411 
412 //
413 // Calculate self-energy and send to PmeSolver
414 //
// Recomputes the cached Ewald self-energy (sum of q_i^2) when forced by
// isFirstStep; otherwise reuses the cached value.
// NOTE(review): original lines 424-425 are missing from this extraction —
// presumably the final scaling of selfEnergy by the Ewald prefactor
// (-ewaldcof * COULOMB * scaling * dielectric_1 / SQRT_PI); confirm in the repo.
415 void ComputePmeCUDA::calcSelfEnergy(int numAtoms, CompAtom *x, bool isFirstStep) {
416  // FIXME: check if the atomic charges are constant during the simulation.
417  bool constantAtomicCharges = true;
418  if (isFirstStep || !constantAtomicCharges) {
419  selfEnergy = 0;
420  for (int i=0;i < numAtoms;i++) {
421  selfEnergy += x[i].charge*x[i].charge;
422  }
423  //const double SQRT_PI = 1.7724538509055160273; /* mathematica 15 digits*/
426  }
427 }
428 
// FEP variant of the self-energy: maintains two cached sums, one per lambda
// window (selfEnergy for lambda_1, selfEnergyFEP for lambda_2), each atom scaled
// by the electrostatic lambda of its partition.
// NOTE(review): original lines 430 and 483-484 are missing from this extraction;
// 483-484 presumably apply the Ewald prefactor to both sums — confirm in the repo.
// NOTE(review): the `static thread_local` lambdas cache the last-seen values so the
// O(numAtoms) loop reruns only when lambda changes; since they are function-level
// statics they are shared by all ComputePmeCUDA instances on a thread — verify this
// sharing is intended when multiple computes run per thread.
429 void ComputePmeCUDA::calcSelfEnergyFEP(int numAtoms, CompAtom *atoms, bool isFirstStep) {
431  if(simParams->alchFepOn == false){
432  NAMD_bug("Called calcSelfEnergyFEP() in non-FEP code!");
433  }
434  // FIXME: check if the atomic charges are constant during the simulation.
435  bool constantAtomicCharges = true;
436  BigReal scaleLambda1, scaleLambda2; // scale factors for lambda_1 and lambda_2
437  const BigReal alchLambda1 = simParams->getCurrentLambda(patches[0].patch->flags.step);
438  const BigReal alchLambda2 = simParams->getCurrentLambda2(patches[0].patch->flags.step);
439  static thread_local BigReal lambda1Up = simParams->getElecLambda(alchLambda1);
440  static thread_local BigReal lambda2Up = simParams->getElecLambda(alchLambda2);
441  static thread_local BigReal lambda1Down = simParams->getElecLambda(1.0 - alchLambda1);
442  static thread_local BigReal lambda2Down = simParams->getElecLambda(1.0 - alchLambda2);
// Recompute when forced, or when any of the four electrostatic lambdas changed
// since the last call on this thread.
443  if (isFirstStep || !constantAtomicCharges ||
444  lambda1Up != simParams->getElecLambda(alchLambda1) ||
445  lambda2Up != simParams->getElecLambda(alchLambda2) ||
446  lambda1Down != simParams->getElecLambda(1.0 - alchLambda1) ||
447  lambda2Down != simParams->getElecLambda(1.0 - alchLambda2))
448  {
449  lambda1Up = simParams->getElecLambda(alchLambda1);
450  lambda2Up = simParams->getElecLambda(alchLambda2);
451  lambda1Down = simParams->getElecLambda(1.0 - alchLambda1);
452  lambda2Down = simParams->getElecLambda(1.0 - alchLambda2);
453  selfEnergy = 0.0; // self energy for lambda_1
454  selfEnergyFEP = 0.0; // self energy for lambda_2
455  for (int i = 0; i < numAtoms; ++i) {
// Partition 0 = normal, 1 = appearing (scaled up), 2 = disappearing (scaled down).
456  switch (atoms[i].partition) {
457  case 0: {
458  scaleLambda1 = 1.0f;
459  scaleLambda2 = 1.0f;
460  break;
461  }
462  case 1: {
463  scaleLambda1 = lambda1Up; // lambda1 up
464  scaleLambda2 = lambda2Up; // lambda2 up
465  break;
466  }
467  case 2: {
468  scaleLambda1 = lambda1Down; // lambda1 down
469  scaleLambda2 = lambda2Down; // lambda2 down
470  break;
471  }
472  default: NAMD_bug("Invalid partition number");
473  }
474  selfEnergy += scaleLambda1 * (atoms[i].charge * atoms[i].charge);
475  selfEnergyFEP += scaleLambda2 * (atoms[i].charge * atoms[i].charge);
// With alchDecouple the complement (1 - lambda) grid also contributes self-energy.
476  if (simParams->alchDecouple) {
477  selfEnergy += (1.0 - scaleLambda1) * (atoms[i].charge * atoms[i].charge);
478  selfEnergyFEP += (1.0 - scaleLambda2) * (atoms[i].charge * atoms[i].charge);
479  }
480  // scale factor for partition 0 is always 1
481  // so it's not necessary to compensate the self energy with (lambda1Up + lambda1Down - 1.0) * -1.0
482  }
485  }
486 }
487 
// Thermodynamic-integration variant of the self-energy: caches selfEnergy plus
// the two TI derivative accumulators (selfEnergyTI1/TI2 for the appearing and
// disappearing partitions).
// NOTE(review): original lines 489 and 541-543 are missing from this extraction;
// 541-543 presumably apply the Ewald prefactor to all three sums — confirm in repo.
// NOTE(review): the NAMD_bug message on line 491 says "non-FEP code" but this is
// the TI path — likely a copy/paste slip ("non-TI code" intended). Left unchanged
// here because a doc-only edit must not alter runtime strings.
488 void ComputePmeCUDA::calcSelfEnergyTI(int numAtoms, CompAtom *atoms, bool isFirstStep) {
490  if(simParams->alchThermIntOn == false){
491  NAMD_bug("Called calcSelfEnergyTI() in non-FEP code!");
492  }
493  // FIXME: check if the atomic charges are constant during the simulation.
494  bool constantAtomicCharges = true;
495  const BigReal alchLambda1 = simParams->getCurrentLambda(patches[0].patch->flags.step);
// Cached last-seen lambdas (see calcSelfEnergyFEP for the same caching pattern).
496  static thread_local BigReal lambda1Up = simParams->getElecLambda(alchLambda1);
497  static thread_local BigReal lambda1Down = simParams->getElecLambda(1.0 - alchLambda1);
498  if (isFirstStep || !constantAtomicCharges ||
499  lambda1Up != simParams->getElecLambda(alchLambda1) ||
500  lambda1Down != simParams->getElecLambda(1.0 - alchLambda1))
501  {
502  lambda1Up = simParams->getElecLambda(alchLambda1);
503  lambda1Down = simParams->getElecLambda(1.0 - alchLambda1);
504  selfEnergy = 0.0;
505  selfEnergyTI1 = 0.0;
506  selfEnergyTI2 = 0.0;
507  BigReal elecLambda1 = 0.0;
508  double factor_ti_1 = 0.0;
509  double factor_ti_2 = 0.0;
510  for (int i = 0; i < numAtoms; ++i) {
// Partition 0 = normal, 1 = appearing (TI1 term), 2 = disappearing (TI2 term).
511  switch (atoms[i].partition) {
512  case 0: {
513  elecLambda1 = 1.0;
514  factor_ti_1 = 0.0;
515  factor_ti_2 = 0.0;
516  break;
517  }
518  case 1: {
519  elecLambda1 = lambda1Up;
520  factor_ti_1 = 1.0;
521  factor_ti_2 = 0.0;
522  break;
523  }
524  case 2: {
525  elecLambda1 = lambda1Down;
526  factor_ti_1 = 0.0;
527  factor_ti_2 = 1.0;
528  break;
529  }
530  default: NAMD_bug("Invalid partition number");
531  }
532  selfEnergy += elecLambda1 * (atoms[i].charge * atoms[i].charge);
533  selfEnergyTI1 += factor_ti_1 * (atoms[i].charge * atoms[i].charge);
534  selfEnergyTI2 += factor_ti_2 * (atoms[i].charge * atoms[i].charge);
// With alchDecouple the complement grid contributes with opposite-signed TI terms.
535  if (simParams->alchDecouple) {
536  selfEnergy += (1.0 - elecLambda1) * (atoms[i].charge * atoms[i].charge);
537  selfEnergyTI1 -= factor_ti_1 * (atoms[i].charge * atoms[i].charge);
538  selfEnergyTI2 -= factor_ti_2 * (atoms[i].charge * atoms[i].charge);
539  }
540  }
544  }
545 }
546 
// Consumes the PmeForceMsg stored for each patch: combines the per-grid
// fractional-space forces with the appropriate alchemical lambda weights,
// transforms them to Cartesian via the reciprocal lattice vectors, and deposits
// them into the patch's slow-force box. Frees each message when done.
// NOTE(review): original line 564 is missing from this extraction — presumably the
// SimParameters fetch; confirm against the repository.
547 void ComputePmeCUDA::recvForces() {
548 
549  Lattice& lattice = patches[0].patch->lattice;
550  Vector origin = lattice.origin();
551  Vector recip1 = lattice.a_r();
552  Vector recip2 = lattice.b_r();
553  Vector recip3 = lattice.c_r();
554  double r1x = recip1.x;
555  double r1y = recip1.y;
556  double r1z = recip1.z;
557  double r2x = recip2.x;
558  double r2y = recip2.y;
559  double r2z = recip2.z;
560  double r3x = recip3.x;
561  double r3y = recip3.y;
562  double r3z = recip3.z;
563 
565 
// lambda3Up/Down are only assigned (and only read) when alchDecouple is on;
// lambda1Up/Down only when alchOn — the branches below keep this consistent.
566  double alchLambda1, lambda1Up, lambda1Down;
567  double lambda3Up, lambda3Down;
568  if(simParams->alchOn){
569  alchLambda1 = simParams->getCurrentLambda(patches[0].patch->flags.step);
570  lambda1Up = simParams->getElecLambda(alchLambda1);
571  lambda1Down = simParams->getElecLambda(1 - alchLambda1);
572  if (simParams->alchDecouple) {
573  lambda3Up = 1 - lambda1Up;
574  lambda3Down = 1 - lambda1Down;
575  }
576  }
577 
578  //fprintf(stderr, "AP recvForces\n");
579  for (int i=0;i < getNumPatches();i++) {
580  if (patches[i].pmeForceMsg == NULL)
581  NAMD_bug("ComputePmeCUDA::recvForces, no message in pmeForceMsg");
582 
// Up to five force grids mirror the five charge-factor arrays built in sendAtoms().
583  const CudaForce* force = patches[i].pmeForceMsg->force;
584  const CudaForce* force2 = patches[i].pmeForceMsg->force2;
585  const CudaForce* force3 = patches[i].pmeForceMsg->force3;
586  const CudaForce* force4 = patches[i].pmeForceMsg->force4;
587  const CudaForce* force5 = patches[i].pmeForceMsg->force5;
588  Results *r = patches[i].forceBox->open();
589  int numAtoms = patches[i].pmeForceMsg->numAtoms;
590  Force *f = r->f[Results::slow];
591 #ifdef TESTPID
592  if (patches[i].patchID == TESTPID) {
593  fprintf(stderr, "Storing scaled PME CudaForce array for patch %d\n",
594  patches[i].patchID);
595  TestArray_write<float>("scaled_pme_force_good.bin",
596  "scaled PME CudaForce good",
597  (float *) force, 3*numAtoms);
598  TestArray_write<double>("lattice_good.bin", "Lattice good",
599  (double *) &lattice, 3*7);
600  }
601 #endif
602 #if defined(NTESTPID)
603  if (NTESTPID == patches[i].patch->getPatchID()) {
604  char fname[128];
605  char remark[128];
606  sprintf(fname, "pme_fxyz_soa_pid%d_step%d.bin", NTESTPID,
607  patches[i].patch->flags.step);
608  sprintf(remark, "SOA PME fxyz, patch %d, step %d", NTESTPID,
609  patches[i].patch->flags.step);
610  TestArray_write<float>(fname, remark, (float *) force, 3*numAtoms);
611  }
612 #endif // NTESTPID
// Forces are only applied when no atoms strayed and we are not in commOnly mode.
613  if (!patches[i].pmeForceMsg->numStrayAtoms && !simParams->commOnly) {
614  for(int j=0;j < numAtoms;j++) {
615  double f1 = 0.0;
616  double f2 = 0.0;
617  double f3 = 0.0;
618  if (simParams->alchOn) {
619  f1 += force2[j].x * lambda1Down + force[j].x * lambda1Up;
620  f2 += force2[j].y * lambda1Down + force[j].y * lambda1Up;
621  f3 += force2[j].z * lambda1Down + force[j].z * lambda1Up;
622  if (simParams->alchDecouple) {
623  f1 += force3[j].x * lambda3Up + force4[j].x * lambda3Down;
624  f2 += force3[j].y * lambda3Up + force4[j].y * lambda3Down;
625  f3 += force3[j].z * lambda3Up + force4[j].z * lambda3Down;
626  }
627  if (bool(simParams->alchElecLambdaStart) || simParams->alchThermIntOn) {
628  f1 += force5[j].x * (lambda1Up + lambda1Down - 1.0) * (-1.0);
629  f2 += force5[j].y * (lambda1Up + lambda1Down - 1.0) * (-1.0);
630  f3 += force5[j].z * (lambda1Up + lambda1Down - 1.0) * (-1.0);
631  }
632  } else {
633  f1 += force[j].x;
634  f2 += force[j].y;
635  f3 += force[j].z;
636  }
// Transform fractional-space force components back to Cartesian coordinates.
637  f[j].x += f1*r1x + f2*r2x + f3*r3x;
638  f[j].y += f1*r1y + f2*r2y + f3*r3y;
639  f[j].z += f1*r1z + f2*r2z + f3*r3z;
640  }
641  }
642 
643 #ifdef TESTPID
644  if (patches[i].patchID == TESTPID) {
645  fprintf(stderr, "Storing slow force array for patch %d\n",
646  patches[i].patchID);
647  TestArray_write<double>("pme_force_good.bin", "PME force good",
648  (double *) f, 3*numAtoms);
649  }
650 #endif
651 
652  patches[i].forceBox->close(&r);
653  delete patches[i].pmeForceMsg;
654  patches[i].pmeForceMsg = NULL;
655  }
656 
657 }
658 
// NOTE(review): signature line (original line 659) missing from this extraction —
// per the symbol index this is bool ComputePmeCUDA::storePmeForceMsg(PmeForceMsg *msg).
// Lock-protected countdown: stores the incoming force message and returns true
// only on the call that delivers the last outstanding patch message, resetting
// the counter for the next step.
660  bool done = false;
661  int i;
662  CmiLock(lock);
663  patchCounter--;
664  i = patchCounter;
665  if (patchCounter == 0) {
666  patchCounter = getNumPatches();
667  done = true;
668  }
669  CmiUnlock(lock);
// NOTE(review): the message lands in the slot indexed by the post-decrement counter,
// i.e. by arrival order rather than by patch ID — recvForces() later pairs
// patches[i] with patches[i].pmeForceMsg by index, so the sender ordering must
// guarantee the match; verify against ComputePmeCUDAMgr.
670  if (patches[i].pmeForceMsg != NULL)
671  NAMD_bug("ComputePmeCUDA::storePmeForceMsg, already contains message");
672  patches[i].pmeForceMsg = msg;
673  return done;
674 }
675 #endif // defined(NAMD_CUDA) || defined(NAMD_HIP)
static Node * Object()
Definition: Node.h:86
void setNumPatches(int n)
Definition: Compute.h:52
float * chargeFactors3
#define NAMD_EVENT_STOP(eon, id)
int sequence(void)
Definition: Compute.h:64
int size(void) const
Definition: ResizeArray.h:131
void unregisterAvgPositionPickup(Compute *cid, Box< Patch, CompAtom > **const box)
Definition: Patch.C:139
int32 ComputeID
Definition: NamdTypes.h:288
static void partition(int *order, const FullAtom *atoms, int begin, int end)
Definition: SortAtoms.C:45
static PatchMap * Object()
Definition: PatchMap.h:27
void getHomePencil(PatchID patchID, int &homey, int &homez)
Definition: Vector.h:72
virtual void submit(void)=0
SimParameters * simParameters
Definition: Node.h:181
#define COULOMB
Definition: common.h:53
BigReal & item(int i)
Definition: ReductionMgr.h:336
void unregisterForceDeposit(Compute *cid, Box< Patch, Results > **const box)
Definition: Patch.C:238
float x
Definition: CudaRecord.h:63
void recvAtoms(PmeAtomMsg *msg)
BigReal z
Definition: Vector.h:74
Position position
Definition: NamdTypes.h:78
float * chargeFactors1
SubmitReduction * willSubmit(int setID, int size=-1)
Definition: ReductionMgr.C:368
static ReductionMgr * Object(void)
Definition: ReductionMgr.h:290
Patch * patch(PatchID pid)
Definition: PatchMap.h:244
ComputePmeCUDA(ComputeID c, PatchIDList &pids)
#define SQRT_PI
Definition: ComputeExt.C:30
bool storePmeForceMsg(PmeForceMsg *msg)
Charge charge
Definition: NamdTypes.h:79
float * chargeFactors4
#define PRIORITY_SIZE
Definition: Priorities.h:13
#define PME_PRIORITY
Definition: Priorities.h:29
#define NAMD_EVENT_START(eon, id)
void NAMD_bug(const char *err_msg)
Definition: common.C:195
CudaAtom * atoms
Force * f[maxNumForces]
Definition: PatchTypes.h:150
uint8 partition
Definition: NamdTypes.h:81
BigReal x
Definition: Vector.h:74
float z
Definition: CudaRecord.h:63
NAMD_HOST_DEVICE Vector a_r() const
Definition: Lattice.h:284
NAMD_HOST_DEVICE Vector b_r() const
Definition: Lattice.h:285
NAMD_HOST_DEVICE Vector c_r() const
Definition: Lattice.h:286
virtual ~ComputePmeCUDA()
float * chargeFactors5
void unregisterPositionPickup(Compute *cid, Box< Patch, CompAtom > **const box)
Definition: Patch.C:121
#define simParams
Definition: Output.C:131
int getNumPatches()
Definition: Compute.h:53
BigReal y
Definition: Vector.h:74
int getNode(int i, int j)
float y
Definition: CudaRecord.h:63
ComputePmeCUDA * compute
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
float * chargeFactors2
int32 PatchID
Definition: NamdTypes.h:287
NAMD_HOST_DEVICE Vector origin() const
Definition: Lattice.h:278
double BigReal
Definition: common.h:123