NAMD
ComputePmeCUDA.C
Go to the documentation of this file.
1 #include <numeric>
2 #include <algorithm>
3 #ifdef NAMD_CUDA
4 #include <cuda_runtime.h>
5 #endif
6 #ifdef NAMD_HIP
7 #include <hip/hip_runtime.h>
8 #endif
9 #include "Node.h"
10 #include "SimParameters.h"
11 #include "Priorities.h"
12 #include "ComputeNonbondedUtil.h"
13 #include "ComputePmeCUDA.h"
14 #include "ComputePmeCUDAMgr.h"
15 #include "PmeSolver.h"
16 #include "HomePatch.h"
17 #include "PatchData.h"
18 #include "NamdEventsProfiling.h"
19 #include "TestArray.h"
20 
21 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
22 //
23 // Class creator, multiple patches
24 //
// Multi-patch constructor body.
// NOTE(review): the constructor signature (source line 25) is missing from this
// extraction — presumably ComputePmeCUDA::ComputePmeCUDA(ComputeID, PatchIDList&);
// confirm against the repository.
26  setNumPatches(pids.size());
27  patches.resize(getNumPatches());
// Record the patch ID of every patch this compute is responsible for.
28  for (int i=0;i < getNumPatches();i++) {
29  patches[i].patchID = pids[i];
30  }
// Cached self-energy accumulators start at zero; they are (re)computed lazily
// in calcSelfEnergy / calcSelfEnergyFEP / calcSelfEnergyTI.
31  selfEnergy = 0;
32  selfEnergyFEP = 0;
33  selfEnergyTI1 = 0;
34  selfEnergyTI2 = 0;
35 }
36 
37 //
38 // Class creator, single patch
39 //
// Single-patch constructor body.
// NOTE(review): the constructor signature (source line 40) is missing from this
// extraction; the body registers exactly one patch ID and zeroes the cached
// self-energy accumulators, mirroring the multi-patch constructor above.
41  setNumPatches(1);
42  patches.resize(getNumPatches());
43  patches[0].patchID = pid;
44  selfEnergy = 0;
45  selfEnergyFEP = 0;
46  selfEnergyTI1 = 0;
47  selfEnergyTI2 = 0;
48 }
49 
50 //
51 // Class destructor
52 //
// Destructor body.
// NOTE(review): the destructor signature (source line 53) is missing from this
// extraction. Unregisters every box this compute registered in initialize(),
// then releases the reduction object and the Converse lock created there.
54  for (int i=0;i < getNumPatches();i++) {
// Boxes are only unregistered if they were actually registered (non-NULL),
// so destroying a never-initialized compute is safe.
55  if (patches[i].positionBox != NULL) {
56  PatchMap::Object()->patch(patches[i].patchID)->unregisterPositionPickup(this, &patches[i].positionBox);
57  }
58  if (patches[i].avgPositionBox != NULL) {
59  PatchMap::Object()->patch(patches[i].patchID)->unregisterAvgPositionPickup(this, &patches[i].avgPositionBox);
60  }
61  if (patches[i].forceBox != NULL) {
62  PatchMap::Object()->patch(patches[i].patchID)->unregisterForceDeposit(this, &patches[i].forceBox);
63  }
64  }
65  delete reduction;
66  CmiDestroyLock(lock);
67 }
68 
69 //
70 // Initialize
71 //
// initialize(): one-time setup run once the patches exist.
// NOTE(review): the function signature (source line 72) and the local
// simParams declaration (source line 76) are missing from this extraction.
// Creates the lock used by storePmeForceMsg, locates the local
// ComputePmeCUDAMgr branch, and registers position/force boxes with each patch.
73  lock = CmiCreateLock();
74 
75  // Sanity Check
// Unsupported feature combinations abort immediately rather than
// producing silently wrong PME results.
77  if (simParams->lesOn) NAMD_bug("ComputePmeCUDA::ComputePmeCUDA, lesOn not yet implemented");
78  if (simParams->pairInteractionOn) NAMD_bug("ComputePmeCUDA::ComputePmeCUDA, pairInteractionOn not yet implemented");
79 
80  sendAtomsDone = false;
82 #ifdef NODEGROUP_FORCE_REGISTER
83  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
84  PatchData *patchData = cpdata.ckLocalBranch();
85  nodeReduction = patchData->reduction;
86 #endif
87  // basePriority = PME_PRIORITY;
// patchCounter counts down in storePmeForceMsg(); start at the patch count.
88  patchCounter = getNumPatches();
89 
90  // Get proxy to ComputePmeCUDAMgr
91  computePmeCUDAMgrProxy = CkpvAccess(BOCclass_group).computePmeCUDAMgr;
92  mgr = computePmeCUDAMgrProxy.ckLocalBranch();
93  if (mgr == NULL)
94  NAMD_bug("ComputePmeCUDA::ComputePmeCUDA, unable to locate local branch of BOC entry computePmeCUDAMgr");
95  pmeGrid = mgr->getPmeGrid();
96 
97  for (int i=0;i < getNumPatches();i++) {
// Guard against double initialization: all boxes must still be NULL.
98  if (patches[i].positionBox != NULL || patches[i].avgPositionBox != NULL
99  || patches[i].forceBox != NULL || patches[i].patch != NULL)
100  NAMD_bug("ComputePmeCUDA::initialize() called twice or boxes not set to NULL");
101  if (!(patches[i].patch = PatchMap::Object()->patch(patches[i].patchID))) {
102  NAMD_bug("ComputePmeCUDA::initialize() patch not found");
103  }
104  patches[i].positionBox = patches[i].patch->registerPositionPickup(this);
105  patches[i].forceBox = patches[i].patch->registerForceDeposit(this);
106  patches[i].avgPositionBox = patches[i].patch->registerAvgPositionPickup(this);
107  }
108 
109  setupActivePencils();
110 }
111 
// NOTE(review): the enclosing signature (source line 112) is missing from this
// extraction; the body only flags that atom data must be resent to the PME
// manager on the next doWork().
113  atomsChanged = true;
114 }
115 
116 //
117 // Setup, see which pencils overlap with the patches held by this compute
118 //
119 void ComputePmeCUDA::setupActivePencils() {
120  PatchMap *patchMap = PatchMap::Object();
121 
122  for (int i=0;i < getNumPatches();i++) {
123  int homey = -1;
124  int homez = -1;
125  mgr->getHomePencil(patches[i].patchID, homey, homez);
126 
127  patches[i].homePencilY = homey;
128  patches[i].homePencilZ = homez;
129  patches[i].homePencilNode = mgr->getNode(homey,homez);
130  RegisterPatchMsg *msg = new RegisterPatchMsg();
131  msg->i = homey;
132  msg->j = homez;
133  computePmeCUDAMgrProxy[patches[i].homePencilNode].registerPatch(msg);
134  }
135 
136  atomsChanged = true;
137 
138 }
139 
// noWork() body.
// NOTE(review): the function signature (source line 140) is missing from this
// extraction. Returns 0 (work to do) when full electrostatics is scheduled
// this step; otherwise submits an empty reduction, skips all boxes, and
// returns 1 (nothing to do).
141 
142  if (patches[0].patch->flags.doFullElectrostatics) return 0;
143 
144  reduction->submit();
145 
146  for (int i=0;i < getNumPatches();i++) {
147  patches[i].positionBox->skip();
148  patches[i].forceBox->skip();
149  // We only need to call skip() once
150  if (patches[i].patchID == 0) computePmeCUDAMgrProxy[patches[i].homePencilNode].skip();
151  }
152 
153  return 1;
154 }
155 
// doWork() body — two-phase state machine driven by sendAtomsDone.
// NOTE(review): the function signature (source line 156) is missing from this
// extraction. First invocation of a step sends atoms to the PME manager;
// the second invocation (after forces arrive) integrates them back.
157  NAMD_EVENT_START(1, NamdProfileEvent::COMPUTE_PME_CUDA);
158 
159  if (sendAtomsDone) {
160  // Second part of computation: receive forces from ComputePmeCUDAMgr
161  // basePriority = PME_OFFLOAD_PRIORITY;
162  sendAtomsDone = false;
163  recvForces();
164  } else {
165  // First part of computation: send atoms to ComputePmeCUDAMgr
166  sendAtomsDone = true;
167  // basePriority = COMPUTE_HOME_PRIORITY + PATCH_PRIORITY(patchID);
168  sendAtoms();
169  }
170  NAMD_EVENT_STOP(1, NamdProfileEvent::COMPUTE_PME_CUDA);
171 }
172 
// sendAtoms(): phase 1 of a PME step. For each patch, opens the position box,
// optionally computes the PME Ewald self-energy, packs atoms into fractional
// [0,1) coordinates with sqrt(Coulomb)-scaled charges into a PmeAtomMsg, and
// ships the message to the manager branch owning the patch's home pencil.
// Finally submits the (self-energy) reductions.
// NOTE(review): several source lines are missing from this extraction —
// the local simParams declaration (line 193), the closing part of the
// coulomb_sqrt expression (line 199), and a SET_PRIORITY call (line 249).
173 void ComputePmeCUDA::sendAtoms() {
174 
// Cache lattice origin and reciprocal vectors once; they are identical for
// every patch handled here.
175  Lattice& lattice = patches[0].patch->lattice;
176  Vector origin = lattice.origin();
177  Vector recip1 = lattice.a_r();
178  Vector recip2 = lattice.b_r();
179  Vector recip3 = lattice.c_r();
180  double ox = origin.x;
181  double oy = origin.y;
182  double oz = origin.z;
183  double r1x = recip1.x;
184  double r1y = recip1.y;
185  double r1z = recip1.z;
186  double r2x = recip2.x;
187  double r2y = recip2.y;
188  double r2z = recip2.z;
189  double r3x = recip3.x;
190  double r3y = recip3.y;
191  double r3z = recip3.z;
192 
194  for (int i=0;i < getNumPatches();i++) {
// A pending force message means the previous step never consumed its forces.
195  if (patches[i].pmeForceMsg != NULL)
196  NAMD_bug("ComputePmeCUDA::sendAtoms, pmeForceMsg is not empty");
197 
// NOTE(review): the continuation/closing of this expression (source line 199)
// is missing from this extraction.
198  const BigReal coulomb_sqrt = sqrt( COULOMB * ComputeNonbondedUtil::scaling
200 
201  bool doMolly = patches[i].patch->flags.doMolly;
202  bool doEnergy = patches[i].patch->flags.doEnergy;
203  bool doVirial = patches[i].patch->flags.doVirial;
204  PatchMap *patchMap = PatchMap::Object();
205 
206  // Send atom patch to pencil(s)
207  // #ifdef NETWORK_PROGRESS
208  // CmiNetworkProgress();
209  // #endif
210 
// With MOLLY, averaged positions replace the raw ones.
211  CompAtom *x = patches[i].positionBox->open();
212  if ( doMolly ) {
213  patches[i].positionBox->close(&x);
214  x = patches[i].avgPositionBox->open();
215  }
216 
217  int numAtoms = patches[i].patch->getNumAtoms();
218 
219  // FIXME:
220  // the computation of self energies makes a bold assumption that charges are never changed
221  // this is not true when the system has some kind of chemical interactions, like reduction-oxidation reaction
222  // we may need an additional flag to determine whether NAMD is simulating a system with varying atomic charges
223  const bool isFirstStep = (patches[0].patch->flags.step == simParams->firstTimestep) || (selfEnergy == 0);
224  if (doEnergy) {
225  if (simParams->alchFepOn) {
226  calcSelfEnergyFEP(numAtoms, x, isFirstStep);
227 // fprintf(stdout, "self lambda1 = %lf ; self lambda2 = %lf\n", selfEnergy, selfEnergy_F);
228  } else if (simParams->alchThermIntOn) {
229  calcSelfEnergyTI(numAtoms, x, isFirstStep);
230  } else {
231  calcSelfEnergy(numAtoms, x, isFirstStep);
232  }
233  }
234 
235  // const Vector ucenter = patches[i].patch->lattice.unscale(patchMap->center(patches[i].patchID));
236  // const BigReal recip11 = patches[i].patch->lattice.a_r().x;
237  // const BigReal recip22 = patches[i].patch->lattice.b_r().y;
238  // const BigReal recip33 = patches[i].patch->lattice.c_r().z;
239 
240 // PmeAtomMsg *msg = new (numAtoms, numAtoms, numAtoms, PRIORITY_SIZE) PmeAtomMsg;
// Varsize message: factor arrays are allocated only when the corresponding
// alchemical option is enabled (size 0 otherwise).
241  PmeAtomMsg *msg;
242  const int alchGrid = simParams->alchOn ? 1 : 0;
243  const int alchDecoupleGrid = simParams->alchDecouple ? 1: 0;
244  const int alchSoftCoreOrTI = (simParams->alchElecLambdaStart > 0 || simParams->alchThermIntOn) ? 1 : 0;
245  msg = new (numAtoms, alchGrid * numAtoms, alchGrid * numAtoms,
246  alchDecoupleGrid * numAtoms, alchDecoupleGrid * numAtoms,
247  alchSoftCoreOrTI * numAtoms, PRIORITY_SIZE) PmeAtomMsg;
248 
250  // NOTE:
251  // patch already contains the centered coordinates and scaled charges
252  // memcpy(msg->atoms, patch->getCudaAtomList(), sizeof(CudaAtom)*numAtoms);
253 
254 
255  msg->numAtoms = numAtoms;
256  // msg->patchIndex = i;
257  msg->i = patches[i].homePencilY;
258  msg->j = patches[i].homePencilZ;
259  msg->compute = this;
260  msg->pe = CkMyPe();
261  msg->doEnergy = doEnergy;
262  msg->doVirial = doVirial;
263  msg->lattice = lattice;
264  msg->simulationStep = patches[0].patch->flags.step;
265  CudaAtom *atoms = msg->atoms;
266  float *atomChargeFactors1 = msg->chargeFactors1; // normal + appearing atoms
267  float *atomChargeFactors2 = msg->chargeFactors2; // normal + disappearing atoms
268  float *atomChargeFactors3 = msg->chargeFactors3; // only appearing atoms
269  float *atomChargeFactors4 = msg->chargeFactors4; // only disappearing atoms
270  float *atomChargeFactors5 = msg->chargeFactors5; // only normal atoms
271  // BigReal miny = 1.0e20;
272  // BigReal minz = 1.0e20;
273  for (int j=0;j < numAtoms;j++) {
274  CudaAtom atom;
275  float factor1 = 1.0f;
276  float factor2 = 1.0f;
277  float factor3 = 1.0f;
278  float factor4 = 1.0f;
279  float factor5 = 1.0f;
280 #if 1
281  BigReal q = x[j].charge;
282  // Convert atom positions to range [0,1)
283  double px = x[j].position.x - ox;
284  double py = x[j].position.y - oy;
285  double pz = x[j].position.z - oz;
286  double wx = px*r1x + py*r1y + pz*r1z;
287  double wy = px*r2x + py*r2y + pz*r2z;
288  double wz = px*r3x + py*r3y + pz*r3z;
289  // double wx = x[j].position.x*recip11;
290  // double wy = x[j].position.y*recip22;
291  // double wz = x[j].position.z*recip33;
// Wrap fractional coordinates into [0,1) around the cell center.
292  wx = (wx - (floor(wx + 0.5) - 0.5));
293  wy = (wy - (floor(wy + 0.5) - 0.5));
294  wz = (wz - (floor(wz + 0.5) - 0.5));
295  // wx = (wx - floor(wx));
296  // wy = (wy - floor(wy));
297  // wz = (wz - floor(wz));
298  // if (wx >= 1.0) wx -= 1.0;
299  // if (wy >= 1.0) wy -= 1.0;
300  // if (wz >= 1.0) wz -= 1.0;
301  atom.x = (float)wx;
302  atom.y = (float)wy;
303  atom.z = (float)wz;
// Float rounding can push a wrapped coordinate back to exactly 1.0f.
304  if (atom.x >= 1.0f) atom.x -= 1.0f;
305  if (atom.y >= 1.0f) atom.y -= 1.0f;
306  if (atom.z >= 1.0f) atom.z -= 1.0f;
307  atom.q = (float)(q*coulomb_sqrt);
308  // CHC: for multiple grids, charges shouldn't be scaled here!
// Alchemical partition decides which grid(s) each atom contributes to;
// the per-grid weights are carried separately as charge factors.
309  int part = x[j].partition;
310  if(simParams->alchOn){
311  switch(part){
312  case 0: {
313  factor1 = 1.0f;
314  factor2 = 1.0f;
315  if (simParams->alchDecouple) {
316  factor3 = 0.0f;
317  factor4 = 0.0f;
318  }
319  if (simParams->alchElecLambdaStart || simParams->alchThermIntOn) {
320  factor5 = 1.0f;
321  }
322  break;
323  }
324  case 1: {
325  factor1 = 1.0f;
326  factor2 = 0.0f;
327  if (simParams->alchDecouple) {
328  factor3 = 1.0f;
329  factor4 = 0.0f;
330  }
331  if (simParams->alchElecLambdaStart || simParams->alchThermIntOn) {
332  factor5 = 0.0f;
333  }
334  break;
335  }
336  case 2: {
337  factor1 = 0.0f;
338  factor2 = 1.0f;
339  if (simParams->alchDecouple) {
340  factor3 = 0.0f;
341  factor4 = 1.0f;
342  }
343  if (simParams->alchElecLambdaStart || simParams->alchThermIntOn) {
344  factor5 = 0.0f;
345  }
346  break;
347  }
348  default: NAMD_bug("Invalid partition number"); break;
349  }
350 // atom.q *= elecLambda;
351  atomChargeFactors1[j] = factor1;
352  atomChargeFactors2[j] = factor2;
353  if (simParams->alchDecouple) {
354  atomChargeFactors3[j] = factor3;
355  atomChargeFactors4[j] = factor4;
356  }
357  if (simParams->alchElecLambdaStart || simParams->alchThermIntOn) {
358  atomChargeFactors5[j] = factor5;
359  }
360  }
361  atoms[j] = atom;
362  // miny = std::min(x[j].position.y, miny);
363  // minz = std::min(x[j].position.z, minz);
364 #else
365  atom.x = (float) x[j].position.x;
366  atom.y = (float) x[j].position.y;
367  atom.z = (float) x[j].position.z;
368  atom.q = x[j].charge;
369 #endif
370  }
371 #if defined(NTESTPID)
372  if (NTESTPID == patches[i].patch->getPatchID()) {
373  char fname[128];
374  char remark[128];
375  sprintf(fname, "pme_xyzq_soa_pid%d_step%d.bin", NTESTPID,
376  patches[i].patch->flags.step);
377  sprintf(remark, "SOA PME xyzq, patch %d, step %d", NTESTPID,
378  patches[i].patch->flags.step);
379  TestArray_write<float>(fname, remark, (float *) atoms, 4*numAtoms);
380  }
381 #endif // NTESTPID
382  // Calculate corner with minimum y and z for this patch
383  // double wy = miny*recip22;
384  // double wz = minz*recip33;
385  // msg->miny = (int)((double)pmeGrid.K2*(wy - (floor(wy + 0.5) - 0.5)));
386  // msg->minz = (int)((double)pmeGrid.K3*(wz - (floor(wz + 0.5) - 0.5)));
387 
388  // For local (within shared memory node), get pointer to memory location and do direct memcpy
389  // For global (on different shread memory nodes),
390  if (patches[i].homePencilNode == CkMyNode()) {
391  mgr->recvAtoms(msg);
392  } else {
393  computePmeCUDAMgrProxy[patches[i].homePencilNode].recvAtoms(msg);
394  }
395 
// Close whichever box was actually opened above.
396  if ( doMolly )
397  patches[i].avgPositionBox->close(&x);
398  else
399  patches[i].positionBox->close(&x);
400  }
401 
402 #ifdef NODEGROUP_FORCE_REGISTER
403  // XXX Expect a race condition, need atomic access to nodeReduction
404  nodeReduction->item(REDUCTION_ELECT_ENERGY_SLOW) += selfEnergy;
405  if (simParams->alchFepOn) {
406  nodeReduction->item(REDUCTION_ELECT_ENERGY_SLOW_F) += selfEnergyFEP;
407  }
408  if (simParams->alchThermIntOn) {
409  nodeReduction->item(REDUCTION_ELECT_ENERGY_SLOW_TI_1) += selfEnergyTI1;
410  nodeReduction->item(REDUCTION_ELECT_ENERGY_SLOW_TI_2) += selfEnergyTI2;
411  }
412 #endif
// The cached self-energy terms are contributed every step, even when they
// were not recomputed above.
413  reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += selfEnergy;
414  if (simParams->alchFepOn) {
415  reduction->item(REDUCTION_ELECT_ENERGY_SLOW_F) += selfEnergyFEP;
416  }
417  if (simParams->alchThermIntOn) {
418  reduction->item(REDUCTION_ELECT_ENERGY_SLOW_TI_1) += selfEnergyTI1;
419  reduction->item(REDUCTION_ELECT_ENERGY_SLOW_TI_2) += selfEnergyTI2;
420  }
421  reduction->submit();
422 }
423 
424 //
425 // Calculate self-energy and send to PmeSolver
426 //
// Recomputes the cached selfEnergy (sum of q_i^2 over the patch's atoms) on
// the first step or whenever the cache is invalid; otherwise leaves the
// cached value untouched. The result is later added to the reduction in
// sendAtoms().
// NOTE(review): source lines 436-437 — which presumably apply the Ewald
// self-energy prefactor to the accumulated sum — are missing from this
// extraction; confirm against the repository.
427 void ComputePmeCUDA::calcSelfEnergy(int numAtoms, CompAtom *x, bool isFirstStep) {
428  // FIXME: check if the atomic charges are constant during the simulation.
429  bool constantAtomicCharges = true;
430  if (isFirstStep || !constantAtomicCharges) {
431  selfEnergy = 0;
432  for (int i=0;i < numAtoms;i++) {
433  selfEnergy += x[i].charge*x[i].charge;
434  }
435  //const double SQRT_PI = 1.7724538509055160273; /* mathematica 15 digits*/
438  }
439 }
440 
// FEP variant of the self-energy: accumulates selfEnergy (for lambda_1) and
// selfEnergyFEP (for lambda_2), weighting each atom's q^2 by the electrostatic
// lambda of its alchemical partition. The thread_local lambda caches let the
// function skip recomputation when neither the step nor the lambdas changed.
// NOTE(review): the local simParams declaration (source line 442) and the
// trailing scaling lines (495-496) are missing from this extraction.
441 void ComputePmeCUDA::calcSelfEnergyFEP(int numAtoms, CompAtom *atoms, bool isFirstStep) {
443  if(simParams->alchFepOn == false){
444  NAMD_bug("Called calcSelfEnergyFEP() in non-FEP code!");
445  }
446  // FIXME: check if the atomic charges are constant during the simulation.
447  bool constantAtomicCharges = true;
448  BigReal scaleLambda1, scaleLambda2; // scale factors for λ_1 and λ_2
449  const BigReal alchLambda1 = simParams->getCurrentLambda(patches[0].patch->flags.step);
450  const BigReal alchLambda2 = simParams->getCurrentLambda2(patches[0].patch->flags.step);
// Caches of the last-used electrostatic lambdas; compared below to detect
// lambda changes between calls.
451  static thread_local BigReal lambda1Up = simParams->getElecLambda(alchLambda1);
452  static thread_local BigReal lambda2Up = simParams->getElecLambda(alchLambda2);
453  static thread_local BigReal lambda1Down = simParams->getElecLambda(1.0 - alchLambda1);
454  static thread_local BigReal lambda2Down = simParams->getElecLambda(1.0 - alchLambda2);
455  if (isFirstStep || !constantAtomicCharges ||
456  lambda1Up != simParams->getElecLambda(alchLambda1) ||
457  lambda2Up != simParams->getElecLambda(alchLambda2) ||
458  lambda1Down != simParams->getElecLambda(1.0 - alchLambda1) ||
459  lambda2Down != simParams->getElecLambda(1.0 - alchLambda2))
460  {
461  lambda1Up = simParams->getElecLambda(alchLambda1);
462  lambda2Up = simParams->getElecLambda(alchLambda2);
463  lambda1Down = simParams->getElecLambda(1.0 - alchLambda1);
464  lambda2Down = simParams->getElecLambda(1.0 - alchLambda2);
465  selfEnergy = 0.0; // self energy for λ_1
466  selfEnergyFEP = 0.0; // self energy for λ_2
467  for (int i = 0; i < numAtoms; ++i) {
// Partition 0 = unperturbed, 1 = appearing, 2 = disappearing.
468  switch (atoms[i].partition) {
469  case 0: {
470  scaleLambda1 = 1.0f;
471  scaleLambda2 = 1.0f;
472  break;
473  }
474  case 1: {
475  scaleLambda1 = lambda1Up; // lambda1 up
476  scaleLambda2 = lambda2Up; // lambda2 up
477  break;
478  }
479  case 2: {
480  scaleLambda1 = lambda1Down; // lambda1 down
481  scaleLambda2 = lambda2Down; // lambda2 down
482  break;
483  }
484  default: NAMD_bug("Invalid partition number");
485  }
486  selfEnergy += scaleLambda1 * (atoms[i].charge * atoms[i].charge);
487  selfEnergyFEP += scaleLambda2 * (atoms[i].charge * atoms[i].charge);
// With alchDecouple, the decoupled grids contribute the complementary weight.
488  if (simParams->alchDecouple) {
489  selfEnergy += (1.0 - scaleLambda1) * (atoms[i].charge * atoms[i].charge);
490  selfEnergyFEP += (1.0 - scaleLambda2) * (atoms[i].charge * atoms[i].charge);
491  }
492  // scale factor for partition 0 is always 1
493  // so it's not necessary to compensate the self energy with (lambda1Up + lambda1Down - 1.0) * -1.0
494  }
497  }
498 }
499 
// Thermodynamic-integration variant of the self-energy: accumulates
// selfEnergy plus the TI derivative terms selfEnergyTI1/selfEnergyTI2,
// weighting each atom's q^2 by its partition's electrostatic lambda.
// NOTE(review): the error string below says "non-FEP code" — it presumably
// should read "non-TI code"; cannot be changed here without altering runtime
// output, flagging for a follow-up fix.
// NOTE(review): the local simParams declaration (source line 501) and the
// trailing scaling lines (553-555) are missing from this extraction.
500 void ComputePmeCUDA::calcSelfEnergyTI(int numAtoms, CompAtom *atoms, bool isFirstStep) {
502  if(simParams->alchThermIntOn == false){
503  NAMD_bug("Called calcSelfEnergyTI() in non-FEP code!");
504  }
505  // FIXME: check if the atomic charges are constant during the simulation.
506  bool constantAtomicCharges = true;
507  const BigReal alchLambda1 = simParams->getCurrentLambda(patches[0].patch->flags.step);
// Cached lambdas from the previous call; recompute only when they change.
508  static thread_local BigReal lambda1Up = simParams->getElecLambda(alchLambda1);
509  static thread_local BigReal lambda1Down = simParams->getElecLambda(1.0 - alchLambda1);
510  if (isFirstStep || !constantAtomicCharges ||
511  lambda1Up != simParams->getElecLambda(alchLambda1) ||
512  lambda1Down != simParams->getElecLambda(1.0 - alchLambda1))
513  {
514  lambda1Up = simParams->getElecLambda(alchLambda1);
515  lambda1Down = simParams->getElecLambda(1.0 - alchLambda1);
516  selfEnergy = 0.0;
517  selfEnergyTI1 = 0.0;
518  selfEnergyTI2 = 0.0;
519  BigReal elecLambda1 = 0.0;
520  double factor_ti_1 = 0.0;
521  double factor_ti_2 = 0.0;
522  for (int i = 0; i < numAtoms; ++i) {
// Partition 0 = unperturbed, 1 = appearing, 2 = disappearing.
523  switch (atoms[i].partition) {
524  case 0: {
525  elecLambda1 = 1.0;
526  factor_ti_1 = 0.0;
527  factor_ti_2 = 0.0;
528  break;
529  }
530  case 1: {
531  elecLambda1 = lambda1Up;
532  factor_ti_1 = 1.0;
533  factor_ti_2 = 0.0;
534  break;
535  }
536  case 2: {
537  elecLambda1 = lambda1Down;
538  factor_ti_1 = 0.0;
539  factor_ti_2 = 1.0;
540  break;
541  }
542  default: NAMD_bug("Invalid partition number");
543  }
544  selfEnergy += elecLambda1 * (atoms[i].charge * atoms[i].charge);
545  selfEnergyTI1 += factor_ti_1 * (atoms[i].charge * atoms[i].charge);
546  selfEnergyTI2 += factor_ti_2 * (atoms[i].charge * atoms[i].charge);
// With alchDecouple the decoupled grid contributes the complementary
// weight, and the TI derivative terms change sign accordingly.
547  if (simParams->alchDecouple) {
548  selfEnergy += (1.0 - elecLambda1) * (atoms[i].charge * atoms[i].charge);
549  selfEnergyTI1 -= factor_ti_1 * (atoms[i].charge * atoms[i].charge);
550  selfEnergyTI2 -= factor_ti_2 * (atoms[i].charge * atoms[i].charge);
551  }
552  }
556  }
557 }
558 
// recvForces(): phase 2 of a PME step. Consumes the PmeForceMsg stored for
// each patch by storePmeForceMsg(), converts the per-grid fractional forces
// back to Cartesian via the reciprocal lattice vectors (with alchemical
// lambda weighting when enabled), accumulates into the patch's slow-force
// array, then closes the force box and frees the message.
// NOTE(review): the local simParams declaration (source line 576) is missing
// from this extraction.
559 void ComputePmeCUDA::recvForces() {
560 
561  Lattice& lattice = patches[0].patch->lattice;
562  Vector origin = lattice.origin();
563  Vector recip1 = lattice.a_r();
564  Vector recip2 = lattice.b_r();
565  Vector recip3 = lattice.c_r();
566  double r1x = recip1.x;
567  double r1y = recip1.y;
568  double r1z = recip1.z;
569  double r2x = recip2.x;
570  double r2y = recip2.y;
571  double r2z = recip2.z;
572  double r3x = recip3.x;
573  double r3y = recip3.y;
574  double r3z = recip3.z;
575 
577 
// Lambda weights are only assigned (and only read) when alchOn is set;
// lambda3Up/Down additionally require alchDecouple.
578  double alchLambda1, lambda1Up, lambda1Down;
579  double lambda3Up, lambda3Down;
580  if(simParams->alchOn){
581  alchLambda1 = simParams->getCurrentLambda(patches[0].patch->flags.step);
582  lambda1Up = simParams->getElecLambda(alchLambda1);
583  lambda1Down = simParams->getElecLambda(1 - alchLambda1);
584  if (simParams->alchDecouple) {
585  lambda3Up = 1 - lambda1Up;
586  lambda3Down = 1 - lambda1Down;
587  }
588  }
589 
590  //fprintf(stderr, "AP recvForces\n");
591  for (int i=0;i < getNumPatches();i++) {
592  if (patches[i].pmeForceMsg == NULL)
593  NAMD_bug("ComputePmeCUDA::recvForces, no message in pmeForceMsg");
594 
// One force array per PME grid (multiple grids exist only alchemically).
595  const CudaForce* force = patches[i].pmeForceMsg->force;
596  const CudaForce* force2 = patches[i].pmeForceMsg->force2;
597  const CudaForce* force3 = patches[i].pmeForceMsg->force3;
598  const CudaForce* force4 = patches[i].pmeForceMsg->force4;
599  const CudaForce* force5 = patches[i].pmeForceMsg->force5;
600  Results *r = patches[i].forceBox->open();
601  int numAtoms = patches[i].pmeForceMsg->numAtoms;
602  Force *f = r->f[Results::slow];
603 #ifdef TESTPID
604  if (patches[i].patchID == TESTPID) {
605  fprintf(stderr, "Storing scaled PME CudaForce array for patch %d\n",
606  patches[i].patchID);
607  TestArray_write<float>("scaled_pme_force_good.bin",
608  "scaled PME CudaForce good",
609  (float *) force, 3*numAtoms);
610  TestArray_write<double>("lattice_good.bin", "Lattice good",
611  (double *) &lattice, 3*7);
612  }
613 #endif
614 #if defined(NTESTPID)
615  if (NTESTPID == patches[i].patch->getPatchID()) {
616  char fname[128];
617  char remark[128];
618  sprintf(fname, "pme_fxyz_soa_pid%d_step%d.bin", NTESTPID,
619  patches[i].patch->flags.step);
620  sprintf(remark, "SOA PME fxyz, patch %d, step %d", NTESTPID,
621  patches[i].patch->flags.step);
622  TestArray_write<float>(fname, remark, (float *) force, 3*numAtoms);
623  }
624 #endif // NTESTPID
// Forces are applied only when no atoms strayed outside their pencil and
// we are not in communication-only mode.
625  if (!patches[i].pmeForceMsg->numStrayAtoms && !simParams->commOnly) {
626  for(int j=0;j < numAtoms;j++) {
627  double f1 = 0.0;
628  double f2 = 0.0;
629  double f3 = 0.0;
630  if (simParams->alchOn) {
631  f1 += force2[j].x * lambda1Down + force[j].x * lambda1Up;
632  f2 += force2[j].y * lambda1Down + force[j].y * lambda1Up;
633  f3 += force2[j].z * lambda1Down + force[j].z * lambda1Up;
634  if (simParams->alchDecouple) {
635  f1 += force3[j].x * lambda3Up + force4[j].x * lambda3Down;
636  f2 += force3[j].y * lambda3Up + force4[j].y * lambda3Down;
637  f3 += force3[j].z * lambda3Up + force4[j].z * lambda3Down;
638  }
639  if (bool(simParams->alchElecLambdaStart) || simParams->alchThermIntOn) {
640  f1 += force5[j].x * (lambda1Up + lambda1Down - 1.0) * (-1.0);
641  f2 += force5[j].y * (lambda1Up + lambda1Down - 1.0) * (-1.0);
642  f3 += force5[j].z * (lambda1Up + lambda1Down - 1.0) * (-1.0);
643  }
644  } else {
645  f1 += force[j].x;
646  f2 += force[j].y;
647  f3 += force[j].z;
648  }
// Transform fractional-space force components back to Cartesian.
649  f[j].x += f1*r1x + f2*r2x + f3*r3x;
650  f[j].y += f1*r1y + f2*r2y + f3*r3y;
651  f[j].z += f1*r1z + f2*r2z + f3*r3z;
652  }
653  }
654 
655 #ifdef TESTPID
656  if (patches[i].patchID == TESTPID) {
657  fprintf(stderr, "Storing slow force array for patch %d\n",
658  patches[i].patchID);
659  TestArray_write<double>("pme_force_good.bin", "PME force good",
660  (double *) f, 3*numAtoms);
661  }
662 #endif
663 
664  patches[i].forceBox->close(&r);
665  delete patches[i].pmeForceMsg;
666  patches[i].pmeForceMsg = NULL;
667  }
668 
669 }
670 
// storePmeForceMsg() body: stores an incoming force message into a patch
// slot and reports (via the return value) whether all patches of this
// compute now have their forces.
// NOTE(review): the function signature (source line 671) is missing from
// this extraction — presumably bool ComputePmeCUDA::storePmeForceMsg(PmeForceMsg *msg).
// The Converse lock protects patchCounter because manager branches on
// different PEs of the node may call this concurrently.
672  bool done = false;
673  int i;
674  CmiLock(lock);
// Slots are filled in reverse order of the countdown; when the counter hits
// zero it is reset for the next step and done is reported.
675  patchCounter--;
676  i = patchCounter;
677  if (patchCounter == 0) {
678  patchCounter = getNumPatches();
679  done = true;
680  }
681  CmiUnlock(lock);
682  if (patches[i].pmeForceMsg != NULL)
683  NAMD_bug("ComputePmeCUDA::storePmeForceMsg, already contains message");
684  patches[i].pmeForceMsg = msg;
685  return done;
686 }
687 #endif // NAMD_CUDA
static Node * Object()
Definition: Node.h:86
void setNumPatches(int n)
Definition: Compute.h:52
float * chargeFactors3
#define NAMD_EVENT_STOP(eon, id)
int sequence(void)
Definition: Compute.h:64
int size(void) const
Definition: ResizeArray.h:131
void unregisterAvgPositionPickup(Compute *cid, Box< Patch, CompAtom > **const box)
Definition: Patch.C:139
int32 ComputeID
Definition: NamdTypes.h:278
static void partition(int *order, const FullAtom *atoms, int begin, int end)
Definition: SortAtoms.C:45
static PatchMap * Object()
Definition: PatchMap.h:27
void getHomePencil(PatchID patchID, int &homey, int &homez)
Definition: Vector.h:72
SimParameters * simParameters
Definition: Node.h:181
#define COULOMB
Definition: common.h:53
BigReal & item(int i)
Definition: ReductionMgr.h:313
void unregisterForceDeposit(Compute *cid, Box< Patch, Results > **const box)
Definition: Patch.C:238
float x
Definition: CudaRecord.h:63
void recvAtoms(PmeAtomMsg *msg)
BigReal z
Definition: Vector.h:74
Position position
Definition: NamdTypes.h:77
float * chargeFactors1
SubmitReduction * willSubmit(int setID, int size=-1)
Definition: ReductionMgr.C:366
static ReductionMgr * Object(void)
Definition: ReductionMgr.h:279
NodeReduction * reduction
Definition: PatchData.h:133
Patch * patch(PatchID pid)
Definition: PatchMap.h:244
ComputePmeCUDA(ComputeID c, PatchIDList &pids)
#define SQRT_PI
Definition: ComputeExt.C:30
bool storePmeForceMsg(PmeForceMsg *msg)
Charge charge
Definition: NamdTypes.h:78
float * chargeFactors4
#define PRIORITY_SIZE
Definition: Priorities.h:13
#define PME_PRIORITY
Definition: Priorities.h:29
#define NAMD_EVENT_START(eon, id)
void NAMD_bug(const char *err_msg)
Definition: common.C:195
CudaAtom * atoms
Force * f[maxNumForces]
Definition: PatchTypes.h:146
uint8 partition
Definition: NamdTypes.h:80
BigReal x
Definition: Vector.h:74
float z
Definition: CudaRecord.h:63
NAMD_HOST_DEVICE Vector a_r() const
Definition: Lattice.h:284
NAMD_HOST_DEVICE Vector b_r() const
Definition: Lattice.h:285
NAMD_HOST_DEVICE Vector c_r() const
Definition: Lattice.h:286
virtual ~ComputePmeCUDA()
float * chargeFactors5
void unregisterPositionPickup(Compute *cid, Box< Patch, CompAtom > **const box)
Definition: Patch.C:121
#define simParams
Definition: Output.C:129
int getNumPatches()
Definition: Compute.h:53
BigReal y
Definition: Vector.h:74
int getNode(int i, int j)
float y
Definition: CudaRecord.h:63
void submit(void)
Definition: ReductionMgr.h:324
ComputePmeCUDA * compute
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
float * chargeFactors2
int32 PatchID
Definition: NamdTypes.h:277
NAMD_HOST_DEVICE Vector origin() const
Definition: Lattice.h:278
double BigReal
Definition: common.h:123