/*****************************************************************************
 * $Source: /home/cvs/namd/cvsroot/namd2/src/Sequencer.C,v $
 * $Author: jim $
 * $Date: 2016/08/26 19:40:32 $
 * $Revision: 1.1230 $
 *****************************************************************************/

// The UPPER_BOUND macro is used to eliminate all of the per-atom
// computation done for the numerical integration in Sequencer::integrate()
// other than the actual force computation and atom migration.
// The idea is to "turn off" the integration when doing performance
// profiling in order to get an upper bound on the speedup available
// by moving the integration parts to the GPU.
//
// Define it in the Make.config file, e.g. CXXOPTS += -DUPPER_BOUND,
// or simply uncomment the line below.
//
//#define UPPER_BOUND
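//
// A sketch of the guard pattern this macro enables (illustrative only;
// the actual guarded regions are scattered through Sequencer::integrate()):
//
//   #ifndef UPPER_BOUND
//     ... per-atom kick/drift updates and reductions ...
//   #endif
//   ... force computation and atom migration always run ...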

// for GBIS debugging: print net force on each atom
#include "CudaRecord.h"
#include "PatchData.h"
#include "common.h"
#define PRINT_FORCES 0

#include "InfoStream.h"
#include "Node.h"
#include "SimParameters.h"
#include "Sequencer.h"
#include "HomePatch.h"
#include "ReductionMgr.h"
#include "CollectionMgr.h"
#include "BroadcastObject.h"
#include "Output.h"
#include "Controller.h"
#include "Broadcasts.h"
#include "Molecule.h"
#include "NamdOneTools.h"
#include "LdbCoordinator.h"
#include "Thread.h"
#include "Random.h"
#include "PatchMap.inl"
#include "ComputeMgr.h"
#include "ComputeGlobal.h"
#include "NamdEventsProfiling.h"
#include <iomanip>
#include "ComputeCUDAMgr.h"
#include "CollectionMaster.h"
#include "IMDOutput.h"
#include "CudaGlobalMasterServer.h"

#include "TestArray.h"

#include <algorithm> // Used for sorting

#define MIN_DEBUG_LEVEL 3
//#define DEBUGM
//
// Define NL_DEBUG below to activate D_*() macros in integrate_SOA()
// for debugging.
//
//#define NL_DEBUG
#include "Debug.h"

#if USE_HPM
#define START_HPM_STEP 1000
#define STOP_HPM_STEP 1500
#endif

#include "DeviceCUDA.h"
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
#ifdef WIN32
#define __thread __declspec(thread)
#endif
extern __thread DeviceCUDA *deviceCUDA;
#ifdef __IBMCPP__
// IBM compiler requires separate definition for static members
#endif
#endif

#define SPECIAL_PATCH_ID 91

//
// BEGIN
// print_* routines
// assist in debugging SOA integration code
//
static void print_vel_AOS(
    const FullAtom *a,
    int ilo=0, int ihip1=1
    ) {
  printf("AOS Velocities:\n");
  for (int i=ilo; i < ihip1; i++) {
    printf("%d %g %g %g\n", i,
        a[i].velocity.x, a[i].velocity.y, a[i].velocity.z);
  }
}


static void print_vel_SOA(
    const double *vel_x,
    const double *vel_y,
    const double *vel_z,
    int ilo=0, int ihip1=1
    ) {
  printf("SOA Velocities:\n");
  for (int i=ilo; i < ihip1; i++) {
    printf("%d %g %g %g\n", i, vel_x[i], vel_y[i], vel_z[i]);
  }
}


static void print_tensor(const Tensor& t) {
  printf("%g %g %g %g %g %g %g %g %g\n",
      t.xx, t.xy, t.xz, t.yx, t.yy, t.yz, t.zx, t.zy, t.zz);
}
//
// END
// print_* routines
// assist in debugging SOA integration code
//


struct CheckStep {
  int period;
  int nextstep;

  inline int check(int step) {
    if (step == nextstep) return( nextstep += period, 1 );
    else return 0;
  }

  inline int init(int initstep, int initperiod, int delta=0) {
    period = initperiod;
    nextstep = initstep - (initstep % period) - (delta % period);
    while (nextstep <= initstep) nextstep += period;
    // returns true if initstep is divisible by period
    return (initstep + period == nextstep);
  }

  CheckStep() : period(0), nextstep(0) { }
};
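// A minimal usage sketch of CheckStep (illustrative only, never called by
// the code in this file; the step numbers are made up for the example).
// init() primes the counter and reports whether the starting step itself
// lands on the period; check() then fires exactly once every period steps.
#if 0
static void checkStepExample() {
  CheckStep energyCheck;
  // initstep = 100, period = 10: init() returns 1 since 100 % 10 == 0,
  // and the next firing step is primed to 110.
  int firesOnFirst = energyCheck.init(/*initstep=*/ 100, /*initperiod=*/ 10);
  for (int step = 101; step <= 130; step++) {
    if (energyCheck.check(step)) {
      // reached for step = 110, 120, 130
    }
  }
}
#endif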


Sequencer::Sequencer(HomePatch *p) :
  simParams(Node::Object()->simParameters),
  patch(p),
  collection(CollectionMgr::Object()),
  ldbSteps(0),
  pairlistsAreValid(0),
  pairlistsAge(0),
  pairlistsAgeLimit(0)
{
  if (simParams->pressureProfileOn) {
    int ntypes = simParams->pressureProfileAtomTypes;
    int nslabs = simParams->pressureProfileSlabs;
    pressureProfileReduction = ReductionMgr::Object()->willSubmit(
        REDUCTIONS_PPROF_INTERNAL, 3*nslabs*ntypes);
  } else {
    pressureProfileReduction = NULL;
  }
  if (simParams->multigratorOn) {
    multigratorReduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_MULTIGRATOR,1);
  } else {
    multigratorReduction = NULL;
  }
  ldbCoordinator = (LdbCoordinator::Object());

  // Is soluteScaling enabled?
  if (simParams->soluteScalingOn) {
    // If so, we must "manually" perform charge scaling on startup because
    // Sequencer will not get a scripting task for initial charge scaling.
    // Subsequent rescalings will take place through a scripting task.
    rescaleSoluteCharges(simParams->soluteScalingFactor);
  }

  stochRescale_count = 0;
  masterThread = true;
// patch->write_tip4_props();
#if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(SEQUENCER_SOA) && defined(NODEGROUP_FORCE_REGISTER)
  if (simParams->CUDASOAintegrate) {
#if 0
    CUDASequencer = new SequencerCUDA(deviceCUDA->getDeviceID(),
        simParams);
#else
    CUDASequencer = SequencerCUDA::InstanceInit(deviceCUDA->getDeviceID(),
        simParams);
#endif
    CUDASequencer->patchData->reduction->zero();
  }
#endif
}


Sequencer::~Sequencer(void)
{
  delete broadcast;
  delete reduction;
  delete min_reduction;
  delete random;
#if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(SEQUENCER_SOA) && defined(NODEGROUP_FORCE_REGISTER)
  if (simParams->CUDASOAintegrate) {
    delete CUDASequencer;
  }
#endif
}

// Invoked by thread
void Sequencer::threadRun(Sequencer* arg)
{
  arg->algorithm();
}

// Invoked by Node::run() via HomePatch::runSequencer()
void Sequencer::run(void)
{
  // create a Thread and invoke it
  DebugM(4, "::run() - this = " << this << "\n" );
  thread = CthCreate((CthVoidFn)&(threadRun),(void*)(this),SEQ_STK_SZ);
  CthSetStrategyDefault(thread);
  priority = PATCH_PRIORITY(patch->getPatchID());
  awaken();
}

void Sequencer::suspend(void)
{
  CthSuspend();
}

// Defines the sequence of operations on a patch, e.g. when to push out
// information for Compute objects to consume, when to migrate atoms, and
// when to add forces to the velocity update.
void Sequencer::algorithm(void)
{
  int scriptTask;
  int scriptSeq = 0;
  // Blocking receive for the script barrier.
  while ( (scriptTask = broadcast->scriptBarrier.get(scriptSeq++)) != SCRIPT_END ) {
    switch ( scriptTask ) {
      case SCRIPT_OUTPUT:
        break;
      case SCRIPT_FORCEOUTPUT:
        break;
      case SCRIPT_MEASURE:
        break;
      case SCRIPT_REINITVELS:
        break;
      case SCRIPT_RESCALEVELS:
        break;
        break;
      case SCRIPT_RELOADCHARGES:
        reloadCharges();
        break;
      case SCRIPT_CHECKPOINT:
        patch->checkpoint();
        break;
      case SCRIPT_REVERT:
        patch->revert();
        pairlistsAreValid = 0;
        break;
        break;
      case SCRIPT_ATOMSENDRECV:
      case SCRIPT_ATOMSEND:
      case SCRIPT_ATOMRECV:
        patch->exchangeAtoms(scriptTask);
        break;
      case SCRIPT_MINIMIZE:
#if 0
        if ( simParams->CUDASOAintegrate ) {
          NAMD_die("Minimization is currently not supported on the GPU integrator\n");
        }
#endif
        minimize();
        break;
      case SCRIPT_RUN:
      case SCRIPT_CONTINUE:
        //
        // DJH: Call a cleaned up version of integrate().
        //
        // We could test for simulation options and call a more basic version
        // of integrate() where we can avoid performing most tests.
        //
#ifdef SEQUENCER_SOA
        if ( simParams->SOAintegrateOn ) {
#ifdef NODEGROUP_FORCE_REGISTER
          if ( simParams->CUDASOAintegrate ) integrate_CUDA_SOA(scriptTask);
          else {
#endif
            integrate_SOA(scriptTask);
#ifdef NODEGROUP_FORCE_REGISTER
          }
#endif
        }
        else
#endif
          integrate(scriptTask);
        break;
      default:
        NAMD_bug("Unknown task in Sequencer::algorithm");
    }
  }
  terminate();
}


#ifdef SEQUENCER_SOA

//
// begin SOA code
//

#if defined(NODEGROUP_FORCE_REGISTER)

void Sequencer::suspendULTs() {
  PatchMap* patchMap = PatchMap::Object();
  CUDASequencer->numPatchesCheckedIn += 1;
  if (CUDASequencer->numPatchesCheckedIn < patchMap->numPatchesOnNode(CkMyPe())) {
    masterThread = false;
    CUDASequencer->waitingThreads.push_back(CthSelf());
    NAMD_EVENT_STOP(patch->flags.event_on, NamdProfileEvent::INTEGRATE_SOA_1);
    CthSuspend();

    // JM: if a thread gets here, it will be migrating atoms until the end of the simulation
    while (true) {
      // read global flags
      int lastStep = CUDASequencer->patchData->flags.step;
      int startup = (CUDASequencer->patchData->flags.step == simParams->firstTimestep);
      if (CUDASequencer->breakSuspends) break;
      if (simParams->useDeviceMigration) {
        this->patch->positionsReady_GPU(true, startup);
      } else {
        this->patch->positionsReady_SOA(true);
      }
      CUDASequencer->numPatchesCheckedIn += 1;
      CUDASequencer->waitingThreads.push_back(CthSelf());
      if (CUDASequencer->numPatchesCheckedIn == patchMap->numPatchesOnNode(CkMyPe()) - 1 &&
          CUDASequencer->masterThreadSleeping) {
        CUDASequencer->masterThreadSleeping = false;
        CthAwaken(CUDASequencer->masterThread);
      }
      CthSuspend();
    }
  }
}
void Sequencer::wakeULTs() {
  CUDASequencer->numPatchesCheckedIn = 0;
  for (CthThread t : CUDASequencer->waitingThreads) {
    CthAwaken(t);
  }
  CUDASequencer->waitingThreads.clear();
}

void Sequencer::runComputeObjectsCUDA(int doMigration, int doGlobal, int pairlists, int nstep, int startup) {

  PatchMap* map = PatchMap::Object();

  bool isMaster = deviceCUDA->getMasterPe() == CkMyPe();
  CmiNodeBarrier();

  // Sync after the node barrier. This makes sure that the position buffers
  // have been populated. However, this doesn't need to happen at the node
  // level; i.e. the non-PME nonbonded calculations can begin before the PME
  // device is finished setting its positions. There is a node barrier after
  // the forces are done, so we don't have to worry about the positions being
  // updated before they have been fully consumed.
  if (isMaster) {
    CUDASequencer->sync();
  }


  // JM: Each masterPE owns a particular copy of the compute object we need to
  // launch work on. The goal is to launch work on everyone, but on migration
  // steps there are sometimes a few operations that need to be launched on
  // computes owned by different PEs.
  // ComputeBondedCUDA::openBoxesOnPe() is an example: each compute holds a
  // list of PEs with information on which proxy object should also invoke
  // openBoxesOnPe().

  // We need to be mindful of that, and since we want to launch methods on
  // different computes, a data structure that holds all nonbonded Computes
  // from all masterPEs is necessary.
  ComputeCUDAMgr* cudaMgr = ComputeCUDAMgr::getComputeCUDAMgr();
  ComputeBondedCUDA* cudaBond = cudaMgr->getComputeBondedCUDA();
  CudaComputeNonbonded* cudaNbond = cudaMgr->getCudaComputeNonbonded();
  CudaPmeOneDevice* cudaPme = ( deviceCUDA->getIsPmeDevice() ?
      cudaMgr->getCudaPmeOneDevice() : NULL );
  int reducePme = ( cudaPme && patch->flags.doVirial );
#ifdef NAMD_CUDA
  auto cudaGlobal = deviceCUDA->getIsGlobalDevice() ? cudaMgr->getCudaGlobalMaster() : nullptr;
  if (isMaster && cudaGlobal) cudaGlobal->setStep(static_cast<int64_t>(patch->flags.step));
#endif // NAMD_CUDA
  // fprintf(stderr, "Patch %d invoking computes\n", this->patch->patchID);


  // JM NOTE: I don't think the scheme below holds for nMasterPes > 1; check it out later.

  // Invoking computes on the GPU //
  if (doMigration) {
    // JM: if we're on a migration step, we call the setup functions manually,
    // which means:
    // 0. masterPe->doWork();
    // 1. openBoxesOnPe();
    //    loadTuplesOnPe();
    // 2. masterPe->launchWork();
    // 3. finishPatchesOnPe();
    // 4. masterPe->finishReductions();

    if (isMaster) {
      NAMD_EVENT_START(1, NamdProfileEvent::MIG_ATOMUPDATE);
      cudaNbond->atomUpdate();
      cudaBond->atomUpdate();
      cudaNbond->doWork();
      cudaBond->doWork();
      NAMD_EVENT_STOP(1, NamdProfileEvent::MIG_ATOMUPDATE);

      if (cudaPme && !simParams->useDeviceMigration) CUDASequencer->atomUpdatePme();
#ifdef NAMD_CUDA
      if (cudaGlobal) {
        cudaGlobal->updateAtomMaps();
      }
#endif // NAMD_CUDA
    }

    CmiNodeBarrier();

    if (simParams->useDeviceMigration) {
      if (isMaster) {
        CUDASequencer->launch_set_compute_positions();
        CUDASequencer->sync(); // TODO move this to tuple migration
      }
      CmiNodeBarrier();
    }

    NAMD_EVENT_START(1, NamdProfileEvent::MIG_OPENBOXESONPE);

    // Here we need to do the following, for each Compute:
    for (int i = 0; i < CkNumPes(); i++) {
      // Here I need to find out if the PE is on the bonded PE list.
      // XXX NOTE: This might be inefficient. Check the overhead later
      ComputeBondedCUDA* b = CUDASequencer->patchData->cudaBondedList[i];
      CudaComputeNonbonded* nb = CUDASequencer->patchData->cudaNonbondedList[i];
      if (b == NULL) continue;
      auto list = std::find(std::begin(b->getBondedPes()), std::end(b->getBondedPes()), CkMyPe());
      if ( list != std::end(b->getBondedPes()) ) {
        b->openBoxesOnPe(startup);

        // XXX NOTE: nb has a different PE list!!! We need a different loop for nb
        nb->openBoxesOnPe();
      }
      CmiNodeBarrier();
    }
    NAMD_EVENT_STOP(1, NamdProfileEvent::MIG_OPENBOXESONPE);
    // for the bonded kernels, there's an additional step here: loadTuplesOnPe
    // JM NOTE: These are major hotspots; they account for 50% of the migration time.
    CmiNodeBarrier();
    NAMD_EVENT_START(1, NamdProfileEvent::MIG_LOADTUPLESONPE);

    // NOTE: problem here: One of the CompAtomExt structures is turning to null, why?
    cudaBond->loadTuplesOnPe(startup);
    NAMD_EVENT_STOP(1, NamdProfileEvent::MIG_LOADTUPLESONPE);
    CmiNodeBarrier();
    NAMD_EVENT_START(1, NamdProfileEvent::MIG_COPYTUPLEDATA);

    if (simParams->useDeviceMigration) {
      cudaBond->copyTupleDataGPU(startup);
    } else {
      cudaBond->copyTupleDataSN();
    }

    NAMD_EVENT_STOP(1, NamdProfileEvent::MIG_COPYTUPLEDATA);
    // waits until everyone has finished opening their respective boxes;
    // the node barrier actually prevents the error that is happening.
    CmiNodeBarrier();
    if (isMaster) {
      // launches work on the masterPe
      NAMD_EVENT_START(1, NamdProfileEvent::MIG_LAUNCHWORK);
      cudaBond->launchWork();
      cudaNbond->launchWork();
      if (cudaPme) {
        cudaPme->compute(*(CUDASequencer->patchData->lat), reducePme, this->patch->flags.step);
      }
      cudaNbond->reSortTileLists();
#ifdef NAMD_CUDA
      if (cudaGlobal) {
        cudaGlobal->communicateToClients(&(this->patch->lattice));
        cudaGlobal->communicateToMD();
      }
#endif // NAMD_CUDA
      NAMD_EVENT_STOP(1, NamdProfileEvent::MIG_LAUNCHWORK);
    }

    CmiNodeBarrier();
    // global master force calculation

    if (doGlobal) {
      NAMD_EVENT_START(1, NamdProfileEvent::GM_CALCULATE);
      // Zero all SOA global forces before computing the next global force
      NAMD_EVENT_START(1, NamdProfileEvent::GM_ZERO);
      int numhp = PatchMap::Object()->numHomePatches();
      HomePatchList *hpList = PatchMap::Object()->homePatchList();
      for (int i = 0; i < numhp; ++i) {
        HomePatch *hp = hpList->item(i).patch;
        hp->zero_global_forces_SOA();
      }
      NAMD_EVENT_STOP(1, NamdProfileEvent::GM_ZERO);
      NAMD_EVENT_START(1, NamdProfileEvent::GM_DOWORK);
      // call globalmaster to calculate the force from the client
      computeGlobal->doWork();
      NAMD_EVENT_STOP(1, NamdProfileEvent::GM_DOWORK);
      NAMD_EVENT_START(1, NamdProfileEvent::GM_BARRIER);
      CmiNodeBarrier();
      // CkPrintf("post doWork step %d \n",this->patch->flags.step);
      // CUDASequencer->printSOAPositionsAndVelocities();
      NAMD_EVENT_STOP(1, NamdProfileEvent::GM_BARRIER);
      if (isMaster) {
        // aggregate and copy the global forces to the d_f_global device buffer
        NAMD_EVENT_START(1, NamdProfileEvent::GM_CPY_FORCE);
        CUDASequencer->copyGlobalForcesToDevice();
        NAMD_EVENT_STOP(1, NamdProfileEvent::GM_CPY_FORCE);
      }
      NAMD_EVENT_STOP(1, NamdProfileEvent::GM_CALCULATE);
    }
    // if (cudaGlobal && isMaster) {
    //   cudaGlobal->communicateToClients(&(this->patch->lattice));
    //   cudaGlobal->communicateToMD();
    // }
    // CmiNodeBarrier();
    NAMD_EVENT_START(1, NamdProfileEvent::MIG_FINISHPATCHES);
    cudaNbond->finishPatches();
    cudaBond->finishPatches();
    NAMD_EVENT_STOP(1, NamdProfileEvent::MIG_FINISHPATCHES);
    CmiNodeBarrier();

    // finishes reduction with masterPe!
    if (isMaster) {
      cudaNbond->finishReductions();
      if (cudaPme) cudaPme->finishReduction(reducePme);
      cudaBond->finishReductions();
#ifdef NAMD_CUDA
      if (cudaGlobal) cudaGlobal->finishReductions(
          CUDASequencer->patchData->reduction);
#endif // NAMD_CUDA
    }
    CmiNodeBarrier();
  }
  // if we're not on a migration step, do the work only on the masterPE, except globalmaster work
  else {
    int doNbond = patch->flags.doNonbonded;
    if (isMaster) {
      // JM NOTE: We issue the nonbonded work first and sync it last
      if (cudaPme) {
        // cudaPme->compute(this->patch->lattice, reducePme, this->patch->flags.step);
        cudaPme->compute(*(CUDASequencer->patchData->lat), reducePme, this->patch->flags.step);
      }
      if (doNbond) cudaNbond->doWork();
      cudaBond->doWork();
#ifdef NAMD_CUDA
      if (cudaGlobal) {
        cudaGlobal->communicateToClients(&(this->patch->lattice));
        cudaGlobal->communicateToMD();
      }
#endif // NAMD_CUDA
    }
    // global master force calculation
    if (doGlobal) {
      NAMD_EVENT_START(1, NamdProfileEvent::GM_CALCULATE);
      NAMD_EVENT_START(1, NamdProfileEvent::GM_ZERO);
      // Zero all SOA global forces before computing the next global force
      int numhp = PatchMap::Object()->numHomePatches();
      HomePatchList *hpList = PatchMap::Object()->homePatchList();
      for (int i = 0; i < numhp; ++i) {
        HomePatch *hp = hpList->item(i).patch;
        hp->zero_global_forces_SOA();
      }
      NAMD_EVENT_STOP(1, NamdProfileEvent::GM_ZERO);
      // call globalmaster to calculate the force from the client
      NAMD_EVENT_START(1, NamdProfileEvent::GM_DOWORK);
      computeGlobal->doWork();
      NAMD_EVENT_STOP(1, NamdProfileEvent::GM_DOWORK);
      NAMD_EVENT_START(1, NamdProfileEvent::GM_BARRIER);
      CmiNodeBarrier();
      NAMD_EVENT_STOP(1, NamdProfileEvent::GM_BARRIER);
      // CkPrintf("post doWork 2 step %d \n",this->patch->flags.step);
      // CUDASequencer->printSOAPositionsAndVelocities();
      if (isMaster) {
        // aggregate and copy the global forces to the d_f_global device buffer
        NAMD_EVENT_START(1, NamdProfileEvent::GM_CPY_FORCE);
        CUDASequencer->copyGlobalForcesToDevice();
        NAMD_EVENT_STOP(1, NamdProfileEvent::GM_CPY_FORCE);
      }
      NAMD_EVENT_STOP(1, NamdProfileEvent::GM_CALCULATE);
    }
    // if (cudaGlobal && isMaster) {
    //   cudaGlobal->communicateToClients(&(this->patch->lattice));
    //   cudaGlobal->communicateToMD();
    // }
    if (isMaster) {
      cudaBond->finishPatches();
      if (cudaPme) {
        cudaPme->finishReduction(reducePme);
      }
      if (doNbond) cudaNbond->finishPatches();
#ifdef NAMD_CUDA
      if (cudaGlobal) cudaGlobal->finishReductions(
          CUDASequencer->patchData->reduction);
#endif // NAMD_CUDA
    }
  }

#if 0
  // for migrations, I need to call OpenBoxesOnPe and finishPatches for every Pe
    pairlistsAreValid = 1;
    pairlistsAge = 0;
  }
  if ( pairlistsAreValid /* && !pressureStep */ ) ++pairlistsAge;
#endif
  // CmiNodeBarrier();
}

// apply MC pressure control
void Sequencer::monteCarloPressureControl(
    const int step,
    const int doMigration,
    const int doEnergy,
    const int doVirial,
    const int maxForceNumber,
    const int doGlobal)
{
  bool isMasterPe = (deviceCUDA->getMasterPe() == CkMyPe() );
  NodeReduction *reduction = CUDASequencer->patchData->reduction;
  Controller *c_out = CUDASequencer->patchData->c_out;
  bool mGpuOn = CUDASequencer->mGpuOn;
  Lattice oldLattice = this->patch->lattice;
  Vector origin = this->patch->lattice.origin();
  Tensor factor;
  int accepted = 0; // status of MC volume fluctuation trial

  if (isMasterPe) {
    // Backup the reduction values for a rejected move
    CUDASequencer->patchData->nodeReductionSave->setVal(reduction);

    if (deviceCUDA->getIsMasterDevice()) {
      // Send the rescale factor for the Monte Carlo volume change from the controller
      c_out->mcPressure_prepare(step);
      // receive the factor
      factor = c_out->getPositionRescaleFactor(step);
    }

    // Backup positions and forces, scale the coordinates and lattice.
    // Set up positions for the energy and force calculation.
    CUDASequencer->monteCarloPressure_part1(factor, origin, oldLattice);
    if (deviceCUDA->getIsMasterDevice()) {
      // Scale the lattice with factor
      // patch.lattice is pointing to patch.flags.lattice
      this->patch->lattice.rescale(factor);
      CUDASequencer->patchData->lat = &(this->patch->lattice);
      CUDASequencer->patchData->factor = &(factor);
      // Copy scaled lattice flags to all patches
      CUDASequencer->patchData->flags.copyIntFlags(this->patch->flags);

      // Zero all reduction values. We will add halfStep values if
      // the move is accepted.
      reduction->zero();
    }
  }

  CmiNodeBarrier();
  if (isMasterPe) {
    // copy global flags
    CUDASequencer->update_patch_flags();
  }
  CmiNodeBarrier();
  // Calculate the new force and energy after rescaling the coordinates.
  // Migration happened before calling this function.
  this->runComputeObjectsCUDA(0, doGlobal, 1, step, 0 /* startup */);
  CmiNodeBarrier();

  if (isMasterPe) {
    // Accumulate force to SOA, calculate external energy/force,
    // reduce energy and virial
    CUDASequencer->monteCarloPressure_part2(reduction, step, maxForceNumber,
        doEnergy, doGlobal, doVirial);

    if (deviceCUDA->getIsMasterDevice()) {
      // Check to see if the move is accepted or not
      c_out->mcPressure_accept(step);
      accepted = c_out->getMCAcceptance(step);
      //accepted = broadcast->monteCarloBarostatAcceptance.get(step);
      //printf("Sequencer (accept): step: %d, Pe: %d, ACC status: %d\n", step, CkMyPe(), accepted);
    }

    if (accepted) { // Move accepted
      CUDASequencer->monteCarloPressure_accept(reduction, doMigration);
    } else { // Move rejected
      if (deviceCUDA->getIsMasterDevice()) {
        // Set the lattice to the original value, before scaling
        this->patch->lattice = oldLattice;
        CUDASequencer->patchData->lat = &(this->patch->lattice);
        // Copy scaled lattice flags to all patches
        CUDASequencer->patchData->flags.copyIntFlags(this->patch->flags);
      }

      // Restore all positions and forces and cuLattice
      CUDASequencer->monteCarloPressure_reject(this->patch->lattice);
      // Restore the reduction values
      reduction->setVal(CUDASequencer->patchData->nodeReductionSave);
    }
  }

  CmiNodeBarrier();
  // continue the rejection step; need to update the lattice in all patches
  if (isMasterPe && !accepted) {
    // copy global flags
    CUDASequencer->update_patch_flags();
  }
}

void Sequencer::doMigrationGPU(const int startup, const int doGlobal,
    const int updatePatchMap) {

  const bool isMasterPe = deviceCUDA->getMasterPe() == CkMyPe();
  const bool updatePatchData = startup || doGlobal || updatePatchMap;
  PatchMap* patchMap = PatchMap::Object();

  bool realloc = false;

  // This will check if a reallocation was done on the previous migration.
  // We use the scratch buffers to store the atomic data during reallocation.
  // However, the migrationDestination data must be maintained throughout
  // migration (and tuple migration, so beyond the scope of this function).
  // We probably should add a function to do this at the end of migration,
  // but for now, DMC thought it was easier to just do it at the beginning.
  for (int i = 0; i < deviceCUDA->getNumDevice(); i++) {
    if (CUDASequencer->patchData->atomReallocationFlagPerDevice[i] != 0) {
      realloc = true;
      break;
    }
  }
  if (realloc) {
    if (isMasterPe) {
      CUDASequencer->reallocateMigrationDestination();
      CUDASequencer->registerSOAPointersToHost();
    }
    CmiNodeBarrier();
    if (isMasterPe) {
      CUDASequencer->copySOAHostRegisterToDevice();
    }
  }

  // Proceed with migration
  //
  // Starts GPU migration
  //
  if (isMasterPe) {
    CUDASequencer->migrationLocalInit();
    // Hidden stream sync
  }
  CmiNodeBarrier();

  if (isMasterPe) {
    CUDASequencer->migrationPerform();
    // Hidden stream sync
  }
  CmiNodeBarrier();

  if (isMasterPe) {
    CUDASequencer->migrationUpdateAtomCounts();
    // Hidden stream sync
  }
  CmiNodeBarrier();

  if (isMasterPe) {
    CUDASequencer->migrationUpdateAtomOffsets();
    // Hidden stream sync
  }
  CmiNodeBarrier();

  if (isMasterPe) {
    CUDASequencer->copyPatchDataToHost();
    // Hidden stream sync
  }
  CmiNodeBarrier();

  // Update device buffer allocations
  realloc = false;
  if (isMasterPe) {
    realloc = CUDASequencer->copyPatchData(true, false);
    CUDASequencer->patchData->atomReallocationFlagPerDevice[deviceCUDA->getDeviceIndex()] = realloc;
  }
  CmiNodeBarrier();

  // If any of the devices have reallocated, we need to re-register the p2p buffers
  for (int i = 0; i < deviceCUDA->getNumDevice(); i++) {
    if (CUDASequencer->patchData->atomReallocationFlagPerDevice[i] != 0) {
      realloc = true;
      break;
    }
  }
  if (realloc) {
    if (isMasterPe) {
      CUDASequencer->registerSOAPointersToHost();
    }
    CmiNodeBarrier();
    if (isMasterPe) {
      CUDASequencer->copySOAHostRegisterToDevice();
    }
  }

  // Performs various post-processing like solute/solvent sorting and copies back to host
  if (isMasterPe) {
    CUDASequencer->migrationLocalPost(0);
    CUDASequencer->migrationSortAtomsNonbonded();
  }

  // If this is startup, we need to delay this until after AoS data has been copied
  // back to the host, because we do need the atomIDs for the atom map initially.
  if (!updatePatchData) {
    wakeULTs(); // Wakes everyone back up for migration
    this->patch->positionsReady_GPU(1, startup);
    if (CUDASequencer->numPatchesCheckedIn < patchMap->numPatchesOnNode(CkMyPe()) - 1) {
      CUDASequencer->masterThreadSleeping = true;
      CUDASequencer->masterThread = CthSelf();
      CthSuspend();
    }
  }

  if (isMasterPe) {
    CUDASequencer->sync();
  }
  CmiNodeBarrier();
  if (isMasterPe) {
    CUDASequencer->migrationUpdateDestination();
  }
  CmiNodeBarrier();

  if (isMasterPe) {
    CUDASequencer->migrationUpdateProxyDestination();
  }
  CmiNodeBarrier();

  if (isMasterPe) {
    CUDASequencer->migrationUpdateRemoteOffsets();
  }
  CmiNodeBarrier();

  if (isMasterPe) {
    CUDASequencer->copyDataToPeers(true);
  }
  CmiNodeBarrier();

  if (updatePatchData) {
    // The atom maps need to be cleared after the HomePatch atom arrays have been updated
    int numhp = PatchMap::Object()->numHomePatches();
    HomePatchList *hpList = PatchMap::Object()->homePatchList();
    for (int i = 0; i < numhp; ++i) {
      HomePatch *hp = hpList->item(i).patch;
      hp->clearAtomMap();
    }
    CmiNodeBarrier();
    if (isMasterPe) {
      // We need the atom ordering to be correct within each
      // patch to set up the atom map. The vdwType of each atom
      // is also used for exclusion tuple generation.
      CUDASequencer->copyAoSDataToHost();
    }
    CmiNodeBarrier();
    wakeULTs(); // Wakes everyone back up for migration
    this->patch->positionsReady_GPU(1, startup);
    if (CUDASequencer->numPatchesCheckedIn < patchMap->numPatchesOnNode(CkMyPe()) - 1) {
      CUDASequencer->masterThreadSleeping = true;
      CUDASequencer->masterThread = CthSelf();
      CthSuspend();
    }
    CmiNodeBarrier();
  }
  if (isMasterPe) {
    if (doGlobal || simParams->forceDcdFrequency > 0) {
      CUDASequencer->updateHostPatchDataSOA(); // Needs to be called after HomePatch updates
    }
  }
  CmiNodeBarrier();
  if (isMasterPe) {
    // This needs to be called after positionsReady_GPU so that the atom maps have been updated.
    // This will be called in updateDeviceData during startup with startup=true, but we need to
    // call it with startup=false to make sure the atoms are updated.
    CUDASequencer->migrationUpdateAdvancedFeatures(false);
  }
  CmiNodeBarrier();
}

// JM: Single-node integration scheme
void Sequencer::integrate_CUDA_SOA(int scriptTask) {

#ifdef TIMER_COLLECTION
  TimerSet& t = patch->timerSet;
#endif
  TIMER_INIT_WIDTH(t, KICK, simParams->timerBinWidth);
  TIMER_INIT_WIDTH(t, MAXMOVE, simParams->timerBinWidth);
  TIMER_INIT_WIDTH(t, DRIFT, simParams->timerBinWidth);
  TIMER_INIT_WIDTH(t, PISTON, simParams->timerBinWidth);
  TIMER_INIT_WIDTH(t, SUBMITHALF, simParams->timerBinWidth);
  TIMER_INIT_WIDTH(t, VELBBK1, simParams->timerBinWidth);
  TIMER_INIT_WIDTH(t, VELBBK2, simParams->timerBinWidth);
  TIMER_INIT_WIDTH(t, RATTLE1, simParams->timerBinWidth);
  TIMER_INIT_WIDTH(t, SUBMITFULL, simParams->timerBinWidth);
  TIMER_INIT_WIDTH(t, SUBMITCOLLECT, simParams->timerBinWidth);

  // Keep track of the step number.
  //int &step = patch->flags.step;
  int &step = patch->flags.step;
  step = simParams->firstTimestep;
  Controller *c_out = CUDASequencer->patchData->c_out;
  PatchMap* patchMap = PatchMap::Object();

  // For multiple time stepping, which force boxes are used?
  int &maxForceUsed = patch->flags.maxForceUsed;
  int &maxForceMerged = patch->flags.maxForceMerged;
  maxForceUsed = Results::normal;
  maxForceMerged = Results::normal;

  // Keep track of total steps and steps per cycle.
  const int numberOfSteps = simParams->N;
  //const int stepsPerCycle = simParams->stepsPerCycle;
  CheckStep stepsPerCycle;
  stepsPerCycle.init(step, simParams->stepsPerCycle);
  // The fundamental time step, get the scaling right for velocity units.
  const BigReal timestep = simParams->dt * RECIP_TIMEFACTOR;

  //const int nonbondedFrequency = simParams->nonbondedFrequency;
  //slowFreq = nonbondedFrequency;
  CheckStep nonbondedFrequency;
  // The step size for short-range nonbonded forces.
  const BigReal nbondstep = timestep * simParams->nonbondedFrequency;
  int &doNonbonded = patch->flags.doNonbonded;
  //doNonbonded = (step >= numberOfSteps) || !(step%nonbondedFrequency);
  doNonbonded = (step >= numberOfSteps) ||
      nonbondedFrequency.init(step, simParams->nonbondedFrequency);
  //if ( nonbondedFrequency == 1 ) maxForceMerged = Results::nbond;
  if ( nonbondedFrequency.period == 1 ) maxForceMerged = Results::nbond;
  if ( doNonbonded ) maxForceUsed = Results::nbond;

  // Do we do full electrostatics?
  const int dofull = ( simParams->fullElectFrequency ? 1 : 0 );
  //const int fullElectFrequency = simParams->fullElectFrequency;
  //if ( dofull ) slowFreq = fullElectFrequency;
  CheckStep fullElectFrequency;
  if ( dofull ) slowFreq = simParams->fullElectFrequency;
  // The step size for long-range electrostatics.
  const BigReal slowstep = timestep * simParams->fullElectFrequency;
  int &doFullElectrostatics = patch->flags.doFullElectrostatics;
  //doFullElectrostatics = (dofull &&
  //    ((step >= numberOfSteps) || !(step%fullElectFrequency)));
  doFullElectrostatics = (dofull &&
      ((step >= numberOfSteps) ||
       fullElectFrequency.init(step, simParams->fullElectFrequency)));
  //if ( dofull && fullElectFrequency == 1 ) maxForceMerged = Results::slow;
  if ( dofull && fullElectFrequency.period == 1 ) maxForceMerged = Results::slow;
  if ( doFullElectrostatics ) maxForceUsed = Results::slow;

  // Bother to calculate energies?
  int &doEnergy = patch->flags.doEnergy;
  //int energyFrequency = simParams->outputEnergies;
  CheckStep energyFrequency;
  int newComputeEnergies = simParams->computeEnergies;
  if (simParams->alchOn) newComputeEnergies = NAMD_gcd(newComputeEnergies, simParams->alchOutFreq);
  doEnergy = energyFrequency.init(step, newComputeEnergies);

  // check for Monte Carlo pressure control
  CheckStep monteCarloPressureFrequency;
  doEnergy += monteCarloPressureFrequency.init(step, (simParams->monteCarloPressureOn ?
      simParams->monteCarloPressureFreq : numberOfSteps + 1) );

  int &doVirial = patch->flags.doVirial;
  doVirial = 1;
  // Do we need to return forces to the TCL script or Colvars module?
  int doTcl = simParams->tclForcesOn;
  int doColvars = simParams->colvarsOn;
  int doGlobal = (doTcl || doColvars);
  CheckStep globalMasterFrequency;
  bool globalMasterStep = false;
  int doGlobalObjects = 0;
  int doGlobalStaleForces = 0;

  if (doGlobal)
  {
    globalMasterFrequency.init(step, (simParams->globalMasterFrequency > 0 ? simParams->globalMasterFrequency : numberOfSteps+1));
    globalMasterStep = globalMasterFrequency.check(step);
    doGlobalObjects = globalMasterStep ? 1 : 0;
    {
      doGlobalObjects = 1;
      doGlobalStaleForces = 1;
    }
    {
      doGlobalStaleForces = simParams->globalMasterStaleForces;
    }
    {
      doGlobalStaleForces = doGlobalObjects;
    }
    else
    {
      doGlobalStaleForces = doGlobalObjects;
    }
  }
  else
  {
    doGlobalStaleForces = 0;
    doGlobalObjects = 0;
  }
  // The following flags have to be explicitly disabled in the Patch object.
  patch->flags.doMolly = 0;
  patch->flags.doLoweAndersen = 0;
  patch->flags.doGBIS = 0;
  patch->flags.doLCPO = 0;

  // Square of maximum velocity for simulation safety check
  const BigReal maxvel2 =
      (simParams->cutoff * simParams->cutoff) / (timestep * timestep);

  // check for Langevin piston
  // set period beyond numberOfSteps to disable
  // fprintf(stderr, " Patch %d Pinging in from integrate_cuda!\n", this->patch->getPatchID());
  CheckStep langevinPistonFrequency;
  langevinPistonFrequency.init(step,
      (simParams->langevinPistonOn ? slowFreq : numberOfSteps+1 ),
      (simParams->langevinPistonOn ? -1-slowFreq/2 : 0) /* = delta */);
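  // Worked example of the delta argument above (numbers made up for
  // illustration): with slowFreq = 4 and step = 0, delta = -1-4/2 = -3,
  // so CheckStep::init() computes nextstep = 0 - (0 % 4) - (-3 % 4) = 3
  // (in C++, -3 % 4 == -3). check() then fires on steps 3, 7, 11, ...,
  // i.e. the piston update is shifted relative to the slow-force steps
  // 4, 8, 12, ... instead of coinciding with them.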

  // check for velocity rescaling
  // set period beyond numberOfSteps to disable
  CheckStep stochRescaleFrequency;
  stochRescaleFrequency.init(step, (simParams->stochRescaleOn ?
      simParams->stochRescaleFreq : numberOfSteps+1 ) );

  CheckStep reassignVelocityFrequency;
  reassignVelocityFrequency.init(step, ((simParams->reassignFreq>0) ?
      simParams->reassignFreq : numberOfSteps+1 ) );

  // check for output
  // set period beyond numberOfSteps to disable
  CheckStep restartFrequency;
  restartFrequency.init(step, (simParams->restartFrequency > 0 ?
      simParams->restartFrequency : numberOfSteps+1) );
  CheckStep dcdFrequency;
  dcdFrequency.init(step, (simParams->dcdFrequency > 0 ?
      simParams->dcdFrequency : numberOfSteps+1) );
  CheckStep velDcdFrequency;
  velDcdFrequency.init(step, (simParams->velDcdFrequency > 0 ?
      simParams->velDcdFrequency : numberOfSteps+1) );
  CheckStep forceDcdFrequency;
  forceDcdFrequency.init(step, (simParams->forceDcdFrequency > 0 ?
      simParams->forceDcdFrequency : numberOfSteps+1) );
  CheckStep imdFrequency;
  imdFrequency.init(step, (simParams->IMDfreq > 0 ?
      simParams->IMDfreq : numberOfSteps+1) );

  patch->copy_atoms_to_SOA(); // do this whether or not useDeviceMigration

  // Haochuan: is this really needed for GPU-resident?
  if (simParams->rigidBonds != RIGID_NONE && ! patch->settle_initialized) {
    patch->rattleListValid_SOA = true;
  }

  this->suspendULTs();
  // for "run 0", numberOfSteps is zero, but we want to have at least a single energy evaluation
  if (!masterThread) {
    return;
  }
  bool isMasterPe = (deviceCUDA->getMasterPe() == CkMyPe() );
  CmiNodeBarrier();

  CUDASequencer->breakSuspends = false;

  // XXX this is ugly!
  // one thread will have the CollectionMaster and Output defined;
  // use it to set the node group so that any thread can access them
  if (CUDASequencer->patchData->ptrCollectionMaster == NULL) {
    CollectionMaster *pcm = CkpvAccess(CollectionMaster_instance)->Object();
    if (pcm) {
      CUDASequencer->patchData->ptrCollectionMaster = pcm;
    }
  }
  if (CUDASequencer->patchData->ptrOutput == NULL) {
    Output *pout = Node::Object()->output;
    if (pout) {
      CUDASequencer->patchData->ptrOutput = pout;
    }
  }
  if (CUDASequencer->patchData->pdb == NULL) {
    PDB *pdb = Node::Object()->pdb;
    if (pdb) {
      CUDASequencer->patchData->pdb = pdb;
    }
  }
  if (CUDASequencer->patchData->imd == NULL) {
    IMDOutput *imd = Node::Object()->imd;
    if (imd->getIMD()) {
      CUDASequencer->patchData->imd = imd;
    }
  }

  // Register ComputeCUDAMgrs from each PE into a list for later usage
  if (isMasterPe) {
    // Each masterPE registers its own computeCUDAMgr
    CUDASequencer->patchData->cudaBondedList[CkMyPe()] = ComputeCUDAMgr::getComputeCUDAMgr()->getComputeBondedCUDA();
    CUDASequencer->patchData->cudaNonbondedList[CkMyPe()] = ComputeCUDAMgr::getComputeCUDAMgr()->getCudaComputeNonbonded();
  } else {
    CUDASequencer->patchData->cudaBondedList[CkMyPe()] = NULL;
    CUDASequencer->patchData->cudaNonbondedList[CkMyPe()] = NULL;
  }

  if (isMasterPe) {
    ComputeCUDAMgr* cudaMgr = ComputeCUDAMgr::getComputeCUDAMgr();
    if (dofull && deviceCUDA->getIsPmeDevice()) {
      CudaPmeOneDevice* cudaPme = 0;
      cudaPme = cudaMgr->createCudaPmeOneDevice();
    }
    CUDASequencer->patchData->reduction->zero();
  }

  CmiNodeBarrier();

/* JM NOTE: This contains the first calls of the integration loop. The order is:
 *  1 - Rattle (0,0)
 *  2 - runComputeObjects
 *  3 - addForceToMomentum(-0.5, tstep)
 *  4 - Rattle (-timestep, 0)
 *  5 - submitHalfstep()
 *  6 - addForceToMomentum(1.0, tstep)
 *  7 - Rattle (tstep, 1)
 *  8 - submitHalfstep()
 *  9 - addForceToMomentum(-0.5, tstep)
 * 10 - submitReductions()
 */
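// In the GPU-resident path below these operations are not issued one by one
// from here; roughly, startRun1 covers the initial rattle, the first
// runComputeObjectsCUDA call computes the forces, and startRun2/startRun3
// cover the remaining kick/rattle/reduction steps.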

  if (scriptTask == SCRIPT_RUN) {
    updateDeviceData(1, maxForceUsed, doGlobal);

    if (isMasterPe) {
      if (patchData->updateCounter.load() > 0)
      {
        CUDASequencer->updateDeviceKernels();
      }

      // warm_up1 is basically rattle1_SOA(0,0)
      CUDASequencer->startRun1(maxForceUsed, this->patch->lattice);
      (this->patch->flags.sequence)++;
      if (deviceCUDA->getIsMasterDevice()) {
        CUDASequencer->patchData->lat = &(this->patch->lattice);
        CUDASequencer->patchData->flags.copyIntFlags(this->patch->flags);
      }
      ComputeCUDAMgr* cudaMgr = ComputeCUDAMgr::getComputeCUDAMgr();
#ifdef NAMD_CUDA
      const auto cudaGlobalMasterObject = cudaMgr->getCudaGlobalMaster();
#endif // NAMD_CUDA
      const bool addCudaGlobalForces =
#ifdef NAMD_CUDA
          (cudaGlobalMasterObject != nullptr) ?
          cudaGlobalMasterObject->willAddGlobalForces() :
#endif // NAMD_CUDA
          false;
      if (addCudaGlobalForces) {
        CUDASequencer->allocateGPUSavedForces();
      }
    }

    CmiNodeBarrier();
    if (!simParams->useDeviceMigration) {
      wakeULTs(); // Wakes everyone back up for migration
      this->patch->positionsReady_SOA(1);
      if (CUDASequencer->numPatchesCheckedIn < patchMap->numPatchesOnNode(CkMyPe()) - 1) {
        CUDASequencer->masterThreadSleeping = true;
        CUDASequencer->masterThread = CthSelf();
        CthSuspend();
      }
      CmiNodeBarrier();
      updateDeviceData(0, maxForceUsed, doGlobal);
    } else {
      doMigrationGPU(1, doGlobal, simParams->updateAtomMap);
    }
    CmiNodeBarrier();
    // I've migrated everything. Now run computes
    runComputeObjectsCUDA(/* isMigration = */ 1,
        doGlobal,
        /* step < numberOfSteps */ 1,
        /* step = */ 0,
        /* startup = */ 1);

    if (isMasterPe) {
      CUDASequencer->finish_patch_flags(true);
      ComputeCUDAMgr* cudaMgr = ComputeCUDAMgr::getComputeCUDAMgr();
#ifdef NAMD_CUDA
      const auto cudaGlobalMasterObject = cudaMgr->getCudaGlobalMaster();
#endif // NAMD_CUDA
      const bool addCudaGlobalForces =
#ifdef NAMD_CUDA
          (cudaGlobalMasterObject != nullptr) ?
          cudaGlobalMasterObject->willAddGlobalForces() :
#endif // NAMD_CUDA
          false;
      CUDASequencer->startRun2(timestep,
          nbondstep, slowstep, this->patch->lattice.origin(),
          CUDASequencer->patchData->reduction, doGlobal || addCudaGlobalForces, maxForceUsed);
    }
    CmiNodeBarrier();
    if (isMasterPe) {
      const bool requestTotalForces = computeGlobal ? computeGlobal->getForceSendActive() : false;
      ComputeCUDAMgr* cudaMgr = ComputeCUDAMgr::getComputeCUDAMgr();
#ifdef NAMD_CUDA
      const auto cudaGlobalMasterObject = cudaMgr->getCudaGlobalMaster();
#endif // NAMD_CUDA
      const bool requestGPUTotalForces =
#ifdef NAMD_CUDA
          (cudaGlobalMasterObject != nullptr) ?
          cudaGlobalMasterObject->requestedTotalForces() :
#endif // NAMD_CUDA
          false;
      CUDASequencer->startRun3(timestep,
          nbondstep, slowstep, this->patch->lattice.origin(),
          CUDASequencer->patchData->reduction,
          requestTotalForces, doGlobalStaleForces,
          requestGPUTotalForces,
          maxForceUsed);
    }

    // save total force in computeGlobal; forces are copied from device
    // to host in startRun3
    if (doGlobal) {
      CmiNodeBarrier();
      // store the total force for compute global clients
      int numhp = PatchMap::Object()->numHomePatches();
      HomePatchList *hpList = PatchMap::Object()->homePatchList();
      for (int i = 0; i < numhp; ++i) {
        HomePatch *hp = hpList->item(i).patch;
        computeGlobal->saveTotalForces(hp);
      }
    }
  }

  CmiNodeBarrier();
  // Called everything; now I can go ahead and print the step.
  // PE 0 needs to handle IO as it owns the controller object.
  // JM: What happens if PE 0 does not own a GPU here? XXX Check
  if (deviceCUDA->getIsMasterDevice()) {
    CUDASequencer->patchData->flags.copyIntFlags(this->patch->flags);
    c_out->resetMovingAverage();
    c_out->printStep(step, CUDASequencer->patchData->reduction);
    CUDASequencer->patchData->reduction->zero();
  }
  CmiNodeBarrier();

  // XXX Should we promote velrescaling into Sequencer in order to save
  // the velocity rescaling coefficient between script run commands?
  double velrescaling = 1;
  // --------- Start of the MD loop ------- //
  for ( ++step; step <= numberOfSteps; ++step ) {
    const int isForcesOutputStep = forceDcdFrequency.check(step);
    int dcdSelectionChecks = 0;
    Molecule *molecule = Node::Object()->molecule;
    for (int dcdindex = 0; dcdindex < 16; ++dcdindex)
    {
      int dcdSelectionFrequency = molecule->dcdSelectionParams[dcdindex].frequency;
      if (dcdSelectionFrequency && step % dcdSelectionFrequency)
        dcdSelectionChecks++;
    }
    const int isCollection = restartFrequency.check(step) +
        dcdFrequency.check(step) + velDcdFrequency.check(step) +
        isForcesOutputStep + imdFrequency.check(step) + dcdSelectionChecks;
    int isMigration = false;
    const int doVelocityRescale = stochRescaleFrequency.check(step);
    const int doMCPressure = monteCarloPressureFrequency.check(step);
    // XXX doVelRescale should instead set a "doTemperature" flag
    doEnergy = energyFrequency.check(step) || doVelocityRescale || doMCPressure;
    int langevinPistonStep = langevinPistonFrequency.check(step);

    int reassignVelocityStep = reassignVelocityFrequency.check(step);

    // berendsen pressure control
    int berendsenPressureStep = 0;
    if (simParams->berendsenPressureOn) {
      if (!(step % simParams->berendsenPressureFreq)) {
        berendsenPressureStep = 1;
      }
    }
    if (patchData->updateCounter.load() > 0)
    {
      CUDASequencer->updateDeviceKernels();
    }

    if (doGlobal)
    {
      globalMasterStep = globalMasterFrequency.check(step);
      doGlobalObjects = globalMasterStep ? 1 : 0;
      {
        doGlobalObjects = 1;
        doGlobalStaleForces = 1;
      }
      {
        doGlobalStaleForces = simParams->globalMasterStaleForces;
      }
      {
        doGlobalStaleForces = doGlobalObjects;
      }
      else
      {
        doGlobalStaleForces = doGlobalObjects;
      }
    }
    else
    {
      doGlobalStaleForces = 0;
      doGlobalObjects = 0;
      globalMasterStep = false;
    }
    // CkPrintf("step %d doGlobal %d doGlobalObjects %d doGlobalStaleForces %d globalMasterStep %d globalMasterFrequency %d\n", step, doGlobal, doGlobalObjects, doGlobalStaleForces, globalMasterStep, simParams->globalMasterFrequency);

#if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
    //eon = epid && (beginStep < step && step <= endStep);
    // int eon = epid && (beginStep < step && step <= endStep);
    // if (controlProfiling && step == beginStep) {
    //   NAMD_PROFILE_START();
    // }
    //if (controlProfiling && step == endStep) {
    //  NAMD_PROFILE_STOP();
    //}
#endif

    Vector origin = this->patch->lattice.origin();
    Tensor factor;
    if (deviceCUDA->getIsMasterDevice()) {
      if (simParams->langevinPistonOn) {
        c_out->piston1(step);
      }
      // Get the rescale factor for berendsen from the controller
      if (simParams->berendsenPressureOn) {
        c_out->berendsenPressureController(step);
      }
    }
    if (langevinPistonStep || berendsenPressureStep) {
      if (isMasterPe) {
        factor = c_out->getPositionRescaleFactor(step);
        this->patch->lattice.rescale(factor);
        CUDASequencer->patchData->lat = &(this->patch->lattice);
        CUDASequencer->patchData->factor = &(factor); // This looks unsafe, but as devices run in lockstep, it's fine
      }
    }

    CmiNodeBarrier();
    NAMD_EVENT_START(1, NamdProfileEvent::CUDASOA_LAUNCHPT1);
    int previousMaxForceUsed;
    if (isMasterPe) {
      // need to remember the number of buffers for the previous force calculation
      previousMaxForceUsed = maxForceUsed;
      // update local flags
      //doNonbonded = !(step%nonbondedFrequency);
      // no need to include doMCPressure since it's a common factor of nonbondedFrequency
      doNonbonded = nonbondedFrequency.check(step);
      // no need to include doMCPressure since it's a common factor of fullElectFrequency
      doFullElectrostatics = (dofull && fullElectFrequency.check(step));
      maxForceUsed = Results::normal;
      if ( doNonbonded ) maxForceUsed = Results::nbond;
      if ( doFullElectrostatics ) maxForceUsed = Results::slow;

      (this->patch->flags.sequence)++;
      // JM: Pressures needed for every timestep if the piston is on

      // copy local flags to global
      if (deviceCUDA->getIsMasterDevice()) CUDASequencer->patchData->flags.copyIntFlags(this->patch->flags);
    }

    CmiNodeBarrier();

    if (isMasterPe) {
      CUDASequencer->launch_part1(
          step,
          timestep, nbondstep, slowstep, velrescaling, maxvel2,
          CUDASequencer->patchData->reduction,
          *(CUDASequencer->patchData->factor),
          origin,
          // this->patch->lattice, // need to use the lattice from PE 0 right now
          (langevinPistonStep || berendsenPressureStep) ? *(CUDASequencer->patchData->lat) : this->patch->lattice,
          reassignVelocityStep,
          langevinPistonStep,
          berendsenPressureStep,
          previousMaxForceUsed, // call with previous maxForceUsed
          (const int)(step == simParams->firstTimestep + 1),
          this->patch->flags.savePairlists, // XXX how to initialize?
          this->patch->flags.usePairlists, // XXX how to initialize?
          doEnergy);
      // reset velocity rescaling coefficient after applying it
      velrescaling = 1;
    }
    if (reassignVelocityStep)
    {
      // CkPrintf("dump after launch_part1\n");
      // CUDASequencer->printSOAPositionsAndVelocities(2,10);
    }
    NAMD_EVENT_STOP(1, NamdProfileEvent::CUDASOA_LAUNCHPT1);

    CmiNodeBarrier();

    if (isMasterPe) {
      CUDASequencer->launch_part11(
          timestep, nbondstep, slowstep, velrescaling, maxvel2,
          CUDASequencer->patchData->reduction,
          *(CUDASequencer->patchData->factor),
          origin,
          // this->patch->lattice, // need to use the lattice from PE 0 right now
          (langevinPistonStep || berendsenPressureStep) ? *(CUDASequencer->patchData->lat) : this->patch->lattice,
          langevinPistonStep,
          previousMaxForceUsed, // call with previous maxForceUsed
          (const int)(step == simParams->firstTimestep + 1),
          this->patch->flags.savePairlists, // XXX how to initialize?
          this->patch->flags.usePairlists, // XXX how to initialize?
          doEnergy);
      // reset velocity rescaling coefficient after applying it
      velrescaling = 1;
    }
    NAMD_EVENT_STOP(1, NamdProfileEvent::CUDASOA_LAUNCHPT1);

    CmiNodeBarrier();


    for (int i = 0; i < deviceCUDA->getNumDevice(); i++) {
      if (CUDASequencer->patchData->migrationFlagPerDevice[i] != 0) {
        isMigration = true;
        break;
      }
    }

    if (isMasterPe) {
      // If this is a Device Migration step we'll do it later
      if (!simParams->useDeviceMigration || !isMigration) {
        CUDASequencer->launch_set_compute_positions();
      }
    }

    // isMigration = (CUDASequencer->patchData->migrationFlagPerDevice.end() != t) ? 1:0;

    if (isMasterPe) {
      // if(CkMyPe() == 0) CUDASequencer->updatePairlistFlags(isMigration);
      CUDASequencer->updatePairlistFlags(isMigration);
      if (!simParams->useDeviceMigration) {
        CUDASequencer->copyPositionsAndVelocitiesToHost(isMigration, doGlobalObjects);
      }
    }


    if (isMigration) {
      if (!simParams->useDeviceMigration) {
        CmiNodeBarrier();
        wakeULTs(); // sets the number of patches
        this->patch->positionsReady_SOA(isMigration);
        if (CUDASequencer->numPatchesCheckedIn < patchMap->numPatchesOnNode(CkMyPe()) - 1) {
          CUDASequencer->masterThreadSleeping = true;
          CUDASequencer->masterThread = CthSelf();
          CthSuspend(); // suspends until everyone else has pinged back. :]
        }
        CmiNodeBarrier();
        updateDeviceData(0, maxForceUsed, doGlobal);
      } else {
        doMigrationGPU(false, doGlobal, simParams->updateAtomMap);
        CmiNodeBarrier();
      }
    }

    // Calculate force/energy for bonded, nonbonded, and PME.

    this->runComputeObjectsCUDA(isMigration, doGlobalObjects, step < numberOfSteps, step, 0 /* startup */);

    if (isMasterPe) {
      // if(CkMyPe() == 0) CUDASequencer->finish_patch_flags(isMigration);
      CUDASequencer->finish_patch_flags(isMigration);
      CUDASequencer->patchData->migrationFlagPerDevice[deviceCUDA->getDeviceIndex()] = 0; // flag it back to zero
    }
    CmiNodeBarrier();

    NAMD_EVENT_START(1, NamdProfileEvent::CUDASOA_LAUNCHPT2);
    if (isMasterPe) {
      ComputeCUDAMgr* cudaMgr = ComputeCUDAMgr::getComputeCUDAMgr();
#ifdef NAMD_CUDA
      const auto cudaGlobalMasterObject = cudaMgr->getCudaGlobalMaster();
#endif // NAMD_CUDA
      const bool addCudaGlobalForces =
#ifdef NAMD_CUDA
          (cudaGlobalMasterObject != nullptr) ?
          cudaGlobalMasterObject->willAddGlobalForces() :
#endif // NAMD_CUDA
          false;
      CUDASequencer->launch_part2(doMCPressure,
          timestep, nbondstep, slowstep,
          CUDASequencer->patchData->reduction,
          origin,
          step,
          maxForceUsed,
          langevinPistonStep,
          isMigration && (!simParams->useDeviceMigration),
          isCollection,
          doGlobalStaleForces || addCudaGlobalForces,
          doEnergy);
    }
    CmiNodeBarrier();
    NAMD_EVENT_STOP(1, NamdProfileEvent::CUDASOA_LAUNCHPT2);

    // Apply MC pressure control
    if (doMCPressure) {
      monteCarloPressureControl(step, isMigration, 1, 1, maxForceUsed, doGlobalStaleForces);
      CmiNodeBarrier();
    }

    const bool requestTotalForces = (computeGlobal ? computeGlobal->getForceSendActive() : false) && doGlobalObjects;
    // continue launch_part2, after cellBasis fluctuation in MC barostat
    if (isMasterPe) {
      ComputeCUDAMgr* cudaMgr = ComputeCUDAMgr::getComputeCUDAMgr();
#ifdef NAMD_CUDA
      const auto CudaGlobalMasterObject = cudaMgr->getCudaGlobalMaster();
#endif // NAMD_CUDA
      const bool requestGPUTotalForces =
#ifdef NAMD_CUDA
          (CudaGlobalMasterObject != nullptr) ?
          CudaGlobalMasterObject->requestedTotalForces() :
#endif // NAMD_CUDA
          false;
      CUDASequencer->launch_part3(doMCPressure,
          timestep, nbondstep, slowstep,
          CUDASequencer->patchData->reduction,
          origin,
          step,
          maxForceUsed,
          requestTotalForces, // requested force
          doGlobalStaleForces,
          requestGPUTotalForces,
          isMigration,
          isCollection,
          doEnergy,
          isForcesOutputStep);
    }
    CmiNodeBarrier();
    NAMD_EVENT_STOP(1, NamdProfileEvent::CUDASOA_LAUNCHPT2);

    // save total force in computeGlobal; forces are copied from device
    // to host in launch_part3
    if (requestTotalForces) {
      CmiNodeBarrier();
      // store the total force for compute global clients
      int numhp = PatchMap::Object()->numHomePatches();
      HomePatchList *hpList = PatchMap::Object()->homePatchList();
      for (int i = 0; i < numhp; ++i) {
        HomePatch *hp = hpList->item(i).patch;
        computeGlobal->saveTotalForces(hp);
      }
    }


    NAMD_EVENT_START(1, NamdProfileEvent::CUDASOA_PRTSTEP);
    CmiNodeBarrier();

    if (deviceCUDA->getIsMasterDevice()) {
      // even though you're not on a printstep, calling this still takes approx 15us!!!
      c_out->printStep(step, CUDASequencer->patchData->reduction);
      // stochastic velocity rescaling:
      // get the coefficient from the current temperature,
      // to be applied on the NEXT loop iteration
      if (doVelocityRescale) {
        // calculate coefficient based on current temperature
        velrescaling = c_out->stochRescaleCoefficient();
      }
    }
    NAMD_EVENT_STOP(1, NamdProfileEvent::CUDASOA_PRTSTEP);

    NAMD_EVENT_START(1, NamdProfileEvent::RESET_REDUCTIONS);
    if (deviceCUDA->getIsMasterDevice()) CUDASequencer->patchData->reduction->zero();
    NAMD_EVENT_STOP(1, NamdProfileEvent::RESET_REDUCTIONS);

    NAMD_EVENT_START(1, NamdProfileEvent::CUDASOA_SUBCOL);
    if (isCollection) {
      CmiNodeBarrier();
      if (simParams->useDeviceMigration) {
        if (isMasterPe) {
          CUDASequencer->copyAoSDataToHost();
        }
        // Make sure the data has been copied to all home patches. All PEs
        // participate in outputting
        CmiNodeBarrier();
      }
      HomePatchList *hplist = patchMap->homePatchList();
      for (auto i = hplist->begin(); i != hplist->end(); i++) {
        HomePatch *hp = i->patch;
        hp->sequencer->submitCollections_SOA(step);
      }
      CmiNodeBarrier();
    }
    NAMD_EVENT_STOP(1, NamdProfileEvent::CUDASOA_SUBCOL);
  }

  if (simParams->useDeviceMigration) {
    CmiNodeBarrier();
    if (isMasterPe) {
      CUDASequencer->copyAoSDataToHost();
    }
  } else {
    if (isMasterPe) {
      CUDASequencer->updateHostPatchDataSOA();
      CUDASequencer->saveForceCUDASOA_direct(false, true, maxForceUsed);
    }
    if (isMasterPe) CUDASequencer->copyPositionsAndVelocitiesToHost(true, doGlobal);
    CmiNodeBarrier();
    HomePatchList *hplist = patchMap->homePatchList();
    for (auto i = hplist->begin(); i != hplist->end(); i++) {
      HomePatch *hp = i->patch;
      hp->copy_updates_to_AOS();
      hp->copy_forces_to_AOS(); // to support "output withforces"
    }
  }
  CmiNodeBarrier(); // Make sure the data has been copied to all home patches

  //CmiNodeBarrier();
  CUDASequencer->breakSuspends = true;
  wakeULTs();
  if (deviceCUDA->getIsMasterDevice()) c_out->awaken();
}


/*
 * Updates device data after a migration
 */
void Sequencer::updateDeviceData(const int startup, const int maxForceUsed, const int doGlobal) {
  bool isMaster = deviceCUDA->getMasterPe() == CkMyPe();
  if (isMaster) {
    CUDASequencer->copyPatchData(true, startup);
    if (simParams->useDeviceMigration) {
      CUDASequencer->reallocateMigrationDestination();
      CUDASequencer->copyAtomDataToDeviceAoS();
    } else {
      CUDASequencer->copyAtomDataToDevice(startup, maxForceUsed);
    }
    CUDASequencer->migrationLocalPost(startup);
    CUDASequencer->migrationUpdateAdvancedFeatures(startup);
    // XXX This is only necessary if reallocation happens
    CUDASequencer->registerSOAPointersToHost();
  }
  CmiNodeBarrier();
  if (isMaster) {
    CUDASequencer->copySOAHostRegisterToDevice();
    if (simParams->useDeviceMigration) {
      CUDASequencer->patchData->atomReallocationFlagPerDevice[deviceCUDA->getDeviceIndex()] = 0;
    }
    if (doGlobal || simParams->forceDcdFrequency > 0) {
      CUDASequencer->updateHostPatchDataSOA(); // Needs to be called after HomePatch migration
    }
  }
  CmiNodeBarrier();
}
1731 
1732 /*
1733  * Constructs the meta data structures storing the patch data for GPU resident code path
1734  *
1735  * This is called once during startup
1736  *
1737  */
1738 void Sequencer::constructDevicePatchMap() {
1739  ComputeCUDAMgr* cudaMgr = ComputeCUDAMgr::getComputeCUDAMgr();
1740  ComputeBondedCUDA* cudaBond = cudaMgr->getComputeBondedCUDA();
1741  CudaComputeNonbonded* cudaNbond = cudaMgr->getCudaComputeNonbonded();
1742 
1743  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1744  patchData = cpdata.ckLocalBranch();
1745 
1746  const bool isMasterPe = deviceCUDA->getMasterPe() == CkMyPe();
1747 
1748  // constructDevicePatchMap should only be called once per PE
1749  if (patchData->devicePatchMapFlag[CkMyPe()]) return;
1750  patchData->devicePatchMapFlag[CkMyPe()] = 1;
1751 
1752  // One thread per GPU will execute this block
1753  if (isMasterPe) {
1754  const int deviceIndex = deviceCUDA->getDeviceIndex();
1755 
1756  // nonbondedPatches comes from CudaComputeNonbonded and contains all of the home and
1757  // proxy patches on this device. homePatches comes from SequencerCUDA and contains only
1758  // the home patches. localPatches will be generated by this function.
1759  using NBPatchRecord = CudaComputeNonbonded::PatchRecord;
1761  std::vector<NBPatchRecord>& nonbondedPatches = cudaNbond->getPatches();
1762  std::vector<HomePatch*>& homePatches = patchData->devData[deviceIndex].patches;
1763  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1764 
1765  // The home patches are not necessarily ordered by their patchID. This can happen if there
1766  // are multiple PEs assigned to the same GPU. Sorting the home patches by their patch ID
1767  // makes it easy to have a consistent ordering
1768  std::stable_sort(
1769  homePatches.begin(),
1770  homePatches.end(),
1771  [](HomePatch* a, HomePatch* b) {
1772  return (a->getPatchID() < b->getPatchID());
1773  });
1774 
1775  // Iterate over all the patches on this device, add them to h_localPatches,
1776  // and determine whether each one is a home patch or a proxy patch
1777  for (int i = 0; i < nonbondedPatches.size(); i++) {
1778  CudaLocalRecord record;
1779  record.patchID = nonbondedPatches[i].patchID;
1780 
1781  // TODO DMC the patchmap should be able to do this
1782  const int targetPatchID = record.patchID;
1783  auto result = std::find_if(
1784  homePatches.begin(),
1785  homePatches.end(),
1786  [targetPatchID](HomePatch* p) {
1787  return (p->getPatchID() == targetPatchID);
1788  });
1789 
1790  record.isProxy = (result == homePatches.end());
1791  localPatches.push_back(record);
1792  }
1793 
1794  // The home patches should be at the beginning of the patch list.
1795  // This makes integration easier, since we can ignore the proxy patches and operate on a
1796  // contiguous chunk of home atoms
1797  std::stable_sort(
1798  localPatches.begin(),
1799  localPatches.end(),
1800  [](CudaLocalRecord a, CudaLocalRecord b) {
1801  return (a.isProxy < b.isProxy);
1802  });
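  // Note: because std::stable_sort is stable, this pass behaves as a stable
  // partition. Home patches move ahead of proxies, while the relative order
  // inherited from the nonbonded patch list is preserved within each group.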
1803 
1804  // Now that the ordering is fixed, we can update the bonded and nonbonded orders. Since we
1805  // have moved the home patches to the beginning, the ordering has changed.
1806  cudaBond->updatePatchOrder(localPatches);
1807  cudaNbond->updatePatchOrder(localPatches);
1808  patchData->devData[deviceIndex].numPatchesHome = homePatches.size();
1809  patchData->devData[deviceIndex].numPatchesHomeAndProxy = localPatches.size();
1810  }
1811  CmiNodeBarrier();
1812 
1813  // Iterates over all patches again, and generates the mapping between GPUs. For each patch,
1814  // it checks the other devices to see if the patch is on that device.
1815  // - For HomePatches, there will be a peer record for all of its proxies
1816  // - For ProxyPatches, there will only be a peer record for its home patch
1817  // There is a single array of peer records per device. Each patch stores an offset into this
1818  // array as well as its number of peer records
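  // As a concrete example (illustration only): if patch 7 is a home patch on
  // device 0 with proxies on devices 1 and 2, device 0 stores two peer records
  // for it (one per proxy), while devices 1 and 2 each store a single peer
  // record pointing back at the home copy on device 0.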
1819  if (isMasterPe) {
1820  const int deviceIndex = deviceCUDA->getDeviceIndex();
1821  std::vector<CudaPeerRecord>& myPeerPatches = patchData->devData[deviceIndex].h_peerPatches;
1822  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1823 
1824  for (int i = 0; i < localPatches.size(); i++) {
1825  std::vector<CudaPeerRecord> tempPeers;
1826  const int targetPatchID = localPatches[i].patchID;
1827  const int targetIsProxy = localPatches[i].isProxy;
1828 
1829  for (int devIdx = 0; devIdx < deviceCUDA->getNumDevice(); devIdx++) {
1830  if (devIdx == deviceIndex) continue;
1831  std::vector<CudaLocalRecord>& peerPatches = patchData->devData[devIdx].h_localPatches;
1832 
1833  // Search peerPatches for the same patchID with the opposite home/proxy role:
1834  // a home patch pairs with its proxies on other devices, and a proxy pairs with its home patch
1835  for (int j = 0; j < patchData->devData[devIdx].numPatchesHomeAndProxy; j++) {
1836  const CudaLocalRecord peer = peerPatches[j];
1837  if (peer.patchID == targetPatchID && peer.isProxy != targetIsProxy) {
1838  CudaPeerRecord peerRecord;
1839  peerRecord.deviceIndex = devIdx;
1840  peerRecord.patchIndex = j;
1841  tempPeers.push_back(peerRecord);
1842  break;
1843  }
1844  }
1845  }
1846 
1847  // Once we have the list of peer records, add them to the single device-width vector
1848  // and record the offset and count
1849  localPatches[i].numPeerRecord = tempPeers.size();
1850  if (!tempPeers.empty()) {
1851  localPatches[i].peerRecordStartIndex = myPeerPatches.size();
1852  myPeerPatches.insert(myPeerPatches.end(), tempPeers.begin(), tempPeers.end());
1853  }
1854  }
1855  }
1856  CmiNodeBarrier();
1857 }
1858 
1859 void Sequencer::printDevicePatchMap() {
1860  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1861  patchData = cpdata.ckLocalBranch();
1862 
1863  const bool isMasterPe = deviceCUDA->getMasterPe() == CkMyPe();
1864 
1865  if (isMasterPe) {
1866  const int deviceIndex = deviceCUDA->getDeviceIndex();
1867  const int numPatchesHome = patchData->devData[deviceIndex].numPatchesHome;
1868  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1869 
1870  CmiLock(patchData->printlock);
1871  CkPrintf("PE: %d\n", CkMyPe());
1872 
1873  CkPrintf("[%d] Home patches %d Local patches %d\n", CkMyPe(), numPatchesHome, (int)localPatches.size());
1874 
1875  CkPrintf("Home Patches: ");
1876  for (int i = 0; i < numPatchesHome; i++) {
1877  CkPrintf("%d ", localPatches[i].patchID);
1878  }
1879  CkPrintf("\n");
1880 
1881  CkPrintf("Proxy Patches: ");
1882  for (int i = numPatchesHome; i < localPatches.size(); i++) {
1883  CkPrintf("%d ", localPatches[i].patchID);
1884  }
1885  CkPrintf("\n");
1886 
1887  CmiUnlock(patchData->printlock);
1888  }
1889  CmiNodeBarrier();
1890 }
1891 
1892 void Sequencer::clearDevicePatchMap() {
1893  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1894  patchData = cpdata.ckLocalBranch();
1895 
1896  const bool isMasterPe = deviceCUDA->getMasterPe() == CkMyPe();
1897 
1898  // clearDevicePatchMap should only be called once per PE
1899  if (!patchData->devicePatchMapFlag[CkMyPe()]) return;
1900  patchData->devicePatchMapFlag[CkMyPe()] = 0;
1901 
1902  // One thread per GPU will execute this block
1903  if (isMasterPe) {
1904  const int deviceIndex = deviceCUDA->getDeviceIndex();
1905 
1906  using NBPatchRecord = CudaComputeNonbonded::PatchRecord;
1907  std::vector<HomePatch*>& homePatches = patchData->devData[deviceIndex].patches;
1908  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1909  std::vector<CudaPeerRecord>& peerPatches = patchData->devData[deviceIndex].h_peerPatches;
1910 
1911  homePatches.clear();
1912  localPatches.clear();
1913  peerPatches.clear();
1915  }
1916 }
1917 
1918 /*
1919  * Updates the meta data structures storing the patch data for GPU resident code path
1920  *
1921  * This is called every migration step. The actual mapping stays the same,
1922  * but the atom counts per patch change
1923  *
1924  */
1925 void Sequencer::updateDevicePatchMap(int startup) {
1926  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1927  patchData = cpdata.ckLocalBranch();
1928 
1929  const bool isMasterPe = deviceCUDA->getMasterPe() == CkMyPe();
1930 
1931  if (isMasterPe) {
1932  const int deviceIndex = deviceCUDA->getDeviceIndex();
1933  const int numPatchesHome = patchData->devData[deviceIndex].numPatchesHome;
1934  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1935  ComputeCUDAMgr* cudaMgr = ComputeCUDAMgr::getComputeCUDAMgr();
1937  CudaComputeNonbonded* cudaNbond = cudaMgr->getCudaComputeNonbonded();
1938 
1939  int max_atom_count = 0;
1940  int total_atom_count = 0;
1941 
1942  // Update the atom count of home patches
1943  for (int i = 0; i < numPatchesHome; i++) {
1944  Patch* patch = NULL;
1945  for(int j = 0; j < deviceCUDA->getNumPesSharingDevice(); j++){
1946  PatchMap* pm = PatchMap::ObjectOnPe(deviceCUDA->getPesSharingDevice(j));
1947  patch = pm->patch(localPatches[i].patchID);
1948  if (patch != NULL) break;
1949  }
1950  if (patch == NULL) NAMD_die("Sequencer: Failed to find patch in updateDevicePatchMap");
1951 
1952  localPatches[i].numAtoms = patch->getNumAtoms();
1953  localPatches[i].numAtomsNBPad = CudaComputeNonbondedKernel::computeAtomPad(localPatches[i].numAtoms);
1954 
1955  if (localPatches[i].numAtoms > max_atom_count) max_atom_count = localPatches[i].numAtoms;
1956  total_atom_count += localPatches[i].numAtoms;
1957  }
1958  }
1959  CmiNodeBarrier();
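  // This barrier is load-bearing: the next phase reads h_localPatches from
  // *other* devices' devData, so every master PE must finish writing its own
  // home-patch atom counts before any peer lookups begin.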
1960 
1961  // Update the proxy patches next, using the home patch atom counts of other devices
1962  if (isMasterPe) {
1963  const int deviceIndex = deviceCUDA->getDeviceIndex();
1964  const int numPatchesHome = patchData->devData[deviceIndex].numPatchesHome;
1965  const int numPatchesHomeAndProxy = patchData->devData[deviceIndex].numPatchesHomeAndProxy;
1966  std::vector<CudaPeerRecord>& peerPatches = patchData->devData[deviceIndex].h_peerPatches;
1967  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1968 
1969  for (int i = numPatchesHome; i < numPatchesHomeAndProxy; i++) {
1970  const int index = localPatches[i].peerRecordStartIndex;
1971  const int devIdx = peerPatches[index].deviceIndex;
1972  const int peerIdx = peerPatches[index].patchIndex;
1973  const CudaLocalRecord peer = patchData->devData[devIdx].h_localPatches[peerIdx];
1974 
1975  localPatches[i].numAtoms = peer.numAtoms;
1976  localPatches[i].numAtomsNBPad = peer.numAtomsNBPad;
1977  }
1978  }
1979  CmiNodeBarrier();
1980 
1981  // Computes the offset for each patch using the atom counts
1982  if (isMasterPe) {
1983  const int deviceIndex = deviceCUDA->getDeviceIndex();
1984  const int numPatchesHomeAndProxy = patchData->devData[deviceIndex].numPatchesHomeAndProxy;
1985  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1986 
1987  int runningOffset = 0;
1988  int runningOffsetNBPad = 0;
1989  // TODO Change to a C++ prefix sum
1990  for (int i = 0; i < numPatchesHomeAndProxy; i++) {
1991  localPatches[i].bufferOffset = runningOffset;
1992  localPatches[i].bufferOffsetNBPad = runningOffsetNBPad;
1993  runningOffset += localPatches[i].numAtoms;
1994  runningOffsetNBPad += localPatches[i].numAtomsNBPad;
1995  }
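  // A minimal sketch of the prefix sum suggested by the TODO above, assuming
  // C++17 <numeric> and <algorithm> are available (std::exclusive_scan may
  // run in place); one such pass per offset field would replace this loop:
  //
  //   std::vector<int> offsets(numPatchesHomeAndProxy);
  //   std::transform(localPatches.begin(), localPatches.end(), offsets.begin(),
  //       [](const CudaLocalRecord& r) { return r.numAtoms; });
  //   std::exclusive_scan(offsets.begin(), offsets.end(), offsets.begin(), 0);
  //   for (int i = 0; i < numPatchesHomeAndProxy; i++)
  //     localPatches[i].bufferOffset = offsets[i];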
1996  }
1997  CmiNodeBarrier();
1998 
1999  // Update the peer records using the local record data
2000  if (isMasterPe) {
2001  const int deviceIndex = deviceCUDA->getDeviceIndex();
2002  const int numPatchesHomeAndProxy = patchData->devData[deviceIndex].numPatchesHomeAndProxy;
2003  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
2004  std::vector<CudaPeerRecord>& peerPatches = patchData->devData[deviceIndex].h_peerPatches;
2005 
2006 
2007  for (int i = 0; i < peerPatches.size(); i++) {
2008  const int devIdx = peerPatches[i].deviceIndex;
2009  const int peerIdx = peerPatches[i].patchIndex;
2010  const CudaLocalRecord peer = patchData->devData[devIdx].h_localPatches[peerIdx];
2011 
2012  peerPatches[i].bufferOffset = peer.bufferOffset;
2013  peerPatches[i].bufferOffsetNBPad = peer.bufferOffsetNBPad;
2014  }
2015 
2016  // Update inline copy of peer data
2017  for (int i = 0; i < numPatchesHomeAndProxy; i++) {
2018  const int numPeerRecord = localPatches[i].numPeerRecord;
2019  const int peerOffset = localPatches[i].peerRecordStartIndex;
2020 
2021  for (int j = 0; j < std::min(numPeerRecord, CudaLocalRecord::num_inline_peer); j++) {
2022  localPatches[i].inline_peers[j] = peerPatches[peerOffset+j];
2023  }
2024  }
2025  }
2026  CmiNodeBarrier();
2027 }
2028 
2029 #endif
2030 
2031 
2032 void Sequencer::integrate_SOA(int scriptTask) {
2033  //
2034  // Below when accessing the array buffers for position, velocity, force,
2035  // note that we don't want to set up pointers directly to the buffers
2036  // because the allocations might get resized after atom migration.
2037  //
2038 
2039 #ifdef TIMER_COLLECTION
2040  TimerSet& t = patch->timerSet;
2041 #endif
2042  TIMER_INIT_WIDTH(t, KICK, simParams->timerBinWidth);
2043  TIMER_INIT_WIDTH(t, MAXMOVE, simParams->timerBinWidth);
2044  TIMER_INIT_WIDTH(t, DRIFT, simParams->timerBinWidth);
2045  TIMER_INIT_WIDTH(t, PISTON, simParams->timerBinWidth);
2046  TIMER_INIT_WIDTH(t, SUBMITHALF, simParams->timerBinWidth);
2047  TIMER_INIT_WIDTH(t, VELBBK1, simParams->timerBinWidth);
2048  TIMER_INIT_WIDTH(t, VELBBK2, simParams->timerBinWidth);
2049  TIMER_INIT_WIDTH(t, RATTLE1, simParams->timerBinWidth);
2050  TIMER_INIT_WIDTH(t, SUBMITFULL, simParams->timerBinWidth);
2051  TIMER_INIT_WIDTH(t, SUBMITCOLLECT, simParams->timerBinWidth);
2052 
2053  // Keep track of the step number.
2054  int &step = patch->flags.step;
2055  step = simParams->firstTimestep;
2056 
2057  // For multiple time stepping, which force boxes are used?
2058  int &maxForceUsed = patch->flags.maxForceUsed;
2059  int &maxForceMerged = patch->flags.maxForceMerged;
2060  maxForceUsed = Results::normal;
2061  maxForceMerged = Results::normal;
2062 
2063  // Keep track of total steps and steps per cycle.
2064  const int numberOfSteps = simParams->N;
2065  //const int stepsPerCycle = simParams->stepsPerCycle;
2066  CheckStep stepsPerCycle;
2067  stepsPerCycle.init(step, simParams->stepsPerCycle);
2068  // The fundamental time step, get the scaling right for velocity units.
2069  const BigReal timestep = simParams->dt * RECIP_TIMEFACTOR;
2070 
2071  //const int nonbondedFrequency = simParams->nonbondedFrequency;
2072  //slowFreq = nonbondedFrequency;
2073  CheckStep nonbondedFrequency;
2074  slowFreq = simParams->nonbondedFrequency;
2075  // The step size for short-range nonbonded forces.
2076  const BigReal nbondstep = timestep * simParams->nonbondedFrequency;
2077  int &doNonbonded = patch->flags.doNonbonded;
2078  //doNonbonded = (step >= numberOfSteps) || !(step%nonbondedFrequency);
2079  doNonbonded = (step >= numberOfSteps) ||
2080  nonbondedFrequency.init(step, simParams->nonbondedFrequency);
2081  //if ( nonbondedFrequency == 1 ) maxForceMerged = Results::nbond;
2082  if ( nonbondedFrequency.period == 1 ) maxForceMerged = Results::nbond;
2083  if ( doNonbonded ) maxForceUsed = Results::nbond;
2084 
2085  // Do we do full electrostatics?
2086  const int dofull = ( simParams->fullElectFrequency ? 1 : 0 );
2087  //const int fullElectFrequency = simParams->fullElectFrequency;
2088  //if ( dofull ) slowFreq = fullElectFrequency;
2089  CheckStep fullElectFrequency;
2090  if ( dofull ) slowFreq = simParams->fullElectFrequency;
2091  // The step size for long-range electrostatics.
2092  const BigReal slowstep = timestep * simParams->fullElectFrequency;
2093  int &doFullElectrostatics = patch->flags.doFullElectrostatics;
2094  //doFullElectrostatics = (dofull &&
2095  // ((step >= numberOfSteps) || !(step%fullElectFrequency)));
2096  doFullElectrostatics = (dofull &&
2097  ((step >= numberOfSteps) ||
2098  fullElectFrequency.init(step, simParams->fullElectFrequency)));
2099  //if ( dofull && fullElectFrequency == 1 ) maxForceMerged = Results::slow;
2100  if ( dofull && fullElectFrequency.period == 1 ) maxForceMerged = Results::slow;
2101  if ( doFullElectrostatics ) maxForceUsed = Results::slow;
2102 
2103  // Bother to calculate energies?
2104  int &doEnergy = patch->flags.doEnergy;
2105  //int energyFrequency = simParams->outputEnergies;
2106  CheckStep energyFrequency;
2107  int newComputeEnergies = simParams->computeEnergies;
2108  if(simParams->alchOn) newComputeEnergies = NAMD_gcd(newComputeEnergies, simParams->alchOutFreq);
2109  doEnergy = energyFrequency.init(step, newComputeEnergies);
2110 
2111  // Do we need to return forces to TCL script or Colvar module?
2112  int doTcl = simParams->tclForcesOn;
2113  int doColvars = simParams->colvarsOn;
2114  int doGlobal = doTcl || doColvars;
2115  ComputeGlobal *computeGlobal = Node::Object()->computeMgr->computeGlobalObject;
2116  int &doVirial = patch->flags.doVirial;
2117  doVirial = 1;
2118 
2119  // The following flags have to be explicitly disabled in Patch object.
2120  patch->flags.doMolly = 0;
2121  patch->flags.doLoweAndersen = 0;
2122  patch->flags.doGBIS = 0;
2123  patch->flags.doLCPO = 0;
2124 
2125  // Square of maximum velocity for simulation safety check
2126  const BigReal maxvel2 =
2127  (simParams->cutoff * simParams->cutoff) / (timestep * timestep);
2128 
2129  // check for Langevin piston
2130  // set period beyond numberOfSteps to disable
2131  CheckStep langevinPistonFrequency;
2132  langevinPistonFrequency.init(step,
2133  (simParams->langevinPistonOn ? slowFreq : numberOfSteps+1 ),
2134  (simParams->langevinPistonOn ? -1-slowFreq/2 : 0) /* = delta */);
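  // With this delta, the check fires on steps satisfying
  // !((step - 1 - slowFreq/2) % slowFreq), matching the legacy test quoted
  // in the loop body below; e.g. for slowFreq = 4 the piston update runs on
  // steps 3, 7, 11, ...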
2135 
2136  // check for output
2137  // set period beyond numberOfSteps to disable
2138  CheckStep restartFrequency;
2139  restartFrequency.init(step, (simParams->restartFrequency ?
2140  simParams->restartFrequency : numberOfSteps+1) );
2141  CheckStep dcdFrequency;
2142  dcdFrequency.init(step, (simParams->dcdFrequency ?
2143  simParams->dcdFrequency : numberOfSteps+1) );
2144  CheckStep velDcdFrequency;
2145  velDcdFrequency.init(step, (simParams->velDcdFrequency ?
2146  simParams->velDcdFrequency : numberOfSteps+1) );
2147  CheckStep forceDcdFrequency;
2148  forceDcdFrequency.init(step, (simParams->forceDcdFrequency ?
2149  simParams->forceDcdFrequency : numberOfSteps+1) );
2150  CheckStep imdFrequency;
2151  imdFrequency.init(step, (simParams->IMDfreq ?
2152  simParams->IMDfreq : numberOfSteps+1) );
2153 
2154  if ( scriptTask == SCRIPT_RUN ) {
2155  // enforce rigid bond constraints on initial positions
2156  TIMER_START(t, RATTLE1);
2157  rattle1_SOA(0., 0);
2158  TIMER_STOP(t, RATTLE1);
2159 
2160  // must migrate here!
2161  int natoms = patch->patchDataSOA.numAtoms;
2162  runComputeObjects_SOA(1, step<numberOfSteps, step);
2163  // kick -0.5
2164  TIMER_START(t, KICK);
2165  addForceToMomentum_SOA(-0.5, timestep, nbondstep, slowstep,
2166 #ifndef SOA_SIMPLIFY_PARAMS
2167  patch->patchDataSOA.recipMass,
2168  patch->patchDataSOA.f_normal_x,
2169  patch->patchDataSOA.f_normal_y,
2170  patch->patchDataSOA.f_normal_z,
2171  patch->patchDataSOA.f_nbond_x,
2172  patch->patchDataSOA.f_nbond_y,
2173  patch->patchDataSOA.f_nbond_z,
2174  patch->patchDataSOA.f_slow_x,
2175  patch->patchDataSOA.f_slow_y,
2176  patch->patchDataSOA.f_slow_z,
2177  patch->patchDataSOA.vel_x,
2178  patch->patchDataSOA.vel_y,
2179  patch->patchDataSOA.vel_z,
2180  patch->patchDataSOA.numAtoms,
2181 #endif
2182  maxForceUsed
2183  );
2184  TIMER_STOP(t, KICK);
2185 
2186  TIMER_START(t, RATTLE1);
2187  rattle1_SOA(-timestep, 0);
2188  TIMER_STOP(t, RATTLE1);
2189 
2190  TIMER_START(t, SUBMITHALF);
2191  submitHalfstep_SOA(
2192 #ifndef SOA_SIMPLIFY_PARAMS
2193  patch->patchDataSOA.hydrogenGroupSize,
2194  patch->patchDataSOA.mass,
2195  patch->patchDataSOA.vel_x,
2196  patch->patchDataSOA.vel_y,
2197  patch->patchDataSOA.vel_z,
2198  patch->patchDataSOA.numAtoms
2199 #endif
2200  );
2201  TIMER_STOP(t, SUBMITHALF);
2202 
2203  // kick 1.0
2204  TIMER_START(t, KICK);
2205  addForceToMomentum_SOA(1.0, timestep, nbondstep, slowstep,
2206 #ifndef SOA_SIMPLIFY_PARAMS
2207  patch->patchDataSOA.recipMass,
2208  patch->patchDataSOA.f_normal_x,
2209  patch->patchDataSOA.f_normal_y,
2210  patch->patchDataSOA.f_normal_z,
2211  patch->patchDataSOA.f_nbond_x,
2212  patch->patchDataSOA.f_nbond_y,
2213  patch->patchDataSOA.f_nbond_z,
2214  patch->patchDataSOA.f_slow_x,
2215  patch->patchDataSOA.f_slow_y,
2216  patch->patchDataSOA.f_slow_z,
2217  patch->patchDataSOA.vel_x,
2218  patch->patchDataSOA.vel_y,
2219  patch->patchDataSOA.vel_z,
2220  patch->patchDataSOA.numAtoms,
2221 #endif
2222  maxForceUsed
2223  );
2224  TIMER_STOP(t, KICK);
2225 
2226  TIMER_START(t, RATTLE1);
2227  rattle1_SOA(timestep, 1);
2228  TIMER_STOP(t, RATTLE1);
2229 
2230  // save total force in computeGlobal
2231  if (doGlobal) {
2232  computeGlobal->saveTotalForces(patch);
2233  }
2234 
2235  TIMER_START(t, SUBMITHALF);
2236  submitHalfstep_SOA(
2237 #ifndef SOA_SIMPLIFY_PARAMS
2238  patch->patchDataSOA.hydrogenGroupSize,
2239  patch->patchDataSOA.mass,
2240  patch->patchDataSOA.vel_x,
2241  patch->patchDataSOA.vel_y,
2242  patch->patchDataSOA.vel_z,
2243  patch->patchDataSOA.numAtoms
2244 #endif
2245  );
2246  TIMER_STOP(t, SUBMITHALF);
2247 
2248  // kick -0.5
2249  TIMER_START(t, KICK);
2250  addForceToMomentum_SOA(-0.5, timestep, nbondstep, slowstep,
2251 #ifndef SOA_SIMPLIFY_PARAMS
2252  patch->patchDataSOA.recipMass,
2253  patch->patchDataSOA.f_normal_x,
2254  patch->patchDataSOA.f_normal_y,
2255  patch->patchDataSOA.f_normal_z,
2256  patch->patchDataSOA.f_nbond_x,
2257  patch->patchDataSOA.f_nbond_y,
2258  patch->patchDataSOA.f_nbond_z,
2259  patch->patchDataSOA.f_slow_x,
2260  patch->patchDataSOA.f_slow_y,
2261  patch->patchDataSOA.f_slow_z,
2262  patch->patchDataSOA.vel_x,
2263  patch->patchDataSOA.vel_y,
2264  patch->patchDataSOA.vel_z,
2265  patch->patchDataSOA.numAtoms,
2266 #endif
2267  maxForceUsed
2268  );
2269  TIMER_STOP(t, KICK);
2270 
2271  TIMER_START(t, SUBMITFULL);
2272  submitReductions_SOA(
2273 #ifndef SOA_SIMPLIFY_PARAMS
2274  patch->patchDataSOA.hydrogenGroupSize,
2275  patch->patchDataSOA.mass,
2276  patch->patchDataSOA.pos_x,
2277  patch->patchDataSOA.pos_y,
2278  patch->patchDataSOA.pos_z,
2279  patch->patchDataSOA.vel_x,
2280  patch->patchDataSOA.vel_y,
2281  patch->patchDataSOA.vel_z,
2282  patch->patchDataSOA.f_normal_x,
2283  patch->patchDataSOA.f_normal_y,
2284  patch->patchDataSOA.f_normal_z,
2285  patch->patchDataSOA.f_nbond_x,
2286  patch->patchDataSOA.f_nbond_y,
2287  patch->patchDataSOA.f_nbond_z,
2288  patch->patchDataSOA.f_slow_x,
2289  patch->patchDataSOA.f_slow_y,
2290  patch->patchDataSOA.f_slow_z,
2291  patch->patchDataSOA.numAtoms
2292 #endif
2293  );
2294  TIMER_STOP(t, SUBMITFULL);
2295 
2296  rebalanceLoad(step);
2297  } // scriptTask == SCRIPT_RUN
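  // The kick sequence above (-0.5, +1.0, -0.5) brackets the initial step:
  // it generates the half-step velocities at t-dt/2 and t+dt/2 for the two
  // half-step kinetic energy submissions, then leaves the velocities back
  // on-step before the main loop begins.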
2298 
2299 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
2300  int& eon = patch->flags.event_on;
2301  int epid = (simParams->beginEventPatchID <= patch->getPatchID()
2302  && patch->getPatchID() <= simParams->endEventPatchID);
2303  int beginStep = simParams->beginEventStep;
2304  int endStep = simParams->endEventStep;
2305  bool controlProfiling = patch->getPatchID() == 0;
2306 #endif
2307 
2308  for ( ++step; step <= numberOfSteps; ++step ) {
2309  int dcdSelectionChecks=0;
2310  Molecule *molecule = Node::Object()->molecule;
2311  for (int dcdindex = 0; dcdindex < 16; ++dcdindex)
2312  {
2313  int dcdSelectionFrequency = molecule->dcdSelectionParams[dcdindex].frequency;
2314  if(dcdSelectionFrequency && step % dcdSelectionFrequency)
2315  dcdSelectionChecks++;
2316  }
2317  const int isCollection = restartFrequency.check(step) +
2318  dcdFrequency.check(step) + velDcdFrequency.check(step) +
2319  forceDcdFrequency.check(step) + imdFrequency.check(step) +
2320  dcdSelectionChecks;
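  // Each CheckStep::check() above must actually execute, because a successful
  // check advances that stream's next scheduled step; summing the results
  // instead of using || avoids short-circuit evaluation skipping one.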
2321  const int isMigration = stepsPerCycle.check(step);
2322  doEnergy = energyFrequency.check(step);
2323  DebugM(3,"doGlobal now "<< doGlobal<<"\n"<<endi);
2324 
2325 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
2326  eon = epid && (beginStep < step && step <= endStep);
2327 
2328  if (controlProfiling && step == beginStep) {
2329  NAMD_PROFILE_START();
2330  }
2331  if (controlProfiling && step == endStep) {
2332  NAMD_PROFILE_STOP();
2333  }
2334 // NAMD_EVENT_START(eon, NamdProfileEvent::INTEGRATE_SOA_1);
2335  char buf[32];
2336  snprintf(buf, sizeof(buf), "%s: %d", NamdProfileEventStr[NamdProfileEvent::INTEGRATE_SOA_1], patch->getPatchID());
2337  NAMD_EVENT_START_EX(eon, NamdProfileEvent::INTEGRATE_SOA_1, buf);
2338 #endif
2339 
2340  if ( simParams->stochRescaleOn ) {
2341  stochRescaleVelocities_SOA(step);
2342  }
2343 
2344  if ( simParams->berendsenPressureOn ) {
2345  berendsenPressure_SOA(
2346 #ifndef SOA_SIMPLIFY_PARAMS
2347  patch->patchDataSOA.hydrogenGroupSize,
2348  patch->patchDataSOA.mass,
2349  patch->patchDataSOA.pos_x,
2350  patch->patchDataSOA.pos_y,
2351  patch->patchDataSOA.pos_z,
2352  patch->patchDataSOA.numAtoms,
2353 #endif
2354  step);
2355  }
2356 
2357  // kick 0.5
2358  TIMER_START(t, KICK);
2359  addForceToMomentum_SOA(0.5, timestep, nbondstep, slowstep,
2360 #ifndef SOA_SIMPLIFY_PARAMS
2361  patch->patchDataSOA.recipMass,
2362  patch->patchDataSOA.f_normal_x,
2363  patch->patchDataSOA.f_normal_y,
2364  patch->patchDataSOA.f_normal_z,
2365  patch->patchDataSOA.f_nbond_x,
2366  patch->patchDataSOA.f_nbond_y,
2367  patch->patchDataSOA.f_nbond_z,
2368  patch->patchDataSOA.f_slow_x,
2369  patch->patchDataSOA.f_slow_y,
2370  patch->patchDataSOA.f_slow_z,
2371  patch->patchDataSOA.vel_x,
2372  patch->patchDataSOA.vel_y,
2373  patch->patchDataSOA.vel_z,
2374  patch->patchDataSOA.numAtoms,
2375 #endif
2376  maxForceUsed
2377  );
2378  TIMER_STOP(t, KICK);
2379 
2380  // maximumMove checks velocity bound on atoms
2381  TIMER_START(t, MAXMOVE);
2382  maximumMove_SOA(timestep, maxvel2
2383 #ifndef SOA_SIMPLIFY_PARAMS
2384  ,
2385  patch->patchDataSOA.vel_x,
2386  patch->patchDataSOA.vel_y,
2387  patch->patchDataSOA.vel_z,
2388  patch->patchDataSOA.numAtoms
2389 #endif
2390  );
2391  TIMER_STOP(t, MAXMOVE);
2392 
2393 
2394  NAMD_EVENT_STOP(eon, NamdProfileEvent::INTEGRATE_SOA_1);
2395 
2396  // Check to see if Langevin piston is enabled this step:
2397  // ! ((step-1-slowFreq/2) % slowFreq)
2398  if ( langevinPistonFrequency.check(step) ) {
2399  // if (langevinPistonStep) {
2400  // drift 0.5
2401  TIMER_START(t, DRIFT);
2402  addVelocityToPosition_SOA(0.5*timestep
2403 #ifndef SOA_SIMPLIFY_PARAMS
2404  ,
2405  patch->patchDataSOA.vel_x,
2406  patch->patchDataSOA.vel_y,
2407  patch->patchDataSOA.vel_z,
2408  patch->patchDataSOA.pos_x,
2409  patch->patchDataSOA.pos_y,
2410  patch->patchDataSOA.pos_z,
2411  patch->patchDataSOA.numAtoms
2412 #endif
2413  );
2414  TIMER_STOP(t, DRIFT);
2415  // There is a blocking receive inside of langevinPiston()
2416  // that might suspend the current thread of execution,
2417  // so split profiling around this conditional block.
2418  langevinPiston_SOA(
2419 #ifndef SOA_SIMPLIFY_PARAMS
2420  patch->patchDataSOA.hydrogenGroupSize,
2421  patch->patchDataSOA.mass,
2422  patch->patchDataSOA.pos_x,
2423  patch->patchDataSOA.pos_y,
2424  patch->patchDataSOA.pos_z,
2425  patch->patchDataSOA.vel_x,
2426  patch->patchDataSOA.vel_y,
2427  patch->patchDataSOA.vel_z,
2428  patch->patchDataSOA.numAtoms,
2429 #endif
2430  step
2431  );
2432 
2433  // drift 0.5
2434  TIMER_START(t, DRIFT);
2435  addVelocityToPosition_SOA(0.5*timestep
2436 #ifndef SOA_SIMPLIFY_PARAMS
2437  ,
2438  patch->patchDataSOA.vel_x,
2439  patch->patchDataSOA.vel_y,
2440  patch->patchDataSOA.vel_z,
2441  patch->patchDataSOA.pos_x,
2442  patch->patchDataSOA.pos_y,
2443  patch->patchDataSOA.pos_z,
2444  patch->patchDataSOA.numAtoms
2445 #endif
2446  );
2447  TIMER_STOP(t, DRIFT);
2448  }
2449  else {
2450  // drift 1.0
2451  TIMER_START(t, DRIFT);
2452  addVelocityToPosition_SOA(timestep
2453 #ifndef SOA_SIMPLIFY_PARAMS
2454  ,
2455  patch->patchDataSOA.vel_x,
2456  patch->patchDataSOA.vel_y,
2457  patch->patchDataSOA.vel_z,
2458  patch->patchDataSOA.pos_x,
2459  patch->patchDataSOA.pos_y,
2460  patch->patchDataSOA.pos_z,
2461  patch->patchDataSOA.numAtoms
2462 #endif
2463  );
2464  TIMER_STOP(t, DRIFT);
2465  }
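  // On piston steps the position update is split into two half drifts so the
  // Langevin piston rescaling is applied at the midpoint of the drift;
  // otherwise a single full drift suffices.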
2466 
2467  //NAMD_EVENT_START(eon, NamdProfileEvent::INTEGRATE_SOA_2);
2468 
2469  // There are NO sends in submitHalfstep() just local summation
2470  // into the Reduction struct.
2471  TIMER_START(t, SUBMITHALF);
2472  submitHalfstep_SOA(
2473 #ifndef SOA_SIMPLIFY_PARAMS
2474  patch->patchDataSOA.hydrogenGroupSize,
2475  patch->patchDataSOA.mass,
2476  patch->patchDataSOA.vel_x,
2477  patch->patchDataSOA.vel_y,
2478  patch->patchDataSOA.vel_z,
2479  patch->patchDataSOA.numAtoms
2480 #endif
2481  );
2482  TIMER_STOP(t, SUBMITHALF);
2483 
2484  //doNonbonded = !(step%nonbondedFrequency);
2485  doNonbonded = nonbondedFrequency.check(step);
2486  //doFullElectrostatics = (dofull && !(step%fullElectFrequency));
2487  doFullElectrostatics = (dofull && fullElectFrequency.check(step));
2488 
2489  maxForceUsed = Results::normal;
2490  if ( doNonbonded ) maxForceUsed = Results::nbond;
2491  if ( doFullElectrostatics ) maxForceUsed = Results::slow;
2492 
2493  // Migrate Atoms on stepsPerCycle
2494  // Check to see if this is energy evaluation step:
2495  // doEnergy = ! ( step % energyFrequency );
2496  doVirial = 1;
2497  doKineticEnergy = 1;
2498  doMomenta = 1;
2499 
2500  //NAMD_EVENT_STOP(eon, NamdProfileEvent::INTEGRATE_SOA_2); // integrate_SOA 2
2501 
2502  // The current thread of execution will suspend in runComputeObjects().
2503  // Check to see if we are at a migration step:
2504  // runComputeObjects_SOA(!(step%stepsPerCycle), step<numberOfSteps);
2505  runComputeObjects_SOA(isMigration, step<numberOfSteps, step);
2506 
2507  NAMD_EVENT_START(eon, NamdProfileEvent::INTEGRATE_SOA_3);
2508 
2509  TIMER_START(t, VELBBK1);
2510  langevinVelocitiesBBK1_SOA(
2511  timestep
2512 #ifndef SOA_SIMPLIFY_PARAMS
2513  ,
2514  patch->patchDataSOA.langevinParam,
2515  patch->patchDataSOA.vel_x,
2516  patch->patchDataSOA.vel_y,
2517  patch->patchDataSOA.vel_z,
2518  patch->patchDataSOA.numAtoms
2519 #endif
2520  );
2521  TIMER_STOP(t, VELBBK1);
2522 
2523  // kick 1.0
2524  TIMER_START(t, KICK);
2525  addForceToMomentum_SOA(1.0, timestep, nbondstep, slowstep,
2526 #ifndef SOA_SIMPLIFY_PARAMS
2527  patch->patchDataSOA.recipMass,
2528  patch->patchDataSOA.f_normal_x,
2529  patch->patchDataSOA.f_normal_y,
2530  patch->patchDataSOA.f_normal_z,
2531  patch->patchDataSOA.f_nbond_x,
2532  patch->patchDataSOA.f_nbond_y,
2533  patch->patchDataSOA.f_nbond_z,
2534  patch->patchDataSOA.f_slow_x,
2535  patch->patchDataSOA.f_slow_y,
2536  patch->patchDataSOA.f_slow_z,
2537  patch->patchDataSOA.vel_x,
2538  patch->patchDataSOA.vel_y,
2539  patch->patchDataSOA.vel_z,
2540  patch->patchDataSOA.numAtoms,
2541 #endif
2542  maxForceUsed
2543  );
2544  TIMER_STOP(t, KICK);
2545 
2546  TIMER_START(t, VELBBK2);
2547  langevinVelocitiesBBK2_SOA(
2548  timestep
2549 #ifndef SOA_SIMPLIFY_PARAMS
2550  ,
2551  patch->patchDataSOA.langevinParam,
2552  patch->patchDataSOA.langScalVelBBK2,
2553  patch->patchDataSOA.langScalRandBBK2,
2554  patch->patchDataSOA.gaussrand_x,
2555  patch->patchDataSOA.gaussrand_y,
2556  patch->patchDataSOA.gaussrand_z,
2557  patch->patchDataSOA.vel_x,
2558  patch->patchDataSOA.vel_y,
2559  patch->patchDataSOA.vel_z,
2560  patch->patchDataSOA.numAtoms
2561 #endif
2562  );
2563  TIMER_STOP(t, VELBBK2);
2564 
2565  TIMER_START(t, RATTLE1);
2566  rattle1_SOA(timestep, 1);
2567  TIMER_STOP(t, RATTLE1);
2568 
2569  // save total force in computeGlobal
2570  if (doGlobal) {
2571  computeGlobal->saveTotalForces(patch);
2572  }
2573 
2574  TIMER_START(t, SUBMITHALF);
2575  submitHalfstep_SOA(
2576 #ifndef SOA_SIMPLIFY_PARAMS
2577  patch->patchDataSOA.hydrogenGroupSize,
2578  patch->patchDataSOA.mass,
2579  patch->patchDataSOA.vel_x,
2580  patch->patchDataSOA.vel_y,
2581  patch->patchDataSOA.vel_z,
2582  patch->patchDataSOA.numAtoms
2583 #endif
2584  );
2585  TIMER_STOP(t, SUBMITHALF);
2586 
2587  // kick -0.5
2588  TIMER_START(t, KICK);
2589  addForceToMomentum_SOA(-0.5, timestep, nbondstep, slowstep,
2590 #ifndef SOA_SIMPLIFY_PARAMS
2591  patch->patchDataSOA.recipMass,
2592  patch->patchDataSOA.f_normal_x,
2593  patch->patchDataSOA.f_normal_y,
2594  patch->patchDataSOA.f_normal_z,
2595  patch->patchDataSOA.f_nbond_x,
2596  patch->patchDataSOA.f_nbond_y,
2597  patch->patchDataSOA.f_nbond_z,
2598  patch->patchDataSOA.f_slow_x,
2599  patch->patchDataSOA.f_slow_y,
2600  patch->patchDataSOA.f_slow_z,
2601  patch->patchDataSOA.vel_x,
2602  patch->patchDataSOA.vel_y,
2603  patch->patchDataSOA.vel_z,
2604  patch->patchDataSOA.numAtoms,
2605 #endif
2606  maxForceUsed
2607  );
2608  TIMER_STOP(t, KICK);
2609 
2610  // XXX rattle2_SOA(timestep,step);
2611 
2612  TIMER_START(t, SUBMITFULL);
2613  submitReductions_SOA(
2614 #ifndef SOA_SIMPLIFY_PARAMS
2615  patch->patchDataSOA.hydrogenGroupSize,
2616  patch->patchDataSOA.mass,
2617  patch->patchDataSOA.pos_x,
2618  patch->patchDataSOA.pos_y,
2619  patch->patchDataSOA.pos_z,
2620  patch->patchDataSOA.vel_x,
2621  patch->patchDataSOA.vel_y,
2622  patch->patchDataSOA.vel_z,
2623  patch->patchDataSOA.f_normal_x,
2624  patch->patchDataSOA.f_normal_y,
2625  patch->patchDataSOA.f_normal_z,
2626  patch->patchDataSOA.f_nbond_x,
2627  patch->patchDataSOA.f_nbond_y,
2628  patch->patchDataSOA.f_nbond_z,
2629  patch->patchDataSOA.f_slow_x,
2630  patch->patchDataSOA.f_slow_y,
2631  patch->patchDataSOA.f_slow_z,
2632  patch->patchDataSOA.numAtoms
2633 #endif
2634  );
2635  TIMER_STOP(t, SUBMITFULL);
2636 #ifdef TESTPID
2637  if (1) {
2638  int pid = TESTPID;
2639  if (patch->patchID == pid) {
2640  const PatchDataSOA& p = patch->patchDataSOA;
2641  int n = p.numAtoms;
2642 #if 0
2643  fprintf(stderr, "Patch %d has %d atoms\n", pid, n);
2644  fprintf(stderr, "%3s %8s %12s %12s %12s\n",
2645  "", "id", "fnormal_x", "fnbond_x", "fslow_x");
2646  for (int i=0; i < n; i++) {
2647  int index = p.id[i];
2648  fprintf(stderr, "%3d %8d %12.8f %12.8f %12.8f\n",
2649  i, index, p.f_normal_x[i], p.f_nbond_x[i], p.f_slow_x[i]);
2650  }
2651 #else
2652  Vector *f_normal = new Vector[n];
2653  Vector *f_nbond = new Vector[n];
2654  Vector *f_slow = new Vector[n];
2655  for (int i=0; i < n; i++) {
2656  f_normal[i].x = p.f_normal_x[i];
2657  f_normal[i].y = p.f_normal_y[i];
2658  f_normal[i].z = p.f_normal_z[i];
2659  f_nbond[i].x = p.f_nbond_x[i];
2660  f_nbond[i].y = p.f_nbond_y[i];
2661  f_nbond[i].z = p.f_nbond_z[i];
2662  f_slow[i].x = p.f_slow_x[i];
2663  f_slow[i].y = p.f_slow_y[i];
2664  f_slow[i].z = p.f_slow_z[i];
2665  }
2666  TestArray_write<double>(
2667  "f_normal_good.bin", "f_normal good", (double*)f_normal, 3*n);
2668  TestArray_write<double>(
2669  "f_nbond_good.bin", "f_nbond good", (double*)f_nbond, 3*n);
2670  TestArray_write<double>(
2671  "f_slow_good.bin", "f_slow good", (double*)f_slow, 3*n);
2672  delete [] f_normal;
2673  delete [] f_nbond;
2674  delete [] f_slow;
2675 #endif
2676  }
2677  }
2678 #endif
2679 
2680  // Do collections if any of the checks above are "on."
2681  // We add the results because the checks must not be short-circuited.
2682  TIMER_START(t, SUBMITCOLLECT);
2683  if (isCollection) {
2684  submitCollections_SOA(step);
2685  }
2686  TIMER_STOP(t, SUBMITCOLLECT);
2687 
2688  NAMD_EVENT_STOP(eon, NamdProfileEvent::INTEGRATE_SOA_3); // integrate_SOA 3
2689 
2690  rebalanceLoad(step);
2691  }
2692 
2693  patch->copy_updates_to_AOS();
2694 
2695  TIMER_DONE(t);
2696  if (patch->patchID == SPECIAL_PATCH_ID) {
2697  printf("Timer collection reporting in microseconds for "
2698  "Patch %d\n", patch->patchID);
2699  TIMER_REPORT(t);
2700  }
2701 }
2702 
2703 
2704 // XXX inline it?
2705 // XXX does not handle fixed atoms
2706 // Each timestep: dt = scaling * (timestep / TIMEFACTOR);
2707 void Sequencer::addForceToMomentum_SOA(
2708  const double scaling,
2709  double dt_normal, // timestep Results::normal = 0
2710  double dt_nbond, // timestep Results::nbond = 1
2711  double dt_slow, // timestep Results::slow = 2
2712 #ifndef SOA_SIMPLIFY_PARAMS
2713  const double * __restrict recipMass,
2714  const double * __restrict f_normal_x, // force Results::normal = 0
2715  const double * __restrict f_normal_y,
2716  const double * __restrict f_normal_z,
2717  const double * __restrict f_nbond_x, // force Results::nbond = 1
2718  const double * __restrict f_nbond_y,
2719  const double * __restrict f_nbond_z,
2720  const double * __restrict f_slow_x, // force Results::slow = 2
2721  const double * __restrict f_slow_y,
2722  const double * __restrict f_slow_z,
2723  double * __restrict vel_x,
2724  double * __restrict vel_y,
2725  double * __restrict vel_z,
2726  int numAtoms,
2727 #endif
2728  int maxForceNumber
2729  ) {
2730  NAMD_EVENT_RANGE_2(patch->flags.event_on,
2731  NamdProfileEvent::ADD_FORCE_TO_MOMENTUM_SOA);
2732 
2733 #ifdef SOA_SIMPLIFY_PARAMS
2734  const double * __restrict recipMass = patch->patchDataSOA.recipMass;
2735  // force Results::normal = 0
2736  const double * __restrict f_normal_x = patch->patchDataSOA.f_normal_x;
2737  const double * __restrict f_normal_y = patch->patchDataSOA.f_normal_y;
2738  const double * __restrict f_normal_z = patch->patchDataSOA.f_normal_z;
2739  // force Results::nbond = 1
2740  const double * __restrict f_nbond_x = patch->patchDataSOA.f_nbond_x;
2741  const double * __restrict f_nbond_y = patch->patchDataSOA.f_nbond_y;
2742  const double * __restrict f_nbond_z = patch->patchDataSOA.f_nbond_z;
2743  // force Results::slow = 2
2744  const double * __restrict f_slow_x = patch->patchDataSOA.f_slow_x;
2745  const double * __restrict f_slow_y = patch->patchDataSOA.f_slow_y;
2746  const double * __restrict f_slow_z = patch->patchDataSOA.f_slow_z;
2747  double * __restrict vel_x = patch->patchDataSOA.vel_x;
2748  double * __restrict vel_y = patch->patchDataSOA.vel_y;
2749  double * __restrict vel_z = patch->patchDataSOA.vel_z;
2750  int numAtoms = patch->patchDataSOA.numAtoms;
2751 #endif
2752  //
2753  // We could combine each case into a single loop with breaks,
2754  // with all faster forces also summed, like addForceToMomentum3().
2755  //
2756  // Things to consider:
2757  // - Do we always use acceleration (f/m) instead of just plain force?
2758  // Then we could instead buffer accel_slow, accel_nbond, etc.
2759  // - We will always need one multiply, since each dt includes
2760  // also a scaling factor.
2761  //
2762 
2763 #if 0
2764  if(this->patch->getPatchID() == 538){
2765  // fprintf(stderr, "Old Positions %lf %lf %lf\n", patch->patchDataSOA.pos_x[43], patch->patchDataSOA.pos_y[43], patch->patchDataSOA.pos_z[43]);
2766  // fprintf(stderr, "Old Velocities %lf %lf %lf\n", vel_x[43], vel_y[43], vel_z[ 43]);
2767  // fprintf(stderr, "Adding forces %lf %lf %lf %lf %lf %lf %lf %lf %lf\n",
2768  // f_slow_x[43], f_slow_y[43], f_slow_z[43],
2769  // f_nbond_x[43], f_nbond_y[43], f_nbond_z[43],
2770  // f_normal_x[43], f_normal_y[43], f_normal_z[43]);
2771  fprintf(stderr, "Old Positions %lf %lf %lf\n", patch->patchDataSOA.pos_x[0], patch->patchDataSOA.pos_y[0], patch->patchDataSOA.pos_z[0]);
2772  fprintf(stderr, "Old Velocities %lf %lf %lf\n", vel_x[0], vel_y[0], vel_z[ 0]);
2773  fprintf(stderr, "Adding forces %lf %lf %lf %lf %lf %lf %lf %lf %lf\n",
2774  f_slow_x[0], f_slow_y[0], f_slow_z[0],
2775  f_nbond_x[0], f_nbond_y[0], f_nbond_z[0],
2776  f_normal_x[0], f_normal_y[0], f_normal_z[0]);
2777  }
2778 #endif
2779  switch (maxForceNumber) {
2780  case Results::slow:
2781  dt_slow *= scaling;
2782  for (int i=0; i < numAtoms; i++) {
2783  vel_x[i] += f_slow_x[i] * recipMass[i] * dt_slow;
2784  vel_y[i] += f_slow_y[i] * recipMass[i] * dt_slow;
2785  vel_z[i] += f_slow_z[i] * recipMass[i] * dt_slow;
2786  }
2787  // fall through because we will always have the "faster" forces
2788  case Results::nbond:
2789  dt_nbond *= scaling;
2790  for (int i=0; i < numAtoms; i++) {
2791  vel_x[i] += f_nbond_x[i] * recipMass[i] * dt_nbond;
2792  vel_y[i] += f_nbond_y[i] * recipMass[i] * dt_nbond;
2793  vel_z[i] += f_nbond_z[i] * recipMass[i] * dt_nbond;
2794  }
2795  // fall through because we will always have the "faster" forces
2796  case Results::normal:
2797  dt_normal *= scaling;
2798  for (int i=0; i < numAtoms; i++) {
2799  vel_x[i] += f_normal_x[i] * recipMass[i] * dt_normal;
2800  vel_y[i] += f_normal_y[i] * recipMass[i] * dt_normal;
2801  vel_z[i] += f_normal_z[i] * recipMass[i] * dt_normal;
2802  }
2803  }
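  // Note the deliberate fall-through above: e.g. a multiple time stepping
  // step with maxForceNumber == Results::nbond skips the slow-force loop but
  // still applies the nonbonded and normal contributions, each scaled by its
  // own effective timestep.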
2804 }
2805 
2806 
2807 // XXX inline it?
2808 // XXX does not handle fixed atoms
2809 // Timestep: dt = scaling * (timestep / TIMEFACTOR);
2810 void Sequencer::addVelocityToPosition_SOA(
2811  const double dt
2812 #ifndef SOA_SIMPLIFY_PARAMS
2813  ,
2814  const double * __restrict vel_x,
2815  const double * __restrict vel_y,
2816  const double * __restrict vel_z,
2817  double * __restrict pos_x,
2818  double * __restrict pos_y,
2819  double * __restrict pos_z,
2820  int numAtoms
2821 #endif
2822  ) {
2823  NAMD_EVENT_RANGE_2(patch->flags.event_on,
2824  NamdProfileEvent::ADD_VELOCITY_TO_POSITION_SOA);
2825 #ifdef SOA_SIMPLIFY_PARAMS
2826  const double * __restrict vel_x = patch->patchDataSOA.vel_x;
2827  const double * __restrict vel_y = patch->patchDataSOA.vel_y;
2828  const double * __restrict vel_z = patch->patchDataSOA.vel_z;
2829  double * __restrict pos_x = patch->patchDataSOA.pos_x;
2830  double * __restrict pos_y = patch->patchDataSOA.pos_y;
2831  double * __restrict pos_z = patch->patchDataSOA.pos_z;
2832  int numAtoms = patch->patchDataSOA.numAtoms;
2833 #endif
2834  for (int i=0; i < numAtoms; i++) {
2835  pos_x[i] += vel_x[i] * dt;
2836  pos_y[i] += vel_y[i] * dt;
2837  pos_z[i] += vel_z[i] * dt;
2838  }
2839 #if 0
2840  if(this->patch->getPatchID() == 538){
2841  fprintf(stderr, "New Positions %lf %lf %lf\n", pos_x[43], pos_y[43], pos_z[43]);
2842  fprintf(stderr, "New Velocities %lf %lf %lf\n", vel_x[43], vel_y[43], vel_z[43]);
2843  }
2844 #endif
2845 
2846 }
2847 
2848 
2849 void Sequencer::submitHalfstep_SOA(
2850 #ifndef SOA_SIMPLIFY_PARAMS
2851  const int * __restrict hydrogenGroupSize,
2852  const float * __restrict mass,
2853  const double * __restrict vel_x,
2854  const double * __restrict vel_y,
2855  const double * __restrict vel_z,
2856  int numAtoms
2857 #endif
2858  ) {
2859  NAMD_EVENT_RANGE_2(patch->flags.event_on,
2860  NamdProfileEvent::SUBMIT_HALFSTEP_SOA);
2861 #ifdef SOA_SIMPLIFY_PARAMS
2862  const int * __restrict hydrogenGroupSize = patch->patchDataSOA.hydrogenGroupSize;
2863  const float * __restrict mass = patch->patchDataSOA.mass;
2864  const double * __restrict vel_x = patch->patchDataSOA.vel_x;
2865  const double * __restrict vel_y = patch->patchDataSOA.vel_y;
2866  const double * __restrict vel_z = patch->patchDataSOA.vel_z;
2867  int numAtoms = patch->patchDataSOA.numAtoms;
2868 #endif
2869  if ( 1 /* doKineticEnergy || patch->flags.doVirial */ ) {
2870  BigReal kineticEnergy = 0;
2871  Tensor virial;
2872  for (int i=0; i < numAtoms; i++) {
2873  // scalar kineticEnergy += mass[i] * vel[i]^2
2874  kineticEnergy += mass[i] *
2875  (vel_x[i]*vel_x[i] + vel_y[i]*vel_y[i] + vel_z[i]*vel_z[i]);
2876  // tensor virial += mass[i] * outer_product(vel[i], vel[i])
2877  virial.xx += mass[i] * vel_x[i] * vel_x[i];
2878  virial.xy += mass[i] * vel_x[i] * vel_y[i];
2879  virial.xz += mass[i] * vel_x[i] * vel_z[i];
2880  virial.yx += mass[i] * vel_y[i] * vel_x[i];
2881  virial.yy += mass[i] * vel_y[i] * vel_y[i];
2882  virial.yz += mass[i] * vel_y[i] * vel_z[i];
2883  virial.zx += mass[i] * vel_z[i] * vel_x[i];
2884  virial.zy += mass[i] * vel_z[i] * vel_y[i];
2885  virial.zz += mass[i] * vel_z[i] * vel_z[i];
2886  }
2887  kineticEnergy *= 0.5 * 0.5;
2888  virial *= 0.5;
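  // The 0.5 * 0.5 factor combines the usual 1/2 of (1/2) m v^2 with a second
  // 1/2 because this routine runs twice per timestep (around the position
  // update), so the two half-step samples average into one estimate.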
2889 
2890 #ifdef NODEGROUP_FORCE_REGISTER
2891  if(simParams->CUDASOAintegrate){
2892  CUDASequencer->patchData->reduction->item(REDUCTION_HALFSTEP_KINETIC_ENERGY) += kineticEnergy;
2893  ADD_TENSOR_OBJECT(CUDASequencer->patchData->reduction,REDUCTION_VIRIAL_NORMAL,virial);
2894  }
2895  else{
2896 #endif
2897  reduction->item(REDUCTION_HALFSTEP_KINETIC_ENERGY) += kineticEnergy;
2898  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,virial);
2899 #ifdef NODEGROUP_FORCE_REGISTER
2900  }
2901 #endif
2902  }
2903 
2904  if ( 1 /* doKineticEnergy || patch->flags.doVirial */ ) {
2905  BigReal intKineticEnergy = 0;
2906  Tensor intVirialNormal;
2907  int hgs;
2908  for (int i=0; i < numAtoms; i += hgs) {
2909  // find velocity of center-of-mass of hydrogen group
2910  // calculate mass-weighted velocity
2911  hgs = hydrogenGroupSize[i];
2912  BigReal m_cm = 0;
2913  BigReal v_cm_x = 0;
2914  BigReal v_cm_y = 0;
2915  BigReal v_cm_z = 0;
2916  for (int j = i; j < (i+hgs); j++) {
2917  m_cm += mass[j];
2918  v_cm_x += mass[j] * vel_x[j];
2919  v_cm_y += mass[j] * vel_y[j];
2920  v_cm_z += mass[j] * vel_z[j];
2921  }
2922  BigReal recip_m_cm = 1.0 / m_cm;
2923  v_cm_x *= recip_m_cm;
2924  v_cm_y *= recip_m_cm;
2925  v_cm_z *= recip_m_cm;
2926  // sum virial contributions wrt vel center-of-mass
2927  for (int j = i; j < (i+hgs); j++) {
2928  BigReal dv_x = vel_x[j] - v_cm_x;
2929  BigReal dv_y = vel_y[j] - v_cm_y;
2930  BigReal dv_z = vel_z[j] - v_cm_z;
2931  // scalar intKineticEnergy += mass[j] * dot_product(vel[j], dv)
2932  intKineticEnergy += mass[j] *
2933  (vel_x[j] * dv_x + vel_y[j] * dv_y + vel_z[j] * dv_z);
2934  // tensor intVirialNormal += mass[j] * outer_product(vel[j], dv)
2935  intVirialNormal.xx += mass[j] * vel_x[j] * dv_x;
2936  intVirialNormal.xy += mass[j] * vel_x[j] * dv_y;
2937  intVirialNormal.xz += mass[j] * vel_x[j] * dv_z;
2938  intVirialNormal.yx += mass[j] * vel_y[j] * dv_x;
2939  intVirialNormal.yy += mass[j] * vel_y[j] * dv_y;
2940  intVirialNormal.yz += mass[j] * vel_y[j] * dv_z;
2941  intVirialNormal.zx += mass[j] * vel_z[j] * dv_x;
2942  intVirialNormal.zy += mass[j] * vel_z[j] * dv_y;
2943  intVirialNormal.zz += mass[j] * vel_z[j] * dv_z;
2944  }
2945  }
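  // The "internal" kinetic energy and virial are measured relative to each
  // hydrogen group's center-of-mass velocity, which removes whole-group
  // translation; the group-based pressure evaluation relies on these.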
2946  intKineticEnergy *= 0.5 * 0.5;
2947  intVirialNormal *= 0.5;
2948 #ifdef NODEGROUP_FORCE_REGISTER
2949  if(simParams->CUDASOAintegrate){
2950  CUDASequencer->patchData->reduction->item(REDUCTION_INT_HALFSTEP_KINETIC_ENERGY) += intKineticEnergy;
2951  ADD_TENSOR_OBJECT(CUDASequencer->patchData->reduction,REDUCTION_INT_VIRIAL_NORMAL, intVirialNormal);
2952  }
2953  else{
2954 #endif
2955  reduction->item(REDUCTION_INT_HALFSTEP_KINETIC_ENERGY)
2956  += intKineticEnergy;
2957  ADD_TENSOR_OBJECT(reduction, REDUCTION_INT_VIRIAL_NORMAL,
2958  intVirialNormal);
2959 #ifdef NODEGROUP_FORCE_REGISTER
2960  }
2961 #endif
2962  }
2963 }
2964 
2965 
2966 //
2967 // XXX
2968 //
2969 void Sequencer::submitReductions_SOA(
2970 #ifndef SOA_SIMPLIFY_PARAMS
2971  const int * __restrict hydrogenGroupSize,
2972  const float * __restrict mass,
2973  const double * __restrict pos_x,
2974  const double * __restrict pos_y,
2975  const double * __restrict pos_z,
2976  const double * __restrict vel_x,
2977  const double * __restrict vel_y,
2978  const double * __restrict vel_z,
2979  const double * __restrict f_normal_x,
2980  const double * __restrict f_normal_y,
2981  const double * __restrict f_normal_z,
2982  const double * __restrict f_nbond_x,
2983  const double * __restrict f_nbond_y,
2984  const double * __restrict f_nbond_z,
2985  const double * __restrict f_slow_x,
2986  const double * __restrict f_slow_y,
2987  const double * __restrict f_slow_z,
2988  int numAtoms
2989 #endif
2990  ) {
2991  NAMD_EVENT_RANGE_2(patch->flags.event_on,
2992  NamdProfileEvent::SUBMIT_REDUCTIONS_SOA);
2993 #ifdef SOA_SIMPLIFY_PARAMS
2994  const int * __restrict hydrogenGroupSize = patch->patchDataSOA.hydrogenGroupSize;
2995  const float * __restrict mass = patch->patchDataSOA.mass;
2996  const double * __restrict pos_x = patch->patchDataSOA.pos_x;
2997  const double * __restrict pos_y = patch->patchDataSOA.pos_y;
2998  const double * __restrict pos_z = patch->patchDataSOA.pos_z;
2999  const double * __restrict vel_x = patch->patchDataSOA.vel_x;
3000  const double * __restrict vel_y = patch->patchDataSOA.vel_y;
3001  const double * __restrict vel_z = patch->patchDataSOA.vel_z;
3002  const double * __restrict f_normal_x = patch->patchDataSOA.f_normal_x;
3003  const double * __restrict f_normal_y = patch->patchDataSOA.f_normal_y;
3004  const double * __restrict f_normal_z = patch->patchDataSOA.f_normal_z;
3005  const double * __restrict f_nbond_x = patch->patchDataSOA.f_nbond_x;
3006  const double * __restrict f_nbond_y = patch->patchDataSOA.f_nbond_y;
3007  const double * __restrict f_nbond_z = patch->patchDataSOA.f_nbond_z;
3008  const double * __restrict f_slow_x = patch->patchDataSOA.f_slow_x;
3009  const double * __restrict f_slow_y = patch->patchDataSOA.f_slow_y;
3010  const double * __restrict f_slow_z = patch->patchDataSOA.f_slow_z;
3011  int numAtoms = patch->patchDataSOA.numAtoms;
3012 #endif
3013 
3014 #ifdef NODEGROUP_FORCE_REGISTER
3015  if(simParams->CUDASOAintegrate){
3016  CUDASequencer->patchData->reduction->item(REDUCTION_ATOM_CHECKSUM) += numAtoms;
3017  CUDASequencer->patchData->reduction->item(REDUCTION_MARGIN_VIOLATIONS) += patch->marginViolations;
3018  }else{
3019 #endif
3020  reduction->item(REDUCTION_ATOM_CHECKSUM) += numAtoms;
3021  reduction->item(REDUCTION_MARGIN_VIOLATIONS) += patch->marginViolations;
3022 #ifdef NODEGROUP_FORCE_REGISTER
3023  }
3024 #endif
3025 
3026  if ( 1 /* doKineticEnergy || doMomenta || patch->flags.doVirial */ ) {
3027  BigReal kineticEnergy = 0;
3028  BigReal momentum_x = 0;
3029  BigReal momentum_y = 0;
3030  BigReal momentum_z = 0;
3031  BigReal angularMomentum_x = 0;
3032  BigReal angularMomentum_y = 0;
3033  BigReal angularMomentum_z = 0;
3034  BigReal origin_x = patch->lattice.origin().x;
3035  BigReal origin_y = patch->lattice.origin().y;
3036  BigReal origin_z = patch->lattice.origin().z;
3037 
3038  // XXX pairInteraction
3039 
3040  for (int i=0; i < numAtoms; i++) {
3041 
3042  // scalar kineticEnergy += mass[i] * dot_product(vel[i], vel[i])
3043  kineticEnergy += mass[i] *
3044  (vel_x[i]*vel_x[i] + vel_y[i]*vel_y[i] + vel_z[i]*vel_z[i]);
3045 
3046  // vector momentum += mass[i] * vel[i]
3047  momentum_x += mass[i] * vel_x[i];
3048  momentum_y += mass[i] * vel_y[i];
3049  momentum_z += mass[i] * vel_z[i];
3050 
3051  // vector dpos = pos[i] - origin
3052  BigReal dpos_x = pos_x[i] - origin_x;
3053  BigReal dpos_y = pos_y[i] - origin_y;
3054  BigReal dpos_z = pos_z[i] - origin_z;
3055 
3056  // vector angularMomentum += mass[i] * cross_product(dpos, vel[i])
3057  angularMomentum_x += mass[i] * (dpos_y*vel_z[i] - dpos_z*vel_y[i]);
3058  angularMomentum_y += mass[i] * (dpos_z*vel_x[i] - dpos_x*vel_z[i]);
3059  angularMomentum_z += mass[i] * (dpos_x*vel_y[i] - dpos_y*vel_x[i]);
3060  }
3061 
3062  // XXX missing Drude
3063 
3064  kineticEnergy *= 0.5;
3065  Vector momentum(momentum_x, momentum_y, momentum_z);
3066  Vector angularMomentum(angularMomentum_x, angularMomentum_y,
3067  angularMomentum_z);
3068 
3069 #ifdef NODEGROUP_FORCE_REGISTER
3070  if(simParams->CUDASOAintegrate){
3071  CUDASequencer->patchData->reduction->item(REDUCTION_CENTERED_KINETIC_ENERGY) += kineticEnergy;
3072  ADD_VECTOR_OBJECT(CUDASequencer->patchData->reduction,REDUCTION_MOMENTUM,momentum);
3073  ADD_VECTOR_OBJECT(CUDASequencer->patchData->reduction,REDUCTION_ANGULAR_MOMENTUM,angularMomentum);
3074  }else{
3075 #endif
3076  reduction->item(REDUCTION_CENTERED_KINETIC_ENERGY) += kineticEnergy;
3077  ADD_VECTOR_OBJECT(reduction,REDUCTION_MOMENTUM,momentum);
3078  ADD_VECTOR_OBJECT(reduction,REDUCTION_ANGULAR_MOMENTUM,angularMomentum);
3079 #ifdef NODEGROUP_FORCE_REGISTER
3080  }
3081 #endif
3082  }
3083  // For non-Multigrator doKineticEnergy = 1 always
3084  if ( 1 /* doKineticEnergy || patch->flags.doVirial */ ) {
3085  BigReal intKineticEnergy = 0;
3086  Tensor intVirialNormal;
3087  Tensor intVirialNbond;
3088  Tensor intVirialSlow;
3089 
3090  int hgs = 1; // hydrogen group size
3091  for (int i=0; i < numAtoms; i += hgs) {
3092  hgs = hydrogenGroupSize[i];
3093  int j;
3094  BigReal m_cm = 0;
3095  BigReal r_cm_x = 0;
3096  BigReal r_cm_y = 0;
3097  BigReal r_cm_z = 0;
3098  BigReal v_cm_x = 0;
3099  BigReal v_cm_y = 0;
3100  BigReal v_cm_z = 0;
3101  for ( j = i; j < (i+hgs); ++j ) {
3102  m_cm += mass[j];
3103  r_cm_x += mass[j] * pos_x[j];
3104  r_cm_y += mass[j] * pos_y[j];
3105  r_cm_z += mass[j] * pos_z[j];
3106  v_cm_x += mass[j] * vel_x[j];
3107  v_cm_y += mass[j] * vel_y[j];
3108  v_cm_z += mass[j] * vel_z[j];
3109  }
3110  BigReal inv_m_cm = namd_reciprocal(m_cm);
3111  r_cm_x *= inv_m_cm;
3112  r_cm_y *= inv_m_cm;
3113  r_cm_z *= inv_m_cm;
3114  v_cm_x *= inv_m_cm;
3115  v_cm_y *= inv_m_cm;
3116  v_cm_z *= inv_m_cm;
3117 
3118  // XXX removed pairInteraction
3119  for ( j = i; j < (i+hgs); ++j ) {
3120  // XXX removed fixed atoms
3121 
3122  // vector vel[j] used twice below
3123  BigReal v_x = vel_x[j];
3124  BigReal v_y = vel_y[j];
3125  BigReal v_z = vel_z[j];
3126 
3127  // vector dv = vel[j] - v_cm
3128  BigReal dv_x = v_x - v_cm_x;
3129  BigReal dv_y = v_y - v_cm_y;
3130  BigReal dv_z = v_z - v_cm_z;
3131 
3132  // scalar intKineticEnergy += mass[j] * dot_product(v, dv)
3133  intKineticEnergy += mass[j] *
3134  (v_x * dv_x + v_y * dv_y + v_z * dv_z);
3135 
3136  // vector dr = pos[j] - r_cm
3137  BigReal dr_x = pos_x[j] - r_cm_x;
3138  BigReal dr_y = pos_y[j] - r_cm_y;
3139  BigReal dr_z = pos_z[j] - r_cm_z;
3140 
3141  // tensor intVirialNormal += outer_product(f_normal[j], dr)
3142  intVirialNormal.xx += f_normal_x[j] * dr_x;
3143  intVirialNormal.xy += f_normal_x[j] * dr_y;
3144  intVirialNormal.xz += f_normal_x[j] * dr_z;
3145  intVirialNormal.yx += f_normal_y[j] * dr_x;
3146  intVirialNormal.yy += f_normal_y[j] * dr_y;
3147  intVirialNormal.yz += f_normal_y[j] * dr_z;
3148  intVirialNormal.zx += f_normal_z[j] * dr_x;
3149  intVirialNormal.zy += f_normal_z[j] * dr_y;
3150  intVirialNormal.zz += f_normal_z[j] * dr_z;
3151 
3152  // tensor intVirialNbond += outer_product(f_nbond[j], dr)
3153  intVirialNbond.xx += f_nbond_x[j] * dr_x;
3154  intVirialNbond.xy += f_nbond_x[j] * dr_y;
3155  intVirialNbond.xz += f_nbond_x[j] * dr_z;
3156  intVirialNbond.yx += f_nbond_y[j] * dr_x;
3157  intVirialNbond.yy += f_nbond_y[j] * dr_y;
3158  intVirialNbond.yz += f_nbond_y[j] * dr_z;
3159  intVirialNbond.zx += f_nbond_z[j] * dr_x;
3160  intVirialNbond.zy += f_nbond_z[j] * dr_y;
3161  intVirialNbond.zz += f_nbond_z[j] * dr_z;
3162 
3163  // tensor intVirialSlow += outer_product(f_slow[j], dr)
3164  intVirialSlow.xx += f_slow_x[j] * dr_x;
3165  intVirialSlow.xy += f_slow_x[j] * dr_y;
3166  intVirialSlow.xz += f_slow_x[j] * dr_z;
3167  intVirialSlow.yx += f_slow_y[j] * dr_x;
3168  intVirialSlow.yy += f_slow_y[j] * dr_y;
3169  intVirialSlow.yz += f_slow_y[j] * dr_z;
3170  intVirialSlow.zx += f_slow_z[j] * dr_x;
3171  intVirialSlow.zy += f_slow_z[j] * dr_y;
3172  intVirialSlow.zz += f_slow_z[j] * dr_z;
3173  }
3174  }
3175 
3176  intKineticEnergy *= 0.5;
3177 
3178 #ifdef NODEGROUP_FORCE_REGISTER
3179  if(simParams->CUDASOAintegrate){
3180  // JM: Every PE will have its own copy of CUDASequencer, since it's a
3181  // group. However, they all share the same nodegroup pointer,
3182  // which means the reduction object is the same across all PEs
3183  CUDASequencer->patchData->reduction->item(REDUCTION_INT_CENTERED_KINETIC_ENERGY) += intKineticEnergy;
3184  ADD_TENSOR_OBJECT(CUDASequencer->patchData->reduction,REDUCTION_INT_VIRIAL_NORMAL,intVirialNormal);
3185  ADD_TENSOR_OBJECT(CUDASequencer->patchData->reduction,REDUCTION_INT_VIRIAL_NBOND,intVirialNbond);
3186  ADD_TENSOR_OBJECT(CUDASequencer->patchData->reduction,REDUCTION_INT_VIRIAL_SLOW,intVirialSlow);
3187  }else{
3188 #endif
3189  reduction->item(REDUCTION_INT_CENTERED_KINETIC_ENERGY) += intKineticEnergy;
3190  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NORMAL,intVirialNormal);
3191  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NBOND,intVirialNbond);
3192  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_SLOW,intVirialSlow);
3193 #ifdef NODEGROUP_FORCE_REGISTER
3194  }
3195 #endif
3196  }
3197  // XXX removed pressure profile
3198 
3199  // XXX removed fixed atoms
3200 
3201  reduction->submit();
3202 
3203  // XXX removed pressure profile reduction
3204 }
3205 
3206 
3207 void Sequencer::submitCollections_SOA(int step, int zeroVel /* = 0 */)
3208 {
3209  //
3210  // Copy updates of SOA back into AOS for collections.
3211  //
3212  // XXX Could update positions and velocities separately.
3213  //
3214  NAMD_EVENT_RANGE_2(patch->flags.event_on,
3215  NamdProfileEvent::SUBMIT_COLLECTIONS_SOA);
3216  //
3217  // XXX Poor implementation here!
3218  // The selector functions called below in Output.C are
3219  // doing several tests and in an average use case calculating
3220  // at least two mod functions.
3221  //
3222  // However, most steps are NOT output steps!
3223  //
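  // One possible cleanup (a sketch, not what is implemented here): track the
  // next output step of each stream with a CheckStep, as integrate_SOA already
  // does for the restart/DCD/IMD frequencies, e.g. with a hypothetical
  // posOutput counter:
  //
  //   if (posOutput.check(step)) { /* submit positions */ }
  //
  // so the common non-output step costs one integer compare per stream.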
3224  int is_pos_needed;
3225  int dcdIndex;
3226  std::tie(is_pos_needed, dcdIndex)= Output::coordinateNeeded(step);
3227  int is_vel_needed = Output::velocityNeeded(step);
3228  int is_f_needed = Output::forceNeeded(step);
3229  if (!simParams->useDeviceMigration) { // This is already done for GPU migration
3230  if ( is_pos_needed || is_vel_needed ) {
3231  patch->copy_updates_to_AOS();
3232  }
3233  if (is_f_needed) {
3234  patch->copy_forces_to_AOS();
3235  }
3236  }
3237  if ( is_pos_needed ) {
3238 #ifndef MEM_OPT_VERSION
3239  collection->submitPositions(step,patch->atom,patch->lattice,is_pos_needed,dcdIndex);
3240 #else
3241  collection->submitPositions(step,patch->atom,patch->lattice,is_pos_needed);
3242 #endif
3243  }
3244  if ( is_vel_needed ) {
3245  collection->submitVelocities(step,zeroVel,patch->atom);
3246  }
3247  if ( is_f_needed ) {
3248  int maxForceUsed = patch->flags.maxForceUsed;
3249  if ( maxForceUsed > Results::slow ) maxForceUsed = Results::slow;
3250  collection->submitForces(step,patch->atom,maxForceUsed,patch->f);
3251  }
3252 }
3253 
3254 
3256  const double dt,
3257  const double maxvel2
3258 #ifndef SOA_SIMPLIFY_PARAMS
3259  ,
3260  const double * __restrict vel_x,
3261  const double * __restrict vel_y,
3262  const double * __restrict vel_z,
3263  int numAtoms
3264 #endif
3265  ) {
3266  NAMD_EVENT_RANGE_2(patch->flags.event_on, NamdProfileEvent::MAXIMUM_MOVE_SOA);
3267 #ifdef SOA_SIMPLIFY_PARAMS
3268  const double * __restrict vel_x = patch->patchDataSOA.vel_x;
3269  const double * __restrict vel_y = patch->patchDataSOA.vel_y;
3270  const double * __restrict vel_z = patch->patchDataSOA.vel_z;
3271  int numAtoms = patch->patchDataSOA.numAtoms;
3272 #endif
3273 
3274  // XXX missing maximum move
3275 
3276  // Loop vectorizes when replacing logical OR with summing.
3277  int killme = 0;
3278  for (int i=0; i < numAtoms; i++) {
3279  BigReal vel2 =
3280  vel_x[i] * vel_x[i] + vel_y[i] * vel_y[i] + vel_z[i] * vel_z[i];
3281  killme = killme + ( vel2 > maxvel2 );
3282  }
3283  if (killme) {
3284  // Found at least one atom that is moving too fast.
3285  // Terminating, so loop performance below doesn't matter.
3286  // Loop does not vectorize.
3287  killme = 0;
3288  for (int i=0; i < numAtoms; i++) {
3289  BigReal vel2 =
3290  vel_x[i] * vel_x[i] + vel_y[i] * vel_y[i] + vel_z[i] * vel_z[i];
3291  if (vel2 > maxvel2) {
3292  const FullAtom *a = patch->atom.begin();
3293  const Vector vel(vel_x[i], vel_y[i], vel_z[i]);
3294  const BigReal maxvel = sqrt(maxvel2);
3295  ++killme;
3296  iout << iERROR << "Atom " << (a[i].id + 1) << " velocity is "
3297  << ( PDBVELFACTOR * vel ) << " (limit is "
3298  << ( PDBVELFACTOR * maxvel ) << ", atom "
3299  << i << " of " << numAtoms << " on patch "
3300  << patch->patchID << " pe " << CkMyPe() << ")\n" << endi;
3301  }
3302  }
3303  iout << iERROR <<
3304  "Atoms moving too fast; simulation has become unstable ("
3305  << killme << " atoms on patch " << patch->patchID
3306  << " pe " << CkMyPe() << ").\n" << endi;
3307  Node::Object()->enableEarlyExit();
3308  terminate();
3309  }
3310 }
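//
// Editorial sketch (not in the original source): maximumMove_SOA() uses a
// two-pass pattern worth noting: a fast first scan that sums comparison
// results (which vectorizes, unlike a logical-OR reduction), and a slow
// diagnostic pass taken only when an offender was found.  A standalone
// version, with illustrative names:
//
#if 0 // editorial sketch
#include <cmath>
#include <cstdio>

// Returns the number of atoms whose squared speed exceeds maxvel2.
static int check_speed_limit(const double *vx, const double *vy,
                             const double *vz, int n, double maxvel2)
{
  int over = 0;
  for (int i = 0; i < n; i++) {  // hot path: vectorizable sum reduction
    double v2 = vx[i]*vx[i] + vy[i]*vy[i] + vz[i]*vz[i];
    over += (v2 > maxvel2);
  }
  if (over) {  // rare path: we are terminating anyway, speed is irrelevant
    for (int i = 0; i < n; i++) {
      double v2 = vx[i]*vx[i] + vy[i]*vy[i] + vz[i]*vz[i];
      if (v2 > maxvel2)
        fprintf(stderr, "atom %d speed %g exceeds limit %g\n",
                i, std::sqrt(v2), std::sqrt(maxvel2));
    }
  }
  return over;
}
#endif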
3311 
3312 
3313 void Sequencer::langevinVelocitiesBBK1_SOA(
3314  BigReal timestep
3315 #ifndef SOA_SIMPLIFY_PARAMS
3316  ,
3317  const float * __restrict langevinParam,
3318  double * __restrict vel_x,
3319  double * __restrict vel_y,
3320  double * __restrict vel_z,
3321  int numAtoms
3322 #endif
3323  ) {
3324  NAMD_EVENT_RANGE_2(patch->flags.event_on,
3325  NamdProfileEvent::LANGEVIN_VELOCITIES_BBK1_SOA);
3326 #ifdef SOA_SIMPLIFY_PARAMS
3327  const float * __restrict langevinParam = patch->patchDataSOA.langevinParam;
3328  double * __restrict vel_x = patch->patchDataSOA.vel_x;
3329  double * __restrict vel_y = patch->patchDataSOA.vel_y;
3330  double * __restrict vel_z = patch->patchDataSOA.vel_z;
3331  int numAtoms = patch->patchDataSOA.numAtoms;
3332 #endif
3333  if ( simParams->langevinOn /* && !simParams->langevin_useBAOAB */ )
3334  {
3335  // scale by TIMEFACTOR to convert to fs and then by 0.001 to ps
3336  // multiply by the Langevin damping coefficient, units 1/ps
3337  // XXX we could instead store time-scaled Langevin parameters
3338  BigReal dt = timestep * (0.001 * TIMEFACTOR);
3339 
3340  // XXX missing Drude
3341 
3342  //
3343  // A conditional inside the loop would prevent vectorization and
3344  // wouldn't avoid much work, since addition and multiplication are cheap.
3345  //
3346  for (int i=0; i < numAtoms; i++) {
3347  BigReal dt_gamma = dt * langevinParam[i];
3348  //if ( ! dt_gamma ) continue;
3349 
3350  BigReal scaling = 1. - 0.5 * dt_gamma;
3351  vel_x[i] *= scaling;
3352  vel_y[i] *= scaling;
3353  vel_z[i] *= scaling;
3354  }
3355  } // end if langevinOn
3356 }
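//
// Editorial sketch (not in the original source): the BBK first half-step
// above reduces each velocity by the factor (1 - dt*gamma/2), with gamma
// the per-atom Langevin damping coefficient in 1/ps and dt the timestep
// already converted to ps.  In isolation, with illustrative names:
//
#if 0 // editorial sketch
static void bbk1_damp(double *vx, double *vy, double *vz,
                      const float *gamma, int n, double dt_ps)
{
  for (int i = 0; i < n; i++) {
    double s = 1.0 - 0.5 * dt_ps * gamma[i];  // gamma==0 leaves v unchanged
    vx[i] *= s;
    vy[i] *= s;
    vz[i] *= s;
  }
}
#endif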
3357 
3358 
3359 void Sequencer::langevinVelocitiesBBK2_SOA(
3360  BigReal timestep
3361 #ifndef SOA_SIMPLIFY_PARAMS
3362  ,
3363  const float * __restrict langevinParam,
3364  const float * __restrict langScalVelBBK2,
3365  const float * __restrict langScalRandBBK2,
3366  float * __restrict gaussrand_x,
3367  float * __restrict gaussrand_y,
3368  float * __restrict gaussrand_z,
3369  double * __restrict vel_x,
3370  double * __restrict vel_y,
3371  double * __restrict vel_z,
3372  int numAtoms
3373 #endif
3374  )
3375 {
3376  NAMD_EVENT_RANGE_2(patch->flags.event_on,
3377  NamdProfileEvent::LANGEVIN_VELOCITIES_BBK2_SOA);
3378 #ifdef SOA_SIMPLIFY_PARAMS
3379  const float * __restrict langevinParam = patch->patchDataSOA.langevinParam;
3380  const float * __restrict langScalVelBBK2 = patch->patchDataSOA.langScalVelBBK2;
3381  const float * __restrict langScalRandBBK2 = patch->patchDataSOA.langScalRandBBK2;
3382  float * __restrict gaussrand_x = patch->patchDataSOA.gaussrand_x;
3383  float * __restrict gaussrand_y = patch->patchDataSOA.gaussrand_y;
3384  float * __restrict gaussrand_z = patch->patchDataSOA.gaussrand_z;
3385  double * __restrict vel_x = patch->patchDataSOA.vel_x;
3386  double * __restrict vel_y = patch->patchDataSOA.vel_y;
3387  double * __restrict vel_z = patch->patchDataSOA.vel_z;
3388  int numAtoms = patch->patchDataSOA.numAtoms;
3389 #endif
3390  if ( simParams->langevinOn /* && !simParams->langevin_useBAOAB */ )
3391  {
3392  // XXX missing Drude
3393 
3394  // Scale by TIMEFACTOR to convert to fs and then by 0.001 to ps
3395  // multiply by the Langevin damping coefficient, units 1/ps.
3396  // XXX we could instead store time-scaled Langevin parameters
3397  BigReal dt = timestep * (0.001 * TIMEFACTOR);
3398  // Buffer the Gaussian random numbers
3399  if ( simParams->langevinGammasDiffer ) {
3400  // Must re-satisfy constraints if Langevin gammas differ.
3401  // (conserve momentum?)
3402  TIMER_START(patch->timerSet, RATTLE1);
3403  rattle1_SOA(timestep, 1);
3404  TIMER_STOP(patch->timerSet, RATTLE1);
3405  //
3406  // We don't need random numbers for atoms with gamma=0.
3407  // If gammas differ, the likely case is that we aren't applying
3408  // Langevin damping to hydrogen, making those langevinParam=0,
3409  // in which case we need only numAtoms/3 random vectors.
3410  //
3411  // XXX can refine code below: count in advance how many random
3412  // numbers we need, so the Random array-filling routine can be used.
3413  //
3414  // XXX Loop does not vectorize!
3415  for (int i=0; i < numAtoms; i++) {
3416  Vector rg; // = 0
3417  if (langevinParam[i] != 0) rg = random->gaussian_vector();
3418  gaussrand_x[i] = float(rg.x);
3419  gaussrand_y[i] = float(rg.y);
3420  gaussrand_z[i] = float(rg.z);
3421  }
3422  }
3423  else {
3424  // Need to completely fill random number arrays.
3425  random->gaussian_array_f(gaussrand_x, numAtoms);
3426  random->gaussian_array_f(gaussrand_y, numAtoms);
3427  random->gaussian_array_f(gaussrand_z, numAtoms);
3428  }
3429 
3430  // do the velocity updates
3431  for (int i=0; i < numAtoms; i++) {
3432  vel_x[i] += gaussrand_x[i] * langScalRandBBK2[i];
3433  vel_y[i] += gaussrand_y[i] * langScalRandBBK2[i];
3434  vel_z[i] += gaussrand_z[i] * langScalRandBBK2[i];
3435  vel_x[i] *= langScalVelBBK2[i];
3436  vel_y[i] *= langScalVelBBK2[i];
3437  vel_z[i] *= langScalVelBBK2[i];
3438  }
3439  } // end if langevinOn
3440 }
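//
// Editorial sketch (not in the original source): the BBK second half-step
// above adds a Gaussian kick and then rescales, v = (v + R*a) * b, where R
// is a unit-variance Gaussian sample and, per atom, a folds in something
// like sqrt(2*gamma*kB*T*dt)/m while b is 1/(1 + dt*gamma/2) -- the exact
// factors live in the precomputed langScalRandBBK2/langScalVelBBK2 arrays.
// A standalone version of the update loop, names illustrative:
//
#if 0 // editorial sketch
static void bbk2_kick(double *vx, double *vy, double *vz,
                      const float *Rx, const float *Ry, const float *Rz,
                      const float *scalRand, const float *scalVel, int n)
{
  for (int i = 0; i < n; i++) {
    vx[i] = (vx[i] + Rx[i] * scalRand[i]) * scalVel[i];
    vy[i] = (vy[i] + Ry[i] * scalRand[i]) * scalVel[i];
    vz[i] = (vz[i] + Rz[i] * scalRand[i]) * scalVel[i];
  }
}
#endif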
3441 
3442 void Sequencer::berendsenPressure_SOA(
3443 #ifndef SOA_SIMPLIFY_PARAMS
3444  const int * __restrict hydrogenGroupSize,
3445  const float * __restrict mass,
3446  double * __restrict pos_x,
3447  double * __restrict pos_y,
3448  double * __restrict pos_z,
3449  int numAtoms,
3450 #endif
3451  int step)
3452 {
3453 #ifdef SOA_SIMPLIFY_PARAMS
3454  const int * __restrict hydrogenGroupSize = patch->patchDataSOA.hydrogenGroupSize;
3455  const float * __restrict mass = patch->patchDataSOA.mass;
3456  double * __restrict pos_x = patch->patchDataSOA.pos_x;
3457  double * __restrict pos_y = patch->patchDataSOA.pos_y;
3458  double * __restrict pos_z = patch->patchDataSOA.pos_z;
3459  int numAtoms = patch->patchDataSOA.numAtoms;
3460 #endif
3461 
3462  //
3463  // The loops below would simplify if we lifted out the special cases of
3464  // fixed atoms and pressure-excluded atoms into their own branches.
3465  //
3466 
3467  if ( simParams->berendsenPressureOn ) {
3470  // Blocking receive for the updated lattice scaling factor.
3471  Tensor factor = broadcast->positionRescaleFactor.get(step);
3472  patch->lattice.rescale(factor);
3473  Vector origin = patch->lattice.origin();
3474 
3475  if ( simParams->useGroupPressure ) {
3476  int hgs;
3477  for (int i = 0; i < numAtoms; i += hgs) {
3478  int j;
3479  hgs = hydrogenGroupSize[i];
3480  // missing fixed atoms implementation
3481  BigReal m_cm = 0;
3482  BigReal r_cm_x = 0;
3483  BigReal r_cm_y = 0;
3484  BigReal r_cm_z = 0;
3485  // calculate the center of mass
3486  for ( j = i; j < (i+hgs); ++j ) {
3487  m_cm += mass[j];
3488  r_cm_x += mass[j] * pos_x[j];
3489  r_cm_y += mass[j] * pos_y[j];
3490  r_cm_z += mass[j] * pos_z[j];
3491  }
3492  BigReal inv_m_cm = namd_reciprocal(m_cm);
3493  r_cm_x *= inv_m_cm;
3494  r_cm_y *= inv_m_cm;
3495  r_cm_z *= inv_m_cm;
3496  // scale the center of mass with factor
3497  // shift to origin
3498  double tx = r_cm_x - origin.x;
3499  double ty = r_cm_y - origin.y;
3500  double tz = r_cm_z - origin.z;
3501  // apply transformation
3502  double new_r_cm_x = factor.xx*tx + factor.xy*ty + factor.xz*tz;
3503  double new_r_cm_y = factor.yx*tx + factor.yy*ty + factor.yz*tz;
3504  double new_r_cm_z = factor.zx*tx + factor.zy*ty + factor.zz*tz;
3505  // shift back
3506  new_r_cm_x += origin.x;
3507  new_r_cm_y += origin.y;
3508  new_r_cm_z += origin.z;
3509  // translation vector from the old COM to the new COM
3510  double delta_r_cm_x = new_r_cm_x - r_cm_x;
3511  double delta_r_cm_y = new_r_cm_y - r_cm_y;
3512  double delta_r_cm_z = new_r_cm_z - r_cm_z;
3513  // shift the hydrogen group with translation vector
3514  for (j = i; j < (i+hgs); ++j) {
3515  pos_x[j] += delta_r_cm_x;
3516  pos_y[j] += delta_r_cm_y;
3517  pos_z[j] += delta_r_cm_z;
3518  }
3519  }
3520  } else {
3521  for (int i = 0; i < numAtoms; ++i) {
3522  // missing fixed atoms implementation
3523  // scale the coordinates with factor
3524  // shift to origin
3525  double tx = pos_x[i] - origin.x;
3526  double ty = pos_y[i] - origin.y;
3527  double tz = pos_z[i] - origin.z;
3528  // apply transformation
3529  double ftx = factor.xx*tx + factor.xy*ty + factor.xz*tz;
3530  double fty = factor.yx*tx + factor.yy*ty + factor.yz*tz;
3531  double ftz = factor.zx*tx + factor.zy*ty + factor.zz*tz;
3532  // shift back
3533  pos_x[i] = ftx + origin.x;
3534  pos_y[i] = fty + origin.y;
3535  pos_z[i] = ftz + origin.z;
3536  }
3537  }
3538  }
3539 }
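//
// Editorial sketch (not in the original source): with useGroupPressure the
// rescaling above moves each hydrogen group rigidly by the displacement of
// its center of mass under the affine transform about the cell origin.
// A minimal single-group version; for brevity it assumes a diagonal scale
// factor, and all names are illustrative:
//
#if 0 // editorial sketch
static void rescale_group(double *x, double *y, double *z,
                          const float *m, int first, int hgs,
                          double ox, double oy, double oz,     // origin
                          double fxx, double fyy, double fzz)  // factor
{
  double M = 0, cx = 0, cy = 0, cz = 0;
  for (int j = first; j < first + hgs; j++) {   // center of mass
    M += m[j]; cx += m[j]*x[j]; cy += m[j]*y[j]; cz += m[j]*z[j];
  }
  cx /= M; cy /= M; cz /= M;
  double dx = (cx - ox)*fxx + ox - cx;  // new COM minus old COM
  double dy = (cy - oy)*fyy + oy - cy;
  double dz = (cz - oz)*fzz + oz - cz;
  for (int j = first; j < first + hgs; j++) {   // rigid shift
    x[j] += dx; y[j] += dy; z[j] += dz;
  }
}
#endif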
3540 
3541 void Sequencer::langevinPiston_SOA(
3542 #ifndef SOA_SIMPLIFY_PARAMS
3543  const int * __restrict hydrogenGroupSize,
3544  const float * __restrict mass,
3545  double * __restrict pos_x,
3546  double * __restrict pos_y,
3547  double * __restrict pos_z,
3548  double * __restrict vel_x,
3549  double * __restrict vel_y,
3550  double * __restrict vel_z,
3551  int numAtoms,
3552 #endif
3553  int step
3554  )
3555 {
3556 #ifdef SOA_SIMPLIFY_PARAMS
3557  const int * __restrict hydrogenGroupSize = patch->patchDataSOA.hydrogenGroupSize;
3558  const float * __restrict mass = patch->patchDataSOA.mass;
3559  double * __restrict pos_x = patch->patchDataSOA.pos_x;
3560  double * __restrict pos_y = patch->patchDataSOA.pos_y;
3561  double * __restrict pos_z = patch->patchDataSOA.pos_z;
3562  double * __restrict vel_x = patch->patchDataSOA.vel_x;
3563  double * __restrict vel_y = patch->patchDataSOA.vel_y;
3564  double * __restrict vel_z = patch->patchDataSOA.vel_z;
3565  int numAtoms = patch->patchDataSOA.numAtoms;
3566 #endif
3567 
3568  //
3569  // The loops below would simplify if we lifted out the special cases of
3570  // fixed atoms and pressure-excluded atoms into their own branches.
3571  //
3572 
3573  // Blocking receive for the updated lattice scaling factor.
3574 
3575  Tensor factor = broadcast->positionRescaleFactor.get(step);
3576 
3577  TIMER_START(patch->timerSet, PISTON);
3578  // JCP FIX THIS!!!
3579  double velFactor_x = namd_reciprocal(factor.xx);
3580  double velFactor_y = namd_reciprocal(factor.yy);
3581  double velFactor_z = namd_reciprocal(factor.zz);
3582  patch->lattice.rescale(factor);
3583  Vector origin = patch->lattice.origin();
3584  if ( simParams->useGroupPressure ) {
3585  int hgs;
3586  for (int i=0; i < numAtoms; i += hgs) {
3587  int j;
3588  hgs = hydrogenGroupSize[i];
3589  // missing fixed atoms
3590  BigReal m_cm = 0;
3591  BigReal r_cm_x = 0;
3592  BigReal r_cm_y = 0;
3593  BigReal r_cm_z = 0;
3594  BigReal v_cm_x = 0;
3595  BigReal v_cm_y = 0;
3596  BigReal v_cm_z = 0;
3597  for ( j = i; j < (i+hgs); ++j ) {
3598  m_cm += mass[j];
3599  r_cm_x += mass[j] * pos_x[j];
3600  r_cm_y += mass[j] * pos_y[j];
3601  r_cm_z += mass[j] * pos_z[j];
3602  v_cm_x += mass[j] * vel_x[j];
3603  v_cm_y += mass[j] * vel_y[j];
3604  v_cm_z += mass[j] * vel_z[j];
3605  }
3606  BigReal inv_m_cm = namd_reciprocal(m_cm);
3607  r_cm_x *= inv_m_cm;
3608  r_cm_y *= inv_m_cm;
3609  r_cm_z *= inv_m_cm;
3610 
3611  double tx = r_cm_x - origin.x;
3612  double ty = r_cm_y - origin.y;
3613  double tz = r_cm_z - origin.z;
3614  double new_r_cm_x = factor.xx*tx + factor.xy*ty + factor.xz*tz;
3615  double new_r_cm_y = factor.yx*tx + factor.yy*ty + factor.yz*tz;
3616  double new_r_cm_z = factor.zx*tx + factor.zy*ty + factor.zz*tz;
3617  new_r_cm_x += origin.x;
3618  new_r_cm_y += origin.y;
3619  new_r_cm_z += origin.z;
3620 
3621  double delta_r_cm_x = new_r_cm_x - r_cm_x;
3622  double delta_r_cm_y = new_r_cm_y - r_cm_y;
3623  double delta_r_cm_z = new_r_cm_z - r_cm_z;
3624  v_cm_x *= inv_m_cm;
3625  v_cm_y *= inv_m_cm;
3626  v_cm_z *= inv_m_cm;
3627  double delta_v_cm_x = ( velFactor_x - 1 ) * v_cm_x;
3628  double delta_v_cm_y = ( velFactor_y - 1 ) * v_cm_y;
3629  double delta_v_cm_z = ( velFactor_z - 1 ) * v_cm_z;
3630  for (j = i; j < (i+hgs); j++) {
3631  pos_x[j] += delta_r_cm_x;
3632  pos_y[j] += delta_r_cm_y;
3633  pos_z[j] += delta_r_cm_z;
3634  vel_x[j] += delta_v_cm_x;
3635  vel_y[j] += delta_v_cm_y;
3636  vel_z[j] += delta_v_cm_z;
3637  }
3638  // if (i < 10)
3639  // printf("cpu: %d, %f, %f, %f, %f, %f, %f\n", i,
3640  // pos_x[i], pos_y[i], pos_z[i],
3641  // vel_x[i], vel_y[i], vel_z[i]);
3642  }
3643  }
3644  else {
3645  for (int i=0; i < numAtoms; i++) {
3646  double tx = pos_x[i] - origin.x;
3647  double ty = pos_y[i] - origin.y;
3648  double tz = pos_z[i] - origin.z;
3649  double ftx = factor.xx*tx + factor.xy*ty + factor.xz*tz;
3650  double fty = factor.yx*tx + factor.yy*ty + factor.yz*tz;
3651  double ftz = factor.zx*tx + factor.zy*ty + factor.zz*tz;
3652  pos_x[i] = ftx + origin.x;
3653  pos_y[i] = fty + origin.y;
3654  pos_z[i] = ftz + origin.z;
3655  vel_x[i] *= velFactor_x;
3656  vel_y[i] *= velFactor_y;
3657  vel_z[i] *= velFactor_z;
3658  // if (i < 10)
3659  // printf("cpu: %d, %f, %f, %f, %f, %f, %f\n", i,
3660  // pos_x[i], pos_y[i], pos_z[i],
3661  // vel_x[i], vel_y[i], vel_z[i]);
3662  }
3663  }
3664  TIMER_STOP(patch->timerSet, PISTON);
3665  // exit(0);
3666 }
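//
// Editorial sketch (not in the original source): the piston's velocity
// update above gives every member of a group the change in its group's
// COM velocity when that COM velocity is multiplied by the reciprocal of
// the diagonal position rescale factor, dv = (1/f - 1) * v_cm per axis
// (off-diagonal terms are ignored, hence the FIX THIS note).  One axis,
// names illustrative:
//
#if 0 // editorial sketch
static void piston_group_velocity(double *vx, int first, int hgs,
                                  double v_cm_x, double factor_xx)
{
  double dv = (1.0 / factor_xx - 1.0) * v_cm_x;  // (velFactor - 1) * v_cm
  for (int j = first; j < first + hgs; j++)
    vx[j] += dv;
}
#endif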
3667 
3668 
3669 // timestep scaled by 1/TIMEFACTOR
3670 void Sequencer::rattle1_SOA(BigReal timestep, int pressure)
3671 {
3672  NAMD_EVENT_RANGE_2(patch->flags.event_on, NamdProfileEvent::RATTLE1_SOA);
3673  if ( simParams->rigidBonds != RIGID_NONE ) {
3674  Tensor virial;
3675  Tensor *vp = ( pressure ? &virial : 0 );
3676  // XXX pressureProfileReduction == NULL?
3677  if ( patch->rattle1_SOA(timestep, vp, pressureProfileReduction) ) {
3678  iout << iERROR <<
3679  "Constraint failure; simulation has become unstable.\n" << endi;
3680  Node::Object()->enableEarlyExit();
3681  terminate();
3682  }
3683 #ifdef NODEGROUP_FORCE_REGISTER
3684  if (simParams->CUDASOAintegrate) {
3685  ADD_TENSOR_OBJECT(CUDASequencer->patchData->reduction,REDUCTION_VIRIAL_NORMAL,virial);
3686  }
3687  else{
3688 #endif
3689  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,virial);
3690 #ifdef NODEGROUP_FORCE_REGISTER
3691  }
3692 #endif
3693  }
3694 }
3695 
3696 void Sequencer::runComputeObjects_SOA(int migration, int pairlists, int nstep)
3697 {
3698  if ( migration ) pairlistsAreValid = 0;
3699 #if (defined(NAMD_CUDA) || defined(NAMD_HIP)) || defined(NAMD_MIC)
3700  if ( pairlistsAreValid &&
3702  && ( pairlistsAge > pairlistsAgeLimit ) ) {
3703  pairlistsAreValid = 0;
3704  }
3705 #else
3706  if ( pairlistsAreValid && ( pairlistsAge > pairlistsAgeLimit ) ) {
3707  pairlistsAreValid = 0;
3708  }
3709 #endif
3710  if ( ! simParams->usePairlists ) pairlists = 0;
3711  patch->flags.usePairlists = pairlists || pairlistsAreValid;
3712  patch->flags.savePairlists = pairlists && ! pairlistsAreValid;
3713 
3714 #if defined(NTESTPID)
3715  if (1 && patch->patchID == NTESTPID) {
3716  int step = patch->flags.step;
3717  int numAtoms = patch->numAtoms;
3718  double *xyzq = new double[4*numAtoms];
3719  double *x = patch->patchDataSOA.pos_x;
3720  double *y = patch->patchDataSOA.pos_y;
3721  double *z = patch->patchDataSOA.pos_z;
3722  float *q = patch->patchDataSOA.charge;
3723  for (int i=0; i < numAtoms; i++) {
3724  xyzq[4*i ] = x[i];
3725  xyzq[4*i+1] = y[i];
3726  xyzq[4*i+2] = z[i];
3727  xyzq[4*i+3] = q[i];
3728  }
3729  char fname[128], remark[128];
3730  sprintf(fname, "xyzq_soa_pid%d_step%d.bin", NTESTPID, step);
3731  sprintf(remark, "SOA xyzq, patch %d, step %d", NTESTPID, step);
3732  TestArray_write<double>(fname, remark, xyzq, 4*numAtoms);
3733  delete[] xyzq;
3734  }
3735 #endif
3736  // Zero all SOA global forces before computing force
3737  patch->zero_global_forces_SOA();
3738  patch->positionsReady_SOA(migration); // updates flags.sequence
3739 
3740  int seq = patch->flags.sequence;
3741  int basePriority = ( (seq & 0xffff) << 15 )
3742  + PATCH_PRIORITY(patch->patchID);
3743 
3744  // XXX missing GBIS
3745  priority = basePriority + COMPUTE_HOME_PRIORITY;
3746  //char prbuf[32];
3747  //sprintf(prbuf, "%s: %d", NamdProfileEventStr[NamdProfileEvent::SEQ_SUSPEND], patch->getPatchID());
3748  //NAMD_EVENT_START_EX(1, NamdProfileEvent::SEQ_SUSPEND, prbuf);
3749  suspend(); // until all deposit boxes close
3750  //NAMD_EVENT_STOP(1, NamdProfileEvent::SEQ_SUSPEND);
3751 
3752 #ifdef NODEGROUP_FORCE_REGISTER
3753  if(!simParams->CUDASOAintegrate || migration){
3754  patch->copy_forces_to_SOA();
3755  }
3756 #else
3757  patch->copy_forces_to_SOA();
3758 #endif
3759 
3760 #if defined(NTESTPID)
3761  if (1 && patch->patchID == NTESTPID) {
3762  int step = patch->flags.step;
3763  int numAtoms = patch->numAtoms;
3764  char fname[128];
3765  char remark[128];
3766  double *fxyz = new double[3*numAtoms];
3767  double *fx = patch->patchDataSOA.f_normal_x;
3768  double *fy = patch->patchDataSOA.f_normal_y;
3769  double *fz = patch->patchDataSOA.f_normal_z;
3770  for (int i=0; i < numAtoms; i++) {
3771  fxyz[3*i ] = fx[i];
3772  fxyz[3*i+1] = fy[i];
3773  fxyz[3*i+2] = fz[i];
3774  }
3775  sprintf(fname, "fxyz_normal_soa_pid%d_step%d.bin", NTESTPID, step);
3776  sprintf(remark, "SOA fxyz normal, patch %d, step %d", NTESTPID, step);
3777  TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3778  fx = patch->patchDataSOA.f_nbond_x;
3779  fy = patch->patchDataSOA.f_nbond_y;
3780  fz = patch->patchDataSOA.f_nbond_z;
3781  for (int i=0; i < numAtoms; i++) {
3782  fxyz[3*i ] = fx[i];
3783  fxyz[3*i+1] = fy[i];
3784  fxyz[3*i+2] = fz[i];
3785  }
3786  sprintf(fname, "fxyz_nbond_soa_pid%d_step%d.bin", NTESTPID, step);
3787  sprintf(remark, "SOA fxyz nonbonded, patch %d, step %d", NTESTPID, step);
3788  TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3789  fx = patch->patchDataSOA.f_slow_x;
3790  fy = patch->patchDataSOA.f_slow_y;
3791  fz = patch->patchDataSOA.f_slow_z;
3792  for (int i=0; i < numAtoms; i++) {
3793  fxyz[3*i ] = fx[i];
3794  fxyz[3*i+1] = fy[i];
3795  fxyz[3*i+2] = fz[i];
3796  }
3797  sprintf(fname, "fxyz_slow_soa_pid%d_step%d.bin", NTESTPID, step);
3798  sprintf(remark, "SOA fxyz slow, patch %d, step %d", NTESTPID, step);
3799  TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3800  delete[] fxyz;
3801  }
3802 #endif
3803 
3804 #if 0
3805  if (1 && patch->patchID == 0) {
3806  int numAtoms = patch->numAtoms;
3807  double *fxyz = new double[3*numAtoms];
3808  double *fx, *fy, *fz;
3809  char fname[64], remark[128];
3810  int step = patch->flags.step;
3811 
3812  fx = patch->patchDataSOA.f_slow_x;
3813  fy = patch->patchDataSOA.f_slow_y;
3814  fz = patch->patchDataSOA.f_slow_z;
3815  for (int i=0; i < numAtoms; i++) {
3816  fxyz[3*i ] = fx[i];
3817  fxyz[3*i+1] = fy[i];
3818  fxyz[3*i+2] = fz[i];
3819  }
3820  sprintf(fname, "fslow_soa_%d.bin", step);
3821  sprintf(remark, "SOA slow forces, step %d\n", step);
3822  TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3823 
3824  fx = patch->patchDataSOA.f_nbond_x;
3825  fy = patch->patchDataSOA.f_nbond_y;
3826  fz = patch->patchDataSOA.f_nbond_z;
3827  for (int i=0; i < numAtoms; i++) {
3828  fxyz[3*i ] = fx[i];
3829  fxyz[3*i+1] = fy[i];
3830  fxyz[3*i+2] = fz[i];
3831  }
3832  sprintf(fname, "fnbond_soa_%d.bin", step);
3833  sprintf(remark, "SOA nonbonded forces, step %d\n", step);
3834  TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3835 
3836  fx = patch->patchDataSOA.f_normal_x;
3837  fy = patch->patchDataSOA.f_normal_y;
3838  fz = patch->patchDataSOA.f_normal_z;
3839  for (int i=0; i < numAtoms; i++) {
3840  fxyz[3*i ] = fx[i];
3841  fxyz[3*i+1] = fy[i];
3842  fxyz[3*i+2] = fz[i];
3843  }
3844  sprintf(fname, "fnormal_soa_%d.bin", step);
3845  sprintf(remark, "SOA normal forces, step %d\n", step);
3846  TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3847 
3848  delete[] fxyz;
3849  }
3850 #endif
3851 
3852 #if 0
3853  //Will print forces here after runComputeObjects
3854  if(nstep == 1){
3855  fprintf(stderr, "CPU force arrays for alanin\n" );
3856  for(int i = 0; i < patch->patchDataSOA.numAtoms; i++){
3857  fprintf(stderr, "f[%i] = %lf %lf %lf | %lf %lf %lf | %lf %lf %lf\n", i,
3858  patch->patchDataSOA.f_normal_x[i], patch->patchDataSOA.f_normal_y[i], patch->patchDataSOA.f_normal_z[i],
3859  patch->patchDataSOA.f_nbond_x[i], patch->patchDataSOA.f_nbond_y[i], patch->patchDataSOA.f_nbond_z[i],
3860  patch->patchDataSOA.f_slow_x[i], patch->patchDataSOA.f_slow_y[i], patch->patchDataSOA.f_slow_z[i]);
3861  }
3862  }
3863 #endif
3864 
3865  if ( patch->flags.savePairlists && patch->flags.doNonbonded ) {
3866  pairlistsAreValid = 1;
3867  pairlistsAge = 0;
3868  }
3869  // For multigrator, do not age pairlist during pressure step
3870  // NOTE: for non-multigrator pressureStep = 0 always
3871  if ( pairlistsAreValid /* && !pressureStep */ ) ++pairlistsAge;
3872 
3873  // XXX missing lonepairs
3874  // XXX missing Molly
3875  // XXX missing Lowe-Andersen
3876 }
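//
// Editorial sketch (not in the original source): the pairlist bookkeeping
// in runComputeObjects_SOA() follows a small protocol -- invalidate on
// migration or when the list age passes its limit, request regeneration
// when lists are wanted but invalid, and reset the age once fresh lists
// are saved.  Isolated into a toy state object (all names illustrative):
//
#if 0 // editorial sketch
struct PairlistState {
  int valid = 0, age = 0, ageLimit = 10;  // ageLimit value is illustrative

  void newStep(bool migrated, bool wantPairlists,
               bool &usePairlists, bool &savePairlists) {
    if (migrated) valid = 0;               // atoms moved between patches
    if (valid && age > ageLimit) valid = 0;
    usePairlists  = wantPairlists || valid;
    savePairlists = wantPairlists && !valid;
  }
  void afterForces(bool savedPairlists, bool didNonbonded) {
    if (savedPairlists && didNonbonded) { valid = 1; age = 0; }
    if (valid) ++age;
  }
};
#endif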
3877 
3882 void Sequencer::stochRescaleVelocities_SOA(int step)
3883 {
3884  if ( simParams->stochRescaleOn && !(++stochRescale_count % simParams->stochRescaleFreq) )
3885  {
3886  double * __restrict vel_x = patch->patchDataSOA.vel_x;
3887  double * __restrict vel_y = patch->patchDataSOA.vel_y;
3888  double * __restrict vel_z = patch->patchDataSOA.vel_z;
3889  int numAtoms = patch->patchDataSOA.numAtoms;
3890  // Blocking receive for the temperature coupling coefficient.
3891  BigReal velrescaling = broadcast->stochRescaleCoefficient.get(step);
3892  DebugM(4, "stochastically rescaling velocities at step " << step << " by " << velrescaling << "\n");
3893  for ( int i = 0; i < numAtoms; ++i ) {
3894  vel_x[i] *= velrescaling;
3895  vel_y[i] *= velrescaling;
3896  vel_z[i] *= velrescaling;
3897  }
3898  stochRescale_count = 0;
3899  }
3900 }
3901 
3902 //
3903 // end SOA code
3904 //
3906 
3907 #endif // SEQUENCER_SOA
3908 
3909 
3910 extern int eventEndOfTimeStep;
3911 
3912 void Sequencer::integrate(int scriptTask) {
3913  char traceNote[24];
3914  char tracePrefix[20];
3915  sprintf(tracePrefix, "p:%d,s:",patch->patchID);
3916 // patch->write_tip4_props();
3917 
3918  //
3919  // DJH: Copy all data into SOA (structure of arrays)
3920  // from AOS (array of structures) data structure.
3921  //
3922  //patch->copy_all_to_SOA();
3923 
3924 #ifdef TIMER_COLLECTION
3925  TimerSet& t = patch->timerSet;
3926 #endif
3927  TIMER_INIT_WIDTH(t, KICK, simParams->timerBinWidth);
3928  TIMER_INIT_WIDTH(t, MAXMOVE, simParams->timerBinWidth);
3929  TIMER_INIT_WIDTH(t, DRIFT, simParams->timerBinWidth);
3930  TIMER_INIT_WIDTH(t, PISTON, simParams->timerBinWidth);
3931  TIMER_INIT_WIDTH(t, SUBMITHALF, simParams->timerBinWidth);
3932  TIMER_INIT_WIDTH(t, VELBBK1, simParams->timerBinWidth);
3933  TIMER_INIT_WIDTH(t, VELBBK2, simParams->timerBinWidth);
3934  TIMER_INIT_WIDTH(t, RATTLE1, simParams->timerBinWidth);
3935  TIMER_INIT_WIDTH(t, SUBMITFULL, simParams->timerBinWidth);
3936  TIMER_INIT_WIDTH(t, SUBMITCOLLECT, simParams->timerBinWidth);
3937 
3938  int &step = patch->flags.step;
3939  step = simParams->firstTimestep;
3940 
3941  // drag switches
3942  const Bool rotDragOn = simParams->rotDragOn;
3943  const Bool movDragOn = simParams->movDragOn;
3944 
3945  const int commOnly = simParams->commOnly;
3946 
3947  int &maxForceUsed = patch->flags.maxForceUsed;
3948  int &maxForceMerged = patch->flags.maxForceMerged;
3949  maxForceUsed = Results::normal;
3950  maxForceMerged = Results::normal;
3951 
3952  const int numberOfSteps = simParams->N;
3953  const int stepsPerCycle = simParams->stepsPerCycle;
3954  const BigReal timestep = simParams->dt;
3955 
3956  // what MTS method?
3957  const int staleForces = ( simParams->MTSAlgorithm == NAIVE );
3958 
3959  const int nonbondedFrequency = simParams->nonbondedFrequency;
3960  slowFreq = nonbondedFrequency;
3961  const BigReal nbondstep = timestep * (staleForces?1:nonbondedFrequency);
3962  int &doNonbonded = patch->flags.doNonbonded;
3963  doNonbonded = (step >= numberOfSteps) || !(step%nonbondedFrequency);
3964  if ( nonbondedFrequency == 1 ) maxForceMerged = Results::nbond;
3965  if ( doNonbonded ) maxForceUsed = Results::nbond;
3966 
3967  // Do we do full electrostatics?
3968  const int dofull = ( simParams->fullElectFrequency ? 1 : 0 );
3969  const int fullElectFrequency = simParams->fullElectFrequency;
3970  if ( dofull ) slowFreq = fullElectFrequency;
3971  const BigReal slowstep = timestep * (staleForces?1:fullElectFrequency);
3972  int &doFullElectrostatics = patch->flags.doFullElectrostatics;
3973  doFullElectrostatics = (dofull && ((step >= numberOfSteps) || !(step%fullElectFrequency)));
3974  if ( dofull && (fullElectFrequency == 1) && !(simParams->mollyOn) )
3975  maxForceMerged = Results::slow;
3976  if ( doFullElectrostatics ) maxForceUsed = Results::slow;
3977 
3978 //#ifndef UPPER_BOUND
3979  const Bool accelMDOn = simParams->accelMDOn;
3980  const Bool accelMDdihe = simParams->accelMDdihe;
3981  const Bool accelMDdual = simParams->accelMDdual;
3982  if ( accelMDOn && (accelMDdihe || accelMDdual)) maxForceUsed = Results::amdf;
3983 
3984  // Is adaptive tempering on?
3985  const Bool adaptTempOn = simParams->adaptTempOn;
3986  adaptTempT = simParams->initialTemp;
3987  if (simParams->langevinOn)
3988  adaptTempT = simParams->langevinTemp;
3989  else if (simParams->rescaleFreq > 0)
3990  adaptTempT = simParams->rescaleTemp;
3991 
3992 
3993  int &doMolly = patch->flags.doMolly;
3994  doMolly = simParams->mollyOn && doFullElectrostatics;
3995  // BEGIN LA
3996  int &doLoweAndersen = patch->flags.doLoweAndersen;
3997  doLoweAndersen = simParams->loweAndersenOn && doNonbonded;
3998  // END LA
3999 
4000  int &doGBIS = patch->flags.doGBIS;
4001  doGBIS = simParams->GBISOn;
4002 
4003  int &doLCPO = patch->flags.doLCPO;
4004  doLCPO = simParams->LCPOOn;
4005 
4006  int zeroMomentum = simParams->zeroMomentum;
4007 
4008  // Do we need to return forces to TCL script or Colvar module?
4009  int doTcl = simParams->tclForcesOn;
4010  int doColvars = simParams->colvarsOn;
4011 //#endif
4012  int doGlobal = doTcl || doColvars;
4013  ComputeGlobal *computeGlobal = Node::Object()->computeMgr->computeGlobalObject;
4014 
4015  // Bother to calculate energies?
4016  int &doEnergy = patch->flags.doEnergy;
4017  int energyFrequency = simParams->computeEnergies;
4018 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
4019  if(simParams->alchOn) energyFrequency = NAMD_gcd(energyFrequency, simParams->alchOutFreq);
4020 #endif
4021 #ifndef UPPER_BOUND
4022  const int reassignFreq = simParams->reassignFreq;
4023 #endif
4024 
4025  int &doVirial = patch->flags.doVirial;
4026  doVirial = 1;
4027 
4028  if ( scriptTask == SCRIPT_RUN ) {
4029 
4030 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4031 
4032 #ifndef UPPER_BOUND
4033 // printf("Doing initial rattle\n");
4034 #ifndef UPPER_BOUND
4035 D_MSG("rattle1()");
4036  TIMER_START(t, RATTLE1);
4037  rattle1(0.,0); // enforce rigid bond constraints on initial positions
4038  TIMER_STOP(t, RATTLE1);
4039 #endif
4040 
4043  patch->atom.begin(),patch->atom.end());
4044  }
4045 
4046  if ( !commOnly && ( reassignFreq>0 ) && ! (step%reassignFreq) ) {
4047  reassignVelocities(timestep,step);
4048  }
4049 #endif
4050 
4051  doEnergy = ! ( step % energyFrequency );
4052 #ifndef UPPER_BOUND
4053  if ( accelMDOn && !accelMDdihe ) doEnergy=1;
4054  //Update energy every timestep for adaptive tempering
4055  if ( adaptTempOn ) doEnergy=1;
4056 #endif
4057 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4058 D_MSG("runComputeObjects()");
4059  runComputeObjects(1,step<numberOfSteps); // must migrate here!
4060 #ifndef UPPER_BOUND
4061  rescaleaccelMD(step, doNonbonded, doFullElectrostatics); // for accelMD
4062  adaptTempUpdate(step); // update adaptive tempering temperature
4063 #endif
4064 
4065 #ifndef UPPER_BOUND
4066  if ( staleForces || doGlobal ) {
4067  if ( doNonbonded ) saveForce(Results::nbond);
4068  if ( doFullElectrostatics ) saveForce(Results::slow);
4069  }
4070 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4071  if ( ! commOnly ) {
4072 D_MSG("newtonianVelocities()");
4073  TIMER_START(t, KICK);
4074  newtonianVelocities(-0.5,timestep,nbondstep,slowstep,0,1,1);
4075  TIMER_STOP(t, KICK);
4076  }
4078 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4079 #ifndef UPPER_BOUND
4080 D_MSG("rattle1()");
4081  TIMER_START(t, RATTLE1);
4082  rattle1(-timestep,0);
4083  TIMER_STOP(t, RATTLE1);
4084 #endif
4085 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4086 D_MSG("submitHalfstep()");
4087  TIMER_START(t, SUBMITHALF);
4088  submitHalfstep(step);
4089  TIMER_STOP(t, SUBMITHALF);
4090 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4091  if ( ! commOnly ) {
4092 D_MSG("newtonianVelocities()");
4093  TIMER_START(t, KICK);
4094  newtonianVelocities(1.0,timestep,nbondstep,slowstep,0,1,1);
4095  TIMER_STOP(t, KICK);
4096  }
4097 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4098 D_MSG("rattle1()");
4099  TIMER_START(t, RATTLE1);
4100  rattle1(timestep,1);
4101  TIMER_STOP(t, RATTLE1);
4102  if (doGlobal) // include constraint forces
4103  computeGlobal->saveTotalForces(patch);
4104 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4105 D_MSG("submitHalfstep()");
4106  TIMER_START(t, SUBMITHALF);
4107  submitHalfstep(step);
4108  TIMER_STOP(t, SUBMITHALF);
4109  if ( zeroMomentum && doFullElectrostatics ) submitMomentum(step);
4110  if ( ! commOnly ) {
4111 D_MSG("newtonianVelocities()");
4112  TIMER_START(t, KICK);
4113  newtonianVelocities(-0.5,timestep,nbondstep,slowstep,0,1,1);
4114  TIMER_STOP(t, KICK);
4115  }
4116 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4117 #endif
4118 D_MSG("submitReductions()");
4119  TIMER_START(t, SUBMITFULL);
4120  submitReductions(step);
4121  TIMER_STOP(t, SUBMITFULL);
4122 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4123 #ifndef UPPER_BOUND
4124  if(0){ // if(traceIsOn()){
4125  traceUserEvent(eventEndOfTimeStep);
4126  sprintf(traceNote, "%s%d",tracePrefix,step);
4127  traceUserSuppliedNote(traceNote);
4128  }
4129 #endif
4130  rebalanceLoad(step);
4131 
4132  } // scriptTask == SCRIPT_RUN
4133 
4134 #ifndef UPPER_BOUND
4135  bool doMultigratorRattle = false;
4136 #endif
4137 
4138  //
4139  // DJH: There are a lot of mod operations below and elsewhere to
4140  // test step number against the frequency of something happening.
4141  // Mod and integer division are expensive!
4142  // Might be better to replace with counters and test equality.
4143  //
4144 #if 0
4145  for(int i = 0; i < NamdProfileEvent::EventsCount; i++)
4146  CkPrintf("-------------- [%d] %s -------------\n", i, NamdProfileEventStr[i]);
4147 #endif
4148 
4149 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
4150  int& eon = patch->flags.event_on;
4151  int epid = (simParams->beginEventPatchID <= patch->getPatchID()
4152  && patch->getPatchID() <= simParams->endEventPatchID);
4153  int beginStep = simParams->beginEventStep;
4154  int endStep = simParams->endEventStep;
4155  bool controlProfiling = patch->getPatchID() == 0;
4156 #endif
4157 
4158  for ( ++step; step <= numberOfSteps; ++step )
4159  {
4160 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
4161  eon = epid && (beginStep < step && step <= endStep);
4162 
4163  if (controlProfiling && step == beginStep) {
4164  NAMD_PROFILE_START();
4165  }
4166  if (controlProfiling && step == endStep) {
4167  NAMD_PROFILE_STOP();
4168  }
4169  char buf[32];
4170  sprintf(buf, "%s: %d", NamdProfileEventStr[NamdProfileEvent::INTEGRATE_1], patch->getPatchID());
4171  NAMD_EVENT_START_EX(eon, NamdProfileEvent::INTEGRATE_1, buf);
4172 #endif
4173  DebugM(3,"for step "<<step<< " dGlobal " << doGlobal<<"\n"<<endi);
4174 #ifndef UPPER_BOUND
4175  rescaleVelocities(step);
4176  tcoupleVelocities(timestep,step);
4177  if ( simParams->stochRescaleOn ) {
4178  stochRescaleVelocities(step);
4179  }
4180  berendsenPressure(step);
4181 
4182  if ( ! commOnly ) {
4183  TIMER_START(t, KICK);
4184  newtonianVelocities(0.5,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4185  TIMER_STOP(t, KICK);
4186  }
4187 
4188  // We do RATTLE here if multigrator thermostat was applied in the previous step
4189  if (doMultigratorRattle) rattle1(timestep, 1);
4190 
4191  /* reassignment based on half-step velocities
4192  if ( !commOnly && ( reassignFreq>0 ) && ! (step%reassignFreq) ) {
4193  addVelocityToPosition(0.5*timestep);
4194  reassignVelocities(timestep,step);
4195  addVelocityToPosition(0.5*timestep);
4196  rattle1(0.,0);
4197  rattle1(-timestep,0);
4198  addVelocityToPosition(-1.0*timestep);
4199  rattle1(timestep,0);
4200  } */
4201 
4202  TIMER_START(t, MAXMOVE);
4203  maximumMove(timestep);
4204  TIMER_STOP(t, MAXMOVE);
4205 
4206  NAMD_EVENT_STOP(eon, NamdProfileEvent::INTEGRATE_1); // integrate 1
4207 
4208  if ( simParams->langevinOn || simParams->langevinPistonOn ) {
4209  if ( ! commOnly ) {
4210  TIMER_START(t, DRIFT);
4211  addVelocityToPosition(0.5*timestep);
4212  TIMER_STOP(t, DRIFT);
4213  }
4214  // We add an Ornstein-Uhlenbeck integration step for the case of BAOAB (Langevin)
4215  langevinVelocities(timestep);
4216 
4217  // There is a blocking receive inside of langevinPiston()
4218  // that might suspend the current thread of execution,
4219  // so split profiling around this conditional block.
4220  langevinPiston(step);
4221 
4222  if ( ! commOnly ) {
4223  TIMER_START(t, DRIFT);
4224  addVelocityToPosition(0.5*timestep);
4225  TIMER_STOP(t, DRIFT);
4226  }
4227  } else {
4228  // If Langevin is not used, take the full time step directly instead of two half steps
4229  if ( ! commOnly ) {
4230  TIMER_START(t, DRIFT);
4231  addVelocityToPosition(timestep);
4232  TIMER_STOP(t, DRIFT);
4233  }
4234  }
4235 
4236  NAMD_EVENT_START(eon, NamdProfileEvent::INTEGRATE_2);
4237 
4238  // impose hard wall potential for Drude bond length
4239  hardWallDrude(timestep, 1);
4240 
4242 #endif // UPPER_BOUND
4243 
4244  doNonbonded = !(step%nonbondedFrequency);
4245  doFullElectrostatics = (dofull && !(step%fullElectFrequency));
4246 
4247 #ifndef UPPER_BOUND
4248  if ( zeroMomentum && doFullElectrostatics ) {
4249  // There is a blocking receive inside of correctMomentum().
4250  correctMomentum(step,slowstep);
4251  }
4252 
4253  // There are NO sends in submitHalfstep() just local summation
4254  // into the Reduction struct.
4255  TIMER_START(t, SUBMITHALF);
4256  submitHalfstep(step);
4257  TIMER_STOP(t, SUBMITHALF);
4258 
4259  doMolly = simParams->mollyOn && doFullElectrostatics;
4260  // BEGIN LA
4261  doLoweAndersen = simParams->loweAndersenOn && doNonbonded;
4262  // END LA
4263 
4264  maxForceUsed = Results::normal;
4265  if ( doNonbonded ) maxForceUsed = Results::nbond;
4266  if ( doFullElectrostatics ) maxForceUsed = Results::slow;
4267  if ( accelMDOn && (accelMDdihe || accelMDdual)) maxForceUsed = Results::amdf;
4268 
4269  // Migrate Atoms on stepsPerCycle
4270  doEnergy = ! ( step % energyFrequency );
4271  if ( accelMDOn && !accelMDdihe ) doEnergy=1;
4272  if ( adaptTempOn ) doEnergy=1;
4273 
4274  // Multigrator
4275  if (simParams->multigratorOn) {
4276  doVirial = (!(step % energyFrequency) || ((simParams->outputPressure > 0) && !(step % simParams->outputPressure))
4277  || !(step % simParams->multigratorPressureFreq));
4278  doKineticEnergy = (!(step % energyFrequency) || !(step % simParams->multigratorTemperatureFreq));
4279  doMomenta = (simParams->outputMomenta > 0) && !(step % simParams->outputMomenta);
4280  } else {
4281  doVirial = 1;
4282  doKineticEnergy = 1;
4283  doMomenta = 1;
4284  }
4285 #endif
4286  NAMD_EVENT_STOP(eon, NamdProfileEvent::INTEGRATE_2); // integrate 2
4287 
4288  // The current thread of execution will suspend in runComputeObjects().
4289  runComputeObjects(!(step%stepsPerCycle),step<numberOfSteps);
4290 
4291  NAMD_EVENT_START(eon, NamdProfileEvent::INTEGRATE_3);
4292 
4293 #ifndef UPPER_BOUND
4294  rescaleaccelMD(step, doNonbonded, doFullElectrostatics); // for accelMD
4295 
4296  if ( staleForces || doGlobal ) {
4297  if ( doNonbonded ) saveForce(Results::nbond);
4298  if ( doFullElectrostatics ) saveForce(Results::slow);
4299  }
4300 
4301  // reassignment based on full-step velocities
4302  if ( !commOnly && ( reassignFreq>0 ) && ! (step%reassignFreq) ) {
4303  reassignVelocities(timestep,step);
4304  newtonianVelocities(-0.5,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4305  rattle1(-timestep,0);
4306  }
4307 
4308  if ( ! commOnly ) {
4309  TIMER_START(t, VELBBK1);
4310  langevinVelocitiesBBK1(timestep);
4311  TIMER_STOP(t, VELBBK1);
4312  TIMER_START(t, KICK);
4313  newtonianVelocities(1.0,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4314  TIMER_STOP(t, KICK);
4315  TIMER_START(t, VELBBK2);
4316  langevinVelocitiesBBK2(timestep);
4317  TIMER_STOP(t, VELBBK2);
4318  }
4319 
4320  // add drag to each atom's positions
4321  if ( ! commOnly && movDragOn ) addMovDragToPosition(timestep);
4322  if ( ! commOnly && rotDragOn ) addRotDragToPosition(timestep);
4323 
4324  TIMER_START(t, RATTLE1);
4325  rattle1(timestep,1);
4326  TIMER_STOP(t, RATTLE1);
4327  if (doGlobal) // include constraint forces
4328  computeGlobal->saveTotalForces(patch);
4329 
4330  TIMER_START(t, SUBMITHALF);
4331  submitHalfstep(step);
4332  TIMER_STOP(t, SUBMITHALF);
4333  if ( zeroMomentum && doFullElectrostatics ) submitMomentum(step);
4334 
4335  if ( ! commOnly ) {
4336  TIMER_START(t, KICK);
4337  newtonianVelocities(-0.5,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4338  TIMER_STOP(t, KICK);
4339  }
4340 
4341  // rattle2(timestep,step);
4342 #endif
4343 
4344  TIMER_START(t, SUBMITFULL);
4345  submitReductions(step);
4346  TIMER_STOP(t, SUBMITFULL);
4347  TIMER_START(t, SUBMITCOLLECT);
4348  submitCollections(step);
4349  TIMER_STOP(t, SUBMITCOLLECT);
4350 #ifndef UPPER_BOUND
4351  //Update adaptive tempering temperature
4352  adaptTempUpdate(step);
4353 
4354  // Multigrator temperature and pressure steps
4355  multigratorTemperature(step, 1);
4356  multigratorPressure(step, 1);
4357  multigratorPressure(step, 2);
4358  multigratorTemperature(step, 2);
4359  doMultigratorRattle = (simParams->multigratorOn && !(step % simParams->multigratorTemperatureFreq));
4360 
4361  NAMD_EVENT_STOP(eon, NamdProfileEvent::INTEGRATE_3); // integrate 3
4362 #endif
4363 
4364 #if CYCLE_BARRIER
4365  cycleBarrier(!((step+1) % stepsPerCycle), step);
4366 #elif PME_BARRIER
4367  cycleBarrier(doFullElectrostatics, step);
4368 #elif STEP_BARRIER
4369  cycleBarrier(1, step);
4370 #endif
4371 
4372 #ifndef UPPER_BOUND
4373  if(Node::Object()->specialTracing || simParams->statsOn){
4374  int bstep = simParams->traceStartStep;
4375  int estep = bstep + simParams->numTraceSteps;
4376  if(step == bstep || step == estep){
4377  traceBarrier(step);
4378  }
4379  }
4380 
4381 #ifdef MEASURE_NAMD_WITH_PAPI
4382  if(simParams->papiMeasure) {
4383  int bstep = simParams->papiMeasureStartStep;
4384  int estep = bstep + simParams->numPapiMeasureSteps;
4385  if(step == bstep || step==estep) {
4386  papiMeasureBarrier(step);
4387  }
4388  }
4389 #endif
4390 
4391  if(0){ // if(traceIsOn()){
4392  traceUserEvent(eventEndOfTimeStep);
4393  sprintf(traceNote, "%s%d",tracePrefix,step);
4394  traceUserSuppliedNote(traceNote);
4395  }
4396 #endif // UPPER_BOUND
4397  rebalanceLoad(step);
4398 
4399 #if PME_BARRIER
4400  // a step before PME
4401  cycleBarrier(dofull && !((step+1)%fullElectFrequency),step);
4402 #endif
4403 
4404 #if USE_HPM
4405  if(step == START_HPM_STEP)
4406  (CProxy_Node(CkpvAccess(BOCclass_group).node)).startHPM();
4407 
4408  if(step == STOP_HPM_STEP)
4409  (CProxy_Node(CkpvAccess(BOCclass_group).node)).stopHPM();
4410 #endif
4411 
4412  }
4413 
4414  TIMER_DONE(t);
4415 #ifdef TIMER_COLLECTION
4416  if (patch->patchID == SPECIAL_PATCH_ID) {
4417  printf("Timer collection reporting in microseconds for "
4418  "Patch %d\n", patch->patchID);
4419  TIMER_REPORT(t);
4420  }
4421 #endif // TIMER_COLLECTION
4422  //
4423  // DJH: Copy updates of SOA back into AOS.
4424  //
4425  //patch->copy_updates_to_AOS();
4426 }
4427 
4428 // add moving drag to each atom's position
4429 void Sequencer::addMovDragToPosition(BigReal timestep) {
4430  FullAtom *atom = patch->atom.begin();
4431  int numAtoms = patch->numAtoms;
4432  Molecule *molecule = Node::Object()->molecule; // need its methods
4433  const BigReal movDragGlobVel = simParams->movDragGlobVel;
4434  const BigReal dt = timestep / TIMEFACTOR; // MUST be as in the integrator!
4435  Vector movDragVel, dragIncrement;
4436  for ( int i = 0; i < numAtoms; ++i )
4437  {
4438  // skip if fixed atom or zero drag attribute
4439  if ( (simParams->fixedAtomsOn && atom[i].atomFixed)
4440  || !(molecule->is_atom_movdragged(atom[i].id)) ) continue;
4441  molecule->get_movdrag_params(movDragVel, atom[i].id);
4442  dragIncrement = movDragGlobVel * movDragVel * dt;
4443  atom[i].position += dragIncrement;
4444  }
4445 }
4446 
4447 // add rotating drag to each atom's position
4448 void Sequencer::addRotDragToPosition(BigReal timestep) {
4449  FullAtom *atom = patch->atom.begin();
4450  int numAtoms = patch->numAtoms;
4451  Molecule *molecule = Node::Object()->molecule; // need its methods
4452  const BigReal rotDragGlobVel = simParams->rotDragGlobVel;
4453  const BigReal dt = timestep / TIMEFACTOR; // MUST be as in the integrator!
4454  BigReal rotDragVel, dAngle;
4455  Vector atomRadius;
4456  Vector rotDragAxis, rotDragPivot, dragIncrement;
4457  for ( int i = 0; i < numAtoms; ++i )
4458  {
4459  // skip if fixed atom or zero drag attribute
4460  if ( (simParams->fixedAtomsOn && atom[i].atomFixed)
4461  || !(molecule->is_atom_rotdragged(atom[i].id)) ) continue;
4462  molecule->get_rotdrag_params(rotDragVel, rotDragAxis, rotDragPivot, atom[i].id);
4463  dAngle = rotDragGlobVel * rotDragVel * dt;
4464  rotDragAxis /= rotDragAxis.length();
4465  atomRadius = atom[i].position - rotDragPivot;
4466  dragIncrement = cross(rotDragAxis, atomRadius) * dAngle;
4467  atom[i].position += dragIncrement;
4468  }
4469 }
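//
// Editorial sketch (not in the original source): each rotating-drag update
// above displaces an atom by dAngle * (axis x r), the linearized rotation
// of the radius vector r (measured from the pivot) about the unit axis.
// In isolation, names illustrative:
//
#if 0 // editorial sketch
#include <cmath>
static void rot_drag_step(double pos[3], const double axisIn[3],
                          const double pivot[3], double dAngle)
{
  double a[3], r[3];
  double len = std::sqrt(axisIn[0]*axisIn[0] + axisIn[1]*axisIn[1]
                         + axisIn[2]*axisIn[2]);
  for (int k = 0; k < 3; k++) {
    a[k] = axisIn[k] / len;        // normalize the rotation axis
    r[k] = pos[k] - pivot[k];      // radius vector from the pivot
  }
  pos[0] += dAngle * (a[1]*r[2] - a[2]*r[1]);  // dAngle * (a x r)
  pos[1] += dAngle * (a[2]*r[0] - a[0]*r[2]);
  pos[2] += dAngle * (a[0]*r[1] - a[1]*r[0]);
}
#endif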
4470 
4471 void Sequencer::minimize() {
4472  //
4473  // DJH: Copy all data into SOA (structure of arrays)
4474  // from AOS (array of structures) data structure.
4475  //
4476  //patch->copy_all_to_SOA();
4477 
4478  const int numberOfSteps = simParams->N;
4479  const int stepsPerCycle = simParams->stepsPerCycle;
4480 #if 0 && defined(NODEGROUP_FORCE_REGISTER)
4481  // XXX DJH: This is a hack that is found to get GPU nonbonded
4482  // force calculation right for --with-single-node-cuda builds
4483  const int stepsPerCycle_save = stepsPerCycle;
4484  simParams->stepsPerCycle = 1;
4485 #endif
4486  int &step = patch->flags.step;
4487  step = simParams->firstTimestep;
4488 
4489  int &maxForceUsed = patch->flags.maxForceUsed;
4490  int &maxForceMerged = patch->flags.maxForceMerged;
4491  maxForceUsed = Results::normal;
4492  maxForceMerged = Results::normal;
4493  int &doNonbonded = patch->flags.doNonbonded;
4494  doNonbonded = 1;
4495  maxForceUsed = Results::nbond;
4496  maxForceMerged = Results::nbond;
4497  const int dofull = ( simParams->fullElectFrequency ? 1 : 0 );
4498  int &doFullElectrostatics = patch->flags.doFullElectrostatics;
4499  doFullElectrostatics = dofull;
4500  if ( dofull ) {
4501  maxForceMerged = Results::slow;
4502  maxForceUsed = Results::slow;
4503  }
4504  int &doMolly = patch->flags.doMolly;
4505  doMolly = simParams->mollyOn && doFullElectrostatics;
4506  int &doMinimize = patch->flags.doMinimize;
4507  doMinimize = 1;
4508  // BEGIN LA
4509  int &doLoweAndersen = patch->flags.doLoweAndersen;
4510  doLoweAndersen = 0;
4511  // END LA
4512 
4513  int &doGBIS = patch->flags.doGBIS;
4514  doGBIS = simParams->GBISOn;
4515 
4516  int &doLCPO = patch->flags.doLCPO;
4517  doLCPO = simParams->LCPOOn;
4518 
4519  int doTcl = simParams->tclForcesOn;
4520  int doColvars = simParams->colvarsOn;
4521  int doGlobal = doTcl || doColvars;
4522  ComputeGlobal *computeGlobal = Node::Object()->computeMgr->computeGlobalObject;
4523 
4524  int &doEnergy = patch->flags.doEnergy;
4525  doEnergy = 1;
4526 
4527  // Do this to stabilize the minimizer, whether or not the user
4528  // wants rigid bond constraints enabled for dynamics.
4529  // In order to enforce them, we have to call HomePatch::rattle1() directly.
4530  patch->rattle1(0.,0,0); // enforce rigid bond constraints on initial positions
4531 
4534  patch->atom.begin(),patch->atom.end());
4535  }
4536 
4537  runComputeObjects(1,step<numberOfSteps); // must migrate here!
4538 
4539  if ( doGlobal ) {
4540 #ifdef DEBUG_MINIMIZE
4541  printf("doTcl = %d doColvars = %d\n", doTcl, doColvars);
4542 #endif
4543  if ( doNonbonded ) saveForce(Results::nbond);
4544  if ( doFullElectrostatics ) saveForce(Results::slow);
4545  computeGlobal->saveTotalForces(patch);
4546  }
4547 #ifdef DEBUG_MINIMIZE
4548  else { printf("No computeGlobal\n"); }
4549 #endif
4550 
4551  BigReal fmax2 = 10. * TIMEFACTOR * TIMEFACTOR * TIMEFACTOR * TIMEFACTOR;
4552 
4553  submitMinimizeReductions(step,fmax2);
4554  rebalanceLoad(step);
4555 
4556  int downhill = 1; // start out just fixing bad contacts
4557  int minSeq = 0;
4558  for ( ++step; step <= numberOfSteps; ++step ) {
4559  // Blocking receive for the minimization coefficient.
4560  BigReal c = broadcast->minimizeCoefficient.get(minSeq++);
4561 
4562  if ( downhill ) {
4563  if ( c ) minimizeMoveDownhill(fmax2);
4564  else {
4565  downhill = 0;
4566  fmax2 *= 10000.;
4567  }
4568  }
4569  if ( ! downhill ) {
4570  if ( ! c ) { // new direction
4571 
4572  // Blocking receive for the minimization coefficient.
4573  c = broadcast->minimizeCoefficient.get(minSeq++);
4574 
4575  newMinimizeDirection(c); // v = c * v + f
4576 
4577  // Blocking receive for the minimization coefficient.
4578  c = broadcast->minimizeCoefficient.get(minSeq++);
4579 
4580  } // same direction
4581  newMinimizePosition(c); // x = x + c * v
4582  }
4583 
4584  runComputeObjects(!(step%stepsPerCycle),step<numberOfSteps);
4585  if ( doGlobal ) {
4586  if ( doNonbonded ) saveForce(Results::nbond);
4587  if ( doFullElectrostatics ) saveForce(Results::slow);
4588  computeGlobal->saveTotalForces(patch);
4589  }
4590  submitMinimizeReductions(step,fmax2);
4591  submitCollections(step, 1); // write out zeros for velocities
4592  rebalanceLoad(step);
4593  }
4594  quenchVelocities(); // zero out bogus velocity
4595 
4596  doMinimize = 0;
4597 
4598 #if 0
4599  // when using CUDASOAintegrate, need to update SOA data structures
4600  if (simParams->CUDASOAintegrate) {
4601  patch->copy_atoms_to_SOA();
4602  }
4603 #endif
4604 
4605 #if 0 && defined(NODEGROUP_FORCE_REGISTER)
4606  // XXX DJH: all patches in a PE are writing into simParams
4607  // so this hack needs a guard
4608  simParams->stepsPerCycle = stepsPerCycle_save;
4609 #endif
4610  //
4611  // DJH: Copy updates of SOA back into AOS.
4612  //
4613  //patch->copy_updates_to_AOS();
4614 }
4615 
4616 // x = x + 0.1 * unit(f) for large f
4617 void Sequencer::minimizeMoveDownhill(BigReal fmax2) {
4618 
4619  FullAtom *a = patch->atom.begin();
4620  Force *f1 = patch->f[Results::normal].begin(); // includes nbond and slow
4621  int numAtoms = patch->numAtoms;
4622 
4623  for ( int i = 0; i < numAtoms; ++i ) {
4624  if ( simParams->fixedAtomsOn && a[i].atomFixed ) continue;
4625  Force f = f1[i];
4626  if ( f.length2() > fmax2 ) {
4627  a[i].position += ( 0.1 * f.unit() );
4628  int hgs = a[i].hydrogenGroupSize; // 0 if not parent
4629  for ( int j=1; j<hgs; ++j ) {
4630  a[++i].position += ( 0.1 * f.unit() );
4631  }
4632  }
4633  }
4634 
4635  patch->rattle1(0.,0,0);
4636 }
4637 
4638 // v = c * v + f
4639 void Sequencer::newMinimizeDirection(BigReal c) {
4640  FullAtom *a = patch->atom.begin();
4641  Force *f1 = patch->f[Results::normal].begin(); // includes nbond and slow
4642  const bool fixedAtomsOn = simParams->fixedAtomsOn;
4643  const bool drudeHardWallOn = simParams->drudeHardWallOn;
4644  int numAtoms = patch->numAtoms;
4645  BigReal maxv2 = 0.;
4646 
4647  for ( int i = 0; i < numAtoms; ++i ) {
4648  a[i].velocity *= c;
4649  a[i].velocity += f1[i];
4650  if ( drudeHardWallOn && i && (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) { // drude particle
4651  a[i].velocity = a[i-1].velocity;
4652  }
4653  if ( fixedAtomsOn && a[i].atomFixed ) a[i].velocity = 0;
4654  BigReal v2 = a[i].velocity.length2();
4655  if ( v2 > maxv2 ) maxv2 = v2;
4656  }
4657 
4658  { Tensor virial; patch->minimize_rattle2( 0.1 * TIMEFACTOR / sqrt(maxv2), &virial); }
4659 
4660  maxv2 = 0.;
4661  for ( int i = 0; i < numAtoms; ++i ) {
4662  if ( drudeHardWallOn && i && (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) { // drude particle
4663  a[i].velocity = a[i-1].velocity;
4664  }
4665  if ( fixedAtomsOn && a[i].atomFixed ) a[i].velocity = 0;
4666  BigReal v2 = a[i].velocity.length2();
4667  if ( v2 > maxv2 ) maxv2 = v2;
4668  }
4669 
4670  min_reduction->max(0,maxv2);
4671  min_reduction->submit();
4672 
4673  // prevent hydrogens from being left behind
4674  BigReal fmax2 = 0.01 * TIMEFACTOR * TIMEFACTOR * TIMEFACTOR * TIMEFACTOR;
4675  // int adjustCount = 0;
4676  int hgs;
4677  for ( int i = 0; i < numAtoms; i += hgs ) {
4678  hgs = a[i].hydrogenGroupSize;
4679  BigReal minChildVel = a[i].velocity.length2();
4680  if ( minChildVel < fmax2 ) continue;
4681  int adjustChildren = 1;
4682  for ( int j = i+1; j < (i+hgs); ++j ) {
4683  if ( a[j].velocity.length2() > minChildVel ) adjustChildren = 0;
4684  }
4685  if ( adjustChildren ) {
4686  // if ( hgs > 1 ) ++adjustCount;
4687  for ( int j = i+1; j < (i+hgs); ++j ) {
4688  if (a[i].mass < 0.01) continue; // lone pair
4689  a[j].velocity = a[i].velocity;
4690  }
4691  }
4692  }
4693  // if (adjustCount) CkPrintf("Adjusting %d hydrogen groups\n", adjustCount);
4694 
4695 }
4696 
4697 // x = x + c * v
4698 void Sequencer::newMinimizePosition(BigReal c) {
4699  FullAtom *a = patch->atom.begin();
4700  int numAtoms = patch->numAtoms;
4701 
4702  for ( int i = 0; i < numAtoms; ++i ) {
4703  a[i].position += c * a[i].velocity;
4704  }
4705 
4706  if ( simParams->drudeHardWallOn ) {
4707  for ( int i = 1; i < numAtoms; ++i ) {
4708  if ( (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) { // drude particle
4709  a[i].position -= a[i-1].position;
4710  }
4711  }
4712  }
4713 
4714  patch->rattle1(0.,0,0);
4715 
4716  if ( simParams->drudeHardWallOn ) {
4717  for ( int i = 1; i < numAtoms; ++i ) {
4718  if ( (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) { // drude particle
4719  a[i].position += a[i-1].position;
4720  }
4721  }
4722  }
4723 }
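//
// Editorial sketch (not in the original source): newMinimizePosition()
// handles Drude particles by temporarily storing each one relative to its
// parent (index i-1) across the constraint projection, so the parent can
// move without stretching the Drude bond.  The pattern in isolation, in
// one dimension with illustrative names:
//
#if 0 // editorial sketch
static void solve_with_drude(double *x, int n, const bool *isDrude,
                             void (*solver)(double *, int))
{
  for (int i = 1; i < n; i++)
    if (isDrude[i]) x[i] -= x[i-1];  // switch to parent-relative coordinate
  solver(x, n);                      // constraint projection moves parents
  for (int i = 1; i < n; i++)
    if (isDrude[i]) x[i] += x[i-1];  // back to absolute coordinate
}
#endif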
4724 
4725 // v = 0
4726 void Sequencer::quenchVelocities() {
4727  FullAtom *a = patch->atom.begin();
4728  int numAtoms = patch->numAtoms;
4729 
4730  for ( int i = 0; i < numAtoms; ++i ) {
4731  a[i].velocity = 0;
4732  }
4733 }
4734 
4735 void Sequencer::submitMomentum(int step) {
4736 
4737  FullAtom *a = patch->atom.begin();
4738  const int numAtoms = patch->numAtoms;
4739 
4740  Vector momentum = 0;
4741  BigReal mass = 0;
4742 if ( simParams->zeroMomentumAlt ) {
4743  for ( int i = 0; i < numAtoms; ++i ) {
4744  momentum += a[i].mass * a[i].velocity;
4745  mass += 1.;
4746  }
4747 } else {
4748  for ( int i = 0; i < numAtoms; ++i ) {
4749  momentum += a[i].mass * a[i].velocity;
4750  mass += a[i].mass;
4751  }
4752 }
4753 
4754  ADD_VECTOR_OBJECT(reduction,REDUCTION_HALFSTEP_MOMENTUM,momentum);
4755  reduction->item(REDUCTION_MOMENTUM_MASS) += mass;
4756 }
4757 
4758 void Sequencer::correctMomentum(int step, BigReal drifttime) {
4759 
4760  //
4761  // DJH: This test should be done in SimParameters.
4762  //
4763  if ( simParams->fixedAtomsOn )
4764  NAMD_die("Cannot zero momentum when fixed atoms are present.");
4765 
4766  // Blocking receive for the momentum correction vector.
4767  const Vector dv = broadcast->momentumCorrection.get(step);
4768 
4769  const Vector dx = dv * ( drifttime / TIMEFACTOR );
4770 
4771  FullAtom *a = patch->atom.begin();
4772  const int numAtoms = patch->numAtoms;
4773 
4774 if ( simParams->zeroMomentumAlt ) {
4775  for ( int i = 0; i < numAtoms; ++i ) {
4776  a[i].velocity += dv * a[i].recipMass;
4777  a[i].position += dx * a[i].recipMass;
4778  }
4779 } else {
4780  for ( int i = 0; i < numAtoms; ++i ) {
4781  a[i].velocity += dv;
4782  a[i].position += dx;
4783  }
4784 }
4785 
4786 }
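//
// Editorial sketch (not in the original source): the correction vector dv
// received above is chosen so the net momentum vanishes; with total
// momentum P = sum_i m_i v_i and total mass M, adding dv = -P/M to every
// velocity gives sum_i m_i (v_i + dv) = P - M*(P/M) = 0.  The position
// shift dx = dv * (drifttime/TIMEFACTOR) additionally removes the drift
// already accumulated.  One-dimensional sketch of the momentum part:
//
#if 0 // editorial sketch
static void zero_net_momentum(double *v, const double *m, int n)
{
  double P = 0, M = 0;
  for (int i = 0; i < n; i++) { P += m[i]*v[i]; M += m[i]; }
  double dv = -P / M;                  // equal shift for every atom
  for (int i = 0; i < n; i++) v[i] += dv;
}
#endif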
4787 
4788 // --------- For Multigrator ---------
4789 void Sequencer::scalePositionsVelocities(const Tensor& posScale, const Tensor& velScale) {
4790  FullAtom *a = patch->atom.begin();
4791  int numAtoms = patch->numAtoms;
4792  Position origin = patch->lattice.origin();
4793  if ( simParams->fixedAtomsOn ) {
4794  NAMD_bug("Sequencer::scalePositionsVelocities, fixed atoms not implemented");
4795  }
4796  if ( simParams->useGroupPressure ) {
4797  int hgs;
4798  for ( int i = 0; i < numAtoms; i += hgs ) {
4799  hgs = a[i].hydrogenGroupSize;
4800  Position pos_cm(0.0, 0.0, 0.0);
4801  Velocity vel_cm(0.0, 0.0, 0.0);
4802  BigReal m_cm = 0.0;
4803  for (int j=0;j < hgs;++j) {
4804  m_cm += a[i+j].mass;
4805  pos_cm += a[i+j].mass*a[i+j].position;
4806  vel_cm += a[i+j].mass*a[i+j].velocity;
4807  }
4808  pos_cm /= m_cm;
4809  vel_cm /= m_cm;
4810  pos_cm -= origin;
4811  Position dpos = posScale*pos_cm;
4812  Velocity dvel = velScale*vel_cm;
4813  for (int j=0;j < hgs;++j) {
4814  a[i+j].position += dpos;
4815  a[i+j].velocity += dvel;
4816  }
4817  }
4818  } else {
4819  for ( int i = 0; i < numAtoms; i++) {
4820  a[i].position += posScale*(a[i].position-origin);
4821  a[i].velocity = velScale*a[i].velocity;
4822  }
4823  }
4824 }
4825 
4826 void Sequencer::multigratorPressure(int step, int callNumber) {
4827 // Calculate new positions, momenta, and volume using positionRescaleFactor and
4828 // velocityRescaleTensor values returned from Controller::multigratorPressureCalcScale()
4829  if (simParams->multigratorOn && !(step % simParams->multigratorPressureFreq)) {
4830  FullAtom *a = patch->atom.begin();
4831  int numAtoms = patch->numAtoms;
4832 
4833  // Blocking receive (get) scaling factors from Controller
4834  Tensor scaleTensor = (callNumber == 1) ? broadcast->positionRescaleFactor.get(step) : broadcast->positionRescaleFactor2.get(step);
4835  Tensor velScaleTensor = (callNumber == 1) ? broadcast->velocityRescaleTensor.get(step) : broadcast->velocityRescaleTensor2.get(step);
4836  Tensor posScaleTensor = scaleTensor;
4837  posScaleTensor -= Tensor::identity();
4838  if (simParams->useGroupPressure) {
4839  velScaleTensor -= Tensor::identity();
4840  }
4841 
4842  // Scale volume
4843  patch->lattice.rescale(scaleTensor);
4844  // Scale positions and velocities
4845  scalePositionsVelocities(posScaleTensor, velScaleTensor);
4846 
4847  if (!patch->flags.doFullElectrostatics) NAMD_bug("Sequencer::multigratorPressure, doFullElectrostatics must be true");
4848 
4849  // Calculate new forces
4850  // NOTE: We should not need to migrate here since any migration should have happened in the
4851  // previous call to runComputeObjects inside the MD loop in Sequencer::integrate()
4852  const int numberOfSteps = simParams->N;
4853  const int stepsPerCycle = simParams->stepsPerCycle;
4854  runComputeObjects(0 , step<numberOfSteps, 1);
4855 
4856  reduction->item(REDUCTION_ATOM_CHECKSUM) += numAtoms;
4857  reduction->item(REDUCTION_MARGIN_VIOLATIONS) += patch->marginViolations;
4858 
4859  // Virials etc.
4860  Tensor virialNormal;
4861  Tensor momentumSqrSum;
4862  BigReal kineticEnergy = 0;
4863  if ( simParams->pairInteractionOn ) {
4864  if ( simParams->pairInteractionSelf ) {
4865  for ( int i = 0; i < numAtoms; ++i ) {
4866  if ( a[i].partition != 1 ) continue;
4867  kineticEnergy += a[i].mass * a[i].velocity.length2();
4868  virialNormal.outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
4869  }
4870  }
4871  } else {
4872  for ( int i = 0; i < numAtoms; ++i ) {
4873  if (a[i].mass < 0.01) continue;
4874  kineticEnergy += a[i].mass * a[i].velocity.length2();
4875  virialNormal.outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
4876  }
4877  }
4878  if (!simParams->useGroupPressure) momentumSqrSum = virialNormal;
4879  kineticEnergy *= 0.5;
4880  reduction->item(REDUCTION_CENTERED_KINETIC_ENERGY) += kineticEnergy;
4881  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NORMAL, virialNormal);
4882 
4883  if ( simParams->fixedAtomsOn ) {
4884  Tensor fixVirialNormal;
4885  Tensor fixVirialNbond;
4886  Tensor fixVirialSlow;
4887  Vector fixForceNormal = 0;
4888  Vector fixForceNbond = 0;
4889  Vector fixForceSlow = 0;
4890 
4891  calcFixVirial(fixVirialNormal, fixVirialNbond, fixVirialSlow, fixForceNormal, fixForceNbond, fixForceSlow);
4892 
4893  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NORMAL, fixVirialNormal);
4894  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NBOND, fixVirialNbond);
4895  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_SLOW, fixVirialSlow);
4896  ADD_VECTOR_OBJECT(reduction, REDUCTION_EXT_FORCE_NORMAL, fixForceNormal);
4897  ADD_VECTOR_OBJECT(reduction, REDUCTION_EXT_FORCE_NBOND, fixForceNbond);
4898  ADD_VECTOR_OBJECT(reduction, REDUCTION_EXT_FORCE_SLOW, fixForceSlow);
4899  }
4900 
4901  // Internal virial and group momentum
4902  Tensor intVirialNormal;
4903  Tensor intVirialNormal2;
4904  Tensor intVirialNbond;
4905  Tensor intVirialSlow;
4906  int hgs;
4907  for ( int i = 0; i < numAtoms; i += hgs ) {
4908  hgs = a[i].hydrogenGroupSize;
4909  int j;
4910  BigReal m_cm = 0;
4911  Position x_cm(0,0,0);
4912  Velocity v_cm(0,0,0);
4913  for ( j = i; j < (i+hgs); ++j ) {
4914  m_cm += a[j].mass;
4915  x_cm += a[j].mass * a[j].position;
4916  v_cm += a[j].mass * a[j].velocity;
4917  }
4918  if (simParams->useGroupPressure) momentumSqrSum.outerAdd(1.0/m_cm, v_cm, v_cm);
4919  x_cm /= m_cm;
4920  v_cm /= m_cm;
4921  if (simParams->fixedAtomsOn) NAMD_bug("Sequencer::multigratorPressure, simParams->fixedAtomsOn not implemented yet");
4922  if ( simParams->pairInteractionOn ) {
4923  if ( simParams->pairInteractionSelf ) {
4924  NAMD_bug("Sequencer::multigratorPressure, this part needs to be implemented correctly");
4925  for ( j = i; j < (i+hgs); ++j ) {
4926  if ( a[j].partition != 1 ) continue;
4927  BigReal mass = a[j].mass;
4928  Vector v = a[j].velocity;
4929  Vector dv = v - v_cm;
4930  intVirialNormal2.outerAdd (mass, v, dv);
4931  Vector dx = a[j].position - x_cm;
4932  intVirialNormal.outerAdd(1.0, patch->f[Results::normal][j], dx);
4933  intVirialNbond.outerAdd(1.0, patch->f[Results::nbond][j], dx);
4934  intVirialSlow.outerAdd(1.0, patch->f[Results::slow][j], dx);
4935  }
4936  }
4937  } else {
4938  for ( j = i; j < (i+hgs); ++j ) {
4939  BigReal mass = a[j].mass;
4940  Vector v = a[j].velocity;
4941  Vector dv = v - v_cm;
4942  intVirialNormal2.outerAdd(mass, v, dv);
4943  Vector dx = a[j].position - x_cm;
4944  intVirialNormal.outerAdd(1.0, patch->f[Results::normal][j], dx);
4945  intVirialNbond.outerAdd(1.0, patch->f[Results::nbond][j], dx);
4946  intVirialSlow.outerAdd(1.0, patch->f[Results::slow][j], dx);
4947  }
4948  }
4949  }
4950 
4951  ADD_TENSOR_OBJECT(reduction, REDUCTION_INT_VIRIAL_NORMAL, intVirialNormal);
4952  ADD_TENSOR_OBJECT(reduction, REDUCTION_INT_VIRIAL_NORMAL, intVirialNormal2);
4953  ADD_TENSOR_OBJECT(reduction, REDUCTION_INT_VIRIAL_NBOND, intVirialNbond);
4954  ADD_TENSOR_OBJECT(reduction, REDUCTION_INT_VIRIAL_SLOW, intVirialSlow);
4955  ADD_TENSOR_OBJECT(reduction, REDUCTION_MOMENTUM_SQUARED, momentumSqrSum);
4956 
4957  reduction->submit();
4958  }
4959 }
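// Illustrative sketch (editorial, not part of NAMD): the virial loops above
// rely on Tensor::outerAdd(w, u, v) accumulating a weighted outer product,
// T += w * (u v^T). A minimal standalone equivalent:
#if 0
struct Mat3 { double m[3][3]; };
// accumulate T += w * outer(u, v) for 3-vectors u and v
void outerAdd(Mat3 &T, double w, const double u[3], const double v[3]) {
  for (int r = 0; r < 3; ++r)
    for (int c = 0; c < 3; ++c)
      T.m[r][c] += w * u[r] * v[c];
}
#endif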
4960 
4961 void Sequencer::scaleVelocities(const BigReal velScale) {
4962  FullAtom *a = patch->atom.begin();
4963  int numAtoms = patch->numAtoms;
4964  for ( int i = 0; i < numAtoms; i++) {
4965  a[i].velocity *= velScale;
4966  }
4967 }
4968 
4969 BigReal Sequencer::calcKineticEnergy() {
4970  FullAtom *a = patch->atom.begin();
4971  int numAtoms = patch->numAtoms;
4972  BigReal kineticEnergy = 0.0;
4973  if ( simParams->pairInteractionOn ) {
4974  if ( simParams->pairInteractionSelf ) {
4975  for (int i = 0; i < numAtoms; ++i ) {
4976  if ( a[i].partition != 1 ) continue;
4977  kineticEnergy += a[i].mass * a[i].velocity.length2();
4978  }
4979  }
4980  } else {
4981  for (int i = 0; i < numAtoms; ++i ) {
4982  kineticEnergy += a[i].mass * a[i].velocity.length2();
4983  }
4984  }
4985  kineticEnergy *= 0.5;
4986  return kineticEnergy;
4987 }
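// NOTE (editorial): calcKineticEnergy() returns (1/2) sum_i m_i |v_i|^2 in
// internal units; converting this to a temperature via equipartition,
// T = 2 KE / (N_dof k_B), happens on the Controller side, not here.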
4988 
4989 void Sequencer::multigratorTemperature(int step, int callNumber) {
4990  if (simParams->multigratorOn && !(step % simParams->multigratorTemperatureFreq)) {
4991  // Blocking receive (get) velocity scaling factor.
4992  BigReal velScale = (callNumber == 1) ? broadcast->velocityRescaleFactor.get(step) : broadcast->velocityRescaleFactor2.get(step);
4993  scaleVelocities(velScale);
4994  // Calculate new kineticEnergy
4995  BigReal kineticEnergy = calcKineticEnergy();
4996  multigratorReduction->item(MULTIGRATOR_REDUCTION_KINETIC_ENERGY) += kineticEnergy;
4997  if (callNumber == 1 && !(step % simParams->multigratorPressureFreq)) {
4998  // If this is a pressure cycle, calculate new momentum squared sum
4999  FullAtom *a = patch->atom.begin();
5000  int numAtoms = patch->numAtoms;
5001  Tensor momentumSqrSum;
5002  if (simParams->useGroupPressure) {
5003  int hgs;
5004  for ( int i = 0; i < numAtoms; i += hgs ) {
5005  hgs = a[i].hydrogenGroupSize;
5006  int j;
5007  BigReal m_cm = 0;
5008  Position x_cm(0,0,0);
5009  Velocity v_cm(0,0,0);
5010  for ( j = i; j < (i+hgs); ++j ) {
5011  m_cm += a[j].mass;
5012  x_cm += a[j].mass * a[j].position;
5013  v_cm += a[j].mass * a[j].velocity;
5014  }
5015  momentumSqrSum.outerAdd(1.0/m_cm, v_cm, v_cm);
5016  }
5017  } else {
5018  for ( int i = 0; i < numAtoms; i++) {
5019  momentumSqrSum.outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
5020  }
5021  }
5022  ADD_TENSOR_OBJECT(multigratorReduction, MULTIGRATOR_REDUCTION_MOMENTUM_SQUARED, momentumSqrSum);
5023  }
5024  // Submit reductions (kineticEnergy and, if applicable, momentumSqrSum)
5025  multigratorReduction->submit();
5026 
5027  }
5028 }
5029 // --------- End Multigrator ---------
5030 
5031 //
5032 // DJH: Calls one or more addForceToMomentum which in turn calls HomePatch
5033 // versions. We should inline to reduce the number of function calls.
5034 //
5035 void Sequencer::newtonianVelocities(BigReal stepscale, const BigReal timestep,
5036  const BigReal nbondstep,
5037  const BigReal slowstep,
5038  const int staleForces,
5039  const int doNonbonded,
5040  const int doFullElectrostatics)
5041 {
5042  NAMD_EVENT_RANGE_2(patch->flags.event_on,
5043  NamdProfileEvent::NEWTONIAN_VELOCITIES);
5044 
5045  // Deterministic velocity update, account for multigrator
5046  if (staleForces || (doNonbonded && doFullElectrostatics)) {
5047  addForceToMomentum3(stepscale*timestep, Results::normal, 0,
5048  stepscale*nbondstep, Results::nbond, staleForces,
5049  stepscale*slowstep, Results::slow, staleForces);
5050  } else {
5051  addForceToMomentum(stepscale*timestep);
5052  if (staleForces || doNonbonded)
5053  addForceToMomentum(stepscale*nbondstep, Results::nbond, staleForces);
5054  if (staleForces || doFullElectrostatics)
5055  addForceToMomentum(stepscale*slowstep, Results::slow, staleForces);
5056  }
5057 }
5058 
5059 void Sequencer::langevinVelocities(BigReal dt_fs)
5060 {
5061 // This routine is used for the BAOAB integrator,
5062 // Ornstein-Uhlenbeck exact solve for the O-part.
5063 // See B. Leimkuhler and C. Matthews, AMRX (2012)
5064 // Routine originally written by JPhillips, with fresh errors by CMatthews June2012
5065 
5066  if ( simParams->langevinOn && simParams->langevin_useBAOAB )
5067  {
5068  FullAtom *a = patch->atom.begin();
5069  int numAtoms = patch->numAtoms;
5070  Molecule *molecule = Node::Object()->molecule;
5071  BigReal dt = dt_fs * 0.001; // convert to ps
5072  BigReal kbT = BOLTZMANN*(simParams->langevinTemp);
5073  if (simParams->adaptTempOn && simParams->adaptTempLangevin)
5074  {
5075  kbT = BOLTZMANN*adaptTempT;
5076  }
5077 
5078  int lesReduceTemp = simParams->lesOn && simParams->lesReduceTemp;
5079  BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
5080 
5081  for ( int i = 0; i < numAtoms; ++i )
5082  {
5083  BigReal dt_gamma = dt * a[i].langevinParam;
5084  if ( ! dt_gamma ) continue;
5085 
5086  BigReal f1 = exp( -dt_gamma );
5087  BigReal f2 = sqrt( ( 1. - f1*f1 ) * kbT *
5088  ( a[i].partition ? tempFactor : 1.0 ) *
5089  a[i].recipMass );
5090  a[i].velocity *= f1;
5091  a[i].velocity += f2 * random->gaussian_vector();
5092  }
5093  }
5094 }
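// Illustrative sketch (editorial, not part of NAMD): the loop above is the
// exact Ornstein-Uhlenbeck solve used for the BAOAB "O" step. For friction
// gamma and thermal variance kT/m,
//   v <- exp(-gamma dt) v + sqrt((1 - exp(-2 gamma dt)) kT/m) xi,
// with xi ~ N(0,1)^3; note f1*f1 = exp(-2 gamma dt) above. A standalone 1D
// version of the same update:
#if 0
#include <cmath>
#include <random>
double ou_step(double v, double gamma, double dt, double kT_over_m,
               std::mt19937 &gen) {
  std::normal_distribution<double> gauss(0.0, 1.0);
  const double f1 = std::exp(-gamma * dt);                  // velocity decay
  const double f2 = std::sqrt((1.0 - f1 * f1) * kT_over_m); // noise amplitude
  return f1 * v + f2 * gauss(gen); // exact OU update over dt
}
#endif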
5095 
5096 void Sequencer::langevinVelocitiesBBK1(BigReal dt_fs)
5097 {
5098  NAMD_EVENT_RANGE_2(patch->flags.event_on,
5099  NamdProfileEvent::LANGEVIN_VELOCITIES_BBK1);
5100  if ( simParams->langevinOn )
5101  {
5102  FullAtom *a = patch->atom.begin();
5103  int numAtoms = patch->numAtoms;
5104  Molecule *molecule = Node::Object()->molecule;
5105  BigReal dt = dt_fs * 0.001; // convert to ps
5106  int i;
5107 
5108  if (simParams->drudeOn) {
5109  for (i = 0; i < numAtoms; i++) {
5110 
5111  if (i < numAtoms-1 &&
5112  a[i+1].mass < 1.0 && a[i+1].mass > 0.05) {
5113  //printf("*** Found Drude particle %d\n", a[i+1].id);
5114  // i+1 is a Drude particle with parent i
5115 
5116  // convert from Cartesian coordinates to (COM,bond) coordinates
5117  BigReal m = a[i+1].mass / (a[i].mass + a[i+1].mass); // mass ratio
5118  Vector v_bnd = a[i+1].velocity - a[i].velocity; // vel of bond
5119  Vector v_com = a[i].velocity + m * v_bnd; // vel of COM
5120  BigReal dt_gamma;
5121 
5122  // use Langevin damping factor i for v_com
5123  dt_gamma = dt * a[i].langevinParam;
5124  if (dt_gamma != 0.0) {
5125  v_com *= ( 1. - 0.5 * dt_gamma );
5126  }
5127 
5128  // use Langevin damping factor i+1 for v_bnd
5129  dt_gamma = dt * a[i+1].langevinParam;
5130  if (dt_gamma != 0.0) {
5131  v_bnd *= ( 1. - 0.5 * dt_gamma );
5132  }
5133 
5134  // convert back
5135  a[i].velocity = v_com - m * v_bnd;
5136  a[i+1].velocity = v_bnd + a[i].velocity;
5137 
5138  i++; // +1 from loop, we've updated both particles
5139  }
5140  else {
5141  BigReal dt_gamma = dt * a[i].langevinParam;
5142  if ( ! dt_gamma ) continue;
5143 
5144  a[i].velocity *= ( 1. - 0.5 * dt_gamma );
5145  }
5146 
5147  } // end for
5148  } // end if drudeOn
5149  else {
5150 
5151  //
5152  // DJH: The conditional inside loop prevents vectorization and doesn't
5153  // avoid much work since addition and multiplication are cheap.
5154  //
5155  for ( i = 0; i < numAtoms; ++i )
5156  {
5157  BigReal dt_gamma = dt * a[i].langevinParam;
5158  if ( ! dt_gamma ) continue;
5159 
5160  a[i].velocity *= ( 1. - 0.5 * dt_gamma );
5161  }
5162 
5163  } // end else
5164 
5165  } // end if langevinOn
5166 }
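// NOTE (editorial): BBK1 is the pre-force half of the BBK discretization of
// Langevin dynamics and applies only the deterministic friction factor,
//   v <- (1 - gamma dt / 2) v;
// the stochastic kick and the matching division by (1 + gamma dt / 2) are
// applied after the force update, in langevinVelocitiesBBK2() below.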
5167 
5168 
5169 void Sequencer::langevinVelocitiesBBK2(BigReal dt_fs)
5170 {
5171  NAMD_EVENT_RANGE_2(patch->flags.event_on,
5172  NamdProfileEvent::LANGEVIN_VELOCITIES_BBK2);
5173  if ( simParams->langevinOn )
5174  {
5175  //
5176  // DJH: This call is expensive. Avoid calling when gammas don't differ.
5177  // Set flag in SimParameters and make this call conditional.
5178  //
5179  TIMER_START(patch->timerSet, RATTLE1);
5180  rattle1(dt_fs,1); // conserve momentum if gammas differ
5181  TIMER_STOP(patch->timerSet, RATTLE1);
5182 
5183  FullAtom *a = patch->atom.begin();
5184  int numAtoms = patch->numAtoms;
5185  Molecule *molecule = Node::Object()->molecule;
5186  BigReal dt = dt_fs * 0.001; // convert to ps
5187  BigReal kbT = BOLTZMANN*(simParams->langevinTemp);
5188  if (simParams->adaptTempOn && simParams->adaptTempLangevin)
5189  {
5190  kbT = BOLTZMANN*adaptTempT;
5191  }
5192  int lesReduceTemp = simParams->lesOn && simParams->lesReduceTemp;
5193  BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
5194  int i;
5195 
5196  if (simParams->drudeOn) {
5197  BigReal kbT_bnd = BOLTZMANN*(simParams->drudeTemp); // drude bond Temp
5198 
5199  for (i = 0; i < numAtoms; i++) {
5200 
5201  if (i < numAtoms-1 &&
5202  a[i+1].mass < 1.0 && a[i+1].mass > 0.05) {
5203  //printf("*** Found Drude particle %d\n", a[i+1].id);
5204  // i+1 is a Drude particle with parent i
5205 
5206  // convert from Cartesian coordinates to (COM,bond) coordinates
5207  BigReal m = a[i+1].mass / (a[i].mass + a[i+1].mass); // mass ratio
5208  Vector v_bnd = a[i+1].velocity - a[i].velocity; // vel of bond
5209  Vector v_com = a[i].velocity + m * v_bnd; // vel of COM
5210  BigReal dt_gamma;
5211 
5212  // use Langevin damping factor i for v_com
5213  dt_gamma = dt * a[i].langevinParam;
5214  if (dt_gamma != 0.0) {
5215  BigReal mass = a[i].mass + a[i+1].mass;
5216  v_com += random->gaussian_vector() *
5217  sqrt( 2 * dt_gamma * kbT *
5218  ( a[i].partition ? tempFactor : 1.0 ) / mass );
5219  v_com /= ( 1. + 0.5 * dt_gamma );
5220  }
5221 
5222  // use Langevin damping factor i+1 for v_bnd
5223  dt_gamma = dt * a[i+1].langevinParam;
5224  if (dt_gamma != 0.0) {
5225  BigReal mass = a[i+1].mass * (1. - m);
5226  v_bnd += random->gaussian_vector() *
5227  sqrt( 2 * dt_gamma * kbT_bnd *
5228  ( a[i+1].partition ? tempFactor : 1.0 ) / mass );
5229  v_bnd /= ( 1. + 0.5 * dt_gamma );
5230  }
5231 
5232  // convert back
5233  a[i].velocity = v_com - m * v_bnd;
5234  a[i+1].velocity = v_bnd + a[i].velocity;
5235 
5236  i++; // +1 from loop, we've updated both particles
5237  }
5238  else {
5239  BigReal dt_gamma = dt * a[i].langevinParam;
5240  if ( ! dt_gamma ) continue;
5241 
5242  a[i].velocity += random->gaussian_vector() *
5243  sqrt( 2 * dt_gamma * kbT *
5244  ( a[i].partition ? tempFactor : 1.0 ) * a[i].recipMass );
5245  a[i].velocity /= ( 1. + 0.5 * dt_gamma );
5246  }
5247 
5248  } // end for
5249  } // end if drudeOn
5250  else {
5251 
5252  //
5253  // DJH: For case using same gamma (the Langevin parameter),
5254  // no partitions (e.g. FEP), and no adaptive tempering (adaptTempMD),
5255  // we can precompute constants. Then by lifting the RNG from the
5256  // loop (filling up an array of random numbers), we can vectorize
5257  // loop and simplify arithmetic to just addition and multiplication.
5258  //
5259  for ( i = 0; i < numAtoms; ++i )
5260  {
5261  BigReal dt_gamma = dt * a[i].langevinParam;
5262  if ( ! dt_gamma ) continue;
5263 
5264  a[i].velocity += random->gaussian_vector() *
5265  sqrt( 2 * dt_gamma * kbT *
5266  ( a[i].partition ? tempFactor : 1.0 ) * a[i].recipMass );
5267  a[i].velocity /= ( 1. + 0.5 * dt_gamma );
5268  }
5269 
5270  } // end else
5271 
5272  } // end if langevinOn
5273 }
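// NOTE (editorial): combining BBK1 and BBK2 across one step gives,
// schematically (with the two half-kicks of newtonianVelocities folded into
// a single dt F/m term),
//   v' = [ (1 - gamma dt/2) v + dt F/m + sqrt(2 gamma dt kT/m) xi ]
//        / (1 + gamma dt/2),
// and the Drude branch applies the same update in (COM, bond) coordinates,
// with kT replaced by the Drude bond temperature for the bond coordinate.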
5274 
5275 
5276 void Sequencer::berendsenPressure(int step)
5277 {
5278  if ( simParams->berendsenPressureOn ) {
5279  berendsenPressure_count++;
5280  const int freq = simParams->berendsenPressureFreq;
5281  if ( ! (berendsenPressure_count % freq ) ) {
5282  berendsenPressure_count = 0;
5283  FullAtom *a = patch->atom.begin();
5284  int numAtoms = patch->numAtoms;
5285  // Blocking receive for the updated lattice scaling factor.
5286  Tensor factor = broadcast->positionRescaleFactor.get(step);
5287  patch->lattice.rescale(factor);
5288  if ( simParams->useGroupPressure )
5289  {
5290  int hgs;
5291  for ( int i = 0; i < numAtoms; i += hgs ) {
5292  int j;
5293  hgs = a[i].hydrogenGroupSize;
5294  if ( simParams->fixedAtomsOn && a[i].groupFixed ) {
5295  for ( j = i; j < (i+hgs); ++j ) {
5296  a[j].position = patch->lattice.apply_transform(
5297  a[j].fixedPosition,a[j].transform);
5298  }
5299  continue;
5300  }
5301  BigReal m_cm = 0;
5302  Position x_cm(0,0,0);
5303  for ( j = i; j < (i+hgs); ++j ) {
5304  if ( simParams->fixedAtomsOn && a[j].atomFixed ) continue;
5305  m_cm += a[j].mass;
5306  x_cm += a[j].mass * a[j].position;
5307  }
5308  x_cm /= m_cm;
5309  Position new_x_cm = x_cm;
5310  patch->lattice.rescale(new_x_cm,factor);
5311  Position delta_x_cm = new_x_cm - x_cm;
5312  for ( j = i; j < (i+hgs); ++j ) {
5313  if ( simParams->fixedAtomsOn && a[j].atomFixed ) {
5314  a[j].position = patch->lattice.apply_transform(
5315  a[j].fixedPosition,a[j].transform);
5316  continue;
5317  }
5318  a[j].position += delta_x_cm;
5319  }
5320  }
5321  }
5322  else
5323  {
5324  for ( int i = 0; i < numAtoms; ++i )
5325  {
5326  if ( simParams->fixedAtomsOn && a[i].atomFixed ) {
5327  a[i].position = patch->lattice.apply_transform(
5328  a[i].fixedPosition,a[i].transform);
5329  continue;
5330  }
5331  patch->lattice.rescale(a[i].position,factor);
5332  }
5333  }
5334  }
5335  } else {
5336  berendsenPressure_count = 0;
5337  }
5338 }
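// NOTE (editorial): under useGroupPressure only each hydrogen group's center
// of mass is moved to its rescaled position (delta_x_cm above), leaving bond
// lengths untouched, which is the appropriate choice when rigid-bond
// constraints are active; the else branch rescales every atom directly.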
5339 
5340 void Sequencer::langevinPiston(int step)
5341 {
5342  if ( simParams->langevinPistonOn && ! ( (step-1-slowFreq/2) % slowFreq ) )
5343  {
5344  //
5345  // DJH: Loops below simplify if we lift out special cases of fixed atoms
5346  // and pressure excluded atoms and make them their own branch.
5347  //
5348  FullAtom *a = patch->atom.begin();
5349  int numAtoms = patch->numAtoms;
5350  // Blocking receive for the updated lattice scaling factor.
5351  Tensor factor = broadcast->positionRescaleFactor.get(step);
5352  TIMER_START(patch->timerSet, PISTON);
5353  // JCP FIX THIS!!!
5354  Vector velFactor(1/factor.xx,1/factor.yy,1/factor.zz);
5355  patch->lattice.rescale(factor);
5356  Molecule *mol = Node::Object()->molecule;
5357  if ( simParams->useGroupPressure )
5358  {
5359  int hgs;
5360  for ( int i = 0; i < numAtoms; i += hgs ) {
5361  int j;
5362  hgs = a[i].hydrogenGroupSize;
5363  if ( simParams->fixedAtomsOn && a[i].groupFixed ) {
5364  for ( j = i; j < (i+hgs); ++j ) {
5365  a[j].position = patch->lattice.apply_transform(
5366  a[j].fixedPosition,a[j].transform);
5367  }
5368  continue;
5369  }
5370  BigReal m_cm = 0;
5371  Position x_cm(0,0,0);
5372  Velocity v_cm(0,0,0);
5373  for ( j = i; j < (i+hgs); ++j ) {
5374  if ( simParams->fixedAtomsOn && a[j].atomFixed ) continue;
5375  m_cm += a[j].mass;
5376  x_cm += a[j].mass * a[j].position;
5377  v_cm += a[j].mass * a[j].velocity;
5378  }
5379  x_cm /= m_cm;
5380  Position new_x_cm = x_cm;
5381  patch->lattice.rescale(new_x_cm,factor);
5382  Position delta_x_cm = new_x_cm - x_cm;
5383  v_cm /= m_cm;
5384  Velocity delta_v_cm;
5385  delta_v_cm.x = ( velFactor.x - 1 ) * v_cm.x;
5386  delta_v_cm.y = ( velFactor.y - 1 ) * v_cm.y;
5387  delta_v_cm.z = ( velFactor.z - 1 ) * v_cm.z;
5388  for ( j = i; j < (i+hgs); ++j ) {
5389  if ( simParams->fixedAtomsOn && a[j].atomFixed ) {
5390  a[j].position = patch->lattice.apply_transform(
5391  a[j].fixedPosition,a[j].transform);
5392  continue;
5393  }
5394  if ( mol->is_atom_exPressure(a[j].id) ) continue;
5395  a[j].position += delta_x_cm;
5396  a[j].velocity += delta_v_cm;
5397  }
5398  }
5399  }
5400  else
5401  {
5402  for ( int i = 0; i < numAtoms; ++i )
5403  {
5404  if ( simParams->fixedAtomsOn && a[i].atomFixed ) {
5405  a[i].position = patch->lattice.apply_transform(
5406  a[i].fixedPosition,a[i].transform);
5407  continue;
5408  }
5409  if ( mol->is_atom_exPressure(a[i].id) ) continue;
5410  patch->lattice.rescale(a[i].position,factor);
5411  a[i].velocity.x *= velFactor.x;
5412  a[i].velocity.y *= velFactor.y;
5413  a[i].velocity.z *= velFactor.z;
5414  }
5415  }
5416  TIMER_STOP(patch->timerSet, PISTON);
5417  }
5418 }
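// NOTE (editorial): the piston update rescales positions through the full
// tensor but velocities only through the reciprocal diagonal entries
// (velFactor), i.e. it assumes an effectively diagonal cell scaling; the
// "JCP FIX THIS!!!" marker above presumably flags this approximation.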
5419 
5420 void Sequencer::rescaleVelocities(int step)
5421 {
5422  const int rescaleFreq = simParams->rescaleFreq;
5423  if ( rescaleFreq > 0 ) {
5424  FullAtom *a = patch->atom.begin();
5425  int numAtoms = patch->numAtoms;
5426  ++rescaleVelocities_numTemps;
5427  if ( rescaleVelocities_numTemps == rescaleFreq ) {
5428  // Blocking receive for the velocity scaling factor.
5429  BigReal factor = broadcast->velocityRescaleFactor.get(step);
5430  for ( int i = 0; i < numAtoms; ++i )
5431  {
5432  a[i].velocity *= factor;
5433  }
5434  rescaleVelocities_numTemps = 0;
5435  }
5436  }
5437 }
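// NOTE (editorial): the broadcast factor applied above is computed once per
// rescaleFreq steps on the Controller side; in conventional velocity
// rescaling it would be sqrt(T_target / T_measured), and broadcasting one
// global factor keeps all patches consistent with each other.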
5438 
5439 void Sequencer::rescaleaccelMD (int step, int doNonbonded, int doFullElectrostatics)
5440 {
5441  if (!simParams->accelMDOn) return;
5442  if ((step < simParams->accelMDFirstStep) || ( simParams->accelMDLastStep >0 && step > simParams->accelMDLastStep)) return;
5443 
5444  // Blocking receive for the Accelerated MD scaling factors.
5445  Vector accelMDfactor = broadcast->accelMDRescaleFactor.get(step);
5446  const BigReal factor_dihe = accelMDfactor[0];
5447  const BigReal factor_tot = accelMDfactor[1];
5448  const int numAtoms = patch->numAtoms;
5449 
5450  if (simParams->accelMDdihe && factor_tot <1 )
5451  NAMD_die("accelMD broadcasting error!\n");
5452  if (!simParams->accelMDdihe && !simParams->accelMDdual && factor_dihe <1 )
5453  NAMD_die("accelMD broadcasting error!\n");
5454 
5455  if (simParams->accelMDdihe && factor_dihe < 1) {
5456  for (int i = 0; i < numAtoms; ++i)
5457  if (patch->f[Results::amdf][i][0] || patch->f[Results::amdf][i][1] || patch->f[Results::amdf][i][2])
5458  patch->f[Results::normal][i] += patch->f[Results::amdf][i]*(factor_dihe - 1);
5459  }
5460 
5461  if ( !simParams->accelMDdihe && factor_tot < 1) {
5462  for (int i = 0; i < numAtoms; ++i)
5463  patch->f[Results::normal][i] *= factor_tot;
5464  if (doNonbonded) {
5465  for (int i = 0; i < numAtoms; ++i)
5466  patch->f[Results::nbond][i] *= factor_tot;
5467  }
5468  if (doFullElectrostatics) {
5469  for (int i = 0; i < numAtoms; ++i)
5470  patch->f[Results::slow][i] *= factor_tot;
5471  }
5472  }
5473 
5474  if (simParams->accelMDdual && factor_dihe < 1) {
5475  for (int i = 0; i < numAtoms; ++i)
5476  if (patch->f[Results::amdf][i][0] || patch->f[Results::amdf][i][1] || patch->f[Results::amdf][i][2])
5477  patch->f[Results::normal][i] += patch->f[Results::amdf][i]*(factor_dihe - factor_tot);
5478  }
5479 
5480 }
5481 
5482 void Sequencer::adaptTempUpdate(int step)
5483 {
5484  //check if adaptive tempering is enabled and in the right timestep range
5485  if (!simParams->adaptTempOn) return;
5486  if ( (step < simParams->adaptTempFirstStep ) ||
5487  ( simParams->adaptTempLastStep > 0 && step > simParams->adaptTempLastStep ) ) {
5488  if (simParams->langevinOn) // restore langevin temperature
5489  adaptTempT = simParams->langevinTemp;
5490  return;
5491  }
5492  // Get Updated Temperature
5493  if ( !(step % simParams->adaptTempFreq ) && (step > simParams->firstTimestep ))
5494  // Blocking receive for the updated adaptive tempering temperature.
5495  adaptTempT = broadcast->adaptTemperature.get(step);
5496 }
5497 
5498 void Sequencer::reassignVelocities(BigReal timestep, int step)
5499 {
5500  const int reassignFreq = simParams->reassignFreq;
5501  if ( ( reassignFreq > 0 ) && ! ( step % reassignFreq ) ) {
5502  FullAtom *a = patch->atom.begin();
5503  int numAtoms = patch->numAtoms;
5504  BigReal newTemp = simParams->reassignTemp;
5505  newTemp += ( step / reassignFreq ) * simParams->reassignIncr;
5506  if ( simParams->reassignIncr > 0.0 ) {
5507  if ( newTemp > simParams->reassignHold && simParams->reassignHold > 0.0 )
5508  newTemp = simParams->reassignHold;
5509  } else {
5510  if ( newTemp < simParams->reassignHold )
5511  newTemp = simParams->reassignHold;
5512  }
5513  BigReal kbT = BOLTZMANN * newTemp;
5514 
5515  int lesReduceTemp = simParams->lesOn && simParams->lesReduceTemp;
5516  BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
5517 
5518  for ( int i = 0; i < numAtoms; ++i )
5519  {
5520  a[i].velocity = ( ( simParams->fixedAtomsOn &&
5521  a[i].atomFixed && a[i].mass > 0.) ? Vector(0,0,0) :
5522  sqrt(kbT * (a[i].partition ? tempFactor : 1.0) * a[i].recipMass) *
5523  random->gaussian_vector() );
5524  }
5525  } else {
5526  NAMD_bug("Sequencer::reassignVelocities called improperly!");
5527  }
5528 }
5529 
5530 void Sequencer::reinitVelocities(void)
5531 {
5532  FullAtom *a = patch->atom.begin();
5533  int numAtoms = patch->numAtoms;
5534  BigReal newTemp = simParams->initialTemp;
5535  BigReal kbT = BOLTZMANN * newTemp;
5536 
5537  int lesReduceTemp = simParams->lesOn && simParams->lesReduceTemp;
5538  BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
5539 
5540  for ( int i = 0; i < numAtoms; ++i )
5541  {
5542  a[i].velocity = ( ( (simParams->fixedAtomsOn && a[i].atomFixed) ||
5543  a[i].mass <= 0.) ? Vector(0,0,0) :
5544  sqrt(kbT * (a[i].partition ? tempFactor : 1.0) * a[i].recipMass) *
5545  random->gaussian_vector() );
5546  if ( simParams->drudeOn && i+1 < numAtoms && a[i+1].mass < 1.0 && a[i+1].mass > 0.05 ) {
5547  a[i+1].velocity = a[i].velocity; // zero is good enough
5548  ++i;
5549  }
5550  }
5551 }
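// Illustrative sketch (editorial, not part of NAMD): reinitVelocities()
// draws each Cartesian component from the Maxwell-Boltzmann distribution,
// v ~ N(0, kT/m), i.e. sigma = sqrt(kT/m) per component. A standalone
// equivalent for a single atom:
#if 0
#include <cmath>
#include <random>
struct Vec3 { double x, y, z; };
Vec3 sample_mb_velocity(double kT, double mass, std::mt19937 &gen) {
  std::normal_distribution<double> gauss(0.0, 1.0);
  const double sigma = std::sqrt(kT / mass); // per-component std deviation
  return { sigma * gauss(gen), sigma * gauss(gen), sigma * gauss(gen) };
}
#endif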
5552 
5553 void Sequencer::rescaleVelocitiesByFactor(BigReal factor)
5554 {
5555  FullAtom *a = patch->atom.begin();
5556  int numAtoms = patch->numAtoms;
5557  for ( int i = 0; i < numAtoms; ++i )
5558  {
5559  a[i].velocity *= factor;
5560  }
5561 }
5562 
5563 void Sequencer::reloadCharges()
5564 {
5565  FullAtom *a = patch->atom.begin();
5566  int numAtoms = patch->numAtoms;
5567  Molecule *molecule = Node::Object()->molecule;
5568  for ( int i = 0; i < numAtoms; ++i )
5569  {
5570  a[i].charge = molecule->atomcharge(a[i].id);
5571  }
5572 }
5573 
5574 // REST2 solute charge scaling
5575 void Sequencer::rescaleSoluteCharges(BigReal factor)
5576 {
5577  FullAtom *a = patch->atom.begin();
5578  int numAtoms = patch->numAtoms;
5579  Molecule *molecule = Node::Object()->molecule;
5580  BigReal sqrt_factor = sqrt(factor);
5581  // apply scaling to the original charge (stored in molecule)
5582  // of just the marked solute atoms
5583  for ( int i = 0; i < numAtoms; ++i ) {
5584  if (molecule->get_ss_type(a[i].id)) {
5585  a[i].charge = sqrt_factor * molecule->atomcharge(a[i].id);
5586  if (simParams->SOAintegrateOn) patch->patchDataSOA.charge[i] = a[i].charge;
5587  }
5588  }
5589 }
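// NOTE (editorial): scaling the marked solute charges by sqrt(factor) scales
// solute-solute electrostatic interactions by factor and solute-solvent
// cross terms by sqrt(factor), which is the charge part of the REST2
// scheme; rescaling always starts from the original charges stored in
// Molecule, so repeated calls do not compound.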
5590 
5591 void Sequencer::tcoupleVelocities(BigReal dt_fs, int step)
5592 {
5593  if ( simParams->tCoupleOn )
5594  {
5595  FullAtom *a = patch->atom.begin();
5596  int numAtoms = patch->numAtoms;
5597  // Blocking receive for the temperature coupling coefficient.
5598  BigReal coefficient = broadcast->tcoupleCoefficient.get(step);
5599  Molecule *molecule = Node::Object()->molecule;
5600  BigReal dt = dt_fs * 0.001; // convert to ps
5601  coefficient *= dt;
5602  for ( int i = 0; i < numAtoms; ++i )
5603  {
5604  BigReal f1 = exp( coefficient * a[i].langevinParam );
5605  a[i].velocity *= f1;
5606  }
5607  }
5608 }
5609 
5614 void Sequencer::stochRescaleVelocities(int step)
5615 {
5618  FullAtom *a = patch->atom.begin();
5619  int numAtoms = patch->numAtoms;
5620  // Blocking receive for the stochastic velocity rescaling coefficient.
5621  BigReal velrescaling = broadcast->stochRescaleCoefficient.get(step);
5622  DebugM(4, "stochastically rescaling velocities at step " << step << " by " << velrescaling << "\n");
5623  for ( int i = 0; i < numAtoms; ++i ) {
5624  a[i].velocity *= velrescaling;
5625  }
5626  stochRescale_count = 0;
5627  }
5628 }
5629 
5630 void Sequencer::saveForce(const int ftag)
5631 {
5632  patch->saveForce(ftag);
5633 }
5634 
5635 //
5636 // DJH: Need to change division by TIMEFACTOR into multiplication by
5637 // reciprocal of TIMEFACTOR. Done several times for each iteration of
5638 // the integrate() loop.
5639 //
5640 
5641 void Sequencer::addForceToMomentum(
5642  BigReal timestep, const int ftag, const int useSaved
5643  ) {
5644  NAMD_EVENT_RANGE_2(patch->flags.event_on,
5645  NamdProfileEvent::ADD_FORCE_TO_MOMENTUM);
5646 #if CMK_BLUEGENEL
5647  CmiNetworkProgressAfter (0);
5648 #endif
5649  const BigReal dt = timestep / TIMEFACTOR;
5650  FullAtom *atom_arr = patch->atom.begin();
5651  ForceList *f_use = (useSaved ? patch->f_saved : patch->f);
5652  const Force *force_arr = f_use[ftag].const_begin();
5653  patch->addForceToMomentum(atom_arr, force_arr, dt, patch->numAtoms);
5654 }
5655 
5656 void Sequencer::addForceToMomentum3(
5657  const BigReal timestep1, const int ftag1, const int useSaved1,
5658  const BigReal timestep2, const int ftag2, const int useSaved2,
5659  const BigReal timestep3, const int ftag3, const int useSaved3
5660  ) {
5661  NAMD_EVENT_RANGE_2(patch->flags.event_on,
5662  NamdProfileEvent::ADD_FORCE_TO_MOMENTUM);
5663 #if CMK_BLUEGENEL
5664  CmiNetworkProgressAfter (0);
5665 #endif
5666  const BigReal dt1 = timestep1 / TIMEFACTOR;
5667  const BigReal dt2 = timestep2 / TIMEFACTOR;
5668  const BigReal dt3 = timestep3 / TIMEFACTOR;
5669  ForceList *f_use1 = (useSaved1 ? patch->f_saved : patch->f);
5670  ForceList *f_use2 = (useSaved2 ? patch->f_saved : patch->f);
5671  ForceList *f_use3 = (useSaved3 ? patch->f_saved : patch->f);
5672  FullAtom *atom_arr = patch->atom.begin();
5673  const Force *force_arr1 = f_use1[ftag1].const_begin();
5674  const Force *force_arr2 = f_use2[ftag2].const_begin();
5675  const Force *force_arr3 = f_use3[ftag3].const_begin();
5676  patch->addForceToMomentum3 (atom_arr, force_arr1, force_arr2, force_arr3,
5677  dt1, dt2, dt3, patch->numAtoms);
5678 }
5679 
5680 void Sequencer::addVelocityToPosition(BigReal timestep)
5681 {
5682  NAMD_EVENT_RANGE_2(patch->flags.event_on,
5683  NamdProfileEvent::ADD_VELOCITY_TO_POSITION);
5684 #if CMK_BLUEGENEL
5685  CmiNetworkProgressAfter (0);
5686 #endif
5687  const BigReal dt = timestep / TIMEFACTOR;
5688  FullAtom *atom_arr = patch->atom.begin();
5689  patch->addVelocityToPosition(atom_arr, dt, patch->numAtoms);
5690 }
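// NOTE (editorial): addForceToMomentum*() and addVelocityToPosition() are
// the kick and drift halves of velocity Verlet: effectively
//   v += (timestep / TIMEFACTOR) * F / m,   then   x += dt * v,
// and giving the normal/nbond/slow force tags different effective timesteps
// is what implements r-RESPA multiple timestepping in integrate().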
5691 
5692 void Sequencer::hardWallDrude(BigReal dt, int pressure)
5693 {
5694  if ( simParams->drudeHardWallOn ) {
5695  Tensor virial;
5696  Tensor *vp = ( pressure ? &virial : 0 );
5697  if ( patch->hardWallDrude(dt, vp, pressureProfileReduction) ) {
5698  iout << iERROR << "Constraint failure in HardWallDrude(); "
5699  << "simulation may become unstable.\n" << endi;
5700  Node::Object()->enableEarlyExit();
5701  terminate();
5702  }
5703  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,virial);
5704  }
5705 }
5706 
5707 void Sequencer::rattle1(BigReal dt, int pressure)
5708 {
5709  NAMD_EVENT_RANGE_2(patch->flags.event_on, NamdProfileEvent::RATTLE1);
5710  if ( simParams->rigidBonds != RIGID_NONE ) {
5711  Tensor virial;
5712  Tensor *vp = ( pressure ? &virial : 0 );
5713  if ( patch->rattle1(dt, vp, pressureProfileReduction) ) {
5714  iout << iERROR <<
5715  "Constraint failure; simulation has become unstable.\n" << endi;
5716  Node::Object()->enableEarlyExit();
5717  terminate();
5718  }
5719 #if 0
5720  printf("virial = %g %g %g %g %g %g %g %g %g\n",
5721  virial.xx, virial.xy, virial.xz,
5722  virial.yx, virial.yy, virial.yz,
5723  virial.zx, virial.zy, virial.zz);
5724 #endif
5725  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,virial);
5726 #if 0
5727  {
5728  const FullAtom *a = patch->atom.const_begin();
5729  for (int n=0; n < patch->numAtoms; n++) {
5730  printf("pos[%d] = %g %g %g\n", n,
5731  a[n].position.x, a[n].position.y, a[n].position.z);
5732  }
5733  for (int n=0; n < patch->numAtoms; n++) {
5734  printf("vel[%d] = %g %g %g\n", n,
5735  a[n].velocity.x, a[n].velocity.y, a[n].velocity.z);
5736  }
5737  if (pressure) {
5738  for (int n=0; n < patch->numAtoms; n++) {
5739  printf("force[%d] = %g %g %g\n", n,
5740  patch->f[Results::normal][n].x,
5741  patch->f[Results::normal][n].y,
5742  patch->f[Results::normal][n].z);
5743  }
5744  }
5745  }
5746 #endif
5747  }
5748 }
5749 
5750 // void Sequencer::rattle2(BigReal dt, int step)
5751 // {
5752 // if ( simParams->rigidBonds != RIGID_NONE ) {
5753 // Tensor virial;
5754 // patch->rattle2(dt, &virial);
5755 // ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,virial);
5756 // // we need to add to alt and int virial because not included in forces
5757 // #ifdef ALTVIRIAL
5758 // ADD_TENSOR_OBJECT(reduction,REDUCTION_ALT_VIRIAL_NORMAL,virial);
5759 // #endif
5760 // ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NORMAL,virial);
5761 // }
5762 // }
5763 
5764 void Sequencer::maximumMove(BigReal timestep)
5765 {
5766  NAMD_EVENT_RANGE_2(patch->flags.event_on, NamdProfileEvent::MAXIMUM_MOVE);
5767 
5768  FullAtom *a = patch->atom.begin();
5769  int numAtoms = patch->numAtoms;
5770  if ( simParams->maximumMove ) {
5771  const BigReal dt = timestep / TIMEFACTOR;
5772  const BigReal maxvel = simParams->maximumMove / dt;
5773  const BigReal maxvel2 = maxvel * maxvel;
5774  for ( int i=0; i<numAtoms; ++i ) {
5775  if ( a[i].velocity.length2() > maxvel2 ) {
5776  a[i].velocity *= ( maxvel / a[i].velocity.length() );
5777  }
5778  }
5779  } else {
5780  const BigReal dt = timestep / TIMEFACTOR;
5781  const BigReal maxvel = simParams->cutoff / dt;
5782  const BigReal maxvel2 = maxvel * maxvel;
5783  int killme = 0;
5784  for ( int i=0; i<numAtoms; ++i ) {
5785  killme = killme || ( a[i].velocity.length2() > maxvel2 );
5786  }
5787  if ( killme ) {
5788  killme = 0;
5789  for ( int i=0; i<numAtoms; ++i ) {
5790  if ( a[i].velocity.length2() > maxvel2 ) {
5791  ++killme;
5792  iout << iERROR << "Atom " << (a[i].id + 1) << " velocity is "
5793  << ( PDBVELFACTOR * a[i].velocity ) << " (limit is "
5794  << ( PDBVELFACTOR * maxvel ) << ", atom "
5795  << i << " of " << numAtoms << " on patch "
5796  << patch->patchID << " pe " << CkMyPe() << ")\n" << endi;
5797  }
5798  }
5799  iout << iERROR <<
5800  "Atoms moving too fast; simulation has become unstable ("
5801  << killme << " atoms on patch " << patch->patchID
5802  << " pe " << CkMyPe() << ").\n" << endi;
5803  Node::Object()->enableEarlyExit();
5804  terminate();
5805  }
5806  }
5807 }
5808 
5809 void Sequencer::minimizationQuenchVelocity(void)
5810 {
5811  if ( simParams->minimizeOn ) {
5812  FullAtom *a = patch->atom.begin();
5813  int numAtoms = patch->numAtoms;
5814  for ( int i=0; i<numAtoms; ++i ) {
5815  a[i].velocity = 0.;
5816  }
5817  }
5818 }
5819 
5820 void Sequencer::submitHalfstep(int step)
5821 {
5822  NAMD_EVENT_RANGE_2(patch->flags.event_on, NamdProfileEvent::SUBMIT_HALFSTEP);
5823 
5824  // velocity-dependent quantities *** ONLY ***
5825  // positions are not at half-step when called
5826  FullAtom *a = patch->atom.begin();
5827  int numAtoms = patch->numAtoms;
5828 
5829 #if CMK_BLUEGENEL
5830  CmiNetworkProgressAfter (0);
5831 #endif
5832 
5833  // For non-Multigrator doKineticEnergy = 1 always
5834  Tensor momentumSqrSum;
5835  if ( doKineticEnergy || patch->flags.doVirial )
5836  {
5837  BigReal kineticEnergy = 0;
5838  Tensor virial;
5839  if ( simParams->pairInteractionOn ) {
5840  if ( simParams->pairInteractionSelf ) {
5841  for ( int i = 0; i < numAtoms; ++i ) {
5842  if ( a[i].partition != 1 ) continue;
5843  kineticEnergy += a[i].mass * a[i].velocity.length2();
5844  virial.outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
5845  }
5846  }
5847  } else {
5848  for ( int i = 0; i < numAtoms; ++i ) {
5849  if (a[i].mass < 0.01) continue;
5850  kineticEnergy += a[i].mass * a[i].velocity.length2();
5851  virial.outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
5852  }
5853  }
5854 
5855  if ( simParams->multigratorOn && !simParams->useGroupPressure ) {
5856  momentumSqrSum = virial;
5857  }
5858  kineticEnergy *= 0.5 * 0.5;
5859  reduction->item(REDUCTION_HALFSTEP_KINETIC_ENERGY) += kineticEnergy;
5860  virial *= 0.5;
5861  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,virial);
5862 #ifdef ALTVIRIAL
5863  ADD_TENSOR_OBJECT(reduction,REDUCTION_ALT_VIRIAL_NORMAL,virial);
5864 #endif
5865  }
5866 
5867  if (pressureProfileReduction) {
5868  int nslabs = simParams->pressureProfileSlabs;
5869  const Lattice &lattice = patch->lattice;
5870  BigReal idz = nslabs/lattice.c().z;
5871  BigReal zmin = lattice.origin().z - 0.5*lattice.c().z;
5872  int useGroupPressure = simParams->useGroupPressure;
5873 
5874  // Compute kinetic energy partition, possibly subtracting off
5875  // internal kinetic energy if group pressure is enabled.
5876  // Since the regular pressure is 1/2 mvv and the internal kinetic
5877  // term that is subtracted off for the group pressure is
5878  // 1/2 mv (v-v_cm), the group pressure kinetic contribution is
5879  // 1/2 m * v * v_cm. The factor of 1/2 is because submitHalfstep
5880  // gets called twice per timestep.
5881  int hgs;
5882  for (int i=0; i<numAtoms; i += hgs) {
5883  int j, ppoffset;
5884  hgs = a[i].hydrogenGroupSize;
5885  int partition = a[i].partition;
5886 
5887  BigReal m_cm = 0;
5888  Velocity v_cm(0,0,0);
5889  for (j=i; j< i+hgs; ++j) {
5890  m_cm += a[j].mass;
5891  v_cm += a[j].mass * a[j].velocity;
5892  }
5893  v_cm /= m_cm;
5894  for (j=i; j < i+hgs; ++j) {
5895  BigReal mass = a[j].mass;
5896  if (! (useGroupPressure && j != i)) {
5897  BigReal z = a[j].position.z;
5898  int slab = (int)floor((z-zmin)*idz);
5899  if (slab < 0) slab += nslabs;
5900  else if (slab >= nslabs) slab -= nslabs;
5901  ppoffset = 3*(slab + partition*nslabs);
5902  }
5903  BigReal wxx, wyy, wzz;
5904  if (useGroupPressure) {
5905  wxx = 0.5*mass * a[j].velocity.x * v_cm.x;
5906  wyy = 0.5*mass * a[j].velocity.y * v_cm.y;
5907  wzz = 0.5*mass * a[j].velocity.z * v_cm.z;
5908  } else {
5909  wxx = 0.5*mass * a[j].velocity.x * a[j].velocity.x;
5910  wyy = 0.5*mass * a[j].velocity.y * a[j].velocity.y;
5911  wzz = 0.5*mass * a[j].velocity.z * a[j].velocity.z;
5912  }
5913  pressureProfileReduction->item(ppoffset ) += wxx;
5914  pressureProfileReduction->item(ppoffset+1) += wyy;
5915  pressureProfileReduction->item(ppoffset+2) += wzz;
5916  }
5917  }
5918  }
5919 
5920  // For non-Multigrator doKineticEnergy = 1 always
5921  if ( doKineticEnergy || patch->flags.doVirial )
5922  {
5923  BigReal intKineticEnergy = 0;
5924  Tensor intVirialNormal;
5925 
5926  int hgs;
5927  for ( int i = 0; i < numAtoms; i += hgs ) {
5928 
5929 #if CMK_BLUEGENEL
5930  CmiNetworkProgress ();
5931 #endif
5932 
5933  hgs = a[i].hydrogenGroupSize;
5934  int j;
5935  BigReal m_cm = 0;
5936  Velocity v_cm(0,0,0);
5937  for ( j = i; j < (i+hgs); ++j ) {
5938  m_cm += a[j].mass;
5939  v_cm += a[j].mass * a[j].velocity;
5940  }
5941  if ( simParams->multigratorOn && simParams->useGroupPressure ) {
5942  momentumSqrSum.outerAdd(1.0/m_cm, v_cm, v_cm);
5943  }
5944  v_cm /= m_cm;
5945  if ( simParams->pairInteractionOn ) {
5946  if ( simParams->pairInteractionSelf ) {
5947  for ( j = i; j < (i+hgs); ++j ) {
5948  if ( a[j].partition != 1 ) continue;
5949  BigReal mass = a[j].mass;
5950  Vector v = a[j].velocity;
5951  Vector dv = v - v_cm;
5952  intKineticEnergy += mass * (v * dv);
5953  intVirialNormal.outerAdd (mass, v, dv);
5954  }
5955  }
5956  } else {
5957  for ( j = i; j < (i+hgs); ++j ) {
5958  BigReal mass = a[j].mass;
5959  Vector v = a[j].velocity;
5960  Vector dv = v - v_cm;
5961  intKineticEnergy += mass * (v * dv);
5962  intVirialNormal.outerAdd(mass, v, dv);
5963  }
5964  }
5965  }
5966  intKineticEnergy *= 0.5 * 0.5;
5967  reduction->item(REDUCTION_INT_HALFSTEP_KINETIC_ENERGY) += intKineticEnergy;
5968  intVirialNormal *= 0.5;
5969  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NORMAL,intVirialNormal);
5970  if ( simParams->multigratorOn) {
5971  momentumSqrSum *= 0.5;
5972  ADD_TENSOR_OBJECT(reduction,REDUCTION_MOMENTUM_SQUARED,momentumSqrSum);
5973  }
5974  }
5975 
5976 }
5977 
5978 void Sequencer::calcFixVirial(Tensor& fixVirialNormal, Tensor& fixVirialNbond, Tensor& fixVirialSlow,
5979  Vector& fixForceNormal, Vector& fixForceNbond, Vector& fixForceSlow) {
5980 
5981  FullAtom *a = patch->atom.begin();
5982  int numAtoms = patch->numAtoms;
5983 
5984  for ( int j = 0; j < numAtoms; j++ ) {
5985  if ( simParams->fixedAtomsOn && a[j].atomFixed ) {
5986  Vector dx = a[j].fixedPosition;
5987  // all negative because fixed atoms cancel these forces
5988  fixVirialNormal.outerAdd(-1.0, patch->f[Results::normal][j], dx);
5989  fixVirialNbond.outerAdd(-1.0, patch->f[Results::nbond][j], dx);
5990  fixVirialSlow.outerAdd(-1.0, patch->f[Results::slow][j], dx);
5991  fixForceNormal -= patch->f[Results::normal][j];
5992  fixForceNbond -= patch->f[Results::nbond][j];
5993  fixForceSlow -= patch->f[Results::slow][j];
5994  }
5995  }
5996 }
5997 
5998 void Sequencer::submitReductions(int step)
5999 {
6000 #ifndef UPPER_BOUND
6001  NAMD_EVENT_RANGE_2(patch->flags.event_on,
6002  NamdProfileEvent::SUBMIT_REDUCTIONS);
6003  FullAtom *a = patch->atom.begin();
6004 #endif
6005  int numAtoms = patch->numAtoms;
6006 
6007 #if CMK_BLUEGENEL
6008  CmiNetworkProgressAfter(0);
6009 #endif
6010 
6011  reduction->item(REDUCTION_ATOM_CHECKSUM) += numAtoms;
6012  reduction->item(REDUCTION_MARGIN_VIOLATIONS) += patch->marginViolations;
6013 
6014 #ifndef UPPER_BOUND
6015  // For non-Multigrator doKineticEnergy = 1 always
6016  if ( doKineticEnergy || patch->flags.doVirial )
6017  {
6018  BigReal kineticEnergy = 0;
6019  Vector momentum = 0;
6020  Vector angularMomentum = 0;
6021  Vector o = patch->lattice.origin();
6022  int i;
6023  if ( simParams->pairInteractionOn ) {
6024  if ( simParams->pairInteractionSelf ) {
6025  for (i = 0; i < numAtoms; ++i ) {
6026  if ( a[i].partition != 1 ) continue;
6027  kineticEnergy += a[i].mass * a[i].velocity.length2();
6028  momentum += a[i].mass * a[i].velocity;
6029  angularMomentum += cross(a[i].mass,a[i].position-o,a[i].velocity);
6030  }
6031  }
6032  } else {
6033  for (i = 0; i < numAtoms; ++i ) {
6034  kineticEnergy += a[i].mass * a[i].velocity.length2();
6035  momentum += a[i].mass * a[i].velocity;
6036  angularMomentum += cross(a[i].mass,a[i].position-o,a[i].velocity);
6037  }
6038  if (simParams->drudeOn) {
6039  BigReal drudeComKE = 0.;
6040  BigReal drudeBondKE = 0.;
6041 
6042  for (i = 0; i < numAtoms; i++) {
6043  if (i < numAtoms-1 &&
6044  a[i+1].mass < 1.0 && a[i+1].mass > 0.05) {
6045  // i+1 is a Drude particle with parent i
6046 
6047  // convert from Cartesian coordinates to (COM,bond) coordinates
6048  BigReal m_com = (a[i].mass + a[i+1].mass); // mass of COM
6049  BigReal m = a[i+1].mass / m_com; // mass ratio
6050  BigReal m_bond = a[i+1].mass * (1. - m); // mass of bond
6051  Vector v_bond = a[i+1].velocity - a[i].velocity; // vel of bond
6052  Vector v_com = a[i].velocity + m * v_bond; // vel of COM
6053 
6054  drudeComKE += m_com * v_com.length2();
6055  drudeBondKE += m_bond * v_bond.length2();
6056 
6057  i++; // +1 from loop, we've updated both particles
6058  }
6059  else {
6060  drudeComKE += a[i].mass * a[i].velocity.length2();
6061  }
6062  } // end for
6063 
6064  drudeComKE *= 0.5;
6065  drudeBondKE *= 0.5;
6066  reduction->item(REDUCTION_DRUDECOM_CENTERED_KINETIC_ENERGY)
6067  += drudeComKE;
6068  reduction->item(REDUCTION_DRUDEBOND_CENTERED_KINETIC_ENERGY)
6069  += drudeBondKE;
6070  } // end drudeOn
6071 
6072  } // end else
6073 
6074  kineticEnergy *= 0.5;
6075  reduction->item(REDUCTION_CENTERED_KINETIC_ENERGY) += kineticEnergy;
6076  ADD_VECTOR_OBJECT(reduction,REDUCTION_MOMENTUM,momentum);
6077  ADD_VECTOR_OBJECT(reduction,REDUCTION_ANGULAR_MOMENTUM,angularMomentum);
6078  }
6079 
6080 #ifdef ALTVIRIAL
6081  // THIS IS NOT CORRECTED FOR PAIR INTERACTIONS
6082  {
6083  Tensor altVirial;
6084  for ( int i = 0; i < numAtoms; ++i ) {
6085  altVirial.outerAdd(1.0, patch->f[Results::normal][i], a[i].position);
6086  }
6087  ADD_TENSOR_OBJECT(reduction,REDUCTION_ALT_VIRIAL_NORMAL,altVirial);
6088  }
6089  {
6090  Tensor altVirial;
6091  for ( int i = 0; i < numAtoms; ++i ) {
6092  altVirial.outerAdd(1.0, patch->f[Results::nbond][i], a[i].position);
6093  }
6094  ADD_TENSOR_OBJECT(reduction,REDUCTION_ALT_VIRIAL_NBOND,altVirial);
6095  }
6096  {
6097  Tensor altVirial;
6098  for ( int i = 0; i < numAtoms; ++i ) {
6099  altVirial.outerAdd(1.0, patch->f[Results::slow][i], a[i].position);
6100  }
6101  ADD_TENSOR_OBJECT(reduction,REDUCTION_ALT_VIRIAL_SLOW,altVirial);
6102  }
6103 #endif
6104 
6105  // For non-Multigrator doKineticEnergy = 1 always
6106  if ( doKineticEnergy || patch->flags.doVirial )
6107  {
6108  BigReal intKineticEnergy = 0;
6109  Tensor intVirialNormal;
6110  Tensor intVirialNbond;
6111  Tensor intVirialSlow;
6112 
6113  int hgs;
6114  for ( int i = 0; i < numAtoms; i += hgs ) {
6115 #if CMK_BLUEGENEL
6116  CmiNetworkProgress();
6117 #endif
6118  hgs = a[i].hydrogenGroupSize;
6119  int j;
6120  BigReal m_cm = 0;
6121  Position x_cm(0,0,0);
6122  Velocity v_cm(0,0,0);
6123  for ( j = i; j < (i+hgs); ++j ) {
6124  m_cm += a[j].mass;
6125  x_cm += a[j].mass * a[j].position;
6126  v_cm += a[j].mass * a[j].velocity;
6127  }
6128  x_cm /= m_cm;
6129  v_cm /= m_cm;
6130  int fixedAtomsOn = simParams->fixedAtomsOn;
6131  if ( simParams->pairInteractionOn ) {
6132  int pairInteractionSelf = simParams->pairInteractionSelf;
6133  for ( j = i; j < (i+hgs); ++j ) {
6134  if ( a[j].partition != 1 &&
6135  ( pairInteractionSelf || a[j].partition != 2 ) ) continue;
6136  // net force treated as zero for fixed atoms
6137  if ( fixedAtomsOn && a[j].atomFixed ) continue;
6138  BigReal mass = a[j].mass;
6139  Vector v = a[j].velocity;
6140  Vector dv = v - v_cm;
6141  intKineticEnergy += mass * (v * dv);
6142  Vector dx = a[j].position - x_cm;
6143  intVirialNormal.outerAdd(1.0, patch->f[Results::normal][j], dx);
6144  intVirialNbond.outerAdd(1.0, patch->f[Results::nbond][j], dx);
6145  intVirialSlow.outerAdd(1.0, patch->f[Results::slow][j], dx);
6146  }
6147  } else {
6148  for ( j = i; j < (i+hgs); ++j ) {
6149  // net force treated as zero for fixed atoms
6150  if ( fixedAtomsOn && a[j].atomFixed ) continue;
6151  BigReal mass = a[j].mass;
6152  Vector v = a[j].velocity;
6153  Vector dv = v - v_cm;
6154  intKineticEnergy += mass * (v * dv);
6155  Vector dx = a[j].position - x_cm;
6156  intVirialNormal.outerAdd(1.0, patch->f[Results::normal][j], dx);
6157  intVirialNbond.outerAdd(1.0, patch->f[Results::nbond][j], dx);
6158  intVirialSlow.outerAdd(1.0, patch->f[Results::slow][j], dx);
6159  }
6160  }
6161  }
6162 
6163  intKineticEnergy *= 0.5;
6164  reduction->item(REDUCTION_INT_CENTERED_KINETIC_ENERGY) += intKineticEnergy;
6165  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NORMAL,intVirialNormal);
6166  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NBOND,intVirialNbond);
6167  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_SLOW,intVirialSlow);
6168  }
6169 
6170  if (pressureProfileReduction) {
6171  // subtract off internal virial term, calculated as for intVirial.
6172  int nslabs = simParams->pressureProfileSlabs;
6173  const Lattice &lattice = patch->lattice;
6174  BigReal idz = nslabs/lattice.c().z;
6175  BigReal zmin = lattice.origin().z - 0.5*lattice.c().z;
6176  int useGroupPressure = simParams->useGroupPressure;
6177 
6178  int hgs;
6179  for (int i=0; i<numAtoms; i += hgs) {
6180  int j;
6181  hgs = a[i].hydrogenGroupSize;
6182  BigReal m_cm = 0;
6183  Position x_cm(0,0,0);
6184  for (j=i; j< i+hgs; ++j) {
6185  m_cm += a[j].mass;
6186  x_cm += a[j].mass * a[j].position;
6187  }
6188  x_cm /= m_cm;
6189 
6190  BigReal z = a[i].position.z;
6191  int slab = (int)floor((z-zmin)*idz);
6192  if (slab < 0) slab += nslabs;
6193  else if (slab >= nslabs) slab -= nslabs;
6194  int partition = a[i].partition;
6195  int ppoffset = 3*(slab + nslabs*partition);
6196  for (j=i; j < i+hgs; ++j) {
6197  BigReal mass = a[j].mass;
6198  Vector dx = a[j].position - x_cm;
6199  const Vector &fnormal = patch->f[Results::normal][j];
6200  const Vector &fnbond = patch->f[Results::nbond][j];
6201  const Vector &fslow = patch->f[Results::slow][j];
6202  BigReal wxx = (fnormal.x + fnbond.x + fslow.x) * dx.x;
6203  BigReal wyy = (fnormal.y + fnbond.y + fslow.y) * dx.y;
6204  BigReal wzz = (fnormal.z + fnbond.z + fslow.z) * dx.z;
6205  pressureProfileReduction->item(ppoffset ) -= wxx;
6206  pressureProfileReduction->item(ppoffset+1) -= wyy;
6207  pressureProfileReduction->item(ppoffset+2) -= wzz;
6208  }
6209  }
6210  }
6211 
6212  // For non-Multigrator doVirial = 1 always
6213  if (patch->flags.doVirial)
6214  {
6215  if ( simParams->fixedAtomsOn ) {
6216  Tensor fixVirialNormal;
6217  Tensor fixVirialNbond;
6218  Tensor fixVirialSlow;
6219  Vector fixForceNormal = 0;
6220  Vector fixForceNbond = 0;
6221  Vector fixForceSlow = 0;
6222 
6223  calcFixVirial(fixVirialNormal, fixVirialNbond, fixVirialSlow, fixForceNormal, fixForceNbond, fixForceSlow);
6224 
6225 #if 0
6226  auto printTensor = [](const Tensor& t, const std::string& name){
6227  CkPrintf("%s", name.c_str());
6228  CkPrintf("\n%12.5lf %12.5lf %12.5lf\n"
6229  "%12.5lf %12.5lf %12.5lf\n"
6230  "%12.5lf %12.5lf %12.5lf\n",
6231  t.xx, t.xy, t.xz,
6232  t.yx, t.yy, t.yz,
6233  t.zx, t.zy, t.zz);
6234  };
6235  printTensor(fixVirialNormal, "fixVirialNormal = ");
6236  printTensor(fixVirialNbond, "fixVirialNbond = ");
6237  printTensor(fixVirialSlow, "fixVirialSlow = ");
6238 #endif
6239 
6240  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,fixVirialNormal);
6241  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NBOND,fixVirialNbond);
6242  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_SLOW,fixVirialSlow);
6243  ADD_VECTOR_OBJECT(reduction,REDUCTION_EXT_FORCE_NORMAL,fixForceNormal);
6244  ADD_VECTOR_OBJECT(reduction,REDUCTION_EXT_FORCE_NBOND,fixForceNbond);
6245  ADD_VECTOR_OBJECT(reduction,REDUCTION_EXT_FORCE_SLOW,fixForceSlow);
6246  }
6247  }
6248 #endif // UPPER_BOUND
6249 
6250  reduction->submit();
6251 #ifndef UPPER_BOUND
6252  if (pressureProfileReduction) pressureProfileReduction->submit();
6253 #endif
6254 }
6255 
6256 void Sequencer::submitMinimizeReductions(int step, BigReal fmax2)
6257 {
6258  FullAtom *a = patch->atom.begin();
6259  Force *f1 = patch->f[Results::normal].begin();
6260  Force *f2 = patch->f[Results::nbond].begin();
6261  Force *f3 = patch->f[Results::slow].begin();
6262  const bool fixedAtomsOn = simParams->fixedAtomsOn;
6263  const bool drudeHardWallOn = simParams->drudeHardWallOn;
6264  const double drudeBondLen = simParams->drudeBondLen;
6265  const double drudeBondLen2 = drudeBondLen * drudeBondLen;
6266  const double drudeStep = 0.1/(TIMEFACTOR*TIMEFACTOR);
6267  const double drudeMove = 0.01;
6268  const double drudeStep2 = drudeStep * drudeStep;
6269  const double drudeMove2 = drudeMove * drudeMove;
6270  int numAtoms = patch->numAtoms;
6271 
6272  reduction->item(REDUCTION_ATOM_CHECKSUM) += numAtoms;
6273 
6274  for ( int i = 0; i < numAtoms; ++i ) {
6275 #if 0
6276  printf("ap[%2d]= %f %f %f\n", i, a[i].position.x, a[i].position.y, a[i].position.z);
6277  printf("f1[%2d]= %f %f %f\n", i, f1[i].x, f1[i].y, f1[i].z);
6278  printf("f2[%2d]= %f %f %f\n", i, f2[i].x, f2[i].y, f2[i].z);
6279  //printf("f3[%2d]= %f %f %f\n", i, f3[i].x, f3[i].y, f3[i].z);
6280 #endif
6281  f1[i] += f2[i] + f3[i]; // add all forces
6282  if ( drudeHardWallOn && i && (a[i].mass > 0.05) && ((a[i].mass < 1.0)) ) { // drude particle
6283  if ( ! fixedAtomsOn || ! a[i].atomFixed ) {
6284  if ( drudeStep2 * f1[i].length2() > drudeMove2 ) {
6285  a[i].position += drudeMove * f1[i].unit();
6286  } else {
6287  a[i].position += drudeStep * f1[i];
6288  }
6289  if ( (a[i].position - a[i-1].position).length2() > drudeBondLen2 ) {
6290  a[i].position = a[i-1].position + drudeBondLen * (a[i].position - a[i-1].position).unit();
6291  }
6292  }
6293  Vector netf = f1[i-1] + f1[i];
6294  if ( fixedAtomsOn && a[i-1].atomFixed ) netf = 0;
6295  f1[i-1] = netf;
6296  f1[i] = 0.;
6297  }
6298  if ( fixedAtomsOn && a[i].atomFixed ) f1[i] = 0;
6299  }
6300 
6301  f2 = f3 = 0; // included in f1
6302 
6303  BigReal maxv2 = 0.;
6304 
6305  for ( int i = 0; i < numAtoms; ++i ) {
6306  BigReal v2 = a[i].velocity.length2();
6307  if ( v2 > 0. ) {
6308  if ( v2 > maxv2 ) maxv2 = v2;
6309  } else {
6310  v2 = f1[i].length2();
6311  if ( v2 > maxv2 ) maxv2 = v2;
6312  }
6313  }
6314 
6315  if ( fmax2 > 10. * TIMEFACTOR * TIMEFACTOR * TIMEFACTOR * TIMEFACTOR )
6316  { Tensor virial; patch->minimize_rattle2( 0.1 * TIMEFACTOR / sqrt(maxv2), &virial, true /* forces */); }
6317 
6318  BigReal fdotf = 0;
6319  BigReal fdotv = 0;
6320  BigReal vdotv = 0;
6321  int numHuge = 0;
6322  for ( int i = 0; i < numAtoms; ++i ) {
6323  if ( simParams->fixedAtomsOn && a[i].atomFixed ) continue;
6324  if ( drudeHardWallOn && (a[i].mass > 0.05) && ((a[i].mass < 1.0)) ) continue; // drude particle
6325  Force f = f1[i];
6326  BigReal ff = f * f;
6327  if ( ff > fmax2 ) {
6328  if (simParams->printBadContacts) {
6329  CkPrintf("STEP(%i) MIN_HUGE[%i] f=%e kcal/mol/A\n",patch->flags.sequence,patch->pExt[i].id,ff);
6330  }
6331  ++numHuge;
6332  // pad scaling so minimizeMoveDownhill() doesn't miss them
6333  BigReal fmult = 1.01 * sqrt(fmax2/ff);
6334  f *= fmult; ff = f * f;
6335  f1[i] *= fmult;
6336  }
6337  fdotf += ff;
6338  fdotv += f * a[i].velocity;
6339  vdotv += a[i].velocity * a[i].velocity;
6340  }
6341 
6342 #if 0
6343  printf("fdotf = %f\n", fdotf);
6344  printf("fdotv = %f\n", fdotv);
6345  printf("vdotv = %f\n", vdotv);
6346 #endif
6347  reduction->item(REDUCTION_MIN_F_DOT_F) += fdotf;
6348  reduction->item(REDUCTION_MIN_F_DOT_V) += fdotv;
6349  reduction->item(REDUCTION_MIN_V_DOT_V) += vdotv;
6350  reduction->item(REDUCTION_MIN_HUGE_COUNT) += numHuge;
6351 
6352  {
6353  Tensor intVirialNormal;
6354  Tensor intVirialNbond;
6355  Tensor intVirialSlow;
6356 
6357  int hgs;
6358  for ( int i = 0; i < numAtoms; i += hgs ) {
6359  hgs = a[i].hydrogenGroupSize;
6360  int j;
6361  BigReal m_cm = 0;
6362  Position x_cm(0,0,0);
6363  for ( j = i; j < (i+hgs); ++j ) {
6364  m_cm += a[j].mass;
6365  x_cm += a[j].mass * a[j].position;
6366  }
6367  x_cm /= m_cm;
6368  for ( j = i; j < (i+hgs); ++j ) {
6369  BigReal mass = a[j].mass;
6370  // net force treated as zero for fixed atoms
6371  if ( simParams->fixedAtomsOn && a[j].atomFixed ) continue;
6372  Vector dx = a[j].position - x_cm;
6373  intVirialNormal.outerAdd(1.0, patch->f[Results::normal][j], dx);
6374  intVirialNbond.outerAdd(1.0, patch->f[Results::nbond][j], dx);
6375  intVirialSlow.outerAdd(1.0, patch->f[Results::slow][j], dx);
6376  }
6377  }
6378 
6379  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NORMAL,intVirialNormal);
6380  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NBOND,intVirialNbond);
6381  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_SLOW,intVirialSlow);
6382  }
6383 
6384  if ( simParams->fixedAtomsOn ) {
6385  Tensor fixVirialNormal;
6386  Tensor fixVirialNbond;
6387  Tensor fixVirialSlow;
6388  Vector fixForceNormal = 0;
6389  Vector fixForceNbond = 0;
6390  Vector fixForceSlow = 0;
6391 
6392  calcFixVirial(fixVirialNormal, fixVirialNbond, fixVirialSlow, fixForceNormal, fixForceNbond, fixForceSlow);
6393 
6394  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,fixVirialNormal);
6395  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NBOND,fixVirialNbond);
6396  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_SLOW,fixVirialSlow);
6397  ADD_VECTOR_OBJECT(reduction,REDUCTION_EXT_FORCE_NORMAL,fixForceNormal);
6398  ADD_VECTOR_OBJECT(reduction,REDUCTION_EXT_FORCE_NBOND,fixForceNbond);
6399  ADD_VECTOR_OBJECT(reduction,REDUCTION_EXT_FORCE_SLOW,fixForceSlow);
6400  }
6401 
6402  reduction->submit();
6403 }
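// NOTE (editorial): fdotf, fdotv, and vdotv reduced above are the global
// inner products a conjugate-gradient style minimizer needs (during
// minimization the velocity array doubles as the search direction), and
// rescaling forces above fmax2 keeps a single bad contact from dominating
// these dot products.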
6404 
6405 void Sequencer::submitCollections(int step, int zeroVel)
6406 {
6407  //
6408  // DJH: Copy updates of SOA back into AOS.
6409  // Do we need to update everything or is it safe to just update
6410  // positions and velocities separately, as needed?
6411  //
6412  //patch->copy_updates_to_AOS();
6413 
6414  NAMD_EVENT_RANGE_2(patch->flags.event_on,
6415  NamdProfileEvent::SUBMIT_COLLECTIONS);
6416  int prec;
6417  int dcdSelectionIndex;
6418  std::tie (prec, dcdSelectionIndex) = Output::coordinateNeeded(step);
6419  if ( prec ) {
6420 #ifndef MEM_OPT_VERSION
6421  collection->submitPositions(step,patch->atom,patch->lattice,prec,dcdSelectionIndex);
6422 #else
6423  collection->submitPositions(step,patch->atom,patch->lattice,prec);
6424 #endif
6425  }
6426  if ( Output::velocityNeeded(step) ) {
6427  collection->submitVelocities(step,zeroVel,patch->atom);
6428  }
6429  if ( Output::forceNeeded(step) ) {
6430  int maxForceUsed = patch->flags.maxForceUsed;
6431  if ( maxForceUsed > Results::slow ) maxForceUsed = Results::slow;
6432  collection->submitForces(step,patch->atom,maxForceUsed,patch->f);
6433  }
6434 }
6435 
6436 void Sequencer::runComputeObjects(int migration, int pairlists, int pressureStep)
6437 {
6438  if ( migration ) pairlistsAreValid = 0;
6439 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC)
6440  if ( pairlistsAreValid &&
6441  ( patch->flags.doEnergy || patch->flags.doVirial )
6442  && ( pairlistsAge > pairlistsAgeLimit ) ) {
6443  pairlistsAreValid = 0;
6444  }
6445 #else
6446  if ( pairlistsAreValid && ( pairlistsAge > pairlistsAgeLimit ) ) {
6447  pairlistsAreValid = 0;
6448  }
6449 #endif
6450  if ( ! simParams->usePairlists ) pairlists = 0;
6451  patch->flags.usePairlists = pairlists || pairlistsAreValid;
6452  patch->flags.savePairlists =
6453  pairlists && ! pairlistsAreValid;
6454 
6455  if ( simParams->singleTopology ) patch->reposition_all_alchpairs();
6456  if ( simParams->lonepairs ) patch->reposition_all_lonepairs();
6457 
6458  //
6459  // DJH: Copy updates of SOA back into AOS.
6460  // The positionsReady() routine starts force computation and atom migration.
6461  //
6462  // We could reduce amount of copying here by checking migration status
6463  // and copying velocities only when migrating. Some types of simulation
6464  // always require velocities, such as Lowe-Anderson.
6465  //
6466  //patch->copy_updates_to_AOS();
6467 
6468  patch->positionsReady(migration); // updates flags.sequence
6469 
6470  int seq = patch->flags.sequence;
6471  int basePriority = ( (seq & 0xffff) << 15 )
6472  + PATCH_PRIORITY(patch->getPatchID());
6473  if ( patch->flags.doGBIS && patch->flags.doNonbonded) {
6474  priority = basePriority + GB1_COMPUTE_HOME_PRIORITY;
6475  suspend(); // until all deposit boxes close
6477  priority = basePriority + GB2_COMPUTE_HOME_PRIORITY;
6478  suspend();
6480  priority = basePriority + COMPUTE_HOME_PRIORITY;
6481  suspend();
6482  } else {
6483  priority = basePriority + COMPUTE_HOME_PRIORITY;
6484  suspend(); // until all deposit boxes close
6485  }
6486 
6487  //
6488  // DJH: Copy all data into SOA from AOS.
6489  //
6490  // We need everything copied after atom migration.
6491  // When doing force computation without atom migration,
6492  // all data except forces will already be up-to-date in SOA
6493  // (except maybe for some special types of simulation).
6494  //
6495  //patch->copy_all_to_SOA();
6496 
6497  //
6498  // DJH: Copy forces to SOA.
6499  // Force available after suspend() has returned.
6500  //
6501  //patch->copy_forces_to_SOA();
6502 
6503  if ( patch->flags.savePairlists ) {
6504  pairlistsAreValid = 1;
6505  pairlistsAge = 0;
6506  }
6507  // For multigrator, do not age pairlist during pressure step
6508  // NOTE: for non-multigrator pressureStep = 0 always
6509  if ( pairlistsAreValid && !pressureStep ) ++pairlistsAge;
6510 
6511  if (simParams->lonepairs) {
6512  {
6513  Tensor virial;
6514  patch->redistrib_lonepair_forces(Results::normal, &virial);
6515  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NORMAL, virial);
6516  }
6517  if (patch->flags.doNonbonded) {
6518  Tensor virial;
6519  patch->redistrib_lonepair_forces(Results::nbond, &virial);
6520  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NBOND, virial);
6521  }
6522  if (patch->flags.doFullElectrostatics) {
6523  Tensor virial;
6524  patch->redistrib_lonepair_forces(Results::slow, &virial);
6525  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_SLOW, virial);
6526  }
6527  } else if (simParams->watmodel == WaterModel::TIP4) {
6528  {
6529  Tensor virial;
6530  patch->redistrib_tip4p_forces(Results::normal, &virial);
6531  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NORMAL, virial);
6532  }
6533  if (patch->flags.doNonbonded) {
6534  Tensor virial;
6535  patch->redistrib_tip4p_forces(Results::nbond, &virial);
6536  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NBOND, virial);
6537  }
6538  if (patch->flags.doFullElectrostatics) {
6539  Tensor virial;
6540  patch->redistrib_tip4p_forces(Results::slow, &virial);
6541  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_SLOW, virial);
6542  }
6543  } else if (simParams->watmodel == WaterModel::SWM4) {
6544  {
6545  Tensor virial;
6546  patch->redistrib_swm4_forces(Results::normal, &virial);
6547  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NORMAL, virial);
6548  }
6549  if (patch->flags.doNonbonded) {
6550  Tensor virial;
6551  patch->redistrib_swm4_forces(Results::nbond, &virial);
6552  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NBOND, virial);
6553  }
6554  if (patch->flags.doFullElectrostatics) {
6555  Tensor virial;
6556  patch->redistrib_swm4_forces(Results::slow, &virial);
6557  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_SLOW, virial);
6558  }
6559  }
6560 
6561  if (simParams->singleTopology) {
6562  patch->redistrib_alchpair_forces(Results::normal);
6563  if (patch->flags.doNonbonded) {
6564  patch->redistrib_alchpair_forces(Results::nbond);
6565  }
6566  if (patch->flags.doFullElectrostatics) {
6567  patch->redistrib_alchpair_forces(Results::slow);
6568  }
6569  }
6570 
6571  if ( patch->flags.doMolly ) {
6572  Tensor virial;
6573  patch->mollyMollify(&virial);
6574  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_SLOW,virial);
6575  }
6576 
6577 
6578  // BEGIN LA
6579  if (patch->flags.doLoweAndersen) {
6580  patch->loweAndersenFinish();
6581  }
6582  // END LA
6583 //TODO:HIP check if this applies to HIP
6584 #ifdef NAMD_CUDA_XXX
6585  int numAtoms = patch->numAtoms;
6586  FullAtom *a = patch->atom.begin();
6587  for ( int i=0; i<numAtoms; ++i ) {
6588  CkPrintf("%d %g %g %g\n", a[i].id,
6589  patch->f[Results::normal][i].x +
6590  patch->f[Results::nbond][i].x +
6591  patch->f[Results::slow][i].x,
6592  patch->f[Results::normal][i].y +
6593  patch->f[Results::nbond][i].y +
6594  patch->f[Results::slow][i].y,
6595  patch->f[Results::normal][i].z +
6596  patch->f[Results::nbond][i].z +
6597  patch->f[Results::slow][i].z);
6598  CkPrintf("%d %g %g %g\n", a[i].id,
6599  patch->f[Results::normal][i].x,
6600  patch->f[Results::nbond][i].x,
6601  patch->f[Results::slow][i].x);
6602  CkPrintf("%d %g %g %g\n", a[i].id,
6603  patch->f[Results::normal][i].y,
6604  patch->f[Results::nbond][i].y,
6605  patch->f[Results::slow][i].y);
6606  CkPrintf("%d %g %g %g\n", a[i].id,
6607  patch->f[Results::normal][i].z,
6608  patch->f[Results::nbond][i].z,
6609  patch->f[Results::slow][i].z);
6610  }
6611 #endif
6612 
6613 //#undef PRINT_FORCES
6614 //#define PRINT_FORCES 1
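  // Uncommenting the two lines above turns on the per-atom force dump
  // in the #if PRINT_FORCES block below.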
6615 #if PRINT_FORCES
6616  int numAtoms = patch->numAtoms;
6617  FullAtom *a = patch->atom.begin();
6618  for ( int i=0; i<numAtoms; ++i ) {
6619  float fxNo = patch->f[Results::normal][i].x;
6620  float fxNb = patch->f[Results::nbond][i].x;
6621  float fxSl = patch->f[Results::slow][i].x;
6622  float fyNo = patch->f[Results::normal][i].y;
6623  float fyNb = patch->f[Results::nbond][i].y;
6624  float fySl = patch->f[Results::slow][i].y;
6625  float fzNo = patch->f[Results::normal][i].z;
6626  float fzNb = patch->f[Results::nbond][i].z;
6627  float fzSl = patch->f[Results::slow][i].z;
6628  float fx = fxNo+fxNb+fxSl;
6629  float fy = fyNo+fyNb+fySl;
6630  float fz = fzNo+fzNb+fzSl;
6631 
6632  float f = sqrt(fx*fx+fy*fy+fz*fz);
6633  int id = patch->pExt[i].id;
6634  int seq = patch->flags.sequence;
6635  float x = patch->p[i].position.x;
6636  float y = patch->p[i].position.y;
6637  float z = patch->p[i].position.z;
6638  //CkPrintf("FORCE(%04i)[%04i] = <% .4e, % .4e, % .4e> <% .4e, % .4e, % .4e> <% .4e, % .4e, % .4e> <<% .4e, % .4e, % .4e>>\n", seq,id,
6639  CkPrintf("FORCE(%04i)[%04i] = % .9e % .9e % .9e\n", seq,id,
6640  //CkPrintf("FORCE(%04i)[%04i] = <% .4e, % .4e, % .4e> <% .4e, % .4e, % .4e> <% .4e, % .4e, % .4e>\n", seq,id,
6641    //fxNo, fyNo, fzNo,
6642    fxNb, fyNb, fzNb
6643    //fxSl, fySl, fzSl,
6644    //fx, fy, fz
6645    );
6646  }
6647 #endif
6648 }
6649 
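// Illustrative sketch, not from the original file: the redistrib_* and
// mollyMollify calls above fold each moved force into a virial tensor
// as an outer product f r^T.  Vec3Sketch and Mat3Sketch below are
// hypothetical stand-ins for NAMD's Vector and Tensor types.
namespace virial_sketch {
  struct Vec3Sketch { double x, y, z; };
  struct Mat3Sketch { double xx, xy, xz, yx, yy, yz, zx, zy, zz; };
  // v += outer(f, r): one atom's contribution to the virial.
  inline void outerAdd(Mat3Sketch &v, const Vec3Sketch &f, const Vec3Sketch &r) {
    v.xx += f.x*r.x;  v.xy += f.x*r.y;  v.xz += f.x*r.z;
    v.yx += f.y*r.x;  v.yy += f.y*r.y;  v.yz += f.y*r.z;
    v.zx += f.z*r.x;  v.zy += f.z*r.y;  v.zz += f.z*r.z;
  }
}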
6650 void Sequencer::rebalanceLoad(int timestep) {
6651  if ( ! ldbSteps ) {
6652  ldbSteps = ldbCoordinator->getNumStepsToRun();
6653  }
6654  if ( ! --ldbSteps ) {
6655  patch->submitLoadStats(timestep);
6656  ldbCoordinator->rebalance(this,patch->getPatchID());
6657  pairlistsAreValid = 0;
6658  }
6659 }
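// ldbSteps counts down once per call; when it reaches zero this patch
// submits its timing data and hands control to the load balancer,
// which may move work between processors, so any cached pairlists are
// invalidated.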
6660 
6661 void Sequencer::cycleBarrier(int doBarrier, int step) {
6662 #if USE_BARRIER
6663  if (doBarrier)
6664  // Blocking receive for the cycle barrier.
6665  broadcast->cycleBarrier.get(step);
6666 #endif
6667 }
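// cycleBarrier() compiles to a no-op unless USE_BARRIER is defined;
// when enabled, each patch blocks at the cycle boundary until the
// matching broadcast for this step arrives.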
6668 
6669 void Sequencer::traceBarrier(int step) {
6670  // Blocking receive for the trace barrier.
6671  broadcast->traceBarrier.get(step);
6672 }
6673 
6674 #ifdef MEASURE_NAMD_WITH_PAPI
6675 void Sequencer::papiMeasureBarrier(int step){
6676  // Blocking receive for the PAPI measure barrier.
6677  broadcast->papiMeasureBarrier.get(step);
6678 }
6679 #endif
6680 
6681 void Sequencer::terminate(void) {
6682  ldbCoordinator->pauseWork(patch->ldObjHandle);
6683  CthFree(thread);
6684  CthSuspend();
6685 }
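// CthFree() marks the current Charm++ thread for deallocation once it
// suspends, so the CthSuspend() above never returns: this is how a
// sequencer thread retires itself.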