// NAMD — Sequencer.C
// (This listing was extracted from a generated documentation page;
//  "Go to the documentation of this file." was the page's navigation link.)
7 /*****************************************************************************
8  * $Source: /home/cvs/namd/cvsroot/namd2/src/Sequencer.C,v $
9  * $Author: jim $
10  * $Date: 2016/08/26 19:40:32 $
11  * $Revision: 1.1230 $
12  *****************************************************************************/
13 
14 // The UPPER_BOUND macro is used to eliminate all of the per atom
15 // computation done for the numerical integration in Sequencer::integrate()
16 // other than the actual force computation and atom migration.
17 // The idea is to "turn off" the integration for doing performance
18 // profiling in order to get an upper bound on the speedup available
19 // by moving the integration parts to the GPU.
20 //
21 // Define it in the Make.config file, i.e. CXXOPTS += -DUPPER_BOUND
22 // or simply uncomment the line below.
23 //
24 //#define UPPER_BOUND
25 
26 //for gbis debugging; print net force on each atom
27 #include "CudaRecord.h"
28 #include "PatchData.h"
29 #include "common.h"
30 #define PRINT_FORCES 0
31 
32 #include "InfoStream.h"
33 #include "Node.h"
34 #include "SimParameters.h"
35 #include "Sequencer.h"
36 #include "HomePatch.h"
37 #include "ReductionMgr.h"
38 #include "CollectionMgr.h"
39 #include "BroadcastObject.h"
40 #include "Output.h"
41 #include "Controller.h"
42 #include "Broadcasts.h"
43 #include "Molecule.h"
44 #include "NamdOneTools.h"
45 #include "LdbCoordinator.h"
46 #include "Thread.h"
47 #include "Random.h"
48 #include "PatchMap.inl"
49 #include "ComputeMgr.h"
50 #include "ComputeGlobal.h"
51 #include "NamdEventsProfiling.h"
52 #include <iomanip>
53 #include "ComputeCUDAMgr.h"
54 #include "CollectionMaster.h"
55 #include "IMDOutput.h"
56 #include "CudaGlobalMasterServer.h"
57 
58 #include "TestArray.h"
59 
60 #include <algorithm> // Used for sorting
61 
62 #define MIN_DEBUG_LEVEL 3
63 //#define DEBUGM
64 //
65 // Define NL_DEBUG below to activate D_*() macros in integrate_SOA()
66 // for debugging.
67 //
68 //#define NL_DEBUG
69 #include "Debug.h"
70 
71 #if USE_HPM
72 #define START_HPM_STEP 1000
73 #define STOP_HPM_STEP 1500
74 #endif
75 
76 #include "DeviceCUDA.h"
77 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
78 #ifdef WIN32
79 #define __thread __declspec(thread)
80 #endif
81 extern __thread DeviceCUDA *deviceCUDA;
82 #ifdef __IBMCPP__
83 // IBM compiler requires separate definition for static members
85 #endif
86 #endif
87 
88 #define SPECIAL_PATCH_ID 91
89 
90 //
91 // BEGIN
92 // print_* routines
93 // assist in debugging SOA integration code
94 //
95 static void print_vel_AOS(
96  const FullAtom *a,
97  int ilo=0, int ihip1=1
98  ) {
99  printf("AOS Velocities:\n");
100  for (int i=ilo; i < ihip1; i++) {
101  printf("%d %g %g %g\n", i,
102  a[i].velocity.x, a[i].velocity.y, a[i].velocity.z);
103  }
104 }
105 
106 
// Debugging aid: dump SOA-layout velocities (separate x/y/z arrays)
// over the half-open index range [ilo, ihip1) to stdout, one atom per line.
static void print_vel_SOA(
    const double *vel_x,
    const double *vel_y,
    const double *vel_z,
    int ilo=0, int ihip1=1
    ) {
  printf("SOA Velocities:\n");
  for (int idx = ilo; idx < ihip1; ++idx) {
    printf("%d %g %g %g\n", idx, vel_x[idx], vel_y[idx], vel_z[idx]);
  }
}
118 
119 
// Debugging aid: print all nine components of a Tensor on one line,
// in row-major order (xx xy xz / yx yy yz / zx zy zz).
static void print_tensor(const Tensor& t) {
  printf("%g %g %g %g %g %g %g %g %g\n",
      t.xx, t.xy, t.xz, t.yx, t.yy, t.yz, t.zx, t.zy, t.zz);
}
124 //
125 // END
126 // print_* routines
127 // assist in debugging SOA integration code
128 //
129 
130 
// Periodic step-count trigger used by the integrator loops: records the
// next step number at which a periodic action (energy output, nonbonded
// evaluation, etc.) is due to fire.
struct CheckStep {
  int period;    // firing interval, in steps
  int nextstep;  // next step number at which check() returns true

  // Returns 1 exactly when step matches the scheduled step, advancing the
  // schedule by one period as a side effect; returns 0 otherwise.
  inline int check(int step) {
    if (step != nextstep) return 0;
    nextstep += period;
    return 1;
  }

  // Arm the trigger: fire every initperiod steps starting after initstep,
  // phase-shifted by delta (C++ remainder semantics, so a negative delta
  // shifts the schedule forward).  Returns 1 iff initstep itself lies on
  // the firing schedule (e.g. divisible by period when delta == 0).
  inline int init(int initstep, int initperiod, int delta=0) {
    period = initperiod;
    const int phase = initstep % period;
    const int shift = delta % period;
    nextstep = initstep - phase - shift;
    // Advance until strictly past initstep.
    for ( ; nextstep <= initstep; nextstep += period) { }
    return (nextstep - initstep == period);
  }

  CheckStep() : period(0), nextstep(0) { }
};
169 
170 
172  simParams(Node::Object()->simParameters),
173  patch(p),
174  collection(CollectionMgr::Object()),
175  ldbSteps(0),
176  pairlistsAreValid(0),
177  pairlistsAge(0),
178  pairlistsAgeLimit(0)
179 {
182 
183 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
185  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
186  PatchData* patchData = cpdata.ckLocalBranch();
188  } else
189 #endif // defined(NAMD_CUDA) || defined(NAMD_HIP)
190  {
192  }
193 
198  int ntypes = simParams->pressureProfileAtomTypes;
199  int nslabs = simParams->pressureProfileSlabs;
202  REDUCTIONS_PPROF_INTERNAL, 3*nslabs*ntypes);
203  } else {
205  }
206  if (simParams->multigratorOn) {
208  } else {
209  multigratorReduction = NULL;
210  }
211  ldbCoordinator = (LdbCoordinator::Object());
214 
215  // Is soluteScaling enabled?
216  if (simParams->soluteScalingOn) {
217  // If so, we must "manually" perform charge scaling on startup because
218  // Sequencer will not get a scripting task for initial charge scaling.
219  // Subsequent rescalings will take place through a scripting task.
221  }
222 
224  stochRescale_count = 0;
226  masterThread = true;
227 // patch->write_tip4_props();
228 #if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(SEQUENCER_SOA) && defined(NODEGROUP_FORCE_REGISTER)
230 #if 0
231  CUDASequencer = new SequencerCUDA(deviceCUDA->getDeviceID(),
232  simParams);
233 #else
234  CUDASequencer = SequencerCUDA::InstanceInit(deviceCUDA->getDeviceID(),
235  simParams);
236 
237  syncColl = SynchronousCollectives::Object();
238  globalGPUMgr = GlobalGPUMgr::Object();
240 #endif
241  }
242 #endif
243 }
244 
246 {
247  delete broadcast;
248  delete reduction;
249  delete min_reduction;
251  delete random;
253 #if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(SEQUENCER_SOA) && defined(NODEGROUP_FORCE_REGISTER)
255  delete CUDASequencer;
257  }
258 #endif
259 }
260 
261 // Invoked by thread
// Entry point for the Charm++ user-level thread created by Sequencer::run():
// hands control to the scripting loop (algorithm()) for this sequencer.
// NOTE(review): the generated listing this was recovered from elides one
// line of this body (documentation line 264) — verify against repository
// source before relying on this transcription.
void Sequencer::threadRun(Sequencer* arg)
{
  arg->algorithm();
}
267 
268 // Invoked by Node::run() via HomePatch::runSequencer()
269 void Sequencer::run(void)
270 {
271  // create a Thread and invoke it
272  DebugM(4, "::run() - this = " << this << "\n" );
273  thread = CthCreate((CthVoidFn)&(threadRun),(void*)(this),SEQ_STK_SZ);
274  CthSetStrategyDefault(thread);
275  priority = PATCH_PRIORITY(patch->getPatchID());
276  awaken();
277 }
278 
280 {
282  CthSuspend();
284 }
285 
286 // Defines sequence of operations on a patch. e.g. when
287 // to push out information for Compute objects to consume
288 // when to migrate atoms, when to add forces to velocity update.
290 {
291  int scriptTask;
292  int scriptSeq = 0;
293  // Blocking receive for the script barrier.
294  while ( (scriptTask = broadcast->scriptBarrier.get(scriptSeq++)) != SCRIPT_END ) {
295  switch ( scriptTask ) {
296  case SCRIPT_OUTPUT:
298  break;
299  case SCRIPT_FORCEOUTPUT:
301  break;
302  case SCRIPT_MEASURE:
304  break;
305  case SCRIPT_REINITVELS:
307  break;
308  case SCRIPT_RESCALEVELS:
310  break;
313  break;
315  reloadCharges();
316  break;
317  case SCRIPT_CHECKPOINT:
318  patch->checkpoint();
320  break;
321  case SCRIPT_REVERT:
322  patch->revert();
324  pairlistsAreValid = 0;
325  break;
331  break;
332  case SCRIPT_ATOMSENDRECV:
333  case SCRIPT_ATOMSEND:
334  case SCRIPT_ATOMRECV:
335  patch->exchangeAtoms(scriptTask);
336  break;
337  case SCRIPT_MINIMIZE:
338 #if 0
340  NAMD_die("Minimization is currently not supported on the GPU integrator\n");
341  }
342 #endif
343  minimize();
344  break;
345  case SCRIPT_RUN:
346  case SCRIPT_CONTINUE:
347  //
348  // DJH: Call a cleaned up version of integrate().
349  //
350  // We could test for simulation options and call a more basic version
351  // of integrate() where we can avoid performing most tests.
352  //
353 #ifdef SEQUENCER_SOA
354  if ( simParams->SOAintegrateOn ) {
355 #ifdef NODEGROUP_FORCE_REGISTER
356 
358  else {
359 #endif
360  integrate_SOA(scriptTask);
361 #ifdef NODEGROUP_FORCE_REGISTER
362  }
363 #endif
364  }
365  else
366 #endif
367  integrate(scriptTask);
368  break;
369  default:
370  NAMD_bug("Unknown task in Sequencer::algorithm");
371  }
372  }
374  terminate();
375 }
376 
377 
378 #ifdef SEQUENCER_SOA
379 
381 //
382 // begin SOA code
383 //
384 
385 #if defined(NODEGROUP_FORCE_REGISTER)
386 
388  PatchMap* patchMap = PatchMap::Object();
389  CUDASequencer->numPatchesCheckedIn += 1;
390  if (CUDASequencer->numPatchesCheckedIn < patchMap->numPatchesOnNode(CkMyPe())) {
391  masterThread = false;
392  CUDASequencer->waitingThreads.push_back(CthSelf());
393  NAMD_EVENT_STOP(patch->flags.event_on, NamdProfileEvent::INTEGRATE_SOA_1);
394  CthSuspend();
395 
396  // JM: if a thread get here, it will be for migrating atoms until the end of the simulation
397  while(true){
398  // read global flags
399  int lastStep = CUDASequencer->patchData->flags.step;
400  int startup = (CUDASequencer->patchData->flags.step == simParams->firstTimestep);
401  if (CUDASequencer->breakSuspends) break;
403  this->patch->positionsReady_GPU(true, startup);
404  } else {
405  this->patch->positionsReady_SOA(true);
406  }
407  CUDASequencer->numPatchesCheckedIn += 1;
408  CUDASequencer->waitingThreads.push_back(CthSelf());
409  if(CUDASequencer->numPatchesCheckedIn == patchMap->numPatchesOnNode(CkMyPe()) - 1 &&
410  CUDASequencer->masterThreadSleeping){
411  CUDASequencer->masterThreadSleeping = false;
412  CthAwaken(CUDASequencer->masterThread);
413  }
414  CthSuspend();
415  }
416  }
417 }
418 void Sequencer::wakeULTs(){
419  CUDASequencer->numPatchesCheckedIn = 0;
420  for (CthThread t : CUDASequencer->waitingThreads) {
421  CthAwaken(t);
422  }
423  CUDASequencer->waitingThreads.clear();
424 }
425 
// Drive one step's worth of GPU force computation: bonded, nonbonded,
// PME, and optional CudaGlobalMaster / global-master (Tcl/Colvars/IMD)
// work.  On migration steps (doMigration != 0) the per-PE setup sequence
// (atomUpdate / openBoxesOnPe / loadTuplesOnPe / launchWork /
// finishPatches / finishReductions) is driven manually from here; on
// non-migration steps only the master PE issues work.
//
// NOTE(review): this body was recovered from a generated-documentation
// rendering.  Several original lines are elided (the declarations of
// cudaMgr and hpList, interposed node barriers, and some conditionals),
// so the visible brace balance below reflects those elisions — verify
// against the repository source.
void Sequencer::runComputeObjectsCUDA(int doMigration, int doGlobal, int pairlists, int nstep, int startup) {

  PatchMap* map = PatchMap::Object();

  bool isMaster = deviceCUDA->getMasterPe() == CkMyPe();

  // Sync after the node barrier. This is making sure that the position buffers have been
  // populated. However, this doesn't need to happen at the node level. I.e. the non-pme
  // nonbonded calculations can begin before the PME device is finished setting it's positions.
  // There is a node barrier after the forces are done, so we don't have to worry about
  // the positions being updated before the positions have been set
  if (isMaster) {
    CUDASequencer->sync();
  }

  // JM: Each masterPE owns a particular copy of the compute object we need to launch
  //     work on. The goal is to launch work on everyone, but for migration steps, sometimes
  //     there are a few operation that need to be launched on computes owned by different PEs.
  //     ComputeBondedCUDA::openBoxesOnPe() is an example: There is a list of PEs on each compute
  //     which holds information on which proxy object it should also invoke openBoxesOnPe();

  //     We need to be mindful of that and, since we want to launch methods on different computes.
  //     A data structure that holds all nonbonded Computes from all masterPEs is necessary
  // NOTE(review): cudaMgr is used below but its declaration line is elided
  // in this listing — presumably obtained from ComputeCUDAMgr; confirm.
  ComputeBondedCUDA* cudaBond = cudaMgr->getComputeBondedCUDA();
  CudaComputeNonbonded* cudaNbond = cudaMgr->getCudaComputeNonbonded();
  CudaPmeOneDevice* cudaPme = (globalGPUMgr->getIsPmeDevice() && simParams->fullElectFrequency) ?
    cudaMgr->getCudaPmeOneDevice() : NULL;
  // We need to submit PME reductions even if we don't compute the pme force
  int computePme = (patch->flags.doFullElectrostatics);
  int reducePme = (patch->flags.doVirial || patch->flags.doEnergy);
  auto cudaGlobal = deviceCUDA->getIsGlobalDevice() ? cudaMgr->getCudaGlobalMaster() : nullptr;
  if (isMaster && cudaGlobal && doMigration) cudaGlobal->setStep(static_cast<int64_t>(patch->flags.step));
  // fprintf(stderr, "Patch %d invoking computes\n", this->patch->patchID);

  // JM NOTE: I don't think the scheme below holds for nMasterPes > 1, check it out laters

  // Invoking computes on the GPU //
  if(doMigration){
    // JM: if we're on a migration step, we call the setup functions manually
    //     which means:
    //     0. masterPe->doWork();
    //     1. openBoxesOnPe();
    //        loadTuplesOnPe();
    //     2. masterPe->launchWork();
    //     3. finishPatchesOnPe();
    //     4. masterPe->finishReductions();

    if(isMaster){
      NAMD_EVENT_START(1, NamdProfileEvent::MIG_ATOMUPDATE);
      cudaNbond->atomUpdate();
      cudaBond->atomUpdate();
      cudaNbond->doWork();
      cudaBond->doWork();
      NAMD_EVENT_STOP(1, NamdProfileEvent::MIG_ATOMUPDATE);

      if (cudaPme && !simParams->useDeviceMigration) CUDASequencer->atomUpdatePme();
      if (cudaGlobal) {
        cudaGlobal->updateAtomMaps();
        cudaGlobal->communicateToClients(&(this->patch->lattice));
      }
    }

    // NOTE(review): an enclosing conditional here is elided in this listing;
    // the unmatched closing brace after the block below belongs to it.
    if(isMaster){
      CUDASequencer->launch_set_compute_positions();
      CUDASequencer->sync(); // TODO move this to tuple migration
    }
    }

    NAMD_EVENT_START(1, NamdProfileEvent::MIG_OPENBOXESONPE);

    // Here we need to do the following, for each Comput
    for(int i = 0 ; i < CkNumPes(); i++){
      // Here I need to find if the PE is on the bonded PE list
      // XXX NOTE: This might be inefficient. Check the overhead later
      ComputeBondedCUDA* b = CUDASequencer->patchData->cudaBondedList[i];
      CudaComputeNonbonded* nb = CUDASequencer->patchData->cudaNonbondedList[i];
      if (b == NULL) continue;
      auto list = std::find(std::begin(b->getBondedPes()), std::end(b->getBondedPes()), CkMyPe());
      if( list != std::end(b->getBondedPes()) ){
        b->openBoxesOnPe(startup);

        // XXX NOTE: nb has a differente PE list!!! We need a different loop for nb
        nb->openBoxesOnPe();

      }
    }
    NAMD_EVENT_STOP(1, NamdProfileEvent::MIG_OPENBOXESONPE);
    // for the bonded kernels, there's an additional step here, loadTuplesOnPe
    // JM NOTE: Those are major hotspots, they account for 50% of the migration time.
    NAMD_EVENT_START(1, NamdProfileEvent::MIG_LOADTUPLESONPE);

    // NOTE: problem here: One of the CompAtomExt structures is turning to null, why?
    cudaBond->loadTuplesOnPe(startup);
    NAMD_EVENT_STOP(1, NamdProfileEvent::MIG_LOADTUPLESONPE);
    NAMD_EVENT_START(1, NamdProfileEvent::MIG_COPYTUPLEDATA);

    // NOTE(review): the opening conditional of this if/else (presumably on
    // device-migration mode) is elided in this listing.
      cudaBond->copyTupleDataGPU(startup);
    } else {
      cudaBond->copyTupleDataSN();
    }

    NAMD_EVENT_STOP(1, NamdProfileEvent::MIG_COPYTUPLEDATA);
    // waits until everyone has finished to open their respective boxes
    // node barrier actually prevents the error that is happening.
    if(isMaster){
      // launches work on the masterPe
      NAMD_EVENT_START(1, NamdProfileEvent::MIG_LAUNCHWORK);
      cudaBond->launchWork();
      cudaNbond->launchWork();
      if (cudaPme && computePme) {
        cudaPme->compute(*(CUDASequencer->patchData->lat), reducePme, this->patch->flags.step);
      }
      cudaNbond->reSortTileLists();
      if (cudaGlobal) {
        // cudaGlobal->communicateToClients(&(this->patch->lattice));
        cudaGlobal->calculate();
        cudaGlobal->communicateToMD(patch->flags.doEnergy, patch->flags.doVirial);
      }
      NAMD_EVENT_STOP(1, NamdProfileEvent::MIG_LAUNCHWORK);
    }

    //global master force calculation

    if(doGlobal) {
      NAMD_EVENT_START(1, NamdProfileEvent::GM_CALCULATE);
      // Zero all SOA global forces before computing next global force
      NAMD_EVENT_START(1, NamdProfileEvent::GM_ZERO);
      int numhp = PatchMap::Object()->numHomePatches();
      // NOTE(review): the declaration of hpList is elided in this listing —
      // presumably the node's home-patch list; confirm against source.
      for(int i = 0; i < numhp; ++i) {
        HomePatch *hp = hpList->item(i).patch;
        hp->zero_global_forces_SOA();
      }
      NAMD_EVENT_STOP(1, NamdProfileEvent::GM_ZERO);
      NAMD_EVENT_START(1, NamdProfileEvent::GM_DOWORK);
      // call globalmaster to calculate the force from client.
      computeGlobal->doWork();
      NAMD_EVENT_STOP(1, NamdProfileEvent::GM_DOWORK);
      NAMD_EVENT_START(1, NamdProfileEvent::GM_BARRIER);
      // CkPrintf("post doWork step %d \n",this->patch->flags.step);
      // CUDASequencer->printSOAPositionsAndVelocities();
      NAMD_EVENT_STOP(1, NamdProfileEvent::GM_BARRIER);
      if(isMaster) {
        // aggregate and copy the global forces to d_f_global device buffer
        NAMD_EVENT_START(1, NamdProfileEvent::GM_CPY_FORCE);
        CUDASequencer->copyGlobalForcesToDevice();
        NAMD_EVENT_STOP(1, NamdProfileEvent::GM_CPY_FORCE);
      }
      NAMD_EVENT_STOP(1, NamdProfileEvent::GM_CALCULATE);
    }
    NAMD_EVENT_START(1, NamdProfileEvent::MIG_FINISHPATCHES);
    cudaNbond->finishPatches();
    cudaBond->finishPatches();
    NAMD_EVENT_STOP(1, NamdProfileEvent::MIG_FINISHPATCHES);

    // finishes reduction with masterPe!
    if(isMaster){
      cudaNbond->finishReductions();
      if (cudaPme) cudaPme->finishReduction(reducePme);
      cudaBond->finishReductions();
      if (cudaGlobal) cudaGlobal->finishReductions();
    }
  }
  // if we're not on a migration step, do the work only on masterPE, except globalmaster work
  else {
    int doNbond = patch->flags.doNonbonded;
    if(isMaster) {
      // JM NOTE: We issue the nonbonded work first and sync it last
      if (cudaPme && computePme) {
        cudaPme->compute(*(CUDASequencer->patchData->lat), reducePme, this->patch->flags.step);
      }
      cudaNbond->doWork();
      cudaBond->doWork();
      if (cudaGlobal) {
        // cudaGlobal->communicateToClients(&(this->patch->lattice));
        cudaGlobal->calculate();
        cudaGlobal->communicateToMD(patch->flags.doEnergy, patch->flags.doVirial);
      }
    }
    //global master force calculation
    if(doGlobal) {
      NAMD_EVENT_START(1, NamdProfileEvent::GM_CALCULATE);
      NAMD_EVENT_START(1, NamdProfileEvent::GM_ZERO);
      // Zero all SOA global forces before computing next global force
      int numhp = PatchMap::Object()->numHomePatches();
      // NOTE(review): hpList declaration elided here as well — confirm.
      for(int i = 0; i < numhp; ++i) {
        HomePatch *hp = hpList->item(i).patch;
        hp->zero_global_forces_SOA();
      }
      NAMD_EVENT_STOP(1, NamdProfileEvent::GM_ZERO);
      // call globalmaster to calculate the force from client.
      NAMD_EVENT_START(1, NamdProfileEvent::GM_DOWORK);
      computeGlobal->doWork();
      NAMD_EVENT_STOP(1, NamdProfileEvent::GM_DOWORK);
      NAMD_EVENT_START(1, NamdProfileEvent::GM_BARRIER);
      NAMD_EVENT_STOP(1, NamdProfileEvent::GM_BARRIER);
      // CkPrintf("post doWork 2 step %d \n",this->patch->flags.step);
      // CUDASequencer->printSOAPositionsAndVelocities();
      if(isMaster) {
        // aggregate and copy the global forces to d_f_global device buffer
        NAMD_EVENT_START(1, NamdProfileEvent::GM_CPY_FORCE);
        CUDASequencer->copyGlobalForcesToDevice();
        NAMD_EVENT_STOP(1, NamdProfileEvent::GM_CPY_FORCE);
      }
      NAMD_EVENT_STOP(1, NamdProfileEvent::GM_CALCULATE);
    }
    if(isMaster) {
      cudaBond->finishPatches();
      if (cudaPme) {
        cudaPme->finishReduction(reducePme);
      }
      cudaNbond->finishPatches();
      if (cudaGlobal) cudaGlobal->finishReductions();
    }
  }

#if 0
  // for migrations, I need to call OpenBoxesOnPe and finishPatches for every Pe
    pairlistsAreValid = 1;
    pairlistsAge = 0;
  }
  if ( pairlistsAreValid /* && !pressureStep */ ) ++pairlistsAge;
#endif
  // syncColl->barrier(SynchronousCollectiveScope::all);
}
673 
674 //apply MC pressure control
676  const int step,
677  const int doMigration,
678  const int doEnergy,
679  const int doVirial,
680  const int maxForceNumber,
681  const int doGlobal)
682 {
683  bool isMasterPe = (deviceCUDA->getMasterPe() == CkMyPe() );
684  NodeReduction *reduction = CUDASequencer->patchData->reductionBackend;
685  Controller *c_out = CUDASequencer->patchData->c_out;
686  bool mGpuOn = CUDASequencer->mGpuOn;
687  Lattice oldLattice = this->patch->lattice;
688  Vector origin = this->patch->lattice.origin();
689  Tensor factor;
690  int accepted = 0; // status of MC volume fluctuation trial
691 
692  CUDASequencer->submitReductionValues(); // Copy data to NodeReduction
693  if(isMasterPe){
694  // Backup the reduction values for rejected move
695  CUDASequencer->patchData->reductionBackendSave->setVal(reduction);
696 
698  // Send the rescale factor for Monte Carlo Volume change from controller
699  c_out->mcPressure_prepare(step);
700  // receive the factor
701  factor = broadcast->positionRescaleFactor.get(step, CkNumPes());
702  }
703 
704  // Backup positions and forces, scale the coordinates and lattice
705  // Setup positions for energy and force calculation
706  CUDASequencer->monteCarloPressure_part1(factor, origin, oldLattice);
708  // Scale the lattice with factor
709  // patch.lattice is pointing to patch.flags.lattice
710  this->patch->lattice.rescale(factor);
711  CUDASequencer->patchData->lat = &(this->patch->lattice);
712  CUDASequencer->patchData->factor = &(factor);
713  // Copy scaled lattic flags to all patches
714  CUDASequencer->patchData->flags.copyIntFlags(this->patch->flags);
715 
716  // Zero all reduction values. We will add halfStep values, if
717  // the move is accepted.
718  reduction->zero();
719  }
720  }
721 
723  if(isMasterPe){
724  // copy global flags
725  CUDASequencer->update_patch_flags();
726  }
728  // Calculate the new force and energy after rescaling the coordinates
729  // Migration happend before calling this function
730  this->runComputeObjectsCUDA(0, doGlobal, 1, step, 0 /* startup */);
732 
733  if(isMasterPe){
734  // Accumulate force to SOA, calculate External energy/force
735  // reduce energy and virial
736  CUDASequencer->monteCarloPressure_part2(step, maxForceNumber,
737  doEnergy, doGlobal, doVirial);
738  CUDASequencer->submitReductionValues(); // Copy data to NodeReduction
739 
741  // Check to see if the move is accepted or not
742  c_out->mcPressure_accept(step);
743  accepted = broadcast->monteCarloBarostatAcceptance.get(step);
744  //printf("Sequencer (accept): step: %d, Pe: %d, ACC status: %d\n", step, CkMyPe(), accepted);
745  }
746 
747  if (accepted) { // Move accepted
748  CUDASequencer->monteCarloPressure_accept(doMigration);
749  } else { // Move rejected
751  // Set the lattice to the original value, before scaling
752  this->patch->lattice = oldLattice;
753  CUDASequencer->patchData->lat = &(this->patch->lattice);
754  // Copy scaled lattic flags to all patches
755  CUDASequencer->patchData->flags.copyIntFlags(this->patch->flags);
756  }
757 
758  // Restore all positions and forces and cuLattice
759  CUDASequencer->monteCarloPressure_reject(this->patch->lattice);
760  // Restore the reduction values
761 
762  reduction->setVal(CUDASequencer->patchData->reductionBackendSave);
763  }
764  }
765 
767  //continue the rejection step. Need to update lattice in all patches
768  if(isMasterPe && !accepted){
769  // copy global flags
770  CUDASequencer->update_patch_flags();
771  }
772 }
773 
// Perform GPU-resident atom migration: handles buffer reallocation across
// devices, runs the device migration kernels (local init / perform / count
// and offset updates), re-registers peer-to-peer SOA pointers when any
// device reallocated, wakes the parked per-patch user-level threads so
// HomePatch data is refreshed, and finally updates destination/proxy/remote
// offsets and advanced-feature state.
//
// NOTE(review): recovered from a generated-documentation rendering.  The
// original interleaves synchronization calls (likely node barriers) between
// the isMasterPe blocks; those lines are elided here — verify against the
// repository source before treating the ordering below as complete.
void Sequencer::doMigrationGPU(const int startup, const int doGlobal,
  const int updatePatchMap) {

  const bool isMasterPe = deviceCUDA->getMasterPe() == CkMyPe();
  const bool updatePatchData = startup || doGlobal || updatePatchMap;
  PatchMap* patchMap = PatchMap::Object();

  bool realloc = false;

  // This will check if a reallocation was done on the previous migration
  // We use the scratch buffers to store the atomic data during reallocation
  // However, the migrationDestination data much be maintained throughout
  // migration (and tuple migration so beyond the scope of this function)
  // We probably should add a function to do this at the end of migration
  // But for now, DMC thought it was easier to just do at the begining
  for (int i = 0; i < deviceCUDA->getNumDevice(); i++) {
    if(CUDASequencer->patchData->atomReallocationFlagPerDevice[i] != 0) {
      realloc = true;
      break;
    }
  }
  if (realloc) {
    if (isMasterPe) {
      CUDASequencer->reallocateMigrationDestination();
      CUDASequencer->registerSOAPointersToHost();
    }
    if (isMasterPe) {
      CUDASequencer->copySOAHostRegisterToDevice();
    }
  }

  // Proceed with migration
  //
  // Starts GPU migration
  //
  if (isMasterPe) {
    CUDASequencer->migrationLocalInit();
    // Hidden stream sync
  }

  if (isMasterPe) {
    CUDASequencer->migrationPerform();
    // Hidden stream sync
  }

  if (isMasterPe) {
    CUDASequencer->migrationUpdateAtomCounts();
    // Hidden stream sync
  }

  if (isMasterPe) {
    CUDASequencer->migrationUpdateAtomOffsets();
    // Hidden stream sync
  }

  if (isMasterPe) {
    CUDASequencer->copyPatchDataToHost();
    // Hidden stream sync
  }

  // Update device buffer allocations
  realloc = false;
  if (isMasterPe) {
    realloc = CUDASequencer->copyPatchData(true, false);
    CUDASequencer->patchData->atomReallocationFlagPerDevice[deviceCUDA->getDeviceIndex()] = realloc;
  }

  // If any of the devices have reallocated, we need to re-register the p2p buffers
  for (int i = 0; i < deviceCUDA->getNumDevice(); i++) {
    if(CUDASequencer->patchData->atomReallocationFlagPerDevice[i] != 0) {
      realloc = true;
      break;
    }
  }
  if (realloc) {
    if (isMasterPe) {
      CUDASequencer->registerSOAPointersToHost();
    }
    if (isMasterPe) {
      CUDASequencer->copySOAHostRegisterToDevice();
    }
  }

  // Performs various post processing like Solute/Solvent sorting and copies back to host
  if (isMasterPe) {
    CUDASequencer->migrationLocalPost(0);
    CUDASequencer->migrationSortAtomsNonbonded();
  }

  // If this is startup, we need to delay this until after AoS has been copied back to host
  // Because we do need the atomIDs for the atom map initially
  if (!updatePatchData) {
    wakeULTs(); // Wakes everyone back up for migration
    this->patch->positionsReady_GPU(1, startup);
    if(CUDASequencer->numPatchesCheckedIn < patchMap->numPatchesOnNode(CkMyPe()) -1 ) {
      CUDASequencer->masterThreadSleeping = true;
      CUDASequencer->masterThread = CthSelf();
      CthSuspend();
    }
  }

  if (isMasterPe) {
    CUDASequencer->sync();
  }

  if (isMasterPe) {
    CUDASequencer->migrationUpdateDestination();
  }

  if (isMasterPe) {
    CUDASequencer->migrationUpdateProxyDestination();
  }

  if (isMasterPe) {
    CUDASequencer->migrationUpdateRemoteOffsets();
  }

  if (isMasterPe) {
    CUDASequencer->copyDataToPeers(true);
  }

  if (updatePatchData) {
    // The atom maps need to be cleared the HomePatch atom arrays have been updated
    int numhp = PatchMap::Object()->numHomePatches();
    // NOTE(review): the declaration of hpList is elided in this listing —
    // presumably the node's home-patch list; confirm against source.
    for(int i = 0; i < numhp; ++i) {
      HomePatch *hp = hpList->item(i).patch;
      hp->clearAtomMap();
    }
    if (isMasterPe) {
      // We need the atom ordering to be correct within each
      // patch to setup the atom map. The vdwType of each atom
      // is also used for exclusion tuple generation
      CUDASequencer->copyAoSDataToHost();
    }
    wakeULTs(); // Wakes everyone back up for migration
    this->patch->positionsReady_GPU(1, startup);
    if(CUDASequencer->numPatchesCheckedIn < patchMap->numPatchesOnNode(CkMyPe()) -1 ) {
      CUDASequencer->masterThreadSleeping = true;
      CUDASequencer->masterThread = CthSelf();
      CthSuspend();
    }
  }
  if (isMasterPe) {
    if (doGlobal || simParams->forceDcdFrequency > 0) {
      CUDASequencer->updateHostPatchDataSOA(); // Needs to be called after HomePatch updates
    }
  }
  if (isMasterPe) {
    // This needs to be called after positionsReady_GPU to that the atom maps have been updated
    // This will be called in updateDeviceData during with startup=true, but we need to call it
    // with startup=false to make sure the atoms are updated
    CUDASequencer->migrationUpdateAdvancedFeatures(false);
  }
}
947 
948 // JM: Single-node integration scheme
949 void Sequencer::integrate_CUDA_SOA(int scriptTask){
950 
951  #ifdef TIMER_COLLECTION
952  TimerSet& t = patch->timerSet;
953  #endif
954  TIMER_INIT_WIDTH(t, KICK, simParams->timerBinWidth);
955  TIMER_INIT_WIDTH(t, MAXMOVE, simParams->timerBinWidth);
956  TIMER_INIT_WIDTH(t, DRIFT, simParams->timerBinWidth);
957  TIMER_INIT_WIDTH(t, PISTON, simParams->timerBinWidth);
958  TIMER_INIT_WIDTH(t, SUBMITHALF, simParams->timerBinWidth);
959  TIMER_INIT_WIDTH(t, VELBBK1, simParams->timerBinWidth);
960  TIMER_INIT_WIDTH(t, VELBBK2, simParams->timerBinWidth);
961  TIMER_INIT_WIDTH(t, RATTLE1, simParams->timerBinWidth);
962  TIMER_INIT_WIDTH(t, SUBMITFULL, simParams->timerBinWidth);
963  TIMER_INIT_WIDTH(t, SUBMITCOLLECT, simParams->timerBinWidth);
964 
965  // Keep track of the step number.
966  //int &step = patch->flags.step;
967  int &step = patch->flags.step;
968  step = simParams->firstTimestep;
969  Controller *c_out = CUDASequencer->patchData->c_out;
970  PatchMap* patchMap = PatchMap::Object();
971 
972  // For multiple time stepping, which force boxes are used?
973  int &maxForceUsed = patch->flags.maxForceUsed;
974  int &maxForceMerged = patch->flags.maxForceMerged;
975  maxForceUsed = Results::normal;
976  maxForceMerged = Results::normal;
977 
978  // Keep track of total steps and steps per cycle.
979  const int numberOfSteps = simParams->N;
980  //const int stepsPerCycle = simParams->stepsPerCycle;
981  CheckStep stepsPerCycle;
982  stepsPerCycle.init(step, simParams->stepsPerCycle);
983  // The fundamental time step, get the scaling right for velocity units.
984  const BigReal timestep = simParams->dt * RECIP_TIMEFACTOR;
985 
986  //const int nonbondedFrequency = simParams->nonbondedFrequency;
987  //slowFreq = nonbondedFrequency;
988  CheckStep nonbondedFrequency;
990  // The step size for short-range nonbonded forces.
991  const BigReal nbondstep = timestep * simParams->nonbondedFrequency;
992  int &doNonbonded = patch->flags.doNonbonded;
993  //doNonbonded = (step >= numberOfSteps) || !(step%nonbondedFrequency);
994  doNonbonded = (step >= numberOfSteps) ||
995  nonbondedFrequency.init(step, simParams->nonbondedFrequency);
996  //if ( nonbondedFrequency == 1 ) maxForceMerged = Results::nbond;
997  if ( nonbondedFrequency.period == 1 ) maxForceMerged = Results::nbond;
998  if ( doNonbonded ) maxForceUsed = Results::nbond;
999 
1000  // Do we do full electrostatics?
1001  const int dofull = ( simParams->fullElectFrequency ? 1 : 0 );
1002  //const int fullElectFrequency = simParams->fullElectFrequency;
1003  //if ( dofull ) slowFreq = fullElectFrequency;
1004  CheckStep fullElectFrequency;
1005  if ( dofull ) slowFreq = simParams->fullElectFrequency;
1006  // The step size for long-range electrostatics.
1007  const BigReal slowstep = timestep * simParams->fullElectFrequency;
1008  int &doFullElectrostatics = patch->flags.doFullElectrostatics;
1009  //doFullElectrostatics = (dofull &&
1010  // ((step >= numberOfSteps) || !(step%fullElectFrequency)));
1011  doFullElectrostatics = (dofull &&
1012  ((step >= numberOfSteps) ||
1013  fullElectFrequency.init(step, simParams->fullElectFrequency)));
1014  //if ( dofull && fullElectFrequency == 1 ) maxForceMerged = Results::slow;
1015  if ( dofull && fullElectFrequency.period == 1 ) maxForceMerged = Results::slow;
1016  if ( doFullElectrostatics ) maxForceUsed = Results::slow;
1017 
1018  // Bother to calculate energies?
1019  int &doEnergy = patch->flags.doEnergy;
1020  //int energyFrequency = simParams->outputEnergies;
1021  CheckStep energyFrequency;
1022  int newComputeEnergies = simParams->computeEnergies;
1023  if(simParams->alchOn) newComputeEnergies = NAMD_gcd(newComputeEnergies, simParams->alchOutFreq);
1024  doEnergy = energyFrequency.init(step, newComputeEnergies);
1025 
1026  // check for Monte Carlo pressure control.
1027  CheckStep monteCarloPressureFrequency;
1028  doEnergy += monteCarloPressureFrequency.init(step, (simParams->monteCarloPressureOn ?
1029  simParams->monteCarloPressureFreq : numberOfSteps + 1) );
1030 
1031  int &doVirial = patch->flags.doVirial;
1032  doVirial = 1;
1033  // Do we need to return forces to TCL script or Colvar module?
1034  int doTcl = simParams->tclForcesOn;
1035  int doColvars = simParams->colvarsOn;
1036  const int doIMD = (simParams->IMDon && ! (simParams->IMDignore || simParams->IMDignoreForces));
1037  int doGlobal = (doTcl || doColvars || doIMD);
1039  CheckStep globalMasterFrequency;
1040  bool globalMasterStep=false;
1041  int doGlobalObjects=0;
1042  int doGlobalStaleForces = 0;
1043 
1044  if(doGlobal)
1045  {
1046  globalMasterFrequency.init(step, (simParams->globalMasterFrequency > 0 ? simParams->globalMasterFrequency : numberOfSteps+1));
1047  globalMasterStep = globalMasterFrequency.check(step);
1048  doGlobalObjects = globalMasterStep? 1:0;
1050  {
1051  doGlobalObjects=1;
1052  doGlobalStaleForces=1;
1053  }
1055  {
1056  doGlobalStaleForces = simParams->globalMasterStaleForces;
1057  }
1059  {
1060  doGlobalStaleForces=doGlobalObjects;
1061  }
1062  else
1063  {
1064  doGlobalStaleForces=doGlobalObjects;
1065  }
1066  }
1067  else
1068  {
1069  doGlobalStaleForces = 0;
1070  doGlobalObjects = 0;
1071  }
1072  // The following flags have to be explicitly disabled in Patch object.
1073  patch->flags.doMolly = 0;
1074  patch->flags.doLoweAndersen = 0;
1075  patch->flags.doGBIS = 0;
1076  patch->flags.doLCPO = 0;
1077 
1078  // Square of maximum velocity for simulation safety check
1079  const BigReal maxvel2 =
1080  (simParams->cutoff * simParams->cutoff) / (timestep * timestep);
1081 
1082  // check for Langevin piston
1083  // set period beyond numberOfSteps to disable
1084  // fprintf(stderr, " Patch %d Pinging in from integrate_cuda!\n", this->patch->getPatchID());
1085  CheckStep langevinPistonFrequency;
1086  langevinPistonFrequency.init(step,
1087  (simParams->langevinPistonOn ? slowFreq : numberOfSteps+1 ),
1088  (simParams->langevinPistonOn ? -1-slowFreq/2 : 0) /* = delta */);
1089 
1090  // check for velocity rescaling
1091  // set period beyond numberOfSteps to disable
1092  CheckStep stochRescaleFrequency;
1093  stochRescaleFrequency.init(step, (simParams->stochRescaleOn ?
1094  simParams->stochRescaleFreq : numberOfSteps+1 ) );
1095 
1096  CheckStep reassignVelocityFrequency;
1097  reassignVelocityFrequency.init(step, ((simParams->reassignFreq>0) ?
1098  simParams->reassignFreq : numberOfSteps+1 ) );
1099 
1100  // check for output
1101  // set period beyond numberOfSteps to disable
1102  CheckStep restartFrequency;
1103  restartFrequency.init(step, (simParams->restartFrequency > 0 ?
1104  simParams->restartFrequency : numberOfSteps+1) );
1105  CheckStep dcdFrequency;
1106  dcdFrequency.init(step, (simParams->dcdFrequency > 0 ?
1107  simParams->dcdFrequency : numberOfSteps+1) );
1108  CheckStep velDcdFrequency;
1109  velDcdFrequency.init(step, (simParams->velDcdFrequency > 0 ?
1110  simParams->velDcdFrequency : numberOfSteps+1) );
1111  CheckStep forceDcdFrequency;
1112  forceDcdFrequency.init(step, (simParams->forceDcdFrequency > 0 ?
1113  simParams->forceDcdFrequency : numberOfSteps+1) );
1114  CheckStep imdFrequency;
1115  imdFrequency.init(step, (simParams->IMDon ?
1116  simParams->IMDfreq : numberOfSteps+1) );
1117 
1118  patch->copy_atoms_to_SOA(); // do this whether or not useDeviceMigration
1119 
1120  // Haochuan: is this really needed for GPU-resident?
1121  if (simParams->rigidBonds != RIGID_NONE && ! patch->settle_initialized) {
1123  patch->rattleListValid_SOA = true;
1124  }
1125 
1126  this->suspendULTs();
1127  // for "run 0", numberOfSteps is zero, but we want to have at least a single energy evaluation
1128  if(!masterThread) {
1129  return;
1130  }
1131  bool isMasterPe = (deviceCUDA->getMasterPe() == CkMyPe() );
1133 
1134  CUDASequencer->breakSuspends = false;
1135 
1136  // XXX this is ugly!
1137  // one thread will have the CollectionMaster and Output defined
1138  // use it to set the node group so that any thread can access
1139  if (CUDASequencer->patchData->ptrCollectionMaster == NULL) {
1140  CollectionMaster *pcm = CkpvAccess(CollectionMaster_instance)->Object();
1141  if (pcm) {
1142  CUDASequencer->patchData->ptrCollectionMaster = pcm;
1143  }
1144  }
1145  if (CUDASequencer->patchData->ptrOutput == NULL) {
1146  Output *pout = Node::Object()->output;
1147  if (pout) {
1148  CUDASequencer->patchData->ptrOutput = pout;
1149  }
1150  }
1151  if (CUDASequencer->patchData->pdb == NULL) {
1152  PDB *pdb = Node::Object()->pdb;
1153  if (pdb) {
1154  CUDASequencer->patchData->pdb = pdb;
1155  }
1156  }
1157  if (CUDASequencer->patchData->imd == NULL) {
1158  IMDOutput *imd = Node::Object()->imd;
1159  if (imd->getIMD()) {
1160  CUDASequencer->patchData->imd = imd;
1161  }
1162  }
1163 
1164  // Register ComputeCUDAMgrs from each PE into a list for later usage
1165  if(isMasterPe){
1166  // Each masterPE registers its own computeCUDAMgr
1167  CUDASequencer->patchData->cudaBondedList[CkMyPe()] = ComputeCUDAMgr::getComputeCUDAMgr()->getComputeBondedCUDA();
1168  CUDASequencer->patchData->cudaNonbondedList[CkMyPe()] = ComputeCUDAMgr::getComputeCUDAMgr()->getCudaComputeNonbonded();
1169  }else{
1170  CUDASequencer->patchData->cudaBondedList[CkMyPe()] = NULL;
1171  CUDASequencer->patchData->cudaNonbondedList[CkMyPe()] = NULL;
1172  }
1173 
1174  if (isMasterPe) {
1176  if(dofull && deviceCUDA->getIsPmeDevice()){
1177  CudaPmeOneDevice* cudaPme = 0;
1178  cudaPme = cudaMgr->createCudaPmeOneDevice();
1179  }
1180  }
1181 
1183 
1184 /* JM NOTE: This will contain the first calls to the integration loop. The order is:
1185  * 1 - Rattle (0,0)
1186  * 2 - runComputeObjects
1187  * 3 - addForceToMomentum(-0.5, tstep)
1188  * 4 - Rattle (-timestep, 0);
1189  * 5 - submitHalfstep();
1190  * 6 - addForceToMomentum(1.0 , tstep)
1191  * 7 - Rattle (tstep, 1)
1192  * 8 - SubmitHalf()
1193  * 9 - addForceToMomentum(-0.5, tstep)
1194  * 10 - submitReductions()
1195  */
1196 
1197  if(scriptTask == SCRIPT_RUN){
1198  updateDeviceData(1, maxForceUsed, doGlobal);
1199 
1200  if(isMasterPe) {
1201  if(patchData->updateCounter.load()>0)
1202  {
1203  CUDASequencer->updateDeviceKernels();
1204  }
1205 
1206  // warm_up1 is basically rattle1_SOA(0,0)
1207  CUDASequencer->startRun1(maxForceUsed, this->patch->lattice);
1208  (this->patch->flags.sequence)++;
1209  if (deviceCUDA->getIsMasterDevice()){
1210  CUDASequencer->patchData->lat = &(this->patch->lattice);
1211  CUDASequencer->patchData->flags.copyIntFlags(this->patch->flags);
1212  }
1214  const auto cudaGlobalMasterObject = cudaMgr->getCudaGlobalMaster();
1215  const bool addCudaGlobalForces =
1216  (cudaGlobalMasterObject != nullptr) ?
1217  cudaGlobalMasterObject->willAddGlobalForces() :
1218  false;
1219  if (addCudaGlobalForces) {
1220  CUDASequencer->allocateGPUSavedForces();
1221  }
1222  }
1223 
1225  if (!simParams->useDeviceMigration) {
1226  wakeULTs(); // Wakes everyone back up for migration
1227  this->patch->positionsReady_SOA(1);
1228  if(CUDASequencer->numPatchesCheckedIn < patchMap->numPatchesOnNode(CkMyPe()) -1 ) {
1229  CUDASequencer->masterThreadSleeping = true;
1230  CUDASequencer->masterThread = CthSelf();
1231  CthSuspend();
1232  }
1234  updateDeviceData(0, maxForceUsed, doGlobal);
1235  } else {
1236  doMigrationGPU(1, doGlobal, simParams->updateAtomMap);
1237  }
1251  if (isMasterPe) {
1252  CUDASequencer->setRescalePairlistTolerance(step < numberOfSteps);
1253  }
1255  // I've migrated everything. Now run computes
1256  runComputeObjectsCUDA(/*isMigration = */ 1 ,
1257  doGlobal,
1258  /* step < numberofSteps */ 1,
1259  /* step = */ 0,
1260  /* startup = */ 1);
1261 
1262  if(isMasterPe){
1263  CUDASequencer->finish_patch_flags(true);
1265  const auto cudaGlobalMasterObject = cudaMgr->getCudaGlobalMaster();
1266  const bool addCudaGlobalForces =
1267  (cudaGlobalMasterObject != nullptr) ?
1268  cudaGlobalMasterObject->willAddGlobalForces() :
1269  false;
1270  CUDASequencer->startRun2(timestep,
1271  nbondstep, slowstep, this->patch->lattice.origin(),
1272  doGlobal || addCudaGlobalForces, maxForceUsed);
1273  }
1275  if(isMasterPe){
1276  const bool requestTotalForces = computeGlobal ? computeGlobal->getForceSendActive() : false;
1278  const auto cudaGlobalMasterObject = cudaMgr->getCudaGlobalMaster();
1279  const bool requestGPUTotalForces =
1280  (cudaGlobalMasterObject != nullptr) ?
1281  cudaGlobalMasterObject->requestedTotalForces() :
1282  false;
1283  CUDASequencer->startRun3(timestep,
1284  nbondstep, slowstep, this->patch->lattice.origin(),
1285  requestTotalForces, doGlobalStaleForces,
1287  requestGPUTotalForces,
1288  maxForceUsed);
1289  }
1290 
1291  // save total force in computeGlobal, forces are copied from device
1292  // to host in startRun3
1293  if (doGlobal) {
1295  // store the total force for compute global clients
1296  int numhp = PatchMap::Object()->numHomePatches();
1298  for(int i = 0; i < numhp; ++i) {
1299  HomePatch *hp = hpList->item(i).patch;
1300  computeGlobal->saveTotalForces(hp);
1301  }
1302  }
1303  }
1304  CUDASequencer->submitReductionValues();
1305  syncColl->waitAndAwaken(); // Allow charm++ reductions to finish before calling require in print step
1306 
1307  // Called everything, now I can go ahead and print the step
1308  // PE 0 needs to handle IO as it owns the controller object
1309  // JM: What happens if PE 0 does not own a GPU here? XXX Check
1310  if(deviceCUDA->getIsMasterDevice()) {
1311  CUDASequencer->patchData->flags.copyIntFlags(this->patch->flags);
1312  c_out->resetMovingAverage();
1313  c_out->printStep(step);
1314  }
1316 
1317  // XXX Should we promote velrescaling into Sequencer in order to save
1318  // the velocity rescaling coefficient between script run commands?
1319  double velrescaling = 1;
1320  // --------- Start of the MD loop ------- //
1321  for( ++step; step <= numberOfSteps; ++step ){
1322  const int isForcesOutputStep = forceDcdFrequency.check(step) + imdFrequency.check(step);
1323  int dcdSelectionChecks=0;
1324  Molecule *molecule = Node::Object()->molecule;
1325  for(int dcdindex=0; dcdindex<16;++dcdindex)
1326  {
1327  int dcdSelectionFrequency = molecule->dcdSelectionParams[dcdindex].frequency;
1328  if(dcdSelectionFrequency && step % dcdSelectionFrequency==0)
1329  dcdSelectionChecks++;
1330  }
1331  const int isCollection = restartFrequency.check(step) +
1332  dcdFrequency.check(step) + velDcdFrequency.check(step) +
1333  isForcesOutputStep + dcdSelectionChecks;
1334  int isMigration = false;
1335  const int doVelocityRescale = stochRescaleFrequency.check(step);
1336  const int doMCPressure = monteCarloPressureFrequency.check(step);
1337  // XXX doVelRescale should instead set a "doTemperature" flag
1338  doEnergy = energyFrequency.check(step) || doVelocityRescale || doMCPressure;
1339  int langevinPistonStep = langevinPistonFrequency.check(step);
1340 
1341  int reassignVelocityStep = reassignVelocityFrequency.check(step);
1342 
1343  // berendsen pressure control
1344  int berendsenPressureStep = 0;
1349  berendsenPressureStep = 1;
1350  }
1351  }
1352  if(patchData->updateCounter.load()>0)
1353  {
1354  CUDASequencer->updateDeviceKernels();
1355  }
1356 
1357  if(doGlobal)
1358  {
1359  globalMasterStep = globalMasterFrequency.check(step);
1360  doGlobalObjects = globalMasterStep? 1:0;
1362  {
1363  doGlobalObjects=1;
1364  doGlobalStaleForces=1;
1365  }
1367  {
1368  doGlobalStaleForces = simParams->globalMasterStaleForces;
1369  }
1371  {
1372  doGlobalStaleForces=doGlobalObjects;
1373  }
1374  else
1375  {
1376  doGlobalStaleForces=doGlobalObjects;
1377  }
1378  }
1379  else
1380  {
1381  doGlobalStaleForces = 0;
1382  doGlobalObjects = 0;
1383  globalMasterStep = false;
1384  }
1385  // CkPrintf("step %d doGlobal %d doGlobalObjects %d doGlobalStaleForces %d globalMasterStep %d globalMasterFrequency %d\n", step, doGlobal, doGlobalObjects, doGlobalStaleForces, globalMasterStep, simParams->globalMasterFrequency);
1386 
1387 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
1388  //eon = epid && (beginStep < step && step <= endStep);
1389  // int eon = epid && (beginStep < step && step <= endStep);
1390  // if (controlProfiling && step == beginStep) {
1391  // NAMD_PROFILE_START();
1392  // }
1393  //if (controlProfiling && step == endStep) {
1394  // NAMD_PROFILE_STOP();
1395  //}
1396 #endif
1397 
1398  Vector origin = this->patch->lattice.origin();
1399  Tensor factor;
1400  if (deviceCUDA->getIsMasterDevice()) {
1401  if (simParams->langevinPistonOn) {
1402  c_out->piston1(step);
1403  }
1404  // Get the rescale factor for berendsen from controller
1406  c_out->berendsenPressureController(step);
1407  }
1408  }
1409 
1411  syncColl->waitAndAwaken();
1412  if (isMasterPe) cudaCheck(cudaDeviceSynchronize());
1414  }
1415  if (langevinPistonStep || berendsenPressureStep) {
1416  factor = broadcast->positionRescaleFactor.get(step, CkNumPes());
1417 
1418  if (isMasterPe) {
1419  this->patch->lattice.rescale(factor);
1420  CUDASequencer->patchData->lat = &(this->patch->lattice);
1421  CUDASequencer->patchData->factor = &(factor);
1422  }
1423  }
1424 
1426  NAMD_EVENT_START(1, NamdProfileEvent::CUDASOA_LAUNCHPT1);
1427  int previousMaxForceUsed;
1428  if(isMasterPe){
1429  // need to remember number of buffers for previous force calculation
1430  previousMaxForceUsed = maxForceUsed;
1431  // update local flags
1432  //doNonbonded = !(step%nonbondedFrequency);
1433  // no need to include doMCPressure since it's common factor of nonbondedFrequency
1434  doNonbonded = nonbondedFrequency.check(step);
1435  // no need to include doMCPressure since it's common factor of fullElectFrequency
1436  doFullElectrostatics = (dofull && fullElectFrequency.check(step));
1437  maxForceUsed = Results::normal;
1438  if ( doNonbonded ) maxForceUsed = Results::nbond;
1439  if ( doFullElectrostatics ) maxForceUsed = Results::slow;
1440 
1441  (this->patch->flags.sequence)++;
1442  // JM: Pressures needed for every timestep if the piston is on
1444 
1445  // copy local flags to global
1446  if(deviceCUDA->getIsMasterDevice()) CUDASequencer->patchData->flags.copyIntFlags(this->patch->flags);
1447  }
1448 
1450 
1451  if(isMasterPe){
1452  CUDASequencer->launch_part1(
1453  step,
1454  timestep, nbondstep, slowstep, velrescaling, maxvel2,
1455  *(CUDASequencer->patchData->factor),
1456  origin,
1457  // this->patch->lattice, // need to use the lattice from PE 0 right now
1458  (langevinPistonStep || berendsenPressureStep) ? *(CUDASequencer->patchData->lat) : this->patch->lattice,
1459  reassignVelocityStep,
1460  langevinPistonStep,
1461  berendsenPressureStep,
1462  previousMaxForceUsed, // call with previous maxForceUsed
1463  (const int)(step == simParams->firstTimestep + 1),
1464  this->patch->flags.savePairlists, // XXX how to initialize?
1465  this->patch->flags.usePairlists, // XXX how to initialize?
1466  doEnergy);
1467  // reset velocity rescaling coefficient after applying it
1468  velrescaling = 1;
1469  }
1470  if (reassignVelocityStep)
1471  {
1472  // CkPrintf("dump after launch_part1\n");
1473  // CUDASequencer->printSOAPositionsAndVelocities(2,10);
1474  }
1475  NAMD_EVENT_STOP(1, NamdProfileEvent::CUDASOA_LAUNCHPT1);
1476 
1478 
1479  if(isMasterPe){
1480  CUDASequencer->launch_part11(
1481  timestep, nbondstep, slowstep, velrescaling, maxvel2,
1482  *(CUDASequencer->patchData->factor),
1483  origin,
1484  // this->patch->lattice, // need to use the lattice from PE 0 right now
1485  (langevinPistonStep || berendsenPressureStep) ? *(CUDASequencer->patchData->lat) : this->patch->lattice,
1486  langevinPistonStep,
1487  previousMaxForceUsed, // call with previous maxForceUsed
1488  (const int)(step == simParams->firstTimestep + 1),
1489  this->patch->flags.savePairlists, // XXX how to initialize?
1490  this->patch->flags.usePairlists, // XXX how to initialize?
1491  doEnergy);
1492  // reset velocity rescaling coefficient after applying it
1493  velrescaling = 1;
1494  }
1495  NAMD_EVENT_STOP(1, NamdProfileEvent::CUDASOA_LAUNCHPT1);
1496 
1498 
1499 
1500  for(int i = 0; i < deviceCUDA->getNumDevice(); i++){
1501  if(CUDASequencer->patchData->migrationFlagPerDevice[i] != 0) {
1502  isMigration = true;
1503  break;
1504  }
1505  }
1506 
1507  if(isMasterPe){
1508  // If this is a Device Migration step we'll do it later
1509  if (!simParams->useDeviceMigration || !isMigration) {
1510  CUDASequencer->launch_set_compute_positions();
1511  }
1512  }
1513 
1514  // isMigration = (CUDASequencer->patchData->migrationFlagPerDevice.end() != t) ? 1:0;
1515 
1516  if(isMasterPe) {
1517  // if(CkMyPe() == 0) CUDASequencer->updatePairlistFlags(isMigration);
1518  CUDASequencer->updatePairlistFlags(isMigration);
1519  if (!simParams->useDeviceMigration) {
1520  CUDASequencer->copyPositionsAndVelocitiesToHost(isMigration, doGlobalObjects);
1521  }
1522  if (simParams->useCudaGlobal && !isMigration) {
1523  // Copy atoms to clients if CudaGlobalMaster is used
1524  // For a migration step, we will do it in runComputeObjectsCUDA
1526  auto cudaGlobal = deviceCUDA->getIsGlobalDevice() ? cudaMgr->getCudaGlobalMaster() : nullptr;
1527  if (cudaGlobal) {
1528  cudaGlobal->setStep(static_cast<int64_t>(patch->flags.step));
1529  cudaGlobal->communicateToClients(&(this->patch->lattice));
1530  }
1531  }
1532  }
1533 
1534 
1535  if(isMigration) {
1536  if (!simParams->useDeviceMigration) {
1538  wakeULTs(); // sets the number of patches
1539  this->patch->positionsReady_SOA(isMigration);
1540  if(CUDASequencer->numPatchesCheckedIn < patchMap->numPatchesOnNode(CkMyPe()) -1 ) {
1541  CUDASequencer->masterThreadSleeping = true;
1542  CUDASequencer->masterThread = CthSelf();
1543  CthSuspend(); // suspends until everyone else has pinged back. :]
1544  }
1546  updateDeviceData(0, maxForceUsed, doGlobal);
1547  } else {
1548  doMigrationGPU(false, doGlobal, simParams->updateAtomMap);
1550  }
1551  }
1552 
1553  // Calculate force/energy for bond, nonBond, pme.
1554 
1555  this->runComputeObjectsCUDA(isMigration, doGlobalObjects, step<numberOfSteps, step, 0 /* startup */);
1556 
1557  if (isMasterPe) {
1558  // if(CkMyPe() == 0) CUDASequencer->finish_patch_flags(isMigration);
1559  CUDASequencer->finish_patch_flags(isMigration);
1560  CUDASequencer->patchData->migrationFlagPerDevice[deviceCUDA->getDeviceIndex()] = 0; // flags it back to zero
1561  }
1563 
1564  NAMD_EVENT_START(1, NamdProfileEvent::CUDASOA_LAUNCHPT2);
1565  if(isMasterPe){
1567  const auto cudaGlobalMasterObject = cudaMgr->getCudaGlobalMaster();
1568  const bool addCudaGlobalForces =
1569  (cudaGlobalMasterObject != nullptr) ?
1570  cudaGlobalMasterObject->willAddGlobalForces() :
1571  false;
1572  CUDASequencer->launch_part2(doMCPressure,
1573  timestep, nbondstep, slowstep,
1574  origin,
1575  step,
1576  maxForceUsed,
1577  langevinPistonStep,
1578  isMigration && (!simParams->useDeviceMigration),
1579  isCollection,
1580  doGlobalStaleForces || addCudaGlobalForces,
1581  doEnergy);
1582  }
1584  NAMD_EVENT_STOP(1, NamdProfileEvent::CUDASOA_LAUNCHPT2);
1585 
1586  // Apply MC pressure control
1587  if(doMCPressure){
1588  monteCarloPressureControl(step, isMigration, 1, 1, maxForceUsed, doGlobalStaleForces);
1590  }
1591 
1592  const bool requestTotalForces = (computeGlobal ? computeGlobal->getForceSendActive() : false) && doGlobalObjects;
1593  // continue launch_part2, after cellBasis fluctuation in MC barostat
1594  if(isMasterPe){
1596  const auto CudaGlobalMasterObject = cudaMgr->getCudaGlobalMaster();
1597  const bool requestGPUTotalForces =
1598  (CudaGlobalMasterObject != nullptr) ?
1599  CudaGlobalMasterObject->requestedTotalForces() :
1600  false;
1601  CUDASequencer->launch_part3(doMCPressure,
1602  timestep, nbondstep, slowstep,
1603  origin,
1604  step,
1605  maxForceUsed,
1606  requestTotalForces, // requested Force
1607  doGlobalStaleForces,
1608  requestGPUTotalForces,
1609  isMigration,
1610  isCollection,
1611  doEnergy,
1612  isForcesOutputStep);
1613  }
1615  NAMD_EVENT_STOP(1, NamdProfileEvent::CUDASOA_LAUNCHPT2);
1616 
1617  // save total force in computeGlobal, forces are copied from device
1618  // to host in launch_part3
1619  if (requestTotalForces) {
1621  // store the total force for compute global clients
1622  int numhp = PatchMap::Object()->numHomePatches();
1624  for(int i = 0; i < numhp; ++i) {
1625  HomePatch *hp = hpList->item(i).patch;
1626  computeGlobal->saveTotalForces(hp);
1627  }
1628  }
1629 
1630  CUDASequencer->submitReductionValues();
1631 
1632  NAMD_EVENT_START(1, NamdProfileEvent::CUDASOA_PRTSTEP);
1633  syncColl->waitAndAwaken(); // Allow charm++ reductions to finish before calling require in print step
1634 
1635  if (deviceCUDA->getIsMasterDevice()) {
1636  // even though you're not on a printstep, calling this still takes 15us approx!!!
1637  c_out->printStep(step);
1638  // stochastic velocity rescaling
1639  // get coefficient from current temperature
1640  // to be applied on NEXT loop iteration
1641  if (doVelocityRescale) {
1642  // calculate coefficient based on current temperature
1643  velrescaling = c_out->stochRescaleCoefficient();
1644  broadcast->stochRescaleCoefficient.publish(step, velrescaling);
1645  }
1646  }
1647  // Non-master PEs should get the rescale factor here.
1648  if (doVelocityRescale) {
1649  syncColl->waitAndAwaken(); // Allow charm++ broadcast to happen and sync
1650  velrescaling = broadcast->stochRescaleCoefficient.get(step, CkNumPes());
1651  }
1652  NAMD_EVENT_STOP(1, NamdProfileEvent::CUDASOA_PRTSTEP);
1653 
1654  NAMD_EVENT_START(1, NamdProfileEvent::CUDASOA_SUBCOL);
1655  if (isCollection) {
1658  if (isMasterPe) {
1659  CUDASequencer->copyAoSDataToHost();
1660  }
1661  // Make sure the data has been copied to all home patches. All PEs
1662  // participate in outputting
1664  }
1665  HomePatchList *hplist = patchMap->homePatchList();
1666  for (auto i= hplist->begin(); i != hplist->end(); i++) {
1667  HomePatch *hp = i->patch;
1668  hp->sequencer->submitCollections_SOA(step);
1669  }
1670 
1671  syncColl->waitAndAwaken(); // Allow for collections to finish
1672  }
1673  NAMD_EVENT_STOP(1, NamdProfileEvent::CUDASOA_SUBCOL);
1674  }
1675 
1678  if (isMasterPe) {
1679  CUDASequencer->copyAoSDataToHost();
1680  CUDASequencer->saveForceCUDASOA_direct(false, true, maxForceUsed);
1681  }
1683  // Ensure that the SoA data is also fresh to avoid unforeseen issues. sort_solvent_atoms should not actually
1684  // order atoms, but ensure that the solute/solvent counts are accurate
1685  HomePatchList *hplist = patchMap->homePatchList();
1686  for (auto i= hplist->begin(); i != hplist->end(); i++) {
1687  HomePatch *hp = i->patch;
1688  hp->sort_solvent_atoms();
1689  hp->copy_atoms_to_SOA();
1690  hp->copy_forces_to_AOS(); // to support "output withforces"
1691  }
1692  } else {
1693  if(isMasterPe) {
1694  CUDASequencer->updateHostPatchDataSOA();
1695  CUDASequencer->saveForceCUDASOA_direct(false, true, maxForceUsed);
1696  }
1697  if(isMasterPe) CUDASequencer->copyPositionsAndVelocitiesToHost(true,doGlobal);
1699  HomePatchList *hplist = patchMap->homePatchList();
1700  for (auto i= hplist->begin(); i != hplist->end(); i++) {
1701  HomePatch *hp = i->patch;
1702  hp->copy_updates_to_AOS();
1703  hp->copy_forces_to_AOS(); // to support "output withforces"
1704  }
1705  }
1706  syncColl->barrier(SynchronousCollectiveScope::all); // Make sure the data has been copied to all home patches
1707 
1708  //syncColl->barrier(SynchronousCollectiveScope::all);
1709  CUDASequencer->breakSuspends = true;
1710  wakeULTs();
1711  if(deviceCUDA->getIsMasterDevice()) c_out->awaken();
1712 }
1713 
1714 
1715 /*
1716  * Updates device data after a migration
1717  *
1718  */
1719 void Sequencer::updateDeviceData(const int startup, const int maxForceUsed, const int doGlobal) {
// Refreshes device-side atom/patch data after a migration step.
//   startup      - nonzero on the first (startup) invocation; forwarded to the
//                  copy and post-migration helpers below.
//   maxForceUsed - highest force category in use this step (normal/nbond/slow);
//                  forwarded to copyAtomDataToDevice on the non-AoS path.
//   doGlobal     - nonzero when global-force clients (Tcl/colvars/IMD) are
//                  active; with forceDcdFrequency it gates the host SOA refresh.
// NOTE(review): this listing elides some conditionally-compiled lines
// (original lines 1721, 1724, 1735, 1738, 1746), so the closing braces below
// match scopes opened by hidden code -- confirm nesting against full source.
1720  bool isMaster = deviceCUDA->getMasterPe() == CkMyPe();
1722  if (isMaster) {
// Master PE path: copy patch metadata, then (presumably under the
// device-migration build -- hidden #if at orig. line 1724) reallocate the
// migration destination buffers and upload AoS atom data.
1723  CUDASequencer->copyPatchData(true, startup);
1725  CUDASequencer->reallocateMigrationDestination();
1726  CUDASequencer->copyAtomDataToDeviceAoS();
1727  } else {
// Non-master path: upload this PE's atom data with the force-buffer count.
1728  CUDASequencer->copyAtomDataToDevice(startup, maxForceUsed);
1729  }
// Post-migration bookkeeping local to this device.
1730  CUDASequencer->migrationLocalPost(startup);
1731  CUDASequencer->migrationUpdateAdvancedFeatures(startup);
1732  // XXX This is only necessary if reallocation happens
1733  CUDASequencer->registerSOAPointersToHost();
1734  }
1736  if (isMaster) {
// Publish the freshly registered SOA host pointers to the device and clear
// this device's "atom buffers were reallocated" flag.
1737  CUDASequencer->copySOAHostRegisterToDevice();
1739  CUDASequencer->patchData->atomReallocationFlagPerDevice[deviceCUDA->getDeviceIndex()] = 0;
1740  }
1741 
// Host-side SOA patch data is only needed when global forces are collected
// or forces are being written to DCD output.
1742  if (doGlobal || simParams->forceDcdFrequency > 0) {
1743  CUDASequencer->updateHostPatchDataSOA(); // Needs to be called after HomePatch::domigration
1744  }
1745  }
1747 }
1748 
1749 /*
1750  * Constructs the meta data structures storing the patch data for GPU resident code path
1751  *
1752  * This is called once during startup
1753  *
1754  */
// Builds the per-device patch map metadata for the GPU-resident code path:
// the list of local (home + proxy) patches per device, their ordering, and
// the cross-device peer-record table used for multi-GPU communication.
// Runs once per PE (guarded by devicePatchMapFlag); the heavy work runs only
// on each device's master PE.
// NOTE(review): the function signature (orig. lines 1755-1756, presumably
// void Sequencer::constructDevicePatchMap() plus the cudaMgr lookup) is
// elided in this listing -- confirm against full source.
1757  ComputeBondedCUDA* cudaBond = cudaMgr->getComputeBondedCUDA();
1758  CudaComputeNonbonded* cudaNbond = cudaMgr->getCudaComputeNonbonded();
1759 
1760  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1761  patchData = cpdata.ckLocalBranch();
1762 
1763  const bool isMasterPe = deviceCUDA->getMasterPe() == CkMyPe();
1764 
1765  // constructDevicePatchMap should only be called once per PE
1766  if (patchData->devicePatchMapFlag[CkMyPe()]) return;
1767  patchData->devicePatchMapFlag[CkMyPe()] = 1;
1768 
1769  // One thread per GPU will execute this block
1770  if (isMasterPe) {
1771  const int deviceIndex = deviceCUDA->getDeviceIndex();
1772 
1773  // Nonbonded patches are computed by CudaComputeNonbonded and contain all the patches and proxy
1774  // patches on this device. HomePatches is computed by SequencerCUDA and only contains the
1775  // home patches. localPatches will be generated by this function
1776  using NBPatchRecord = CudaComputeNonbonded::PatchRecord;
1778  std::vector<NBPatchRecord>& nonbondedPatches = cudaNbond->getPatches();
1779  std::vector<HomePatch*>& homePatches = patchData->devData[deviceIndex].patches;
1780  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1781 
1782  // The home patches are not necessarily ordered by their patchID. This can happen if there
1783  // are multiple PEs assigned to the same GPU. Sorting the home patches by their patch ID
1784  // makes it easy to have a consistent ordering
1785  std::stable_sort(
1786  homePatches.begin(),
1787  homePatches.end(),
1788  [](HomePatch* a, HomePatch* b) {
1789  return (a->getPatchID() < b->getPatchID());
1790  });
1791 
1792  // Iterates over all the patches on this device, adds them to h_localPatches,
1793  // and determines if each is a home or proxy patch
1794  for (int i = 0; i < nonbondedPatches.size(); i++) {
1795  CudaLocalRecord record;
1796  record.patchID = nonbondedPatches[i].patchID;
1797 
1798  // TODO DMC the patchmap should be able to do this
// Linear search of the sorted home-patch list; a patch absent from
// homePatches is a proxy on this device.
1799  const int targetPatchID = record.patchID;
1800  auto result = std::find_if(
1801  homePatches.begin(),
1802  homePatches.end(),
1803  [targetPatchID](HomePatch* p) {
1804  return (p->getPatchID() == targetPatchID);
1805  });
1806 
1807  record.isProxy = (result == homePatches.end());
1808  localPatches.push_back(record);
1809  }
1810 
1811  // The home patches should be at the beginning of the patch list
1812  // This makes integration easier since we can ignore the patches and operate on a
1813  // contiguous chunk of home atoms
1814  std::stable_sort(
1815  localPatches.begin(),
1816  localPatches.end(),
1817  [](CudaLocalRecord a, CudaLocalRecord b) {
// stable partition: home (isProxy==0) before proxy (isProxy==1),
// preserving the patchID order established above within each group
1818  return (a.isProxy < b.isProxy);
1819  });
1820 
1821  // Now the ordering is fixed we can update the bonded and nonbonded orders. Since we have
1822  // moved the home patches to the beginning, the ordering has changed
1823  cudaBond->updatePatchOrder(localPatches);
1824  cudaNbond->updatePatchOrder(localPatches);
1825  patchData->devData[deviceIndex].numPatchesHome = homePatches.size();
1826  patchData->devData[deviceIndex].numPatchesHomeAndProxy = localPatches.size();
1827  }
1829 
1830  // Iterates over all patches again, and generates the mapping between GPUs. For each patch,
1831  // it checks the other devices to see if the patch is on that device.
1832  // - For HomePatches, there will be a peer record for all of its proxies
1833  // - For ProxyPatches, there will only be a peer record for its home patch
1834  // There is a single array of peer records per device. Each patch stores an offset into this
1835  // array as well as its number of peer records
// NOTE(review): a line between the two isMasterPe blocks (orig. 1828) is
// elided in this listing -- likely a barrier so every device's localPatches
// is complete before peers are scanned; confirm against full source.
1836  if (isMasterPe) {
1837  const int deviceIndex = deviceCUDA->getDeviceIndex();
1838  std::vector<CudaPeerRecord>& myPeerPatches = patchData->devData[deviceIndex].h_peerPatches;
1839  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1840 
1841  for (int i = 0; i < localPatches.size(); i++) {
1842  std::vector<CudaPeerRecord> tempPeers;
1843  const int targetPatchID = localPatches[i].patchID;
1844  const int targetIsProxy = localPatches[i].isProxy;
1845 
1846  for (int devIdx = 0; devIdx < deviceCUDA->getNumDevice(); devIdx++) {
1847  if (devIdx == deviceIndex) continue;
1848  std::vector<CudaLocalRecord>& peerPatches = patchData->devData[devIdx].h_localPatches;
1849 
1850  // Searches peerPatches for patchID. If it is not being integrated on this device
1851  // then ignore other non-integration patches
// Match requires opposite isProxy: a home patch pairs with its proxies
// on other devices, and a proxy pairs with its home patch. At most one
// match per peer device (break after first hit).
1852  for (int j = 0; j < patchData->devData[devIdx].numPatchesHomeAndProxy; j++) {
1853  const CudaLocalRecord peer = peerPatches[j];
1854  if (peer.patchID == targetPatchID && peer.isProxy != targetIsProxy) {
1855  CudaPeerRecord peerRecord;
1856  peerRecord.deviceIndex = devIdx;
1857  peerRecord.patchIndex = j;
1858  tempPeers.push_back(peerRecord);
1859  break;
1860  }
1861  }
1862  }
1863 
1864  // Once we have the list of peer records, add them to the single device-width vector
1865  // and record the offset and count
1866  localPatches[i].numPeerRecord = tempPeers.size();
1867  if (!tempPeers.empty()) {
1868  localPatches[i].peerRecordStartIndex = myPeerPatches.size();
1869  myPeerPatches.insert(myPeerPatches.end(), tempPeers.begin(), tempPeers.end());
1870  }
1871  }
1872  }
1874 }
1875 
// Debugging aid: prints this device's patch map (home patches first, then
// proxy patches) from each master PE. Output is serialized across PEs via
// patchData->printlock so the per-device dumps do not interleave.
// NOTE(review): the function signature (orig. line 1876, presumably
// void Sequencer::printDevicePatchMap()) is elided in this listing.
1877  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1878  patchData = cpdata.ckLocalBranch();
1879 
1880  const bool isMasterPe = deviceCUDA->getMasterPe() == CkMyPe();
1881 
1882  if (isMasterPe) {
1883  const int deviceIndex = deviceCUDA->getDeviceIndex();
1884  const int numPatchesHome = patchData->devData[deviceIndex].numPatchesHome;
1885  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1886 
1887  CmiLock(patchData->printlock);
1888  CkPrintf("PE: %d\n", CkMyPe());
1889 
1890  CkPrintf("[%d] Home patches %d Local patches %d\n", CkMyPe(), numPatchesHome, localPatches.size());
1891 
// Home patches occupy indices [0, numPatchesHome) after the stable sort
// performed in constructDevicePatchMap.
1892  CkPrintf("Home Patches: ");
1893  for (int i = 0; i < numPatchesHome; i++) {
1894  CkPrintf("%d ", localPatches[i].patchID);
1895  }
1896  CkPrintf("\n");
1897 
// Remaining entries [numPatchesHome, size) are proxy patches.
1898  CkPrintf("Proxy Patches: ");
1899  for (int i = numPatchesHome; i < localPatches.size(); i++) {
1900  CkPrintf("%d ", localPatches[i].patchID);
1901  }
1902  CkPrintf("\n");
1903 
1904  CmiUnlock(patchData->printlock);
1905  }
1907 }
1908 
// Tears down the per-device patch map built by constructDevicePatchMap:
// resets the once-per-PE guard flag and, on each device's master PE, empties
// the home/local/peer patch vectors so the map can be rebuilt later.
// NOTE(review): the function signature (orig. line 1909, presumably
// void Sequencer::clearDevicePatchMap()) is elided in this listing.
1910  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1911  patchData = cpdata.ckLocalBranch();
1912 
1913  const bool isMasterPe = deviceCUDA->getMasterPe() == CkMyPe();
1914 
1915  // clearDevicePatchMap should only be called once per PE
1916  if (!patchData->devicePatchMapFlag[CkMyPe()]) return;
1917  patchData->devicePatchMapFlag[CkMyPe()] = 0;
1918 
1919  // One thread per GPU will execute this block
1920  if (isMasterPe) {
1921  const int deviceIndex = deviceCUDA->getDeviceIndex();
1922 
// NBPatchRecord alias appears unused here; kept as in original.
1923  using NBPatchRecord = CudaComputeNonbonded::PatchRecord;
1924  std::vector<HomePatch*>& homePatches = patchData->devData[deviceIndex].patches;
1925  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1926  std::vector<CudaPeerRecord>& peerPatches = patchData->devData[deviceIndex].h_peerPatches;
1927 
// clear() empties the vectors (counts in devData such as numPatchesHome
// are not reset here; construct recomputes them on rebuild).
1928  homePatches.clear();
1929  localPatches.clear();
1930  peerPatches.clear();
1932  }
1933 }
1934 
1935 /*
1936  * Updates the metadata structures storing the patch data for the GPU-resident code path.
1937  *
1938  * This is called every migration step. The actual mapping stays the same,
1939  * but the atom counts per patch change.
1940  *
1941  */
// NOTE(review): the doc extraction dropped interior source lines 1952-1953,
// 1963, 1976, 1996, 2014, and 2043. From the surrounding uses, 1952-1953
// declared `cudaMgr` (used at 1954) and 1963 declared `pm` (used at 1964);
// the lines between the isMasterPe phases (1976, 1996, 2014, 2043) likely
// held synchronization between master PEs, since phase 2 reads the
// h_localPatches of OTHER devices written in phase 1 — restore all of these
// from the repository before compiling.
//
// Refreshes per-patch atom counts, padded counts, buffer offsets, and peer
// records for this device after atom migration; the patch-to-device mapping
// itself is unchanged. Runs only on the master PE of each device.
1942 void Sequencer::updateDevicePatchMap(int startup) {
1943  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1944  patchData = cpdata.ckLocalBranch();
1945 
1946  const bool isMasterPe = deviceCUDA->getMasterPe() == CkMyPe();
1947 
// Phase 1: refresh atom counts (plain and nonbonded-padded) for home patches.
1948  if (isMasterPe) {
1949  const int deviceIndex = deviceCUDA->getDeviceIndex();
1950  const int numPatchesHome = patchData->devData[deviceIndex].numPatchesHome;
1951  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1954  CudaComputeNonbonded* cudaNbond = cudaMgr->getCudaComputeNonbonded();
1955 
1956  int max_atom_count = 0;
1957  int total_atom_count = 0;
1958 
1959  // Update the atom count of home patches
1960  for (int i = 0; i < numPatchesHome; i++) {
1961  Patch* patch = NULL;
// The patch may be owned by any PE sharing this device; probe each one.
1962  for(int j = 0; j < deviceCUDA->getNumPesSharingDevice(); j++){
1964  patch = pm->patch(localPatches[i].patchID);
1965  if (patch != NULL) break;
1966  }
1967  if (patch == NULL) NAMD_die("Sequencer: Failed to find patch in updateDevicePatchMap");
1968 
1969  localPatches[i].numAtoms = patch->getNumAtoms();
// Pad the per-patch atom count up to the nonbonded kernel's tile size.
1970  localPatches[i].numAtomsNBPad = CudaComputeNonbondedKernel::computeAtomPad(localPatches[i].numAtoms);
1971 
1972  if (localPatches[i].numAtoms > max_atom_count) max_atom_count = localPatches[i].numAtoms;
1973  total_atom_count += localPatches[i].numAtoms;
1974  }
1975  }
1977 
1978  // Update the proxy patches next, using the home patch atom counts of other devices
1979  if (isMasterPe) {
1980  const int deviceIndex = deviceCUDA->getDeviceIndex();
1981  const int numPatchesHome = patchData->devData[deviceIndex].numPatchesHome;
1982  const int numPatchesHomeAndProxy = patchData->devData[deviceIndex].numPatchesHomeAndProxy;
1983  std::vector<CudaPeerRecord>& peerPatches = patchData->devData[deviceIndex].h_peerPatches;
1984  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1985 
// Each proxy patch copies its counts from the owning device's home record,
// located via the first peer record of the proxy.
1986  for (int i = numPatchesHome; i < numPatchesHomeAndProxy; i++) {
1987  const int index = localPatches[i].peerRecordStartIndex;
1988  const int devIdx = peerPatches[index].deviceIndex;
1989  const int peerIdx = peerPatches[index].patchIndex;
1990  const CudaLocalRecord peer = patchData->devData[devIdx].h_localPatches[peerIdx];
1991 
1992  localPatches[i].numAtoms = peer.numAtoms;
1993  localPatches[i].numAtomsNBPad = peer.numAtomsNBPad;
1994  }
1995  }
1997 
1998  // Computes the offset for each patch using the atom counts
1999  if (isMasterPe) {
2000  const int deviceIndex = deviceCUDA->getDeviceIndex();
2001  const int numPatchesHomeAndProxy = patchData->devData[deviceIndex].numPatchesHomeAndProxy;
2002  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
2003 
// Exclusive prefix sum over atom counts yields each patch's start offset in
// the device-wide atom buffers (one plain, one NB-padded).
2004  int runningOffset = 0;
2005  int runningOffsetNBPad = 0;
2006  // TODO Change to a C++ prefix sum
2007  for (int i = 0; i < numPatchesHomeAndProxy; i++) {
2008  localPatches[i].bufferOffset = runningOffset;
2009  localPatches[i].bufferOffsetNBPad = runningOffsetNBPad;
2010  runningOffset += localPatches[i].numAtoms;
2011  runningOffsetNBPad += localPatches[i].numAtomsNBPad;
2012  }
2013  }
2015 
2016  // Update the peer records using the local record data
2017  if (isMasterPe) {
2018  const int deviceIndex = deviceCUDA->getDeviceIndex();
2019  const int numPatchesHomeAndProxy = patchData->devData[deviceIndex].numPatchesHomeAndProxy;
2020  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
2021  std::vector<CudaPeerRecord>& peerPatches = patchData->devData[deviceIndex].h_peerPatches;
2022 
2023 
// Pull the freshly computed buffer offsets of the referenced patches on the
// other devices into this device's peer records.
2024  for (int i = 0; i < peerPatches.size(); i++) {
2025  const int devIdx = peerPatches[i].deviceIndex;
2026  const int peerIdx = peerPatches[i].patchIndex;
2027  const CudaLocalRecord peer = patchData->devData[devIdx].h_localPatches[peerIdx];
2028 
2029  peerPatches[i].bufferOffset = peer.bufferOffset;
2030  peerPatches[i].bufferOffsetNBPad = peer.bufferOffsetNBPad;
2031  }
2032 
2033  // Update inline copy of peer data
2034  for (int i = 0; i < numPatchesHomeAndProxy; i++) {
2035  const int numPeerRecord = localPatches[i].numPeerRecord;
2036  const int peerOffset = localPatches[i].peerRecordStartIndex;
2037 
// Only the first num_inline_peer records are mirrored inline in the
// CudaLocalRecord; any excess stays in the peerPatches array.
2038  for (int j = 0; j < std::min(numPeerRecord, CudaLocalRecord::num_inline_peer); j++) {
2039  localPatches[i].inline_peers[j] = peerPatches[peerOffset+j];
2040  }
2041  }
2042  }
2044 }
2045 
2046 #endif
2047 
2048 
// Main per-patch timestep loop for the SOA (structure-of-arrays) code path.
// Performs the velocity-Verlet-style kick/drift sequence over the SOA force
// and velocity arrays, schedules nonbonded/full-electrostatic evaluation and
// output via CheckStep counters, and suspends inside runComputeObjects_SOA()
// each step. On exit, results are copied back to the AOS representation.
//
// NOTE(review): the doc extraction dropped several interior source lines
// (2091, 2132, 2208, 2253, 2289, 2346, 2349, 2358, 2362, 2435, 2489, 2527,
// 2564, 2592, 2630). Most of these held the callee names whose argument
// lists remain below (per the surrounding comments: submitHalfstep,
// submitReductions, berendsenPressure, langevinPiston,
// langevinVelocitiesBBK1/BBK2, stochRescaleVelocities, and profiling
// start/stop calls) — restore them from the repository before compiling.
2049 void Sequencer::integrate_SOA(int scriptTask) {
2050  //
2051  // Below when accessing the array buffers for position, velocity, force,
2052  // note that we don't want to set up pointers directly to the buffers
2053  // because the allocations might get resized after atom migration.
2054  //
2055 
2056 #ifdef TIMER_COLLECTION
2057  TimerSet& t = patch->timerSet;
2058 #endif
2059  TIMER_INIT_WIDTH(t, KICK, simParams->timerBinWidth);
2060  TIMER_INIT_WIDTH(t, MAXMOVE, simParams->timerBinWidth);
2061  TIMER_INIT_WIDTH(t, DRIFT, simParams->timerBinWidth);
2062  TIMER_INIT_WIDTH(t, PISTON, simParams->timerBinWidth);
2063  TIMER_INIT_WIDTH(t, SUBMITHALF, simParams->timerBinWidth);
2064  TIMER_INIT_WIDTH(t, VELBBK1, simParams->timerBinWidth);
2065  TIMER_INIT_WIDTH(t, VELBBK2, simParams->timerBinWidth);
2066  TIMER_INIT_WIDTH(t, RATTLE1, simParams->timerBinWidth);
2067  TIMER_INIT_WIDTH(t, SUBMITFULL, simParams->timerBinWidth);
2068  TIMER_INIT_WIDTH(t, SUBMITCOLLECT, simParams->timerBinWidth);
2069 
2070  // Keep track of the step number.
2071  int &step = patch->flags.step;
2072  step = simParams->firstTimestep;
2073 
2074  // For multiple time stepping, which force boxes are used?
2075  int &maxForceUsed = patch->flags.maxForceUsed;
2076  int &maxForceMerged = patch->flags.maxForceMerged;
2077  maxForceUsed = Results::normal;
2078  maxForceMerged = Results::normal;
2079 
2080  // Keep track of total steps and steps per cycle.
2081  const int numberOfSteps = simParams->N;
2082  //const int stepsPerCycle = simParams->stepsPerCycle;
2083  CheckStep stepsPerCycle;
2084  stepsPerCycle.init(step, simParams->stepsPerCycle);
2085  // The fundamental time step, get the scaling right for velocity units.
2086  const BigReal timestep = simParams->dt * RECIP_TIMEFACTOR;
2087 
2088  //const int nonbondedFrequency = simParams->nonbondedFrequency;
2089  //slowFreq = nonbondedFrequency;
2090  CheckStep nonbondedFrequency;
2092  // The step size for short-range nonbonded forces.
2093  const BigReal nbondstep = timestep * simParams->nonbondedFrequency;
2094  int &doNonbonded = patch->flags.doNonbonded;
2095  //doNonbonded = (step >= numberOfSteps) || !(step%nonbondedFrequency);
2096  doNonbonded = (step >= numberOfSteps) ||
2097  nonbondedFrequency.init(step, simParams->nonbondedFrequency);
2098  //if ( nonbondedFrequency == 1 ) maxForceMerged = Results::nbond;
2099  if ( nonbondedFrequency.period == 1 ) maxForceMerged = Results::nbond;
2100  if ( doNonbonded ) maxForceUsed = Results::nbond;
2101 
2102  // Do we do full electrostatics?
2103  const int dofull = ( simParams->fullElectFrequency ? 1 : 0 );
2104  //const int fullElectFrequency = simParams->fullElectFrequency;
2105  //if ( dofull ) slowFreq = fullElectFrequency;
2106  CheckStep fullElectFrequency;
2107  if ( dofull ) slowFreq = simParams->fullElectFrequency;
2108  // The step size for long-range electrostatics.
2109  const BigReal slowstep = timestep * simParams->fullElectFrequency;
2110  int &doFullElectrostatics = patch->flags.doFullElectrostatics;
2111  //doFullElectrostatics = (dofull &&
2112  // ((step >= numberOfSteps) || !(step%fullElectFrequency)));
2113  doFullElectrostatics = (dofull &&
2114  ((step >= numberOfSteps) ||
2115  fullElectFrequency.init(step, simParams->fullElectFrequency)));
2116  //if ( dofull && fullElectFrequency == 1 ) maxForceMerged = Results::slow;
2117  if ( dofull && fullElectFrequency.period == 1 ) maxForceMerged = Results::slow;
2118  if ( doFullElectrostatics ) maxForceUsed = Results::slow;
2119 
2120  // Bother to calculate energies?
2121  int &doEnergy = patch->flags.doEnergy;
2122  //int energyFrequency = simParams->outputEnergies;
2123  CheckStep energyFrequency;
2124  int newComputeEnergies = simParams->computeEnergies;
// With alchemy on, energies must also land on alchOutFreq boundaries, so
// evaluate at the gcd of the two periods.
2125  if(simParams->alchOn) newComputeEnergies = NAMD_gcd(newComputeEnergies, simParams->alchOutFreq);
2126  doEnergy = energyFrequency.init(step, newComputeEnergies);
2127 
2128  // Do we need to return forces to TCL script or Colvar module?
2129  int doTcl = simParams->tclForcesOn;
2130  int doColvars = simParams->colvarsOn;
2131  int doGlobal = doTcl || doColvars;
2133  int &doVirial = patch->flags.doVirial;
2134  doVirial = 1;
2135 
2136  // The following flags have to be explicitly disabled in Patch object.
2137  patch->flags.doMolly = 0;
2138  patch->flags.doLoweAndersen = 0;
2139  patch->flags.doGBIS = 0;
2140  patch->flags.doLCPO = 0;
2141 
2142  // Square of maximum velocity for simulation safety check
2143  const BigReal maxvel2 =
2144  (simParams->cutoff * simParams->cutoff) / (timestep * timestep);
2145 
2146  // check for Langevin piston
2147  // set period beyond numberOfSteps to disable
2148  CheckStep langevinPistonFrequency;
2149  langevinPistonFrequency.init(step,
2150  (simParams->langevinPistonOn ? slowFreq : numberOfSteps+1 ),
2151  (simParams->langevinPistonOn ? -1-slowFreq/2 : 0) /* = delta */);
2152 
2153  // check for output
2154  // set period beyond numberOfSteps to disable
2155  CheckStep restartFrequency;
2156  restartFrequency.init(step, (simParams->restartFrequency ?
2157  simParams->restartFrequency : numberOfSteps+1) );
2158  CheckStep dcdFrequency;
2159  dcdFrequency.init(step, (simParams->dcdFrequency ?
2160  simParams->dcdFrequency : numberOfSteps+1) );
2161  CheckStep velDcdFrequency;
2162  velDcdFrequency.init(step, (simParams->velDcdFrequency ?
2163  simParams->velDcdFrequency : numberOfSteps+1) );
2164  CheckStep forceDcdFrequency;
2165  forceDcdFrequency.init(step, (simParams->forceDcdFrequency ?
2166  simParams->forceDcdFrequency : numberOfSteps+1) );
2167  CheckStep imdFrequency;
2168  imdFrequency.init(step, (simParams->IMDfreq ?
2169  simParams->IMDfreq : numberOfSteps+1) );
2170 
// First-step bootstrap: constrain initial positions, compute initial forces,
// and take the initial half-kicks so the loop below can start mid-scheme.
2171  if ( scriptTask == SCRIPT_RUN ) {
2172  // enforce rigid bond constraints on initial positions
2173  TIMER_START(t, RATTLE1);
2174  rattle1_SOA(0., 0);
2175  TIMER_STOP(t, RATTLE1);
2176 
2177  // must migrate here!
2178  int natoms = patch->patchDataSOA.numAtoms;
2179  runComputeObjects_SOA(1, step<numberOfSteps, step);
2180  // kick -0.5
2181  TIMER_START(t, KICK);
2182  addForceToMomentum_SOA(-0.5, timestep, nbondstep, slowstep,
2183 #ifndef SOA_SIMPLIFY_PARAMS
2184  patch->patchDataSOA.recipMass,
2185  patch->patchDataSOA.f_normal_x,
2186  patch->patchDataSOA.f_normal_y,
2187  patch->patchDataSOA.f_normal_z,
2188  patch->patchDataSOA.f_nbond_x,
2189  patch->patchDataSOA.f_nbond_y,
2190  patch->patchDataSOA.f_nbond_z,
2191  patch->patchDataSOA.f_slow_x,
2192  patch->patchDataSOA.f_slow_y,
2193  patch->patchDataSOA.f_slow_z,
2194  patch->patchDataSOA.vel_x,
2195  patch->patchDataSOA.vel_y,
2196  patch->patchDataSOA.vel_z,
2197  patch->patchDataSOA.numAtoms,
2198 #endif
2199  maxForceUsed
2200  );
2201  TIMER_STOP(t, KICK);
2202 
2203  TIMER_START(t, RATTLE1);
2204  rattle1_SOA(-timestep, 0);
2205  TIMER_STOP(t, RATTLE1);
2206 
2207  TIMER_START(t, SUBMITHALF);
2209 #ifndef SOA_SIMPLIFY_PARAMS
2210  patch->patchDataSOA.hydrogenGroupSize,
2211  patch->patchDataSOA.mass,
2212  patch->patchDataSOA.vel_x,
2213  patch->patchDataSOA.vel_y,
2214  patch->patchDataSOA.vel_z,
2215  patch->patchDataSOA.numAtoms
2216 #endif
2217  );
2218  TIMER_STOP(t, SUBMITHALF);
2219 
2220  // kick 1.0
2221  TIMER_START(t, KICK);
2222  addForceToMomentum_SOA(1.0, timestep, nbondstep, slowstep,
2223 #ifndef SOA_SIMPLIFY_PARAMS
2224  patch->patchDataSOA.recipMass,
2225  patch->patchDataSOA.f_normal_x,
2226  patch->patchDataSOA.f_normal_y,
2227  patch->patchDataSOA.f_normal_z,
2228  patch->patchDataSOA.f_nbond_x,
2229  patch->patchDataSOA.f_nbond_y,
2230  patch->patchDataSOA.f_nbond_z,
2231  patch->patchDataSOA.f_slow_x,
2232  patch->patchDataSOA.f_slow_y,
2233  patch->patchDataSOA.f_slow_z,
2234  patch->patchDataSOA.vel_x,
2235  patch->patchDataSOA.vel_y,
2236  patch->patchDataSOA.vel_z,
2237  patch->patchDataSOA.numAtoms,
2238 #endif
2239  maxForceUsed
2240  );
2241  TIMER_STOP(t, KICK);
2242 
2243  TIMER_START(t, RATTLE1);
2244  rattle1_SOA(timestep, 1);
2245  TIMER_STOP(t, RATTLE1);
2246 
2247  // save total force in computeGlobal
2248  if (doGlobal) {
2249  computeGlobal->saveTotalForces(patch);
2250  }
2251 
2252  TIMER_START(t, SUBMITHALF);
2254 #ifndef SOA_SIMPLIFY_PARAMS
2255  patch->patchDataSOA.hydrogenGroupSize,
2256  patch->patchDataSOA.mass,
2257  patch->patchDataSOA.vel_x,
2258  patch->patchDataSOA.vel_y,
2259  patch->patchDataSOA.vel_z,
2260  patch->patchDataSOA.numAtoms
2261 #endif
2262  );
2263  TIMER_STOP(t, SUBMITHALF);
2264 
2265  // kick -0.5
2266  TIMER_START(t, KICK);
2267  addForceToMomentum_SOA(-0.5, timestep, nbondstep, slowstep,
2268 #ifndef SOA_SIMPLIFY_PARAMS
2269  patch->patchDataSOA.recipMass,
2270  patch->patchDataSOA.f_normal_x,
2271  patch->patchDataSOA.f_normal_y,
2272  patch->patchDataSOA.f_normal_z,
2273  patch->patchDataSOA.f_nbond_x,
2274  patch->patchDataSOA.f_nbond_y,
2275  patch->patchDataSOA.f_nbond_z,
2276  patch->patchDataSOA.f_slow_x,
2277  patch->patchDataSOA.f_slow_y,
2278  patch->patchDataSOA.f_slow_z,
2279  patch->patchDataSOA.vel_x,
2280  patch->patchDataSOA.vel_y,
2281  patch->patchDataSOA.vel_z,
2282  patch->patchDataSOA.numAtoms,
2283 #endif
2284  maxForceUsed
2285  );
2286  TIMER_STOP(t, KICK);
2287 
2288  TIMER_START(t, SUBMITFULL);
2290 #ifndef SOA_SIMPLIFY_PARAMS
2291  patch->patchDataSOA.hydrogenGroupSize,
2292  patch->patchDataSOA.mass,
2293  patch->patchDataSOA.pos_x,
2294  patch->patchDataSOA.pos_y,
2295  patch->patchDataSOA.pos_z,
2296  patch->patchDataSOA.vel_x,
2297  patch->patchDataSOA.vel_y,
2298  patch->patchDataSOA.vel_z,
2299  patch->patchDataSOA.f_normal_x,
2300  patch->patchDataSOA.f_normal_y,
2301  patch->patchDataSOA.f_normal_z,
2302  patch->patchDataSOA.f_nbond_x,
2303  patch->patchDataSOA.f_nbond_y,
2304  patch->patchDataSOA.f_nbond_z,
2305  patch->patchDataSOA.f_slow_x,
2306  patch->patchDataSOA.f_slow_y,
2307  patch->patchDataSOA.f_slow_z,
2308  patch->patchDataSOA.numAtoms
2309 #endif
2310  );
2311  TIMER_STOP(t, SUBMITFULL);
2312 
2313  rebalanceLoad(step);
2314  } // scriptTask == SCRIPT_RUN
2315 
2316 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
2317  int& eon = patch->flags.event_on;
2318  int epid = (simParams->beginEventPatchID <= patch->getPatchID()
2319  && patch->getPatchID() <= simParams->endEventPatchID);
2320  int beginStep = simParams->beginEventStep;
2321  int endStep = simParams->endEventStep;
2322  bool controlProfiling = patch->getPatchID() == 0;
2323 #endif
2324 
// Main timestep loop: one iteration per MD step.
2325  for ( ++step; step <= numberOfSteps; ++step ) {
2326  int dcdSelectionChecks=0;
2327  Molecule *molecule = Node::Object()->molecule;
2328  for(int dcdindex=0; dcdindex<16;++dcdindex)
2329  {
2330  int dcdSelectionFrequency = molecule->dcdSelectionParams[dcdindex].frequency;
2331  if(dcdSelectionFrequency && step % dcdSelectionFrequency==0)
2332  dcdSelectionChecks++;
2333  }
// Sum (not short-circuit) all output conditions: nonzero means this step
// must submit coordinates/velocities/forces for collection.
2334  const int isCollection = restartFrequency.check(step) +
2335  dcdFrequency.check(step) + velDcdFrequency.check(step) +
2336  forceDcdFrequency.check(step) + imdFrequency.check(step) +
2337  dcdSelectionChecks;
2338  const int isMigration = stepsPerCycle.check(step);
2339  doEnergy = energyFrequency.check(step);
2340  DebugM(3,"doGlobal now "<< doGlobal<<"\n"<<endi);
2341 
2342 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
2343  eon = epid && (beginStep < step && step <= endStep);
2344 
2345  if (controlProfiling && step == beginStep) {
2347  }
2348  if (controlProfiling && step == endStep) {
2350  }
2351 // NAMD_EVENT_START(eon, NamdProfileEvent::INTEGRATE_SOA_1);
2352  char buf[32];
2353  sprintf(buf, "%s: %d", NamdProfileEventStr[NamdProfileEvent::INTEGRATE_SOA_1], patch->getPatchID());
2354  NAMD_EVENT_START_EX(eon, NamdProfileEvent::INTEGRATE_SOA_1, buf);
2355 #endif
2356 
2357  if ( simParams->stochRescaleOn ) {
2359  }
2360 
2361  if ( simParams->berendsenPressureOn ) {
2363 #ifndef SOA_SIMPLIFY_PARAMS
2364  patch->patchDataSOA.hydrogenGroupSize,
2365  patch->patchDataSOA.mass,
2366  patch->patchDataSOA.pos_x,
2367  patch->patchDataSOA.pos_y,
2368  patch->patchDataSOA.pos_z,
2369  patch->patchDataSOA.numAtoms,
2370 #endif
2371  step);
2372  }
2373 
2374  // kick 0.5
2375  TIMER_START(t, KICK);
2376  addForceToMomentum_SOA(0.5, timestep, nbondstep, slowstep,
2377 #ifndef SOA_SIMPLIFY_PARAMS
2378  patch->patchDataSOA.recipMass,
2379  patch->patchDataSOA.f_normal_x,
2380  patch->patchDataSOA.f_normal_y,
2381  patch->patchDataSOA.f_normal_z,
2382  patch->patchDataSOA.f_nbond_x,
2383  patch->patchDataSOA.f_nbond_y,
2384  patch->patchDataSOA.f_nbond_z,
2385  patch->patchDataSOA.f_slow_x,
2386  patch->patchDataSOA.f_slow_y,
2387  patch->patchDataSOA.f_slow_z,
2388  patch->patchDataSOA.vel_x,
2389  patch->patchDataSOA.vel_y,
2390  patch->patchDataSOA.vel_z,
2391  patch->patchDataSOA.numAtoms,
2392 #endif
2393  maxForceUsed
2394  );
2395  TIMER_STOP(t, KICK);
2396 
2397  // maximumMove checks velocity bound on atoms
2398  TIMER_START(t, MAXMOVE);
2399  maximumMove_SOA(timestep, maxvel2
2400 #ifndef SOA_SIMPLIFY_PARAMS
2401  ,
2402  patch->patchDataSOA.vel_x,
2403  patch->patchDataSOA.vel_y,
2404  patch->patchDataSOA.vel_z,
2405  patch->patchDataSOA.numAtoms
2406 #endif
2407  );
2408  TIMER_STOP(t, MAXMOVE);
2409 
2410 
2411  NAMD_EVENT_STOP(eon, NamdProfileEvent::INTEGRATE_SOA_1);
2412 
2413  // Check to see if Langevin piston is enabled this step:
2414  // ! ((step-1-slowFreq/2) % slowFreq)
2415  if ( langevinPistonFrequency.check(step) ) {
2416  // if (langevinPistonStep) {
2417  // drift 0.5
2418  TIMER_START(t, DRIFT);
2419  addVelocityToPosition_SOA(0.5*timestep
2420 #ifndef SOA_SIMPLIFY_PARAMS
2421  ,
2422  patch->patchDataSOA.vel_x,
2423  patch->patchDataSOA.vel_y,
2424  patch->patchDataSOA.vel_z,
2425  patch->patchDataSOA.pos_x,
2426  patch->patchDataSOA.pos_y,
2427  patch->patchDataSOA.pos_z,
2428  patch->patchDataSOA.numAtoms
2429 #endif
2430  );
2431  TIMER_STOP(t, DRIFT);
2432  // There is a blocking receive inside of langevinPiston()
2433  // that might suspend the current thread of execution,
2434  // so split profiling around this conditional block.
2436 #ifndef SOA_SIMPLIFY_PARAMS
2437  patch->patchDataSOA.hydrogenGroupSize,
2438  patch->patchDataSOA.mass,
2439  patch->patchDataSOA.pos_x,
2440  patch->patchDataSOA.pos_y,
2441  patch->patchDataSOA.pos_z,
2442  patch->patchDataSOA.vel_x,
2443  patch->patchDataSOA.vel_y,
2444  patch->patchDataSOA.vel_z,
2445  patch->patchDataSOA.numAtoms,
2446 #endif
2447  step
2448  );
2449 
2450  // drift 0.5
2451  TIMER_START(t, DRIFT);
2452  addVelocityToPosition_SOA(0.5*timestep
2453 #ifndef SOA_SIMPLIFY_PARAMS
2454  ,
2455  patch->patchDataSOA.vel_x,
2456  patch->patchDataSOA.vel_y,
2457  patch->patchDataSOA.vel_z,
2458  patch->patchDataSOA.pos_x,
2459  patch->patchDataSOA.pos_y,
2460  patch->patchDataSOA.pos_z,
2461  patch->patchDataSOA.numAtoms
2462 #endif
2463  );
2464  TIMER_STOP(t, DRIFT);
2465  }
2466  else {
2467  // drift 1.0
2468  TIMER_START(t, DRIFT);
2469  addVelocityToPosition_SOA(timestep
2470 #ifndef SOA_SIMPLIFY_PARAMS
2471  ,
2472  patch->patchDataSOA.vel_x,
2473  patch->patchDataSOA.vel_y,
2474  patch->patchDataSOA.vel_z,
2475  patch->patchDataSOA.pos_x,
2476  patch->patchDataSOA.pos_y,
2477  patch->patchDataSOA.pos_z,
2478  patch->patchDataSOA.numAtoms
2479 #endif
2480  );
2481  TIMER_STOP(t, DRIFT);
2482  }
2483 
2484  //NAMD_EVENT_START(eon, NamdProfileEvent::INTEGRATE_SOA_2);
2485 
2486  // There are NO sends in submitHalfstep() just local summation
2487  // into the Reduction struct.
2488  TIMER_START(t, SUBMITHALF);
2490 #ifndef SOA_SIMPLIFY_PARAMS
2491  patch->patchDataSOA.hydrogenGroupSize,
2492  patch->patchDataSOA.mass,
2493  patch->patchDataSOA.vel_x,
2494  patch->patchDataSOA.vel_y,
2495  patch->patchDataSOA.vel_z,
2496  patch->patchDataSOA.numAtoms
2497 #endif
2498  );
2499  TIMER_STOP(t, SUBMITHALF);
2500 
2501  //doNonbonded = !(step%nonbondedFrequency);
2502  doNonbonded = nonbondedFrequency.check(step);
2503  //doFullElectrostatics = (dofull && !(step%fullElectFrequency));
2504  doFullElectrostatics = (dofull && fullElectFrequency.check(step));
2505 
// Pick the slowest force class evaluated this step (multiple time stepping).
2506  maxForceUsed = Results::normal;
2507  if ( doNonbonded ) maxForceUsed = Results::nbond;
2508  if ( doFullElectrostatics ) maxForceUsed = Results::slow;
2509 
2510  // Migrate Atoms on stepsPerCycle
2511  // Check to see if this is energy evaluation step:
2512  // doEnergy = ! ( step % energyFrequency );
2513  doVirial = 1;
2514  doKineticEnergy = 1;
2515  doMomenta = 1;
2516 
2517  //NAMD_EVENT_STOP(eon, NamdProfileEvent::INTEGRATE_SOA_2); // integrate_SOA 2
2518 
2519  // The current thread of execution will suspend in runComputeObjects().
2520  // Check to see if we are at a migration step:
2521  // runComputeObjects_SOA(!(step%stepsPerCycle), step<numberOfSteps);
2522  runComputeObjects_SOA(isMigration, step<numberOfSteps, step);
2523 
2524  NAMD_EVENT_START(eon, NamdProfileEvent::INTEGRATE_SOA_3);
2525 
2526  TIMER_START(t, VELBBK1);
2528  timestep
2529 #ifndef SOA_SIMPLIFY_PARAMS
2530  ,
2531  patch->patchDataSOA.langevinParam,
2532  patch->patchDataSOA.vel_x,
2533  patch->patchDataSOA.vel_y,
2534  patch->patchDataSOA.vel_z,
2535  patch->patchDataSOA.numAtoms
2536 #endif
2537  );
2538  TIMER_STOP(t, VELBBK1);
2539 
2540  // kick 1.0
2541  TIMER_START(t, KICK);
2542  addForceToMomentum_SOA(1.0, timestep, nbondstep, slowstep,
2543 #ifndef SOA_SIMPLIFY_PARAMS
2544  patch->patchDataSOA.recipMass,
2545  patch->patchDataSOA.f_normal_x,
2546  patch->patchDataSOA.f_normal_y,
2547  patch->patchDataSOA.f_normal_z,
2548  patch->patchDataSOA.f_nbond_x,
2549  patch->patchDataSOA.f_nbond_y,
2550  patch->patchDataSOA.f_nbond_z,
2551  patch->patchDataSOA.f_slow_x,
2552  patch->patchDataSOA.f_slow_y,
2553  patch->patchDataSOA.f_slow_z,
2554  patch->patchDataSOA.vel_x,
2555  patch->patchDataSOA.vel_y,
2556  patch->patchDataSOA.vel_z,
2557  patch->patchDataSOA.numAtoms,
2558 #endif
2559  maxForceUsed
2560  );
2561  TIMER_STOP(t, KICK);
2562 
2563  TIMER_START(t, VELBBK2);
2565  timestep
2566 #ifndef SOA_SIMPLIFY_PARAMS
2567  ,
2568  patch->patchDataSOA.langevinParam,
2569  patch->patchDataSOA.langScalVelBBK2,
2570  patch->patchDataSOA.langScalRandBBK2,
2571  patch->patchDataSOA.gaussrand_x,
2572  patch->patchDataSOA.gaussrand_y,
2573  patch->patchDataSOA.gaussrand_z,
2574  patch->patchDataSOA.vel_x,
2575  patch->patchDataSOA.vel_y,
2576  patch->patchDataSOA.vel_z,
2577  patch->patchDataSOA.numAtoms
2578 #endif
2579  );
2580  TIMER_STOP(t, VELBBK2);
2581 
2582  TIMER_START(t, RATTLE1);
2583  rattle1_SOA(timestep, 1);
2584  TIMER_STOP(t, RATTLE1);
2585 
2586  // save total force in computeGlobal
2587  if (doGlobal) {
2588  computeGlobal->saveTotalForces(patch);
2589  }
2590 
2591  TIMER_START(t, SUBMITHALF);
2593 #ifndef SOA_SIMPLIFY_PARAMS
2594  patch->patchDataSOA.hydrogenGroupSize,
2595  patch->patchDataSOA.mass,
2596  patch->patchDataSOA.vel_x,
2597  patch->patchDataSOA.vel_y,
2598  patch->patchDataSOA.vel_z,
2599  patch->patchDataSOA.numAtoms
2600 #endif
2601  );
2602  TIMER_STOP(t, SUBMITHALF);
2603 
2604  // kick -0.5
2605  TIMER_START(t, KICK);
2606  addForceToMomentum_SOA(-0.5, timestep, nbondstep, slowstep,
2607 #ifndef SOA_SIMPLIFY_PARAMS
2608  patch->patchDataSOA.recipMass,
2609  patch->patchDataSOA.f_normal_x,
2610  patch->patchDataSOA.f_normal_y,
2611  patch->patchDataSOA.f_normal_z,
2612  patch->patchDataSOA.f_nbond_x,
2613  patch->patchDataSOA.f_nbond_y,
2614  patch->patchDataSOA.f_nbond_z,
2615  patch->patchDataSOA.f_slow_x,
2616  patch->patchDataSOA.f_slow_y,
2617  patch->patchDataSOA.f_slow_z,
2618  patch->patchDataSOA.vel_x,
2619  patch->patchDataSOA.vel_y,
2620  patch->patchDataSOA.vel_z,
2621  patch->patchDataSOA.numAtoms,
2622 #endif
2623  maxForceUsed
2624  );
2625  TIMER_STOP(t, KICK);
2626 
2627  // XXX rattle2_SOA(timestep,step);
2628 
2629  TIMER_START(t, SUBMITFULL);
2631 #ifndef SOA_SIMPLIFY_PARAMS
2632  patch->patchDataSOA.hydrogenGroupSize,
2633  patch->patchDataSOA.mass,
2634  patch->patchDataSOA.pos_x,
2635  patch->patchDataSOA.pos_y,
2636  patch->patchDataSOA.pos_z,
2637  patch->patchDataSOA.vel_x,
2638  patch->patchDataSOA.vel_y,
2639  patch->patchDataSOA.vel_z,
2640  patch->patchDataSOA.f_normal_x,
2641  patch->patchDataSOA.f_normal_y,
2642  patch->patchDataSOA.f_normal_z,
2643  patch->patchDataSOA.f_nbond_x,
2644  patch->patchDataSOA.f_nbond_y,
2645  patch->patchDataSOA.f_nbond_z,
2646  patch->patchDataSOA.f_slow_x,
2647  patch->patchDataSOA.f_slow_y,
2648  patch->patchDataSOA.f_slow_z,
2649  patch->patchDataSOA.numAtoms
2650 #endif
2651  );
2652  TIMER_STOP(t, SUBMITFULL);
// Debug-only force dump for a single hard-coded patch ID (TESTPID).
2653 #ifdef TESTPID
2654  if (1) {
2655  int pid = TESTPID;
2656  if (patch->patchID == pid) {
2657  const PatchDataSOA& p = patch->patchDataSOA;
2658  int n = p.numAtoms;
2659 #if 0
2660  fprintf(stderr, "Patch %d has %d atoms\n", pid, n);
2661  fprintf(stderr, "%3s %8s %12s %12s %12s\n",
2662  "", "id", "fnormal_x", "fnbond_x", "fslow_x");
2663  for (int i=0; i < n; i++) {
2664  int index = p.id[i];
2665  fprintf(stderr, "%3d %8d %12.8f %12.8f %12.8f\n",
2666  i, index, p.f_normal_x[i], p.f_nbond_x[i], p.f_slow_x[i]);
2667  }
2668 #else
2669  Vector *f_normal = new Vector[n];
2670  Vector *f_nbond = new Vector[n];
2671  Vector *f_slow = new Vector[n];
2672  for (int i=0; i < n; i++) {
2673  f_normal[i].x = p.f_normal_x[i];
2674  f_normal[i].y = p.f_normal_y[i];
2675  f_normal[i].z = p.f_normal_z[i];
2676  f_nbond[i].x = p.f_nbond_x[i];
2677  f_nbond[i].y = p.f_nbond_y[i];
2678  f_nbond[i].z = p.f_nbond_z[i];
2679  f_slow[i].x = p.f_slow_x[i];
2680  f_slow[i].y = p.f_slow_y[i];
2681  f_slow[i].z = p.f_slow_z[i];
2682  }
2683  TestArray_write<double>(
2684  "f_normal_good.bin", "f_normal good", (double*)f_normal, 3*n);
2685  TestArray_write<double>(
2686  "f_nbond_good.bin", "f_nbond good", (double*)f_nbond, 3*n);
2687  TestArray_write<double>(
2688  "f_slow_good.bin", "f_slow good", (double*)f_slow, 3*n);
2689  delete [] f_normal;
2690  delete [] f_nbond;
2691  delete [] f_slow;
2692 #endif
2693  }
2694  }
2695 #endif
2696 
2697  // Do collections if any checks below are "on."
2698  // We add because we can't short-circuit.
2699  TIMER_START(t, SUBMITCOLLECT);
2700  if (isCollection) {
2701  submitCollections_SOA(step);
2702  }
2703  TIMER_STOP(t, SUBMITCOLLECT);
2704 
2705  NAMD_EVENT_STOP(eon, NamdProfileEvent::INTEGRATE_SOA_3); // integrate_SOA 3
2706 
2707  rebalanceLoad(step);
2708  }
2709 
// Mirror final SOA state back into the AOS atom arrays for output/migration.
2710  patch->copy_updates_to_AOS();
2711 
2712  TIMER_DONE(t);
2713  if (patch->patchID == SPECIAL_PATCH_ID) {
2714  printf("Timer collection reporting in microseconds for "
2715  "Patch %d\n", patch->patchID);
2716  TIMER_REPORT(t);
2717  }
2718 }
2719 
2720 
2721 // XXX inline it?
2722 // XXX does not handle fixed atoms
2723 // Each timestep: dt = scaling * (timestep / TIMEFACTOR);
// NOTE(review): the doc extraction dropped source line 2724, which held this
// function's signature — per the profiling event below this is
// Sequencer::addForceToMomentum_SOA(...); confirm against the repository.
// Half-/full-kick over the SOA arrays: for each force class up to
// maxForceNumber, does vel += f * recipMass * (scaling * dt_class) in place.
// The preceding comments note it does not handle fixed atoms.
2725  const double scaling,
2726  double dt_normal, // timestep Results::normal = 0
2727  double dt_nbond, // timestep Results::nbond = 1
2728  double dt_slow, // timestep Results::slow = 2
2729 #ifndef SOA_SIMPLIFY_PARAMS
2730  const double * __restrict recipMass,
2731  const double * __restrict f_normal_x, // force Results::normal = 0
2732  const double * __restrict f_normal_y,
2733  const double * __restrict f_normal_z,
2734  const double * __restrict f_nbond_x, // force Results::nbond = 1
2735  const double * __restrict f_nbond_y,
2736  const double * __restrict f_nbond_z,
2737  const double * __restrict f_slow_x, // force Results::slow = 2
2738  const double * __restrict f_slow_y,
2739  const double * __restrict f_slow_z,
2740  double * __restrict vel_x,
2741  double * __restrict vel_y,
2742  double * __restrict vel_z,
2743  int numAtoms,
2744 #endif
2745  int maxForceNumber
2746  ) {
2747  NAMD_EVENT_RANGE_2(patch->flags.event_on,
2748  NamdProfileEvent::ADD_FORCE_TO_MOMENTUM_SOA);
2749 
// With SOA_SIMPLIFY_PARAMS the array arguments are not passed in; bind the
// same names to the patch's SOA buffers instead.
2750 #ifdef SOA_SIMPLIFY_PARAMS
2751  const double * __restrict recipMass = patch->patchDataSOA.recipMass;
2752  // force Results::normal = 0
2753  const double * __restrict f_normal_x = patch->patchDataSOA.f_normal_x;
2754  const double * __restrict f_normal_y = patch->patchDataSOA.f_normal_y;
2755  const double * __restrict f_normal_z = patch->patchDataSOA.f_normal_z;
2756  // force Results::nbond = 1
2757  const double * __restrict f_nbond_x = patch->patchDataSOA.f_nbond_x;
2758  const double * __restrict f_nbond_y = patch->patchDataSOA.f_nbond_y;
2759  const double * __restrict f_nbond_z = patch->patchDataSOA.f_nbond_z;
2760  // force Results::slow = 2
2761  const double * __restrict f_slow_x = patch->patchDataSOA.f_slow_x;
2762  const double * __restrict f_slow_y = patch->patchDataSOA.f_slow_y;
2763  const double * __restrict f_slow_z = patch->patchDataSOA.f_slow_z;
2764  double * __restrict vel_x = patch->patchDataSOA.vel_x;
2765  double * __restrict vel_y = patch->patchDataSOA.vel_y;
2766  double * __restrict vel_z = patch->patchDataSOA.vel_z;
2767  int numAtoms = patch->patchDataSOA.numAtoms;
2768 #endif
2769  //
2770  // We could combine each case into a single loop with breaks,
2771  // with all faster forces also summed, like addForceToMomentum3().
2772  //
2773  // Things to consider:
2774  // - Do we always use acceleration (f/m) instead of just plain force?
2775  // Then we could instead buffer accel_slow, accel_nbond, etc.
2776  // - We will always need one multiply, since each dt includes
2777  // also a scaling factor.
2778  //
2779 
2780 #if 0
2781  if(this->patch->getPatchID() == 538){
2782  // fprintf(stderr, "Old Positions %lf %lf %lf\n", patch->patchDataSOA.pos_x[43], patch->patchDataSOA.pos_y[43], patch->patchDataSOA.pos_z[43]);
2783  // fprintf(stderr, "Old Velocities %lf %lf %lf\n", vel_x[43], vel_y[43], vel_z[ 43]);
2784  // fprintf(stderr, "Adding forces %lf %lf %lf %lf %lf %lf %lf %lf %lf\n",
2785  // f_slow_x[43], f_slow_y[43], f_slow_z[43],
2786  // f_nbond_x[43], f_nbond_y[43], f_nbond_z[43],
2787  // f_normal_x[43], f_normal_y[43], f_normal_z[43]);
2788  fprintf(stderr, "Old Positions %lf %lf %lf\n", patch->patchDataSOA.pos_x[0], patch->patchDataSOA.pos_y[0], patch->patchDataSOA.pos_z[0]);
2789  fprintf(stderr, "Old Velocities %lf %lf %lf\n", vel_x[0], vel_y[0], vel_z[ 0]);
2790  fprintf(stderr, "Adding forces %lf %lf %lf %lf %lf %lf %lf %lf %lf\n",
2791  f_slow_x[43], f_slow_y[43], f_slow_z[43],
2792  f_nbond_x[43], f_nbond_y[43], f_nbond_z[43],
2793  f_normal_x[43], f_normal_y[43], f_normal_z[43]);
2794  }
2795 #endif
// Intentional fall-through: each slower force class implies all faster ones
// are also applied (multiple-time-stepping hierarchy slow > nbond > normal).
2796  switch (maxForceNumber) {
2797  case Results::slow:
2798  dt_slow *= scaling;
2799  for (int i=0; i < numAtoms; i++) {
2800  vel_x[i] += f_slow_x[i] * recipMass[i] * dt_slow;
2801  vel_y[i] += f_slow_y[i] * recipMass[i] * dt_slow;
2802  vel_z[i] += f_slow_z[i] * recipMass[i] * dt_slow;
2803  }
2804  // fall through because we will always have the "faster" forces
2805  case Results::nbond:
2806  dt_nbond *= scaling;
2807  for (int i=0; i < numAtoms; i++) {
2808  vel_x[i] += f_nbond_x[i] * recipMass[i] * dt_nbond;
2809  vel_y[i] += f_nbond_y[i] * recipMass[i] * dt_nbond;
2810  vel_z[i] += f_nbond_z[i] * recipMass[i] * dt_nbond;
2811  }
2812  // fall through because we will always have the "faster" forces
2813  case Results::normal:
2814  dt_normal *= scaling;
2815  for (int i=0; i < numAtoms; i++) {
2816  vel_x[i] += f_normal_x[i] * recipMass[i] * dt_normal;
2817  vel_y[i] += f_normal_y[i] * recipMass[i] * dt_normal;
2818  vel_z[i] += f_normal_z[i] * recipMass[i] * dt_normal;
2819  }
2820  }
2821 }
2822 
2823 
2824 // XXX inline it?
2825 // XXX does not handle fixed atoms
2826 // Timestep: dt = scaling * (timestep / TIMEFACTOR);
2828  const double dt
2829 #ifndef SOA_SIMPLIFY_PARAMS
2830  ,
2831  const double * __restrict vel_x,
2832  const double * __restrict vel_y,
2833  const double * __restrict vel_z,
2834  double * __restrict pos_x,
2835  double * __restrict pos_y,
2836  double * __restrict pos_z,
2837  int numAtoms
2838 #endif
2839  ) {
2840  NAMD_EVENT_RANGE_2(patch->flags.event_on,
2841  NamdProfileEvent::ADD_VELOCITY_TO_POSITION_SOA);
2842 #ifdef SOA_SIMPLIFY_PARAMS
2843  const double * __restrict vel_x = patch->patchDataSOA.vel_x;
2844  const double * __restrict vel_y = patch->patchDataSOA.vel_y;
2845  const double * __restrict vel_z = patch->patchDataSOA.vel_z;
2846  double * __restrict pos_x = patch->patchDataSOA.pos_x;
2847  double * __restrict pos_y = patch->patchDataSOA.pos_y;
2848  double * __restrict pos_z = patch->patchDataSOA.pos_z;
2849  int numAtoms = patch->patchDataSOA.numAtoms;
2850 #endif
2851  for (int i=0; i < numAtoms; i++) {
2852  pos_x[i] += vel_x[i] * dt;
2853  pos_y[i] += vel_y[i] * dt;
2854  pos_z[i] += vel_z[i] * dt;
2855  }
2856 #if 0
2857  if(this->patch->getPatchID() == 538){
2858  fprintf(stderr, "New Positions %lf %lf %lf\n", pos_x[43], pos_y[43], pos_z[43]);
2859  fprintf(stderr, "New Velocities %lf %lf %lf\n", vel_x[43], vel_y[43], vel_z[43]);
2860  }
2861 #endif
2862 
2863 }
2864 
2865 
2867 #ifndef SOA_SIMPLIFY_PARAMS
2868  const int * __restrict hydrogenGroupSize,
2869  const float * __restrict mass,
2870  const double * __restrict vel_x,
2871  const double * __restrict vel_y,
2872  const double * __restrict vel_z,
2873  int numAtoms
2874 #endif
2875  ) {
2876  NAMD_EVENT_RANGE_2(patch->flags.event_on,
2877  NamdProfileEvent::SUBMIT_HALFSTEP_SOA);
2878 #ifdef SOA_SIMPLIFY_PARAMS
2879  const int * __restrict hydrogenGroupSize = patch->patchDataSOA.hydrogenGroupSize;
2880  const float * __restrict mass = patch->patchDataSOA.mass;
2881  const double * __restrict vel_x = patch->patchDataSOA.vel_x;
2882  const double * __restrict vel_y = patch->patchDataSOA.vel_y;
2883  const double * __restrict vel_z = patch->patchDataSOA.vel_z;
2884  int numAtoms = patch->patchDataSOA.numAtoms;
2885 #endif
2886  if ( 1 /* doKineticEnergy || patch->flags.doVirial */ ) {
2887  BigReal kineticEnergy = 0;
2888  Tensor virial;
2889  for (int i=0; i < numAtoms; i++) {
2890  // scalar kineticEnergy += mass[i] * vel[i]^2
2891  kineticEnergy += mass[i] *
2892  (vel_x[i]*vel_x[i] + vel_y[i]*vel_y[i] + vel_z[i]*vel_z[i]);
2893  // tensor virial += mass[i] * outer_product(vel[i], vel[i])
2894  virial.xx += mass[i] * vel_x[i] * vel_x[i];
2895  virial.xy += mass[i] * vel_x[i] * vel_y[i];
2896  virial.xz += mass[i] * vel_x[i] * vel_z[i];
2897  virial.yx += mass[i] * vel_y[i] * vel_x[i];
2898  virial.yy += mass[i] * vel_y[i] * vel_y[i];
2899  virial.yz += mass[i] * vel_y[i] * vel_z[i];
2900  virial.zx += mass[i] * vel_z[i] * vel_x[i];
2901  virial.zy += mass[i] * vel_z[i] * vel_y[i];
2902  virial.zz += mass[i] * vel_z[i] * vel_z[i];
2903  }
2904  kineticEnergy *= 0.5 * 0.5;
2905  virial *= 0.5;
2906 
2908  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,virial);
2909  }
2910 
2911  if ( 1 /* doKineticEnergy || patch->flags.doVirial */ ) {
2912  BigReal intKineticEnergy = 0;
2913  Tensor intVirialNormal;
2914  int hgs;
2915  for (int i=0; i < numAtoms; i += hgs) {
2916  // find velocity of center-of-mass of hydrogen group
2917  // calculate mass-weighted velocity
2918  hgs = hydrogenGroupSize[i];
2919  BigReal m_cm = 0;
2920  BigReal v_cm_x = 0;
2921  BigReal v_cm_y = 0;
2922  BigReal v_cm_z = 0;
2923  for (int j = i; j < (i+hgs); j++) {
2924  m_cm += mass[j];
2925  v_cm_x += mass[j] * vel_x[j];
2926  v_cm_y += mass[j] * vel_y[j];
2927  v_cm_z += mass[j] * vel_z[j];
2928  }
2929  BigReal recip_m_cm = 1.0 / m_cm;
2930  v_cm_x *= recip_m_cm;
2931  v_cm_y *= recip_m_cm;
2932  v_cm_z *= recip_m_cm;
2933  // sum virial contributions wrt vel center-of-mass
2934  for (int j = i; j < (i+hgs); j++) {
2935  BigReal dv_x = vel_x[j] - v_cm_x;
2936  BigReal dv_y = vel_y[j] - v_cm_y;
2937  BigReal dv_z = vel_z[j] - v_cm_z;
2938  // scalar intKineticEnergy += mass[j] * dot_product(vel[j], dv)
2939  intKineticEnergy += mass[j] *
2940  (vel_x[j] * dv_x + vel_y[j] * dv_y + vel_z[j] * dv_z);
2941  // tensor intVirialNormal += mass[j] * outer_product(vel[j], dv)
2942  intVirialNormal.xx += mass[j] * vel_x[j] * dv_x;
2943  intVirialNormal.xy += mass[j] * vel_x[j] * dv_y;
2944  intVirialNormal.xz += mass[j] * vel_x[j] * dv_z;
2945  intVirialNormal.yx += mass[j] * vel_y[j] * dv_x;
2946  intVirialNormal.yy += mass[j] * vel_y[j] * dv_y;
2947  intVirialNormal.yz += mass[j] * vel_y[j] * dv_z;
2948  intVirialNormal.zx += mass[j] * vel_z[j] * dv_x;
2949  intVirialNormal.zy += mass[j] * vel_z[j] * dv_y;
2950  intVirialNormal.zz += mass[j] * vel_z[j] * dv_z;
2951  }
2952  }
2953  intKineticEnergy *= 0.5 * 0.5;
2954  intVirialNormal *= 0.5;
2956  += intKineticEnergy;
2957  ADD_TENSOR_OBJECT(reduction, REDUCTION_INT_VIRIAL_NORMAL,
2958  intVirialNormal);
2959  }
2960 }
2961 
2962 
2963 //
2964 // XXX
2965 //
2967 #ifndef SOA_SIMPLIFY_PARAMS
2968  const int * __restrict hydrogenGroupSize,
2969  const float * __restrict mass,
2970  const double * __restrict pos_x,
2971  const double * __restrict pos_y,
2972  const double * __restrict pos_z,
2973  const double * __restrict vel_x,
2974  const double * __restrict vel_y,
2975  const double * __restrict vel_z,
2976  const double * __restrict f_normal_x,
2977  const double * __restrict f_normal_y,
2978  const double * __restrict f_normal_z,
2979  const double * __restrict f_nbond_x,
2980  const double * __restrict f_nbond_y,
2981  const double * __restrict f_nbond_z,
2982  const double * __restrict f_slow_x,
2983  const double * __restrict f_slow_y,
2984  const double * __restrict f_slow_z,
2985  int numAtoms
2986 #endif
2987  ) {
2988  NAMD_EVENT_RANGE_2(patch->flags.event_on,
2989  NamdProfileEvent::SUBMIT_REDUCTIONS_SOA);
2990 #ifdef SOA_SIMPLIFY_PARAMS
2991  const int * __restrict hydrogenGroupSize = patch->patchDataSOA.hydrogenGroupSize;
2992  const float * __restrict mass = patch->patchDataSOA.mass;
2993  const double * __restrict pos_x = patch->patchDataSOA.pos_x;
2994  const double * __restrict pos_y = patch->patchDataSOA.pos_y;
2995  const double * __restrict pos_z = patch->patchDataSOA.pos_z;
2996  const double * __restrict vel_x = patch->patchDataSOA.vel_x;
2997  const double * __restrict vel_y = patch->patchDataSOA.vel_y;
2998  const double * __restrict vel_z = patch->patchDataSOA.vel_z;
2999  const double * __restrict f_normal_x = patch->patchDataSOA.f_normal_x;
3000  const double * __restrict f_normal_y = patch->patchDataSOA.f_normal_y;
3001  const double * __restrict f_normal_z = patch->patchDataSOA.f_normal_z;
3002  const double * __restrict f_nbond_x = patch->patchDataSOA.f_nbond_x;
3003  const double * __restrict f_nbond_y = patch->patchDataSOA.f_nbond_y;
3004  const double * __restrict f_nbond_z = patch->patchDataSOA.f_nbond_z;
3005  const double * __restrict f_slow_x = patch->patchDataSOA.f_slow_x;
3006  const double * __restrict f_slow_y = patch->patchDataSOA.f_slow_y;
3007  const double * __restrict f_slow_z = patch->patchDataSOA.f_slow_z;
3008  int numAtoms = patch->patchDataSOA.numAtoms;
3009 #endif
3010 
3011  reduction->item(REDUCTION_ATOM_CHECKSUM) += numAtoms;
3013 
3014  if ( 1 /* doKineticEnergy || doMomenta || patch->flags.doVirial */ ) {
3015  BigReal kineticEnergy = 0;
3016  BigReal momentum_x = 0;
3017  BigReal momentum_y = 0;
3018  BigReal momentum_z = 0;
3019  BigReal angularMomentum_x = 0;
3020  BigReal angularMomentum_y = 0;
3021  BigReal angularMomentum_z = 0;
3022  BigReal origin_x = patch->lattice.origin().x;
3023  BigReal origin_y = patch->lattice.origin().y;
3024  BigReal origin_z = patch->lattice.origin().z;
3025 
3026  // XXX pairInteraction
3027 
3028  for (int i=0; i < numAtoms; i++) {
3029 
3030  // scalar kineticEnergy += mass[i] * dot_product(vel[i], vel[i])
3031  kineticEnergy += mass[i] *
3032  (vel_x[i]*vel_x[i] + vel_y[i]*vel_y[i] + vel_z[i]*vel_z[i]);
3033 
3034  // vector momentum += mass[i] * vel[i]
3035  momentum_x += mass[i] * vel_x[i];
3036  momentum_y += mass[i] * vel_y[i];
3037  momentum_z += mass[i] * vel_z[i];
3038 
3039  // vector dpos = pos[i] - origin
3040  BigReal dpos_x = pos_x[i] - origin_x;
3041  BigReal dpos_y = pos_y[i] - origin_y;
3042  BigReal dpos_z = pos_z[i] - origin_z;
3043 
3044  // vector angularMomentum += mass[i] * cross_product(dpos, vel[i])
3045  angularMomentum_x += mass[i] * (dpos_y*vel_z[i] - dpos_z*vel_y[i]);
3046  angularMomentum_y += mass[i] * (dpos_z*vel_x[i] - dpos_x*vel_z[i]);
3047  angularMomentum_z += mass[i] * (dpos_x*vel_y[i] - dpos_y*vel_x[i]);
3048  }
3049 
3050  // XXX missing Drude
3051 
3052  kineticEnergy *= 0.5;
3053  Vector momentum(momentum_x, momentum_y, momentum_z);
3054  Vector angularMomentum(angularMomentum_x, angularMomentum_y,
3055  angularMomentum_z);
3056 
3058  ADD_VECTOR_OBJECT(reduction,REDUCTION_MOMENTUM,momentum);
3059  ADD_VECTOR_OBJECT(reduction,REDUCTION_ANGULAR_MOMENTUM,angularMomentum);
3060  }
3061  // For non-Multigrator doKineticEnergy = 1 always
3062  if ( 1 /* doKineticEnergy || patch->flags.doVirial */ ) {
3063  BigReal intKineticEnergy = 0;
3064  Tensor intVirialNormal;
3065  Tensor intVirialNbond;
3066  Tensor intVirialSlow;
3067 
3068  int hgs = 1; // hydrogen group size
3069  for (int i=0; i < numAtoms; i += hgs) {
3070  hgs = hydrogenGroupSize[i];
3071  int j;
3072  BigReal m_cm = 0;
3073  BigReal r_cm_x = 0;
3074  BigReal r_cm_y = 0;
3075  BigReal r_cm_z = 0;
3076  BigReal v_cm_x = 0;
3077  BigReal v_cm_y = 0;
3078  BigReal v_cm_z = 0;
3079  for ( j = i; j < (i+hgs); ++j ) {
3080  m_cm += mass[j];
3081  r_cm_x += mass[j] * pos_x[j];
3082  r_cm_y += mass[j] * pos_y[j];
3083  r_cm_z += mass[j] * pos_z[j];
3084  v_cm_x += mass[j] * vel_x[j];
3085  v_cm_y += mass[j] * vel_y[j];
3086  v_cm_z += mass[j] * vel_z[j];
3087  }
3088  BigReal inv_m_cm = namd_reciprocal(m_cm);
3089  r_cm_x *= inv_m_cm;
3090  r_cm_y *= inv_m_cm;
3091  r_cm_z *= inv_m_cm;
3092  v_cm_x *= inv_m_cm;
3093  v_cm_y *= inv_m_cm;
3094  v_cm_z *= inv_m_cm;
3095 
3096  // XXX removed pairInteraction
3097  for ( j = i; j < (i+hgs); ++j ) {
3098  // XXX removed fixed atoms
3099 
3100  // vector vel[j] used twice below
3101  BigReal v_x = vel_x[j];
3102  BigReal v_y = vel_y[j];
3103  BigReal v_z = vel_z[j];
3104 
3105  // vector dv = vel[j] - v_cm
3106  BigReal dv_x = v_x - v_cm_x;
3107  BigReal dv_y = v_y - v_cm_y;
3108  BigReal dv_z = v_z - v_cm_z;
3109 
3110  // scalar intKineticEnergy += mass[j] * dot_product(v, dv)
3111  intKineticEnergy += mass[j] *
3112  (v_x * dv_x + v_y * dv_y + v_z * dv_z);
3113 
3114  // vector dr = pos[j] - r_cm
3115  BigReal dr_x = pos_x[j] - r_cm_x;
3116  BigReal dr_y = pos_y[j] - r_cm_y;
3117  BigReal dr_z = pos_z[j] - r_cm_z;
3118 
3119  // tensor intVirialNormal += outer_product(f_normal[j], dr)
3120  intVirialNormal.xx += f_normal_x[j] * dr_x;
3121  intVirialNormal.xy += f_normal_x[j] * dr_y;
3122  intVirialNormal.xz += f_normal_x[j] * dr_z;
3123  intVirialNormal.yx += f_normal_y[j] * dr_x;
3124  intVirialNormal.yy += f_normal_y[j] * dr_y;
3125  intVirialNormal.yz += f_normal_y[j] * dr_z;
3126  intVirialNormal.zx += f_normal_z[j] * dr_x;
3127  intVirialNormal.zy += f_normal_z[j] * dr_y;
3128  intVirialNormal.zz += f_normal_z[j] * dr_z;
3129 
3130  // tensor intVirialNbond += outer_product(f_nbond[j], dr)
3131  intVirialNbond.xx += f_nbond_x[j] * dr_x;
3132  intVirialNbond.xy += f_nbond_x[j] * dr_y;
3133  intVirialNbond.xz += f_nbond_x[j] * dr_z;
3134  intVirialNbond.yx += f_nbond_y[j] * dr_x;
3135  intVirialNbond.yy += f_nbond_y[j] * dr_y;
3136  intVirialNbond.yz += f_nbond_y[j] * dr_z;
3137  intVirialNbond.zx += f_nbond_z[j] * dr_x;
3138  intVirialNbond.zy += f_nbond_z[j] * dr_y;
3139  intVirialNbond.zz += f_nbond_z[j] * dr_z;
3140 
3141  // tensor intVirialSlow += outer_product(f_slow[j], dr)
3142  intVirialSlow.xx += f_slow_x[j] * dr_x;
3143  intVirialSlow.xy += f_slow_x[j] * dr_y;
3144  intVirialSlow.xz += f_slow_x[j] * dr_z;
3145  intVirialSlow.yx += f_slow_y[j] * dr_x;
3146  intVirialSlow.yy += f_slow_y[j] * dr_y;
3147  intVirialSlow.yz += f_slow_y[j] * dr_z;
3148  intVirialSlow.zx += f_slow_z[j] * dr_x;
3149  intVirialSlow.zy += f_slow_z[j] * dr_y;
3150  intVirialSlow.zz += f_slow_z[j] * dr_z;
3151  }
3152  }
3153 
3154  intKineticEnergy *= 0.5;
3155 
3157  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NORMAL,intVirialNormal);
3158  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NBOND,intVirialNbond);
3159  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_SLOW,intVirialSlow);
3160  }
3161  // XXX removed pressure profile
3162 
3163  // XXX removed fixed atoms
3164 
3165  reduction->submit();
3166 
3167  // XXX removed pressure profile reduction
3168 }
3169 
3170 
// Hand positions / velocities / forces for this step to the collection
// system when Output says the step needs them, copying SOA data back
// into the AOS atom array first (except on the GPU-migration path,
// which has already done that copy).
void Sequencer::submitCollections_SOA(int step, int zeroVel /* = 0 */)
{
  //
  // Copy updates of SOA back into AOS for collections.
  //
  // XXX Could update positions and velocities separately.
  //
  NAMD_EVENT_RANGE_2(patch->flags.event_on,
      NamdProfileEvent::SUBMIT_COLLECTIONS_SOA);
  //
  // XXX Poor implementation here!
  // The selector functions called below in Output.C are
  // doing several tests and in an average use case calculating
  // at least two mod functions.
  //
  // However, most steps are NOT output steps!
  //
  // Ask Output which data this step requires; coordinateNeeded also
  // reports which DCD file index the positions belong to.
  int is_pos_needed;
  int dcdIndex;
  std::tie(is_pos_needed, dcdIndex)= Output::coordinateNeeded(step);
  int is_vel_needed = Output::velocityNeeded(step);
  int is_f_needed = Output::forceNeeded(step);
  if (!simParams->useDeviceMigration) { // This is already done for GPU migration
    if ( is_pos_needed || is_vel_needed ) {
      patch->copy_updates_to_AOS();
    }
  }
  if (is_f_needed) {
    // NOTE(review): the doc extraction appears to have dropped several
    // source lines here (original lines 3199-3203) — verify this branch
    // against the NAMD Sequencer.C source before relying on it.
    patch->copy_forces_to_AOS();
  }
  if ( is_pos_needed ) {
    collection->submitPositions(step,patch->atom,patch->lattice,is_pos_needed,dcdIndex);
  }
  if ( is_vel_needed ) {
    // zeroVel selects the caller's request to output zeroed velocities.
    collection->submitVelocities(step,zeroVel,patch->atom,is_vel_needed);
  }
  if ( is_f_needed ) {
    // clamp to slow so the collection never indexes past Results::slow
    int maxForceUsed = patch->flags.maxForceUsed;
    if ( maxForceUsed > Results::slow ) maxForceUsed = Results::slow;
    collection->submitForces(step,patch->atom,maxForceUsed,patch->f,is_f_needed);
  }
}
3218 
3219 
3221  const double dt,
3222  const double maxvel2
3223 #ifndef SOA_SIMPLIFY_PARAMS
3224  ,
3225  const double * __restrict vel_x,
3226  const double * __restrict vel_y,
3227  const double * __restrict vel_z,
3228  int numAtoms
3229 #endif
3230  ) {
3231  NAMD_EVENT_RANGE_2(patch->flags.event_on, NamdProfileEvent::MAXIMUM_MOVE_SOA);
3232 #ifdef SOA_SIMPLIFY_PARAMS
3233  const double * __restrict vel_x = patch->patchDataSOA.vel_x;
3234  const double * __restrict vel_y = patch->patchDataSOA.vel_y;
3235  const double * __restrict vel_z = patch->patchDataSOA.vel_z;
3236  int numAtoms = patch->patchDataSOA.numAtoms;
3237 #endif
3238 
3239  // XXX missing maximum move
3240 
3241  // Loop vectorizes when replacing logical OR with summing.
3242  int killme = 0;
3243  for (int i=0; i < numAtoms; i++) {
3244  BigReal vel2 =
3245  vel_x[i] * vel_x[i] + vel_y[i] * vel_y[i] + vel_z[i] * vel_z[i];
3246  killme = killme + ( vel2 > maxvel2 );
3247  }
3248  if (killme) {
3249  // Found at least one atom that is moving too fast.
3250  // Terminating, so loop performance below doesn't matter.
3251  // Loop does not vectorize.
3252  killme = 0;
3253  for (int i=0; i < numAtoms; i++) {
3254  BigReal vel2 =
3255  vel_x[i] * vel_x[i] + vel_y[i] * vel_y[i] + vel_z[i] * vel_z[i];
3256  if (vel2 > maxvel2) {
3257  const FullAtom *a = patch->atom.begin();
3258  const Vector vel(vel_x[i], vel_y[i], vel_z[i]);
3259  const BigReal maxvel = sqrt(maxvel2);
3260  ++killme;
3261  iout << iERROR << "Atom " << (a[i].id + 1) << " velocity is "
3262  << ( PDBVELFACTOR * vel ) << " (limit is "
3263  << ( PDBVELFACTOR * maxvel ) << ", atom "
3264  << i << " of " << numAtoms << " on patch "
3265  << patch->patchID << " pe " << CkMyPe() << ")\n" << endi;
3266  }
3267  }
3268  iout << iERROR <<
3269  "Atoms moving too fast; simulation has become unstable ("
3270  << killme << " atoms on patch " << patch->patchID
3271  << " pe " << CkMyPe() << ").\n" << endi;
3273  terminate();
3274  }
3275 }
3276 
3277 
3279  BigReal timestep
3280 #ifndef SOA_SIMPLIFY_PARAMS
3281  ,
3282  const float * __restrict langevinParam,
3283  double * __restrict vel_x,
3284  double * __restrict vel_y,
3285  double * __restrict vel_z,
3286  int numAtoms
3287 #endif
3288  ) {
3289  NAMD_EVENT_RANGE_2(patch->flags.event_on,
3290  NamdProfileEvent::LANGEVIN_VELOCITIES_BBK1_SOA);
3291 #ifdef SOA_SIMPLIFY_PARAMS
3292  const float * __restrict langevinParam = patch->patchDataSOA.langevinParam;
3293  double * __restrict vel_x = patch->patchDataSOA.vel_x;
3294  double * __restrict vel_y = patch->patchDataSOA.vel_y;
3295  double * __restrict vel_z = patch->patchDataSOA.vel_z;
3296  int numAtoms = patch->patchDataSOA.numAtoms;
3297 #endif
3298  if ( simParams->langevinOn /* && !simParams->langevin_useBAOAB */ )
3299  {
3300  // scale by TIMEFACTOR to convert to fs and then by 0.001 to ps
3301  // multiply by the Langevin damping coefficient, units 1/ps
3302  // XXX we could instead store time-scaled Langevin parameters
3303  BigReal dt = timestep * (0.001 * TIMEFACTOR);
3304 
3305  // XXX missing Drude
3306 
3307  //
3308  // The conditional inside loop prevents vectorization and doesn't
3309  // avoid much work since addition and multiplication are cheap.
3310  //
3311  for (int i=0; i < numAtoms; i++) {
3312  BigReal dt_gamma = dt * langevinParam[i];
3313  //if ( ! dt_gamma ) continue;
3314 
3315  BigReal scaling = 1. - 0.5 * dt_gamma;
3316  vel_x[i] *= scaling;
3317  vel_y[i] *= scaling;
3318  vel_z[i] *= scaling;
3319  }
3320  } // end if langevinOn
3321 }
3322 
3323 
3325  BigReal timestep
3326 #ifndef SOA_SIMPLIFY_PARAMS
3327  ,
3328  const float * __restrict langevinParam,
3329  const float * __restrict langScalVelBBK2,
3330  const float * __restrict langScalRandBBK2,
3331  float * __restrict gaussrand_x,
3332  float * __restrict gaussrand_y,
3333  float * __restrict gaussrand_z,
3334  double * __restrict vel_x,
3335  double * __restrict vel_y,
3336  double * __restrict vel_z,
3337  int numAtoms
3338 #endif
3339  )
3340 {
3341  NAMD_EVENT_RANGE_2(patch->flags.event_on,
3342  NamdProfileEvent::LANGEVIN_VELOCITIES_BBK2_SOA);
3343 #ifdef SOA_SIMPLIFY_PARAMS
3344  const float * __restrict langevinParam = patch->patchDataSOA.langevinParam;
3345  const float * __restrict langScalVelBBK2 = patch->patchDataSOA.langScalVelBBK2;
3346  const float * __restrict langScalRandBBK2 = patch->patchDataSOA.langScalRandBBK2;
3347  float * __restrict gaussrand_x = patch->patchDataSOA.gaussrand_x;
3348  float * __restrict gaussrand_y = patch->patchDataSOA.gaussrand_y;
3349  float * __restrict gaussrand_z = patch->patchDataSOA.gaussrand_z;
3350  double * __restrict vel_x = patch->patchDataSOA.vel_x;
3351  double * __restrict vel_y = patch->patchDataSOA.vel_y;
3352  double * __restrict vel_z = patch->patchDataSOA.vel_z;
3353  int numAtoms = patch->patchDataSOA.numAtoms;
3354 #endif
3355  if ( simParams->langevinOn /* && !simParams->langevin_useBAOAB */ )
3356  {
3357  // XXX missing Drude
3358 
3359  // Scale by TIMEFACTOR to convert to fs and then by 0.001 to ps
3360  // multiply by the Langevin damping coefficient, units 1/ps.
3361  // XXX we could instead store time-scaled Langevin parameters
3362  BigReal dt = timestep * (0.001 * TIMEFACTOR);
3363  // Buffer the Gaussian random numbers
3365  // Must re-satisfy constraints if Langevin gammas differ.
3366  // (conserve momentum?)
3367  TIMER_START(patch->timerSet, RATTLE1);
3368  rattle1_SOA(timestep, 1);
3369  TIMER_STOP(patch->timerSet, RATTLE1);
3370  //
3371  // We don't need random numbers for atoms such that gamma=0.
3372  // If gammas differ, the likely case is that we aren't applying
3373  // Langevin damping to hydrogen, making those langevinParam=0,
3374  // in which case we need only numAtoms/3 random vectors.
3375  //
3376  // XXX can refine code below, count in advance how many
3377  // random numbers we need to use Random array filling routine
3378  //
3379  // XXX Loop does not vectorize!
3380  for (int i=0; i < numAtoms; i++) {
3381  Vector rg; // = 0
3382  if (langevinParam[i] != 0) rg = random->gaussian_vector();
3383  gaussrand_x[i] = float(rg.x);
3384  gaussrand_y[i] = float(rg.y);
3385  gaussrand_z[i] = float(rg.z);
3386  }
3387  }
3388  else {
3389  // Need to completely fill random number arrays.
3390  random->gaussian_array_f(gaussrand_x, numAtoms);
3391  random->gaussian_array_f(gaussrand_y, numAtoms);
3392  random->gaussian_array_f(gaussrand_z, numAtoms);
3393  }
3394 
3395  // do the velocity updates
3396  for (int i=0; i < numAtoms; i++) {
3397  vel_x[i] += gaussrand_x[i] * langScalRandBBK2[i];
3398  vel_y[i] += gaussrand_y[i] * langScalRandBBK2[i];
3399  vel_z[i] += gaussrand_z[i] * langScalRandBBK2[i];
3400  vel_x[i] *= langScalVelBBK2[i];
3401  vel_y[i] *= langScalVelBBK2[i];
3402  vel_z[i] *= langScalVelBBK2[i];
3403  }
3404  } // end if langevinOn
3405 }
3406 
3408 #ifndef SOA_SIMPLIFY_PARAMS
3409  const int * __restrict hydrogenGroupSize,
3410  const float * __restrict mass,
3411  double * __restrict pos_x,
3412  double * __restrict pos_y,
3413  double * __restrict pos_z,
3414  int numAtoms,
3415 #endif
3416  int step)
3417 {
3418 #ifdef SOA_SIMPLIFY_PARAMS
3419  const int * __restrict hydrogenGroupSize = patch->patchDataSOA.hydrogenGroupSize;
3420  const float * __restrict mass = patch->patchDataSOA.mass;
3421  double * __restrict pos_x = patch->patchDataSOA.pos_x;
3422  double * __restrict pos_y = patch->patchDataSOA.pos_y;
3423  double * __restrict pos_z = patch->patchDataSOA.pos_z;
3424  int numAtoms = patch->patchDataSOA.numAtoms;
3425 #endif
3426 
3427  //
3428  // Loops below simplify if we lift out special cases of fixed atoms
3429  // and pressure excluded atoms and make them their own branch.
3430  //
3431 
3435  // Blocking receive for the updated lattice scaling factor.
3436  Tensor factor = broadcast->positionRescaleFactor.get(step);
3437  patch->lattice.rescale(factor);
3438  Vector origin = patch->lattice.origin();
3439 
3440  if ( simParams->useGroupPressure ) {
3441  int hgs;
3442  for (int i = 0; i < numAtoms; i += hgs) {
3443  int j;
3444  hgs = hydrogenGroupSize[i];
3445  // missing fixed atoms implementation
3446  BigReal m_cm = 0;
3447  BigReal r_cm_x = 0;
3448  BigReal r_cm_y = 0;
3449  BigReal r_cm_z = 0;
3450  // calculate the center of mass
3451  for ( j = i; j < (i+hgs); ++j ) {
3452  m_cm += mass[j];
3453  r_cm_x += mass[j] * pos_x[j];
3454  r_cm_y += mass[j] * pos_y[j];
3455  r_cm_z += mass[j] * pos_z[j];
3456  }
3457  BigReal inv_m_cm = namd_reciprocal(m_cm);
3458  r_cm_x *= inv_m_cm;
3459  r_cm_y *= inv_m_cm;
3460  r_cm_z *= inv_m_cm;
3461  // scale the center of mass with factor
3462  // shift to origin
3463  double tx = r_cm_x - origin.x;
3464  double ty = r_cm_y - origin.y;
3465  double tz = r_cm_z - origin.z;
3466  // apply transformation
3467  double new_r_cm_x = factor.xx*tx + factor.xy*ty + factor.xz*tz;
3468  double new_r_cm_y = factor.yx*tx + factor.yy*ty + factor.yz*tz;
3469  double new_r_cm_z = factor.zx*tx + factor.zy*ty + factor.zz*tz;
3470  // shift back
3471  new_r_cm_x += origin.x;
3472  new_r_cm_y += origin.y;
3473  new_r_cm_z += origin.z;
3474  // translation vector from old COM and new COM
3475  double delta_r_cm_x = new_r_cm_x - r_cm_x;
3476  double delta_r_cm_y = new_r_cm_y - r_cm_y;
3477  double delta_r_cm_z = new_r_cm_z - r_cm_z;
3478  // shift the hydrogen group with translation vector
3479  for (j = i; j < (i+hgs); ++j) {
3480  pos_x[j] += delta_r_cm_x;
3481  pos_y[j] += delta_r_cm_y;
3482  pos_z[j] += delta_r_cm_z;
3483  }
3484  }
3485  } else {
3486  for (int i = 0; i < numAtoms; ++i) {
3487  // missing fixed atoms implementation
3488  // scale the coordinates with factor
3489  // shift to origin
3490  double tx = pos_x[i] - origin.x;
3491  double ty = pos_y[i] - origin.y;
3492  double tz = pos_z[i] - origin.z;
3493  // apply transformation
3494  double ftx = factor.xx*tx + factor.xy*ty + factor.xz*tz;
3495  double fty = factor.yx*tx + factor.yy*ty + factor.yz*tz;
3496  double ftz = factor.zx*tx + factor.zy*ty + factor.zz*tz;
3497  // shift back
3498  pos_x[i] = ftx + origin.x;
3499  pos_y[i] = fty + origin.y;
3500  pos_z[i] = ftz + origin.z;
3501  }
3502  }
3503  }
3504 }
3505 
3507 #ifndef SOA_SIMPLIFY_PARAMS
3508  const int * __restrict hydrogenGroupSize,
3509  const float * __restrict mass,
3510  double * __restrict pos_x,
3511  double * __restrict pos_y,
3512  double * __restrict pos_z,
3513  double * __restrict vel_x,
3514  double * __restrict vel_y,
3515  double * __restrict vel_z,
3516  int numAtoms,
3517 #endif
3518  int step
3519  )
3520 {
3521 #ifdef SOA_SIMPLIFY_PARAMS
3522  const int * __restrict hydrogenGroupSize = patch->patchDataSOA.hydrogenGroupSize;
3523  const float * __restrict mass = patch->patchDataSOA.mass;
3524  double * __restrict pos_x = patch->patchDataSOA.pos_x;
3525  double * __restrict pos_y = patch->patchDataSOA.pos_y;
3526  double * __restrict pos_z = patch->patchDataSOA.pos_z;
3527  double * __restrict vel_x = patch->patchDataSOA.vel_x;
3528  double * __restrict vel_y = patch->patchDataSOA.vel_y;
3529  double * __restrict vel_z = patch->patchDataSOA.vel_z;
3530  int numAtoms = patch->patchDataSOA.numAtoms;
3531 #endif
3532 
3533  //
3534  // Loops below simplify if we lift out special cases of fixed atoms
3535  // and pressure excluded atoms and make them their own branch.
3536  //
3537 
3538  // Blocking receive for the updated lattice scaling factor.
3539 
3540  Tensor factor = broadcast->positionRescaleFactor.get(step);
3541 
3542  TIMER_START(patch->timerSet, PISTON);
3543  // JCP FIX THIS!!!
3544  double velFactor_x = namd_reciprocal(factor.xx);
3545  double velFactor_y = namd_reciprocal(factor.yy);
3546  double velFactor_z = namd_reciprocal(factor.zz);
3547  patch->lattice.rescale(factor);
3548  Vector origin = patch->lattice.origin();
3549  if ( simParams->useGroupPressure ) {
3550  int hgs;
3551  for (int i=0; i < numAtoms; i += hgs) {
3552  int j;
3553  hgs = hydrogenGroupSize[i];
3554  // missing fixed atoms
3555  BigReal m_cm = 0;
3556  BigReal r_cm_x = 0;
3557  BigReal r_cm_y = 0;
3558  BigReal r_cm_z = 0;
3559  BigReal v_cm_x = 0;
3560  BigReal v_cm_y = 0;
3561  BigReal v_cm_z = 0;
3562  for ( j = i; j < (i+hgs); ++j ) {
3563  m_cm += mass[j];
3564  r_cm_x += mass[j] * pos_x[j];
3565  r_cm_y += mass[j] * pos_y[j];
3566  r_cm_z += mass[j] * pos_z[j];
3567  v_cm_x += mass[j] * vel_x[j];
3568  v_cm_y += mass[j] * vel_y[j];
3569  v_cm_z += mass[j] * vel_z[j];
3570  }
3571  BigReal inv_m_cm = namd_reciprocal(m_cm);
3572  r_cm_x *= inv_m_cm;
3573  r_cm_y *= inv_m_cm;
3574  r_cm_z *= inv_m_cm;
3575 
3576  double tx = r_cm_x - origin.x;
3577  double ty = r_cm_y - origin.y;
3578  double tz = r_cm_z - origin.z;
3579  double new_r_cm_x = factor.xx*tx + factor.xy*ty + factor.xz*tz;
3580  double new_r_cm_y = factor.yx*tx + factor.yy*ty + factor.yz*tz;
3581  double new_r_cm_z = factor.zx*tx + factor.zy*ty + factor.zz*tz;
3582  new_r_cm_x += origin.x;
3583  new_r_cm_y += origin.y;
3584  new_r_cm_z += origin.z;
3585 
3586  double delta_r_cm_x = new_r_cm_x - r_cm_x;
3587  double delta_r_cm_y = new_r_cm_y - r_cm_y;
3588  double delta_r_cm_z = new_r_cm_z - r_cm_z;
3589  v_cm_x *= inv_m_cm;
3590  v_cm_y *= inv_m_cm;
3591  v_cm_z *= inv_m_cm;
3592  double delta_v_cm_x = ( velFactor_x - 1 ) * v_cm_x;
3593  double delta_v_cm_y = ( velFactor_y - 1 ) * v_cm_y;
3594  double delta_v_cm_z = ( velFactor_z - 1 ) * v_cm_z;
3595  for (j = i; j < (i+hgs); j++) {
3596  pos_x[j] += delta_r_cm_x;
3597  pos_y[j] += delta_r_cm_y;
3598  pos_z[j] += delta_r_cm_z;
3599  vel_x[j] += delta_v_cm_x;
3600  vel_y[j] += delta_v_cm_y;
3601  vel_z[j] += delta_v_cm_z;
3602  }
3603  // if (i < 10)
3604  // printf("cpu: %d, %f, %f, %f, %f, %f, %f\n", i,
3605  // pos_x[i], pos_y[i], pos_z[i],
3606  // vel_x[i], vel_y[i], vel_z[i]);
3607  }
3608  }
3609  else {
3610  for (int i=0; i < numAtoms; i++) {
3611  double tx = pos_x[i] - origin.x;
3612  double ty = pos_y[i] - origin.y;
3613  double tz = pos_z[i] - origin.z;
3614  double ftx = factor.xx*tx + factor.xy*ty + factor.xz*tz;
3615  double fty = factor.yx*tx + factor.yy*ty + factor.yz*tz;
3616  double ftz = factor.zx*tx + factor.zy*ty + factor.zz*tz;
3617  pos_x[i] = ftx + origin.x;
3618  pos_y[i] = fty + origin.y;
3619  pos_z[i] = ftz + origin.z;
3620  vel_x[i] *= velFactor_x;
3621  vel_y[i] *= velFactor_y;
3622  vel_z[i] *= velFactor_z;
3623  // if (i < 10)
3624  // printf("cpu: %d, %f, %f, %f, %f, %f, %f\n", i,
3625  // pos_x[i], pos_y[i], pos_z[i],
3626  // vel_x[i], vel_y[i], vel_z[i]);
3627  }
3628  }
3629  TIMER_STOP(patch->timerSet, PISTON);
3630  // exit(0);
3631 }
3632 
3633 
3634 // timestep scaled by 1/TIMEFACTOR
3635 void Sequencer::rattle1_SOA(BigReal timestep, int pressure)
3636 {
3637  NAMD_EVENT_RANGE_2(patch->flags.event_on, NamdProfileEvent::RATTLE1_SOA);
3638  if ( simParams->rigidBonds != RIGID_NONE ) {
3639  Tensor virial;
3640  Tensor *vp = ( pressure ? &virial : 0 );
3641  // XXX pressureProfileReduction == NULL?
3642  if ( patch->rattle1_SOA(timestep, vp, pressureProfileReduction) ) {
3643  iout << iERROR <<
3644  "Constraint failure; simulation has become unstable.\n" << endi;
3646  terminate();
3647  }
3648  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,virial);
3649  }
3650 }
3651 
// NOTE(review): this span is a scraped listing with embedded source line
// numbers; several numbered lines were dropped by the extraction (e.g. 3657,
// 3662, 3698, 3821) — verify against the original Sequencer.C before editing.
//
// Runs all force computes for this patch for one step using the SOA
// (structure-of-arrays) data path: validates/ages pairlists, publishes
// positions, suspends this thread until all force deposit boxes close,
// then copies the resulting forces back into the SOA arrays.
3652 void Sequencer::runComputeObjects_SOA(int migration, int pairlists, int nstep)
3653 {
3654  if ( migration ) pairlistsAreValid = 0;
3655 #if (defined(NAMD_CUDA) || defined(NAMD_HIP)) || defined(NAMD_MIC)
// NOTE(review): part of this condition (original line 3657) is elided here.
3656  if ( pairlistsAreValid &&
3658  && ( pairlistsAge > pairlistsAgeLimit ) ) {
3659  pairlistsAreValid = 0;
3660  }
3661 #else
// NOTE(review): the opening `if (...)` of this branch (original line 3662) is elided.
3663  pairlistsAreValid = 0;
3664  }
3665 #endif
3666  if ( ! simParams->usePairlists ) pairlists = 0;
3667  patch->flags.usePairlists = pairlists || pairlistsAreValid;
3668  patch->flags.savePairlists = pairlists && ! pairlistsAreValid;
3669 
// Debug dump of SOA positions+charges for one selected patch (NTESTPID builds only).
3670 #if defined(NTESTPID)
3671  if (1 && patch->patchID == NTESTPID) {
3672  int step = patch->flags.step;
3673  int numAtoms = patch->numAtoms;
3674  double *xyzq = new double[4*numAtoms];
3675  double *x = patch->patchDataSOA.pos_x;
3676  double *y = patch->patchDataSOA.pos_y;
3677  double *z = patch->patchDataSOA.pos_z;
3678  float *q = patch->patchDataSOA.charge;
3679  for (int i=0; i < numAtoms; i++) {
3680  xyzq[4*i ] = x[i];
3681  xyzq[4*i+1] = y[i];
3682  xyzq[4*i+2] = z[i];
3683  xyzq[4*i+3] = q[i];
3684  }
3685  char fname[128], remark[128];
3686  sprintf(fname, "xyzq_soa_pid%d_step%d.bin", NTESTPID, step);
3687  sprintf(remark, "SOA xyzq, patch %d, step %d", NTESTPID, step);
3688  TestArray_write<double>(fname, remark, xyzq, 4*numAtoms);
3689  delete[] xyzq;
3690  }
3691 #endif
3692  // Zero all SOA global forces before computing force
3693  patch->zero_global_forces_SOA();
3694  patch->positionsReady_SOA(migration); // updates flags.sequence
3695 
3696  int seq = patch->flags.sequence;
// NOTE(review): the remainder of this priority expression (original line 3698) is elided.
3697  int basePriority = ( (seq & 0xffff) << 15 )
3699 
3700  // XXX missing GBIS
3701  priority = basePriority + COMPUTE_HOME_PRIORITY;
3702  //char prbuf[32];
3703  //sprintf(prbuf, "%s: %d", NamdProfileEventStr[NamdProfileEvent::SEQ_SUSPEND], patch->getPatchID());
3704  //NAMD_EVENT_START_EX(1, NamdProfileEvent::SEQ_SUSPEND, prbuf);
3705  suspend(); // until all deposit boxes close
3706  //NAMD_EVENT_STOP(1, NamdProfileEvent::SEQ_SUSPEND);
3707 
// With the node-group force registry, GPU-resident integration skips the
// copy-back except on migration steps; otherwise always copy forces to SOA.
3708 #ifdef NODEGROUP_FORCE_REGISTER
3709  if(!simParams->CUDASOAintegrate || migration){
3710  patch->copy_forces_to_SOA();
3711  }
3712 #else
3713  patch->copy_forces_to_SOA();
3714 #endif
3715 
// Debug dump of the normal/nonbonded/slow SOA force arrays (NTESTPID builds only).
3716 #if defined(NTESTPID)
3717  if (1 && patch->patchID == NTESTPID) {
3718  int step = patch->flags.step;
3719  int numAtoms = patch->numAtoms;
3720  char fname[128];
3721  char remark[128];
3722  double *fxyz = new double[3*numAtoms];
3723  double *fx = patch->patchDataSOA.f_normal_x;
3724  double *fy = patch->patchDataSOA.f_normal_y;
3725  double *fz = patch->patchDataSOA.f_normal_z;
3726  for (int i=0; i < numAtoms; i++) {
3727  fxyz[3*i ] = fx[i];
3728  fxyz[3*i+1] = fy[i];
3729  fxyz[3*i+2] = fz[i];
3730  }
3731  sprintf(fname, "fxyz_normal_soa_pid%d_step%d.bin", NTESTPID, step);
3732  sprintf(remark, "SOA fxyz normal, patch %d, step %d", NTESTPID, step);
3733  TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3734  fx = patch->patchDataSOA.f_nbond_x;
3735  fy = patch->patchDataSOA.f_nbond_y;
3736  fz = patch->patchDataSOA.f_nbond_z;
3737  for (int i=0; i < numAtoms; i++) {
3738  fxyz[3*i ] = fx[i];
3739  fxyz[3*i+1] = fy[i];
3740  fxyz[3*i+2] = fz[i];
3741  }
3742  sprintf(fname, "fxyz_nbond_soa_pid%d_step%d.bin", NTESTPID, step);
3743  sprintf(remark, "SOA fxyz nonbonded, patch %d, step %d", NTESTPID, step);
3744  TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3745  fx = patch->patchDataSOA.f_slow_x;
3746  fy = patch->patchDataSOA.f_slow_y;
3747  fz = patch->patchDataSOA.f_slow_z;
3748  for (int i=0; i < numAtoms; i++) {
3749  fxyz[3*i ] = fx[i];
3750  fxyz[3*i+1] = fy[i];
3751  fxyz[3*i+2] = fz[i];
3752  }
3753  sprintf(fname, "fxyz_slow_soa_pid%d_step%d.bin", NTESTPID, step);
3754  sprintf(remark, "SOA fxyz slow, patch %d, step %d", NTESTPID, step);
3755  TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3756  delete[] fxyz;
3757  }
3758 #endif
3759 
// Disabled debug dump of the same three force arrays for patch 0.
3760 #if 0
3761  if (1 && patch->patchID == 0) {
3762  int numAtoms = patch->numAtoms;
3763  double *fxyz = new double[3*numAtoms];
3764  double *fx, *fy, *fz;
3765  char fname[64], remark[128];
3766  int step = patch->flags.step;
3767 
3768  fx = patch->patchDataSOA.f_slow_x;
3769  fy = patch->patchDataSOA.f_slow_y;
3770  fz = patch->patchDataSOA.f_slow_z;
3771  for (int i=0; i < numAtoms; i++) {
3772  fxyz[3*i ] = fx[i];
3773  fxyz[3*i+1] = fy[i];
3774  fxyz[3*i+2] = fz[i];
3775  }
3776  sprintf(fname, "fslow_soa_%d.bin", step);
3777  sprintf(remark, "SOA slow forces, step %d\n", step);
3778  TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3779 
3780  fx = patch->patchDataSOA.f_nbond_x;
3781  fy = patch->patchDataSOA.f_nbond_y;
3782  fz = patch->patchDataSOA.f_nbond_z;
3783  for (int i=0; i < numAtoms; i++) {
3784  fxyz[3*i ] = fx[i];
3785  fxyz[3*i+1] = fy[i];
3786  fxyz[3*i+2] = fz[i];
3787  }
3788  sprintf(fname, "fnbond_soa_%d.bin", step);
3789  sprintf(remark, "SOA nonbonded forces, step %d\n", step);
3790  TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3791 
3792  fx = patch->patchDataSOA.f_normal_x;
3793  fy = patch->patchDataSOA.f_normal_y;
3794  fz = patch->patchDataSOA.f_normal_z;
3795  for (int i=0; i < numAtoms; i++) {
3796  fxyz[3*i ] = fx[i];
3797  fxyz[3*i+1] = fy[i];
3798  fxyz[3*i+2] = fz[i];
3799  }
3800  sprintf(fname, "fnormal_soa_%d.bin", step);
3801  sprintf(remark, "SOA normal forces, step %d\n", step);
3802  TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3803 
3804  delete[] fxyz;
3805  }
3806 #endif
3807 
3808 #if 0
3809  //Will print forces here after runComputeObjects
3810  if(nstep == 1){
3811  fprintf(stderr, "CPU force arrays for alanin\n" );
3812  for(int i = 0; i < patch->patchDataSOA.numAtoms; i++){
3813  fprintf(stderr, "f[%i] = %lf %lf %lf | %lf %lf %lf | %lf %lf %lf\n", i,
3814  patch->patchDataSOA.f_normal_x[i], patch->patchDataSOA.f_normal_y[i], patch->patchDataSOA.f_normal_z[i],
3815  patch->patchDataSOA.f_nbond_x[i], patch->patchDataSOA.f_nbond_y[i], patch->patchDataSOA.f_nbond_z[i],
3816  patch->patchDataSOA.f_slow_x[i], patch->patchDataSOA.f_slow_y[i], patch->patchDataSOA.f_slow_z[i]);
3817  }
3818  }
3819 #endif
3820 
// NOTE(review): the guard opening this block (original line 3821, presumably
// testing patch->flags.savePairlists) is elided in this listing.
3822  pairlistsAreValid = 1;
3823  pairlistsAge = 0;
3824  }
3825  // For multigrator, do not age pairlist during pressure step
3826  // NOTE: for non-multigrator pressureStep = 0 always
3827  if ( pairlistsAreValid /* && !pressureStep */ ) ++pairlistsAge;
3828 
3829  // XXX missing lonepairs
3830  // XXX missing Molly
3831  // XXX missing Lowe-Andersen
3832 }
3833 
// NOTE(review): the function signature (original lines 3834-3838) is elided
// by the extraction.  From the body this appears to be the SOA variant of
// stochastic velocity rescaling (it reads broadcast->stochRescaleCoefficient
// and resets stochRescale_count) — confirm against the original source.
// The `if (...)` opening the rescaling block is also elided (closing brace
// at original line 3855).
3839 {
3842  double * __restrict vel_x = patch->patchDataSOA.vel_x;
3843  double * __restrict vel_y = patch->patchDataSOA.vel_y;
3844  double * __restrict vel_z = patch->patchDataSOA.vel_z;
3845  int numAtoms = patch->patchDataSOA.numAtoms;
3846  // Blocking receive for the temperature coupling coefficient.
3847  BigReal velrescaling = broadcast->stochRescaleCoefficient.get(step);
3848  DebugM(4, "stochastically rescaling velocities at step " << step << " by " << velrescaling << "\n");
// Uniformly scale every velocity component by the received coefficient.
3849  for ( int i = 0; i < numAtoms; ++i ) {
3850  vel_x[i] *= velrescaling;
3851  vel_y[i] *= velrescaling;
3852  vel_z[i] *= velrescaling;
3853  }
3854  stochRescale_count = 0;
3855  }
3856 }
3857 
3858 //
3859 // end SOA code
3860 //
3862 
3863 #endif // SEQUENCER_SOA
3864 
3865 
3866 extern int eventEndOfTimeStep;
3867 
// NOTE(review): scraped listing with embedded line numbers; some numbered
// lines are elided (e.g. 3948/3950/3952, 3975, 4003-4004, 4039, 4126, 4129,
// 4170, 4203) — verify against the original Sequencer.C before editing.
//
// Main per-patch velocity-Verlet integration loop with multiple time
// stepping (MTS): performs the initial half-kick setup when scriptTask is
// SCRIPT_RUN, then loops over timesteps doing kick/drift/force/kick with
// optional thermostats, barostats, constraints, and load balancing.
3868 void Sequencer::integrate(int scriptTask) {
3869  char traceNote[24];
3870  char tracePrefix[20];
3871  sprintf(tracePrefix, "p:%d,s:",patch->patchID);
3872 // patch->write_tip4_props();
3873 
3874  //
3875  // DJH: Copy all data into SOA (structure of arrays)
3876  // from AOS (array of structures) data structure.
3877  //
3878  //patch->copy_all_to_SOA();
3879 
3880 #ifdef TIMER_COLLECTION
3881  TimerSet& t = patch->timerSet;
3882 #endif
3883  TIMER_INIT_WIDTH(t, KICK, simParams->timerBinWidth);
3884  TIMER_INIT_WIDTH(t, MAXMOVE, simParams->timerBinWidth);
3885  TIMER_INIT_WIDTH(t, DRIFT, simParams->timerBinWidth);
3886  TIMER_INIT_WIDTH(t, PISTON, simParams->timerBinWidth);
3887  TIMER_INIT_WIDTH(t, SUBMITHALF, simParams->timerBinWidth);
3888  TIMER_INIT_WIDTH(t, VELBBK1, simParams->timerBinWidth);
3889  TIMER_INIT_WIDTH(t, VELBBK2, simParams->timerBinWidth);
3890  TIMER_INIT_WIDTH(t, RATTLE1, simParams->timerBinWidth);
3891  TIMER_INIT_WIDTH(t, SUBMITFULL, simParams->timerBinWidth);
3892  TIMER_INIT_WIDTH(t, SUBMITCOLLECT, simParams->timerBinWidth);
3893 
3894  int &step = patch->flags.step;
3895  step = simParams->firstTimestep;
3896 
3897  // drag switches
3898  const Bool rotDragOn = simParams->rotDragOn;
3899  const Bool movDragOn = simParams->movDragOn;
3900 
3901  const int commOnly = simParams->commOnly;
3902 
3903  int &maxForceUsed = patch->flags.maxForceUsed;
3904  int &maxForceMerged = patch->flags.maxForceMerged;
3905  maxForceUsed = Results::normal;
3906  maxForceMerged = Results::normal;
3907 
3908  const int numberOfSteps = simParams->N;
3909  const int stepsPerCycle = simParams->stepsPerCycle;
3910  const BigReal timestep = simParams->dt;
3911 
3912  // what MTS method?
3913  const int staleForces = ( simParams->MTSAlgorithm == NAIVE );
3914 
// Nonbonded forces are evaluated every nonbondedFrequency steps; with stale
// (NAIVE MTS) forces the per-kick step length stays at the base timestep.
3915  const int nonbondedFrequency = simParams->nonbondedFrequency;
3916  slowFreq = nonbondedFrequency;
3917  const BigReal nbondstep = timestep * (staleForces?1:nonbondedFrequency);
3918  int &doNonbonded = patch->flags.doNonbonded;
3919  doNonbonded = (step >= numberOfSteps) || !(step%nonbondedFrequency);
3920  if ( nonbondedFrequency == 1 ) maxForceMerged = Results::nbond;
3921  if ( doNonbonded ) maxForceUsed = Results::nbond;
3922 
3923  // Do we do full electrostatics?
3924  const int dofull = ( simParams->fullElectFrequency ? 1 : 0 );
3925  const int fullElectFrequency = simParams->fullElectFrequency;
3926  if ( dofull ) slowFreq = fullElectFrequency;
3927  const BigReal slowstep = timestep * (staleForces?1:fullElectFrequency);
3928  int &doFullElectrostatics = patch->flags.doFullElectrostatics;
3929  doFullElectrostatics = (dofull && ((step >= numberOfSteps) || !(step%fullElectFrequency)));
3930  if ( dofull && (fullElectFrequency == 1) && !(simParams->mollyOn) )
3931  maxForceMerged = Results::slow;
3932  if ( doFullElectrostatics ) maxForceUsed = Results::slow;
3933 
3934  // If doing LJ-PME, track doFullElectrostatics
3935  int &doFullDispersion = patch->flags.doFullDispersion;
3936 if ( ! simParams->LJPMESerial ) {
3937  doFullDispersion = (simParams->LJPMEOn && doFullElectrostatics);
3938 }
3939 
3940 //#ifndef UPPER_BOUND
3941  const Bool accelMDOn = simParams->accelMDOn;
3942  const Bool accelMDdihe = simParams->accelMDdihe;
3943  const Bool accelMDdual = simParams->accelMDdual;
3944  if ( accelMDOn && (accelMDdihe || accelMDdual)) maxForceUsed = Results::amdf;
3945 
3946  // Is adaptive tempering on?
3947  const Bool adaptTempOn = simParams->adaptTempOn;
// NOTE(review): the statement bodies of this if/else-if (original lines
// 3948, 3950, 3952) are elided in this listing.
3949  if (simParams->langevinOn)
3951  else if (simParams->rescaleFreq > 0)
3953 
3954 
3955  int &doMolly = patch->flags.doMolly;
3956  doMolly = simParams->mollyOn && doFullElectrostatics;
3957  // BEGIN LA
3958  int &doLoweAndersen = patch->flags.doLoweAndersen;
3959  doLoweAndersen = simParams->loweAndersenOn && doNonbonded;
3960  // END LA
3961 
3962  int &doGBIS = patch->flags.doGBIS;
3963  doGBIS = simParams->GBISOn;
3964 
3965  int &doLCPO = patch->flags.doLCPO;
3966  doLCPO = simParams->LCPOOn;
3967 
3968  int zeroMomentum = simParams->zeroMomentum;
3969 
3970  // Do we need to return forces to TCL script or Colvar module?
3971  int doTcl = simParams->tclForcesOn;
3972  int doColvars = simParams->colvarsOn;
3973 //#endif
3974  int doGlobal = doTcl || doColvars;
3976 
3977  // Bother to calculate energies?
3978  int &doEnergy = patch->flags.doEnergy;
3979  int energyFrequency = simParams->computeEnergies;
3980 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
3981  if(simParams->alchOn) energyFrequency = NAMD_gcd(energyFrequency, simParams->alchOutFreq);
3982 #endif
3983 #ifndef UPPER_BOUND
3984  const int reassignFreq = simParams->reassignFreq;
3985 #endif
3986 
3987  int &doVirial = patch->flags.doVirial;
3988  doVirial = 1;
3989 
// Initial setup for a fresh run: constrain starting positions, compute the
// first forces, and perform the opening half-kick sequence of the integrator.
3990  if ( scriptTask == SCRIPT_RUN ) {
3991 
3992 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
3993 
3994 #ifndef UPPER_BOUND
3995 // printf("Doing initial rattle\n");
3996 #ifndef UPPER_BOUND
3997 D_MSG("rattle1()");
3998  TIMER_START(t, RATTLE1);
3999  rattle1(0.,0); // enforce rigid bond constraints on initial positions
4000  TIMER_STOP(t, RATTLE1);
4001 #endif
4002 
// NOTE(review): the opening of this call/condition (original lines 4003-4004)
// is elided in this listing.
4005  patch->atom.begin(),patch->atom.end());
4006  }
4007 
4008  if ( !commOnly && ( reassignFreq>0 ) && ! (step%reassignFreq) ) {
4009  reassignVelocities(timestep,step);
4010  }
4011 #endif
4012 
4013  doEnergy = ! ( step % energyFrequency );
4014 #ifndef UPPER_BOUND
4015  if ( accelMDOn && !accelMDdihe ) doEnergy=1;
4016  //Update energy every timestep for adaptive tempering
4017  if ( adaptTempOn ) doEnergy=1;
4018 #endif
4019 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4020 D_MSG("runComputeObjects()");
4021  runComputeObjects(1,step<numberOfSteps); // must migrate here!
4022 #ifndef UPPER_BOUND
4023  rescaleaccelMD(step, doNonbonded, doFullElectrostatics); // for accelMD
4024  adaptTempUpdate(step); // update adaptive tempering temperature
4025 #endif
4026 
4027 #ifndef UPPER_BOUND
4028  if ( staleForces || doGlobal ) {
4029  if ( doNonbonded ) saveForce(Results::nbond);
4030  if ( doFullElectrostatics ) saveForce(Results::slow);
4031  }
4032 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4033  if ( ! commOnly ) {
4034 D_MSG("newtonianVelocities()");
4035  TIMER_START(t, KICK);
4036  newtonianVelocities(-0.5,timestep,nbondstep,slowstep,0,1,1);
4037  TIMER_STOP(t, KICK);
4038  }
4040 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4041 #ifndef UPPER_BOUND
4042 D_MSG("rattle1()");
4043  TIMER_START(t, RATTLE1);
4044  rattle1(-timestep,0);
4045  TIMER_STOP(t, RATTLE1);
4046 #endif
4047 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4048 D_MSG("submitHalfstep()");
4049  TIMER_START(t, SUBMITHALF);
4050  submitHalfstep(step);
4051  TIMER_STOP(t, SUBMITHALF);
4052 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4053  if ( ! commOnly ) {
4054 D_MSG("newtonianVelocities()");
4055  TIMER_START(t, KICK);
4056  newtonianVelocities(1.0,timestep,nbondstep,slowstep,0,1,1);
4057  TIMER_STOP(t, KICK);
4058  }
4059 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4060 D_MSG("rattle1()");
4061  TIMER_START(t, RATTLE1);
4062  rattle1(timestep,1);
4063  TIMER_STOP(t, RATTLE1);
4064  if (doGlobal) // include constraint forces
4065  computeGlobal->saveTotalForces(patch);
4066 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4067 D_MSG("submitHalfstep()");
4068  TIMER_START(t, SUBMITHALF);
4069  submitHalfstep(step);
4070  TIMER_STOP(t, SUBMITHALF);
4071  if ( zeroMomentum && doFullElectrostatics ) submitMomentum(step);
4072  if ( ! commOnly ) {
4073 D_MSG("newtonianVelocities()");
4074  TIMER_START(t, KICK);
4075  newtonianVelocities(-0.5,timestep,nbondstep,slowstep,0,1,1);
4076  TIMER_STOP(t, KICK);
4077  }
4078 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4079 #endif
4080 D_MSG("submitReductions()");
4081  TIMER_START(t, SUBMITFULL);
4082  submitReductions(step);
4083  TIMER_STOP(t, SUBMITFULL);
4084 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4085 #ifndef UPPER_BOUND
4086  if(0){ // if(traceIsOn()){
4087  traceUserEvent(eventEndOfTimeStep);
4088  sprintf(traceNote, "%s%d",tracePrefix,step);
4089  traceUserSuppliedNote(traceNote);
4090  }
4091 #endif
4092  rebalanceLoad(step);
4093 
4094  } // scriptTask == SCRIPT_RUN
4095 
4096 #ifndef UPPER_BOUND
4097  bool doMultigratorRattle = false;
4098 #endif
4099 
4100  //
4101  // DJH: There are a lot of mod operations below and elsewhere to
4102  // test step number against the frequency of something happening.
4103  // Mod and integer division are expensive!
4104  // Might be better to replace with counters and test equality.
4105  //
4106 #if 0
4107  for(int i = 0; i < NamdProfileEvent::EventsCount; i++)
4108  CkPrintf("-------------- [%d] %s -------------\n", i, NamdProfileEventStr[i]);
4109 #endif
4110 
4111 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
4112  int& eon = patch->flags.event_on;
4113  int epid = (simParams->beginEventPatchID <= patch->getPatchID()
4114  && patch->getPatchID() <= simParams->endEventPatchID);
4115  int beginStep = simParams->beginEventStep;
4116  int endStep = simParams->endEventStep;
4117  bool controlProfiling = patch->getPatchID() == 0;
4118 #endif
4119 
// Main timestep loop.
4120  for ( ++step; step <= numberOfSteps; ++step )
4121  {
4122 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
4123  eon = epid && (beginStep < step && step <= endStep);
4124 
// NOTE(review): the bodies of these two profiling-control blocks (original
// lines 4126 and 4129) are elided in this listing.
4125  if (controlProfiling && step == beginStep) {
4127  }
4128  if (controlProfiling && step == endStep) {
4130  }
4131  char buf[32];
4132  sprintf(buf, "%s: %d", NamdProfileEventStr[NamdProfileEvent::INTEGRATE_1], patch->getPatchID());
4133  NAMD_EVENT_START_EX(eon, NamdProfileEvent::INTEGRATE_1, buf);
4134 #endif
4135  DebugM(3,"for step "<<step<< " dGlobal " << doGlobal<<"\n"<<endi);
4136 #ifndef UPPER_BOUND
4137  rescaleVelocities(step);
4138  tcoupleVelocities(timestep,step);
4139  if ( simParams->stochRescaleOn ) {
4140  stochRescaleVelocities(step);
4141  }
4142  berendsenPressure(step);
4143 
4144  if ( ! commOnly ) {
4145  TIMER_START(t, KICK);
4146  newtonianVelocities(0.5,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4147  TIMER_STOP(t, KICK);
4148  }
4149 
4150  // We do RATTLE here if multigrator thermostat was applied in the previous step
4151  if (doMultigratorRattle) rattle1(timestep, 1);
4152 
4153  /* reassignment based on half-step velocities
4154  if ( !commOnly && ( reassignFreq>0 ) && ! (step%reassignFreq) ) {
4155  addVelocityToPosition(0.5*timestep);
4156  reassignVelocities(timestep,step);
4157  addVelocityToPosition(0.5*timestep);
4158  rattle1(0.,0);
4159  rattle1(-timestep,0);
4160  addVelocityToPosition(-1.0*timestep);
4161  rattle1(timestep,0);
4162  } */
4163 
4164  TIMER_START(t, MAXMOVE);
4165  maximumMove(timestep);
4166  TIMER_STOP(t, MAXMOVE);
4167 
4168  NAMD_EVENT_STOP(eon, NamdProfileEvent::INTEGRATE_1); // integrate 1
4169 
// NOTE(review): the opening condition of this branch (original line 4170)
// is elided; the `} else {` below implies it selects the two-half-step
// (drift / Langevin / drift) path — confirm against the original source.
4171  if ( ! commOnly ) {
4172  TIMER_START(t, DRIFT);
4173  addVelocityToPosition(0.5*timestep);
4174  TIMER_STOP(t, DRIFT);
4175  }
4176  // We add an Ornstein-Uhlenbeck integration step for the case of BAOAB (Langevin)
4177  langevinVelocities(timestep);
4178 
4179  // There is a blocking receive inside of langevinPiston()
4180  // that might suspend the current thread of execution,
4181  // so split profiling around this conditional block.
4182  langevinPiston(step);
4183 
4184  if ( ! commOnly ) {
4185  TIMER_START(t, DRIFT);
4186  addVelocityToPosition(0.5*timestep);
4187  TIMER_STOP(t, DRIFT);
4188  }
4189  } else {
4190  // If Langevin is not used, take full time step directly instead of two half steps
4191  if ( ! commOnly ) {
4192  TIMER_START(t, DRIFT);
4193  addVelocityToPosition(timestep);
4194  TIMER_STOP(t, DRIFT);
4195  }
4196  }
4197 
4198  NAMD_EVENT_START(eon, NamdProfileEvent::INTEGRATE_2);
4199 
4200  // impose hard wall potential for Drude bond length
4201  hardWallDrude(timestep, 1);
4202 
4204 #endif // UPPER_BOUND
4205 
4206  doNonbonded = !(step%nonbondedFrequency);
4207  doFullElectrostatics = (dofull && !(step%fullElectFrequency));
4208 if ( ! simParams->LJPMESerial ) {
4209  // XXX in preparation for supporting LJ-PME with MTS
4210  doFullDispersion = (simParams->LJPMEOn && doFullElectrostatics);
4211 }
4212 
4213 #ifndef UPPER_BOUND
4214  if ( zeroMomentum && doFullElectrostatics ) {
4215  // There is a blocking receive inside of correctMomentum().
4216  correctMomentum(step,slowstep);
4217  }
4218 
4219  // There are NO sends in submitHalfstep() just local summation
4220  // into the Reduction struct.
4221  TIMER_START(t, SUBMITHALF);
4222  submitHalfstep(step);
4223  TIMER_STOP(t, SUBMITHALF);
4224 
4225  doMolly = simParams->mollyOn && doFullElectrostatics;
4226  // BEGIN LA
4227  doLoweAndersen = simParams->loweAndersenOn && doNonbonded;
4228  // END LA
4229 
4230  maxForceUsed = Results::normal;
4231  if ( doNonbonded ) maxForceUsed = Results::nbond;
4232  if ( doFullElectrostatics ) maxForceUsed = Results::slow;
4233  if ( accelMDOn && (accelMDdihe || accelMDdual)) maxForceUsed = Results::amdf;
4234 
4235  // Migrate Atoms on stepsPerCycle
4236  doEnergy = ! ( step % energyFrequency );
4237  if ( accelMDOn && !accelMDdihe ) doEnergy=1;
4238  if ( adaptTempOn ) doEnergy=1;
4239 
4240  // Multigrator
4241  if (simParams->multigratorOn) {
4242  doVirial = (!(step % energyFrequency) || ((simParams->outputPressure > 0) && !(step % simParams->outputPressure))
4243  || !(step % simParams->multigratorPressureFreq));
4244  doKineticEnergy = (!(step % energyFrequency) || !(step % simParams->multigratorTemperatureFreq));
4245  doMomenta = (simParams->outputMomenta > 0) && !(step % simParams->outputMomenta);
4246  } else {
4247  doVirial = 1;
4248  doKineticEnergy = 1;
4249  doMomenta = 1;
4250  }
4251 #endif
4252  NAMD_EVENT_STOP(eon, NamdProfileEvent::INTEGRATE_2); // integrate 2
4253 
4254  // The current thread of execution will suspend in runComputeObjects().
4255  runComputeObjects(!(step%stepsPerCycle),step<numberOfSteps);
4256 
4257  NAMD_EVENT_START(eon, NamdProfileEvent::INTEGRATE_3);
4258 
4259 #ifndef UPPER_BOUND
4260  rescaleaccelMD(step, doNonbonded, doFullElectrostatics); // for accelMD
4261 
4262  if ( staleForces || doGlobal ) {
4263  if ( doNonbonded ) saveForce(Results::nbond);
4264  if ( doFullElectrostatics ) saveForce(Results::slow);
4265  }
4266 
4267  // reassignment based on full-step velocities
4268  if ( !commOnly && ( reassignFreq>0 ) && ! (step%reassignFreq) ) {
4269  reassignVelocities(timestep,step);
4270  newtonianVelocities(-0.5,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4271  rattle1(-timestep,0);
4272  }
4273 
4274  if ( ! commOnly ) {
4275  TIMER_START(t, VELBBK1);
4276  langevinVelocitiesBBK1(timestep);
4277  TIMER_STOP(t, VELBBK1);
4278  TIMER_START(t, KICK);
4279  newtonianVelocities(1.0,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4280  TIMER_STOP(t, KICK);
4281  TIMER_START(t, VELBBK2);
4282  langevinVelocitiesBBK2(timestep);
4283  TIMER_STOP(t, VELBBK2);
4284  }
4285 
4286  // add drag to each atom's positions
4287  if ( ! commOnly && movDragOn ) addMovDragToPosition(timestep);
4288  if ( ! commOnly && rotDragOn ) addRotDragToPosition(timestep);
4289 
4290  TIMER_START(t, RATTLE1);
4291  rattle1(timestep,1);
4292  TIMER_STOP(t, RATTLE1);
4293  if (doGlobal) // include constraint forces
4294  computeGlobal->saveTotalForces(patch);
4295 
4296  TIMER_START(t, SUBMITHALF);
4297  submitHalfstep(step);
4298  TIMER_STOP(t, SUBMITHALF);
4299  if ( zeroMomentum && doFullElectrostatics ) submitMomentum(step);
4300 
4301  if ( ! commOnly ) {
4302  TIMER_START(t, KICK);
4303  newtonianVelocities(-0.5,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4304  TIMER_STOP(t, KICK);
4305  }
4306 
4307  // rattle2(timestep,step);
4308 #endif
4309 
4310  TIMER_START(t, SUBMITFULL);
4311  submitReductions(step);
4312  TIMER_STOP(t, SUBMITFULL);
4313  TIMER_START(t, SUBMITCOLLECT);
4314  submitCollections(step);
4315  TIMER_STOP(t, SUBMITCOLLECT);
4316 #ifndef UPPER_BOUND
4317  //Update adaptive tempering temperature
4318  adaptTempUpdate(step);
4319 
4320  // Multigrator temperature and pressure steps
4321  multigratorTemperature(step, 1);
4322  multigratorPressure(step, 1);
4323  multigratorPressure(step, 2);
4324  multigratorTemperature(step, 2);
4325  doMultigratorRattle = (simParams->multigratorOn && !(step % simParams->multigratorTemperatureFreq));
4326 
4327  NAMD_EVENT_STOP(eon, NamdProfileEvent::INTEGRATE_3); // integrate 3
4328 #endif
4329 
4330 #if CYCLE_BARRIER
4331  cycleBarrier(!((step+1) % stepsPerCycle), step);
4332 #elif PME_BARRIER
4333  cycleBarrier(doFullElectrostatics, step);
4334 #elif STEP_BARRIER
4335  cycleBarrier(1, step);
4336 #endif
4337 
4338 #ifndef UPPER_BOUND
4339  if(Node::Object()->specialTracing || simParams->statsOn){
4340  int bstep = simParams->traceStartStep;
4341  int estep = bstep + simParams->numTraceSteps;
4342  if(step == bstep || step == estep){
4343  traceBarrier(step);
4344  }
4345  }
4346 
4347 #ifdef MEASURE_NAMD_WITH_PAPI
4348  if(simParams->papiMeasure) {
4349  int bstep = simParams->papiMeasureStartStep;
4350  int estep = bstep + simParams->numPapiMeasureSteps;
4351  if(step == bstep || step==estep) {
4352  papiMeasureBarrier(step);
4353  }
4354  }
4355 #endif
4356 
4357  if(0){ // if(traceIsOn()){
4358  traceUserEvent(eventEndOfTimeStep);
4359  sprintf(traceNote, "%s%d",tracePrefix,step);
4360  traceUserSuppliedNote(traceNote);
4361  }
4362 #endif // UPPER_BOUND
4363  rebalanceLoad(step);
4364 
4365 #if PME_BARRIER
4366  // a step before PME
4367  cycleBarrier(dofull && !((step+1)%fullElectFrequency),step);
4368 #endif
4369 
4370 #if USE_HPM
4371  if(step == START_HPM_STEP)
4372  (CProxy_Node(CkpvAccess(BOCclass_group).node)).startHPM();
4373 
4374  if(step == STOP_HPM_STEP)
4375  (CProxy_Node(CkpvAccess(BOCclass_group).node)).stopHPM();
4376 #endif
4377 
4378  }
4379 
4380  TIMER_DONE(t);
4381 #ifdef TIMER_COLLECTION
4382  if (patch->patchID == SPECIAL_PATCH_ID) {
4383  printf("Timer collection reporting in microseconds for "
4384  "Patch %d\n", patch->patchID);
4385  TIMER_REPORT(t);
4386  }
4387 #endif // TIMER_COLLECTION
4388  //
4389  // DJH: Copy updates of SOA back into AOS.
4390  //
4391  //patch->copy_updates_to_AOS();
4392 }
4393 
4394 // add moving drag to each atom's position
// NOTE(review): the function signature (original line 4395, presumably
// Sequencer::addMovDragToPosition(BigReal timestep)) is elided in this
// listing.  Applies per-atom moving-drag displacement: global drag velocity
// scaled by each atom's drag vector and the timestep.
4396  FullAtom *atom = patch->atom.begin();
4397  int numAtoms = patch->numAtoms;
4398  Molecule *molecule = Node::Object()->molecule; // need its methods
4399  const BigReal movDragGlobVel = simParams->movDragGlobVel;
4400  const BigReal dt = timestep / TIMEFACTOR; // MUST be as in the integrator!
4401  Vector movDragVel, dragIncrement;
4402  for ( int i = 0; i < numAtoms; ++i )
4403  {
4404  // skip if fixed atom or zero drag attribute
4405  if ( (simParams->fixedAtomsOn && atom[i].atomFixed)
4406  || !(molecule->is_atom_movdragged(atom[i].id)) ) continue;
4407  molecule->get_movdrag_params(movDragVel, atom[i].id);
4408  dragIncrement = movDragGlobVel * movDragVel * dt;
4409  atom[i].position += dragIncrement;
4410  }
4411 }
4412 
4413 // add rotating drag to each atom's position
// NOTE(review): the function signature (original line 4414, presumably
// Sequencer::addRotDragToPosition(BigReal timestep)) is elided in this
// listing.  Rotates each dragged atom about its per-atom axis/pivot by an
// angle proportional to the global and per-atom drag velocities.
4415  FullAtom *atom = patch->atom.begin();
4416  int numAtoms = patch->numAtoms;
4417  Molecule *molecule = Node::Object()->molecule; // need its methods
4418  const BigReal rotDragGlobVel = simParams->rotDragGlobVel;
4419  const BigReal dt = timestep / TIMEFACTOR; // MUST be as in the integrator!
4420  BigReal rotDragVel, dAngle;
4421  Vector atomRadius;
4422  Vector rotDragAxis, rotDragPivot, dragIncrement;
4423  for ( int i = 0; i < numAtoms; ++i )
4424  {
4425  // skip if fixed atom or zero drag attribute
4426  if ( (simParams->fixedAtomsOn && atom[i].atomFixed)
4427  || !(molecule->is_atom_rotdragged(atom[i].id)) ) continue;
4428  molecule->get_rotdrag_params(rotDragVel, rotDragAxis, rotDragPivot, atom[i].id);
4429  dAngle = rotDragGlobVel * rotDragVel * dt;
// Normalize the axis, then move the atom along the tangent of its circular
// path: dx = (axis x r) * dAngle (small-angle linearization).
4430  rotDragAxis /= rotDragAxis.length();
4431  atomRadius = atom[i].position - rotDragPivot;
4432  dragIncrement = cross(rotDragAxis, atomRadius) * dAngle;
4433  atom[i].position += dragIncrement;
4434  }
4435 }
4436 
// NOTE(review): the function signature (original line 4437, presumably
// Sequencer::minimize()) is elided in this listing, as are several interior
// lines (4488, 4498-4499, 4517, 4566).  Conjugate-gradient-style energy
// minimization loop: first a "downhill" phase fixing bad contacts, then
// line minimization driven by coefficients broadcast from the Controller.
4438  //
4439  // DJH: Copy all data into SOA (structure of arrays)
4440  // from AOS (array of structures) data structure.
4441  //
4442  //patch->copy_all_to_SOA();
4443 
4444  const int numberOfSteps = simParams->N;
4445  const int stepsPerCycle = simParams->stepsPerCycle;
4446 #if 0 && defined(NODEGROUP_FORCE_REGISTER)
4447  // XXX DJH: This is a hack that is found to get GPU nonbonded
4448  // force calculation right for --with-single-node-cuda builds
4449  const int stepsPerCycle_save = stepsPerCycle;
4450  simParams->stepsPerCycle = 1;
4451 #endif
4452  int &step = patch->flags.step;
4453  step = simParams->firstTimestep;
4454 
4455  int &maxForceUsed = patch->flags.maxForceUsed;
4456  int &maxForceMerged = patch->flags.maxForceMerged;
4457  maxForceUsed = Results::normal;
4458  maxForceMerged = Results::normal;
4459  int &doNonbonded = patch->flags.doNonbonded;
4460  doNonbonded = 1;
4461  maxForceUsed = Results::nbond;
4462  maxForceMerged = Results::nbond;
4463  const int dofull = ( simParams->fullElectFrequency ? 1 : 0 );
4464  int &doFullElectrostatics = patch->flags.doFullElectrostatics;
4465  doFullElectrostatics = dofull;
4466  if ( dofull ) {
4467  maxForceMerged = Results::slow;
4468  maxForceUsed = Results::slow;
4469  }
4470  int &doMolly = patch->flags.doMolly;
4471  doMolly = simParams->mollyOn && doFullElectrostatics;
4472  int &doMinimize = patch->flags.doMinimize;
4473  doMinimize = 1;
4474  // BEGIN LA
4475  int &doLoweAndersen = patch->flags.doLoweAndersen;
4476  doLoweAndersen = 0;
4477  // END LA
4478 
4479  int &doGBIS = patch->flags.doGBIS;
4480  doGBIS = simParams->GBISOn;
4481 
4482  int &doLCPO = patch->flags.doLCPO;
4483  doLCPO = simParams->LCPOOn;
4484 
4485  int doTcl = simParams->tclForcesOn;
4486  int doColvars = simParams->colvarsOn;
4487  int doGlobal = doTcl || doColvars;
4489 
4490  int &doEnergy = patch->flags.doEnergy;
4491  doEnergy = 1;
4492 
4493  // Do this to stabilize the minimizer, whether or not the user
4494  // wants rigid bond constraints enabled for dynamics.
4495  // In order to enforce, we have to call HomePatch::rattle1() directly.
4496  patch->rattle1(0.,0,0); // enforce rigid bond constraints on initial positions
4497 
// NOTE(review): the opening of this call/condition (original lines
// 4498-4499) is elided in this listing.
4500  patch->atom.begin(),patch->atom.end());
4501  }
4502 
4503  runComputeObjects(1,step<numberOfSteps); // must migrate here!
4504 
4505  if ( doGlobal ) {
4506 #ifdef DEBUG_MINIMIZE
4507  printf("doTcl = %d doColvars = %d\n", doTcl, doColvars);
4508 #endif
4509  if ( doNonbonded ) saveForce(Results::nbond);
4510  if ( doFullElectrostatics ) saveForce(Results::slow);
4511  computeGlobal->saveTotalForces(patch);
4512  }
4513 #ifdef DEBUG_MINIMIZE
4514  else { printf("No computeGlobal\n"); }
4515 #endif
4516 
// NOTE(review): the declaration of fmax2 (original line 4517) is elided in
// this listing; it is used below for the bad-contact force threshold.
4518 
4519  submitMinimizeReductions(step,fmax2);
4520  rebalanceLoad(step);
4521 
4522  int downhill = 1; // start out just fixing bad contacts
4523  int minSeq = 0;
4524  for ( ++step; step <= numberOfSteps; ++step ) {
4525  // Blocking receive for the minimization coefficient.
4526  BigReal c = broadcast->minimizeCoefficient.get(minSeq++);
4527 
4528  if ( downhill ) {
4529  if ( c ) minimizeMoveDownhill(fmax2);
4530  else {
4531  downhill = 0;
4532  fmax2 *= 10000.;
4533  }
4534  }
4535  if ( ! downhill ) {
4536  if ( ! c ) { // new direction
4537 
4538  // Blocking receive for the minimization coefficient.
4539  c = broadcast->minimizeCoefficient.get(minSeq++);
4540 
4541  newMinimizeDirection(c); // v = c * v + f
4542 
4543  // Blocking receive for the minimization coefficient.
4544  c = broadcast->minimizeCoefficient.get(minSeq++);
4545 
4546  } // same direction
4547  newMinimizePosition(c); // x = x + c * v
4548  }
4549 
4550  runComputeObjects(!(step%stepsPerCycle),step<numberOfSteps);
4551  if ( doGlobal ) {
4552  if ( doNonbonded ) saveForce(Results::nbond);
4553  if ( doFullElectrostatics ) saveForce(Results::slow);
4554  computeGlobal->saveTotalForces(patch);
4555  }
4556  submitMinimizeReductions(step,fmax2);
4557  submitCollections(step, 1); // write out zeros for velocities
4558  rebalanceLoad(step);
4559  }
4560  quenchVelocities(); // zero out bogus velocity
4561 
4562  doMinimize = 0;
4563 
4564 #if 0
4565  // when using CUDASOAintegrate, need to update SOA data structures
4567  patch->copy_atoms_to_SOA();
4568  }
4569 #endif
4570 
4571 #if 0 && defined(NODEGROUP_FORCE_REGISTER)
4572  // XXX DJH: all patches in a PE are writing into simParams
4573  // so this hack needs a guard
4574  simParams->stepsPerCycle = stepsPerCycle_save;
4575 #endif
4576  //
4577  // DJH: Copy updates of SOA back into AOS.
4578  //
4579  //patch->copy_updates_to_AOS();
4580 }
4581 
4582 // x = x + 0.1 * unit(f) for large f
// NOTE(review): the function signature (original line 4583, presumably
// Sequencer::minimizeMoveDownhill(BigReal fmax2)) is elided in this listing.
// Nudges atoms experiencing very large forces (|f|^2 > fmax2) a fixed 0.1
// distance along the force direction, dragging each parent's hydrogen
// group with it, then re-enforces rigid-bond constraints.
4584 
4585  FullAtom *a = patch->atom.begin();
4586  Force *f1 = patch->f[Results::normal].begin(); // includes nbond and slow
4587  int numAtoms = patch->numAtoms;
4588 
4589  for ( int i = 0; i < numAtoms; ++i ) {
4590  if ( simParams->fixedAtomsOn && a[i].atomFixed ) continue;
4591  Force f = f1[i];
4592  if ( f.length2() > fmax2 ) {
4593  a[i].position += ( 0.1 * f.unit() );
4594  int hgs = a[i].hydrogenGroupSize; // 0 if not parent
4595  for ( int j=1; j<hgs; ++j ) {
4596  a[++i].position += ( 0.1 * f.unit() );
4597  }
4598  }
4599  }
4600 
4601  patch->rattle1(0.,0,0);
4602 }
4603 
4604 // v = c * v + f
// NOTE(review): the function signature (original line 4605, presumably
// Sequencer::newMinimizeDirection(BigReal c)) is elided in this listing.
// Builds the new conjugate search direction in the velocity array
// (v = c*v + f), constrains it, tracks the maximum |v|^2 for the global
// step-size reduction, and keeps hydrogen groups moving with their parent.
4606  FullAtom *a = patch->atom.begin();
4607  Force *f1 = patch->f[Results::normal].begin(); // includes nbond and slow
4608  const bool fixedAtomsOn = simParams->fixedAtomsOn;
4609  const bool drudeHardWallOn = simParams->drudeHardWallOn;
4610  int numAtoms = patch->numAtoms;
4611  BigReal maxv2 = 0.;
4612 
4613  for ( int i = 0; i < numAtoms; ++i ) {
4614  a[i].velocity *= c;
4615  a[i].velocity += f1[i];
// Drude particles (mass in (0.05, 1.0)) follow their parent's direction.
4616  if ( drudeHardWallOn && i && (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) { // drude particle
4617  a[i].velocity = a[i-1].velocity;
4618  }
4619  if ( fixedAtomsOn && a[i].atomFixed ) a[i].velocity = 0;
4620  BigReal v2 = a[i].velocity.length2();
4621  if ( v2 > maxv2 ) maxv2 = v2;
4622  }
4623 
// Project the direction onto the rigid-bond constraint manifold.
4624  { Tensor virial; patch->minimize_rattle2( 0.1 * TIMEFACTOR / sqrt(maxv2), &virial); }
4625 
// Recompute the maximum after constraint projection.
4626  maxv2 = 0.;
4627  for ( int i = 0; i < numAtoms; ++i ) {
4628  if ( drudeHardWallOn && i && (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) { // drude particle
4629  a[i].velocity = a[i-1].velocity;
4630  }
4631  if ( fixedAtomsOn && a[i].atomFixed ) a[i].velocity = 0;
4632  BigReal v2 = a[i].velocity.length2();
4633  if ( v2 > maxv2 ) maxv2 = v2;
4634  }
4635 
4636  min_reduction->max(0,maxv2);
4637  min_reduction->submit();
4638 
4639  // prevent hydrogens from being left behind
4640  BigReal fmax2 = 0.01 * TIMEFACTOR * TIMEFACTOR * TIMEFACTOR * TIMEFACTOR;
4641  // int adjustCount = 0;
4642  int hgs;
4643  for ( int i = 0; i < numAtoms; i += hgs ) {
4644  hgs = a[i].hydrogenGroupSize;
4645  BigReal minChildVel = a[i].velocity.length2();
4646  if ( minChildVel < fmax2 ) continue;
4647  int adjustChildren = 1;
4648  for ( int j = i+1; j < (i+hgs); ++j ) {
4649  if ( a[j].velocity.length2() > minChildVel ) adjustChildren = 0;
4650  }
4651  if ( adjustChildren ) {
4652  // if ( hgs > 1 ) ++adjustCount;
4653  for ( int j = i+1; j < (i+hgs); ++j ) {
4654  if (a[i].mass < 0.01) continue; // lone pair
4655  a[j].velocity = a[i].velocity;
4656  }
4657  }
4658  }
4659  // if (adjustCount) CkPrintf("Adjusting %d hydrogen groups\n", adjustCount);
4660 
4661 }
4662 
4663 // x = x + c * v
// NOTE(review): original line 4664 (the function signature) was dropped by
// the doxygen extraction.
// Advance positions along the minimization direction (stored in velocity).
// With the Drude hard wall, Drude positions are temporarily expressed
// relative to their parent so rattle1 sees the parent-relative coordinate,
// then converted back to absolute coordinates.
4665  FullAtom *a = patch->atom.begin();
4666  int numAtoms = patch->numAtoms;
4667 
4668  for ( int i = 0; i < numAtoms; ++i ) {
4669  a[i].position += c * a[i].velocity;
4670  }
4671 
4672  if ( simParams->drudeHardWallOn ) {
4673  for ( int i = 1; i < numAtoms; ++i ) {
4674  if ( (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) { // drude particle
4675  a[i].position -= a[i-1].position;
4676  }
4677  }
4678  }
4679 
4680  patch->rattle1(0.,0,0);
4681 
4682  if ( simParams->drudeHardWallOn ) {
4683  for ( int i = 1; i < numAtoms; ++i ) {
4684  if ( (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) { // drude particle
4685  a[i].position += a[i-1].position;
4686  }
4687  }
4688  }
4689 }
4690 
4691 // v = 0
// NOTE(review): original line 4692 (the function signature) is missing from
// the doxygen extraction.  Zeroes every atom's velocity; used after
// minimization to clear the search-direction values stored there.
4693  FullAtom *a = patch->atom.begin();
4694  int numAtoms = patch->numAtoms;
4695 
4696  for ( int i = 0; i < numAtoms; ++i ) {
4697  a[i].velocity = 0;
4698  }
4699 }
4700 
// NOTE(review): original line 4701 (this function's signature, a momentum
// submission routine) was dropped by the doxygen extraction.
4702 
// Accumulate the patch's total momentum (and a mass-like normalizer) and
// contribute it to the reduction.  With zeroMomentumAlt the normalizer is
// the atom count instead of the total mass.
4703  FullAtom *a = patch->atom.begin();
4704  const int numAtoms = patch->numAtoms;
4705 
4706  Vector momentum = 0;
4707  BigReal mass = 0;
4708 if ( simParams->zeroMomentumAlt ) {
4709  for ( int i = 0; i < numAtoms; ++i ) {
4710  momentum += a[i].mass * a[i].velocity;
4711  mass += 1.;
4712  }
4713 } else {
4714  for ( int i = 0; i < numAtoms; ++i ) {
4715  momentum += a[i].mass * a[i].velocity;
4716  mass += a[i].mass;
4717  }
4718 }
4719 
4720  ADD_VECTOR_OBJECT(reduction,REDUCTION_HALFSTEP_MOMENTUM,momentum);
// NOTE(review): original line 4721 is missing here — presumably the
// reduction contribution for 'mass'; verify against upstream Sequencer.C.
4722 }
4723 
4724 void Sequencer::correctMomentum(int step, BigReal drifttime) {
4725 
4726  //
4727  // DJH: This test should be done in SimParameters.
4728  //
4729  if ( simParams->fixedAtomsOn )
4730  NAMD_die("Cannot zero momentum when fixed atoms are present.");
4731 
4732  // Blocking receive for the momentum correction vector.
4733  const Vector dv = broadcast->momentumCorrection.get(step);
4734 
4735  const Vector dx = dv * ( drifttime / TIMEFACTOR );
4736 
4737  FullAtom *a = patch->atom.begin();
4738  const int numAtoms = patch->numAtoms;
4739 
4740 if ( simParams->zeroMomentumAlt ) {
4741  for ( int i = 0; i < numAtoms; ++i ) {
4742  a[i].velocity += dv * a[i].recipMass;
4743  a[i].position += dx * a[i].recipMass;
4744  }
4745 } else {
4746  for ( int i = 0; i < numAtoms; ++i ) {
4747  a[i].velocity += dv;
4748  a[i].position += dx;
4749  }
4750 }
4751 
4752 }
4753 
4754 // --------- For Multigrator ---------
4755 void Sequencer::scalePositionsVelocities(const Tensor& posScale, const Tensor& velScale) {
4756  FullAtom *a = patch->atom.begin();
4757  int numAtoms = patch->numAtoms;
4758  Position origin = patch->lattice.origin();
4759  if ( simParams->fixedAtomsOn ) {
4760  NAMD_bug("Sequencer::scalePositionsVelocities, fixed atoms not implemented");
4761  }
4762  if ( simParams->useGroupPressure ) {
4763  int hgs;
4764  for ( int i = 0; i < numAtoms; i += hgs ) {
4765  hgs = a[i].hydrogenGroupSize;
4766  Position pos_cm(0.0, 0.0, 0.0);
4767  Velocity vel_cm(0.0, 0.0, 0.0);
4768  BigReal m_cm = 0.0;
4769  for (int j=0;j < hgs;++j) {
4770  m_cm += a[i+j].mass;
4771  pos_cm += a[i+j].mass*a[i+j].position;
4772  vel_cm += a[i+j].mass*a[i+j].velocity;
4773  }
4774  pos_cm /= m_cm;
4775  vel_cm /= m_cm;
4776  pos_cm -= origin;
4777  Position dpos = posScale*pos_cm;
4778  Velocity dvel = velScale*vel_cm;
4779  for (int j=0;j < hgs;++j) {
4780  a[i+j].position += dpos;
4781  a[i+j].velocity += dvel;
4782  }
4783  }
4784  } else {
4785  for ( int i = 0; i < numAtoms; i++) {
4786  a[i].position += posScale*(a[i].position-origin);
4787  a[i].velocity = velScale*a[i].velocity;
4788  }
4789  }
4790 }
4791 
4792 void Sequencer::multigratorPressure(int step, int callNumber) {
4793 // Calculate new positions, momenta, and volume using positionRescaleFactor and
4794 // velocityRescaleTensor values returned from Controller::multigratorPressureCalcScale()
// NOTE(review): doxygen listing artifact — original line 4795 is missing
// here; the extra closing brace at original line 4924 below implies it was
// an opening conditional (likely a multigrator on/frequency guard).
4796  FullAtom *a = patch->atom.begin();
4797  int numAtoms = patch->numAtoms;
4798 
4799  // Blocking receive (get) scaling factors from Controller
4800  Tensor scaleTensor = (callNumber == 1) ? broadcast->positionRescaleFactor.get(step) : broadcast->positionRescaleFactor2.get(step);
4801  Tensor velScaleTensor = (callNumber == 1) ? broadcast->velocityRescaleTensor.get(step) : broadcast->velocityRescaleTensor2.get(step);
4802  Tensor posScaleTensor = scaleTensor;
4803  posScaleTensor -= Tensor::identity();
4804  if (simParams->useGroupPressure) {
4805  velScaleTensor -= Tensor::identity();
4806  }
4807 
4808  // Scale volume
4809  patch->lattice.rescale(scaleTensor);
4810  // Scale positions and velocities
4811  scalePositionsVelocities(posScaleTensor, velScaleTensor);
4812 
4813  if (!patch->flags.doFullElectrostatics) NAMD_bug("Sequencer::multigratorPressure, doFullElectrostatics must be true");
4814 
4815  // Calculate new forces
4816  // NOTE: We should not need to migrate here since any migration should have happened in the
4817  // previous call to runComputeObjects inside the MD loop in Sequencer::integrate()
4818  const int numberOfSteps = simParams->N;
4819  const int stepsPerCycle = simParams->stepsPerCycle;
4820  runComputeObjects(0 , step<numberOfSteps, 1);
4821 
4822  reduction->item(REDUCTION_ATOM_CHECKSUM) += numAtoms;
// NOTE(review): original line 4823 is missing from the extraction —
// presumably another reduction contribution; verify against upstream.
4824 
4825  // Virials etc.
4826  Tensor virialNormal;
4827  Tensor momentumSqrSum;
4828  BigReal kineticEnergy = 0;
4829  if ( simParams->pairInteractionOn ) {
4830  if ( simParams->pairInteractionSelf ) {
4831  for ( int i = 0; i < numAtoms; ++i ) {
4832  if ( a[i].partition != 1 ) continue;
4833  kineticEnergy += a[i].mass * a[i].velocity.length2();
4834  virialNormal.outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
4835  }
4836  }
4837  } else {
4838  for ( int i = 0; i < numAtoms; ++i ) {
// why: skip effectively massless particles (e.g. lone pairs)
4839  if (a[i].mass < 0.01) continue;
4840  kineticEnergy += a[i].mass * a[i].velocity.length2();
4841  virialNormal.outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
4842  }
4843  }
4844  if (!simParams->useGroupPressure) momentumSqrSum = virialNormal;
4845  kineticEnergy *= 0.5;
// NOTE(review): original line 4846 is missing here — likely the kinetic
// energy reduction contribution; verify against upstream Sequencer.C.
4847  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NORMAL, virialNormal);
4848 
4849  if ( simParams->fixedAtomsOn ) {
4850  Tensor fixVirialNormal;
4851  Tensor fixVirialNbond;
4852  Tensor fixVirialSlow;
4853  Vector fixForceNormal = 0;
4854  Vector fixForceNbond = 0;
4855  Vector fixForceSlow = 0;
4856 
4857  calcFixVirial(fixVirialNormal, fixVirialNbond, fixVirialSlow, fixForceNormal, fixForceNbond, fixForceSlow);
4858 
4859  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NORMAL, fixVirialNormal);
4860  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NBOND, fixVirialNbond);
4861  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_SLOW, fixVirialSlow);
4862  ADD_VECTOR_OBJECT(reduction, REDUCTION_EXT_FORCE_NORMAL, fixForceNormal);
4863  ADD_VECTOR_OBJECT(reduction, REDUCTION_EXT_FORCE_NBOND, fixForceNbond);
4864  ADD_VECTOR_OBJECT(reduction, REDUCTION_EXT_FORCE_SLOW, fixForceSlow);
4865  }
4866 
4867  // Internal virial and group momentum
4868  Tensor intVirialNormal;
4869  Tensor intVirialNormal2;
4870  Tensor intVirialNbond;
4871  Tensor intVirialSlow;
4872  int hgs;
4873  for ( int i = 0; i < numAtoms; i += hgs ) {
4874  hgs = a[i].hydrogenGroupSize;
4875  int j;
4876  BigReal m_cm = 0;
4877  Position x_cm(0,0,0);
4878  Velocity v_cm(0,0,0);
4879  for ( j = i; j < (i+hgs); ++j ) {
4880  m_cm += a[j].mass;
4881  x_cm += a[j].mass * a[j].position;
4882  v_cm += a[j].mass * a[j].velocity;
4883  }
4884  if (simParams->useGroupPressure) momentumSqrSum.outerAdd(1.0/m_cm, v_cm, v_cm);
4885  x_cm /= m_cm;
4886  v_cm /= m_cm;
4887  if (simParams->fixedAtomsOn) NAMD_bug("Sequencer::multigratorPressure, simParams->fixedAtomsOn not implemented yet");
4888  if ( simParams->pairInteractionOn ) {
4889  if ( simParams->pairInteractionSelf ) {
4890  NAMD_bug("Sequencer::multigratorPressure, this part needs to be implemented correctly");
4891  for ( j = i; j < (i+hgs); ++j ) {
4892  if ( a[j].partition != 1 ) continue;
4893  BigReal mass = a[j].mass;
4894  Vector v = a[j].velocity;
4895  Vector dv = v - v_cm;
4896  intVirialNormal2.outerAdd (mass, v, dv);
4897  Vector dx = a[j].position - x_cm;
4898  intVirialNormal.outerAdd(1.0, patch->f[Results::normal][j], dx);
4899  intVirialNbond.outerAdd(1.0, patch->f[Results::nbond][j], dx);
4900  intVirialSlow.outerAdd(1.0, patch->f[Results::slow][j], dx);
4901  }
4902  }
4903  } else {
4904  for ( j = i; j < (i+hgs); ++j ) {
4905  BigReal mass = a[j].mass;
4906  Vector v = a[j].velocity;
4907  Vector dv = v - v_cm;
4908  intVirialNormal2.outerAdd(mass, v, dv);
4909  Vector dx = a[j].position - x_cm;
4910  intVirialNormal.outerAdd(1.0, patch->f[Results::normal][j], dx);
4911  intVirialNbond.outerAdd(1.0, patch->f[Results::nbond][j], dx);
4912  intVirialSlow.outerAdd(1.0, patch->f[Results::slow][j], dx);
4913  }
4914  }
4915  }
4916 
4917  ADD_TENSOR_OBJECT(reduction, REDUCTION_INT_VIRIAL_NORMAL, intVirialNormal);
4918  ADD_TENSOR_OBJECT(reduction, REDUCTION_INT_VIRIAL_NORMAL, intVirialNormal2);
4919  ADD_TENSOR_OBJECT(reduction, REDUCTION_INT_VIRIAL_NBOND, intVirialNbond);
4920  ADD_TENSOR_OBJECT(reduction, REDUCTION_INT_VIRIAL_SLOW, intVirialSlow);
4921  ADD_TENSOR_OBJECT(reduction, REDUCTION_MOMENTUM_SQUARED, momentumSqrSum);
4922 
4923  reduction->submit();
4924  }
4925 }
4926 
4927 void Sequencer::scaleVelocities(const BigReal velScale) {
4928  FullAtom *a = patch->atom.begin();
4929  int numAtoms = patch->numAtoms;
4930  for ( int i = 0; i < numAtoms; i++) {
4931  a[i].velocity *= velScale;
4932  }
4933 }
4934 
// NOTE(review): original line 4935 (this function's signature — it returns
// the kinetic energy, see the 'return' below) is missing from the doxygen
// extraction.
// Sums (1/2) m v^2 over the patch.  With pairInteractionOn and
// pairInteractionSelf only partition-1 atoms contribute; note that
// pairInteractionOn without pairInteractionSelf leaves the sum at zero.
4936  FullAtom *a = patch->atom.begin();
4937  int numAtoms = patch->numAtoms;
4938  BigReal kineticEnergy = 0.0;
4939  if ( simParams->pairInteractionOn ) {
4940  if ( simParams->pairInteractionSelf ) {
4941  for (int i = 0; i < numAtoms; ++i ) {
4942  if ( a[i].partition != 1 ) continue;
4943  kineticEnergy += a[i].mass * a[i].velocity.length2();
4944  }
4945  }
4946  } else {
4947  for (int i = 0; i < numAtoms; ++i ) {
4948  kineticEnergy += a[i].mass * a[i].velocity.length2();
4949  }
4950  }
4951  kineticEnergy *= 0.5;
4952  return kineticEnergy;
4953 }
4954 
4955 void Sequencer::multigratorTemperature(int step, int callNumber) {
// NOTE(review): original line 4956 is missing from the extraction; the
// extra closing brace at original line 4993 below implies it opened a
// conditional (likely a multigrator temperature-frequency guard).
4957  // Blocking receive (get) velocity scaling factor.
4958  BigReal velScale = (callNumber == 1) ? broadcast->velocityRescaleFactor.get(step) : broadcast->velocityRescaleFactor2.get(step);
4959  scaleVelocities(velScale);
4960  // Calculate new kineticEnergy
4961  BigReal kineticEnergy = calcKineticEnergy();
// NOTE(review): original line 4962 is missing here — presumably the
// kinetic-energy contribution to multigratorReduction; verify upstream.
4963  if (callNumber == 1 && !(step % simParams->multigratorPressureFreq)) {
4964  // If this is a pressure cycle, calculate new momentum squared sum
4965  FullAtom *a = patch->atom.begin();
4966  int numAtoms = patch->numAtoms;
4967  Tensor momentumSqrSum;
4968  if (simParams->useGroupPressure) {
4969  int hgs;
4970  for ( int i = 0; i < numAtoms; i += hgs ) {
4971  hgs = a[i].hydrogenGroupSize;
4972  int j;
4973  BigReal m_cm = 0;
4974  Position x_cm(0,0,0);
4975  Velocity v_cm(0,0,0);
4976  for ( j = i; j < (i+hgs); ++j ) {
4977  m_cm += a[j].mass;
4978  x_cm += a[j].mass * a[j].position;
4979  v_cm += a[j].mass * a[j].velocity;
4980  }
4981  momentumSqrSum.outerAdd(1.0/m_cm, v_cm, v_cm);
4982  }
4983  } else {
4984  for ( int i = 0; i < numAtoms; i++) {
4985  momentumSqrSum.outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
4986  }
4987  }
4988  ADD_TENSOR_OBJECT(multigratorReduction, MULTIGRATOR_REDUCTION_MOMENTUM_SQUARED, momentumSqrSum);
4989  }
4990  // Submit reductions (kineticEnergy and, if applicable, momentumSqrSum)
// NOTE(review): original line 4991 is missing — presumably
// multigratorReduction->submit(); verify against upstream Sequencer.C.
4992 
4993  }
4994 }
4995 // --------- End Multigrator ---------
4996 
4997 //
4998 // DJH: Calls one or more addForceToMomentum which in turn calls HomePatch
4999 // versions. We should inline to reduce the number of function calls.
5000 //
5001 void Sequencer::newtonianVelocities(BigReal stepscale, const BigReal timestep,
5002  const BigReal nbondstep,
5003  const BigReal slowstep,
5004  const int staleForces,
5005  const int doNonbonded,
5006  const int doFullElectrostatics)
5007 {
5008  NAMD_EVENT_RANGE_2(patch->flags.event_on,
5009  NamdProfileEvent::NEWTONIAN_VELOCITIES);
5010 
5011  // Deterministic velocity update, account for multigrator
5012  if (staleForces || (doNonbonded && doFullElectrostatics)) {
5013  addForceToMomentum3(stepscale*timestep, Results::normal, 0,
5014  stepscale*nbondstep, Results::nbond, staleForces,
5015  stepscale*slowstep, Results::slow, staleForces);
5016  } else {
5017  addForceToMomentum(stepscale*timestep);
5018  if (staleForces || doNonbonded)
5019  addForceToMomentum(stepscale*nbondstep, Results::nbond, staleForces);
5020  if (staleForces || doFullElectrostatics)
5021  addForceToMomentum(stepscale*slowstep, Results::slow, staleForces);
5022  }
5023 }
5024 
5026 {
5027 // This routine is used for the BAOAB integrator,
5028 // Ornstein-Uhlenbeck exact solve for the O-part.
5029 // See B. Leimkuhler and C. Matthews, AMRX (2012)
5030 // Routine originally written by JPhillips, with fresh errors by CMatthews June2012
// NOTE(review): doxygen extraction dropped original line 5025 (the
// function signature) and line 5032 (the guard opening the block below,
// presumably an 'if ( simParams->langevinOn )' test).
5031 
5033  {
5034  FullAtom *a = patch->atom.begin();
5035  int numAtoms = patch->numAtoms;
5036  Molecule *molecule = Node::Object()->molecule;
5037  BigReal dt = dt_fs * 0.001; // convert to ps
// NOTE(review): original lines 5038-5039 are missing — presumably the
// declaration of kbT and the adaptive-tempering condition guarding the
// reassignment below; verify against upstream Sequencer.C.
5040  {
5041  kbT = BOLTZMANN*adaptTempT;
5042  }
5043 
5044  int lesReduceTemp = simParams->lesOn && simParams->lesReduceTemp;
5045  BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
5046 
5047  for ( int i = 0; i < numAtoms; ++i )
5048  {
5049  BigReal dt_gamma = dt * a[i].langevinParam;
5050  if ( ! dt_gamma ) continue;
5051 
// Exact OU solve: decay factor f1 and matching fluctuation amplitude f2.
5052  BigReal f1 = exp( -dt_gamma );
5053  BigReal f2 = sqrt( ( 1. - f1*f1 ) * kbT *
5054  ( a[i].partition ? tempFactor : 1.0 ) *
5055  a[i].recipMass );
5056  a[i].velocity *= f1;
5057  a[i].velocity += f2 * random->gaussian_vector();
5058  }
5059  }
5060 }
5061 
// NOTE(review): doxygen extraction dropped original line 5062 (the
// signature of this BBK first-half Langevin velocity update) and line
// 5066 (the guard opening the block, presumably 'if (simParams->langevinOn)').
// First (deterministic) half of the BBK Langevin update: damp velocities
// by (1 - dt*gamma/2).  For Drude pairs the damping is applied in
// (COM, bond) coordinates with each particle's own damping factor.
5063 {
5064  NAMD_EVENT_RANGE_2(patch->flags.event_on,
5065  NamdProfileEvent::LANGEVIN_VELOCITIES_BBK1);
5067  {
5068  FullAtom *a = patch->atom.begin();
5069  int numAtoms = patch->numAtoms;
5070  Molecule *molecule = Node::Object()->molecule;
5071  BigReal dt = dt_fs * 0.001; // convert to ps
5072  int i;
5073 
5074  if (simParams->drudeOn) {
5075  for (i = 0; i < numAtoms; i++) {
5076 
5077  if (i < numAtoms-1 &&
5078  a[i+1].mass < 1.0 && a[i+1].mass > 0.05) {
5079  //printf("*** Found Drude particle %d\n", a[i+1].id);
5080  // i+1 is a Drude particle with parent i
5081 
5082  // convert from Cartesian coordinates to (COM,bond) coordinates
5083  BigReal m = a[i+1].mass / (a[i].mass + a[i+1].mass); // mass ratio
5084  Vector v_bnd = a[i+1].velocity - a[i].velocity; // vel of bond
5085  Vector v_com = a[i].velocity + m * v_bnd; // vel of COM
5086  BigReal dt_gamma;
5087 
5088  // use Langevin damping factor i for v_com
5089  dt_gamma = dt * a[i].langevinParam;
5090  if (dt_gamma != 0.0) {
5091  v_com *= ( 1. - 0.5 * dt_gamma );
5092  }
5093 
5094  // use Langevin damping factor i+1 for v_bnd
5095  dt_gamma = dt * a[i+1].langevinParam;
5096  if (dt_gamma != 0.0) {
5097  v_bnd *= ( 1. - 0.5 * dt_gamma );
5098  }
5099 
5100  // convert back
5101  a[i].velocity = v_com - m * v_bnd;
5102  a[i+1].velocity = v_bnd + a[i].velocity;
5103 
5104  i++; // +1 from loop, we've updated both particles
5105  }
5106  else {
5107  BigReal dt_gamma = dt * a[i].langevinParam;
5108  if ( ! dt_gamma ) continue;
5109 
5110  a[i].velocity *= ( 1. - 0.5 * dt_gamma );
5111  }
5112 
5113  } // end for
5114  } // end if drudeOn
5115  else {
5116 
5117  //
5118  // DJH: The conditional inside loop prevents vectorization and doesn't
5119  // avoid much work since addition and multiplication are cheap.
5120  //
5121  for ( i = 0; i < numAtoms; ++i )
5122  {
5123  BigReal dt_gamma = dt * a[i].langevinParam;
5124  if ( ! dt_gamma ) continue;
5125 
5126  a[i].velocity *= ( 1. - 0.5 * dt_gamma );
5127  }
5128 
5129  } // end else
5130 
5131  } // end if langevinOn
5132 }
5133 
5134 
// NOTE(review): doxygen extraction dropped original line 5135 (the
// signature of this BBK second-half Langevin update), line 5139 (the
// guard, presumably 'if (simParams->langevinOn)'), and lines 5153-5154
// (presumably the kbT declaration and adaptive-tempering condition).
// Second half of the BBK update: add the random (fluctuation) kick and
// divide by (1 + dt*gamma/2); Drude pairs are handled in (COM, bond)
// coordinates with a separate bond temperature drudeTemp.
5136 {
5137  NAMD_EVENT_RANGE_2(patch->flags.event_on,
5138  NamdProfileEvent::LANGEVIN_VELOCITIES_BBK2);
5140  {
5141  //
5142  // DJH: This call is expensive. Avoid calling when gammas don't differ.
5143  // Set flag in SimParameters and make this call conditional.
5144  //
5145  TIMER_START(patch->timerSet, RATTLE1);
5146  rattle1(dt_fs,1); // conserve momentum if gammas differ
5147  TIMER_STOP(patch->timerSet, RATTLE1);
5148 
5149  FullAtom *a = patch->atom.begin();
5150  int numAtoms = patch->numAtoms;
5151  Molecule *molecule = Node::Object()->molecule;
5152  BigReal dt = dt_fs * 0.001; // convert to ps
5155  {
5156  kbT = BOLTZMANN*adaptTempT;
5157  }
5158  int lesReduceTemp = simParams->lesOn && simParams->lesReduceTemp;
5159  BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
5160  int i;
5161 
5162  if (simParams->drudeOn) {
5163  BigReal kbT_bnd = BOLTZMANN*(simParams->drudeTemp); // drude bond Temp
5164 
5165  for (i = 0; i < numAtoms; i++) {
5166 
5167  if (i < numAtoms-1 &&
5168  a[i+1].mass < 1.0 && a[i+1].mass > 0.05) {
5169  //printf("*** Found Drude particle %d\n", a[i+1].id);
5170  // i+1 is a Drude particle with parent i
5171 
5172  // convert from Cartesian coordinates to (COM,bond) coordinates
5173  BigReal m = a[i+1].mass / (a[i].mass + a[i+1].mass); // mass ratio
5174  Vector v_bnd = a[i+1].velocity - a[i].velocity; // vel of bond
5175  Vector v_com = a[i].velocity + m * v_bnd; // vel of COM
5176  BigReal dt_gamma;
5177 
5178  // use Langevin damping factor i for v_com
5179  dt_gamma = dt * a[i].langevinParam;
5180  if (dt_gamma != 0.0) {
5181  BigReal mass = a[i].mass + a[i+1].mass;
5182  v_com += random->gaussian_vector() *
5183  sqrt( 2 * dt_gamma * kbT *
5184  ( a[i].partition ? tempFactor : 1.0 ) / mass );
5185  v_com /= ( 1. + 0.5 * dt_gamma );
5186  }
5187 
5188  // use Langevin damping factor i+1 for v_bnd
5189  dt_gamma = dt * a[i+1].langevinParam;
5190  if (dt_gamma != 0.0) {
// why: reduced mass of the Drude-parent pair for the bond coordinate
5191  BigReal mass = a[i+1].mass * (1. - m);
5192  v_bnd += random->gaussian_vector() *
5193  sqrt( 2 * dt_gamma * kbT_bnd *
5194  ( a[i+1].partition ? tempFactor : 1.0 ) / mass );
5195  v_bnd /= ( 1. + 0.5 * dt_gamma );
5196  }
5197 
5198  // convert back
5199  a[i].velocity = v_com - m * v_bnd;
5200  a[i+1].velocity = v_bnd + a[i].velocity;
5201 
5202  i++; // +1 from loop, we've updated both particles
5203  }
5204  else {
5205  BigReal dt_gamma = dt * a[i].langevinParam;
5206  if ( ! dt_gamma ) continue;
5207 
5208  a[i].velocity += random->gaussian_vector() *
5209  sqrt( 2 * dt_gamma * kbT *
5210  ( a[i].partition ? tempFactor : 1.0 ) * a[i].recipMass );
5211  a[i].velocity /= ( 1. + 0.5 * dt_gamma );
5212  }
5213 
5214  } // end for
5215  } // end if drudeOn
5216  else {
5217 
5218  //
5219  // DJH: For case using same gamma (the Langevin parameter),
5220  // no partitions (e.g. FEP), and no adaptive tempering (adaptTempMD),
5221  // we can precompute constants. Then by lifting the RNG from the
5222  // loop (filling up an array of random numbers), we can vectorize
5223  // loop and simplify arithmetic to just addition and multiplication.
5224  //
5225  for ( i = 0; i < numAtoms; ++i )
5226  {
5227  BigReal dt_gamma = dt * a[i].langevinParam;
5228  if ( ! dt_gamma ) continue;
5229 
5230  a[i].velocity += random->gaussian_vector() *
5231  sqrt( 2 * dt_gamma * kbT *
5232  ( a[i].partition ? tempFactor : 1.0 ) * a[i].recipMass );
5233  a[i].velocity /= ( 1. + 0.5 * dt_gamma );
5234  }
5235 
5236  } // end else
5237 
5238  } // end if langevinOn
5239 }
5240 
5241 
// NOTE(review): doxygen extraction dropped original line 5242 (the
// signature of this Berendsen pressure-rescaling routine) and several
// interior lines (5245, 5248, 5262, 5280, 5293, 5302) — the latter group
// apparently the berendsenPressure_count updates and the
// fixed-atom position-restoration statements whose argument lists remain
// visible below; verify against upstream Sequencer.C.
// Every berendsenPressureFreq steps, rescale the lattice by the broadcast
// factor and move atoms (whole hydrogen groups when useGroupPressure).
5243 {
5244  if ( simParams->berendsenPressureOn ) {
5246  const int freq = simParams->berendsenPressureFreq;
5247  if ( ! (berendsenPressure_count % freq ) ) {
5249  FullAtom *a = patch->atom.begin();
5250  int numAtoms = patch->numAtoms;
5251  // Blocking receive for the updated lattice scaling factor.
5252  Tensor factor = broadcast->positionRescaleFactor.get(step);
5253  patch->lattice.rescale(factor);
5254  if ( simParams->useGroupPressure )
5255  {
5256  int hgs;
5257  for ( int i = 0; i < numAtoms; i += hgs ) {
5258  int j;
5259  hgs = a[i].hydrogenGroupSize;
5260  if ( simParams->fixedAtomsOn && a[i].groupFixed ) {
5261  for ( j = i; j < (i+hgs); ++j ) {
5263  a[j].fixedPosition,a[j].transform);
5264  }
5265  continue;
5266  }
5267  BigReal m_cm = 0;
5268  Position x_cm(0,0,0);
5269  for ( j = i; j < (i+hgs); ++j ) {
5270  if ( simParams->fixedAtomsOn && a[j].atomFixed ) continue;
5271  m_cm += a[j].mass;
5272  x_cm += a[j].mass * a[j].position;
5273  }
5274  x_cm /= m_cm;
5275  Position new_x_cm = x_cm;
5276  patch->lattice.rescale(new_x_cm,factor);
5277  Position delta_x_cm = new_x_cm - x_cm;
// Rigid shift: every mobile atom of the group moves with its COM.
5278  for ( j = i; j < (i+hgs); ++j ) {
5279  if ( simParams->fixedAtomsOn && a[j].atomFixed ) {
5281  a[j].fixedPosition,a[j].transform);
5282  continue;
5283  }
5284  a[j].position += delta_x_cm;
5285  }
5286  }
5287  }
5288  else
5289  {
5290  for ( int i = 0; i < numAtoms; ++i )
5291  {
5292  if ( simParams->fixedAtomsOn && a[i].atomFixed ) {
5294  a[i].fixedPosition,a[i].transform);
5295  continue;
5296  }
5297  patch->lattice.rescale(a[i].position,factor);
5298  }
5299  }
5300  }
5301  } else {
5303  }
5304 }
5305 
// NOTE(review): doxygen extraction dropped original line 5306 (the
// signature of this Langevin-piston cell-rescaling routine) and lines
// 5331/5356/5371 — apparently the fixed-atom position-restoration
// statements whose trailing argument lists remain visible below.
// On piston steps, rescale the lattice, shift positions, and scale
// velocities by the per-axis reciprocal factors (whole hydrogen groups
// when useGroupPressure; pressure-excluded atoms are skipped).
5307 {
5308  if ( simParams->langevinPistonOn && ! ( (step-1-slowFreq/2) % slowFreq ) )
5309  {
5310  //
5311  // DJH: Loops below simplify if we lift out special cases of fixed atoms
5312  // and pressure excluded atoms and make them their own branch.
5313  //
5314  FullAtom *a = patch->atom.begin();
5315  int numAtoms = patch->numAtoms;
5316  // Blocking receive for the updated lattice scaling factor.
5317  Tensor factor = broadcast->positionRescaleFactor.get(step);
5318  TIMER_START(patch->timerSet, PISTON);
5319  // JCP FIX THIS!!!
5320  Vector velFactor(1/factor.xx,1/factor.yy,1/factor.zz);
5321  patch->lattice.rescale(factor);
5322  Molecule *mol = Node::Object()->molecule;
5323  if ( simParams->useGroupPressure )
5324  {
5325  int hgs;
5326  for ( int i = 0; i < numAtoms; i += hgs ) {
5327  int j;
5328  hgs = a[i].hydrogenGroupSize;
5329  if ( simParams->fixedAtomsOn && a[i].groupFixed ) {
5330  for ( j = i; j < (i+hgs); ++j ) {
5332  a[j].fixedPosition,a[j].transform);
5333  }
5334  continue;
5335  }
5336  BigReal m_cm = 0;
5337  Position x_cm(0,0,0);
5338  Velocity v_cm(0,0,0);
5339  for ( j = i; j < (i+hgs); ++j ) {
5340  if ( simParams->fixedAtomsOn && a[j].atomFixed ) continue;
5341  m_cm += a[j].mass;
5342  x_cm += a[j].mass * a[j].position;
5343  v_cm += a[j].mass * a[j].velocity;
5344  }
5345  x_cm /= m_cm;
5346  Position new_x_cm = x_cm;
5347  patch->lattice.rescale(new_x_cm,factor);
5348  Position delta_x_cm = new_x_cm - x_cm;
5349  v_cm /= m_cm;
5350  Velocity delta_v_cm;
5351  delta_v_cm.x = ( velFactor.x - 1 ) * v_cm.x;
5352  delta_v_cm.y = ( velFactor.y - 1 ) * v_cm.y;
5353  delta_v_cm.z = ( velFactor.z - 1 ) * v_cm.z;
5354  for ( j = i; j < (i+hgs); ++j ) {
5355  if ( simParams->fixedAtomsOn && a[j].atomFixed ) {
5357  a[j].fixedPosition,a[j].transform);
5358  continue;
5359  }
5360  if ( mol->is_atom_exPressure(a[j].id) ) continue;
5361  a[j].position += delta_x_cm;
5362  a[j].velocity += delta_v_cm;
5363  }
5364  }
5365  }
5366  else
5367  {
5368  for ( int i = 0; i < numAtoms; ++i )
5369  {
5370  if ( simParams->fixedAtomsOn && a[i].atomFixed ) {
5372  a[i].fixedPosition,a[i].transform);
5373  continue;
5374  }
5375  if ( mol->is_atom_exPressure(a[i].id) ) continue;
5376  patch->lattice.rescale(a[i].position,factor);
5377  a[i].velocity.x *= velFactor.x;
5378  a[i].velocity.y *= velFactor.y;
5379  a[i].velocity.z *= velFactor.z;
5380  }
5381  }
5382  TIMER_STOP(patch->timerSet, PISTON);
5383  }
5384 }
5385 
// NOTE(review): doxygen extraction dropped original line 5386 (the
// signature of this periodic velocity-rescaling routine) and lines
// 5392/5400 — apparently the rescaleVelocities_numTemps increment and
// reset around the rescale; verify against upstream Sequencer.C.
5387 {
5388  const int rescaleFreq = simParams->rescaleFreq;
5389  if ( rescaleFreq > 0 ) {
5390  FullAtom *a = patch->atom.begin();
5391  int numAtoms = patch->numAtoms;
5393  if ( rescaleVelocities_numTemps == rescaleFreq ) {
5394  // Blocking receive for the velocity scaling factor.
5395  BigReal factor = broadcast->velocityRescaleFactor.get(step);
5396  for ( int i = 0; i < numAtoms; ++i )
5397  {
5398  a[i].velocity *= factor;
5399  }
5401  }
5402  }
5403 }
5404 
5405 void Sequencer::rescaleaccelMD (int step, int doNonbonded, int doFullElectrostatics)
5406 {
5407  if (!simParams->accelMDOn) return;
5408  if ((step < simParams->accelMDFirstStep) || ( simParams->accelMDLastStep >0 && step > simParams->accelMDLastStep)) return;
5409 
5410  // Blocking receive for the Accelerated MD scaling factors.
5411  Vector accelMDfactor = broadcast->accelMDRescaleFactor.get(step);
5412  const BigReal factor_dihe = accelMDfactor[0];
5413  const BigReal factor_tot = accelMDfactor[1];
5414  const int numAtoms = patch->numAtoms;
5415 
5416  if (simParams->accelMDdihe && factor_tot <1 )
5417  NAMD_die("accelMD broadcasting error!\n");
5418  if (!simParams->accelMDdihe && !simParams->accelMDdual && factor_dihe <1 )
5419  NAMD_die("accelMD broadcasting error!\n");
5420 
5421  if (simParams->accelMDdihe && factor_dihe < 1) {
5422  for (int i = 0; i < numAtoms; ++i)
5423  if (patch->f[Results::amdf][i][0] || patch->f[Results::amdf][i][1] || patch->f[Results::amdf][i][2])
5424  patch->f[Results::normal][i] += patch->f[Results::amdf][i]*(factor_dihe - 1);
5425  }
5426 
5427  if ( !simParams->accelMDdihe && factor_tot < 1) {
5428  for (int i = 0; i < numAtoms; ++i)
5429  patch->f[Results::normal][i] *= factor_tot;
5430  if (doNonbonded) {
5431  for (int i = 0; i < numAtoms; ++i)
5432  patch->f[Results::nbond][i] *= factor_tot;
5433  }
5434  if (doFullElectrostatics) {
5435  for (int i = 0; i < numAtoms; ++i)
5436  patch->f[Results::slow][i] *= factor_tot;
5437  }
5438  }
5439 
5440  if (simParams->accelMDdual && factor_dihe < 1) {
5441  for (int i = 0; i < numAtoms; ++i)
5442  if (patch->f[Results::amdf][i][0] || patch->f[Results::amdf][i][1] || patch->f[Results::amdf][i][2])
5443  patch->f[Results::normal][i] += patch->f[Results::amdf][i]*(factor_dihe - factor_tot);
5444  }
5445 
5446 }
5447 
// NOTE(review): doxygen extraction dropped original line 5448 (the
// signature of this adaptive-tempering update) and lines 5453, 5455 and
// 5461 — apparently the second half of the step-range condition, the
// Langevin-temperature restore, and the blocking receive of the new
// temperature; verify against upstream Sequencer.C before editing.
5449 {
5450  //check if adaptive tempering is enabled and in the right timestep range
5451  if (!simParams->adaptTempOn) return;
5452  if ( (step < simParams->adaptTempFirstStep ) ||
5454  if (simParams->langevinOn) // restore langevin temperature
5456  return;
5457  }
5458  // Get Updated Temperature
5459  if ( !(step % simParams->adaptTempFreq ) && (step > simParams->firstTimestep ))
5460  // Blocking receive for the updated adaptive tempering temperature.
5462 }
5463 
5464 void Sequencer::reassignVelocities(BigReal timestep, int step)
5465 {
5466  const int reassignFreq = simParams->reassignFreq;
5467  if ( ( reassignFreq > 0 ) && ! ( step % reassignFreq ) ) {
5468  FullAtom *a = patch->atom.begin();
5469  int numAtoms = patch->numAtoms;
5470  BigReal newTemp = simParams->reassignTemp;
5471  newTemp += ( step / reassignFreq ) * simParams->reassignIncr;
5472  if ( simParams->reassignIncr > 0.0 ) {
5473  if ( newTemp > simParams->reassignHold && simParams->reassignHold > 0.0 )
5474  newTemp = simParams->reassignHold;
5475  } else {
5476  if ( newTemp < simParams->reassignHold )
5477  newTemp = simParams->reassignHold;
5478  }
5479  BigReal kbT = BOLTZMANN * newTemp;
5480 
5481  int lesReduceTemp = simParams->lesOn && simParams->lesReduceTemp;
5482  BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
5483 
5484  for ( int i = 0; i < numAtoms; ++i )
5485  {
5486  a[i].velocity = ( ( simParams->fixedAtomsOn &&
5487  a[i].atomFixed && a[i].mass > 0.) ? Vector(0,0,0) :
5488  sqrt(kbT * (a[i].partition ? tempFactor : 1.0) * a[i].recipMass) *
5489  random->gaussian_vector() );
5490  }
5491  } else {
5492  NAMD_bug("Sequencer::reassignVelocities called improperly!");
5493  }
5494 }
5495 
// NOTE(review): doxygen extraction dropped original line 5496 (the
// signature of this velocity re-initialization routine).
// Draw fresh Maxwell-Boltzmann velocities at initialTemp; fixed or
// massless atoms get zero, and a Drude particle copies its parent's
// newly drawn velocity.
5497 {
5498  FullAtom *a = patch->atom.begin();
5499  int numAtoms = patch->numAtoms;
5500  BigReal newTemp = simParams->initialTemp;
5501  BigReal kbT = BOLTZMANN * newTemp;
5502 
5503  int lesReduceTemp = simParams->lesOn && simParams->lesReduceTemp;
5504  BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
5505 
5506  for ( int i = 0; i < numAtoms; ++i )
5507  {
5508  a[i].velocity = ( ( (simParams->fixedAtomsOn && a[i].atomFixed) ||
5509  a[i].mass <= 0.) ? Vector(0,0,0) :
5510  sqrt(kbT * (a[i].partition ? tempFactor : 1.0) * a[i].recipMass) *
5511  random->gaussian_vector() );
5512  if ( simParams->drudeOn && i+1 < numAtoms && a[i+1].mass < 1.0 && a[i+1].mass > 0.05 ) {
5513  a[i+1].velocity = a[i].velocity; // zero is good enough
5514  ++i;
5515  }
5516  }
5517 }
5518 
// NOTE(review): doxygen extraction dropped original line 5519 (the
// signature of this routine; it scales all velocities by 'factor').
5520 {
5521  FullAtom *a = patch->atom.begin();
5522  int numAtoms = patch->numAtoms;
5523  for ( int i = 0; i < numAtoms; ++i )
5524  {
5525  a[i].velocity *= factor;
5526  }
5527 }
5528 
// NOTE(review): doxygen extraction dropped original line 5529 (the
// signature of this routine).  Restores every atom's charge from the
// authoritative per-atom charge stored in the Molecule object.
5530 {
5531  FullAtom *a = patch->atom.begin();
5532  int numAtoms = patch->numAtoms;
5533  Molecule *molecule = Node::Object()->molecule;
5534  for ( int i = 0; i < numAtoms; ++i )
5535  {
5536  a[i].charge = molecule->atomcharge(a[i].id);
5537  }
5538 }
5539 
5540 // REST2 solute charge scaling
// NOTE(review): doxygen extraction dropped original line 5541 (the
// signature; it takes the scaling factor applied below as sqrt(factor)).
// Scales the charges of marked solute atoms by sqrt(factor), starting
// from the original charges stored in the Molecule object, and mirrors
// the result into the SOA charge array when SOA integration is active.
5542 {
5543  FullAtom *a = patch->atom.begin();
5544  int numAtoms = patch->numAtoms;
5545  Molecule *molecule = Node::Object()->molecule;
// why: electrostatic energy is bilinear in charge, so scaling energy by
// 'factor' corresponds to scaling each solute charge by sqrt(factor)
5546  BigReal sqrt_factor = sqrt(factor);
5547  // apply scaling to the original charge (stored in molecule)
5548  // of just the marked solute atoms
5549  for ( int i = 0; i < numAtoms; ++i ) {
5550  if (molecule->get_ss_type(a[i].id)) {
5551  a[i].charge = sqrt_factor * molecule->atomcharge(a[i].id);
5552  if (simParams->SOAintegrateOn) patch->patchDataSOA.charge[i] = a[i].charge;
5553  }
5554  }
5555 }
5556 
// NOTE(review): doxygen extraction dropped original line 5557 (the
// signature of this temperature-coupling routine; dt_fs and step are the
// visible parameters).
// Berendsen-style temperature coupling: scale each atom's velocity by
// exp(coefficient * dt * langevinParam), with the coefficient received
// from the Controller for this step.
5558 {
5559  if ( simParams->tCoupleOn )
5560  {
5561  FullAtom *a = patch->atom.begin();
5562  int numAtoms = patch->numAtoms;
5563  // Blocking receive for the temperature coupling coefficient.
5564  BigReal coefficient = broadcast->tcoupleCoefficient.get(step);
5565  Molecule *molecule = Node::Object()->molecule;
5566  BigReal dt = dt_fs * 0.001; // convert to ps
5567  coefficient *= dt;
5568  for ( int i = 0; i < numAtoms; ++i )
5569  {
5570  BigReal f1 = exp( coefficient * a[i].langevinParam );
5571  a[i].velocity *= f1;
5572  }
5573  }
5574 }
5575 
// NOTE(review): doxygen extraction dropped original lines 5576-5580 and
// 5582-5583 — the signature and doc comment of this stochastic
// velocity-rescaling routine plus the counter update/condition that the
// dangling closing brace and 'stochRescale_count = 0' below belong to;
// verify against upstream Sequencer.C.
5581 {
5584  FullAtom *a = patch->atom.begin();
5585  int numAtoms = patch->numAtoms;
5586  // Blocking receive for the temperature coupling coefficient.
5587  BigReal velrescaling = broadcast->stochRescaleCoefficient.get(step);
5588  DebugM(4, "stochastically rescaling velocities at step " << step << " by " << velrescaling << "\n");
5589  for ( int i = 0; i < numAtoms; ++i ) {
5590  a[i].velocity *= velrescaling;
5591  }
5592  stochRescale_count = 0;
5593  }
5594 }
5595 
5596 void Sequencer::saveForce(const int ftag)
5597 {
5598  patch->saveForce(ftag);
5599 }
5600 
5601 //
5602 // DJH: Need to change division by TIMEFACTOR into multiplication by
5603 // reciprocal of TIMEFACTOR. Done several times for each iteration of
5604 // the integrate() loop.
5605 //
5606 
// Continuation of Sequencer::addForceToMomentum(...) — the line carrying
// the function name (original 5607) is absent from this listing.
// Velocity half-kick for a single force class: selects either the live or
// the saved force array for 'ftag' and delegates the momentum update to
// HomePatch::addForceToMomentum for all atoms of this patch.
5608  BigReal timestep, const int ftag, const int useSaved
5609  ) {
5610  NAMD_EVENT_RANGE_2(patch->flags.event_on,
5611  NamdProfileEvent::ADD_FORCE_TO_MOMENTUM);
5612 #if CMK_BLUEGENEL
5613  CmiNetworkProgressAfter (0);
5614 #endif
// Convert the femtosecond timestep into internal integration units.
5615  const BigReal dt = timestep / TIMEFACTOR;
5616  FullAtom *atom_arr = patch->atom.begin();
// Choose live forces or the snapshot captured by saveForce().
5617  ForceList *f_use = (useSaved ? patch->f_saved : patch->f);
5618  const Force *force_arr = f_use[ftag].const_begin();
5619  patch->addForceToMomentum(atom_arr, force_arr, dt, patch->numAtoms);
5620 }
5621 
// Continuation of Sequencer::addForceToMomentum3(...) — the line carrying
// the function name (original 5622) is absent from this listing.
// Fused velocity half-kick for three force classes (each with its own
// timestep, force tag, and saved/live selection), delegated to
// HomePatch::addForceToMomentum3 in a single pass over the atoms.
5623  const BigReal timestep1, const int ftag1, const int useSaved1,
5624  const BigReal timestep2, const int ftag2, const int useSaved2,
5625  const BigReal timestep3, const int ftag3, const int useSaved3
5626  ) {
5627  NAMD_EVENT_RANGE_2(patch->flags.event_on,
5628  NamdProfileEvent::ADD_FORCE_TO_MOMENTUM);
5629 #if CMK_BLUEGENEL
5630  CmiNetworkProgressAfter (0);
5631 #endif
// Convert each femtosecond timestep into internal integration units.
5632  const BigReal dt1 = timestep1 / TIMEFACTOR;
5633  const BigReal dt2 = timestep2 / TIMEFACTOR;
5634  const BigReal dt3 = timestep3 / TIMEFACTOR;
// Each force class independently selects live or saved forces.
5635  ForceList *f_use1 = (useSaved1 ? patch->f_saved : patch->f);
5636  ForceList *f_use2 = (useSaved2 ? patch->f_saved : patch->f);
5637  ForceList *f_use3 = (useSaved3 ? patch->f_saved : patch->f);
5638  FullAtom *atom_arr = patch->atom.begin();
5639  const Force *force_arr1 = f_use1[ftag1].const_begin();
5640  const Force *force_arr2 = f_use2[ftag2].const_begin();
5641  const Force *force_arr3 = f_use3[ftag3].const_begin();
5642  patch->addForceToMomentum3 (atom_arr, force_arr1, force_arr2, force_arr3,
5643  dt1, dt2, dt3, patch->numAtoms);
5644 }
5645 
// Sequencer::addVelocityToPosition(BigReal timestep) — the signature line
// (original 5646) is absent from this listing.
// Position drift step of the integrator: delegates the x += v*dt update
// for every atom of this patch to HomePatch::addVelocityToPosition.
5647 {
5648  NAMD_EVENT_RANGE_2(patch->flags.event_on,
5649  NamdProfileEvent::ADD_VELOCITY_TO_POSITION);
5650 #if CMK_BLUEGENEL
5651  CmiNetworkProgressAfter (0);
5652 #endif
// Convert the femtosecond timestep into internal integration units.
5653  const BigReal dt = timestep / TIMEFACTOR;
5654  FullAtom *atom_arr = patch->atom.begin();
5655  patch->addVelocityToPosition(atom_arr, dt, patch->numAtoms);
5656 }
5657 
// Enforce the Drude hard-wall constraint (bounds the Drude particle-parent
// separation). When 'pressure' is non-zero the constraint virial is
// collected and added to the normal-virial reduction.
// NOTE(review): original line 5666 (between the error message and the
// terminate() call) is missing from this listing — likely an early-exit
// notification; consult the full source.
5658 void Sequencer::hardWallDrude(BigReal dt, int pressure)
5659 {
5660  if ( simParams->drudeHardWallOn ) {
5661  Tensor virial;
// Only request virial accumulation when the caller wants pressure.
5662  Tensor *vp = ( pressure ? &virial : 0 );
// Non-zero return from the patch indicates constraint failure.
5663  if ( patch->hardWallDrude(dt, vp, pressureProfileReduction) ) {
5664  iout << iERROR << "Constraint failure in HardWallDrude(); "
5665  << "simulation may become unstable.\n" << endi;
5667  terminate();
5668  }
5669  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,virial);
5670  }
5671 }
5672 
// Apply RATTLE position/velocity constraints for rigid bonds (first phase).
// On constraint failure the run is terminated. When 'pressure' is non-zero
// the constraint virial is accumulated into the normal-virial reduction.
// NOTE(review): original line 5682 (between the error message and
// terminate()) is missing from this listing; consult the full source.
5673 void Sequencer::rattle1(BigReal dt, int pressure)
5674 {
5675  NAMD_EVENT_RANGE_2(patch->flags.event_on, NamdProfileEvent::RATTLE1);
5676  if ( simParams->rigidBonds != RIGID_NONE ) {
5677  Tensor virial;
// Only request virial accumulation when the caller wants pressure.
5678  Tensor *vp = ( pressure ? &virial : 0 );
// Non-zero return from the patch indicates a failed constraint solve.
5679  if ( patch->rattle1(dt, vp, pressureProfileReduction) ) {
5680  iout << iERROR <<
5681  "Constraint failure; simulation has become unstable.\n" << endi;
5683  terminate();
5684  }
// Disabled debugging dump of the constraint virial.
5685 #if 0
5686  printf("virial = %g %g %g %g %g %g %g %g %g\n",
5687  virial.xx, virial.xy, virial.xz,
5688  virial.yx, virial.yy, virial.yz,
5689  virial.zx, virial.zy, virial.zz);
5690 #endif
5691  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,virial);
// Disabled debugging dump of per-atom positions/velocities/forces.
5692 #if 0
5693  {
5694  const FullAtom *a = patch->atom.const_begin();
5695  for (int n=0; n < patch->numAtoms; n++) {
5696  printf("pos[%d] = %g %g %g\n", n,
5697  a[n].position.x, a[n].position.y, a[n].position.z);
5698  }
5699  for (int n=0; n < patch->numAtoms; n++) {
5700  printf("vel[%d] = %g %g %g\n", n,
5701  a[n].velocity.x, a[n].velocity.y, a[n].velocity.z);
5702  }
5703  if (pressure) {
5704  for (int n=0; n < patch->numAtoms; n++) {
5705  printf("force[%d] = %g %g %g\n", n,
5706  patch->f[Results::normal][n].x,
5707  patch->f[Results::normal][n].y,
5708  patch->f[Results::normal][n].z);
5709  }
5710  }
5711  }
5712 #endif
5713  }
5714 }
5715 
5716 // void Sequencer::rattle2(BigReal dt, int step)
5717 // {
5718 // if ( simParams->rigidBonds != RIGID_NONE ) {
5719 // Tensor virial;
5720 // patch->rattle2(dt, &virial);
5721 // ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,virial);
5722 // // we need to add to alt and int virial because not included in forces
5723 // #ifdef ALTVIRIAL
5724 // ADD_TENSOR_OBJECT(reduction,REDUCTION_ALT_VIRIAL_NORMAL,virial);
5725 // #endif
5726 // ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NORMAL,virial);
5727 // }
5728 // }
5729 
// Sequencer::maximumMove(BigReal timestep) — the signature line (original
// 5730) is absent from this listing. Velocity sanity check:
//  * if simParams->maximumMove is set, clamp each atom's speed so it moves
//    at most that distance per step;
//  * otherwise, if any atom would travel farther than the cutoff in one
//    step, report every offender and terminate the run.
// NOTE(review): original line 5769 (between the summary error message and
// terminate()) is missing from this listing; consult the full source.
5731 {
5732  NAMD_EVENT_RANGE_2(patch->flags.event_on, NamdProfileEvent::MAXIMUM_MOVE);
5733 
5734  FullAtom *a = patch->atom.begin();
5735  int numAtoms = patch->numAtoms;
5736  if ( simParams->maximumMove ) {
5737  const BigReal dt = timestep / TIMEFACTOR;
// Maximum allowed speed so displacement per step <= maximumMove.
5738  const BigReal maxvel = simParams->maximumMove / dt;
5739  const BigReal maxvel2 = maxvel * maxvel;
5740  for ( int i=0; i<numAtoms; ++i ) {
// Rescale (not zero) over-fast velocities down to the limit.
5741  if ( a[i].velocity.length2() > maxvel2 ) {
5742  a[i].velocity *= ( maxvel / a[i].velocity.length() );
5743  }
5744  }
5745  } else {
5746  const BigReal dt = timestep / TIMEFACTOR;
// Hard stability limit: no atom may cross the cutoff in one step.
5747  const BigReal maxvel = simParams->cutoff / dt;
5748  const BigReal maxvel2 = maxvel * maxvel;
5749  int killme = 0;
// First pass: cheap detection of whether any atom is over the limit.
5750  for ( int i=0; i<numAtoms; ++i ) {
5751  killme = killme || ( a[i].velocity.length2() > maxvel2 );
5752  }
5753  if ( killme ) {
// Second pass: count and report each offending atom before dying.
5754  killme = 0;
5755  for ( int i=0; i<numAtoms; ++i ) {
5756  if ( a[i].velocity.length2() > maxvel2 ) {
5757  ++killme;
5758  iout << iERROR << "Atom " << (a[i].id + 1) << " velocity is "
5759  << ( PDBVELFACTOR * a[i].velocity ) << " (limit is "
5760  << ( PDBVELFACTOR * maxvel ) << ", atom "
5761  << i << " of " << numAtoms << " on patch "
5762  << patch->patchID << " pe " << CkMyPe() << ")\n" << endi;
5763  }
5764  }
5765  iout << iERROR <<
5766  "Atoms moving too fast; simulation has become unstable ("
5767  << killme << " atoms on patch " << patch->patchID
5768  << " pe " << CkMyPe() << ").\n" << endi;
5770  terminate();
5771  }
5772  }
5773 }
5774 
// Sequencer::minimizationQuenchVelocity(void) — the signature line
// (original 5775) is absent from this listing; name confirmed by the
// cross-reference index at the end of this file.
// During energy minimization, zero all atom velocities on this patch.
5776 {
5777  if ( simParams->minimizeOn ) {
5778  FullAtom *a = patch->atom.begin();
5779  int numAtoms = patch->numAtoms;
5780  for ( int i=0; i<numAtoms; ++i ) {
5781  a[i].velocity = 0.;
5782  }
5783  }
5784 }
5785 
// Sequencer::submitHalfstep() — the signature line (original 5786) is
// absent from this listing. Accumulates velocity-dependent half-step
// quantities (kinetic energy, kinetic virial, optional pressure-profile
// partitions, optional Multigrator momentum-squared sum) into the
// reduction objects. Called twice per timestep, hence the extra factors
// of 0.5 below.
// NOTE(review): several original lines are missing from this listing
// (5801, 5821, 5825, 5833, 5887, 5907, 5933) — these most likely held the
// doKineticEnergy/pressure-profile conditionals and the kinetic-energy
// reduction-item submissions; consult the full source before editing.
5787 {
5788  NAMD_EVENT_RANGE_2(patch->flags.event_on, NamdProfileEvent::SUBMIT_HALFSTEP);
5789 
5790  // velocity-dependent quantities *** ONLY ***
5791  // positions are not at half-step when called
5792  FullAtom *a = patch->atom.begin();
5793  int numAtoms = patch->numAtoms;
5794 
5795 #if CMK_BLUEGENEL
5796  CmiNetworkProgressAfter (0);
5797 #endif
5798 
5799  // For non-Multigrator doKineticEnergy = 1 always
5800  Tensor momentumSqrSum;
// --- Atom-based kinetic energy and kinetic virial ---
5802  {
5803  BigReal kineticEnergy = 0;
5804  Tensor virial;
5805  if ( simParams->pairInteractionOn ) {
5806  if ( simParams->pairInteractionSelf ) {
5807  for ( int i = 0; i < numAtoms; ++i ) {
5808  if ( a[i].partition != 1 ) continue;
5809  kineticEnergy += a[i].mass * a[i].velocity.length2();
5810  virial.outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
5811  }
5812  }
5813  } else {
5814  for ( int i = 0; i < numAtoms; ++i ) {
5815  if (a[i].mass < 0.01) continue;
5816  kineticEnergy += a[i].mass * a[i].velocity.length2();
5817  virial.outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
5818  }
5819  }
5820 
5822  momentumSqrSum = virial;
5823  }
// Extra 0.5: this routine runs twice per step (see header comment).
5824  kineticEnergy *= 0.5 * 0.5;
5826  virial *= 0.5;
5827  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,virial);
5828 #ifdef ALTVIRIAL
5829  ADD_TENSOR_OBJECT(reduction,REDUCTION_ALT_VIRIAL_NORMAL,virial);
5830 #endif
5831  }
5832 
// --- Pressure-profile kinetic contribution, partitioned by z-slab ---
5834  int nslabs = simParams->pressureProfileSlabs;
5835  const Lattice &lattice = patch->lattice;
5836  BigReal idz = nslabs/lattice.c().z;
5837  BigReal zmin = lattice.origin().z - 0.5*lattice.c().z;
5838  int useGroupPressure = simParams->useGroupPressure;
5839 
5840  // Compute kinetic energy partition, possibly subtracting off
5841  // internal kinetic energy if group pressure is enabled.
5842  // Since the regular pressure is 1/2 mvv and the internal kinetic
5843  // term that is subtracted off for the group pressure is
5844  // 1/2 mv (v-v_cm), the group pressure kinetic contribution is
5845  // 1/2 m * v * v_cm. The factor of 1/2 is because submitHalfstep
5846  // gets called twice per timestep.
5847  int hgs;
5848  for (int i=0; i<numAtoms; i += hgs) {
5849  int j, ppoffset;
5850  hgs = a[i].hydrogenGroupSize;
5851  int partition = a[i].partition;
5852 
5853  BigReal m_cm = 0;
5854  Velocity v_cm(0,0,0);
5855  for (j=i; j< i+hgs; ++j) {
5856  m_cm += a[j].mass;
5857  v_cm += a[j].mass * a[j].velocity;
5858  }
5859  v_cm /= m_cm;
5860  for (j=i; j < i+hgs; ++j) {
5861  BigReal mass = a[j].mass;
// For group pressure, the whole hydrogen group shares the parent's slab.
5862  if (! (useGroupPressure && j != i)) {
5863  BigReal z = a[j].position.z;
5864  int slab = (int)floor((z-zmin)*idz);
5865  if (slab < 0) slab += nslabs;
5866  else if (slab >= nslabs) slab -= nslabs;
5867  ppoffset = 3*(slab + partition*nslabs);
5868  }
5869  BigReal wxx, wyy, wzz;
5870  if (useGroupPressure) {
5871  wxx = 0.5*mass * a[j].velocity.x * v_cm.x;
5872  wyy = 0.5*mass * a[j].velocity.y * v_cm.y;
5873  wzz = 0.5*mass * a[j].velocity.z * v_cm.z;
5874  } else {
5875  wxx = 0.5*mass * a[j].velocity.x * a[j].velocity.x;
5876  wyy = 0.5*mass * a[j].velocity.y * a[j].velocity.y;
5877  wzz = 0.5*mass * a[j].velocity.z * a[j].velocity.z;
5878  }
5879  pressureProfileReduction->item(ppoffset ) += wxx;
5880  pressureProfileReduction->item(ppoffset+1) += wyy;
5881  pressureProfileReduction->item(ppoffset+2) += wzz;
5882  }
5883  }
5884  }
5885 
5886  // For non-Multigrator doKineticEnergy = 1 always
// --- Internal (group-relative) kinetic energy and virial ---
5888  {
5889  BigReal intKineticEnergy = 0;
5890  Tensor intVirialNormal;
5891 
5892  int hgs;
5893  for ( int i = 0; i < numAtoms; i += hgs ) {
5894 
5895 #if CMK_BLUEGENEL
5896  CmiNetworkProgress ();
5897 #endif
5898 
5899  hgs = a[i].hydrogenGroupSize;
5900  int j;
5901  BigReal m_cm = 0;
5902  Velocity v_cm(0,0,0);
5903  for ( j = i; j < (i+hgs); ++j ) {
5904  m_cm += a[j].mass;
5905  v_cm += a[j].mass * a[j].velocity;
5906  }
5908  momentumSqrSum.outerAdd(1.0/m_cm, v_cm, v_cm);
5909  }
5910  v_cm /= m_cm;
5911  if ( simParams->pairInteractionOn ) {
5912  if ( simParams->pairInteractionSelf ) {
5913  for ( j = i; j < (i+hgs); ++j ) {
5914  if ( a[j].partition != 1 ) continue;
5915  BigReal mass = a[j].mass;
5916  Vector v = a[j].velocity;
5917  Vector dv = v - v_cm;
5918  intKineticEnergy += mass * (v * dv);
5919  intVirialNormal.outerAdd (mass, v, dv);
5920  }
5921  }
5922  } else {
5923  for ( j = i; j < (i+hgs); ++j ) {
5924  BigReal mass = a[j].mass;
5925  Vector v = a[j].velocity;
5926  Vector dv = v - v_cm;
5927  intKineticEnergy += mass * (v * dv);
5928  intVirialNormal.outerAdd(mass, v, dv);
5929  }
5930  }
5931  }
// Extra 0.5: this routine runs twice per step (see header comment).
5932  intKineticEnergy *= 0.5 * 0.5;
5934  intVirialNormal *= 0.5;
5935  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NORMAL,intVirialNormal);
5936  if ( simParams->multigratorOn) {
5937  momentumSqrSum *= 0.5;
5938  ADD_TENSOR_OBJECT(reduction,REDUCTION_MOMENTUM_SQUARED,momentumSqrSum);
5939  }
5940  }
5941 
5942 }
5943 
5944 void Sequencer::calcFixVirial(Tensor& fixVirialNormal, Tensor& fixVirialNbond, Tensor& fixVirialSlow,
5945  Vector& fixForceNormal, Vector& fixForceNbond, Vector& fixForceSlow) {
5946 
5947  FullAtom *a = patch->atom.begin();
5948  int numAtoms = patch->numAtoms;
5949 
5950  for ( int j = 0; j < numAtoms; j++ ) {
5951  if ( simParams->fixedAtomsOn && a[j].atomFixed ) {
5952  Vector dx = a[j].fixedPosition;
5953  // all negative because fixed atoms cancels these forces
5954  fixVirialNormal.outerAdd(-1.0, patch->f[Results::normal][j], dx);
5955  fixVirialNbond.outerAdd(-1.0, patch->f[Results::nbond][j], dx);
5956  fixVirialSlow.outerAdd(-1.0, patch->f[Results::slow][j], dx);
5957  fixForceNormal -= patch->f[Results::normal][j];
5958  fixForceNbond -= patch->f[Results::nbond][j];
5959  fixForceSlow -= patch->f[Results::slow][j];
5960  }
5961  }
5962 }
5963 
// Sequencer::submitReductions() — the signature line (original 5964) is
// absent from this listing. Full-step reduction submission: atom checksum,
// kinetic energy, momentum, angular momentum, Drude kinetic partition,
// internal kinetic energy/virials, pressure-profile corrections, and
// fixed-atom virial corrections, followed by reduction->submit().
// NOTE(review): multiple original lines are missing from this listing
// (5978, 5982, 6032, 6034, 6041, 6072, 6130, 6136, 6218) — these most
// likely held reduction-item submissions and the doKineticEnergy /
// pressure-profile conditionals; consult the full source before editing.
5965 {
5966 #ifndef UPPER_BOUND
5967  NAMD_EVENT_RANGE_2(patch->flags.event_on,
5968  NamdProfileEvent::SUBMIT_REDUCTIONS);
5969  FullAtom *a = patch->atom.begin();
5970 #endif
5971  int numAtoms = patch->numAtoms;
5972 
5973 #if CMK_BLUEGENEL
5974  CmiNetworkProgressAfter(0);
5975 #endif
5976 
5977  reduction->item(REDUCTION_ATOM_CHECKSUM) += numAtoms;
5979 
5980 #ifndef UPPER_BOUND
5981  // For non-Multigrator doKineticEnergy = 1 always
// --- Kinetic energy, linear momentum, and angular momentum ---
5983  {
5984  BigReal kineticEnergy = 0;
5985  Vector momentum = 0;
5986  Vector angularMomentum = 0;
5987  Vector o = patch->lattice.origin();
5988  int i;
5989  if ( simParams->pairInteractionOn ) {
5990  if ( simParams->pairInteractionSelf ) {
5991  for (i = 0; i < numAtoms; ++i ) {
5992  if ( a[i].partition != 1 ) continue;
5993  kineticEnergy += a[i].mass * a[i].velocity.length2();
5994  momentum += a[i].mass * a[i].velocity;
5995  angularMomentum += cross(a[i].mass,a[i].position-o,a[i].velocity);
5996  }
5997  }
5998  } else {
5999  for (i = 0; i < numAtoms; ++i ) {
6000  kineticEnergy += a[i].mass * a[i].velocity.length2();
6001  momentum += a[i].mass * a[i].velocity;
6002  angularMomentum += cross(a[i].mass,a[i].position-o,a[i].velocity);
6003  }
// Drude model: split kinetic energy into COM and bond (oscillator) parts.
6004  if (simParams->drudeOn) {
6005  BigReal drudeComKE = 0.;
6006  BigReal drudeBondKE = 0.;
6007 
6008  for (i = 0; i < numAtoms; i++) {
// A mass in (0.05, 1.0) immediately following atom i marks a Drude pair.
6009  if (i < numAtoms-1 &&
6010  a[i+1].mass < 1.0 && a[i+1].mass > 0.05) {
6011  // i+1 is a Drude particle with parent i
6012 
6013  // convert from Cartesian coordinates to (COM,bond) coordinates
6014  BigReal m_com = (a[i].mass + a[i+1].mass); // mass of COM
6015  BigReal m = a[i+1].mass / m_com; // mass ratio
6016  BigReal m_bond = a[i+1].mass * (1. - m); // mass of bond
6017  Vector v_bond = a[i+1].velocity - a[i].velocity; // vel of bond
6018  Vector v_com = a[i].velocity + m * v_bond; // vel of COM
6019 
6020  drudeComKE += m_com * v_com.length2();
6021  drudeBondKE += m_bond * v_bond.length2();
6022 
6023  i++; // +1 from loop, we've updated both particles
6024  }
6025  else {
6026  drudeComKE += a[i].mass * a[i].velocity.length2();
6027  }
6028  } // end for
6029 
6030  drudeComKE *= 0.5;
6031  drudeBondKE *= 0.5;
// NOTE(review): the reduction-item targets for the two lines below were
// on the missing original lines 6032 and 6034.
6033  += drudeComKE;
6035  += drudeBondKE;
6036  } // end drudeOn
6037 
6038  } // end else
6039 
6040  kineticEnergy *= 0.5;
6042  ADD_VECTOR_OBJECT(reduction,REDUCTION_MOMENTUM,momentum);
6043  ADD_VECTOR_OBJECT(reduction,REDUCTION_ANGULAR_MOMENTUM,angularMomentum);
6044  }
6045 
6046 #ifdef ALTVIRIAL
6047  // THIS IS NOT CORRECTED FOR PAIR INTERACTIONS
6048  {
6049  Tensor altVirial;
6050  for ( int i = 0; i < numAtoms; ++i ) {
6051  altVirial.outerAdd(1.0, patch->f[Results::normal][i], a[i].position);
6052  }
6053  ADD_TENSOR_OBJECT(reduction,REDUCTION_ALT_VIRIAL_NORMAL,altVirial);
6054  }
6055  {
6056  Tensor altVirial;
6057  for ( int i = 0; i < numAtoms; ++i ) {
6058  altVirial.outerAdd(1.0, patch->f[Results::nbond][i], a[i].position);
6059  }
6060  ADD_TENSOR_OBJECT(reduction,REDUCTION_ALT_VIRIAL_NBOND,altVirial);
6061  }
6062  {
6063  Tensor altVirial;
6064  for ( int i = 0; i < numAtoms; ++i ) {
6065  altVirial.outerAdd(1.0, patch->f[Results::slow][i], a[i].position);
6066  }
6067  ADD_TENSOR_OBJECT(reduction,REDUCTION_ALT_VIRIAL_SLOW,altVirial);
6068  }
6069 #endif
6070 
6071  // For non-Multigrator doKineticEnergy = 1 always
// --- Internal (group-relative) kinetic energy and virials ---
6073  {
6074  BigReal intKineticEnergy = 0;
6075  Tensor intVirialNormal;
6076  Tensor intVirialNbond;
6077  Tensor intVirialSlow;
6078 
6079  int hgs;
6080  for ( int i = 0; i < numAtoms; i += hgs ) {
6081 #if CMK_BLUEGENEL
6082  CmiNetworkProgress();
6083 #endif
6084  hgs = a[i].hydrogenGroupSize;
6085  int j;
6086  BigReal m_cm = 0;
6087  Position x_cm(0,0,0);
6088  Velocity v_cm(0,0,0);
6089  for ( j = i; j < (i+hgs); ++j ) {
6090  m_cm += a[j].mass;
6091  x_cm += a[j].mass * a[j].position;
6092  v_cm += a[j].mass * a[j].velocity;
6093  }
6094  x_cm /= m_cm;
6095  v_cm /= m_cm;
6096  int fixedAtomsOn = simParams->fixedAtomsOn;
6097  if ( simParams->pairInteractionOn ) {
6098  int pairInteractionSelf = simParams->pairInteractionSelf;
6099  for ( j = i; j < (i+hgs); ++j ) {
6100  if ( a[j].partition != 1 &&
6101  ( pairInteractionSelf || a[j].partition != 2 ) ) continue;
6102  // net force treated as zero for fixed atoms
6103  if ( fixedAtomsOn && a[j].atomFixed ) continue;
6104  BigReal mass = a[j].mass;
6105  Vector v = a[j].velocity;
6106  Vector dv = v - v_cm;
6107  intKineticEnergy += mass * (v * dv);
6108  Vector dx = a[j].position - x_cm;
6109  intVirialNormal.outerAdd(1.0, patch->f[Results::normal][j], dx);
6110  intVirialNbond.outerAdd(1.0, patch->f[Results::nbond][j], dx);
6111  intVirialSlow.outerAdd(1.0, patch->f[Results::slow][j], dx);
6112  }
6113  } else {
6114  for ( j = i; j < (i+hgs); ++j ) {
6115  // net force treated as zero for fixed atoms
6116  if ( fixedAtomsOn && a[j].atomFixed ) continue;
6117  BigReal mass = a[j].mass;
6118  Vector v = a[j].velocity;
6119  Vector dv = v - v_cm;
6120  intKineticEnergy += mass * (v * dv);
6121  Vector dx = a[j].position - x_cm;
6122  intVirialNormal.outerAdd(1.0, patch->f[Results::normal][j], dx);
6123  intVirialNbond.outerAdd(1.0, patch->f[Results::nbond][j], dx);
6124  intVirialSlow.outerAdd(1.0, patch->f[Results::slow][j], dx);
6125  }
6126  }
6127  }
6128 
6129  intKineticEnergy *= 0.5;
6131  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NORMAL,intVirialNormal);
6132  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NBOND,intVirialNbond);
6133  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_SLOW,intVirialSlow);
6134  }
6135 
// --- Pressure-profile internal-virial correction, partitioned by z-slab ---
6137  // subtract off internal virial term, calculated as for intVirial.
6138  int nslabs = simParams->pressureProfileSlabs;
6139  const Lattice &lattice = patch->lattice;
6140  BigReal idz = nslabs/lattice.c().z;
6141  BigReal zmin = lattice.origin().z - 0.5*lattice.c().z;
6142  int useGroupPressure = simParams->useGroupPressure;
6143 
6144  int hgs;
6145  for (int i=0; i<numAtoms; i += hgs) {
6146  int j;
6147  hgs = a[i].hydrogenGroupSize;
6148  BigReal m_cm = 0;
6149  Position x_cm(0,0,0);
6150  for (j=i; j< i+hgs; ++j) {
6151  m_cm += a[j].mass;
6152  x_cm += a[j].mass * a[j].position;
6153  }
6154  x_cm /= m_cm;
6155 
// Slab is determined by the group parent's z position.
6156  BigReal z = a[i].position.z;
6157  int slab = (int)floor((z-zmin)*idz);
6158  if (slab < 0) slab += nslabs;
6159  else if (slab >= nslabs) slab -= nslabs;
6160  int partition = a[i].partition;
6161  int ppoffset = 3*(slab + nslabs*partition);
6162  for (j=i; j < i+hgs; ++j) {
6163  BigReal mass = a[j].mass;
6164  Vector dx = a[j].position - x_cm;
6165  const Vector &fnormal = patch->f[Results::normal][j];
6166  const Vector &fnbond = patch->f[Results::nbond][j];
6167  const Vector &fslow = patch->f[Results::slow][j];
6168  BigReal wxx = (fnormal.x + fnbond.x + fslow.x) * dx.x;
6169  BigReal wyy = (fnormal.y + fnbond.y + fslow.y) * dx.y;
6170  BigReal wzz = (fnormal.z + fnbond.z + fslow.z) * dx.z;
6171  pressureProfileReduction->item(ppoffset ) -= wxx;
6172  pressureProfileReduction->item(ppoffset+1) -= wyy;
6173  pressureProfileReduction->item(ppoffset+2) -= wzz;
6174  }
6175  }
6176  }
6177 
6178  // For non-Multigrator doVirial = 1 always
6179  if (patch->flags.doVirial)
6180  {
6181  if ( simParams->fixedAtomsOn ) {
6182  Tensor fixVirialNormal;
6183  Tensor fixVirialNbond;
6184  Tensor fixVirialSlow;
6185  Vector fixForceNormal = 0;
6186  Vector fixForceNbond = 0;
6187  Vector fixForceSlow = 0;
6188 
6189  calcFixVirial(fixVirialNormal, fixVirialNbond, fixVirialSlow, fixForceNormal, fixForceNbond, fixForceSlow);
6190 
// Disabled debugging dump of the fixed-atom virials.
6191 #if 0
6192  auto printTensor = [](const Tensor& t, const std::string& name){
6193  CkPrintf("%s", name.c_str());
6194  CkPrintf("\n%12.5lf %12.5lf %12.5lf\n"
6195  "%12.5lf %12.5lf %12.5lf\n"
6196  "%12.5lf %12.5lf %12.5lf\n",
6197  t.xx, t.xy, t.xz,
6198  t.yx, t.yy, t.yz,
6199  t.zx, t.zy, t.zz);
6200  };
6201  printTensor(fixVirialNormal, "fixVirialNormal = ");
6202  printTensor(fixVirialNbond, "fixVirialNbond = ");
6203  printTensor(fixVirialSlow, "fixVirialSlow = ");
6204 #endif
6205 
6206  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,fixVirialNormal);
6207  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NBOND,fixVirialNbond);
6208  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_SLOW,fixVirialSlow);
6209  ADD_VECTOR_OBJECT(reduction,REDUCTION_EXT_FORCE_NORMAL,fixForceNormal);
6210  ADD_VECTOR_OBJECT(reduction,REDUCTION_EXT_FORCE_NBOND,fixForceNbond);
6211  ADD_VECTOR_OBJECT(reduction,REDUCTION_EXT_FORCE_SLOW,fixForceSlow);
6212  }
6213  }
6214 #endif // UPPER_BOUND
6215 
6216  reduction->submit();
6217 #ifndef UPPER_BOUND
6219 #endif
6220 }
6221 
// Sequencer::submitMinimizeReductions(...) — the signature line (original
// 6222; takes at least a step and an 'fmax2' force-squared cap, judging by
// the body) is absent from this listing. Minimization bookkeeping: folds
// all force classes into f[normal], repositions Drude particles, clamps
// runaway forces, and submits dot-product and virial reductions.
// NOTE(review): original lines 6313-6316 (the fdotf/fdotv/vdotv/numHuge
// reduction submissions) are missing from this listing; consult the full
// source before editing.
6223 {
6224  FullAtom *a = patch->atom.begin();
6225  Force *f1 = patch->f[Results::normal].begin();
6226  Force *f2 = patch->f[Results::nbond].begin();
6227  Force *f3 = patch->f[Results::slow].begin();
6228  const bool fixedAtomsOn = simParams->fixedAtomsOn;
6229  const bool drudeHardWallOn = simParams->drudeHardWallOn;
6230  const double drudeBondLen = simParams->drudeBondLen;
6231  const double drudeBondLen2 = drudeBondLen * drudeBondLen;
6232  const double drudeStep = 0.1/(TIMEFACTOR*TIMEFACTOR);
6233  const double drudeMove = 0.01;
6234  const double drudeStep2 = drudeStep * drudeStep;
6235  const double drudeMove2 = drudeMove * drudeMove;
6236  int numAtoms = patch->numAtoms;
6237 
6238  reduction->item(REDUCTION_ATOM_CHECKSUM) += numAtoms;
6239 
// Fold nbond and slow forces into f1; relax Drude particles toward their
// parents and transfer their residual force onto the parent atom.
6240  for ( int i = 0; i < numAtoms; ++i ) {
6241 #if 0
6242  printf("ap[%2d]= %f %f %f\n", i, a[i].position.x, a[i].position.y, a[i].position.z);
6243  printf("f1[%2d]= %f %f %f\n", i, f1[i].x, f1[i].y, f1[i].z);
6244  printf("f2[%2d]= %f %f %f\n", i, f2[i].x, f2[i].y, f2[i].z);
6245  //printf("f3[%2d]= %f %f %f\n", i, f3[i].x, f3[i].y, f3[i].z);
6246 #endif
6247  f1[i] += f2[i] + f3[i]; // add all forces
// Mass in (0.05, 1.0) identifies a Drude particle; its parent is atom i-1.
6248  if ( drudeHardWallOn && i && (a[i].mass > 0.05) && ((a[i].mass < 1.0)) ) { // drude particle
6249  if ( ! fixedAtomsOn || ! a[i].atomFixed ) {
// Move the Drude particle downhill, capped at drudeMove per call.
6250  if ( drudeStep2 * f1[i].length2() > drudeMove2 ) {
6251  a[i].position += drudeMove * f1[i].unit();
6252  } else {
6253  a[i].position += drudeStep * f1[i];
6254  }
// Enforce the hard-wall bond-length limit to the parent atom.
6255  if ( (a[i].position - a[i-1].position).length2() > drudeBondLen2 ) {
6256  a[i].position = a[i-1].position + drudeBondLen * (a[i].position - a[i-1].position).unit();
6257  }
6258  }
// Transfer the pair's net force to the parent; zero the Drude's force.
6259  Vector netf = f1[i-1] + f1[i];
6260  if ( fixedAtomsOn && a[i-1].atomFixed ) netf = 0;
6261  f1[i-1] = netf;
6262  f1[i] = 0.;
6263  }
6264  if ( fixedAtomsOn && a[i].atomFixed ) f1[i] = 0;
6265  }
6266 
6267  f2 = f3 = 0; // included in f1
6268 
// Largest squared velocity (or, for stationary atoms, squared force) —
// used below to pick a safe step size for constrained minimization.
6269  BigReal maxv2 = 0.;
6270 
6271  for ( int i = 0; i < numAtoms; ++i ) {
6272  BigReal v2 = a[i].velocity.length2();
6273  if ( v2 > 0. ) {
6274  if ( v2 > maxv2 ) maxv2 = v2;
6275  } else {
6276  v2 = f1[i].length2();
6277  if ( v2 > maxv2 ) maxv2 = v2;
6278  }
6279  }
6280 
6281  if ( fmax2 > 10. * TIMEFACTOR * TIMEFACTOR * TIMEFACTOR * TIMEFACTOR )
6282  { Tensor virial; patch->minimize_rattle2( 0.1 * TIMEFACTOR / sqrt(maxv2), &virial, true /* forces */); }
6283 
// Accumulate F.F, F.V, V.V dot products for the minimizer's line search,
// clamping any force whose magnitude squared exceeds fmax2.
6284  BigReal fdotf = 0;
6285  BigReal fdotv = 0;
6286  BigReal vdotv = 0;
6287  int numHuge = 0;
6288  for ( int i = 0; i < numAtoms; ++i ) {
6289  if ( simParams->fixedAtomsOn && a[i].atomFixed ) continue;
6290  if ( drudeHardWallOn && (a[i].mass > 0.05) && ((a[i].mass < 1.0)) ) continue; // drude particle
6291  Force f = f1[i];
6292  BigReal ff = f * f;
6293  if ( ff > fmax2 ) {
6294  if (simParams->printBadContacts) {
6295  CkPrintf("STEP(%i) MIN_HUGE[%i] f=%e kcal/mol/A\n",patch->flags.sequence,patch->pExt[i].id,ff);
6296  }
6297  ++numHuge;
6298  // pad scaling so minimizeMoveDownhill() doesn't miss them
6299  BigReal fmult = 1.01 * sqrt(fmax2/ff);
6300  f *= fmult; ff = f * f;
6301  f1[i] *= fmult;
6302  }
6303  fdotf += ff;
6304  fdotv += f * a[i].velocity;
6305  vdotv += a[i].velocity * a[i].velocity;
6306  }
6307 
6308 #if 0
6309  printf("fdotf = %f\n", fdotf);
6310  printf("fdotv = %f\n", fdotv);
6311  printf("vdotv = %f\n", vdotv);
6312 #endif
6317 
// --- Internal virials about each hydrogen-group center of mass ---
6318  {
6319  Tensor intVirialNormal;
6320  Tensor intVirialNbond;
6321  Tensor intVirialSlow;
6322 
6323  int hgs;
6324  for ( int i = 0; i < numAtoms; i += hgs ) {
6325  hgs = a[i].hydrogenGroupSize;
6326  int j;
6327  BigReal m_cm = 0;
6328  Position x_cm(0,0,0);
6329  for ( j = i; j < (i+hgs); ++j ) {
6330  m_cm += a[j].mass;
6331  x_cm += a[j].mass * a[j].position;
6332  }
6333  x_cm /= m_cm;
6334  for ( j = i; j < (i+hgs); ++j ) {
6335  BigReal mass = a[j].mass;
6336  // net force treated as zero for fixed atoms
6337  if ( simParams->fixedAtomsOn && a[j].atomFixed ) continue;
6338  Vector dx = a[j].position - x_cm;
6339  intVirialNormal.outerAdd(1.0, patch->f[Results::normal][j], dx);
6340  intVirialNbond.outerAdd(1.0, patch->f[Results::nbond][j], dx);
6341  intVirialSlow.outerAdd(1.0, patch->f[Results::slow][j], dx);
6342  }
6343  }
6344 
6345  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NORMAL,intVirialNormal);
6346  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NBOND,intVirialNbond);
6347  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_SLOW,intVirialSlow);
6348  }
6349 
6350  if ( simParams->fixedAtomsOn ) {
6351  Tensor fixVirialNormal;
6352  Tensor fixVirialNbond;
6353  Tensor fixVirialSlow;
6354  Vector fixForceNormal = 0;
6355  Vector fixForceNbond = 0;
6356  Vector fixForceSlow = 0;
6357 
6358  calcFixVirial(fixVirialNormal, fixVirialNbond, fixVirialSlow, fixForceNormal, fixForceNbond, fixForceSlow);
6359 
6360  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,fixVirialNormal);
6361  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NBOND,fixVirialNbond);
6362  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_SLOW,fixVirialSlow);
6363  ADD_VECTOR_OBJECT(reduction,REDUCTION_EXT_FORCE_NORMAL,fixForceNormal);
6364  ADD_VECTOR_OBJECT(reduction,REDUCTION_EXT_FORCE_NBOND,fixForceNbond);
6365  ADD_VECTOR_OBJECT(reduction,REDUCTION_EXT_FORCE_SLOW,fixForceSlow);
6366  }
6367 
6368  reduction->submit();
6369 }
6370 
// Submit trajectory output for this step: positions (with DCD-selection
// support), velocities, and forces, each only when the Output policy says
// the step needs them. NOTE(review): original lines 6381, 6383, and 6385
// are missing from this listing — they likely held additional IMD
// conditions and the IMD coordinate-submission call; consult full source.
6371 void Sequencer::submitCollections(int step, int zeroVel)
6372 {
6373  //
6374  // DJH: Copy updates of SOA back into AOS.
6375  // Do we need to update everything or is it safe to just update
6376  // positions and velocities separately, as needed?
6377  //
6378  //patch->copy_updates_to_AOS();
6379 
// Interactive MD hook: runs every IMDfreq steps except the first.
6380  if (simParams->IMDon &&
6382  !(step % simParams->IMDfreq) &&
6384  (step != simParams->firstTimestep)) {
6386  }
6387 
6388  NAMD_EVENT_RANGE_2(patch->flags.event_on,
6389  NamdProfileEvent::SUBMIT_COLLECTIONS);
// 'prec' acts as both a needed-flag and the requested output precision.
6390  int prec;
6391  int dcdSelectionIndex;
6392  std::tie (prec, dcdSelectionIndex) = Output::coordinateNeeded(step);
6393  if ( prec ) {
6394  collection->submitPositions(step,patch->atom,patch->lattice,prec,dcdSelectionIndex);
6395  }
6396  prec = Output::velocityNeeded(step);
6397  if ( prec ) {
6398  collection->submitVelocities(step,zeroVel,patch->atom,prec);
6399  }
6400  prec = Output::forceNeeded(step);
6401  if ( prec ) {
// Never write force classes beyond 'slow' to output.
6402  int maxForceUsed = patch->flags.maxForceUsed;
6403  if ( maxForceUsed > Results::slow ) maxForceUsed = Results::slow;
6404  collection->submitForces(step,patch->atom,maxForceUsed,patch->f,prec);
6405  }
6406 }
6407 
// Drive one round of force computation: manage pairlist validity, hand
// positions to the compute objects via positionsReady(), suspend this
// user-level thread until forces arrive, then post-process (lone pairs,
// TIP4/SWM4 water models, alchemical pairs, MOLLY, Lowe-Andersen).
// NOTE(review): many original lines are missing from this listing (6413,
// 6418, 6424, 6444, 6448, 6451, 6475, 6494, 6510, 6526, 6538, 6552, among
// others) — mostly continuation lines of conditions and assignments, which
// is why some statements below look truncated; consult the full source
// before editing this function.
6408 void Sequencer::runComputeObjects(int migration, int pairlists, int pressureStep)
6409 {
// Atom migration invalidates tile/pair lists.
6410  if ( migration ) pairlistsAreValid = 0;
6411 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC)
// On accelerated builds pairlists also expire by age.
6412  if ( pairlistsAreValid &&
6414  && ( pairlistsAge > pairlistsAgeLimit ) ) {
6415  pairlistsAreValid = 0;
6416  }
6417 #else
6419  pairlistsAreValid = 0;
6420  }
6421 #endif
6422  if ( ! simParams->usePairlists ) pairlists = 0;
6423  patch->flags.usePairlists = pairlists || pairlistsAreValid;
6425  pairlists && ! pairlistsAreValid;
6426 
// Virtual-site bookkeeping must happen before positions are published.
6427  if ( simParams->singleTopology ) patch->reposition_all_alchpairs();
6428  if ( simParams->lonepairs ) patch->reposition_all_lonepairs();
6429 
6430  //
6431  // DJH: Copy updates of SOA back into AOS.
6432  // The positionsReady() routine starts force computation and atom migration.
6433  //
6434  // We could reduce amount of copying here by checking migration status
6435  // and copying velocities only when migrating. Some types of simulation
6436  // always require velocities, such as Lowe-Anderson.
6437  //
6438  //patch->copy_updates_to_AOS();
6439 
6440  patch->positionsReady(migration); // updates flags.sequence
6441 
// Thread priority encodes the step sequence so reductions stay ordered.
6442  int seq = patch->flags.sequence;
6443  int basePriority = ( (seq & 0xffff) << 15 )
// GBIS runs in multiple phases, each with its own suspend/resume cycle.
6445  if ( patch->flags.doGBIS && patch->flags.doNonbonded) {
6446  priority = basePriority + GB1_COMPUTE_HOME_PRIORITY;
6447  suspend(); // until all deposit boxes close
6449  priority = basePriority + GB2_COMPUTE_HOME_PRIORITY;
6450  suspend();
6452  priority = basePriority + COMPUTE_HOME_PRIORITY;
6453  suspend();
6454  } else {
6455  priority = basePriority + COMPUTE_HOME_PRIORITY;
6456  suspend(); // until all deposit boxes close
6457  }
6458 
6459  //
6460  // DJH: Copy all data into SOA from AOS.
6461  //
6462  // We need everything copied after atom migration.
6463  // When doing force computation without atom migration,
6464  // all data except forces will already be up-to-date in SOA
6465  // (except maybe for some special types of simulation).
6466  //
6467  //patch->copy_all_to_SOA();
6468 
6469  //
6470  // DJH: Copy forces to SOA.
6471  // Force available after suspend() has returned.
6472  //
6473  //patch->copy_forces_to_SOA();
6474 
// Freshly built pairlists start their aging clock here.
6476  pairlistsAreValid = 1;
6477  pairlistsAge = 0;
6478  }
6479  // For multigrator, do not age pairlist during pressure step
6480  // NOTE: for non-multigrator pressureStep = 0 always
6481  if ( pairlistsAreValid && !pressureStep ) ++pairlistsAge;
6482 
// Redistribute forces from massless/virtual sites back onto real atoms,
// collecting the associated virial corrections per force class.
6483  if (simParams->lonepairs) {
6484  {
6485  Tensor virial;
6486  patch->redistrib_lonepair_forces(Results::normal, &virial);
6487  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NORMAL, virial);
6488  }
6489  if (patch->flags.doNonbonded) {
6490  Tensor virial;
6491  patch->redistrib_lonepair_forces(Results::nbond, &virial);
6492  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NBOND, virial);
6493  }
6495  Tensor virial;
6496  patch->redistrib_lonepair_forces(Results::slow, &virial);
6497  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_SLOW, virial);
6498  }
6499  } else if (simParams->watmodel == WaterModel::TIP4) {
6500  {
6501  Tensor virial;
6502  patch->redistrib_tip4p_forces(Results::normal, &virial);
6503  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NORMAL, virial);
6504  }
6505  if (patch->flags.doNonbonded) {
6506  Tensor virial;
6507  patch->redistrib_tip4p_forces(Results::nbond, &virial);
6508  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NBOND, virial);
6509  }
6511  Tensor virial;
6512  patch->redistrib_tip4p_forces(Results::slow, &virial);
6513  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_SLOW, virial);
6514  }
6515  } else if (simParams->watmodel == WaterModel::SWM4) {
6516  {
6517  Tensor virial;
6518  patch->redistrib_swm4_forces(Results::normal, &virial);
6519  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NORMAL, virial);
6520  }
6521  if (patch->flags.doNonbonded) {
6522  Tensor virial;
6523  patch->redistrib_swm4_forces(Results::nbond, &virial);
6524  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NBOND, virial);
6525  }
6527  Tensor virial;
6528  patch->redistrib_swm4_forces(Results::slow, &virial);
6529  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_SLOW, virial);
6530  }
6531  }
6532 
6533  if (simParams->singleTopology) {
6534  patch->redistrib_alchpair_forces(Results::normal);
6535  if (patch->flags.doNonbonded) {
6536  patch->redistrib_alchpair_forces(Results::nbond);
6537  }
6539  patch->redistrib_alchpair_forces(Results::slow);
6540  }
6541  }
6542 
6543  if ( patch->flags.doMolly ) {
6544  Tensor virial;
6545  patch->mollyMollify(&virial);
6546  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_SLOW,virial);
6547  }
6548 
6549 
6550  // BEGIN LA
6551  if (patch->flags.doLoweAndersen) {
6553  }
6554  // END LA
6555 //TODO:HIP check if this applies to HIP
// Disabled per-atom force dump for CUDA debugging.
6556 #ifdef NAMD_CUDA_XXX
6557  int numAtoms = patch->numAtoms;
6558  FullAtom *a = patch->atom.begin();
6559  for ( int i=0; i<numAtoms; ++i ) {
6560  CkPrintf("%d %g %g %g\n", a[i].id,
6561  patch->f[Results::normal][i].x +
6562  patch->f[Results::nbond][i].x +
6563  patch->f[Results::slow][i].x,
6564  patch->f[Results::normal][i].y +
6565  patch->f[Results::nbond][i].y +
6566  patch->f[Results::slow][i].y,
6567  patch->f[Results::normal][i].z +
6568  patch->f[Results::nbond][i].z +
6569  patch->f[Results::slow][i].z);
6570  CkPrintf("%d %g %g %g\n", a[i].id,
6571  patch->f[Results::normal][i].x,
6572  patch->f[Results::nbond][i].x,
6573  patch->f[Results::slow][i].x);
6574  CkPrintf("%d %g %g %g\n", a[i].id,
6575  patch->f[Results::normal][i].y,
6576  patch->f[Results::nbond][i].y,
6577  patch->f[Results::slow][i].y);
6578  CkPrintf("%d %g %g %g\n", a[i].id,
6579  patch->f[Results::normal][i].z,
6580  patch->f[Results::nbond][i].z,
6581  patch->f[Results::slow][i].z);
6582  }
6583 #endif
6584 
6585 //#undef PRINT_FORCES
6586 //#define PRINT_FORCES 1
6587 #if PRINT_FORCES
6588  int numAtoms = patch->numAtoms;
6589  FullAtom *a = patch->atom.begin();
6590  for ( int i=0; i<numAtoms; ++i ) {
6591  float fxNo = patch->f[Results::normal][i].x;
6592  float fxNb = patch->f[Results::nbond][i].x;
6593  float fxSl = patch->f[Results::slow][i].x;
6594  float fyNo = patch->f[Results::normal][i].y;
6595  float fyNb = patch->f[Results::nbond][i].y;
6596  float fySl = patch->f[Results::slow][i].y;
6597  float fzNo = patch->f[Results::normal][i].z;
6598  float fzNb = patch->f[Results::nbond][i].z;
6599  float fzSl = patch->f[Results::slow][i].z;
6600  float fx = fxNo+fxNb+fxSl;
6601  float fy = fyNo+fyNb+fySl;
6602  float fz = fzNo+fzNb+fzSl;
6603 
6604  float f = sqrt(fx*fx+fy*fy+fz*fz);
6605  int id = patch->pExt[i].id;
6606  int seq = patch->flags.sequence;
6607  float x = patch->p[i].position.x;
6608  float y = patch->p[i].position.y;
6609  float z = patch->p[i].position.z;
6610  //CkPrintf("FORCE(%04i)[%04i] = <% .4e, % .4e, % .4e> <% .4e, % .4e, % .4e> <% .4e, % .4e, % .4e> <<% .4e, % .4e, % .4e>>\n", seq,id,
6611  CkPrintf("FORCE(%04i)[%04i] = % .9e % .9e % .9e\n", seq,id,
6612  //CkPrintf("FORCE(%04i)[%04i] = <% .4e, % .4e, % .4e> <% .4e, % .4e, % .4e> <% .4e, % .4e, % .4e>\n", seq,id,
6613 //fxNo,fyNo,fzNo,
6614 fxNb,fyNb,fzNb
6615 //fxSl,fySl,fzSl,
6616 //fx,fy,fz
6617 );
6618  }
6619 #endif
6620 }
6621 
// Periodic load-balancing hook: counts down ldbSteps and, when it reaches
// zero, submits this patch's timing statistics and asks the load-balancer
// to migrate work; pairlists are invalidated since atoms may move.
// NOTE(review): original line 6624 (the reset of ldbSteps inside the first
// conditional) is missing from this listing; consult the full source.
6622 void Sequencer::rebalanceLoad(int timestep) {
6623  if ( ! ldbSteps ) {
6625  }
6626  if ( ! --ldbSteps ) {
6627  patch->submitLoadStats(timestep);
6628  ldbCoordinator->rebalance(this,patch->getPatchID());
6629  pairlistsAreValid = 0;
6630  }
6631 }
6632 
6633 void Sequencer::cycleBarrier(int doBarrier, int step) {
6634 #if USE_BARRIER
6635  if (doBarrier)
6636  // Blocking receive for the cycle barrier.
6637  broadcast->cycleBarrier.get(step);
6638 #endif
6639 }
6640 
// Tail of Sequencer::traceBarrier(int step) — the signature line (original
// 6641) is absent from this listing. Blocks this thread until the
// trace-barrier broadcast for 'step' arrives.
6642  // Blocking receive for the trace barrier.
6643  broadcast->traceBarrier.get(step);
6644 }
6645 
#ifdef MEASURE_NAMD_WITH_PAPI
// Block this thread until the PAPI measurement barrier broadcast for the
// given step arrives (only compiled into PAPI-instrumented builds).
void Sequencer::papiMeasureBarrier(int step){
  // Blocking receive for the PAPI measure barrier broadcast.
  const int barrierStep = step;
  broadcast->papiMeasureBarrier.get(barrierStep);
}
#endif
6652 
// Tail of Sequencer::terminate(void) — the signature lines (original
// 6653-6654) are absent from this listing; the name and location are
// confirmed by the cross-reference index at the end of this file.
// Ends this sequencer's user-level thread: schedule its resources for
// release, then suspend so it never runs again.
6655  CthFree(thread);
6656  CthSuspend();
6657 }
static Node * Object()
Definition: Node.h:86
HomePatch *const patch
Definition: Sequencer.h:323
Real atomcharge(int anum) const
Definition: Molecule.h:1124
SubmitReduction * multigratorReduction
Definition: Sequencer.h:309
Vector gaussian_vector(void)
Definition: Random.h:219
void rescaleVelocities(int)
Definition: Sequencer.C:5386
void finishReduction(bool doEnergyVirial)
double * vel_y
Definition: NamdTypes.h:397
int doKineticEnergy
Definition: Sequencer.h:310
void minimizationQuenchVelocity(void)
Definition: Sequencer.C:5775
int period
period for some step dependent event (e.g. stepsPerCycle)
Definition: Sequencer.C:143
NAMD_HOST_DEVICE void rescale(Tensor factor)
Definition: Lattice.h:60
void max(int i, BigReal v)
Definition: ReductionMgr.h:349
int init(int initstep, int initperiod, int delta=0)
Definition: Sequencer.C:159
#define RECIP_TIMEFACTOR
Definition: common.h:61
DCDParams dcdSelectionParams[16]
Definition: Molecule.h:482
void barrier(const SynchronousCollectiveScope scope)
BigReal zy
Definition: Tensor.h:19
Real langevinParam
Definition: NamdTypes.h:220
Bool berendsenPressureOn
void tcoupleVelocities(BigReal, int)
Definition: Sequencer.C:5557
void addMovDragToPosition(BigReal)
Definition: Sequencer.C:4395
void terminate(void)
Definition: Sequencer.C:6653
BigReal soluteScalingFactorCharge
void submitForces(int seq, FullAtomList &a, int maxForceUsed, ForceList *f, int prec)
virtual void algorithm(void)
Definition: Sequencer.C:289
void get_rotdrag_params(BigReal &v, Vector &a, Vector &p, int atomnum) const
Definition: Molecule.h:1419
void langevinVelocitiesBBK2_SOA(BigReal timestep)
Definition: Sequencer.C:3324
#define NAMD_EVENT_STOP(eon, id)
int frequency
Definition: common.h:255
Bool is_atom_movdragged(int atomnum) const
Definition: Molecule.h:1289
SubmitReduction * pressureProfileReduction
Definition: Sequencer.h:325
void suspendULTs()
int getNumAtoms() const
Definition: Patch.h:105
void minimize_rattle2(const BigReal, Tensor *virial, bool forces=false)
Definition: HomePatch.C:4382
void integrate(int)
Definition: Sequencer.C:3868
friend class SequencerCUDA
Definition: Sequencer.h:49
HomePatch * patch
Definition: HomePatchList.h:23
Definition: PDB.h:36
void scaleVelocities(const BigReal velScale)
Definition: Sequencer.C:4927
void positionsReady_SOA(int doMigration=0)
Definition: HomePatch.C:971
#define GB1_COMPUTE_HOME_PRIORITY
Definition: Priorities.h:56
void addVelocityToPosition(BigReal)
Definition: Sequencer.C:5646
SubmitReduction * reduction
Definition: Sequencer.h:324
NAMD_HOST_DEVICE Vector c() const
Definition: Lattice.h:270
BigReal xz
Definition: Tensor.h:17
SubmitReduction * min_reduction
Definition: Sequencer.h:228
std::shared_ptr< CudaGlobalMasterServer > getCudaGlobalMaster()
Bool is_atom_exPressure(int atomnum) const
Definition: Molecule.h:1544
SimpleBroadcastObject< int > traceBarrier
Definition: Broadcasts.h:89
BigReal accelMDLastStep
void maximumMove(BigReal)
Definition: Sequencer.C:5730
Bool monteCarloPressureOn
int marginViolations
Definition: HomePatch.h:401
Definition: common.h:275
#define BOLTZMANN
Definition: common.h:54
Definition: Node.h:78
double * f_normal_z
Definition: NamdTypes.h:430
const GlobalMasterIMD * getIMD()
Definition: IMDOutput.h:42
void cycleBarrier(int, int)
Definition: Sequencer.C:6633
#define FILE_OUTPUT
Definition: Output.h:25
IMDOutput * imd
Definition: Node.h:186
double * f_normal_y
Definition: NamdTypes.h:429
Position fixedPosition
Definition: NamdTypes.h:212
Lattice & lattice
Definition: Patch.h:127
void submitCollections_SOA(int step, int zeroVel=0)
Definition: Sequencer.C:3171
Bool globalMasterScaleByFrequency
static void partition(int *order, const FullAtom *atoms, int begin, int end)
Definition: SortAtoms.C:45
SimpleBroadcastObject< Vector > momentumCorrection
Definition: Broadcasts.h:82
void addRotDragToPosition(BigReal)
Definition: Sequencer.C:4414
static PatchMap * Object()
Definition: PatchMap.h:27
void saveForce(const int ftag=Results::normal)
Definition: Sequencer.C:5596
void registerIDsFullAtom(const FullAtom *begin, const FullAtom *end)
Definition: AtomMap.C:50
CmiNodeLock printlock
Definition: PatchData.h:163
#define EVAL_MEASURE
Definition: Output.h:27
double * f_slow_y
Definition: NamdTypes.h:435
Definition: Vector.h:72
void langevinVelocitiesBBK2(BigReal)
Definition: Sequencer.C:5135
void monteCarloPressureControl(const int step, const int doMigration, const int doEnergy, const int doVirial, const int maxForceNumber, const int doGlobal)
virtual void submit(void)=0
Output * output
Definition: Node.h:185
#define ADD_TENSOR_OBJECT(R, RL, D)
Definition: ReductionMgr.h:44
SimParameters * simParameters
Definition: Node.h:181
int slowFreq
Definition: Sequencer.h:297
void addForceToMomentum(FullAtom *__restrict atom_arr, const Force *__restrict force_arr, const BigReal dt, int num_atoms) __attribute__((__noinline__))
Definition: HomePatch.C:3319
void newMinimizeDirection(BigReal)
Definition: Sequencer.C:4605
void newMinimizePosition(BigReal)
Definition: Sequencer.C:4664
double stochRescaleCoefficient()
Definition: Controller.C:1784
Bool CUDASOAintegrateMode
int rattle1(const BigReal, Tensor *virial, SubmitReduction *)
Definition: HomePatch.C:3788
int savePairlists
Definition: PatchTypes.h:41
int nextstep
next step value
Definition: Sequencer.C:144
bool masterThread
Definition: Sequencer.h:331
BigReal reassignTemp
BigReal & item(int i)
Definition: ReductionMgr.h:336
void gbisComputeAfterP2()
Definition: HomePatch.C:4943
#define DebugM(x, y)
Definition: Debug.h:75
void startWork(const LDObjHandle &handle)
HomePatchList * homePatchList()
Definition: PatchMap.C:438
void langevinVelocitiesBBK1(BigReal)
Definition: Sequencer.C:5062
std::ostream & endi(std::ostream &s)
Definition: InfoStream.C:54
BigReal z
Definition: Vector.h:74
char const *const NamdProfileEventStr[]
int getNumDevice()
Definition: DeviceCUDA.h:125
int usePairlists
Definition: PatchTypes.h:40
Position position
Definition: NamdTypes.h:78
BigReal rotDragGlobVel
BigReal yz
Definition: Tensor.h:18
void updateDevicePatchMap(int startup)
int berendsenPressureFreq
SubmitReduction * willSubmit(int setID, int size=-1)
Definition: ReductionMgr.C:368
void rattle1(BigReal, int)
Definition: Sequencer.C:5673
void saveTotalForces(HomePatch *)
SimpleBroadcastObject< BigReal > adaptTemperature
Definition: Broadcasts.h:92
unsigned char get_ss_type(int anum) const
Definition: Molecule.h:1448
void rebalanceLoad(int timestep)
Definition: Sequencer.C:6622
void submitHalfstep(int)
Definition: Sequencer.C:5786
Bool globalMasterStaleForces
static ReductionMgr * Object(void)
Definition: ReductionMgr.h:290
#define iout
Definition: InfoStream.h:51
void addForceToMomentum_SOA(const double scaling, double dt_normal, double dt_nbond, double dt_slow, int maxForceNumber)
Definition: Sequencer.C:2724
int doLoweAndersen
Definition: PatchTypes.h:28
Velocity velocity
Definition: NamdTypes.h:211
int pressureProfileSlabs
void minimizeMoveDownhill(BigReal fmax2)
Definition: Sequencer.C:4583
Patch * patch(PatchID pid)
Definition: PatchMap.h:244
void addForceToMomentum(BigReal, const int ftag=Results::normal, const int useSaved=0)
Definition: Sequencer.C:5607
void submitReductions_SOA()
Definition: Sequencer.C:2966
std::vector< PatchRecord > & getPatches()
static PatchMap * ObjectOnPe(int pe)
Definition: PatchMap.h:28
float * langScalVelBBK2
derived from langevinParam
Definition: NamdTypes.h:419
uint32 groupFixed
Definition: NamdTypes.h:163
void pauseWork(const LDObjHandle &handle)
void langevinPiston(int)
Definition: Sequencer.C:5306
SimpleBroadcastObject< BigReal > tcoupleCoefficient
Definition: Broadcasts.h:79
int NAMD_gcd(int a, int b)
Definition: common.C:102
void exchangeCheckpoint(int scriptTask, int &bpc)
Definition: HomePatch.C:5263
AtomMapper * atomMapper
Definition: Patch.h:159
Bool pairInteractionOn
float * gaussrand_y
Definition: NamdTypes.h:424
Molecule stores the structural information for the system.
Definition: Molecule.h:174
LDObjHandle ldObjHandle
Definition: HomePatch.h:554
void wakeULTs()
double * pos_y
Definition: NamdTypes.h:378
void split(int iStream, int numStreams)
Definition: Random.h:77
static NAMD_HOST_DEVICE Tensor identity(BigReal v1=1.0)
Definition: Tensor.h:31
void addForceToMomentum3(const BigReal timestep1, const int ftag1, const int useSaved1, const BigReal timestep2, const int ftag2, const int useSaved2, const BigReal timestep3, const int ftag3, const int useSaved3)
Definition: Sequencer.C:5622
void positionsReady(int doMigration=0)
Definition: HomePatch.C:1895
Definition: Patch.h:35
Bool useDeviceMigration
float * mass
Definition: NamdTypes.h:405
Flags flags
Definition: Patch.h:128
void submitHalfstep_SOA()
Definition: Sequencer.C:2866
WaterModel watmodel
SimpleBroadcastObject< BigReal > stochRescaleCoefficient
Definition: Broadcasts.h:80
SimpleBroadcastObject< int > monteCarloBarostatAcceptance
Definition: Broadcasts.h:84
double * f_nbond_y
Definition: NamdTypes.h:432
uint32 id
Definition: NamdTypes.h:160
void revert(void)
Definition: HomePatch.C:5232
void submitCollections(int step, int zeroVel=0)
Definition: Sequencer.C:6371
void stochRescaleVelocities_SOA(int step)
Definition: Sequencer.C:3838
static void print_vel_SOA(const double *vel_x, const double *vel_y, const double *vel_z, int ilo=0, int ihip1=1)
Definition: Sequencer.C:107
Charge charge
Definition: NamdTypes.h:79
void runComputeObjects_SOA(int migration, int pairlists, int step)
Definition: Sequencer.C:3652
BigReal calcKineticEnergy()
Definition: Sequencer.C:4935
#define SEQ_STK_SZ
Definition: Thread.h:11
void adaptTempUpdate(int)
Definition: Sequencer.C:5448
double * f_nbond_z
Definition: NamdTypes.h:433
void positionsReady_GPU(int doMigration=0, int startup=0)
Bool langevin_useBAOAB
int32 * hydrogenGroupSize
Definition: NamdTypes.h:385
#define TIMER_START(T, TYPE)
Definition: HomePatch.h:264
#define NAIVE
Definition: SimParameters.h:52
Definition: Output.h:35
int rattle1_SOA(const BigReal, Tensor *virial, SubmitReduction *)
Definition: HomePatch.C:4659
double * f_normal_x
Definition: NamdTypes.h:428
void calcFixVirial(Tensor &fixVirialNormal, Tensor &fixVirialNbond, Tensor &fixVirialSlow, Vector &fixForceNormal, Vector &fixForceNbond, Vector &fixForceSlow)
Definition: Sequencer.C:5944
float * langevinParam
Definition: NamdTypes.h:406
Definition: Random.h:37
#define NAMD_PROFILE_START()
float * gaussrand_x
fill with Gaussian distributed random numbers
Definition: NamdTypes.h:423
static __device__ __host__ __forceinline__ int computeAtomPad(const int numAtoms, const int tilesize=WARPSIZE)
int numPatches(void) const
Definition: PatchMap.h:59
void awaken(void)
Definition: Sequencer.h:55
static std::pair< int, int > coordinateNeeded(int timestep)
Check if the step requires to output the coordinates.
Definition: Output.C:185
#define NAMD_EVENT_START(eon, id)
int pairlistsAge
Definition: Sequencer.h:232
void stochRescaleVelocities(int)
Definition: Sequencer.C:5580
void rattle1_SOA(BigReal, int)
Definition: Sequencer.C:3635
#define COMPUTE_HOME_PRIORITY
Definition: Priorities.h:76
void constructDevicePatchMap()
static void print_tensor(const Tensor &t)
Definition: Sequencer.C:120
NAMD_HOST_DEVICE BigReal length(void) const
Definition: Vector.h:202
int getMasterPe()
Definition: DeviceCUDA.h:137
NAMD_HOST_DEVICE Position apply_transform(Position data, const Transform &t) const
Definition: Lattice.h:137
BigReal rescaleTemp
void NAMD_bug(const char *err_msg)
Definition: common.C:195
float * gaussrand_z
Definition: NamdTypes.h:425
#define TIMER_REPORT(T)
Definition: HomePatch.h:267
void multigratorPressure(int step, int callNumber)
Definition: Sequencer.C:4792
int doEnergy
Definition: PatchTypes.h:20
static ComputeCUDAMgr * getComputeCUDAMgr()
void berendsenPressure(int)
Definition: Sequencer.C:5242
void submitMomentum(int step)
Definition: Sequencer.C:4701
int doFullElectrostatics
Definition: PatchTypes.h:23
BigReal yx
Definition: Tensor.h:18
Bool adaptTempLangevin
int rescaleVelocities_numTemps
Definition: Sequencer.h:277
double * vel_x
Jim recommends double precision velocity.
Definition: NamdTypes.h:396
int32 * id
Definition: NamdTypes.h:390
void submitLoadStats(int timestep)
Definition: HomePatch.C:5428
void mollyMollify(Tensor *virial)
Definition: HomePatch.C:5159
void runComputeObjects(int migration=1, int pairlists=0, int pressureStep=0)
Definition: Sequencer.C:6408
void awaken(void)
Definition: Controller.C:371
void rebalance(Sequencer *seq, PatchID id)
void rescaleaccelMD(int, int, int)
Definition: Sequencer.C:5405
SimpleBroadcastObject< Tensor > velocityRescaleTensor2
Definition: Broadcasts.h:75
float * charge
Definition: NamdTypes.h:381
int Bool
Definition: common.h:142
BigReal drudeBondLen
CompAtomList p
Definition: Patch.h:153
SimpleBroadcastObject< int > IMDTimeEnergyBarrier
Definition: Broadcasts.h:90
Sequencer(HomePatch *p)
Definition: Sequencer.C:171
BigReal langevinTemp
int time_switch
Definition: imd.h:62
void clearDevicePatchMap()
NAMD_HOST_DEVICE BigReal length2(void) const
Definition: Vector.h:206
int ldbSteps
Definition: Sequencer.h:330
int numAtoms
Definition: Patch.h:151
MTSChoices MTSAlgorithm
#define NAMD_EVENT_RANGE_2(eon, id)
void run(void)
Definition: Sequencer.C:269
SimpleBroadcastObject< int > scriptBarrier
Definition: Broadcasts.h:88
uint8 partition
Definition: NamdTypes.h:81
bool getIsGlobalDevice() const
Definition: DeviceCUDA.h:172
BigReal scriptArg1
BigReal x
Definition: Vector.h:74
uint8 hydrogenGroupSize
Definition: NamdTypes.h:89
const_iterator const_begin(void) const
Definition: ResizeArray.h:39
PatchID getPatchID() const
Definition: Patch.h:114
void scalePositionsVelocities(const Tensor &posScale, const Tensor &velScale)
Definition: Sequencer.C:4755
int monteCarloPressureFreq
int getPesSharingDevice(const int i)
Definition: DeviceCUDA.h:139
BigReal adaptTempT
Definition: Sequencer.h:272
int maxForceUsed
Definition: PatchTypes.h:33
SimpleBroadcastObject< BigReal > velocityRescaleFactor2
Definition: Broadcasts.h:76
int sequence
Definition: PatchTypes.h:18
Bool is_atom_rotdragged(int atomnum) const
Definition: Molecule.h:1305
#define D_MSG(t)
Definition: Debug.h:165
int eventEndOfTimeStep
Definition: Node.C:296
void doMigrationGPU(const int startup, const int doGlobal, const int updatePatchMap)
void langevinPiston_SOA(int step)
Definition: Sequencer.C:3506
#define END_OF_RUN
Definition: Output.h:26
void gbisComputeAfterP1()
Definition: HomePatch.C:4915
void integrate_SOA(int)
Definition: Sequencer.C:2049
void traceBarrier(int)
Definition: Sequencer.C:6641
int doNonbonded
Definition: PatchTypes.h:22
void NAMD_die(const char *err_msg)
Definition: common.C:147
PDB * pdb
Definition: Node.h:183
static LdbCoordinator * Object()
BigReal reassignIncr
void gaussian_array_f(float *a, int n)
Definition: Random.h:258
#define TIMER_INIT_WIDTH(T, TYPE, WIDTH)
Definition: HomePatch.h:263
int getForceSendActive() const
Definition: ComputeGlobal.h:46
static int forceNeeded(int timestep)
Check if the step requires to output the forces.
Definition: Output.C:612
int berendsenPressure_count
Definition: Sequencer.h:294
SimpleBroadcastObject< BigReal > velocityRescaleFactor
Definition: Broadcasts.h:71
void publish(int tag, const T &t)
SimpleBroadcastObject< BigReal > minimizeCoefficient
Definition: Broadcasts.h:81
void reassignVelocities(BigReal, int)
Definition: Sequencer.C:5464
void langevinVelocitiesBBK1_SOA(BigReal timestep)
Definition: Sequencer.C:3278
SimpleBroadcastObject< Vector > accelMDRescaleFactor
Definition: Broadcasts.h:91
int hardWallDrude(const BigReal, Tensor *virial, SubmitReduction *)
Definition: HomePatch.C:3410
ComputeGlobal * computeGlobalObject
Definition: ComputeMgr.h:160
Elem & item(int i)
Definition: ResizeArray.h:119
void saveForce(const int ftag=Results::normal)
Definition: HomePatch.C:2315
Random * random
Definition: Sequencer.h:321
void runComputeObjectsCUDA(int doMigration, int doGlobal, int pairlists, int nstep, int startup)
BigReal xx
Definition: Tensor.h:17
SimpleBroadcastObject< Tensor > positionRescaleFactor
Definition: Broadcasts.h:72
void buildRattleList_SOA()
Definition: HomePatch.C:4520
int getDeviceID()
Definition: DeviceCUDA.h:144
void langevinVelocities(BigReal)
Definition: Sequencer.C:5025
IMDSessionInfo IMDsendsettings
static CollectionMaster * Object()
void hardWallDrude(BigReal, int)
Definition: Sequencer.C:5658
NodeBroadcast * nodeBroadcast
Definition: PatchData.h:141
void checkpoint(void)
Definition: HomePatch.C:5222
BigReal zz
Definition: Tensor.h:19
#define TIMER_STOP(T, TYPE)
Definition: HomePatch.h:265
void suspend(void)
Definition: Sequencer.C:279
void multigratorTemperature(int step, int callNumber)
Definition: Sequencer.C:4955
static constexpr int num_inline_peer
Definition: CudaRecord.h:36
unsigned int randomSeed
double * recipMass
derived from mass
Definition: NamdTypes.h:404
BigReal initialTemp
void reinitVelocities(void)
Definition: Sequencer.C:5496
int pressureProfileAtomTypes
int checkpoint_berendsenPressure_count
Definition: Sequencer.h:295
#define simParams
Definition: Output.C:131
ControllerBroadcasts * broadcast
Definition: Sequencer.h:328
#define NAMD_EVENT_START_EX(eon, id, str)
iterator begin(void)
Definition: ResizeArray.h:36
void maximumMove_SOA(const double dt, const double maxvel2)
Definition: Sequencer.C:3220
double * pos_z
Definition: NamdTypes.h:379
double * f_slow_x
Definition: NamdTypes.h:434
CollectionMgr *const collection
Definition: Sequencer.h:327
void updateDeviceData(const int startup, const int maxForceUsed, const int doGlobal)
const PatchID patchID
Definition: Patch.h:150
#define GB2_COMPUTE_HOME_PRIORITY
Definition: Priorities.h:64
int numHomePatches(void)
Definition: PatchMap.C:432
Definition: Tensor.h:15
BigReal xy
Definition: Tensor.h:17
iterator end(void)
Definition: ResizeArray.h:37
bool rattleListValid_SOA
Definition: HomePatch.h:454
#define NAMD_PROFILE_STOP()
Bool langevinGammasDiffer
double * pos_x
Definition: NamdTypes.h:377
int doVirial
Definition: PatchTypes.h:21
BigReal y
Definition: Vector.h:74
virtual ~Sequencer(void)
Definition: Sequencer.C:245
BigReal movDragGlobVel
int getNumStepsToRun(void)
bool getIsPmeDevice()
Definition: DeviceCUDA.h:168
int doLCPO
Definition: PatchTypes.h:31
void resetMovingAverage()
Definition: Controller.C:656
void newtonianVelocities(BigReal, const BigReal, const BigReal, const BigReal, const int, const int, const int)
Definition: Sequencer.C:5001
static void print_vel_AOS(const FullAtom *a, int ilo=0, int ihip1=1)
Definition: Sequencer.C:95
void rescaleSoluteCharges(BigReal)
Definition: Sequencer.C:5541
void addVelocityToPosition_SOA(const double dt)
Definition: Sequencer.C:2827
double * vel_z
Definition: NamdTypes.h:398
void setVal(const NodeReduction *other)
Definition: ReductionMgr.C:681
#define SOA_SIMPLIFY_PARAMS
Definition: Sequencer.h:31
Mass mass
Definition: NamdTypes.h:218
void submitVelocities(int seq, int zero, FullAtomList &a, int prec)
Bool pressureProfileOn
void submitMinimizeReductions(int, BigReal fmax2)
Definition: Sequencer.C:6222
#define ADD_VECTOR_OBJECT(R, RL, D)
Definition: ReductionMgr.h:28
BigReal yy
Definition: Tensor.h:18
int doMomenta
Definition: Sequencer.h:311
#define TIMER_DONE(T)
Definition: HomePatch.h:266
#define PDBVELFACTOR
Definition: common.h:57
CudaComputeNonbonded * getCudaComputeNonbonded()
#define TIMEFACTOR
Definition: common.h:55
Bool pairInteractionSelf
int multigratorPressureFreq
static int velocityNeeded(int timestep)
Check if the step requires to output the velocities.
Definition: Output.C:502
int numPatchesOnNode(int node)
Definition: PatchMap.h:60
int bufferOffsetNBPad
Definition: CudaRecord.h:39
double * f_nbond_x
Definition: NamdTypes.h:431
int getDeviceIndex()
Definition: DeviceCUDA.h:166
BigReal maximumMove
#define SPECIAL_PATCH_ID
Definition: Sequencer.C:88
void submitPositions(int seq, FullAtomList &a, Lattice l, int prec, int dcdSelectionIndex)
#define cudaCheck(stmt)
Definition: CudaUtils.h:233
void correctMomentum(int step, BigReal drifttime)
Definition: Sequencer.C:4724
bool getIsMasterDevice()
Definition: DeviceCUDA.C:642
int pairlistsAgeLimit
Definition: Sequencer.h:233
NAMD_HOST_DEVICE void outerAdd(BigReal scale, const Vector &v1, const Vector &v2)
Definition: Tensor.h:255
int pairlistsAreValid
Definition: Sequencer.h:231
int doGBIS
Definition: PatchTypes.h:30
int stochRescale_count
Definition: Sequencer.h:290
int doFullDispersion
Definition: PatchTypes.h:24
std::ostream & iERROR(std::ostream &s)
Definition: InfoStream.C:83
int check(int step)
Definition: Sequencer.C:149
ComputeMgr * computeMgr
Definition: Node.h:172
int maxForceMerged
Definition: PatchTypes.h:34
BigReal reassignHold
bool getIsPmeDevice()
Definition: GlobalGPUMgr.C:100
void addForceToMomentum3(FullAtom *__restrict atom_arr, const Force *__restrict force_arr1, const Force *__restrict force_arr2, const Force *__restrict force_arr3, const BigReal dt1, const BigReal dt2, const BigReal dt3, int num_atoms) __attribute__((__noinline__))
Definition: HomePatch.C:3348
void addVelocityToPosition(FullAtom *__restrict atom_arr, const BigReal dt, int num_atoms) __attribute__((__noinline__))
Definition: HomePatch.C:3387
void quenchVelocities()
Definition: Sequencer.C:4692
ForceList f[Results::maxNumForces]
Definition: Patch.h:214
float * langScalRandBBK2
from langevinParam and recipMass
Definition: NamdTypes.h:420
void get_movdrag_params(Vector &v, int atomnum) const
Definition: Molecule.h:1413
static GlobalGPUMgr * Object()
Definition: GlobalGPUMgr.h:61
void enableEarlyExit(void)
Definition: Node.C:1461
void submitReductions(int)
Definition: Sequencer.C:5964
#define namd_reciprocal(x)
Definition: Vector.h:69
SimpleBroadcastObject< Tensor > positionRescaleFactor2
Definition: Broadcasts.h:77
void integrate_CUDA_SOA(int scriptTask)
#define RIGID_NONE
Definition: SimParameters.h:80
void loweAndersenFinish()
Definition: HomePatch.C:4881
uint32 atomFixed
Definition: NamdTypes.h:162
int getNumPesSharingDevice()
Definition: DeviceCUDA.h:138
SimParameters *const simParams
Definition: Sequencer.h:322
SimpleBroadcastObject< Tensor > velocityRescaleTensor
Definition: Broadcasts.h:74
NAMD_HOST_DEVICE Vector unit(void) const
Definition: Vector.h:215
BigReal zx
Definition: Tensor.h:19
CompAtomExtList pExt
Definition: Patch.h:181
int energies_switch
Definition: imd.h:63
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:23
Molecule * molecule
Definition: Node.h:179
NAMD_HOST_DEVICE Vector origin() const
Definition: Lattice.h:278
void rescaleVelocitiesByFactor(BigReal)
Definition: Sequencer.C:5519
double * f_slow_z
Definition: NamdTypes.h:436
int doMolly
Definition: PatchTypes.h:25
int multigratorTemperatureFreq
void reloadCharges()
Definition: Sequencer.C:5529
int doMinimize
Definition: PatchTypes.h:26
#define FORCE_OUTPUT
Definition: Output.h:28
int globalMasterFrequency
double BigReal
Definition: common.h:123
void minimize()
Definition: Sequencer.C:4437
static SynchronousCollectives * Object()
CudaPmeOneDevice * createCudaPmeOneDevice()
int step
Definition: PatchTypes.h:16
#define PATCH_PRIORITY(PID)
Definition: Priorities.h:25
CudaPmeOneDevice * getCudaPmeOneDevice()
void updatePatchOrder(const std::vector< CudaLocalRecord > &data)
for(int i=0;i< n1;++i)
void berendsenPressure_SOA(int step)
Definition: Sequencer.C:3407
int32 numAtoms
number of atoms
Definition: NamdTypes.h:456
void printDevicePatchMap()
BigReal drudeTemp
void compute(const Lattice &lattice, int doEnergyVirial, int step)
void exchangeAtoms(int scriptTask)
Definition: HomePatch.C:5370
T get(int tag, const int expected=-1)