NAMD
Sequencer.C
Go to the documentation of this file.
1 
7 /*****************************************************************************
8  * $Source: /home/cvs/namd/cvsroot/namd2/src/Sequencer.C,v $
9  * $Author: jim $
10  * $Date: 2016/08/26 19:40:32 $
11  * $Revision: 1.1230 $
12  *****************************************************************************/
13 
14 // The UPPER_BOUND macro is used to eliminate all of the per atom
15 // computation done for the numerical integration in Sequencer::integrate()
16 // other than the actual force computation and atom migration.
17 // The idea is to "turn off" the integration for doing performance
18 // profiling in order to get an upper bound on the speedup available
19 // by moving the integration parts to the GPU.
20 //
21 // Define it in the Make.config file, i.e. CXXOPTS += -DUPPER_BOUND
22 // or simply uncomment the line below.
23 //
24 //#define UPPER_BOUND
25 
26 //for gbis debugging; print net force on each atom
27 #include "CudaRecord.h"
28 #include "PatchData.h"
29 #include "common.h"
30 #define PRINT_FORCES 0
31 
32 #include "InfoStream.h"
33 #include "Node.h"
34 #include "SimParameters.h"
35 #include "Sequencer.h"
36 #include "HomePatch.h"
37 #include "ReductionMgr.h"
38 #include "CollectionMgr.h"
39 #include "BroadcastObject.h"
40 #include "Output.h"
41 #include "Controller.h"
42 #include "Broadcasts.h"
43 #include "Molecule.h"
44 #include "NamdOneTools.h"
45 #include "LdbCoordinator.h"
46 #include "Thread.h"
47 #include "Random.h"
48 #include "PatchMap.inl"
49 #include "ComputeMgr.h"
50 #include "ComputeGlobal.h"
51 #include "NamdEventsProfiling.h"
52 #include <iomanip>
53 #include "ComputeCUDAMgr.h"
54 #include "CollectionMaster.h"
55 #include "IMDOutput.h"
56 #include "CudaGlobalMasterServer.h"
57 
58 #include "TestArray.h"
59 
60 #include <algorithm> // Used for sorting
61 
62 #define MIN_DEBUG_LEVEL 3
63 //#define DEBUGM
64 //
65 // Define NL_DEBUG below to activate D_*() macros in integrate_SOA()
66 // for debugging.
67 //
68 //#define NL_DEBUG
69 #include "Debug.h"
70 
71 #if USE_HPM
72 #define START_HPM_STEP 1000
73 #define STOP_HPM_STEP 1500
74 #endif
75 
76 #include "DeviceCUDA.h"
77 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
78 #ifdef WIN32
79 #define __thread __declspec(thread)
80 #endif
81 extern __thread DeviceCUDA *deviceCUDA;
82 #ifdef __IBMCPP__
83 // IBM compiler requires separate definition for static members
85 #endif
86 #endif
87 
88 #define SPECIAL_PATCH_ID 91
89 
90 //
91 // BEGIN
92 // print_* routines
93 // assist in debugging SOA integration code
94 //
95 static void print_vel_AOS(
96  const FullAtom *a,
97  int ilo=0, int ihip1=1
98  ) {
99  printf("AOS Velocities:\n");
100  for (int i=ilo; i < ihip1; i++) {
101  printf("%d %g %g %g\n", i,
102  a[i].velocity.x, a[i].velocity.y, a[i].velocity.z);
103  }
104 }
105 
106 
/// Debug helper: dump the SOA (structure-of-arrays) velocities of atoms
/// with indices in [ilo, ihip1) to stdout, one "index vx vy vz" line each.
static void print_vel_SOA(
    const double *vel_x,
    const double *vel_y,
    const double *vel_z,
    int ilo=0, int ihip1=1
    ) {
  printf("SOA Velocities:\n");
  int idx = ilo;
  while (idx < ihip1) {
    printf("%d %g %g %g\n", idx, vel_x[idx], vel_y[idx], vel_z[idx]);
    ++idx;
  }
}
118 
119 
120 static void print_tensor(const Tensor& t) {
121  printf("%g %g %g %g %g %g %g %g %g\n",
122  t.xx, t.xy, t.xz, t.yx, t.yy, t.yz, t.zx, t.zy, t.zz);
123 }
124 //
125 // END
126 // print_* routines
127 // assist in debugging SOA integration code
128 //
129 
130 
/// Schedules a periodic event during integration.  After init(), each call
/// to check(step) returns 1 exactly when `step` hits the next scheduled
/// step, advancing the schedule by one period; otherwise it returns 0.
struct CheckStep {
  int period;    // event period in steps (0 until init() is called)
  int nextstep;  // next step number at which check() fires

  /// Fire-and-advance test: returns 1 iff `step` is the scheduled step.
  inline int check(int step) {
    if (step != nextstep) return 0;
    nextstep += period;
    return 1;
  }

  /// Set up the schedule: events occur on the `initperiod` grid, offset by
  /// `delta`, with the first firing strictly after `initstep`.
  /// Returns 1 when `initstep` itself lands on the schedule grid
  /// (i.e. the very next event is exactly one period away), else 0.
  inline int init(int initstep, int initperiod, int delta=0) {
    period = initperiod;
    int target = initstep - (initstep % period) - (delta % period);
    // advance until strictly past initstep
    while (target <= initstep) target += period;
    nextstep = target;
    return (initstep + period == nextstep);
  }

  CheckStep() : period(0), nextstep(0) { }
};
169 
170 
 // Sequencer constructor: caches per-patch handles (SimParameters, the
 // HomePatch, collection/load-balance managers), sets up reductions, and
 // on GPU-resident builds obtains the shared SequencerCUDA singleton.
 // NOTE(review): the "Sequencer::Sequencer(HomePatch *p) :" signature line
 // and several interior lines (if-conditions, reduction allocations) are
 // missing from this extraction -- confirm against the original source.
 simParams(Node::Object()->simParameters),
 patch(p),
 collection(CollectionMgr::Object()),
 ldbSteps(0),
 pairlistsAreValid(0),
 pairlistsAge(0),
 pairlistsAgeLimit(0)
{

#if defined(NAMD_CUDA) || defined(NAMD_HIP)
 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
 PatchData* patchData = cpdata.ckLocalBranch();
 // NOTE(review): the if-condition matching the "} else" below is missing
 // from this extraction.
 } else
#endif // defined(NAMD_CUDA) || defined(NAMD_HIP)
 {
 }

 // Pressure-profile reduction sizing: one value per slab per atom type
 // for each of the three force classes.
 int ntypes = simParams->pressureProfileAtomTypes;
 int nslabs = simParams->pressureProfileSlabs;
 REDUCTIONS_PPROF_INTERNAL, 3*nslabs*ntypes);
 } else {
 }
 if (simParams->multigratorOn) {
 } else {
 multigratorReduction = NULL;
 }
 ldbCoordinator = (LdbCoordinator::Object());

 // Is soluteScaling enabled?
 if (simParams->soluteScalingOn) {
 // If so, we must "manually" perform charge scaling on startup because
 // Sequencer will not get a scripting task for initial charge scaling.
 // Subsequent rescalings will take place through a scripting task.
 }

 // Counter for stochastic velocity rescaling; no rescale done yet.
 stochRescale_count = 0;
 masterThread = true;
// patch->write_tip4_props();
#if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(SEQUENCER_SOA) && defined(NODEGROUP_FORCE_REGISTER)
#if 0
 CUDASequencer = new SequencerCUDA(deviceCUDA->getDeviceID(),
 simParams);
#else
 // Node-wide singleton: all Sequencers on this node share one
 // SequencerCUDA instance bound to this PE's device.
 CUDASequencer = SequencerCUDA::InstanceInit(deviceCUDA->getDeviceID(),
 simParams);

 syncColl = SynchronousCollectives::Object();
 globalGPUMgr = GlobalGPUMgr::Object();
#endif
 }
#endif
}
244 
{
 // Destructor: release objects owned by this Sequencer.
 // NOTE(review): the "Sequencer::~Sequencer(void)" signature line and a few
 // interior lines (additional deletes and the condition guarding the
 // CUDASequencer teardown) are missing from this extraction -- confirm
 // against the original source.
 delete broadcast;
 delete reduction;
 delete min_reduction;
 delete random;
#if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(SEQUENCER_SOA) && defined(NODEGROUP_FORCE_REGISTER)
 delete CUDASequencer;
 }
#endif
}
260 
261 // Invoked by thread
// Thread entry point passed to CthCreate() in Sequencer::run(); runs the
// per-patch scripting/integration loop on the new user-level thread.
// NOTE(review): one interior line is missing from this extraction.
void Sequencer::threadRun(Sequencer* arg)
{
 arg->algorithm();
}
267 
268 // Invoked by Node::run() via HomePatch::runSequencer()
// Spawn this Sequencer's user-level thread (Charm++ Cth threads), set its
// scheduling priority from the patch ID, and start it via awaken().
// NOTE(review): the opening-brace line is missing from this extraction.
void Sequencer::run(void)
 // create a Thread and invoke it
 DebugM(4, "::run() - this = " << this << "\n" );
 thread = CthCreate((CthVoidFn)&(threadRun),(void*)(this),SEQ_STK_SZ);
 CthSetStrategyDefault(thread);
 priority = PATCH_PRIORITY(patch->getPatchID());
 awaken();
}
278 
{
 // Yield this user-level thread until something calls awaken().
 // NOTE(review): the signature line is missing from this extraction --
 // presumably Sequencer::suspend(void); confirm against the original
 // source.  Two surrounding lines are also missing.
 CthSuspend();
}
285 
// Defines sequence of operations on a patch. e.g. when
// to push out information for Compute objects to consume
// when to migrate atoms, when to add forces to velocity update.
//
// Blocks on the scriptBarrier broadcast and dispatches each scripting
// task (output, checkpoint/revert, atom exchange, minimize, run/continue)
// until SCRIPT_END arrives, then terminates the thread.
// NOTE(review): the "void Sequencer::algorithm(void)" signature line and
// several case labels/statements are missing from this extraction (hence
// the stray break;/statements below) -- confirm against the original.
{
 int scriptTask;
 int scriptSeq = 0;
 // Blocking receive for the script barrier.
 while ( (scriptTask = broadcast->scriptBarrier.get(scriptSeq++)) != SCRIPT_END ) {
 switch ( scriptTask ) {
 case SCRIPT_OUTPUT:
 break;
 case SCRIPT_FORCEOUTPUT:
 break;
 case SCRIPT_MEASURE:
 break;
 case SCRIPT_REINITVELS:
 break;
 case SCRIPT_RESCALEVELS:
 break;
 break;
 reloadCharges();
 break;
 case SCRIPT_CHECKPOINT:
 patch->checkpoint();
 break;
 case SCRIPT_REVERT:
 // Reverting invalidates any cached pairlists.
 patch->revert();
 pairlistsAreValid = 0;
 break;
 break;
 case SCRIPT_ATOMSENDRECV:
 case SCRIPT_ATOMSEND:
 case SCRIPT_ATOMRECV:
 patch->exchangeAtoms(scriptTask);
 break;
 case SCRIPT_MINIMIZE:
#if 0
 NAMD_die("Minimization is currently not supported on the GPU integrator\n");
 }
#endif
 minimize();
 break;
 case SCRIPT_RUN:
 case SCRIPT_CONTINUE:
 //
 // DJH: Call a cleaned up version of integrate().
 //
 // We could test for simulation options and call a more basic version
 // of integrate() where we can avoid performing most tests.
 //
#ifdef SEQUENCER_SOA
 if ( simParams->SOAintegrateOn ) {
#ifdef NODEGROUP_FORCE_REGISTER

 else {
#endif
 integrate_SOA(scriptTask);
#ifdef NODEGROUP_FORCE_REGISTER
 }
#endif
 }
 else
#endif
 integrate(scriptTask);
 break;
 default:
 NAMD_bug("Unknown task in Sequencer::algorithm");
 }
 }
 terminate();
}
376 
377 
378 #ifdef SEQUENCER_SOA
379 
381 //
382 // begin SOA code
383 //
384 
385 #if defined(NODEGROUP_FORCE_REGISTER)
386 
 // Check-in/park logic for non-master patch threads in the GPU-resident
 // integrator: every patch thread increments the node-wide counter; all but
 // the last suspend themselves and thereafter serve only migration steps.
 // NOTE(review): the enclosing signature line is missing from this
 // extraction (presumably void Sequencer::suspendULTs()) -- confirm.
 PatchMap* patchMap = PatchMap::Object();
 CUDASequencer->numPatchesCheckedIn += 1;
 if (CUDASequencer->numPatchesCheckedIn < patchMap->numPatchesOnNode(CkMyPe())) {
 masterThread = false;
 CUDASequencer->waitingThreads.push_back(CthSelf());
 NAMD_EVENT_STOP(patch->flags.event_on, NamdProfileEvent::INTEGRATE_SOA_1);
 CthSuspend();

 // JM: if a thread gets here, it will be for migrating atoms until the end of the simulation
 while(true){
 // read global flags
 int lastStep = CUDASequencer->patchData->flags.step;
 int startup = (CUDASequencer->patchData->flags.step == simParams->firstTimestep);
 if (CUDASequencer->breakSuspends) break;
 // NOTE(review): the if-condition matching the "} else {" below is
 // missing from this extraction -- confirm against the original source.
 this->patch->positionsReady_GPU(true, startup);
 } else {
 this->patch->positionsReady_SOA(true);
 }
 CUDASequencer->numPatchesCheckedIn += 1;
 CUDASequencer->waitingThreads.push_back(CthSelf());
 // Last thread to check in wakes the sleeping master thread.
 if(CUDASequencer->numPatchesCheckedIn == patchMap->numPatchesOnNode(CkMyPe()) - 1 &&
 CUDASequencer->masterThreadSleeping){
 CUDASequencer->masterThreadSleeping = false;
 CthAwaken(CUDASequencer->masterThread);
 }
 CthSuspend();
 }
 }
}
418 void Sequencer::wakeULTs(){
419  CUDASequencer->numPatchesCheckedIn = 0;
420  for (CthThread t : CUDASequencer->waitingThreads) {
421  CthAwaken(t);
422  }
423  CUDASequencer->waitingThreads.clear();
424 }
425 
// Launches one step's worth of GPU compute work (bonded, nonbonded,
// optional PME and CudaGlobalMaster/global-master forces) and finishes
// the corresponding reductions.  On migration steps (doMigration != 0)
// the full setup path -- atomUpdate / openBoxesOnPe / loadTuplesOnPe /
// launchWork / finishPatches -- is driven manually across the node's PEs;
// otherwise only the master PE issues work.
// NOTE(review): several lines are missing from this extraction (node
// barriers, some if-conditions, and the declarations of cudaMgr and
// hpList used below) -- code is kept verbatim; confirm against the
// original source.
void Sequencer::runComputeObjectsCUDA(int doMigration, int doGlobal, int pairlists, int nstep, int startup) {

 PatchMap* map = PatchMap::Object();

 bool isMaster = deviceCUDA->getMasterPe() == CkMyPe();

 // Sync after the node barrier. This is making sure that the position buffers have been
 // populated. However, this doesn't need to happen at the node level. I.e. the non-pme
 // nonbonded calculations can begin before the PME device is finished setting it's positions.
 // There is a node barrier after the forces are done, so we don't have to worry about
 // the positions being updated before the positions have been set
 if (isMaster) {
 CUDASequencer->sync();
 }


 // JM: Each masterPE owns a particular copy of the compute object we need to launch
 // work on. The goal is to launch work on everyone, but for migration steps, sometimes
 // there are a few operation that need to be launched on computes owned by different PEs.
 // ComputeBondedCUDA::openBoxesOnPe() is an example: There is a list of PEs on each compute
 // which holds information on which proxy object it should also invoke openBoxesOnPe();

 // We need to be mindful of that and, since we want to launch methods on different computes.
 // A data structure that holds all nonbonded Computes from all masterPEs is necessary
 ComputeBondedCUDA* cudaBond = cudaMgr->getComputeBondedCUDA();
 CudaComputeNonbonded* cudaNbond = cudaMgr->getCudaComputeNonbonded();
 CudaPmeOneDevice* cudaPme = (globalGPUMgr->getIsPmeDevice() && simParams->fullElectFrequency) ?
 cudaMgr->getCudaPmeOneDevice() : NULL;
 // We need to submit PME reductions even if we don't compute the pme force
 int computePme = (patch->flags.doFullElectrostatics);
 int reducePme = (patch->flags.doVirial || patch->flags.doEnergy);
 auto cudaGlobal = deviceCUDA->getIsGlobalDevice() ? cudaMgr->getCudaGlobalMaster() : nullptr;
 if (isMaster && cudaGlobal && doMigration) cudaGlobal->setStep(static_cast<int64_t>(patch->flags.step));
 // fprintf(stderr, "Patch %d invoking computes\n", this->patch->patchID);


 // JM NOTE: I don't think the scheme below holds for nMasterPes > 1, check it out later

 // Invoking computes on the GPU //
 if(doMigration){
 // JM: if we're on a migration step, we call the setup functions manually
 // which means:
 // 0. masterPe->doWork();
 // 1. openBoxesOnPe();
 // loadTuplesOnPe();
 // 2. masterPe->launchWork();
 // 3. finishPatchesOnPe();
 // 4. masterPe->finishReductions();

 if(isMaster){
 NAMD_EVENT_START(1, NamdProfileEvent::MIG_ATOMUPDATE);
 cudaNbond->atomUpdate();
 cudaBond->atomUpdate();
 cudaNbond->doWork();
 cudaBond->doWork();
 NAMD_EVENT_STOP(1, NamdProfileEvent::MIG_ATOMUPDATE);

 if (cudaPme && !simParams->useDeviceMigration) CUDASequencer->atomUpdatePme();
 if (cudaGlobal) {
 cudaGlobal->updateAtomMaps();
 cudaGlobal->communicateToClients(&(this->patch->lattice));
 }
 }


 // NOTE(review): the guard around this master-PE branch is missing from
 // this extraction.
 if(isMaster){
 CUDASequencer->launch_set_compute_positions();
 CUDASequencer->sync(); // TODO move this to tuple migration
 }
 }

 NAMD_EVENT_START(1, NamdProfileEvent::MIG_OPENBOXESONPE);

 // Here we need to do the following, for each Compute
 for(int i = 0 ; i < CkNumPes(); i++){
 // Here I need to find if the PE is on the bonded PE list
 // XXX NOTE: This might be inefficient. Check the overhead later
 ComputeBondedCUDA* b = CUDASequencer->patchData->cudaBondedList[i];
 CudaComputeNonbonded* nb = CUDASequencer->patchData->cudaNonbondedList[i];
 if (b == NULL) continue;
 auto list = std::find(std::begin(b->getBondedPes()), std::end(b->getBondedPes()), CkMyPe());
 if( list != std::end(b->getBondedPes()) ){
 b->openBoxesOnPe(startup);

 // XXX NOTE: nb has a different PE list!!! We need a different loop for nb
 nb->openBoxesOnPe();

 }
 }
 NAMD_EVENT_STOP(1, NamdProfileEvent::MIG_OPENBOXESONPE);
 // for the bonded kernels, there's an additional step here, loadTuplesOnPe
 // JM NOTE: Those are major hotspots, they account for 50% of the migration time.
 NAMD_EVENT_START(1, NamdProfileEvent::MIG_LOADTUPLESONPE);

 // NOTE: problem here: One of the CompAtomExt structures is turning to null, why?
 cudaBond->loadTuplesOnPe(startup);
 NAMD_EVENT_STOP(1, NamdProfileEvent::MIG_LOADTUPLESONPE);
 NAMD_EVENT_START(1, NamdProfileEvent::MIG_COPYTUPLEDATA);

 // NOTE(review): the if-condition matching the "} else {" below is missing
 // from this extraction (likely a useDeviceMigration test).
 cudaBond->copyTupleDataGPU(startup);
 } else {
 cudaBond->copyTupleDataSN();
 }

 NAMD_EVENT_STOP(1, NamdProfileEvent::MIG_COPYTUPLEDATA);
 // waits until everyone has finished to open their respective boxes
 // node barrier actually prevents the error that is happening.
 if(isMaster){
 // launches work on the masterPe
 NAMD_EVENT_START(1, NamdProfileEvent::MIG_LAUNCHWORK);
 cudaBond->launchWork();
 cudaNbond->launchWork();
 if (cudaPme && computePme) {
 cudaPme->compute(*(CUDASequencer->patchData->lat), reducePme, this->patch->flags.step);
 }
 cudaNbond->reSortTileLists();
 if (cudaGlobal) {
 // cudaGlobal->communicateToClients(&(this->patch->lattice));
 cudaGlobal->calculate();
 cudaGlobal->communicateToMD(patch->flags.doEnergy, patch->flags.doVirial);
 }
 NAMD_EVENT_STOP(1, NamdProfileEvent::MIG_LAUNCHWORK);
 }

 //global master force calculation

 if(doGlobal) {
 NAMD_EVENT_START(1, NamdProfileEvent::GM_CALCULATE);
 // Zero all SOA global forces before computing next global force
 NAMD_EVENT_START(1, NamdProfileEvent::GM_ZERO);
 int numhp = PatchMap::Object()->numHomePatches();
 for(int i = 0; i < numhp; ++i) {
 HomePatch *hp = hpList->item(i).patch;
 hp->zero_global_forces_SOA();
 }
 NAMD_EVENT_STOP(1, NamdProfileEvent::GM_ZERO);
 NAMD_EVENT_START(1, NamdProfileEvent::GM_DOWORK);
 // call globalmaster to calculate the force from client.
 computeGlobal->doWork();
 NAMD_EVENT_STOP(1, NamdProfileEvent::GM_DOWORK);
 NAMD_EVENT_START(1, NamdProfileEvent::GM_BARRIER);
 // CkPrintf("post doWork step %d \n",this->patch->flags.step);
 // CUDASequencer->printSOAPositionsAndVelocities();
 NAMD_EVENT_STOP(1, NamdProfileEvent::GM_BARRIER);
 if(isMaster) {
 // aggregate and copy the global forces to d_f_global device buffer
 NAMD_EVENT_START(1, NamdProfileEvent::GM_CPY_FORCE);
 CUDASequencer->copyGlobalForcesToDevice();
 NAMD_EVENT_STOP(1, NamdProfileEvent::GM_CPY_FORCE);
 }
 NAMD_EVENT_STOP(1, NamdProfileEvent::GM_CALCULATE);
 }
 NAMD_EVENT_START(1, NamdProfileEvent::MIG_FINISHPATCHES);
 cudaNbond->finishPatches();
 cudaBond->finishPatches();
 NAMD_EVENT_STOP(1, NamdProfileEvent::MIG_FINISHPATCHES);

 // finishes reduction with masterPe!
 if(isMaster){
 cudaNbond->finishReductions();
 if (cudaPme) cudaPme->finishReduction(reducePme);
 cudaBond->finishReductions();
 if (cudaGlobal) cudaGlobal->finishReductions();
 }
 }
 // if we're not on a migration step, do the work only on masterPE, except globalmaster work
 else {
 int doNbond = patch->flags.doNonbonded;
 if(isMaster) {
 // JM NOTE: We issue the nonbonded work first and sync it last
 if (cudaPme && computePme) {
 cudaPme->compute(*(CUDASequencer->patchData->lat), reducePme, this->patch->flags.step);
 }
 cudaNbond->doWork();
 cudaBond->doWork();
 if (cudaGlobal) {
 // cudaGlobal->communicateToClients(&(this->patch->lattice));
 cudaGlobal->calculate();
 cudaGlobal->communicateToMD(patch->flags.doEnergy, patch->flags.doVirial);
 }
 }
 //global master force calculation
 if(doGlobal) {
 NAMD_EVENT_START(1, NamdProfileEvent::GM_CALCULATE);
 NAMD_EVENT_START(1, NamdProfileEvent::GM_ZERO);
 // Zero all SOA global forces before computing next global force
 int numhp = PatchMap::Object()->numHomePatches();
 for(int i = 0; i < numhp; ++i) {
 HomePatch *hp = hpList->item(i).patch;
 hp->zero_global_forces_SOA();
 }
 NAMD_EVENT_STOP(1, NamdProfileEvent::GM_ZERO);
 // call globalmaster to calculate the force from client.
 NAMD_EVENT_START(1, NamdProfileEvent::GM_DOWORK);
 computeGlobal->doWork();
 NAMD_EVENT_STOP(1, NamdProfileEvent::GM_DOWORK);
 NAMD_EVENT_START(1, NamdProfileEvent::GM_BARRIER);
 NAMD_EVENT_STOP(1, NamdProfileEvent::GM_BARRIER);
 // CkPrintf("post doWork 2 step %d \n",this->patch->flags.step);
 // CUDASequencer->printSOAPositionsAndVelocities();
 if(isMaster) {
 // aggregate and copy the global forces to d_f_global device buffer
 NAMD_EVENT_START(1, NamdProfileEvent::GM_CPY_FORCE);
 CUDASequencer->copyGlobalForcesToDevice();
 NAMD_EVENT_STOP(1, NamdProfileEvent::GM_CPY_FORCE);
 }
 NAMD_EVENT_STOP(1, NamdProfileEvent::GM_CALCULATE);
 }
 if(isMaster) {
 cudaBond->finishPatches();
 if (cudaPme) {
 cudaPme->finishReduction(reducePme);
 }
 cudaNbond->finishPatches();
 if (cudaGlobal) cudaGlobal->finishReductions();
 }
 }

#if 0
 // for migrations, I need to call OpenBoxesOnPe and finishPatches for every Pe
 pairlistsAreValid = 1;
 pairlistsAge = 0;
 }
 if ( pairlistsAreValid /* && !pressureStep */ ) ++pairlistsAge;
#endif
 // syncColl->barrier(SynchronousCollectiveScope::all);
}
673 
//apply MC pressure control
// Monte Carlo barostat trial: back up reductions/positions/forces, rescale
// coordinates and lattice by the broadcast factor, recompute forces and
// energy, then accept or reject the volume move (restoring state on
// rejection).
// NOTE(review): the signature line (presumably
// "void Sequencer::monteCarloPressureControl(") and several interior
// lines (barriers and some if-conditions) are missing from this
// extraction -- confirm against the original source.
 const int step,
 const int doMigration,
 const int doEnergy,
 const int doVirial,
 const int maxForceNumber,
 const int doGlobal)
{
 bool isMasterPe = (deviceCUDA->getMasterPe() == CkMyPe() );
 NodeReduction *reduction = CUDASequencer->patchData->reductionBackend;
 Controller *c_out = CUDASequencer->patchData->c_out;
 bool mGpuOn = CUDASequencer->mGpuOn;
 Lattice oldLattice = this->patch->lattice;
 Vector origin = this->patch->lattice.origin();
 Tensor factor;
 int accepted = 0; // status of MC volume fluctuation trial

 CUDASequencer->submitReductionValues(); // Copy data to NodeReduction
 if(isMasterPe){
 // Backup the reduction values for rejected move
 CUDASequencer->patchData->reductionBackendSave->setVal(reduction);

 // Send the rescale factor for Monte Carlo Volume change from controller
 c_out->mcPressure_prepare(step);
 // receive the factor
 factor = broadcast->positionRescaleFactor.get(step, CkNumPes());
 }

 // Backup positions and forces, scale the coordinates and lattice
 // Setup positions for energy and force calculation
 CUDASequencer->monteCarloPressure_part1(factor, origin, oldLattice);
 // Scale the lattice with factor
 // patch.lattice is pointing to patch.flags.lattice
 this->patch->lattice.rescale(factor);
 CUDASequencer->patchData->lat = &(this->patch->lattice);
 CUDASequencer->patchData->factor = &(factor);
 // Copy scaled lattice flags to all patches
 CUDASequencer->patchData->flags.copyIntFlags(this->patch->flags);

 // Zero all reduction values. We will add halfStep values, if
 // the move is accepted.
 reduction->zero();
 }
 }

 if(isMasterPe){
 // copy global flags
 CUDASequencer->update_patch_flags();
 }
 // Calculate the new force and energy after rescaling the coordinates
 // Migration happened before calling this function
 this->runComputeObjectsCUDA(0, doGlobal, 1, step, 0 /* startup */);

 if(isMasterPe){
 // Accumulate force to SOA, calculate External energy/force
 // reduce energy and virial
 CUDASequencer->monteCarloPressure_part2(step, maxForceNumber,
 doEnergy, doGlobal, doVirial);
 CUDASequencer->submitReductionValues(); // Copy data to NodeReduction

 // Check to see if the move is accepted or not
 c_out->mcPressure_accept(step);
 accepted = broadcast->monteCarloBarostatAcceptance.get(step);
 //printf("Sequencer (accept): step: %d, Pe: %d, ACC status: %d\n", step, CkMyPe(), accepted);
 }

 if (accepted) { // Move accepted
 CUDASequencer->monteCarloPressure_accept(doMigration);
 } else { // Move rejected
 // Set the lattice to the original value, before scaling
 this->patch->lattice = oldLattice;
 CUDASequencer->patchData->lat = &(this->patch->lattice);
 // Copy scaled lattice flags to all patches
 CUDASequencer->patchData->flags.copyIntFlags(this->patch->flags);
 }

 // Restore all positions and forces and cuLattice
 CUDASequencer->monteCarloPressure_reject(this->patch->lattice);
 // Restore the reduction values

 reduction->setVal(CUDASequencer->patchData->reductionBackendSave);
 }
 }

 //continue the rejection step. Need to update lattice in all patches
 if(isMasterPe && !accepted){
 // copy global flags
 CUDASequencer->update_patch_flags();
 }
}
773 
// Device-resident atom migration: handles pending buffer reallocation,
// runs the GPU migration pipeline (init / perform / update counts and
// offsets / copy patch data), re-registers peer-to-peer buffers when any
// device reallocated, then wakes the parked patch threads so HomePatch
// bookkeeping (atom maps, SOA host mirrors) can be refreshed.
// NOTE(review): several interior lines (apparent node barriers between
// the master-PE phases, and the hpList declaration) are missing from
// this extraction -- code kept verbatim; confirm against the original.
void Sequencer::doMigrationGPU(const int startup, const int doGlobal,
 const int updatePatchMap) {

 const bool isMasterPe = deviceCUDA->getMasterPe() == CkMyPe();
 const bool updatePatchData = startup || doGlobal || updatePatchMap;
 PatchMap* patchMap = PatchMap::Object();

 bool realloc = false;

 // This will check if a reallocation was done on the previous migration
 // We use the scratch buffers to store the atomic data during reallocation
 // However, the migrationDestination data must be maintained throughout
 // migration (and tuple migration so beyond the scope of this function)
 // We probably should add a function to do this at the end of migration
 // But for now, DMC thought it was easier to just do at the beginning
 for (int i = 0; i < deviceCUDA->getNumDevice(); i++) {
 if(CUDASequencer->patchData->atomReallocationFlagPerDevice[i] != 0) {
 realloc = true;
 break;
 }
 }
 if (realloc) {
 if (isMasterPe) {
 CUDASequencer->reallocateMigrationDestination();
 CUDASequencer->registerSOAPointersToHost();
 }
 if (isMasterPe) {
 CUDASequencer->copySOAHostRegisterToDevice();
 }
 }

 // Proceed with migration
 //
 // Starts GPU migration
 //
 if (isMasterPe) {
 CUDASequencer->migrationLocalInit();
 // Hidden stream sync
 }

 if (isMasterPe) {
 CUDASequencer->migrationPerform();
 // Hidden stream sync
 }

 if (isMasterPe) {
 CUDASequencer->migrationUpdateAtomCounts();
 // Hidden stream sync
 }

 if (isMasterPe) {
 CUDASequencer->migrationUpdateAtomOffsets();
 // Hidden stream sync
 }

 if (isMasterPe) {
 CUDASequencer->copyPatchDataToHost();
 // Hidden stream sync
 }

 // Update device buffer allocations
 realloc = false;
 if (isMasterPe) {
 realloc = CUDASequencer->copyPatchData(true, false);
 CUDASequencer->patchData->atomReallocationFlagPerDevice[deviceCUDA->getDeviceIndex()] = realloc;
 }

 // If any of the devices have reallocated, we need to re-register the p2p buffers
 for (int i = 0; i < deviceCUDA->getNumDevice(); i++) {
 if(CUDASequencer->patchData->atomReallocationFlagPerDevice[i] != 0) {
 realloc = true;
 break;
 }
 }
 if (realloc) {
 if (isMasterPe) {
 CUDASequencer->registerSOAPointersToHost();
 }
 if (isMasterPe) {
 CUDASequencer->copySOAHostRegisterToDevice();
 }
 }

 // Performs various post processing like Solute/Solvent sorting and copies back to host
 if (isMasterPe) {
 CUDASequencer->migrationLocalPost(0);
 CUDASequencer->migrationSortAtomsNonbonded();
 }

 // If this is startup, we need to delay this until after AoS has been copied back to host
 // Because we do need the atomIDs for the atom map initially
 if (!updatePatchData) {
 wakeULTs(); // Wakes everyone back up for migration
 this->patch->positionsReady_GPU(1, startup);
 if(CUDASequencer->numPatchesCheckedIn < patchMap->numPatchesOnNode(CkMyPe()) -1 ) {
 CUDASequencer->masterThreadSleeping = true;
 CUDASequencer->masterThread = CthSelf();
 CthSuspend();
 }
 }

 if (isMasterPe) {
 CUDASequencer->sync();
 }

 if (isMasterPe) {
 CUDASequencer->migrationUpdateDestination();
 }

 if (isMasterPe) {
 CUDASequencer->migrationUpdateProxyDestination();
 }

 if (isMasterPe) {
 CUDASequencer->migrationUpdateRemoteOffsets();
 }

 if (isMasterPe) {
 CUDASequencer->copyDataToPeers(true);
 }

 if (updatePatchData) {
 // The atom maps need to be cleared once the HomePatch atom arrays have been updated
 int numhp = PatchMap::Object()->numHomePatches();
 for(int i = 0; i < numhp; ++i) {
 HomePatch *hp = hpList->item(i).patch;
 hp->clearAtomMap();
 }
 if (isMasterPe) {
 // We need the atom ordering to be correct within each
 // patch to setup the atom map. The vdwType of each atom
 // is also used for exclusion tuple generation
 CUDASequencer->copyAoSDataToHost();
 }
 wakeULTs(); // Wakes everyone back up for migration
 this->patch->positionsReady_GPU(1, startup);
 if(CUDASequencer->numPatchesCheckedIn < patchMap->numPatchesOnNode(CkMyPe()) -1 ) {
 CUDASequencer->masterThreadSleeping = true;
 CUDASequencer->masterThread = CthSelf();
 CthSuspend();
 }
 }
 if (isMasterPe) {
 if (doGlobal || simParams->forceDcdFrequency > 0) {
 CUDASequencer->updateHostPatchDataSOA(); // Needs to be called after HomePatch updates
 }
 }
 if (isMasterPe) {
 // This needs to be called after positionsReady_GPU so that the atom maps have been updated
 // This will be called in updateDeviceData during with startup=true, but we need to call it
 // with startup=false to make sure the atoms are updated
 CUDASequencer->migrationUpdateAdvancedFeatures(false);
 }
}
947 
948 // JM: Single-node integration scheme
949 void Sequencer::integrate_CUDA_SOA(int scriptTask){
950 
951  #ifdef TIMER_COLLECTION
952  TimerSet& t = patch->timerSet;
953  #endif
954  TIMER_INIT_WIDTH(t, KICK, simParams->timerBinWidth);
955  TIMER_INIT_WIDTH(t, MAXMOVE, simParams->timerBinWidth);
956  TIMER_INIT_WIDTH(t, DRIFT, simParams->timerBinWidth);
957  TIMER_INIT_WIDTH(t, PISTON, simParams->timerBinWidth);
958  TIMER_INIT_WIDTH(t, SUBMITHALF, simParams->timerBinWidth);
959  TIMER_INIT_WIDTH(t, VELBBK1, simParams->timerBinWidth);
960  TIMER_INIT_WIDTH(t, VELBBK2, simParams->timerBinWidth);
961  TIMER_INIT_WIDTH(t, RATTLE1, simParams->timerBinWidth);
962  TIMER_INIT_WIDTH(t, SUBMITFULL, simParams->timerBinWidth);
963  TIMER_INIT_WIDTH(t, SUBMITCOLLECT, simParams->timerBinWidth);
964 
965  // Keep track of the step number.
966  //int &step = patch->flags.step;
967  int &step = patch->flags.step;
968  step = simParams->firstTimestep;
969  Controller *c_out = CUDASequencer->patchData->c_out;
970  PatchMap* patchMap = PatchMap::Object();
971 
972  // For multiple time stepping, which force boxes are used?
973  int &maxForceUsed = patch->flags.maxForceUsed;
974  int &maxForceMerged = patch->flags.maxForceMerged;
975  maxForceUsed = Results::normal;
976  maxForceMerged = Results::normal;
977 
978  // Keep track of total steps and steps per cycle.
979  const int numberOfSteps = simParams->N;
980  //const int stepsPerCycle = simParams->stepsPerCycle;
981  CheckStep stepsPerCycle;
982  stepsPerCycle.init(step, simParams->stepsPerCycle);
983  // The fundamental time step, get the scaling right for velocity units.
984  const BigReal timestep = simParams->dt * RECIP_TIMEFACTOR;
985 
986  //const int nonbondedFrequency = simParams->nonbondedFrequency;
987  //slowFreq = nonbondedFrequency;
988  CheckStep nonbondedFrequency;
990  // The step size for short-range nonbonded forces.
991  const BigReal nbondstep = timestep * simParams->nonbondedFrequency;
992  int &doNonbonded = patch->flags.doNonbonded;
993  //doNonbonded = (step >= numberOfSteps) || !(step%nonbondedFrequency);
994  doNonbonded = (step >= numberOfSteps) ||
995  nonbondedFrequency.init(step, simParams->nonbondedFrequency);
996  //if ( nonbondedFrequency == 1 ) maxForceMerged = Results::nbond;
997  if ( nonbondedFrequency.period == 1 ) maxForceMerged = Results::nbond;
998  if ( doNonbonded ) maxForceUsed = Results::nbond;
999 
1000  // Do we do full electrostatics?
1001  const int dofull = ( simParams->fullElectFrequency ? 1 : 0 );
1002  //const int fullElectFrequency = simParams->fullElectFrequency;
1003  //if ( dofull ) slowFreq = fullElectFrequency;
1004  CheckStep fullElectFrequency;
1005  if ( dofull ) slowFreq = simParams->fullElectFrequency;
1006  // The step size for long-range electrostatics.
1007  const BigReal slowstep = timestep * simParams->fullElectFrequency;
1008  int &doFullElectrostatics = patch->flags.doFullElectrostatics;
1009  //doFullElectrostatics = (dofull &&
1010  // ((step >= numberOfSteps) || !(step%fullElectFrequency)));
1011  doFullElectrostatics = (dofull &&
1012  ((step >= numberOfSteps) ||
1013  fullElectFrequency.init(step, simParams->fullElectFrequency)));
1014  //if ( dofull && fullElectFrequency == 1 ) maxForceMerged = Results::slow;
1015  if ( dofull && fullElectFrequency.period == 1 ) maxForceMerged = Results::slow;
1016  if ( doFullElectrostatics ) maxForceUsed = Results::slow;
1017 
1018  // Bother to calculate energies?
1019  int &doEnergy = patch->flags.doEnergy;
1020  //int energyFrequency = simParams->outputEnergies;
1021  CheckStep energyFrequency;
1022  int newComputeEnergies = simParams->computeEnergies;
1023  if(simParams->alchOn) newComputeEnergies = NAMD_gcd(newComputeEnergies, simParams->alchOutFreq);
1024  doEnergy = energyFrequency.init(step, newComputeEnergies);
1025 
1026  // check for Monte Carlo pressure control.
1027  CheckStep monteCarloPressureFrequency;
1028  doEnergy += monteCarloPressureFrequency.init(step, (simParams->monteCarloPressureOn ?
1029  simParams->monteCarloPressureFreq : numberOfSteps + 1) );
1030 
1031  int &doVirial = patch->flags.doVirial;
1032  doVirial = 1;
1033  // Do we need to return forces to TCL script or Colvar module?
1034  int doTcl = simParams->tclForcesOn;
1035  int doColvars = simParams->colvarsOn;
1036  const int doIMD = (simParams->IMDon && ! (simParams->IMDignore || simParams->IMDignoreForces));
1037  int doGlobal = (doTcl || doColvars || doIMD);
1039  CheckStep globalMasterFrequency;
1040  bool globalMasterStep=false;
1041  int doGlobalObjects=0;
1042  int doGlobalStaleForces = 0;
1043 
1044  if(doGlobal)
1045  {
1046  globalMasterFrequency.init(step, (simParams->globalMasterFrequency > 0 ? simParams->globalMasterFrequency : numberOfSteps+1));
1047  globalMasterStep = globalMasterFrequency.check(step);
1048  doGlobalObjects = globalMasterStep? 1:0;
1050  {
1051  doGlobalObjects=1;
1052  doGlobalStaleForces=1;
1053  }
1055  {
1056  doGlobalStaleForces = simParams->globalMasterStaleForces;
1057  }
1059  {
1060  doGlobalStaleForces=doGlobalObjects;
1061  }
1062  else
1063  {
1064  doGlobalStaleForces=doGlobalObjects;
1065  }
1066  }
1067  else
1068  {
1069  doGlobalStaleForces = 0;
1070  doGlobalObjects = 0;
1071  }
1072  // The following flags have to be explicitly disabled in Patch object.
1073  patch->flags.doMolly = 0;
1074  patch->flags.doLoweAndersen = 0;
1075  patch->flags.doGBIS = 0;
1076  patch->flags.doLCPO = 0;
1077 
1078  // Square of maximum velocity for simulation safety check
1079  const BigReal maxvel2 =
1080  (simParams->cutoff * simParams->cutoff) / (timestep * timestep);
1081 
1082  // check for Langevin piston
1083  // set period beyond numberOfSteps to disable
1084  // fprintf(stderr, " Patch %d Pinging in from integrate_cuda!\n", this->patch->getPatchID());
1085  CheckStep langevinPistonFrequency;
1086  langevinPistonFrequency.init(step,
1087  (simParams->langevinPistonOn ? slowFreq : numberOfSteps+1 ),
1088  (simParams->langevinPistonOn ? -1-slowFreq/2 : 0) /* = delta */);
1089 
1090  // check for velocity rescaling
1091  // set period beyond numberOfSteps to disable
1092  CheckStep stochRescaleFrequency;
1093  stochRescaleFrequency.init(step, (simParams->stochRescaleOn ?
1094  simParams->stochRescaleFreq : numberOfSteps+1 ) );
1095 
1096  CheckStep reassignVelocityFrequency;
1097  reassignVelocityFrequency.init(step, ((simParams->reassignFreq>0) ?
1098  simParams->reassignFreq : numberOfSteps+1 ) );
1099 
1100  // check for output
1101  // set period beyond numberOfSteps to disable
1102  CheckStep restartFrequency;
1103  restartFrequency.init(step, (simParams->restartFrequency > 0 ?
1104  simParams->restartFrequency : numberOfSteps+1) );
1105  CheckStep dcdFrequency;
1106  dcdFrequency.init(step, (simParams->dcdFrequency > 0 ?
1107  simParams->dcdFrequency : numberOfSteps+1) );
1108  CheckStep velDcdFrequency;
1109  velDcdFrequency.init(step, (simParams->velDcdFrequency > 0 ?
1110  simParams->velDcdFrequency : numberOfSteps+1) );
1111  CheckStep forceDcdFrequency;
1112  forceDcdFrequency.init(step, (simParams->forceDcdFrequency > 0 ?
1113  simParams->forceDcdFrequency : numberOfSteps+1) );
1114  CheckStep imdFrequency;
1115  imdFrequency.init(step, (simParams->IMDon ?
1116  simParams->IMDfreq : numberOfSteps+1) );
1117 
1118  patch->copy_atoms_to_SOA(); // do this whether or not useDeviceMigration
1119 
1120  // Haochuan: is this really needed for GPU-resident?
1121  if (simParams->rigidBonds != RIGID_NONE && ! patch->settle_initialized) {
1123  patch->rattleListValid_SOA = true;
1124  }
1125 
1126  this->suspendULTs();
1127  // for "run 0", numberOfSteps is zero, but we want to have at least a single energy evaluation
1128  if(!masterThread) {
1129  return;
1130  }
1131  bool isMasterPe = (deviceCUDA->getMasterPe() == CkMyPe() );
1133 
1134  CUDASequencer->breakSuspends = false;
1135 
1136  // XXX this is ugly!
1137  // one thread will have the CollectionMaster and Output defined
1138  // use it to set the node group so that any thread can access
1139  if (CUDASequencer->patchData->ptrCollectionMaster == NULL) {
1140  CollectionMaster *pcm = CkpvAccess(CollectionMaster_instance)->Object();
1141  if (pcm) {
1142  CUDASequencer->patchData->ptrCollectionMaster = pcm;
1143  }
1144  }
1145  if (CUDASequencer->patchData->ptrOutput == NULL) {
1146  Output *pout = Node::Object()->output;
1147  if (pout) {
1148  CUDASequencer->patchData->ptrOutput = pout;
1149  }
1150  }
1151  if (CUDASequencer->patchData->pdb == NULL) {
1152  PDB *pdb = Node::Object()->pdb;
1153  if (pdb) {
1154  CUDASequencer->patchData->pdb = pdb;
1155  }
1156  }
1157  if (CUDASequencer->patchData->imd == NULL) {
1158  IMDOutput *imd = Node::Object()->imd;
1159  if (imd->getIMD()) {
1160  CUDASequencer->patchData->imd = imd;
1161  }
1162  }
1163 
1164  // Register ComputeCUDAMgrs from each PE into a list for later usage
1165  if(isMasterPe){
1166  // Each masterPE registers its own computeCUDAMgr
1167  CUDASequencer->patchData->cudaBondedList[CkMyPe()] = ComputeCUDAMgr::getComputeCUDAMgr()->getComputeBondedCUDA();
1168  CUDASequencer->patchData->cudaNonbondedList[CkMyPe()] = ComputeCUDAMgr::getComputeCUDAMgr()->getCudaComputeNonbonded();
1169  }else{
1170  CUDASequencer->patchData->cudaBondedList[CkMyPe()] = NULL;
1171  CUDASequencer->patchData->cudaNonbondedList[CkMyPe()] = NULL;
1172  }
1173 
1174  if (isMasterPe) {
1176  if(dofull && deviceCUDA->getIsPmeDevice()){
1177  CudaPmeOneDevice* cudaPme = 0;
1178  cudaPme = cudaMgr->createCudaPmeOneDevice();
1179  }
1180  }
1181 
1183 
1184 /* JM NOTE: This Will Contains the first calls to the integration loop. The order is:
1185  * 1 - Rattle (0,0)
1186  * 2 - runComputeObjects
1187  * 3 - addForceToMomentum(-0.5, tstep)
1188  * 4 - Rattle (-timestep, 0);
1189  * 5 - submitHalfstep();
1190  * 6 - addForceToMomentum(1.0 , tstep)
1191  * 7 - Rattle (tstep, 1)
1192  * 8 - SubmitHalf()
1193  * 9 - addForceToMomentum(-0.5, tstep)
1194  * 10 - submitReductions()
1195  */
1196 
1197  if(scriptTask == SCRIPT_RUN){
1198  updateDeviceData(1, maxForceUsed, doGlobal);
1199 
1200  if(isMasterPe) {
1201  if(patchData->updateCounter.load()>0)
1202  {
1203  CUDASequencer->updateDeviceKernels();
1204  }
1205 
1206  // warm_up1 is basically rattle1_SOA(0,0)
1207  CUDASequencer->startRun1(maxForceUsed, this->patch->lattice);
1208  (this->patch->flags.sequence)++;
1209  if (deviceCUDA->getIsMasterDevice()){
1210  CUDASequencer->patchData->lat = &(this->patch->lattice);
1211  CUDASequencer->patchData->flags.copyIntFlags(this->patch->flags);
1212  }
1214  const auto cudaGlobalMasterObject = cudaMgr->getCudaGlobalMaster();
1215  const bool addCudaGlobalForces =
1216  (cudaGlobalMasterObject != nullptr) ?
1217  cudaGlobalMasterObject->willAddGlobalForces() :
1218  false;
1219  if (addCudaGlobalForces) {
1220  CUDASequencer->allocateGPUSavedForces();
1221  }
1222  }
1223 
1225  if (!simParams->useDeviceMigration) {
1226  wakeULTs(); // Wakes everyone back up for migration
1227  this->patch->positionsReady_SOA(1);
1228  if(CUDASequencer->numPatchesCheckedIn < patchMap->numPatchesOnNode(CkMyPe()) -1 ) {
1229  CUDASequencer->masterThreadSleeping = true;
1230  CUDASequencer->masterThread = CthSelf();
1231  CthSuspend();
1232  }
1234  updateDeviceData(0, maxForceUsed, doGlobal);
1235  } else {
1236  doMigrationGPU(1, doGlobal, simParams->updateAtomMap);
1237  }
1251  if (isMasterPe) {
1252  CUDASequencer->setRescalePairlistTolerance(step < numberOfSteps);
1253  }
1255  // I've migrated everything. Now run computes
1256  runComputeObjectsCUDA(/*isMigration = */ 1 ,
1257  doGlobal,
1258  /* step < numberofSteps */ 1,
1259  /* step = */ 0,
1260  /* startup = */ 1);
1261 
1262  if(isMasterPe){
1263  CUDASequencer->finish_patch_flags(true);
1265  const auto cudaGlobalMasterObject = cudaMgr->getCudaGlobalMaster();
1266  const bool addCudaGlobalForces =
1267  (cudaGlobalMasterObject != nullptr) ?
1268  cudaGlobalMasterObject->willAddGlobalForces() :
1269  false;
1270  CUDASequencer->startRun2(timestep,
1271  nbondstep, slowstep, this->patch->lattice.origin(),
1272  doGlobal || addCudaGlobalForces, maxForceUsed);
1273  }
1275  if(isMasterPe){
1276  const bool requestTotalForces = computeGlobal ? computeGlobal->getForceSendActive() : false;
1278  const auto cudaGlobalMasterObject = cudaMgr->getCudaGlobalMaster();
1279  const bool requestGPUTotalForces =
1280  (cudaGlobalMasterObject != nullptr) ?
1281  cudaGlobalMasterObject->requestedTotalForces() :
1282  false;
1283  CUDASequencer->startRun3(timestep,
1284  nbondstep, slowstep, this->patch->lattice.origin(),
1285  requestTotalForces, doGlobalStaleForces,
1287  requestGPUTotalForces,
1288  maxForceUsed);
1289  }
1290 
1291  // save total force in computeGlobal, forces are copied from device
1292  // to host in startRun3
1293  if (doGlobal) {
1295  // store the total force for compute global clients
1296  int numhp = PatchMap::Object()->numHomePatches();
1298  for(int i = 0; i < numhp; ++i) {
1299  HomePatch *hp = hpList->item(i).patch;
1300  computeGlobal->saveTotalForces(hp);
1301  }
1302  }
1303  }
1304  CUDASequencer->submitReductionValues();
1305  syncColl->waitAndAwaken(); // Allow charm++ reductions to finish before calling require in print step
1306 
1307  // Called everything, now I can go ahead and print the step
1308  // PE 0 needs to handle IO as it owns the controller object
1309  // JM: What happens if PE 0 does not own a GPU here? XXX Check
1310  if(deviceCUDA->getIsMasterDevice()) {
1311  CUDASequencer->patchData->flags.copyIntFlags(this->patch->flags);
1312  c_out->resetMovingAverage();
1313  c_out->printStep(step);
1314  }
1316 
1317  // XXX Should we promote velrescaling into Sequencer in order to save
1318  // the velocity rescaling coefficient between script run commands?
1319  double velrescaling = 1;
1320  // --------- Start of the MD loop ------- //
1321  for( ++step; step <= numberOfSteps; ++step ){
1322  const int imdStep = imdFrequency.check(step);
1323  const int isForcesOutputStep = forceDcdFrequency.check(step) + (doIMD ? imdStep : 0);
1324  int dcdSelectionChecks=0;
1325  Molecule *molecule = Node::Object()->molecule;
1326  for(int dcdindex=0; dcdindex<16;++dcdindex)
1327  {
1328  int dcdSelectionFrequency = molecule->dcdSelectionParams[dcdindex].frequency;
1329  if(dcdSelectionFrequency && step % dcdSelectionFrequency==0)
1330  dcdSelectionChecks++;
1331  }
1332  const int isCollection = restartFrequency.check(step) +
1333  dcdFrequency.check(step) + velDcdFrequency.check(step) +
1334  imdStep + dcdSelectionChecks;
1335  int isMigration = false;
1336  const int doVelocityRescale = stochRescaleFrequency.check(step);
1337  const int doMCPressure = monteCarloPressureFrequency.check(step);
1338  // XXX doVelRescale should instead set a "doTemperature" flag
1339  doEnergy = energyFrequency.check(step) || doVelocityRescale || doMCPressure;
1340  int langevinPistonStep = langevinPistonFrequency.check(step);
1341 
1342  int reassignVelocityStep = reassignVelocityFrequency.check(step);
1343 
1344  // berendsen pressure control
1345  int berendsenPressureStep = 0;
1350  berendsenPressureStep = 1;
1351  }
1352  }
1353  if(patchData->updateCounter.load()>0)
1354  {
1355  CUDASequencer->updateDeviceKernels();
1356  }
1357 
1358  if(doGlobal)
1359  {
1360  globalMasterStep = globalMasterFrequency.check(step);
1361  doGlobalObjects = globalMasterStep? 1:0;
1363  {
1364  doGlobalObjects=1;
1365  doGlobalStaleForces=1;
1366  }
1368  {
1369  doGlobalStaleForces = simParams->globalMasterStaleForces;
1370  }
1372  {
1373  doGlobalStaleForces=doGlobalObjects;
1374  }
1375  else
1376  {
1377  doGlobalStaleForces=doGlobalObjects;
1378  }
1379  }
1380  else
1381  {
1382  doGlobalStaleForces = 0;
1383  doGlobalObjects = 0;
1384  globalMasterStep = false;
1385  }
1386  // CkPrintf("step %d doGlobal %d doGlobalObjects %d doGlobalStaleForces %d globalMasterStep %d globalMasterFrequency %d\n", step, doGlobal, doGlobalObjects, doGlobalStaleForces, globalMasterStep, simParams->globalMasterFrequency);
1387 
1388 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
1389  //eon = epid && (beginStep < step && step <= endStep);
1390  // int eon = epid && (beginStep < step && step <= endStep);
1391  // if (controlProfiling && step == beginStep) {
1392  // NAMD_PROFILE_START();
1393  // }
1394  //if (controlProfiling && step == endStep) {
1395  // NAMD_PROFILE_STOP();
1396  //}
1397 #endif
1398 
1399  Vector origin = this->patch->lattice.origin();
1400  Tensor factor;
1401  if (deviceCUDA->getIsMasterDevice()) {
1402  if (simParams->langevinPistonOn) {
1403  c_out->piston1(step);
1404  }
1405  // Get the rescale factor for berendsen from controller
1407  c_out->berendsenPressureController(step);
1408  }
1409  }
1410 
1412  syncColl->waitAndAwaken();
1413  if (isMasterPe) cudaCheck(cudaDeviceSynchronize());
1415  }
1416  if (langevinPistonStep || berendsenPressureStep) {
1417  factor = broadcast->positionRescaleFactor.get(step, CkNumPes());
1418 
1419  if (isMasterPe) {
1420  this->patch->lattice.rescale(factor);
1421  CUDASequencer->patchData->lat = &(this->patch->lattice);
1422  CUDASequencer->patchData->factor = &(factor);
1423  }
1424  }
1425 
1427  NAMD_EVENT_START(1, NamdProfileEvent::CUDASOA_LAUNCHPT1);
1428  int previousMaxForceUsed;
1429  if(isMasterPe){
1430  // need to remember number of buffers for previous force calculation
1431  previousMaxForceUsed = maxForceUsed;
1432  // update local flags
1433  //doNonbonded = !(step%nonbondedFrequency);
1434  // no need to include doMCPressure since it's common factor of nonbondedFrequency
1435  doNonbonded = nonbondedFrequency.check(step);
1436  // no need to include doMCPressure since it's common factor of fullElectFrequency
1437  doFullElectrostatics = (dofull && fullElectFrequency.check(step));
1438  maxForceUsed = Results::normal;
1439  if ( doNonbonded ) maxForceUsed = Results::nbond;
1440  if ( doFullElectrostatics ) maxForceUsed = Results::slow;
1441 
1442  (this->patch->flags.sequence)++;
1443  // JM: Pressures needed for every timestep if the piston is on
1445 
1446  // copy local flags to global
1447  if(deviceCUDA->getIsMasterDevice()) CUDASequencer->patchData->flags.copyIntFlags(this->patch->flags);
1448  }
1449 
1451 
1452  if(isMasterPe){
1453  CUDASequencer->launch_part1(
1454  step,
1455  timestep, nbondstep, slowstep, velrescaling, maxvel2,
1456  *(CUDASequencer->patchData->factor),
1457  origin,
1458  // this->patch->lattice, // need to use the lattice from PE 0 right now
1459  (langevinPistonStep || berendsenPressureStep) ? *(CUDASequencer->patchData->lat) : this->patch->lattice,
1460  reassignVelocityStep,
1461  langevinPistonStep,
1462  berendsenPressureStep,
1463  previousMaxForceUsed, // call with previous maxForceUsed
1464  (const int)(step == simParams->firstTimestep + 1),
1465  this->patch->flags.savePairlists, // XXX how to initialize?
1466  this->patch->flags.usePairlists, // XXX how to initialize?
1467  doEnergy);
1468  // reset velocity rescaling coefficient after applying it
1469  velrescaling = 1;
1470  }
1471  if (reassignVelocityStep)
1472  {
1473  // CkPrintf("dump after launch_part1\n");
1474  // CUDASequencer->printSOAPositionsAndVelocities(2,10);
1475  }
1476  NAMD_EVENT_STOP(1, NamdProfileEvent::CUDASOA_LAUNCHPT1);
1477 
1479 
1480  if(isMasterPe){
1481  CUDASequencer->launch_part11(
1482  timestep, nbondstep, slowstep, velrescaling, maxvel2,
1483  *(CUDASequencer->patchData->factor),
1484  origin,
1485  // this->patch->lattice, // need to use the lattice from PE 0 right now
1486  (langevinPistonStep || berendsenPressureStep) ? *(CUDASequencer->patchData->lat) : this->patch->lattice,
1487  langevinPistonStep,
1488  previousMaxForceUsed, // call with previous maxForceUsed
1489  (const int)(step == simParams->firstTimestep + 1),
1490  this->patch->flags.savePairlists, // XXX how to initialize?
1491  this->patch->flags.usePairlists, // XXX how to initialize?
1492  doEnergy);
1493  // reset velocity rescaling coefficient after applying it
1494  velrescaling = 1;
1495  }
1496  NAMD_EVENT_STOP(1, NamdProfileEvent::CUDASOA_LAUNCHPT1);
1497 
1499 
1500 
1501  for(int i = 0; i < deviceCUDA->getNumDevice(); i++){
1502  if(CUDASequencer->patchData->migrationFlagPerDevice[i] != 0) {
1503  isMigration = true;
1504  break;
1505  }
1506  }
1507 
1508  if(isMasterPe){
1509  // If this is a Device Migration step we'll do it later
1510  if (!simParams->useDeviceMigration || !isMigration) {
1511  CUDASequencer->launch_set_compute_positions();
1512  }
1513  }
1514 
1515  // isMigration = (CUDASequencer->patchData->migrationFlagPerDevice.end() != t) ? 1:0;
1516 
1517  if(isMasterPe) {
1518  // if(CkMyPe() == 0) CUDASequencer->updatePairlistFlags(isMigration);
1519  CUDASequencer->updatePairlistFlags(isMigration);
1520  if (!simParams->useDeviceMigration) {
1521  CUDASequencer->copyPositionsAndVelocitiesToHost(isMigration, doGlobalObjects);
1522  }
1523  if (simParams->useCudaGlobal && !isMigration) {
1524  // Copy atoms to clients if CudaGlobalMaster is used
1525  // For a migration step, we will do it in runComputeObjectsCUDA
1527  auto cudaGlobal = deviceCUDA->getIsGlobalDevice() ? cudaMgr->getCudaGlobalMaster() : nullptr;
1528  if (cudaGlobal) {
1529  cudaGlobal->setStep(static_cast<int64_t>(patch->flags.step));
1530  cudaGlobal->communicateToClients(&(this->patch->lattice));
1531  }
1532  }
1533  }
1534 
1535 
1536  if(isMigration) {
1537  if (!simParams->useDeviceMigration) {
1539  wakeULTs(); // sets the number of patches
1540  this->patch->positionsReady_SOA(isMigration);
1541  if(CUDASequencer->numPatchesCheckedIn < patchMap->numPatchesOnNode(CkMyPe()) -1 ) {
1542  CUDASequencer->masterThreadSleeping = true;
1543  CUDASequencer->masterThread = CthSelf();
1544  CthSuspend(); // suspends until everyone else has pinged back. :]
1545  }
1547  updateDeviceData(0, maxForceUsed, doGlobal);
1548  } else {
1549  doMigrationGPU(false, doGlobal, simParams->updateAtomMap);
1551  }
1552  }
1553 
1554  // Calculate force/energy for bond, nonBond, pme.
1555 
1556  this->runComputeObjectsCUDA(isMigration, doGlobalObjects, step<numberOfSteps, step, 0 /* startup */);
1557 
1558  if (isMasterPe) {
1559  // if(CkMyPe() == 0) CUDASequencer->finish_patch_flags(isMigration);
1560  CUDASequencer->finish_patch_flags(isMigration);
1561  CUDASequencer->patchData->migrationFlagPerDevice[deviceCUDA->getDeviceIndex()] = 0; // flags it back to zero
1562  }
1564 
1565  NAMD_EVENT_START(1, NamdProfileEvent::CUDASOA_LAUNCHPT2);
1566  if(isMasterPe){
1568  const auto cudaGlobalMasterObject = cudaMgr->getCudaGlobalMaster();
1569  const bool addCudaGlobalForces =
1570  (cudaGlobalMasterObject != nullptr) ?
1571  cudaGlobalMasterObject->willAddGlobalForces() :
1572  false;
1573  CUDASequencer->launch_part2(doMCPressure,
1574  timestep, nbondstep, slowstep,
1575  origin,
1576  step,
1577  maxForceUsed,
1578  langevinPistonStep,
1579  isMigration && (!simParams->useDeviceMigration),
1580  isCollection,
1581  doGlobalStaleForces || addCudaGlobalForces,
1582  doEnergy);
1583  }
1585  NAMD_EVENT_STOP(1, NamdProfileEvent::CUDASOA_LAUNCHPT2);
1586 
1587  // Apply MC pressure control
1588  if(doMCPressure){
1589  monteCarloPressureControl(step, isMigration, 1, 1, maxForceUsed, doGlobalStaleForces);
1591  }
1592 
1593  const bool requestTotalForces = (computeGlobal ? computeGlobal->getForceSendActive() : false) && doGlobalObjects;
1594  // continue launch_part2, after cellBasis fluctuation in MC barostat
1595  if(isMasterPe){
1597  const auto CudaGlobalMasterObject = cudaMgr->getCudaGlobalMaster();
1598  const bool requestGPUTotalForces =
1599  (CudaGlobalMasterObject != nullptr) ?
1600  CudaGlobalMasterObject->requestedTotalForces() :
1601  false;
1602  CUDASequencer->launch_part3(doMCPressure,
1603  timestep, nbondstep, slowstep,
1604  origin,
1605  step,
1606  maxForceUsed,
1607  requestTotalForces, // requested Force
1608  doGlobalStaleForces,
1609  requestGPUTotalForces,
1610  isMigration,
1611  isCollection,
1612  doEnergy,
1613  isForcesOutputStep);
1614  }
1616  NAMD_EVENT_STOP(1, NamdProfileEvent::CUDASOA_LAUNCHPT2);
1617 
1618  // save total force in computeGlobal, forces are copied from device
1619  // to host in launch_part3
1620  if (requestTotalForces) {
1622  // store the total force for compute global clients
1623  int numhp = PatchMap::Object()->numHomePatches();
1625  for(int i = 0; i < numhp; ++i) {
1626  HomePatch *hp = hpList->item(i).patch;
1627  computeGlobal->saveTotalForces(hp);
1628  }
1629  }
1630 
1631  CUDASequencer->submitReductionValues();
1632 
1633  NAMD_EVENT_START(1, NamdProfileEvent::CUDASOA_PRTSTEP);
1634  syncColl->waitAndAwaken(); // Allow charm++ reductions to finish before calling require in print step
1635 
1636  if (deviceCUDA->getIsMasterDevice()) {
1637  // even though you're not on a printstep, calling this still takes 15us approx!!!
1638  c_out->printStep(step);
1639  // stochastic velocity rescaling
1640  // get coefficient from current temperature
1641  // to be applied on NEXT loop iteration
1642  if (doVelocityRescale) {
1643  // calculate coefficient based on current temperature
1644  velrescaling = c_out->stochRescaleCoefficient();
1645  broadcast->stochRescaleCoefficient.publish(step, velrescaling);
1646  }
1647  }
1648  // Non-master PEs should get the rescale factor here.
1649  if (doVelocityRescale) {
1650  syncColl->waitAndAwaken(); // Allow charm++ broadcast to happen and sync
1651  velrescaling = broadcast->stochRescaleCoefficient.get(step, CkNumPes());
1652  }
1653  NAMD_EVENT_STOP(1, NamdProfileEvent::CUDASOA_PRTSTEP);
1654 
1655  NAMD_EVENT_START(1, NamdProfileEvent::CUDASOA_SUBCOL);
1656  if (isCollection) {
1659  if (isMasterPe) {
1660  CUDASequencer->copyAoSDataToHost();
1661  }
1662  // Make sure the data has been copied to all home patches. All PEs
1663  // participate in outputting
1665  }
1666  HomePatchList *hplist = patchMap->homePatchList();
1667  for (auto i= hplist->begin(); i != hplist->end(); i++) {
1668  HomePatch *hp = i->patch;
1669  hp->sequencer->submitCollections_SOA(step);
1670  }
1671 
1672  syncColl->waitAndAwaken(); // Allow for collections to finish
1673  }
1674  NAMD_EVENT_STOP(1, NamdProfileEvent::CUDASOA_SUBCOL);
1675  }
1676 
1679  if (isMasterPe) {
1680  CUDASequencer->copyAoSDataToHost();
1681  CUDASequencer->updateHostPatchDataSOA();
1682  CUDASequencer->saveForceCUDASOA_direct(false, true, maxForceUsed);
1683  }
1685  // Ensure that the SoA data is also fresh to avoid unforeseen issues. sort_solvent_atoms should not actually
1686  // order atoms, but ensure that the solute/solvent counts are accurate
1687  HomePatchList *hplist = patchMap->homePatchList();
1688  for (auto i= hplist->begin(); i != hplist->end(); i++) {
1689  HomePatch *hp = i->patch;
1690  hp->sort_solvent_atoms();
1691  hp->copy_atoms_to_SOA();
1692  hp->copy_forces_to_AOS(); // to support "output withforces"
1693  }
1694  } else {
1695  if(isMasterPe) {
1696  CUDASequencer->updateHostPatchDataSOA();
1697  CUDASequencer->saveForceCUDASOA_direct(false, true, maxForceUsed);
1698  }
1699  if(isMasterPe) CUDASequencer->copyPositionsAndVelocitiesToHost(true,doGlobal);
1701  HomePatchList *hplist = patchMap->homePatchList();
1702  for (auto i= hplist->begin(); i != hplist->end(); i++) {
1703  HomePatch *hp = i->patch;
1704  hp->copy_updates_to_AOS();
1705  hp->copy_forces_to_AOS(); // to support "output withforces"
1706  }
1707  }
1708  syncColl->barrier(SynchronousCollectiveScope::all); // Make sure the data has been copied to all home patches
1709 
1710  //syncColl->barrier(SynchronousCollectiveScope::all);
1711  CUDASequencer->breakSuspends = true;
1712  wakeULTs();
1713  if(deviceCUDA->getIsMasterDevice()) c_out->awaken();
1714 }
1715 
1716 
1717 /*
1718  * Updates device data after a migration
1719  *
1720  */
// Refreshes per-device GPU data structures after an atom migration.
//   startup      - nonzero on the first invocation of a run; forwarded to the
//                  SequencerCUDA copy/post helpers to select startup paths
//   maxForceUsed - highest force slot in use (Results::normal/nbond/slow),
//                  forwarded to the non-master atom upload
//   doGlobal     - nonzero when global-force clients (TCL/Colvars/IMD) are
//                  active and need host-side SOA patch data
// NOTE(review): this listing omits some original source lines (1723, 1726,
// 1737, 1740, 1748), so the enclosing conditionals/braces around parts of
// this body are not fully visible here -- confirm against the full source.
1721 void Sequencer::updateDeviceData(const int startup, const int maxForceUsed, const int doGlobal) {
1722  bool isMaster = deviceCUDA->getMasterPe() == CkMyPe();
  // Only the master PE of each device copies the patch records and the AoS
  // atom data; other PEs upload their atom data per the force level in use.
1724  if (isMaster) {
1725  CUDASequencer->copyPatchData(true, startup);
1727  CUDASequencer->reallocateMigrationDestination();
1728  CUDASequencer->copyAtomDataToDeviceAoS();
1729  } else {
1730  CUDASequencer->copyAtomDataToDevice(startup, maxForceUsed);
1731  }
  // Post-migration bookkeeping on the SequencerCUDA side.
1732  CUDASequencer->migrationLocalPost(startup);
1733  CUDASequencer->migrationUpdateAdvancedFeatures(startup);
1734  // XXX This is only necessary if reallocation happens
1735  CUDASequencer->registerSOAPointersToHost();
1736  }
  // Master PE publishes the registered SOA host pointers to the device and
  // clears this device's atom-reallocation flag.
1738  if (isMaster) {
1739  CUDASequencer->copySOAHostRegisterToDevice();
1741  CUDASequencer->patchData->atomReallocationFlagPerDevice[deviceCUDA->getDeviceIndex()] = 0;
1742  }
1743 
  // Host-side SOA patch data is needed when forces are returned to global
  // clients or written out (forceDcdFrequency > 0).
1744  if (doGlobal || simParams->forceDcdFrequency > 0) {
1745  CUDASequencer->updateHostPatchDataSOA(); // Needs to be called after HomePatch::domigration
1746  }
1747  }
1749 }
1750 
1751 /*
1752  * Constructs the meta data structures storing the patch data for GPU resident code path
1753  *
1754  * This is called once during startup
1755  *
1756  */
// Builds the per-device patch map used by the GPU-resident code path:
// orders home patches, classifies home vs. proxy patches, and records the
// cross-device peer mapping. Runs once per PE; the heavy lifting is done
// only by each device's master PE.
// NOTE(review): the function's signature line (around original line 1757)
// is missing from this listing; the preceding file comment identifies this
// as the device-patch-map construction routine -- confirm the exact
// signature against the full source.
1759  ComputeBondedCUDA* cudaBond = cudaMgr->getComputeBondedCUDA();
1760  CudaComputeNonbonded* cudaNbond = cudaMgr->getCudaComputeNonbonded();
1761 
  // Grab the PatchData node-group branch local to this PE.
1762  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1763  patchData = cpdata.ckLocalBranch();
1764 
1765  const bool isMasterPe = deviceCUDA->getMasterPe() == CkMyPe();
1766 
1767  // constructDevicePatchMap should only be called once per PE
1768  if (patchData->devicePatchMapFlag[CkMyPe()]) return;
1769  patchData->devicePatchMapFlag[CkMyPe()] = 1;
1770 
1771  // One thread per GPU will execute this block
1772  if (isMasterPe) {
1773  const int deviceIndex = deviceCUDA->getDeviceIndex();
1774 
1775  // Nonbonded patches are computed by CudaComputeNonbonded and contain all the patches and proxy
1776  // patches on this device. HomePatches is computed by SequencerCUDA and only contains the
1777  // home patches. localPatches will be generated by this function
1778  using NBPatchRecord = CudaComputeNonbonded::PatchRecord;
1780  std::vector<NBPatchRecord>& nonbondedPatches = cudaNbond->getPatches();
1781  std::vector<HomePatch*>& homePatches = patchData->devData[deviceIndex].patches;
1782  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1783 
1784  // The home patches are not necessarily ordered by their patchID. This can happen if there
1785  // are multiple PEs assigned to the same GPU. Sorting the home patches by their patch ID
1786  // makes it easy to have a consistent ordering
1787  std::stable_sort(
1788  homePatches.begin(),
1789  homePatches.end(),
1790  [](HomePatch* a, HomePatch* b) {
1791  return (a->getPatchID() < b->getPatchID());
1792  });
1793 
1794  // Iterates over all the patches on this device, adds them to h_localPatches,
1795  // and determines whether each is a home or proxy patch
1796  for (int i = 0; i < nonbondedPatches.size(); i++) {
1797  CudaLocalRecord record;
1798  record.patchID = nonbondedPatches[i].patchID;
1799 
1800  // TODO DMC the patchmap should be able to do this
1801  const int targetPatchID = record.patchID;
  // A patch is a proxy on this device iff it is not among the home patches.
1802  auto result = std::find_if(
1803  homePatches.begin(),
1804  homePatches.end(),
1805  [targetPatchID](HomePatch* p) {
1806  return (p->getPatchID() == targetPatchID);
1807  });
1808 
1809  record.isProxy = (result == homePatches.end());
1810  localPatches.push_back(record);
1811  }
1812 
1813  // The home patches should be at the beginning of the patch list
1814  // This makes integration easier since we can ignore the patches and operate on a
1815  // contiguous chunk of home atoms
  // stable_sort on isProxy (false < true) moves home patches to the front
  // while preserving the relative order established above within each group.
1816  std::stable_sort(
1817  localPatches.begin(),
1818  localPatches.end(),
1819  [](CudaLocalRecord a, CudaLocalRecord b) {
1820  return (a.isProxy < b.isProxy);
1821  });
1822 
1823  // Now the ordering is fixed we can update the bonded and nonbonded orders. Since we have
1824  // moved the home patches to the beginning the ordering has changed
1825  cudaBond->updatePatchOrder(localPatches);
1826  cudaNbond->updatePatchOrder(localPatches);
1827  patchData->devData[deviceIndex].numPatchesHome = homePatches.size();
1828  patchData->devData[deviceIndex].numPatchesHomeAndProxy = localPatches.size();
1829  }
1831 
1832  // Iterates over all patches again, and generates the mapping between GPUs. For each patch,
1833  // it checks the other devices to see if the patch is on that device.
1834  // - For HomePatches, there will be a peer record for all of its proxies
1835  // - For ProxyPatches, there will only be a peer record for its home patch
1836  // There is a single array of peer records per device. Each patch stores an offset into this
1837  // array as well as its number of peer records
1838  if (isMasterPe) {
1839  const int deviceIndex = deviceCUDA->getDeviceIndex();
1840  std::vector<CudaPeerRecord>& myPeerPatches = patchData->devData[deviceIndex].h_peerPatches;
1841  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1842 
1843  for (int i = 0; i < localPatches.size(); i++) {
1844  std::vector<CudaPeerRecord> tempPeers;
1845  const int targetPatchID = localPatches[i].patchID;
1846  const int targetIsProxy = localPatches[i].isProxy;
1847 
1848  for (int devIdx = 0; devIdx < deviceCUDA->getNumDevice(); devIdx++) {
1849  if (devIdx == deviceIndex) continue;
1850  std::vector<CudaLocalRecord>& peerPatches = patchData->devData[devIdx].h_localPatches;
1851 
1852  // Searches peerPatches for patchID. If it is not being integrated on this device
1853  // then ignore other non-integration patches
  // A peer match is the same patchID with the opposite home/proxy role on
  // the other device; at most one such record per device, hence the break.
1854  for (int j = 0; j < patchData->devData[devIdx].numPatchesHomeAndProxy; j++) {
1855  const CudaLocalRecord peer = peerPatches[j];
1856  if (peer.patchID == targetPatchID && peer.isProxy != targetIsProxy) {
1857  CudaPeerRecord peerRecord;
1858  peerRecord.deviceIndex = devIdx;
1859  peerRecord.patchIndex = j;
1860  tempPeers.push_back(peerRecord);
1861  break;
1862  }
1863  }
1864  }
1865 
1866  // Once we have the list of peer records, add them to the single device-width vector
1867  // and record the offset and count
1868  localPatches[i].numPeerRecord = tempPeers.size();
1869  if (!tempPeers.empty()) {
1870  localPatches[i].peerRecordStartIndex = myPeerPatches.size();
1871  myPeerPatches.insert(myPeerPatches.end(), tempPeers.begin(), tempPeers.end());
1872  }
1873  }
1874  }
1876 }
1877 
// Debug helper: prints this device's home and proxy patch IDs, serialized
// across devices via the PatchData print lock.
// NOTE(review): the signature line (original line ~1878) is missing from
// this listing; the body suggests a void, argument-less Sequencer method
// (a printDevicePatchMap-style routine) -- confirm against the full source.
1879  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1880  patchData = cpdata.ckLocalBranch();
1881 
1882  const bool isMasterPe = deviceCUDA->getMasterPe() == CkMyPe();
1883 
  // Only the master PE of each device prints, so each device reports once.
1884  if (isMasterPe) {
1885  const int deviceIndex = deviceCUDA->getDeviceIndex();
1886  const int numPatchesHome = patchData->devData[deviceIndex].numPatchesHome;
1887  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1888 
  // Lock so per-PE output blocks are not interleaved across devices.
1889  CmiLock(patchData->printlock);
1890  CkPrintf("PE: %d\n", CkMyPe());
1891 
1892  CkPrintf("[%d] Home patches %d Local patches %d\n", CkMyPe(), numPatchesHome, localPatches.size());
1893 
  // Home patches occupy the front of localPatches (indices [0, numPatchesHome)),
  // per the ordering established when the device patch map was constructed.
1894  CkPrintf("Home Patches: ");
1895  for (int i = 0; i < numPatchesHome; i++) {
1896  CkPrintf("%d ", localPatches[i].patchID);
1897  }
1898  CkPrintf("\n");
1899 
  // Remaining entries are proxy patches.
1900  CkPrintf("Proxy Patches: ");
1901  for (int i = numPatchesHome; i < localPatches.size(); i++) {
1902  CkPrintf("%d ", localPatches[i].patchID);
1903  }
1904  CkPrintf("\n");
1905 
1906  CmiUnlock(patchData->printlock);
1907  }
1909 }
1910 
// Resets this PE's device patch map so it can be rebuilt from scratch:
// clears the once-per-PE guard flag and, on each device's master PE, drops
// the per-device home/local/peer patch records.
// NOTE(review): the signature line (original line ~1911) is missing from
// this listing; the guard comment below indicates this is
// clearDevicePatchMap (apparently void and argument-less) -- confirm
// against the full source.
1912  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1913  patchData = cpdata.ckLocalBranch();
1914 
1915  const bool isMasterPe = deviceCUDA->getMasterPe() == CkMyPe();
1916 
1917  // clearDevicePatchMap should only be called once per PE
1918  if (!patchData->devicePatchMapFlag[CkMyPe()]) return;
1919  patchData->devicePatchMapFlag[CkMyPe()] = 0;
1920 
1921  // One thread per GPU will execute this block
1922  if (isMasterPe) {
1923  const int deviceIndex = deviceCUDA->getDeviceIndex();
1924 
1925  using NBPatchRecord = CudaComputeNonbonded::PatchRecord;
1926  std::vector<HomePatch*>& homePatches = patchData->devData[deviceIndex].patches;
1927  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1928  std::vector<CudaPeerRecord>& peerPatches = patchData->devData[deviceIndex].h_peerPatches;
1929 
  // Drop all per-device records; they are regenerated on the next
  // construction of the device patch map.
1930  homePatches.clear();
1931  localPatches.clear();
1932  peerPatches.clear();
1934  }
1935 }
1936 
/*
 * Updates the metadata structures storing the patch data for the GPU-resident
 * code path.
 *
 * This is called every migration step. The actual patch-to-device mapping
 * stays the same, but the atom counts per patch change.
 */
// Refreshes per-patch atom counts, buffer offsets, and peer-record copies
// for this device after atom migration. Only the master PE of each device
// performs the updates; the work proceeds in four master-PE phases
// (home counts -> proxy counts -> prefix-sum offsets -> peer records).
// NOTE(review): this doxygen-derived listing elides several single lines
// (apparently inter-phase synchronization calls and two variable
// initializations); each elision is flagged inline -- confirm against the
// full Sequencer.C before editing.
void Sequencer::updateDevicePatchMap(int startup) {
  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
  patchData = cpdata.ckLocalBranch();

  // Only the master PE of each device mutates that device's records.
  const bool isMasterPe = deviceCUDA->getMasterPe() == CkMyPe();

  // Phase 1: refresh home-patch atom counts from the HomePatch objects.
  if (isMasterPe) {
    const int deviceIndex = deviceCUDA->getDeviceIndex();
    const int numPatchesHome = patchData->devData[deviceIndex].numPatchesHome;
    std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
    // NOTE(review): the line initializing cudaMgr (presumably via
    // ComputeCUDAMgr::getComputeCUDAMgr()) is elided in this listing.
    CudaComputeNonbonded* cudaNbond = cudaMgr->getCudaComputeNonbonded();

    int max_atom_count = 0;
    int total_atom_count = 0;

    // Update the atom count of home patches
    for (int i = 0; i < numPatchesHome; i++) {
      Patch* patch = NULL;
      // Search the PEs sharing this device until one owns the patch.
      // NOTE(review): the line obtaining pm (presumably the PatchMap of the
      // j-th PE sharing the device) is elided in this listing.
      for(int j = 0; j < deviceCUDA->getNumPesSharingDevice(); j++){
        patch = pm->patch(localPatches[i].patchID);
        if (patch != NULL) break;
      }
      if (patch == NULL) NAMD_die("Sequencer: Failed to find patch in updateDevicePatchMap");

      localPatches[i].numAtoms = patch->getNumAtoms();
      // Nonbonded kernels need the count padded to their tile granularity.
      localPatches[i].numAtomsNBPad = CudaComputeNonbondedKernel::computeAtomPad(localPatches[i].numAtoms);

      if (localPatches[i].numAtoms > max_atom_count) max_atom_count = localPatches[i].numAtoms;
      total_atom_count += localPatches[i].numAtoms;
    }
  }
  // NOTE(review): one line elided here (likely a barrier so all devices
  // finish publishing home counts before peers read them).

  // Update the proxy patches next, using the home patch atom counts of other devices
  if (isMasterPe) {
    const int deviceIndex = deviceCUDA->getDeviceIndex();
    const int numPatchesHome = patchData->devData[deviceIndex].numPatchesHome;
    const int numPatchesHomeAndProxy = patchData->devData[deviceIndex].numPatchesHomeAndProxy;
    std::vector<CudaPeerRecord>& peerPatches = patchData->devData[deviceIndex].h_peerPatches;
    std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;

    // Each proxy patch copies its counts from the owning device's record,
    // located via the first peer record of that patch.
    for (int i = numPatchesHome; i < numPatchesHomeAndProxy; i++) {
      const int index = localPatches[i].peerRecordStartIndex;
      const int devIdx = peerPatches[index].deviceIndex;
      const int peerIdx = peerPatches[index].patchIndex;
      const CudaLocalRecord peer = patchData->devData[devIdx].h_localPatches[peerIdx];

      localPatches[i].numAtoms = peer.numAtoms;
      localPatches[i].numAtomsNBPad = peer.numAtomsNBPad;
    }
  }
  // NOTE(review): one line elided here (likely another inter-device barrier).

  // Computes the offset for each patch using the atom counts
  if (isMasterPe) {
    const int deviceIndex = deviceCUDA->getDeviceIndex();
    const int numPatchesHomeAndProxy = patchData->devData[deviceIndex].numPatchesHomeAndProxy;
    std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;

    // Exclusive prefix sum over atom counts yields each patch's start offset
    // into the device-wide atom buffers (plain and NB-padded variants).
    int runningOffset = 0;
    int runningOffsetNBPad = 0;
    // TODO Change to a C++ prefix sum
    for (int i = 0; i < numPatchesHomeAndProxy; i++) {
      localPatches[i].bufferOffset = runningOffset;
      localPatches[i].bufferOffsetNBPad = runningOffsetNBPad;
      runningOffset += localPatches[i].numAtoms;
      runningOffsetNBPad += localPatches[i].numAtomsNBPad;
    }
  }
  // NOTE(review): one line elided here (likely a barrier so offsets are
  // published before peers read them).

  // Update the peer records using the local record data
  if (isMasterPe) {
    const int deviceIndex = deviceCUDA->getDeviceIndex();
    const int numPatchesHomeAndProxy = patchData->devData[deviceIndex].numPatchesHomeAndProxy;
    std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
    std::vector<CudaPeerRecord>& peerPatches = patchData->devData[deviceIndex].h_peerPatches;


    // Mirror each peer's freshly computed buffer offsets into our records.
    for (int i = 0; i < peerPatches.size(); i++) {
      const int devIdx = peerPatches[i].deviceIndex;
      const int peerIdx = peerPatches[i].patchIndex;
      const CudaLocalRecord peer = patchData->devData[devIdx].h_localPatches[peerIdx];

      peerPatches[i].bufferOffset = peer.bufferOffset;
      peerPatches[i].bufferOffsetNBPad = peer.bufferOffsetNBPad;
    }

    // Update inline copy of peer data
    // (only the first num_inline_peer records fit inside CudaLocalRecord)
    for (int i = 0; i < numPatchesHomeAndProxy; i++) {
      const int numPeerRecord = localPatches[i].numPeerRecord;
      const int peerOffset = localPatches[i].peerRecordStartIndex;

      for (int j = 0; j < std::min(numPeerRecord, CudaLocalRecord::num_inline_peer); j++) {
        localPatches[i].inline_peers[j] = peerPatches[peerOffset+j];
      }
    }
  }
  // NOTE(review): one line elided here before the closing brace.
}
2047 
2048 #endif
2049 
2050 
// Main CPU-side SOA (structure-of-arrays) integration loop for one patch.
// Implements velocity Verlet with r-RESPA-style multiple time stepping:
// each step performs a half kick, drift, (optionally migrated) force
// evaluation, full kick, and a corrective half kick, with Langevin
// thermostat (BBK) and Langevin-piston barostat hooks, half/full-step
// reduction submissions, and trajectory/restart output interleaved.
// The loop runs until patch->flags.step reaches simParams->N.
// NOTE(review): this doxygen-derived listing elides lines that carried
// hyperlinks -- mostly call openers (e.g. submitHalfstep(, submitReductions(,
// langevinPiston(, langevinVelocitiesBBK1/2() -- each elision is flagged
// inline below; consult the full Sequencer.C when editing.
void Sequencer::integrate_SOA(int scriptTask) {
  //
  // Below when accessing the array buffers for position, velocity, force,
  // note that we don't want to set up pointers directly to the buffers
  // because the allocations might get resized after atom migration.
  //

#ifdef TIMER_COLLECTION
  TimerSet& t = patch->timerSet;
#endif
  // Histogram-style timers, one per phase of the integration loop.
  TIMER_INIT_WIDTH(t, KICK, simParams->timerBinWidth);
  TIMER_INIT_WIDTH(t, MAXMOVE, simParams->timerBinWidth);
  TIMER_INIT_WIDTH(t, DRIFT, simParams->timerBinWidth);
  TIMER_INIT_WIDTH(t, PISTON, simParams->timerBinWidth);
  TIMER_INIT_WIDTH(t, SUBMITHALF, simParams->timerBinWidth);
  TIMER_INIT_WIDTH(t, VELBBK1, simParams->timerBinWidth);
  TIMER_INIT_WIDTH(t, VELBBK2, simParams->timerBinWidth);
  TIMER_INIT_WIDTH(t, RATTLE1, simParams->timerBinWidth);
  TIMER_INIT_WIDTH(t, SUBMITFULL, simParams->timerBinWidth);
  TIMER_INIT_WIDTH(t, SUBMITCOLLECT, simParams->timerBinWidth);

  // Keep track of the step number.
  int &step = patch->flags.step;
  step = simParams->firstTimestep;

  // For multiple time stepping, which force boxes are used?
  int &maxForceUsed = patch->flags.maxForceUsed;
  int &maxForceMerged = patch->flags.maxForceMerged;
  maxForceUsed = Results::normal;
  maxForceMerged = Results::normal;

  // Keep track of total steps and steps per cycle.
  const int numberOfSteps = simParams->N;
  //const int stepsPerCycle = simParams->stepsPerCycle;
  CheckStep stepsPerCycle;
  stepsPerCycle.init(step, simParams->stepsPerCycle);
  // The fundamental time step, get the scaling right for velocity units.
  const BigReal timestep = simParams->dt * RECIP_TIMEFACTOR;

  //const int nonbondedFrequency = simParams->nonbondedFrequency;
  //slowFreq = nonbondedFrequency;
  CheckStep nonbondedFrequency;
  // NOTE(review): one line elided here (likely assigning slowFreq from
  // simParams->nonbondedFrequency, per the comment above).
  // The step size for short-range nonbonded forces.
  const BigReal nbondstep = timestep * simParams->nonbondedFrequency;
  int &doNonbonded = patch->flags.doNonbonded;
  //doNonbonded = (step >= numberOfSteps) || !(step%nonbondedFrequency);
  doNonbonded = (step >= numberOfSteps) ||
    nonbondedFrequency.init(step, simParams->nonbondedFrequency);
  //if ( nonbondedFrequency == 1 ) maxForceMerged = Results::nbond;
  if ( nonbondedFrequency.period == 1 ) maxForceMerged = Results::nbond;
  if ( doNonbonded ) maxForceUsed = Results::nbond;

  // Do we do full electrostatics?
  const int dofull = ( simParams->fullElectFrequency ? 1 : 0 );
  //const int fullElectFrequency = simParams->fullElectFrequency;
  //if ( dofull ) slowFreq = fullElectFrequency;
  CheckStep fullElectFrequency;
  if ( dofull ) slowFreq = simParams->fullElectFrequency;
  // The step size for long-range electrostatics.
  const BigReal slowstep = timestep * simParams->fullElectFrequency;
  int &doFullElectrostatics = patch->flags.doFullElectrostatics;
  //doFullElectrostatics = (dofull &&
  //    ((step >= numberOfSteps) || !(step%fullElectFrequency)));
  doFullElectrostatics = (dofull &&
      ((step >= numberOfSteps) ||
       fullElectFrequency.init(step, simParams->fullElectFrequency)));
  //if ( dofull && fullElectFrequency == 1 ) maxForceMerged = Results::slow;
  if ( dofull && fullElectFrequency.period == 1 ) maxForceMerged = Results::slow;
  if ( doFullElectrostatics ) maxForceUsed = Results::slow;

  // Bother to calculate energies?
  int &doEnergy = patch->flags.doEnergy;
  //int energyFrequency = simParams->outputEnergies;
  CheckStep energyFrequency;
  // With alchemical transformations on, energies are also needed on
  // alchOutFreq boundaries, hence the GCD of the two periods.
  int newComputeEnergies = simParams->computeEnergies;
  if(simParams->alchOn) newComputeEnergies = NAMD_gcd(newComputeEnergies, simParams->alchOutFreq);
  doEnergy = energyFrequency.init(step, newComputeEnergies);

  // Do we need to return forces to TCL script or Colvar module?
  int doTcl = simParams->tclForcesOn;
  int doColvars = simParams->colvarsOn;
  int doGlobal = doTcl || doColvars;
  // NOTE(review): one line elided here (likely obtaining the ComputeGlobal
  // object referenced below as computeGlobal).
  int &doVirial = patch->flags.doVirial;
  doVirial = 1;

  // The following flags have to be explicitly disabled in Patch object.
  patch->flags.doMolly = 0;
  patch->flags.doLoweAndersen = 0;
  patch->flags.doGBIS = 0;
  patch->flags.doLCPO = 0;

  // Square of maximum velocity for simulation safety check
  // (an atom must not travel more than the cutoff in a single step)
  const BigReal maxvel2 =
    (simParams->cutoff * simParams->cutoff) / (timestep * timestep);

  // check for Langevin piston
  // set period beyond numberOfSteps to disable
  CheckStep langevinPistonFrequency;
  langevinPistonFrequency.init(step,
      (simParams->langevinPistonOn ? slowFreq : numberOfSteps+1 ),
      (simParams->langevinPistonOn ? -1-slowFreq/2 : 0) /* = delta */);

  // check for output
  // set period beyond numberOfSteps to disable
  CheckStep restartFrequency;
  restartFrequency.init(step, (simParams->restartFrequency ?
        simParams->restartFrequency : numberOfSteps+1) );
  CheckStep dcdFrequency;
  dcdFrequency.init(step, (simParams->dcdFrequency ?
        simParams->dcdFrequency : numberOfSteps+1) );
  CheckStep velDcdFrequency;
  velDcdFrequency.init(step, (simParams->velDcdFrequency ?
        simParams->velDcdFrequency : numberOfSteps+1) );
  CheckStep forceDcdFrequency;
  forceDcdFrequency.init(step, (simParams->forceDcdFrequency ?
        simParams->forceDcdFrequency : numberOfSteps+1) );
  CheckStep imdFrequency;
  imdFrequency.init(step, (simParams->IMDfreq ?
        simParams->IMDfreq : numberOfSteps+1) );

  if ( scriptTask == SCRIPT_RUN ) {
    // enforce rigid bond constraints on initial positions
    TIMER_START(t, RATTLE1);
    rattle1_SOA(0., 0);
    TIMER_STOP(t, RATTLE1);

    // must migrate here!
    int natoms = patch->patchDataSOA.numAtoms;
    runComputeObjects_SOA(1, step<numberOfSteps, step);
    // kick -0.5
    TIMER_START(t, KICK);
    addForceToMomentum_SOA(-0.5, timestep, nbondstep, slowstep,
#ifndef SOA_SIMPLIFY_PARAMS
        patch->patchDataSOA.recipMass,
        patch->patchDataSOA.f_normal_x,
        patch->patchDataSOA.f_normal_y,
        patch->patchDataSOA.f_normal_z,
        patch->patchDataSOA.f_nbond_x,
        patch->patchDataSOA.f_nbond_y,
        patch->patchDataSOA.f_nbond_z,
        patch->patchDataSOA.f_slow_x,
        patch->patchDataSOA.f_slow_y,
        patch->patchDataSOA.f_slow_z,
        patch->patchDataSOA.vel_x,
        patch->patchDataSOA.vel_y,
        patch->patchDataSOA.vel_z,
        patch->patchDataSOA.numAtoms,
#endif
        maxForceUsed
        );
    TIMER_STOP(t, KICK);

    TIMER_START(t, RATTLE1);
    rattle1_SOA(-timestep, 0);
    TIMER_STOP(t, RATTLE1);

    TIMER_START(t, SUBMITHALF);
    // NOTE(review): call opener elided in this listing (likely
    // submitHalfstep( taking the argument list below).
#ifndef SOA_SIMPLIFY_PARAMS
        patch->patchDataSOA.hydrogenGroupSize,
        patch->patchDataSOA.mass,
        patch->patchDataSOA.vel_x,
        patch->patchDataSOA.vel_y,
        patch->patchDataSOA.vel_z,
        patch->patchDataSOA.numAtoms
#endif
        );
    TIMER_STOP(t, SUBMITHALF);

    // kick 1.0
    TIMER_START(t, KICK);
    addForceToMomentum_SOA(1.0, timestep, nbondstep, slowstep,
#ifndef SOA_SIMPLIFY_PARAMS
        patch->patchDataSOA.recipMass,
        patch->patchDataSOA.f_normal_x,
        patch->patchDataSOA.f_normal_y,
        patch->patchDataSOA.f_normal_z,
        patch->patchDataSOA.f_nbond_x,
        patch->patchDataSOA.f_nbond_y,
        patch->patchDataSOA.f_nbond_z,
        patch->patchDataSOA.f_slow_x,
        patch->patchDataSOA.f_slow_y,
        patch->patchDataSOA.f_slow_z,
        patch->patchDataSOA.vel_x,
        patch->patchDataSOA.vel_y,
        patch->patchDataSOA.vel_z,
        patch->patchDataSOA.numAtoms,
#endif
        maxForceUsed
        );
    TIMER_STOP(t, KICK);

    TIMER_START(t, RATTLE1);
    rattle1_SOA(timestep, 1);
    TIMER_STOP(t, RATTLE1);

    // save total force in computeGlobal
    if (doGlobal) {
      computeGlobal->saveTotalForces(patch);
    }

    TIMER_START(t, SUBMITHALF);
    // NOTE(review): call opener elided (likely submitHalfstep().
#ifndef SOA_SIMPLIFY_PARAMS
        patch->patchDataSOA.hydrogenGroupSize,
        patch->patchDataSOA.mass,
        patch->patchDataSOA.vel_x,
        patch->patchDataSOA.vel_y,
        patch->patchDataSOA.vel_z,
        patch->patchDataSOA.numAtoms
#endif
        );
    TIMER_STOP(t, SUBMITHALF);

    // kick -0.5
    TIMER_START(t, KICK);
    addForceToMomentum_SOA(-0.5, timestep, nbondstep, slowstep,
#ifndef SOA_SIMPLIFY_PARAMS
        patch->patchDataSOA.recipMass,
        patch->patchDataSOA.f_normal_x,
        patch->patchDataSOA.f_normal_y,
        patch->patchDataSOA.f_normal_z,
        patch->patchDataSOA.f_nbond_x,
        patch->patchDataSOA.f_nbond_y,
        patch->patchDataSOA.f_nbond_z,
        patch->patchDataSOA.f_slow_x,
        patch->patchDataSOA.f_slow_y,
        patch->patchDataSOA.f_slow_z,
        patch->patchDataSOA.vel_x,
        patch->patchDataSOA.vel_y,
        patch->patchDataSOA.vel_z,
        patch->patchDataSOA.numAtoms,
#endif
        maxForceUsed
        );
    TIMER_STOP(t, KICK);

    TIMER_START(t, SUBMITFULL);
    // NOTE(review): call opener elided (likely submitReductions().
#ifndef SOA_SIMPLIFY_PARAMS
        patch->patchDataSOA.hydrogenGroupSize,
        patch->patchDataSOA.mass,
        patch->patchDataSOA.pos_x,
        patch->patchDataSOA.pos_y,
        patch->patchDataSOA.pos_z,
        patch->patchDataSOA.vel_x,
        patch->patchDataSOA.vel_y,
        patch->patchDataSOA.vel_z,
        patch->patchDataSOA.f_normal_x,
        patch->patchDataSOA.f_normal_y,
        patch->patchDataSOA.f_normal_z,
        patch->patchDataSOA.f_nbond_x,
        patch->patchDataSOA.f_nbond_y,
        patch->patchDataSOA.f_nbond_z,
        patch->patchDataSOA.f_slow_x,
        patch->patchDataSOA.f_slow_y,
        patch->patchDataSOA.f_slow_z,
        patch->patchDataSOA.numAtoms
#endif
        );
    TIMER_STOP(t, SUBMITFULL);

    rebalanceLoad(step);
  } // scriptTask == SCRIPT_RUN

#if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
  // Profiling events are restricted to a user-selected patch ID range
  // and step range; patch 0 toggles the profiler on/off.
  int& eon = patch->flags.event_on;
  int epid = (simParams->beginEventPatchID <= patch->getPatchID()
      && patch->getPatchID() <= simParams->endEventPatchID);
  int beginStep = simParams->beginEventStep;
  int endStep = simParams->endEventStep;
  bool controlProfiling = patch->getPatchID() == 0;
#endif

  for ( ++step; step <= numberOfSteps; ++step ) {
    // Count DCD-selection outputs due on this step (16 possible selections).
    int dcdSelectionChecks=0;
    Molecule *molecule = Node::Object()->molecule;
    for(int dcdindex=0; dcdindex<16;++dcdindex)
    {
      int dcdSelectionFrequency = molecule->dcdSelectionParams[dcdindex].frequency;
      if(dcdSelectionFrequency && step % dcdSelectionFrequency==0)
        dcdSelectionChecks++;
    }
    // Nonzero when any output (restart/DCD/velDCD/forceDCD/IMD/selection)
    // is due this step; summed because we can't short-circuit CheckStep.
    const int isCollection = restartFrequency.check(step) +
      dcdFrequency.check(step) + velDcdFrequency.check(step) +
      forceDcdFrequency.check(step) + imdFrequency.check(step) +
      dcdSelectionChecks;
    const int isMigration = stepsPerCycle.check(step);
    doEnergy = energyFrequency.check(step);
    DebugM(3,"doGlobal now "<< doGlobal<<"\n"<<endi);

#if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
    eon = epid && (beginStep < step && step <= endStep);

    if (controlProfiling && step == beginStep) {
      // NOTE(review): profiler-start call elided in this listing.
    }
    if (controlProfiling && step == endStep) {
      // NOTE(review): profiler-stop call elided in this listing.
    }
// NAMD_EVENT_START(eon, NamdProfileEvent::INTEGRATE_SOA_1);
    char buf[32];
    sprintf(buf, "%s: %d", NamdProfileEventStr[NamdProfileEvent::INTEGRATE_SOA_1], patch->getPatchID());
    NAMD_EVENT_START_EX(eon, NamdProfileEvent::INTEGRATE_SOA_1, buf);
#endif

    if ( simParams->stochRescaleOn ) {
      // NOTE(review): stochastic-rescale velocity call elided in listing.
    }

    if ( simParams->berendsenPressureOn ) {
      // NOTE(review): call opener elided (likely berendsenPressure().
#ifndef SOA_SIMPLIFY_PARAMS
          patch->patchDataSOA.hydrogenGroupSize,
          patch->patchDataSOA.mass,
          patch->patchDataSOA.pos_x,
          patch->patchDataSOA.pos_y,
          patch->patchDataSOA.pos_z,
          patch->patchDataSOA.numAtoms,
#endif
          step);
    }

    // kick 0.5
    TIMER_START(t, KICK);
    addForceToMomentum_SOA(0.5, timestep, nbondstep, slowstep,
#ifndef SOA_SIMPLIFY_PARAMS
        patch->patchDataSOA.recipMass,
        patch->patchDataSOA.f_normal_x,
        patch->patchDataSOA.f_normal_y,
        patch->patchDataSOA.f_normal_z,
        patch->patchDataSOA.f_nbond_x,
        patch->patchDataSOA.f_nbond_y,
        patch->patchDataSOA.f_nbond_z,
        patch->patchDataSOA.f_slow_x,
        patch->patchDataSOA.f_slow_y,
        patch->patchDataSOA.f_slow_z,
        patch->patchDataSOA.vel_x,
        patch->patchDataSOA.vel_y,
        patch->patchDataSOA.vel_z,
        patch->patchDataSOA.numAtoms,
#endif
        maxForceUsed
        );
    TIMER_STOP(t, KICK);

    // maximumMove checks velocity bound on atoms
    TIMER_START(t, MAXMOVE);
    maximumMove_SOA(timestep, maxvel2
#ifndef SOA_SIMPLIFY_PARAMS
        ,
        patch->patchDataSOA.vel_x,
        patch->patchDataSOA.vel_y,
        patch->patchDataSOA.vel_z,
        patch->patchDataSOA.numAtoms
#endif
        );
    TIMER_STOP(t, MAXMOVE);


    NAMD_EVENT_STOP(eon, NamdProfileEvent::INTEGRATE_SOA_1);

    // Check to see if Langevin piston is enabled this step:
    // ! ((step-1-slowFreq/2) % slowFreq)
    if ( langevinPistonFrequency.check(step) ) {
      // if (langevinPistonStep) {
      // drift 0.5
      TIMER_START(t, DRIFT);
      addVelocityToPosition_SOA(0.5*timestep
#ifndef SOA_SIMPLIFY_PARAMS
          ,
          patch->patchDataSOA.vel_x,
          patch->patchDataSOA.vel_y,
          patch->patchDataSOA.vel_z,
          patch->patchDataSOA.pos_x,
          patch->patchDataSOA.pos_y,
          patch->patchDataSOA.pos_z,
          patch->patchDataSOA.numAtoms
#endif
          );
      TIMER_STOP(t, DRIFT);
      // There is a blocking receive inside of langevinPiston()
      // that might suspend the current thread of execution,
      // so split profiling around this conditional block.
      // NOTE(review): call opener elided (likely langevinPiston().
#ifndef SOA_SIMPLIFY_PARAMS
          patch->patchDataSOA.hydrogenGroupSize,
          patch->patchDataSOA.mass,
          patch->patchDataSOA.pos_x,
          patch->patchDataSOA.pos_y,
          patch->patchDataSOA.pos_z,
          patch->patchDataSOA.vel_x,
          patch->patchDataSOA.vel_y,
          patch->patchDataSOA.vel_z,
          patch->patchDataSOA.numAtoms,
#endif
          step
          );

      // drift 0.5
      TIMER_START(t, DRIFT);
      addVelocityToPosition_SOA(0.5*timestep
#ifndef SOA_SIMPLIFY_PARAMS
          ,
          patch->patchDataSOA.vel_x,
          patch->patchDataSOA.vel_y,
          patch->patchDataSOA.vel_z,
          patch->patchDataSOA.pos_x,
          patch->patchDataSOA.pos_y,
          patch->patchDataSOA.pos_z,
          patch->patchDataSOA.numAtoms
#endif
          );
      TIMER_STOP(t, DRIFT);
    }
    else {
      // drift 1.0
      TIMER_START(t, DRIFT);
      addVelocityToPosition_SOA(timestep
#ifndef SOA_SIMPLIFY_PARAMS
          ,
          patch->patchDataSOA.vel_x,
          patch->patchDataSOA.vel_y,
          patch->patchDataSOA.vel_z,
          patch->patchDataSOA.pos_x,
          patch->patchDataSOA.pos_y,
          patch->patchDataSOA.pos_z,
          patch->patchDataSOA.numAtoms
#endif
          );
      TIMER_STOP(t, DRIFT);
    }

    //NAMD_EVENT_START(eon, NamdProfileEvent::INTEGRATE_SOA_2);

    // There are NO sends in submitHalfstep() just local summation
    // into the Reduction struct.
    TIMER_START(t, SUBMITHALF);
    // NOTE(review): call opener elided (likely submitHalfstep().
#ifndef SOA_SIMPLIFY_PARAMS
        patch->patchDataSOA.hydrogenGroupSize,
        patch->patchDataSOA.mass,
        patch->patchDataSOA.vel_x,
        patch->patchDataSOA.vel_y,
        patch->patchDataSOA.vel_z,
        patch->patchDataSOA.numAtoms
#endif
        );
    TIMER_STOP(t, SUBMITHALF);

    //doNonbonded = !(step%nonbondedFrequency);
    doNonbonded = nonbondedFrequency.check(step);
    //doFullElectrostatics = (dofull && !(step%fullElectFrequency));
    doFullElectrostatics = (dofull && fullElectFrequency.check(step));

    maxForceUsed = Results::normal;
    if ( doNonbonded ) maxForceUsed = Results::nbond;
    if ( doFullElectrostatics ) maxForceUsed = Results::slow;

    // Migrate Atoms on stepsPerCycle
    // Check to see if this is energy evaluation step:
    // doEnergy = ! ( step % energyFrequency );
    doVirial = 1;
    doKineticEnergy = 1;
    doMomenta = 1;

    //NAMD_EVENT_STOP(eon, NamdProfileEvent::INTEGRATE_SOA_2); // integrate_SOA 2

    // The current thread of execution will suspend in runComputeObjects().
    // Check to see if we are at a migration step:
    // runComputeObjects_SOA(!(step%stepsPerCycle), step<numberOfSteps);
    runComputeObjects_SOA(isMigration, step<numberOfSteps, step);

    NAMD_EVENT_START(eon, NamdProfileEvent::INTEGRATE_SOA_3);

    TIMER_START(t, VELBBK1);
    // NOTE(review): call opener elided (likely langevinVelocitiesBBK1().
        timestep
#ifndef SOA_SIMPLIFY_PARAMS
        ,
        patch->patchDataSOA.langevinParam,
        patch->patchDataSOA.vel_x,
        patch->patchDataSOA.vel_y,
        patch->patchDataSOA.vel_z,
        patch->patchDataSOA.numAtoms
#endif
        );
    TIMER_STOP(t, VELBBK1);

    // kick 1.0
    TIMER_START(t, KICK);
    addForceToMomentum_SOA(1.0, timestep, nbondstep, slowstep,
#ifndef SOA_SIMPLIFY_PARAMS
        patch->patchDataSOA.recipMass,
        patch->patchDataSOA.f_normal_x,
        patch->patchDataSOA.f_normal_y,
        patch->patchDataSOA.f_normal_z,
        patch->patchDataSOA.f_nbond_x,
        patch->patchDataSOA.f_nbond_y,
        patch->patchDataSOA.f_nbond_z,
        patch->patchDataSOA.f_slow_x,
        patch->patchDataSOA.f_slow_y,
        patch->patchDataSOA.f_slow_z,
        patch->patchDataSOA.vel_x,
        patch->patchDataSOA.vel_y,
        patch->patchDataSOA.vel_z,
        patch->patchDataSOA.numAtoms,
#endif
        maxForceUsed
        );
    TIMER_STOP(t, KICK);

    TIMER_START(t, VELBBK2);
    // NOTE(review): call opener elided (likely langevinVelocitiesBBK2().
        timestep
#ifndef SOA_SIMPLIFY_PARAMS
        ,
        patch->patchDataSOA.langevinParam,
        patch->patchDataSOA.langScalVelBBK2,
        patch->patchDataSOA.langScalRandBBK2,
        patch->patchDataSOA.gaussrand_x,
        patch->patchDataSOA.gaussrand_y,
        patch->patchDataSOA.gaussrand_z,
        patch->patchDataSOA.vel_x,
        patch->patchDataSOA.vel_y,
        patch->patchDataSOA.vel_z,
        patch->patchDataSOA.numAtoms
#endif
        );
    TIMER_STOP(t, VELBBK2);

    TIMER_START(t, RATTLE1);
    rattle1_SOA(timestep, 1);
    TIMER_STOP(t, RATTLE1);

    // save total force in computeGlobal
    if (doGlobal) {
      computeGlobal->saveTotalForces(patch);
    }

    TIMER_START(t, SUBMITHALF);
    // NOTE(review): call opener elided (likely submitHalfstep().
#ifndef SOA_SIMPLIFY_PARAMS
        patch->patchDataSOA.hydrogenGroupSize,
        patch->patchDataSOA.mass,
        patch->patchDataSOA.vel_x,
        patch->patchDataSOA.vel_y,
        patch->patchDataSOA.vel_z,
        patch->patchDataSOA.numAtoms
#endif
        );
    TIMER_STOP(t, SUBMITHALF);

    // kick -0.5
    TIMER_START(t, KICK);
    addForceToMomentum_SOA(-0.5, timestep, nbondstep, slowstep,
#ifndef SOA_SIMPLIFY_PARAMS
        patch->patchDataSOA.recipMass,
        patch->patchDataSOA.f_normal_x,
        patch->patchDataSOA.f_normal_y,
        patch->patchDataSOA.f_normal_z,
        patch->patchDataSOA.f_nbond_x,
        patch->patchDataSOA.f_nbond_y,
        patch->patchDataSOA.f_nbond_z,
        patch->patchDataSOA.f_slow_x,
        patch->patchDataSOA.f_slow_y,
        patch->patchDataSOA.f_slow_z,
        patch->patchDataSOA.vel_x,
        patch->patchDataSOA.vel_y,
        patch->patchDataSOA.vel_z,
        patch->patchDataSOA.numAtoms,
#endif
        maxForceUsed
        );
    TIMER_STOP(t, KICK);

    // XXX rattle2_SOA(timestep,step);

    TIMER_START(t, SUBMITFULL);
    // NOTE(review): call opener elided (likely submitReductions().
#ifndef SOA_SIMPLIFY_PARAMS
        patch->patchDataSOA.hydrogenGroupSize,
        patch->patchDataSOA.mass,
        patch->patchDataSOA.pos_x,
        patch->patchDataSOA.pos_y,
        patch->patchDataSOA.pos_z,
        patch->patchDataSOA.vel_x,
        patch->patchDataSOA.vel_y,
        patch->patchDataSOA.vel_z,
        patch->patchDataSOA.f_normal_x,
        patch->patchDataSOA.f_normal_y,
        patch->patchDataSOA.f_normal_z,
        patch->patchDataSOA.f_nbond_x,
        patch->patchDataSOA.f_nbond_y,
        patch->patchDataSOA.f_nbond_z,
        patch->patchDataSOA.f_slow_x,
        patch->patchDataSOA.f_slow_y,
        patch->patchDataSOA.f_slow_z,
        patch->patchDataSOA.numAtoms
#endif
        );
    TIMER_STOP(t, SUBMITFULL);
#ifdef TESTPID
    // Debug dump of per-patch forces for regression comparison.
    if (1) {
      int pid = TESTPID;
      if (patch->patchID == pid) {
        const PatchDataSOA& p = patch->patchDataSOA;
        int n = p.numAtoms;
#if 0
        fprintf(stderr, "Patch %d has %d atoms\n", pid, n);
        fprintf(stderr, "%3s %8s %12s %12s %12s\n",
            "", "id", "fnormal_x", "fnbond_x", "fslow_x");
        for (int i=0; i < n; i++) {
          int index = p.id[i];
          fprintf(stderr, "%3d %8d %12.8f %12.8f %12.8f\n",
              i, index, p.f_normal_x[i], p.f_nbond_x[i], p.f_slow_x[i]);
        }
#else
        Vector *f_normal = new Vector[n];
        Vector *f_nbond = new Vector[n];
        Vector *f_slow = new Vector[n];
        for (int i=0; i < n; i++) {
          f_normal[i].x = p.f_normal_x[i];
          f_normal[i].y = p.f_normal_y[i];
          f_normal[i].z = p.f_normal_z[i];
          f_nbond[i].x = p.f_nbond_x[i];
          f_nbond[i].y = p.f_nbond_y[i];
          f_nbond[i].z = p.f_nbond_z[i];
          f_slow[i].x = p.f_slow_x[i];
          f_slow[i].y = p.f_slow_y[i];
          f_slow[i].z = p.f_slow_z[i];
        }
        TestArray_write<double>(
            "f_normal_good.bin", "f_normal good", (double*)f_normal, 3*n);
        TestArray_write<double>(
            "f_nbond_good.bin", "f_nbond good", (double*)f_nbond, 3*n);
        TestArray_write<double>(
            "f_slow_good.bin", "f_slow good", (double*)f_slow, 3*n);
        delete [] f_normal;
        delete [] f_nbond;
        delete [] f_slow;
#endif
      }
    }
#endif

    // Do collections if any checks below are "on."
    // We add because we can't short-circuit.
    TIMER_START(t, SUBMITCOLLECT);
    if (isCollection) {
      submitCollections_SOA(step);
    }
    TIMER_STOP(t, SUBMITCOLLECT);

    NAMD_EVENT_STOP(eon, NamdProfileEvent::INTEGRATE_SOA_3); // integrate_SOA 3

    rebalanceLoad(step);
  }

  // Mirror the SOA state back into the AOS atom arrays used elsewhere.
  patch->copy_updates_to_AOS();

  TIMER_DONE(t);
  if (patch->patchID == SPECIAL_PATCH_ID) {
    printf("Timer collection reporting in microseconds for "
        "Patch %d\n", patch->patchID);
    TIMER_REPORT(t);
  }
}
2721 
2722 
2723 // XXX inline it?
2724 // XXX does not handle fixed atoms
2725 // Each timestep: dt = scaling * (timestep / TIMEFACTOR);
2727  const double scaling,
2728  double dt_normal, // timestep Results::normal = 0
2729  double dt_nbond, // timestep Results::nbond = 1
2730  double dt_slow, // timestep Results::slow = 2
2731 #ifndef SOA_SIMPLIFY_PARAMS
2732  const double * __restrict recipMass,
2733  const double * __restrict f_normal_x, // force Results::normal = 0
2734  const double * __restrict f_normal_y,
2735  const double * __restrict f_normal_z,
2736  const double * __restrict f_nbond_x, // force Results::nbond = 1
2737  const double * __restrict f_nbond_y,
2738  const double * __restrict f_nbond_z,
2739  const double * __restrict f_slow_x, // force Results::slow = 2
2740  const double * __restrict f_slow_y,
2741  const double * __restrict f_slow_z,
2742  double * __restrict vel_x,
2743  double * __restrict vel_y,
2744  double * __restrict vel_z,
2745  int numAtoms,
2746 #endif
2747  int maxForceNumber
2748  ) {
2749  NAMD_EVENT_RANGE_2(patch->flags.event_on,
2750  NamdProfileEvent::ADD_FORCE_TO_MOMENTUM_SOA);
2751 
2752 #ifdef SOA_SIMPLIFY_PARAMS
2753  const double * __restrict recipMass = patch->patchDataSOA.recipMass;
2754  // force Results::normal = 0
2755  const double * __restrict f_normal_x = patch->patchDataSOA.f_normal_x;
2756  const double * __restrict f_normal_y = patch->patchDataSOA.f_normal_y;
2757  const double * __restrict f_normal_z = patch->patchDataSOA.f_normal_z;
2758  // force Results::nbond = 1
2759  const double * __restrict f_nbond_x = patch->patchDataSOA.f_nbond_x;
2760  const double * __restrict f_nbond_y = patch->patchDataSOA.f_nbond_y;
2761  const double * __restrict f_nbond_z = patch->patchDataSOA.f_nbond_z;
2762  // force Results::slow = 2
2763  const double * __restrict f_slow_x = patch->patchDataSOA.f_slow_x;
2764  const double * __restrict f_slow_y = patch->patchDataSOA.f_slow_y;
2765  const double * __restrict f_slow_z = patch->patchDataSOA.f_slow_z;
2766  double * __restrict vel_x = patch->patchDataSOA.vel_x;
2767  double * __restrict vel_y = patch->patchDataSOA.vel_y;
2768  double * __restrict vel_z = patch->patchDataSOA.vel_z;
2769  int numAtoms = patch->patchDataSOA.numAtoms;
2770 #endif
2771  //
2772  // We could combine each case into a single loop with breaks,
2773  // with all faster forces also summed, like addForceToMomentum3().
2774  //
2775  // Things to consider:
2776  // - Do we always use acceleration (f/m) instead of just plain force?
2777  // Then we could instead buffer accel_slow, accel_nbond, etc.
2778  // - We will always need one multiply, since each dt includes
2779  // also a scaling factor.
2780  //
2781 
2782 #if 0
2783  if(this->patch->getPatchID() == 538){
2784  // fprintf(stderr, "Old Positions %lf %lf %lf\n", patch->patchDataSOA.pos_x[43], patch->patchDataSOA.pos_y[43], patch->patchDataSOA.pos_z[43]);
2785  // fprintf(stderr, "Old Velocities %lf %lf %lf\n", vel_x[43], vel_y[43], vel_z[ 43]);
2786  // fprintf(stderr, "Adding forces %lf %lf %lf %lf %lf %lf %lf %lf %lf\n",
2787  // f_slow_x[43], f_slow_y[43], f_slow_z[43],
2788  // f_nbond_x[43], f_nbond_y[43], f_nbond_z[43],
2789  // f_normal_x[43], f_normal_y[43], f_normal_z[43]);
2790  fprintf(stderr, "Old Positions %lf %lf %lf\n", patch->patchDataSOA.pos_x[0], patch->patchDataSOA.pos_y[0], patch->patchDataSOA.pos_z[0]);
2791  fprintf(stderr, "Old Velocities %lf %lf %lf\n", vel_x[0], vel_y[0], vel_z[ 0]);
2792  fprintf(stderr, "Adding forces %lf %lf %lf %lf %lf %lf %lf %lf %lf\n",
2793  f_slow_x[43], f_slow_y[43], f_slow_z[43],
2794  f_nbond_x[43], f_nbond_y[43], f_nbond_z[43],
2795  f_normal_x[43], f_normal_y[43], f_normal_z[43]);
2796  }
2797 #endif
2798  switch (maxForceNumber) {
2799  case Results::slow:
2800  dt_slow *= scaling;
2801  for (int i=0; i < numAtoms; i++) {
2802  vel_x[i] += f_slow_x[i] * recipMass[i] * dt_slow;
2803  vel_y[i] += f_slow_y[i] * recipMass[i] * dt_slow;
2804  vel_z[i] += f_slow_z[i] * recipMass[i] * dt_slow;
2805  }
2806  // fall through because we will always have the "faster" forces
2807  case Results::nbond:
2808  dt_nbond *= scaling;
2809  for (int i=0; i < numAtoms; i++) {
2810  vel_x[i] += f_nbond_x[i] * recipMass[i] * dt_nbond;
2811  vel_y[i] += f_nbond_y[i] * recipMass[i] * dt_nbond;
2812  vel_z[i] += f_nbond_z[i] * recipMass[i] * dt_nbond;
2813  }
2814  // fall through because we will always have the "faster" forces
2815  case Results::normal:
2816  dt_normal *= scaling;
2817  for (int i=0; i < numAtoms; i++) {
2818  vel_x[i] += f_normal_x[i] * recipMass[i] * dt_normal;
2819  vel_y[i] += f_normal_y[i] * recipMass[i] * dt_normal;
2820  vel_z[i] += f_normal_z[i] * recipMass[i] * dt_normal;
2821  }
2822  }
2823 }
2824 
2825 
2826 // XXX inline it?
2827 // XXX does not handle fixed atoms
2828 // Timestep: dt = scaling * (timestep / TIMEFACTOR);
// Drift half of the velocity-Verlet update over the patch's SOA arrays:
//   pos[i] += vel[i] * dt   for each atom in the patch.
// Per the comment above, dt has already been scaled by 1/TIMEFACTOR by the
// caller; this routine applies no further scaling.
// NOTE(review): the function declarator line is elided in this listing
// (doxygen-linked); signature inferred from the profiling event below.
 2830  const double dt
 2831 #ifndef SOA_SIMPLIFY_PARAMS
 2832  ,
 2833  const double * __restrict vel_x,
 2834  const double * __restrict vel_y,
 2835  const double * __restrict vel_z,
 2836  double * __restrict pos_x,
 2837  double * __restrict pos_y,
 2838  double * __restrict pos_z,
 2839  int numAtoms
 2840 #endif
 2841  ) {
 2842  NAMD_EVENT_RANGE_2(patch->flags.event_on,
 2843  NamdProfileEvent::ADD_VELOCITY_TO_POSITION_SOA);
 2844 #ifdef SOA_SIMPLIFY_PARAMS
 2845  // Simplified-parameter build: pull the arrays straight from the patch's
 2846  // structure-of-arrays storage instead of taking them as arguments.
 2845  const double * __restrict vel_x = patch->patchDataSOA.vel_x;
 2846  const double * __restrict vel_y = patch->patchDataSOA.vel_y;
 2847  const double * __restrict vel_z = patch->patchDataSOA.vel_z;
 2848  double * __restrict pos_x = patch->patchDataSOA.pos_x;
 2849  double * __restrict pos_y = patch->patchDataSOA.pos_y;
 2850  double * __restrict pos_z = patch->patchDataSOA.pos_z;
 2851  int numAtoms = patch->patchDataSOA.numAtoms;
 2852 #endif
 2853  // Straight-line update; separate x/y/z arrays keep this loop vectorizable.
 2853  for (int i=0; i < numAtoms; i++) {
 2854  pos_x[i] += vel_x[i] * dt;
 2855  pos_y[i] += vel_y[i] * dt;
 2856  pos_z[i] += vel_z[i] * dt;
 2857  }
 2858 #if 0
 2859  // Disabled per-patch debugging dump (patch 538, atom 43).
 2859  if(this->patch->getPatchID() == 538){
 2860  fprintf(stderr, "New Positions %lf %lf %lf\n", pos_x[43], pos_y[43], pos_z[43]);
 2861  fprintf(stderr, "New Velocities %lf %lf %lf\n", vel_x[43], vel_y[43], vel_z[43]);
 2862  }
 2863 #endif
 2864 
 2865 }
2866 
2867 
// Accumulate half-step kinetic energy and virial contributions into the
// reduction, over the patch's SOA arrays:
//  (1) total kinetic energy and the mass-weighted velocity outer-product
//      (virial) summed per atom;
//  (2) "internal" kinetic energy and virial measured relative to each
//      hydrogen group's center-of-mass velocity.
// NOTE(review): the function declarator line is elided in this listing;
// name inferred from NamdProfileEvent::SUBMIT_HALFSTEP_SOA below.
 2869 #ifndef SOA_SIMPLIFY_PARAMS
 2870  const int * __restrict hydrogenGroupSize,
 2871  const float * __restrict mass,
 2872  const double * __restrict vel_x,
 2873  const double * __restrict vel_y,
 2874  const double * __restrict vel_z,
 2875  int numAtoms
 2876 #endif
 2877  ) {
 2878  NAMD_EVENT_RANGE_2(patch->flags.event_on,
 2879  NamdProfileEvent::SUBMIT_HALFSTEP_SOA);
 2880 #ifdef SOA_SIMPLIFY_PARAMS
 2881  const int * __restrict hydrogenGroupSize = patch->patchDataSOA.hydrogenGroupSize;
 2882  const float * __restrict mass = patch->patchDataSOA.mass;
 2883  const double * __restrict vel_x = patch->patchDataSOA.vel_x;
 2884  const double * __restrict vel_y = patch->patchDataSOA.vel_y;
 2885  const double * __restrict vel_z = patch->patchDataSOA.vel_z;
 2886  int numAtoms = patch->patchDataSOA.numAtoms;
 2887 #endif
 2888  // Condition hard-wired to 1; the original doKineticEnergy/doVirial gate
 2888  // is preserved in the comment.
 2888  if ( 1 /* doKineticEnergy || patch->flags.doVirial */ ) {
 2889  BigReal kineticEnergy = 0;
 2890  Tensor virial;
 2891  for (int i=0; i < numAtoms; i++) {
 2892  // scalar kineticEnergy += mass[i] * vel[i]^2
 2893  kineticEnergy += mass[i] *
 2894  (vel_x[i]*vel_x[i] + vel_y[i]*vel_y[i] + vel_z[i]*vel_z[i]);
 2895  // tensor virial += mass[i] * outer_product(vel[i], vel[i])
 2896  virial.xx += mass[i] * vel_x[i] * vel_x[i];
 2897  virial.xy += mass[i] * vel_x[i] * vel_y[i];
 2898  virial.xz += mass[i] * vel_x[i] * vel_z[i];
 2899  virial.yx += mass[i] * vel_y[i] * vel_x[i];
 2900  virial.yy += mass[i] * vel_y[i] * vel_y[i];
 2901  virial.yz += mass[i] * vel_y[i] * vel_z[i];
 2902  virial.zx += mass[i] * vel_z[i] * vel_x[i];
 2903  virial.zy += mass[i] * vel_z[i] * vel_y[i];
 2904  virial.zz += mass[i] * vel_z[i] * vel_z[i];
 2905  }
 2906  // One 0.5 is the KE = (1/2) m v^2 factor; the extra 0.5 presumably
 2906  // halves the contribution because this routine samples at half steps —
 2906  // TODO(review): confirm against the AOS submitHalfstep.
 2906  kineticEnergy *= 0.5 * 0.5;
 2907  virial *= 0.5;
 2908 
 2909  // NOTE(review): the kinetic-energy reduction-item increment (a
 2909  // doxygen-linked line) is elided here in this listing.
 2910  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,virial);
 2911  }
 2912 
 2913  if ( 1 /* doKineticEnergy || patch->flags.doVirial */ ) {
 2914  BigReal intKineticEnergy = 0;
 2915  Tensor intVirialNormal;
 2916  int hgs;
 2917  // Outer loop strides by hydrogen group; hgs is the group size stored
 2917  // at the group's first atom.
 2917  for (int i=0; i < numAtoms; i += hgs) {
 2918  // find velocity of center-of-mass of hydrogen group
 2919  // calculate mass-weighted velocity
 2920  hgs = hydrogenGroupSize[i];
 2921  BigReal m_cm = 0;
 2922  BigReal v_cm_x = 0;
 2923  BigReal v_cm_y = 0;
 2924  BigReal v_cm_z = 0;
 2925  for (int j = i; j < (i+hgs); j++) {
 2926  m_cm += mass[j];
 2927  v_cm_x += mass[j] * vel_x[j];
 2928  v_cm_y += mass[j] * vel_y[j];
 2929  v_cm_z += mass[j] * vel_z[j];
 2930  }
 2931  BigReal recip_m_cm = 1.0 / m_cm;
 2932  v_cm_x *= recip_m_cm;
 2933  v_cm_y *= recip_m_cm;
 2934  v_cm_z *= recip_m_cm;
 2935  // sum virial contributions wrt vel center-of-mass
 2936  for (int j = i; j < (i+hgs); j++) {
 2937  BigReal dv_x = vel_x[j] - v_cm_x;
 2938  BigReal dv_y = vel_y[j] - v_cm_y;
 2939  BigReal dv_z = vel_z[j] - v_cm_z;
 2940  // scalar intKineticEnergy += mass[j] * dot_product(vel[j], dv)
 2941  intKineticEnergy += mass[j] *
 2942  (vel_x[j] * dv_x + vel_y[j] * dv_y + vel_z[j] * dv_z);
 2943  // tensor intVirialNormal += mass[j] * outer_product(vel[j], dv)
 2944  intVirialNormal.xx += mass[j] * vel_x[j] * dv_x;
 2945  intVirialNormal.xy += mass[j] * vel_x[j] * dv_y;
 2946  intVirialNormal.xz += mass[j] * vel_x[j] * dv_z;
 2947  intVirialNormal.yx += mass[j] * vel_y[j] * dv_x;
 2948  intVirialNormal.yy += mass[j] * vel_y[j] * dv_y;
 2949  intVirialNormal.yz += mass[j] * vel_y[j] * dv_z;
 2950  intVirialNormal.zx += mass[j] * vel_z[j] * dv_x;
 2951  intVirialNormal.zy += mass[j] * vel_z[j] * dv_y;
 2952  intVirialNormal.zz += mass[j] * vel_z[j] * dv_z;
 2953  }
 2954  }
 2955  intKineticEnergy *= 0.5 * 0.5;
 2956  intVirialNormal *= 0.5;
 2957  // Continuation of the internal-kinetic-energy reduction-item increment;
 2957  // the target expression is on a doxygen-linked line elided here.
 2958  += intKineticEnergy;
 2959  ADD_TENSOR_OBJECT(reduction, REDUCTION_INT_VIRIAL_NORMAL,
 2960  intVirialNormal);
 2961  }
 2962 }
2963 
2964 
2965 //
2966 // XXX
2967 //
// Submit per-step reductions from this patch's SOA arrays:
//  - atom-count checksum;
//  - total kinetic energy, linear momentum, and angular momentum about the
//    lattice origin;
//  - internal (hydrogen-group-relative) kinetic energy plus internal virial
//    tensors for each force class (normal, nonbonded, slow).
// Ends by calling reduction->submit().
// NOTE(review): the function declarator line is elided in this listing;
// name inferred from NamdProfileEvent::SUBMIT_REDUCTIONS_SOA below.
 2969 #ifndef SOA_SIMPLIFY_PARAMS
 2970  const int * __restrict hydrogenGroupSize,
 2971  const float * __restrict mass,
 2972  const double * __restrict pos_x,
 2973  const double * __restrict pos_y,
 2974  const double * __restrict pos_z,
 2975  const double * __restrict vel_x,
 2976  const double * __restrict vel_y,
 2977  const double * __restrict vel_z,
 2978  const double * __restrict f_normal_x,
 2979  const double * __restrict f_normal_y,
 2980  const double * __restrict f_normal_z,
 2981  const double * __restrict f_nbond_x,
 2982  const double * __restrict f_nbond_y,
 2983  const double * __restrict f_nbond_z,
 2984  const double * __restrict f_slow_x,
 2985  const double * __restrict f_slow_y,
 2986  const double * __restrict f_slow_z,
 2987  int numAtoms
 2988 #endif
 2989  ) {
 2990  NAMD_EVENT_RANGE_2(patch->flags.event_on,
 2991  NamdProfileEvent::SUBMIT_REDUCTIONS_SOA);
 2992 #ifdef SOA_SIMPLIFY_PARAMS
 2993  // Simplified-parameter build: bind all arrays from patchDataSOA.
 2993  const int * __restrict hydrogenGroupSize = patch->patchDataSOA.hydrogenGroupSize;
 2994  const float * __restrict mass = patch->patchDataSOA.mass;
 2995  const double * __restrict pos_x = patch->patchDataSOA.pos_x;
 2996  const double * __restrict pos_y = patch->patchDataSOA.pos_y;
 2997  const double * __restrict pos_z = patch->patchDataSOA.pos_z;
 2998  const double * __restrict vel_x = patch->patchDataSOA.vel_x;
 2999  const double * __restrict vel_y = patch->patchDataSOA.vel_y;
 3000  const double * __restrict vel_z = patch->patchDataSOA.vel_z;
 3001  const double * __restrict f_normal_x = patch->patchDataSOA.f_normal_x;
 3002  const double * __restrict f_normal_y = patch->patchDataSOA.f_normal_y;
 3003  const double * __restrict f_normal_z = patch->patchDataSOA.f_normal_z;
 3004  const double * __restrict f_nbond_x = patch->patchDataSOA.f_nbond_x;
 3005  const double * __restrict f_nbond_y = patch->patchDataSOA.f_nbond_y;
 3006  const double * __restrict f_nbond_z = patch->patchDataSOA.f_nbond_z;
 3007  const double * __restrict f_slow_x = patch->patchDataSOA.f_slow_x;
 3008  const double * __restrict f_slow_y = patch->patchDataSOA.f_slow_y;
 3009  const double * __restrict f_slow_z = patch->patchDataSOA.f_slow_z;
 3010  int numAtoms = patch->patchDataSOA.numAtoms;
 3011 #endif
 3012 
 3013  reduction->item(REDUCTION_ATOM_CHECKSUM) += numAtoms;
 3015 
 3016  if ( 1 /* doKineticEnergy || doMomenta || patch->flags.doVirial */ ) {
 3017  BigReal kineticEnergy = 0;
 3018  BigReal momentum_x = 0;
 3019  BigReal momentum_y = 0;
 3020  BigReal momentum_z = 0;
 3021  BigReal angularMomentum_x = 0;
 3022  BigReal angularMomentum_y = 0;
 3023  BigReal angularMomentum_z = 0;
 3024  // Angular momentum is measured about the lattice origin.
 3024  BigReal origin_x = patch->lattice.origin().x;
 3025  BigReal origin_y = patch->lattice.origin().y;
 3026  BigReal origin_z = patch->lattice.origin().z;
 3027 
 3028  // XXX pairInteraction
 3029 
 3030  for (int i=0; i < numAtoms; i++) {
 3031 
 3032  // scalar kineticEnergy += mass[i] * dot_product(vel[i], vel[i])
 3033  kineticEnergy += mass[i] *
 3034  (vel_x[i]*vel_x[i] + vel_y[i]*vel_y[i] + vel_z[i]*vel_z[i]);
 3035 
 3036  // vector momentum += mass[i] * vel[i]
 3037  momentum_x += mass[i] * vel_x[i];
 3038  momentum_y += mass[i] * vel_y[i];
 3039  momentum_z += mass[i] * vel_z[i];
 3040 
 3041  // vector dpos = pos[i] - origin
 3042  BigReal dpos_x = pos_x[i] - origin_x;
 3043  BigReal dpos_y = pos_y[i] - origin_y;
 3044  BigReal dpos_z = pos_z[i] - origin_z;
 3045 
 3046  // vector angularMomentum += mass[i] * cross_product(dpos, vel[i])
 3047  angularMomentum_x += mass[i] * (dpos_y*vel_z[i] - dpos_z*vel_y[i]);
 3048  angularMomentum_y += mass[i] * (dpos_z*vel_x[i] - dpos_x*vel_z[i]);
 3049  angularMomentum_z += mass[i] * (dpos_x*vel_y[i] - dpos_y*vel_x[i]);
 3050  }
 3051 
 3052  // XXX missing Drude
 3053 
 3054  kineticEnergy *= 0.5;
 3055  Vector momentum(momentum_x, momentum_y, momentum_z);
 3056  Vector angularMomentum(angularMomentum_x, angularMomentum_y,
 3057  angularMomentum_z);
 3058 
 3059  // NOTE(review): the kinetic-energy reduction-item increment (a
 3059  // doxygen-linked line) is elided here in this listing.
 3060  ADD_VECTOR_OBJECT(reduction,REDUCTION_MOMENTUM,momentum);
 3061  ADD_VECTOR_OBJECT(reduction,REDUCTION_ANGULAR_MOMENTUM,angularMomentum);
 3062  }
 3063  // For non-Multigrator doKineticEnergy = 1 always
 3064  if ( 1 /* doKineticEnergy || patch->flags.doVirial */ ) {
 3065  BigReal intKineticEnergy = 0;
 3066  Tensor intVirialNormal;
 3067  Tensor intVirialNbond;
 3068  Tensor intVirialSlow;
 3069 
 3070  int hgs = 1; // hydrogen group size
 3071  // Stride by hydrogen group; each group's size is stored at its first atom.
 3071  for (int i=0; i < numAtoms; i += hgs) {
 3072  hgs = hydrogenGroupSize[i];
 3073  int j;
 3074  BigReal m_cm = 0;
 3075  BigReal r_cm_x = 0;
 3076  BigReal r_cm_y = 0;
 3077  BigReal r_cm_z = 0;
 3078  BigReal v_cm_x = 0;
 3079  BigReal v_cm_y = 0;
 3080  BigReal v_cm_z = 0;
 3081  // Mass-weighted sums for group center-of-mass position and velocity.
 3081  for ( j = i; j < (i+hgs); ++j ) {
 3082  m_cm += mass[j];
 3083  r_cm_x += mass[j] * pos_x[j];
 3084  r_cm_y += mass[j] * pos_y[j];
 3085  r_cm_z += mass[j] * pos_z[j];
 3086  v_cm_x += mass[j] * vel_x[j];
 3087  v_cm_y += mass[j] * vel_y[j];
 3088  v_cm_z += mass[j] * vel_z[j];
 3089  }
 3090  BigReal inv_m_cm = namd_reciprocal(m_cm);
 3091  r_cm_x *= inv_m_cm;
 3092  r_cm_y *= inv_m_cm;
 3093  r_cm_z *= inv_m_cm;
 3094  v_cm_x *= inv_m_cm;
 3095  v_cm_y *= inv_m_cm;
 3096  v_cm_z *= inv_m_cm;
 3097 
 3098  // XXX removed pairInteraction
 3099  for ( j = i; j < (i+hgs); ++j ) {
 3100  // XXX removed fixed atoms
 3101 
 3102  // vector vel[j] used twice below
 3103  BigReal v_x = vel_x[j];
 3104  BigReal v_y = vel_y[j];
 3105  BigReal v_z = vel_z[j];
 3106 
 3107  // vector dv = vel[j] - v_cm
 3108  BigReal dv_x = v_x - v_cm_x;
 3109  BigReal dv_y = v_y - v_cm_y;
 3110  BigReal dv_z = v_z - v_cm_z;
 3111 
 3112  // scalar intKineticEnergy += mass[j] * dot_product(v, dv)
 3113  intKineticEnergy += mass[j] *
 3114  (v_x * dv_x + v_y * dv_y + v_z * dv_z);
 3115 
 3116  // vector dr = pos[j] - r_cm
 3117  BigReal dr_x = pos_x[j] - r_cm_x;
 3118  BigReal dr_y = pos_y[j] - r_cm_y;
 3119  BigReal dr_z = pos_z[j] - r_cm_z;
 3120 
 3121  // tensor intVirialNormal += outer_product(f_normal[j], dr)
 3122  intVirialNormal.xx += f_normal_x[j] * dr_x;
 3123  intVirialNormal.xy += f_normal_x[j] * dr_y;
 3124  intVirialNormal.xz += f_normal_x[j] * dr_z;
 3125  intVirialNormal.yx += f_normal_y[j] * dr_x;
 3126  intVirialNormal.yy += f_normal_y[j] * dr_y;
 3127  intVirialNormal.yz += f_normal_y[j] * dr_z;
 3128  intVirialNormal.zx += f_normal_z[j] * dr_x;
 3129  intVirialNormal.zy += f_normal_z[j] * dr_y;
 3130  intVirialNormal.zz += f_normal_z[j] * dr_z;
 3131 
 3132  // tensor intVirialNbond += outer_product(f_nbond[j], dr)
 3133  intVirialNbond.xx += f_nbond_x[j] * dr_x;
 3134  intVirialNbond.xy += f_nbond_x[j] * dr_y;
 3135  intVirialNbond.xz += f_nbond_x[j] * dr_z;
 3136  intVirialNbond.yx += f_nbond_y[j] * dr_x;
 3137  intVirialNbond.yy += f_nbond_y[j] * dr_y;
 3138  intVirialNbond.yz += f_nbond_y[j] * dr_z;
 3139  intVirialNbond.zx += f_nbond_z[j] * dr_x;
 3140  intVirialNbond.zy += f_nbond_z[j] * dr_y;
 3141  intVirialNbond.zz += f_nbond_z[j] * dr_z;
 3142 
 3143  // tensor intVirialSlow += outer_product(f_slow[j], dr)
 3144  intVirialSlow.xx += f_slow_x[j] * dr_x;
 3145  intVirialSlow.xy += f_slow_x[j] * dr_y;
 3146  intVirialSlow.xz += f_slow_x[j] * dr_z;
 3147  intVirialSlow.yx += f_slow_y[j] * dr_x;
 3148  intVirialSlow.yy += f_slow_y[j] * dr_y;
 3149  intVirialSlow.yz += f_slow_y[j] * dr_z;
 3150  intVirialSlow.zx += f_slow_z[j] * dr_x;
 3151  intVirialSlow.zy += f_slow_z[j] * dr_y;
 3152  intVirialSlow.zz += f_slow_z[j] * dr_z;
 3153  }
 3154  }
 3155 
 3156  intKineticEnergy *= 0.5;
 3157 
 3158  // NOTE(review): the internal-kinetic-energy reduction-item increment
 3158  // (a doxygen-linked line) is elided here in this listing.
 3159  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NORMAL,intVirialNormal);
 3160  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NBOND,intVirialNbond);
 3161  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_SLOW,intVirialSlow);
 3162  }
 3163  // XXX removed pressure profile
 3164 
 3165  // XXX removed fixed atoms
 3166 
 3167  reduction->submit();
 3168 
 3169  // XXX removed pressure profile reduction
 3170 }
3171 
3172 
// Hand positions/velocities/forces off to the output collection system for
// this step. When output is needed, updates are first copied from the SOA
// layout back into the AOS FullAtom array (unless GPU device migration has
// already kept AOS current). zeroVel defaults to 0 and is forwarded to
// submitVelocities (used by callers to request zeroed velocities).
 3173 void Sequencer::submitCollections_SOA(int step, int zeroVel /* = 0 */)
 3174 {
 3175  //
 3176  // Copy updates of SOA back into AOS for collections.
 3177  //
 3178  // XXX Could update positions and velocities separately.
 3179  //
 3180  NAMD_EVENT_RANGE_2(patch->flags.event_on,
 3181  NamdProfileEvent::SUBMIT_COLLECTIONS_SOA);
 3182  //
 3183  // XXX Poor implementation here!
 3184  // The selector functions called below in Output.C are
 3185  // doing several tests and in an average use case calculating
 3186  // at least two mod functions.
 3187  //
 3188  // However, most steps are NOT output steps!
 3189  //
 3190  int is_pos_needed;
 3191  int dcdIndex;
 3192  // coordinateNeeded also reports which DCD file index the frame targets.
 3192  std::tie(is_pos_needed, dcdIndex)= Output::coordinateNeeded(step);
 3193  int is_vel_needed = Output::velocityNeeded(step);
 3194  int is_f_needed = Output::forceNeeded(step);
 3195  if (!simParams->useDeviceMigration) { // This is already done for GPU migration
 3196  if ( is_pos_needed || is_vel_needed ) {
 3197  patch->copy_updates_to_AOS();
 3198  }
 3199  }
 3200  if (is_f_needed) {
 3201  // NOTE(review): several doxygen-linked lines are elided here in this
 3201  // listing before the copy call.
 3206  patch->copy_forces_to_AOS();
 3207  }
 3208  if ( is_pos_needed ) {
 3209  collection->submitPositions(step,patch->atom,patch->lattice,is_pos_needed,dcdIndex);
 3210  }
 3211  if ( is_vel_needed ) {
 3212  collection->submitVelocities(step,zeroVel,patch->atom,is_vel_needed);
 3213  }
 3214  if ( is_f_needed ) {
 3215  // Force output never reports anything beyond the "slow" force class.
 3215  int maxForceUsed = patch->flags.maxForceUsed;
 3216  if ( maxForceUsed > Results::slow ) maxForceUsed = Results::slow;
 3217  collection->submitForces(step,patch->atom,maxForceUsed,patch->f,is_f_needed);
 3218  }
 3219 }
3220 
3221 
// Sanity check on atom speeds: if any atom's squared speed exceeds maxvel2,
// report every offending atom and terminate the simulation as unstable.
// First pass is a branch-free vectorizable count; the slow diagnostic pass
// runs only when at least one violation was found.
// NOTE(review): the function declarator line is elided in this listing;
// name inferred from NamdProfileEvent::MAXIMUM_MOVE_SOA below.
 3223  const double dt,
 3224  const double maxvel2
 3225 #ifndef SOA_SIMPLIFY_PARAMS
 3226  ,
 3227  const double * __restrict vel_x,
 3228  const double * __restrict vel_y,
 3229  const double * __restrict vel_z,
 3230  int numAtoms
 3231 #endif
 3232  ) {
 3233  NAMD_EVENT_RANGE_2(patch->flags.event_on, NamdProfileEvent::MAXIMUM_MOVE_SOA);
 3234 #ifdef SOA_SIMPLIFY_PARAMS
 3235  const double * __restrict vel_x = patch->patchDataSOA.vel_x;
 3236  const double * __restrict vel_y = patch->patchDataSOA.vel_y;
 3237  const double * __restrict vel_z = patch->patchDataSOA.vel_z;
 3238  int numAtoms = patch->patchDataSOA.numAtoms;
 3239 #endif
 3240 
 3241  // XXX missing maximum move
 3242 
 3243  // Loop vectorizes when replacing logical OR with summing.
 3244  int killme = 0;
 3245  for (int i=0; i < numAtoms; i++) {
 3246  BigReal vel2 =
 3247  vel_x[i] * vel_x[i] + vel_y[i] * vel_y[i] + vel_z[i] * vel_z[i];
 3248  killme = killme + ( vel2 > maxvel2 );
 3249  }
 3250  if (killme) {
 3251  // Found at least one atom that is moving too fast.
 3252  // Terminating, so loop performance below doesn't matter.
 3253  // Loop does not vectorize.
 3254  killme = 0;
 3255  for (int i=0; i < numAtoms; i++) {
 3256  BigReal vel2 =
 3257  vel_x[i] * vel_x[i] + vel_y[i] * vel_y[i] + vel_z[i] * vel_z[i];
 3258  if (vel2 > maxvel2) {
 3259  const FullAtom *a = patch->atom.begin();
 3260  const Vector vel(vel_x[i], vel_y[i], vel_z[i]);
 3261  const BigReal maxvel = sqrt(maxvel2);
 3262  ++killme;
 3263  // Report 1-based atom id; PDBVELFACTOR converts to PDB velocity units.
 3263  iout << iERROR << "Atom " << (a[i].id + 1) << " velocity is "
 3264  << ( PDBVELFACTOR * vel ) << " (limit is "
 3265  << ( PDBVELFACTOR * maxvel ) << ", atom "
 3266  << i << " of " << numAtoms << " on patch "
 3267  << patch->patchID << " pe " << CkMyPe() << ")\n" << endi;
 3268  }
 3269  }
 3270  iout << iERROR <<
 3271  "Atoms moving too fast; simulation has become unstable ("
 3272  << killme << " atoms on patch " << patch->patchID
 3273  << " pe " << CkMyPe() << ").\n" << endi;
 3274  // NOTE(review): a doxygen-linked line is elided here before terminate().
 3275  terminate();
 3276  }
 3277 }
3278 
3279 
// First (pre-force) half of the BBK Langevin integrator over SOA arrays:
// damp each velocity by (1 - 0.5 * dt * gamma), where gamma is the per-atom
// Langevin damping coefficient (1/ps) and dt is the timestep converted to ps.
// No-op unless simParams->langevinOn.
// NOTE(review): the function declarator line is elided in this listing;
// name inferred from NamdProfileEvent::LANGEVIN_VELOCITIES_BBK1_SOA below.
 3281  BigReal timestep
 3282 #ifndef SOA_SIMPLIFY_PARAMS
 3283  ,
 3284  const float * __restrict langevinParam,
 3285  double * __restrict vel_x,
 3286  double * __restrict vel_y,
 3287  double * __restrict vel_z,
 3288  int numAtoms
 3289 #endif
 3290  ) {
 3291  NAMD_EVENT_RANGE_2(patch->flags.event_on,
 3292  NamdProfileEvent::LANGEVIN_VELOCITIES_BBK1_SOA);
 3293 #ifdef SOA_SIMPLIFY_PARAMS
 3294  const float * __restrict langevinParam = patch->patchDataSOA.langevinParam;
 3295  double * __restrict vel_x = patch->patchDataSOA.vel_x;
 3296  double * __restrict vel_y = patch->patchDataSOA.vel_y;
 3297  double * __restrict vel_z = patch->patchDataSOA.vel_z;
 3298  int numAtoms = patch->patchDataSOA.numAtoms;
 3299 #endif
 3300  if ( simParams->langevinOn /* && !simParams->langevin_useBAOAB */ )
 3301  {
 3302  // scale by TIMEFACTOR to convert to fs and then by 0.001 to ps
 3303  // multiply by the Langevin damping coefficient, units 1/ps
 3304  // XXX we could instead store time-scaled Langevin parameters
 3305  BigReal dt = timestep * (0.001 * TIMEFACTOR);
 3306 
 3307  // XXX missing Drude
 3308 
 3309  //
 3310  // The conditional inside loop prevents vectorization and doesn't
 3311  // avoid much work since addition and multiplication are cheap.
 3312  //
 3313  for (int i=0; i < numAtoms; i++) {
 3314  // dt_gamma == 0 (gamma == 0) leaves the velocity unchanged.
 3314  BigReal dt_gamma = dt * langevinParam[i];
 3315  //if ( ! dt_gamma ) continue;
 3316 
 3317  BigReal scaling = 1. - 0.5 * dt_gamma;
 3318  vel_x[i] *= scaling;
 3319  vel_y[i] *= scaling;
 3320  vel_z[i] *= scaling;
 3321  }
 3322  } // end if langevinOn
 3323 }
3324 
3325 
// Second (post-force) half of the BBK Langevin integrator over SOA arrays:
// fill the per-atom Gaussian random buffers, add the random kick scaled by
// langScalRandBBK2, then scale velocities by langScalVelBBK2 (both are
// precomputed per-atom factors stored in patchDataSOA).
// No-op unless simParams->langevinOn.
// NOTE(review): the function declarator line is elided in this listing;
// name inferred from NamdProfileEvent::LANGEVIN_VELOCITIES_BBK2_SOA below.
 3327  BigReal timestep
 3328 #ifndef SOA_SIMPLIFY_PARAMS
 3329  ,
 3330  const float * __restrict langevinParam,
 3331  const float * __restrict langScalVelBBK2,
 3332  const float * __restrict langScalRandBBK2,
 3333  float * __restrict gaussrand_x,
 3334  float * __restrict gaussrand_y,
 3335  float * __restrict gaussrand_z,
 3336  double * __restrict vel_x,
 3337  double * __restrict vel_y,
 3338  double * __restrict vel_z,
 3339  int numAtoms
 3340 #endif
 3341  )
 3342 {
 3343  NAMD_EVENT_RANGE_2(patch->flags.event_on,
 3344  NamdProfileEvent::LANGEVIN_VELOCITIES_BBK2_SOA);
 3345 #ifdef SOA_SIMPLIFY_PARAMS
 3346  const float * __restrict langevinParam = patch->patchDataSOA.langevinParam;
 3347  const float * __restrict langScalVelBBK2 = patch->patchDataSOA.langScalVelBBK2;
 3348  const float * __restrict langScalRandBBK2 = patch->patchDataSOA.langScalRandBBK2;
 3349  float * __restrict gaussrand_x = patch->patchDataSOA.gaussrand_x;
 3350  float * __restrict gaussrand_y = patch->patchDataSOA.gaussrand_y;
 3351  float * __restrict gaussrand_z = patch->patchDataSOA.gaussrand_z;
 3352  double * __restrict vel_x = patch->patchDataSOA.vel_x;
 3353  double * __restrict vel_y = patch->patchDataSOA.vel_y;
 3354  double * __restrict vel_z = patch->patchDataSOA.vel_z;
 3355  int numAtoms = patch->patchDataSOA.numAtoms;
 3356 #endif
 3357  if ( simParams->langevinOn /* && !simParams->langevin_useBAOAB */ )
 3358  {
 3359  // XXX missing Drude
 3360 
 3361  // Scale by TIMEFACTOR to convert to fs and then by 0.001 to ps
 3362  // multiply by the Langevin damping coefficient, units 1/ps.
 3363  // XXX we could instead store time-scaled Langevin parameters
 3364  BigReal dt = timestep * (0.001 * TIMEFACTOR);
 3365  // Buffer the Gaussian random numbers
 3366  // NOTE(review): the guard for this branch (a doxygen-linked line,
 3366  // presumably testing whether Langevin gammas differ) is elided here;
 3366  // the matching else is at the "completely fill" branch below — confirm
 3366  // against the repository source.
 3367  // Must re-satisfy constraints if Langevin gammas differ.
 3368  // (conserve momentum?)
 3369  TIMER_START(patch->timerSet, RATTLE1);
 3370  rattle1_SOA(timestep, 1);
 3371  TIMER_STOP(patch->timerSet, RATTLE1);
 3372  //
 3373  // We don't need random numbers for atoms such that gamma=0.
 3374  // If gammas differ, the likely case is that we aren't applying
 3375  // Langevin damping to hydrogen, making those langevinParam=0,
 3376  // in which case we need only numAtoms/3 random vectors.
 3377  //
 3378  // XXX can refine code below, count in advance how many
 3379  // random numbers we need to use Random array filling routine
 3380  //
 3381  // XXX Loop does not vectorize!
 3382  for (int i=0; i < numAtoms; i++) {
 3383  Vector rg; // = 0
 3384  if (langevinParam[i] != 0) rg = random->gaussian_vector();
 3385  gaussrand_x[i] = float(rg.x);
 3386  gaussrand_y[i] = float(rg.y);
 3387  gaussrand_z[i] = float(rg.z);
 3388  }
 3389  }
 3390  else {
 3391  // Need to completely fill random number arrays.
 3392  random->gaussian_array_f(gaussrand_x, numAtoms);
 3393  random->gaussian_array_f(gaussrand_y, numAtoms);
 3394  random->gaussian_array_f(gaussrand_z, numAtoms);
 3395  }
 3396 
 3397  // do the velocity updates
 3398  for (int i=0; i < numAtoms; i++) {
 3399  // Random kick first, then multiplicative damping scale.
 3399  vel_x[i] += gaussrand_x[i] * langScalRandBBK2[i];
 3400  vel_y[i] += gaussrand_y[i] * langScalRandBBK2[i];
 3401  vel_z[i] += gaussrand_z[i] * langScalRandBBK2[i];
 3402  vel_x[i] *= langScalVelBBK2[i];
 3403  vel_y[i] *= langScalVelBBK2[i];
 3404  vel_z[i] *= langScalVelBBK2[i];
 3405  }
 3406  } // end if langevinOn
 3407 }
3408 
// Berendsen-style pressure-control position rescaling over SOA arrays.
// Receives the lattice scaling tensor via blocking broadcast, rescales the
// patch lattice, then either translates each hydrogen group rigidly by its
// center-of-mass displacement (useGroupPressure) or rescales every atom's
// position about the lattice origin.
// NOTE(review): the function declarator line and the enclosing guard
// (doxygen-linked lines before the broadcast receive, whose closing brace
// is the second-to-last one below) are elided in this listing — confirm
// against the repository source.
 3410 #ifndef SOA_SIMPLIFY_PARAMS
 3411  const int * __restrict hydrogenGroupSize,
 3412  const float * __restrict mass,
 3413  double * __restrict pos_x,
 3414  double * __restrict pos_y,
 3415  double * __restrict pos_z,
 3416  int numAtoms,
 3417 #endif
 3418  int step)
 3419 {
 3420 #ifdef SOA_SIMPLIFY_PARAMS
 3421  const int * __restrict hydrogenGroupSize = patch->patchDataSOA.hydrogenGroupSize;
 3422  const float * __restrict mass = patch->patchDataSOA.mass;
 3423  double * __restrict pos_x = patch->patchDataSOA.pos_x;
 3424  double * __restrict pos_y = patch->patchDataSOA.pos_y;
 3425  double * __restrict pos_z = patch->patchDataSOA.pos_z;
 3426  int numAtoms = patch->patchDataSOA.numAtoms;
 3427 #endif
 3428 
 3429  //
 3430  // Loops below simplify if we lift out special cases of fixed atoms
 3431  // and pressure excluded atoms and make them their own branch.
 3432  //
 3433 
 3437  // Blocking receive for the updated lattice scaling factor.
 3438  Tensor factor = broadcast->positionRescaleFactor.get(step);
 3439  patch->lattice.rescale(factor);
 3440  Vector origin = patch->lattice.origin();
 3441 
 3442  if ( simParams->useGroupPressure ) {
 3443  int hgs;
 3444  // Stride by hydrogen group; translate each group rigidly so that its
 3444  // center of mass lands at the rescaled position.
 3444  for (int i = 0; i < numAtoms; i += hgs) {
 3445  int j;
 3446  hgs = hydrogenGroupSize[i];
 3447  // missing fixed atoms implementation
 3448  BigReal m_cm = 0;
 3449  BigReal r_cm_x = 0;
 3450  BigReal r_cm_y = 0;
 3451  BigReal r_cm_z = 0;
 3452  // calculate the center of mass
 3453  for ( j = i; j < (i+hgs); ++j ) {
 3454  m_cm += mass[j];
 3455  r_cm_x += mass[j] * pos_x[j];
 3456  r_cm_y += mass[j] * pos_y[j];
 3457  r_cm_z += mass[j] * pos_z[j];
 3458  }
 3459  BigReal inv_m_cm = namd_reciprocal(m_cm);
 3460  r_cm_x *= inv_m_cm;
 3461  r_cm_y *= inv_m_cm;
 3462  r_cm_z *= inv_m_cm;
 3463  // scale the center of mass with factor
 3464  // shift to origin
 3465  double tx = r_cm_x - origin.x;
 3466  double ty = r_cm_y - origin.y;
 3467  double tz = r_cm_z - origin.z;
 3468  // apply transformation
 3469  double new_r_cm_x = factor.xx*tx + factor.xy*ty + factor.xz*tz;
 3470  double new_r_cm_y = factor.yx*tx + factor.yy*ty + factor.yz*tz;
 3471  double new_r_cm_z = factor.zx*tx + factor.zy*ty + factor.zz*tz;
 3472  // shift back
 3473  new_r_cm_x += origin.x;
 3474  new_r_cm_y += origin.y;
 3475  new_r_cm_z += origin.z;
 3476  // translation vector from old COM and new COM
 3477  double delta_r_cm_x = new_r_cm_x - r_cm_x;
 3478  double delta_r_cm_y = new_r_cm_y - r_cm_y;
 3479  double delta_r_cm_z = new_r_cm_z - r_cm_z;
 3480  // shift the hydrogen group with translation vector
 3481  for (j = i; j < (i+hgs); ++j) {
 3482  pos_x[j] += delta_r_cm_x;
 3483  pos_y[j] += delta_r_cm_y;
 3484  pos_z[j] += delta_r_cm_z;
 3485  }
 3486  }
 3487  } else {
 3488  // Per-atom pressure control: rescale each position about the origin.
 3488  for (int i = 0; i < numAtoms; ++i) {
 3489  // missing fixed atoms implementation
 3490  // scale the coordinates with factor
 3491  // shift to origin
 3492  double tx = pos_x[i] - origin.x;
 3493  double ty = pos_y[i] - origin.y;
 3494  double tz = pos_z[i] - origin.z;
 3495  // apply transformation
 3496  double ftx = factor.xx*tx + factor.xy*ty + factor.xz*tz;
 3497  double fty = factor.yx*tx + factor.yy*ty + factor.yz*tz;
 3498  double ftz = factor.zx*tx + factor.zy*ty + factor.zz*tz;
 3499  // shift back
 3500  pos_x[i] = ftx + origin.x;
 3501  pos_y[i] = fty + origin.y;
 3502  pos_z[i] = ftz + origin.z;
 3503  }
 3504  }
 3505  }
 3506 }
3507 
// Langevin-piston pressure-control update over SOA arrays: receives the
// lattice scaling tensor via blocking broadcast, rescales the patch lattice,
// and updates both positions and velocities. Positions are transformed by
// the factor tensor about the lattice origin; velocities are scaled by the
// reciprocal of the tensor's diagonal (per-axis velFactor). With
// useGroupPressure, each hydrogen group is translated/kicked rigidly by its
// center-of-mass deltas instead of per atom.
// NOTE(review): the function declarator line is elided in this listing;
// name inferred from the PISTON timer usage below.
 3509 #ifndef SOA_SIMPLIFY_PARAMS
 3510  const int * __restrict hydrogenGroupSize,
 3511  const float * __restrict mass,
 3512  double * __restrict pos_x,
 3513  double * __restrict pos_y,
 3514  double * __restrict pos_z,
 3515  double * __restrict vel_x,
 3516  double * __restrict vel_y,
 3517  double * __restrict vel_z,
 3518  int numAtoms,
 3519 #endif
 3520  int step
 3521  )
 3522 {
 3523 #ifdef SOA_SIMPLIFY_PARAMS
 3524  const int * __restrict hydrogenGroupSize = patch->patchDataSOA.hydrogenGroupSize;
 3525  const float * __restrict mass = patch->patchDataSOA.mass;
 3526  double * __restrict pos_x = patch->patchDataSOA.pos_x;
 3527  double * __restrict pos_y = patch->patchDataSOA.pos_y;
 3528  double * __restrict pos_z = patch->patchDataSOA.pos_z;
 3529  double * __restrict vel_x = patch->patchDataSOA.vel_x;
 3530  double * __restrict vel_y = patch->patchDataSOA.vel_y;
 3531  double * __restrict vel_z = patch->patchDataSOA.vel_z;
 3532  int numAtoms = patch->patchDataSOA.numAtoms;
 3533 #endif
 3534 
 3535  //
 3536  // Loops below simplify if we lift out special cases of fixed atoms
 3537  // and pressure excluded atoms and make them their own branch.
 3538  //
 3539 
 3540  // Blocking receive for the updated lattice scaling factor.
 3541 
 3542  Tensor factor = broadcast->positionRescaleFactor.get(step);
 3543 
 3544  TIMER_START(patch->timerSet, PISTON);
 3545  // JCP FIX THIS!!!
 3546  // Velocity rescaling uses only the diagonal of the factor tensor
 3546  // (see the "FIX THIS" note above regarding off-diagonal terms).
 3546  double velFactor_x = namd_reciprocal(factor.xx);
 3547  double velFactor_y = namd_reciprocal(factor.yy);
 3548  double velFactor_z = namd_reciprocal(factor.zz);
 3549  patch->lattice.rescale(factor);
 3550  Vector origin = patch->lattice.origin();
 3551  if ( simParams->useGroupPressure ) {
 3552  int hgs;
 3553  // Stride by hydrogen group; apply rigid COM position shift and
 3553  // uniform COM velocity correction to each group.
 3553  for (int i=0; i < numAtoms; i += hgs) {
 3554  int j;
 3555  hgs = hydrogenGroupSize[i];
 3556  // missing fixed atoms
 3557  BigReal m_cm = 0;
 3558  BigReal r_cm_x = 0;
 3559  BigReal r_cm_y = 0;
 3560  BigReal r_cm_z = 0;
 3561  BigReal v_cm_x = 0;
 3562  BigReal v_cm_y = 0;
 3563  BigReal v_cm_z = 0;
 3564  for ( j = i; j < (i+hgs); ++j ) {
 3565  m_cm += mass[j];
 3566  r_cm_x += mass[j] * pos_x[j];
 3567  r_cm_y += mass[j] * pos_y[j];
 3568  r_cm_z += mass[j] * pos_z[j];
 3569  v_cm_x += mass[j] * vel_x[j];
 3570  v_cm_y += mass[j] * vel_y[j];
 3571  v_cm_z += mass[j] * vel_z[j];
 3572  }
 3573  BigReal inv_m_cm = namd_reciprocal(m_cm);
 3574  r_cm_x *= inv_m_cm;
 3575  r_cm_y *= inv_m_cm;
 3576  r_cm_z *= inv_m_cm;
 3577 
 3578  // Transform the group COM about the origin, as in berendsenPressure.
 3578  double tx = r_cm_x - origin.x;
 3579  double ty = r_cm_y - origin.y;
 3580  double tz = r_cm_z - origin.z;
 3581  double new_r_cm_x = factor.xx*tx + factor.xy*ty + factor.xz*tz;
 3582  double new_r_cm_y = factor.yx*tx + factor.yy*ty + factor.yz*tz;
 3583  double new_r_cm_z = factor.zx*tx + factor.zy*ty + factor.zz*tz;
 3584  new_r_cm_x += origin.x;
 3585  new_r_cm_y += origin.y;
 3586  new_r_cm_z += origin.z;
 3587 
 3588  double delta_r_cm_x = new_r_cm_x - r_cm_x;
 3589  double delta_r_cm_y = new_r_cm_y - r_cm_y;
 3590  double delta_r_cm_z = new_r_cm_z - r_cm_z;
 3591  v_cm_x *= inv_m_cm;
 3592  v_cm_y *= inv_m_cm;
 3593  v_cm_z *= inv_m_cm;
 3594  // Velocity correction: (1/factor - 1) applied to the COM velocity only.
 3594  double delta_v_cm_x = ( velFactor_x - 1 ) * v_cm_x;
 3595  double delta_v_cm_y = ( velFactor_y - 1 ) * v_cm_y;
 3596  double delta_v_cm_z = ( velFactor_z - 1 ) * v_cm_z;
 3597  for (j = i; j < (i+hgs); j++) {
 3598  pos_x[j] += delta_r_cm_x;
 3599  pos_y[j] += delta_r_cm_y;
 3600  pos_z[j] += delta_r_cm_z;
 3601  vel_x[j] += delta_v_cm_x;
 3602  vel_y[j] += delta_v_cm_y;
 3603  vel_z[j] += delta_v_cm_z;
 3604  }
 3605  // if (i < 10)
 3606  // printf("cpu: %d, %f, %f, %f, %f, %f, %f\n", i,
 3607  // pos_x[i], pos_y[i], pos_z[i],
 3608  // vel_x[i], vel_y[i], vel_z[i]);
 3609  }
 3610  }
 3611  else {
 3612  // Per-atom path: transform each position and scale each velocity.
 3612  for (int i=0; i < numAtoms; i++) {
 3613  double tx = pos_x[i] - origin.x;
 3614  double ty = pos_y[i] - origin.y;
 3615  double tz = pos_z[i] - origin.z;
 3616  double ftx = factor.xx*tx + factor.xy*ty + factor.xz*tz;
 3617  double fty = factor.yx*tx + factor.yy*ty + factor.yz*tz;
 3618  double ftz = factor.zx*tx + factor.zy*ty + factor.zz*tz;
 3619  pos_x[i] = ftx + origin.x;
 3620  pos_y[i] = fty + origin.y;
 3621  pos_z[i] = ftz + origin.z;
 3622  vel_x[i] *= velFactor_x;
 3623  vel_y[i] *= velFactor_y;
 3624  vel_z[i] *= velFactor_z;
 3625  // if (i < 10)
 3626  // printf("cpu: %d, %f, %f, %f, %f, %f, %f\n", i,
 3627  // pos_x[i], pos_y[i], pos_z[i],
 3628  // vel_x[i], vel_y[i], vel_z[i]);
 3629  }
 3630  }
 3631  TIMER_STOP(patch->timerSet, PISTON);
 3632  // exit(0);
 3633 }
3634 
3635 
3636 // timestep scaled by 1/TIMEFACTOR
// timestep scaled by 1/TIMEFACTOR
// Enforce rigid-bond constraints (RATTLE, first stage) on the SOA data.
// When `pressure` is nonzero, a virial tensor is collected from the
// constraint forces and added to the normal-virial reduction; a constraint
// failure terminates the simulation.
void Sequencer::rattle1_SOA(BigReal timestep, int pressure)
 3638 {
 3639  NAMD_EVENT_RANGE_2(patch->flags.event_on, NamdProfileEvent::RATTLE1_SOA);
 3640  if ( simParams->rigidBonds != RIGID_NONE ) {
 3641  Tensor virial;
 3642  // Only collect the virial when pressure accounting is requested.
 3642  Tensor *vp = ( pressure ? &virial : 0 );
 3643  // XXX pressureProfileReduction == NULL?
 3644  // patch->rattle1_SOA returns nonzero on constraint failure.
 3644  if ( patch->rattle1_SOA(timestep, vp, pressureProfileReduction) ) {
 3645  iout << iERROR <<
 3646  "Constraint failure; simulation has become unstable.\n" << endi;
 3647  // NOTE(review): a doxygen-linked line is elided here before terminate().
 3648  terminate();
 3649  }
 3650  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,virial);
 3651  }
 3652 }
3653 
// Run all force computes for one SOA timestep: validate/age pairlists,
// publish positions, suspend this thread until all force deposits close,
// then copy the resulting forces back into the patch SOA arrays.
//
// migration - nonzero forces pairlist invalidation and atom migration
// pairlists - nonzero when pairlists may be (re)used this step
// nstep     - step number, used only by disabled debug output below
void Sequencer::runComputeObjects_SOA(int migration, int pairlists, int nstep)
{
  if ( migration ) pairlistsAreValid = 0;
#if (defined(NAMD_CUDA) || defined(NAMD_HIP)) || defined(NAMD_MIC)
  if ( pairlistsAreValid &&
       // NOTE(review): a condition line is missing here (listing extraction gap).
       && ( pairlistsAge > pairlistsAgeLimit ) ) {
    pairlistsAreValid = 0;
  }
#else
  // NOTE(review): the opening "if ( ... ) {" matched by the brace below is
  // missing from this listing (extraction gap).
  pairlistsAreValid = 0;
  }
#endif
  if ( ! simParams->usePairlists ) pairlists = 0;
  patch->flags.usePairlists = pairlists || pairlistsAreValid;
  // Only save fresh pairlists when allowed to use them but none are valid.
  patch->flags.savePairlists = pairlists && ! pairlistsAreValid;

#if defined(NTESTPID)
  // Debug dump of positions and charges for the designated test patch.
  if (1 && patch->patchID == NTESTPID) {
    int step = patch->flags.step;
    int numAtoms = patch->numAtoms;
    double *xyzq = new double[4*numAtoms];
    double *x = patch->patchDataSOA.pos_x;
    double *y = patch->patchDataSOA.pos_y;
    double *z = patch->patchDataSOA.pos_z;
    float *q = patch->patchDataSOA.charge;
    for (int i=0; i < numAtoms; i++) {
      xyzq[4*i ] = x[i];
      xyzq[4*i+1] = y[i];
      xyzq[4*i+2] = z[i];
      xyzq[4*i+3] = q[i];
    }
    char fname[128], remark[128];
    sprintf(fname, "xyzq_soa_pid%d_step%d.bin", NTESTPID, step);
    sprintf(remark, "SOA xyzq, patch %d, step %d", NTESTPID, step);
    TestArray_write<double>(fname, remark, xyzq, 4*numAtoms);
    delete[] xyzq;
  }
#endif
  // Zero all SOA global forces before computing force
  patch->zero_global_forces_SOA();
  patch->positionsReady_SOA(migration); // updates flags.sequence

  int seq = patch->flags.sequence;
  int basePriority = ( (seq & 0xffff) << 15 )
      // NOTE(review): the remainder of this expression (and its terminating
      // semicolon) is missing from this listing (extraction gap).

  // XXX missing GBIS
  priority = basePriority + COMPUTE_HOME_PRIORITY;
  //char prbuf[32];
  //sprintf(prbuf, "%s: %d", NamdProfileEventStr[NamdProfileEvent::SEQ_SUSPEND], patch->getPatchID());
  //NAMD_EVENT_START_EX(1, NamdProfileEvent::SEQ_SUSPEND, prbuf);
  suspend(); // until all deposit boxes close
  //NAMD_EVENT_STOP(1, NamdProfileEvent::SEQ_SUSPEND);

#ifdef NODEGROUP_FORCE_REGISTER
  // With GPU-resident integration, forces are copied back only on
  // migration steps (otherwise they stay on the device).
  if(!simParams->CUDASOAintegrate || migration){
    patch->copy_forces_to_SOA();
  }
#else
  patch->copy_forces_to_SOA();
#endif

#if defined(NTESTPID)
  // Debug dump of normal/nonbonded/slow force arrays for the test patch.
  if (1 && patch->patchID == NTESTPID) {
    int step = patch->flags.step;
    int numAtoms = patch->numAtoms;
    char fname[128];
    char remark[128];
    double *fxyz = new double[3*numAtoms];
    double *fx = patch->patchDataSOA.f_normal_x;
    double *fy = patch->patchDataSOA.f_normal_y;
    double *fz = patch->patchDataSOA.f_normal_z;
    for (int i=0; i < numAtoms; i++) {
      fxyz[3*i ] = fx[i];
      fxyz[3*i+1] = fy[i];
      fxyz[3*i+2] = fz[i];
    }
    sprintf(fname, "fxyz_normal_soa_pid%d_step%d.bin", NTESTPID, step);
    sprintf(remark, "SOA fxyz normal, patch %d, step %d", NTESTPID, step);
    TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
    fx = patch->patchDataSOA.f_nbond_x;
    fy = patch->patchDataSOA.f_nbond_y;
    fz = patch->patchDataSOA.f_nbond_z;
    for (int i=0; i < numAtoms; i++) {
      fxyz[3*i ] = fx[i];
      fxyz[3*i+1] = fy[i];
      fxyz[3*i+2] = fz[i];
    }
    sprintf(fname, "fxyz_nbond_soa_pid%d_step%d.bin", NTESTPID, step);
    sprintf(remark, "SOA fxyz nonbonded, patch %d, step %d", NTESTPID, step);
    TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
    fx = patch->patchDataSOA.f_slow_x;
    fy = patch->patchDataSOA.f_slow_y;
    fz = patch->patchDataSOA.f_slow_z;
    for (int i=0; i < numAtoms; i++) {
      fxyz[3*i ] = fx[i];
      fxyz[3*i+1] = fy[i];
      fxyz[3*i+2] = fz[i];
    }
    sprintf(fname, "fxyz_slow_soa_pid%d_step%d.bin", NTESTPID, step);
    sprintf(remark, "SOA fxyz slow, patch %d, step %d", NTESTPID, step);
    TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
    delete[] fxyz;
  }
#endif

#if 0
  if (1 && patch->patchID == 0) {
    int numAtoms = patch->numAtoms;
    double *fxyz = new double[3*numAtoms];
    double *fx, *fy, *fz;
    char fname[64], remark[128];
    int step = patch->flags.step;

    fx = patch->patchDataSOA.f_slow_x;
    fy = patch->patchDataSOA.f_slow_y;
    fz = patch->patchDataSOA.f_slow_z;
    for (int i=0; i < numAtoms; i++) {
      fxyz[3*i ] = fx[i];
      fxyz[3*i+1] = fy[i];
      fxyz[3*i+2] = fz[i];
    }
    sprintf(fname, "fslow_soa_%d.bin", step);
    sprintf(remark, "SOA slow forces, step %d\n", step);
    TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);

    fx = patch->patchDataSOA.f_nbond_x;
    fy = patch->patchDataSOA.f_nbond_y;
    fz = patch->patchDataSOA.f_nbond_z;
    for (int i=0; i < numAtoms; i++) {
      fxyz[3*i ] = fx[i];
      fxyz[3*i+1] = fy[i];
      fxyz[3*i+2] = fz[i];
    }
    sprintf(fname, "fnbond_soa_%d.bin", step);
    sprintf(remark, "SOA nonbonded forces, step %d\n", step);
    TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);

    fx = patch->patchDataSOA.f_normal_x;
    fy = patch->patchDataSOA.f_normal_y;
    fz = patch->patchDataSOA.f_normal_z;
    for (int i=0; i < numAtoms; i++) {
      fxyz[3*i ] = fx[i];
      fxyz[3*i+1] = fy[i];
      fxyz[3*i+2] = fz[i];
    }
    sprintf(fname, "fnormal_soa_%d.bin", step);
    sprintf(remark, "SOA normal forces, step %d\n", step);
    TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);

    delete[] fxyz;
  }
#endif

#if 0
  //Will print forces here after runComputeObjects
  if(nstep == 1){
    fprintf(stderr, "CPU force arrays for alanin\n" );
    for(int i = 0; i < patch->patchDataSOA.numAtoms; i++){
      fprintf(stderr, "f[%i] = %lf %lf %lf | %lf %lf %lf | %lf %lf %lf\n", i,
        patch->patchDataSOA.f_normal_x[i], patch->patchDataSOA.f_normal_y[i], patch->patchDataSOA.f_normal_z[i],
        patch->patchDataSOA.f_nbond_x[i], patch->patchDataSOA.f_nbond_y[i], patch->patchDataSOA.f_nbond_z[i],
        patch->patchDataSOA.f_slow_x[i], patch->patchDataSOA.f_slow_y[i], patch->patchDataSOA.f_slow_z[i]);
    }
  }
#endif

  // NOTE(review): the opening "if ( ... ) {" matched by the brace below is
  // missing from this listing (extraction gap); presumably it tests
  // patch->flags.savePairlists — confirm against the full source.
  pairlistsAreValid = 1;
  pairlistsAge = 0;
  }
  // For multigrator, do not age pairlist during pressure step
  // NOTE: for non-multigrator pressureStep = 0 always
  if ( pairlistsAreValid /* && !pressureStep */ ) ++pairlistsAge;

  // XXX missing lonepairs
  // XXX missing Molly
  // XXX missing Lowe-Andersen
}
3835 
// NOTE(review): the function signature is missing above this brace
// (listing extraction gap); from the body this appears to be
// Sequencer::stochRescaleVelocities_SOA(int step) — confirm against the
// full source.
{
  // NOTE(review): line(s) missing here (extraction gap) — presumably an
  // "if ( ... ) {" guard matched by the first closing brace below.
  double * __restrict vel_x = patch->patchDataSOA.vel_x;
  double * __restrict vel_y = patch->patchDataSOA.vel_y;
  double * __restrict vel_z = patch->patchDataSOA.vel_z;
  int numAtoms = patch->patchDataSOA.numAtoms;
  // Blocking receive for the temperature coupling coefficient.
  BigReal velrescaling = broadcast->stochRescaleCoefficient.get(step);
  DebugM(4, "stochastically rescaling velocities at step " << step << " by " << velrescaling << "\n");
  // Uniformly rescale every velocity component by the received coefficient.
  for ( int i = 0; i < numAtoms; ++i ) {
    vel_x[i] *= velrescaling;
    vel_y[i] *= velrescaling;
    vel_z[i] *= velrescaling;
  }
  stochRescale_count = 0;
  }  // NOTE(review): closes the guard opened in the missing lines above
}
3859 
3860 //
3861 // end SOA code
3862 //
3864 
3865 #endif // SEQUENCER_SOA
3866 
3867 
3868 extern int eventEndOfTimeStep;
3869 
// Main integration loop for this patch (AOS path): velocity-Verlet with
// optional multiple-timestepping (MTS), thermostats/barostats, drags,
// constraints, and per-step reductions/collections.
//
// scriptTask - SCRIPT_RUN performs the startup sequence (initial rattle,
//              half-kicks, step-0 reductions) before entering the loop;
//              other values resume directly in the loop.
void Sequencer::integrate(int scriptTask) {
  char traceNote[24];
  char tracePrefix[20];
  sprintf(tracePrefix, "p:%d,s:",patch->patchID);
// patch->write_tip4_props();

  //
  // DJH: Copy all data into SOA (structure of arrays)
  // from AOS (array of structures) data structure.
  //
  //patch->copy_all_to_SOA();

#ifdef TIMER_COLLECTION
  TimerSet& t = patch->timerSet;
#endif
  TIMER_INIT_WIDTH(t, KICK, simParams->timerBinWidth);
  TIMER_INIT_WIDTH(t, MAXMOVE, simParams->timerBinWidth);
  TIMER_INIT_WIDTH(t, DRIFT, simParams->timerBinWidth);
  TIMER_INIT_WIDTH(t, PISTON, simParams->timerBinWidth);
  TIMER_INIT_WIDTH(t, SUBMITHALF, simParams->timerBinWidth);
  TIMER_INIT_WIDTH(t, VELBBK1, simParams->timerBinWidth);
  TIMER_INIT_WIDTH(t, VELBBK2, simParams->timerBinWidth);
  TIMER_INIT_WIDTH(t, RATTLE1, simParams->timerBinWidth);
  TIMER_INIT_WIDTH(t, SUBMITFULL, simParams->timerBinWidth);
  TIMER_INIT_WIDTH(t, SUBMITCOLLECT, simParams->timerBinWidth);

  int &step = patch->flags.step;
  step = simParams->firstTimestep;

  // drag switches
  const Bool rotDragOn = simParams->rotDragOn;
  const Bool movDragOn = simParams->movDragOn;

  const int commOnly = simParams->commOnly;

  int &maxForceUsed = patch->flags.maxForceUsed;
  int &maxForceMerged = patch->flags.maxForceMerged;
  maxForceUsed = Results::normal;
  maxForceMerged = Results::normal;

  const int numberOfSteps = simParams->N;
  const int stepsPerCycle = simParams->stepsPerCycle;
  const BigReal timestep = simParams->dt;

  // what MTS method?
  const int staleForces = ( simParams->MTSAlgorithm == NAIVE );

  const int nonbondedFrequency = simParams->nonbondedFrequency;
  slowFreq = nonbondedFrequency;
  // Effective nonbonded timestep; with "stale" (naive) MTS the nonbonded
  // force is applied every step at the base timestep.
  const BigReal nbondstep = timestep * (staleForces?1:nonbondedFrequency);
  int &doNonbonded = patch->flags.doNonbonded;
  doNonbonded = (step >= numberOfSteps) || !(step%nonbondedFrequency);
  if ( nonbondedFrequency == 1 ) maxForceMerged = Results::nbond;
  if ( doNonbonded ) maxForceUsed = Results::nbond;

  // Do we do full electrostatics?
  const int dofull = ( simParams->fullElectFrequency ? 1 : 0 );
  const int fullElectFrequency = simParams->fullElectFrequency;
  if ( dofull ) slowFreq = fullElectFrequency;
  const BigReal slowstep = timestep * (staleForces?1:fullElectFrequency);
  int &doFullElectrostatics = patch->flags.doFullElectrostatics;
  doFullElectrostatics = (dofull && ((step >= numberOfSteps) || !(step%fullElectFrequency)));
  if ( dofull && (fullElectFrequency == 1) && !(simParams->mollyOn) )
    maxForceMerged = Results::slow;
  if ( doFullElectrostatics ) maxForceUsed = Results::slow;

  // If doing LJ-PME, track doFullElectrostatics
  int &doFullDispersion = patch->flags.doFullDispersion;
  // NOTE(review): the opening "if ( ... ) {" matched by the brace below is
  // missing from this listing (extraction gap).
  doFullDispersion = (simParams->LJPMEOn && doFullElectrostatics);
  }

//#ifndef UPPER_BOUND
  const Bool accelMDOn = simParams->accelMDOn;
  const Bool accelMDdihe = simParams->accelMDdihe;
  const Bool accelMDdual = simParams->accelMDdual;
  if ( accelMDOn && (accelMDdihe || accelMDdual)) maxForceUsed = Results::amdf;

  // Is adaptive tempering on?
  const Bool adaptTempOn = simParams->adaptTempOn;
  // NOTE(review): a line is missing here (extraction gap).
  if (simParams->langevinOn)
    // NOTE(review): missing statement (extraction gap).
  else if (simParams->rescaleFreq > 0)
    // NOTE(review): missing statement (extraction gap).


  int &doMolly = patch->flags.doMolly;
  doMolly = simParams->mollyOn && doFullElectrostatics;
  // BEGIN LA
  int &doLoweAndersen = patch->flags.doLoweAndersen;
  doLoweAndersen = simParams->loweAndersenOn && doNonbonded;
  // END LA

  int &doGBIS = patch->flags.doGBIS;
  doGBIS = simParams->GBISOn;

  int &doLCPO = patch->flags.doLCPO;
  doLCPO = simParams->LCPOOn;

  int zeroMomentum = simParams->zeroMomentum;

  // Do we need to return forces to TCL script or Colvar module?
  int doTcl = simParams->tclForcesOn;
  int doColvars = simParams->colvarsOn;
//#endif
  int doGlobal = doTcl || doColvars;
  // NOTE(review): a line is missing here (extraction gap).

  // Bother to calculate energies?
  int &doEnergy = patch->flags.doEnergy;
  int energyFrequency = simParams->computeEnergies;
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
  if(simParams->alchOn) energyFrequency = NAMD_gcd(energyFrequency, simParams->alchOutFreq);
#endif
#ifndef UPPER_BOUND
  const int reassignFreq = simParams->reassignFreq;
#endif

  int &doVirial = patch->flags.doVirial;
  doVirial = 1;

  if ( scriptTask == SCRIPT_RUN ) {

// print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);

#ifndef UPPER_BOUND
// printf("Doing initial rattle\n");
#ifndef UPPER_BOUND
D_MSG("rattle1()");
  TIMER_START(t, RATTLE1);
  rattle1(0.,0); // enforce rigid bond constraints on initial positions
  TIMER_STOP(t, RATTLE1);
#endif

  // NOTE(review): two lines are missing here (extraction gap) — presumably
  // "if ( simParams->lonepairs ) { patch->reposition_all_lonepairs(" —
  // the dangling argument list and brace below belong to that call.
  patch->atom.begin(),patch->atom.end());
  }

  if ( !commOnly && ( reassignFreq>0 ) && ! (step%reassignFreq) ) {
    reassignVelocities(timestep,step);
  }
#endif

  doEnergy = ! ( step % energyFrequency );
#ifndef UPPER_BOUND
  if ( accelMDOn && !accelMDdihe ) doEnergy=1;
  //Update energy every timestep for adaptive tempering
  if ( adaptTempOn ) doEnergy=1;
#endif
// print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
D_MSG("runComputeObjects()");
  runComputeObjects(1,step<numberOfSteps); // must migrate here!
#ifndef UPPER_BOUND
  rescaleaccelMD(step, doNonbonded, doFullElectrostatics); // for accelMD
  adaptTempUpdate(step); // update adaptive tempering temperature
#endif

#ifndef UPPER_BOUND
  // Keep copies of the nonbonded/slow forces when MTS reuses them or a
  // global-forces client (Tcl/colvars) needs them.
  if ( staleForces || doGlobal ) {
    if ( doNonbonded ) saveForce(Results::nbond);
    if ( doFullElectrostatics ) saveForce(Results::slow);
  }
// print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
  if ( ! commOnly ) {
D_MSG("newtonianVelocities()");
    TIMER_START(t, KICK);
    newtonianVelocities(-0.5,timestep,nbondstep,slowstep,0,1,1);
    TIMER_STOP(t, KICK);
  }
// print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
#ifndef UPPER_BOUND
D_MSG("rattle1()");
  TIMER_START(t, RATTLE1);
  rattle1(-timestep,0);
  TIMER_STOP(t, RATTLE1);
#endif
// print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
D_MSG("submitHalfstep()");
  TIMER_START(t, SUBMITHALF);
  submitHalfstep(step);
  TIMER_STOP(t, SUBMITHALF);
// print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
  if ( ! commOnly ) {
D_MSG("newtonianVelocities()");
    TIMER_START(t, KICK);
    newtonianVelocities(1.0,timestep,nbondstep,slowstep,0,1,1);
    TIMER_STOP(t, KICK);
  }
// print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
D_MSG("rattle1()");
  TIMER_START(t, RATTLE1);
  rattle1(timestep,1);
  TIMER_STOP(t, RATTLE1);
  if (doGlobal) // include constraint forces
    computeGlobal->saveTotalForces(patch);
// print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
D_MSG("submitHalfstep()");
  TIMER_START(t, SUBMITHALF);
  submitHalfstep(step);
  TIMER_STOP(t, SUBMITHALF);
  if ( zeroMomentum && doFullElectrostatics ) submitMomentum(step);
  if ( ! commOnly ) {
D_MSG("newtonianVelocities()");
    TIMER_START(t, KICK);
    newtonianVelocities(-0.5,timestep,nbondstep,slowstep,0,1,1);
    TIMER_STOP(t, KICK);
  }
// print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
#endif
D_MSG("submitReductions()");
  TIMER_START(t, SUBMITFULL);
  submitReductions(step);
  TIMER_STOP(t, SUBMITFULL);
// print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
#ifndef UPPER_BOUND
  if(0){ // if(traceIsOn()){
    traceUserEvent(eventEndOfTimeStep);
    sprintf(traceNote, "%s%d",tracePrefix,step);
    traceUserSuppliedNote(traceNote);
  }
#endif
  rebalanceLoad(step);

  } // scriptTask == SCRIPT_RUN

#ifndef UPPER_BOUND
  bool doMultigratorRattle = false;
#endif

  //
  // DJH: There are a lot of mod operations below and elsewhere to
  // test step number against the frequency of something happening.
  // Mod and integer division are expensive!
  // Might be better to replace with counters and test equality.
  //
#if 0
  for(int i = 0; i < NamdProfileEvent::EventsCount; i++)
    CkPrintf("-------------- [%d] %s -------------\n", i, NamdProfileEventStr[i]);
#endif

#if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
  int& eon = patch->flags.event_on;
  int epid = (simParams->beginEventPatchID <= patch->getPatchID()
      && patch->getPatchID() <= simParams->endEventPatchID);
  int beginStep = simParams->beginEventStep;
  int endStep = simParams->endEventStep;
  bool controlProfiling = patch->getPatchID() == 0;
#endif

  for ( ++step; step <= numberOfSteps; ++step )
  {
#if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
    eon = epid && (beginStep < step && step <= endStep);

    if (controlProfiling && step == beginStep) {
      // NOTE(review): missing line (extraction gap) — presumably a
      // profiler-start call.
    }
    if (controlProfiling && step == endStep) {
      // NOTE(review): missing line (extraction gap) — presumably a
      // profiler-stop call.
    }
    char buf[32];
    sprintf(buf, "%s: %d", NamdProfileEventStr[NamdProfileEvent::INTEGRATE_1], patch->getPatchID());
    NAMD_EVENT_START_EX(eon, NamdProfileEvent::INTEGRATE_1, buf);
#endif
    DebugM(3,"for step "<<step<< " dGlobal " << doGlobal<<"\n"<<endi);
#ifndef UPPER_BOUND
    rescaleVelocities(step);
    tcoupleVelocities(timestep,step);
    if ( simParams->stochRescaleOn ) {
      stochRescaleVelocities(step);
    }
    berendsenPressure(step);

    if ( ! commOnly ) {
      TIMER_START(t, KICK);
      newtonianVelocities(0.5,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
      TIMER_STOP(t, KICK);
    }

    // We do RATTLE here if multigrator thermostat was applied in the previous step
    if (doMultigratorRattle) rattle1(timestep, 1);

    /* reassignment based on half-step velocities
    if ( !commOnly && ( reassignFreq>0 ) && ! (step%reassignFreq) ) {
      addVelocityToPosition(0.5*timestep);
      reassignVelocities(timestep,step);
      addVelocityToPosition(0.5*timestep);
      rattle1(0.,0);
      rattle1(-timestep,0);
      addVelocityToPosition(-1.0*timestep);
      rattle1(timestep,0);
    } */

    TIMER_START(t, MAXMOVE);
    maximumMove(timestep);
    TIMER_STOP(t, MAXMOVE);

    NAMD_EVENT_STOP(eon, NamdProfileEvent::INTEGRATE_1); // integrate 1

    // NOTE(review): the opening "if ( ... ) {" for the two-half-step
    // (Langevin/piston) branch below is missing from this listing
    // (extraction gap); the "} else {" further down matches it.
    if ( ! commOnly ) {
      TIMER_START(t, DRIFT);
      addVelocityToPosition(0.5*timestep);
      TIMER_STOP(t, DRIFT);
    }
    // We add an Ornstein-Uhlenbeck integration step for the case of BAOAB (Langevin)
    langevinVelocities(timestep);

    // There is a blocking receive inside of langevinPiston()
    // that might suspend the current thread of execution,
    // so split profiling around this conditional block.
    langevinPiston(step);

    if ( ! commOnly ) {
      TIMER_START(t, DRIFT);
      addVelocityToPosition(0.5*timestep);
      TIMER_STOP(t, DRIFT);
    }
    } else {
    // If Langevin is not used, take full time step directly instread of two half steps
    if ( ! commOnly ) {
      TIMER_START(t, DRIFT);
      addVelocityToPosition(timestep);
      TIMER_STOP(t, DRIFT);
    }
    }

    NAMD_EVENT_START(eon, NamdProfileEvent::INTEGRATE_2);

    // impose hard wall potential for Drude bond length
    hardWallDrude(timestep, 1);

    // NOTE(review): a line is missing here (extraction gap).
#endif // UPPER_BOUND

    doNonbonded = !(step%nonbondedFrequency);
    doFullElectrostatics = (dofull && !(step%fullElectFrequency));
    // NOTE(review): the opening "if ( ... ) {" matched by the brace below
    // is missing from this listing (extraction gap).
    // XXX in preparation for supporting LJ-PME with MTS
    doFullDispersion = (simParams->LJPMEOn && doFullElectrostatics);
    }

#ifndef UPPER_BOUND
    if ( zeroMomentum && doFullElectrostatics ) {
      // There is a blocking receive inside of correctMomentum().
      correctMomentum(step,slowstep);
    }

    // There are NO sends in submitHalfstep() just local summation
    // into the Reduction struct.
    TIMER_START(t, SUBMITHALF);
    submitHalfstep(step);
    TIMER_STOP(t, SUBMITHALF);

    doMolly = simParams->mollyOn && doFullElectrostatics;
    // BEGIN LA
    doLoweAndersen = simParams->loweAndersenOn && doNonbonded;
    // END LA

    maxForceUsed = Results::normal;
    if ( doNonbonded ) maxForceUsed = Results::nbond;
    if ( doFullElectrostatics ) maxForceUsed = Results::slow;
    if ( accelMDOn && (accelMDdihe || accelMDdual)) maxForceUsed = Results::amdf;

    // Migrate Atoms on stepsPerCycle
    doEnergy = ! ( step % energyFrequency );
    if ( accelMDOn && !accelMDdihe ) doEnergy=1;
    if ( adaptTempOn ) doEnergy=1;

    // Multigrator
    if (simParams->multigratorOn) {
      doVirial = (!(step % energyFrequency) || ((simParams->outputPressure > 0) && !(step % simParams->outputPressure))
        || !(step % simParams->multigratorPressureFreq));
      doKineticEnergy = (!(step % energyFrequency) || !(step % simParams->multigratorTemperatureFreq));
      doMomenta = (simParams->outputMomenta > 0) && !(step % simParams->outputMomenta);
    } else {
      doVirial = 1;
      doKineticEnergy = 1;
      doMomenta = 1;
    }
#endif
    NAMD_EVENT_STOP(eon, NamdProfileEvent::INTEGRATE_2); // integrate 2

    // The current thread of execution will suspend in runComputeObjects().
    runComputeObjects(!(step%stepsPerCycle),step<numberOfSteps);

    NAMD_EVENT_START(eon, NamdProfileEvent::INTEGRATE_3);

#ifndef UPPER_BOUND
    rescaleaccelMD(step, doNonbonded, doFullElectrostatics); // for accelMD

    if ( staleForces || doGlobal ) {
      if ( doNonbonded ) saveForce(Results::nbond);
      if ( doFullElectrostatics ) saveForce(Results::slow);
    }

    // reassignment based on full-step velocities
    if ( !commOnly && ( reassignFreq>0 ) && ! (step%reassignFreq) ) {
      reassignVelocities(timestep,step);
      newtonianVelocities(-0.5,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
      rattle1(-timestep,0);
    }

    if ( ! commOnly ) {
      TIMER_START(t, VELBBK1);
      langevinVelocitiesBBK1(timestep);
      TIMER_STOP(t, VELBBK1);
      TIMER_START(t, KICK);
      newtonianVelocities(1.0,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
      TIMER_STOP(t, KICK);
      TIMER_START(t, VELBBK2);
      langevinVelocitiesBBK2(timestep);
      TIMER_STOP(t, VELBBK2);
    }

    // add drag to each atom's positions
    if ( ! commOnly && movDragOn ) addMovDragToPosition(timestep);
    if ( ! commOnly && rotDragOn ) addRotDragToPosition(timestep);

    TIMER_START(t, RATTLE1);
    rattle1(timestep,1);
    TIMER_STOP(t, RATTLE1);
    if (doGlobal) // include constraint forces
      computeGlobal->saveTotalForces(patch);

    TIMER_START(t, SUBMITHALF);
    submitHalfstep(step);
    TIMER_STOP(t, SUBMITHALF);
    if ( zeroMomentum && doFullElectrostatics ) submitMomentum(step);

    if ( ! commOnly ) {
      TIMER_START(t, KICK);
      newtonianVelocities(-0.5,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
      TIMER_STOP(t, KICK);
    }

    // rattle2(timestep,step);
#endif

    TIMER_START(t, SUBMITFULL);
    submitReductions(step);
    TIMER_STOP(t, SUBMITFULL);
    TIMER_START(t, SUBMITCOLLECT);
    submitCollections(step);
    TIMER_STOP(t, SUBMITCOLLECT);
#ifndef UPPER_BOUND
    //Update adaptive tempering temperature
    adaptTempUpdate(step);

    // Multigrator temperature and pressure steps
    multigratorTemperature(step, 1);
    multigratorPressure(step, 1);
    multigratorPressure(step, 2);
    multigratorTemperature(step, 2);
    doMultigratorRattle = (simParams->multigratorOn && !(step % simParams->multigratorTemperatureFreq));

    NAMD_EVENT_STOP(eon, NamdProfileEvent::INTEGRATE_3); // integrate 3
#endif

#if CYCLE_BARRIER
    cycleBarrier(!((step+1) % stepsPerCycle), step);
#elif PME_BARRIER
    cycleBarrier(doFullElectrostatics, step);
#elif STEP_BARRIER
    cycleBarrier(1, step);
#endif

#ifndef UPPER_BOUND
    if(Node::Object()->specialTracing || simParams->statsOn){
      int bstep = simParams->traceStartStep;
      int estep = bstep + simParams->numTraceSteps;
      if(step == bstep || step == estep){
        traceBarrier(step);
      }
    }

#ifdef MEASURE_NAMD_WITH_PAPI
    if(simParams->papiMeasure) {
      int bstep = simParams->papiMeasureStartStep;
      int estep = bstep + simParams->numPapiMeasureSteps;
      if(step == bstep || step==estep) {
        papiMeasureBarrier(step);
      }
    }
#endif

    if(0){ // if(traceIsOn()){
      traceUserEvent(eventEndOfTimeStep);
      sprintf(traceNote, "%s%d",tracePrefix,step);
      traceUserSuppliedNote(traceNote);
    }
#endif // UPPER_BOUND
    rebalanceLoad(step);

#if PME_BARRIER
    // a step before PME
    cycleBarrier(dofull && !((step+1)%fullElectFrequency),step);
#endif

#if USE_HPM
    if(step == START_HPM_STEP)
      (CProxy_Node(CkpvAccess(BOCclass_group).node)).startHPM();

    if(step == STOP_HPM_STEP)
      (CProxy_Node(CkpvAccess(BOCclass_group).node)).stopHPM();
#endif

  }

  TIMER_DONE(t);
#ifdef TIMER_COLLECTION
  if (patch->patchID == SPECIAL_PATCH_ID) {
    printf("Timer collection reporting in microseconds for "
        "Patch %d\n", patch->patchID);
    TIMER_REPORT(t);
  }
#endif // TIMER_COLLECTION
  //
  // DJH: Copy updates of SOA back into AOS.
  //
  //patch->copy_updates_to_AOS();
}
4395 
// add moving drag to each atom's position
// NOTE(review): the signature line is missing from this listing (extraction
// gap); presumably "void Sequencer::addMovDragToPosition(BigReal timestep) {"
// — confirm against the full source.
  FullAtom *atom = patch->atom.begin();
  int numAtoms = patch->numAtoms;
  Molecule *molecule = Node::Object()->molecule; // need its methods
  const BigReal movDragGlobVel = simParams->movDragGlobVel;
  const BigReal dt = timestep / TIMEFACTOR; // MUST be as in the integrator!
  Vector movDragVel, dragIncrement;
  for ( int i = 0; i < numAtoms; ++i )
  {
    // skip if fixed atom or zero drag attribute
    if ( (simParams->fixedAtomsOn && atom[i].atomFixed)
         || !(molecule->is_atom_movdragged(atom[i].id)) ) continue;
    molecule->get_movdrag_params(movDragVel, atom[i].id);
    // displacement = global drag scale * per-atom drag velocity * dt
    dragIncrement = movDragGlobVel * movDragVel * dt;
    atom[i].position += dragIncrement;
  }
}
4414 
// add rotating drag to each atom's position
// NOTE(review): the signature line is missing from this listing (extraction
// gap); presumably "void Sequencer::addRotDragToPosition(BigReal timestep) {"
// — confirm against the full source.
  FullAtom *atom = patch->atom.begin();
  int numAtoms = patch->numAtoms;
  Molecule *molecule = Node::Object()->molecule; // need its methods
  const BigReal rotDragGlobVel = simParams->rotDragGlobVel;
  const BigReal dt = timestep / TIMEFACTOR; // MUST be as in the integrator!
  BigReal rotDragVel, dAngle;
  Vector atomRadius;
  Vector rotDragAxis, rotDragPivot, dragIncrement;
  for ( int i = 0; i < numAtoms; ++i )
  {
    // skip if fixed atom or zero drag attribute
    if ( (simParams->fixedAtomsOn && atom[i].atomFixed)
         || !(molecule->is_atom_rotdragged(atom[i].id)) ) continue;
    molecule->get_rotdrag_params(rotDragVel, rotDragAxis, rotDragPivot, atom[i].id);
    // rotate by dAngle about the (normalized) drag axis through the pivot
    dAngle = rotDragGlobVel * rotDragVel * dt;
    rotDragAxis /= rotDragAxis.length();
    atomRadius = atom[i].position - rotDragPivot;
    dragIncrement = cross(rotDragAxis, atomRadius) * dAngle;
    atom[i].position += dragIncrement;
  }
}
4438 
// NOTE(review): the signature line for this function is missing from this
// listing (extraction gap); presumably "void Sequencer::minimize() {" —
// confirm against the full source.  Conjugate-gradient-style energy
// minimization loop driven by coefficients broadcast from the Controller.
  //
  // DJH: Copy all data into SOA (structure of arrays)
  // from AOS (array of structures) data structure.
  //
  //patch->copy_all_to_SOA();

  const int numberOfSteps = simParams->N;
  const int stepsPerCycle = simParams->stepsPerCycle;
#if 0 && defined(NODEGROUP_FORCE_REGISTER)
  // XXX DJH: This is a hack that is found to get GPU nonbonded
  // force calculation right for --with-single-node-cuda builds
  const int stepsPerCycle_save = stepsPerCycle;
  simParams->stepsPerCycle = 1;
#endif
  int &step = patch->flags.step;
  step = simParams->firstTimestep;

  int &maxForceUsed = patch->flags.maxForceUsed;
  int &maxForceMerged = patch->flags.maxForceMerged;
  maxForceUsed = Results::normal;
  maxForceMerged = Results::normal;
  int &doNonbonded = patch->flags.doNonbonded;
  doNonbonded = 1;
  maxForceUsed = Results::nbond;
  maxForceMerged = Results::nbond;
  const int dofull = ( simParams->fullElectFrequency ? 1 : 0 );
  int &doFullElectrostatics = patch->flags.doFullElectrostatics;
  doFullElectrostatics = dofull;
  if ( dofull ) {
    maxForceMerged = Results::slow;
    maxForceUsed = Results::slow;
  }
  int &doMolly = patch->flags.doMolly;
  doMolly = simParams->mollyOn && doFullElectrostatics;
  int &doMinimize = patch->flags.doMinimize;
  doMinimize = 1;
  // BEGIN LA
  int &doLoweAndersen = patch->flags.doLoweAndersen;
  doLoweAndersen = 0;
  // END LA

  int &doGBIS = patch->flags.doGBIS;
  doGBIS = simParams->GBISOn;

  int &doLCPO = patch->flags.doLCPO;
  doLCPO = simParams->LCPOOn;

  int doTcl = simParams->tclForcesOn;
  int doColvars = simParams->colvarsOn;
  int doGlobal = doTcl || doColvars;
  // NOTE(review): a line is missing here (extraction gap).

  int &doEnergy = patch->flags.doEnergy;
  doEnergy = 1;

  // Do this to stabilize the minimizer, whether or not the user
  // wants rigid bond constraints enabled for dynamics.
  // In order to enforce, we have to call HomePatch::rattle1() directly.
  patch->rattle1(0.,0,0); // enforce rigid bond constraints on initial positions

  // NOTE(review): two lines are missing here (extraction gap) — presumably
  // "if ( simParams->lonepairs ) { patch->reposition_all_lonepairs(" —
  // the dangling argument list and brace below belong to that call.
  patch->atom.begin(),patch->atom.end());
  }

  runComputeObjects(1,step<numberOfSteps); // must migrate here!

  if ( doGlobal ) {
#ifdef DEBUG_MINIMIZE
    printf("doTcl = %d doColvars = %d\n", doTcl, doColvars);
#endif
    if ( doNonbonded ) saveForce(Results::nbond);
    if ( doFullElectrostatics ) saveForce(Results::slow);
    computeGlobal->saveTotalForces(patch);
  }
#ifdef DEBUG_MINIMIZE
  else { printf("No computeGlobal\n"); }
#endif

  // NOTE(review): a line is missing here (extraction gap) — presumably the
  // declaration/initialization of fmax2 used below.

  submitMinimizeReductions(step,fmax2);
  rebalanceLoad(step);

  int downhill = 1; // start out just fixing bad contacts
  int minSeq = 0;
  for ( ++step; step <= numberOfSteps; ++step ) {
    // Blocking receive for the minimization coefficient.
    BigReal c = broadcast->minimizeCoefficient.get(minSeq++);

    if ( downhill ) {
      if ( c ) minimizeMoveDownhill(fmax2);
      else {
        // switch to conjugate-gradient phase with a relaxed force cap
        downhill = 0;
        fmax2 *= 10000.;
      }
    }
    if ( ! downhill ) {
      if ( ! c ) { // new direction

        // Blocking receive for the minimization coefficient.
        c = broadcast->minimizeCoefficient.get(minSeq++);

        newMinimizeDirection(c); // v = c * v + f

        // Blocking receive for the minimization coefficient.
        c = broadcast->minimizeCoefficient.get(minSeq++);

      } // same direction
      newMinimizePosition(c); // x = x + c * v
    }

    runComputeObjects(!(step%stepsPerCycle),step<numberOfSteps);
    if ( doGlobal ) {
      if ( doNonbonded ) saveForce(Results::nbond);
      if ( doFullElectrostatics ) saveForce(Results::slow);
      computeGlobal->saveTotalForces(patch);
    }
    submitMinimizeReductions(step,fmax2);
    submitCollections(step, 1); // write out zeros for velocities
    rebalanceLoad(step);
  }
  quenchVelocities(); // zero out bogus velocity

  doMinimize = 0;

#if 0
  // when using CUDASOAintegrate, need to update SOA data structures
  // NOTE(review): a line is missing here (extraction gap) — presumably
  // "if (simParams->CUDASOAintegrate) {".
  patch->copy_atoms_to_SOA();
  }
#endif

#if 0 && defined(NODEGROUP_FORCE_REGISTER)
  // XXX DJH: all patches in a PE are writing into simParams
  // so this hack needs a guard
  simParams->stepsPerCycle = stepsPerCycle_save;
#endif
  //
  // DJH: Copy updates of SOA back into AOS.
  //
  //patch->copy_updates_to_AOS();
}
4583 
// x = x + 0.1 * unit(f) for large f

// NOTE(review): the signature line is missing from this listing (extraction
// gap); presumably "void Sequencer::minimizeMoveDownhill(BigReal fmax2) {"
// — confirm against the full source.
  FullAtom *a = patch->atom.begin();
  Force *f1 = patch->f[Results::normal].begin(); // includes nbond and slow
  int numAtoms = patch->numAtoms;

  // Nudge any non-fixed atom with squared force above fmax2 by 0.1 along
  // the force direction; its hydrogen-group children move identically.
  for ( int i = 0; i < numAtoms; ++i ) {
    if ( simParams->fixedAtomsOn && a[i].atomFixed ) continue;
    Force f = f1[i];
    if ( f.length2() > fmax2 ) {
      a[i].position += ( 0.1 * f.unit() );
      int hgs = a[i].hydrogenGroupSize; // 0 if not parent
      for ( int j=1; j<hgs; ++j ) {
        a[++i].position += ( 0.1 * f.unit() );
      }
    }
  }

  // Re-enforce rigid bond constraints after the moves.
  patch->rattle1(0.,0,0);
}
4605 
// v = c * v + f
// NOTE(review): the signature line is missing from this listing (extraction
// gap); presumably "void Sequencer::newMinimizeDirection(BigReal c) {" —
// confirm against the full source.  Builds the next conjugate-gradient
// search direction in the velocity array and submits its max squared norm.
  FullAtom *a = patch->atom.begin();
  Force *f1 = patch->f[Results::normal].begin(); // includes nbond and slow
  const bool fixedAtomsOn = simParams->fixedAtomsOn;
  const bool drudeHardWallOn = simParams->drudeHardWallOn;
  int numAtoms = patch->numAtoms;
  BigReal maxv2 = 0.;

  // Scale the previous direction by c and add the current forces.
  for ( int i = 0; i < numAtoms; ++i ) {
    a[i].velocity *= c;
    a[i].velocity += f1[i];
    if ( drudeHardWallOn && i && (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) { // drude particle
      a[i].velocity = a[i-1].velocity;
    }
    if ( fixedAtomsOn && a[i].atomFixed ) a[i].velocity = 0;
    BigReal v2 = a[i].velocity.length2();
    if ( v2 > maxv2 ) maxv2 = v2;
  }

  // Project the direction onto the constraint manifold.
  { Tensor virial; patch->minimize_rattle2( 0.1 * TIMEFACTOR / sqrt(maxv2), &virial); }

  // Recompute the max squared direction norm after projection.
  maxv2 = 0.;
  for ( int i = 0; i < numAtoms; ++i ) {
    if ( drudeHardWallOn && i && (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) { // drude particle
      a[i].velocity = a[i-1].velocity;
    }
    if ( fixedAtomsOn && a[i].atomFixed ) a[i].velocity = 0;
    BigReal v2 = a[i].velocity.length2();
    if ( v2 > maxv2 ) maxv2 = v2;
  }

  min_reduction->max(0,maxv2);
  min_reduction->submit();

  // prevent hydrogens from being left behind
  BigReal fmax2 = 0.01 * TIMEFACTOR * TIMEFACTOR * TIMEFACTOR * TIMEFACTOR;
  // int adjustCount = 0;
  int hgs;
  for ( int i = 0; i < numAtoms; i += hgs ) {
    hgs = a[i].hydrogenGroupSize;
    BigReal minChildVel = a[i].velocity.length2();
    if ( minChildVel < fmax2 ) continue;
    int adjustChildren = 1;
    for ( int j = i+1; j < (i+hgs); ++j ) {
      if ( a[j].velocity.length2() > minChildVel ) adjustChildren = 0;
    }
    if ( adjustChildren ) {
      // if ( hgs > 1 ) ++adjustCount;
      for ( int j = i+1; j < (i+hgs); ++j ) {
        if (a[i].mass < 0.01) continue; // lone pair
        a[j].velocity = a[i].velocity;
      }
    }
  }
  // if (adjustCount) CkPrintf("Adjusting %d hydrogen groups\n", adjustCount);

}
4664 
4665 // x = x + c * v
// NOTE(review): the signature line (doxygen line 4666) was lost in this
// extraction; presumably void Sequencer::newMinimizePosition(BigReal c)
// -- confirm. Advances positions by c times the stored search direction.
// For Drude hard-wall runs, Drude particle positions are temporarily made
// relative to the parent atom around the rattle1 call and then restored.
4667  FullAtom *a = patch->atom.begin();
4668  int numAtoms = patch->numAtoms;
4669 
4670  for ( int i = 0; i < numAtoms; ++i ) {
4671  a[i].position += c * a[i].velocity;
4672  }
4673 
4674  if ( simParams->drudeHardWallOn ) {
4675  for ( int i = 1; i < numAtoms; ++i ) {
4676  if ( (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) { // drude particle
4677  a[i].position -= a[i-1].position;
4678  }
4679  }
4680  }
4681 
4682  patch->rattle1(0.,0,0);
4683 
4684  if ( simParams->drudeHardWallOn ) {
4685  for ( int i = 1; i < numAtoms; ++i ) {
4686  if ( (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) { // drude particle
4687  a[i].position += a[i-1].position;
4688  }
4689  }
4690  }
4691 }
4692 
4693 // v = 0
// NOTE(review): the signature line (doxygen line 4694) was lost in this
// extraction; presumably void Sequencer::quenchVelocities() -- confirm.
// Zeroes every atom's velocity; used after minimization to discard the
// bogus values accumulated in the velocity slots (see caller above).
4695  FullAtom *a = patch->atom.begin();
4696  int numAtoms = patch->numAtoms;
4697 
4698  for ( int i = 0; i < numAtoms; ++i ) {
4699  a[i].velocity = 0;
4700  }
4701 }
4702 
// NOTE(review): the signature line (doxygen line 4703) was lost in this
// extraction; presumably void Sequencer::submitMomentum(int step)
// -- confirm. Accumulates this patch's total momentum plus a mass tally
// into the reduction so the Controller can derive a momentum correction.
4704 
4705  FullAtom *a = patch->atom.begin();
4706  const int numAtoms = patch->numAtoms;
4707 
4708  Vector momentum = 0;
4709  BigReal mass = 0;
// zeroMomentumAlt tallies a count of atoms (1 per atom) instead of total
// mass; correctMomentum() correspondingly distributes per recipMass.
4710 if ( simParams->zeroMomentumAlt ) {
4711  for ( int i = 0; i < numAtoms; ++i ) {
4712  momentum += a[i].mass * a[i].velocity;
4713  mass += 1.;
4714  }
4715 } else {
4716  for ( int i = 0; i < numAtoms; ++i ) {
4717  momentum += a[i].mass * a[i].velocity;
4718  mass += a[i].mass;
4719  }
4720 }
4721 
4722  ADD_VECTOR_OBJECT(reduction,REDUCTION_HALFSTEP_MOMENTUM,momentum);
// NOTE(review): a line (doxygen 4723), presumably submitting the mass
// tally to the reduction, is missing from this extraction.
4724 }
4725 
4726 void Sequencer::correctMomentum(int step, BigReal drifttime) {
4727 
4728  //
4729  // DJH: This test should be done in SimParameters.
4730  //
4731  if ( simParams->fixedAtomsOn )
4732  NAMD_die("Cannot zero momentum when fixed atoms are present.");
4733 
4734  // Blocking receive for the momentum correction vector.
4735  const Vector dv = broadcast->momentumCorrection.get(step);
4736 
4737  const Vector dx = dv * ( drifttime / TIMEFACTOR );
4738 
4739  FullAtom *a = patch->atom.begin();
4740  const int numAtoms = patch->numAtoms;
4741 
4742 if ( simParams->zeroMomentumAlt ) {
4743  for ( int i = 0; i < numAtoms; ++i ) {
4744  a[i].velocity += dv * a[i].recipMass;
4745  a[i].position += dx * a[i].recipMass;
4746  }
4747 } else {
4748  for ( int i = 0; i < numAtoms; ++i ) {
4749  a[i].velocity += dv;
4750  a[i].position += dx;
4751  }
4752 }
4753 
4754 }
4755 
4756 // --------- For Multigrator ---------
4757 void Sequencer::scalePositionsVelocities(const Tensor& posScale, const Tensor& velScale) {
4758  FullAtom *a = patch->atom.begin();
4759  int numAtoms = patch->numAtoms;
4760  Position origin = patch->lattice.origin();
4761  if ( simParams->fixedAtomsOn ) {
4762  NAMD_bug("Sequencer::scalePositionsVelocities, fixed atoms not implemented");
4763  }
4764  if ( simParams->useGroupPressure ) {
4765  int hgs;
4766  for ( int i = 0; i < numAtoms; i += hgs ) {
4767  hgs = a[i].hydrogenGroupSize;
4768  Position pos_cm(0.0, 0.0, 0.0);
4769  Velocity vel_cm(0.0, 0.0, 0.0);
4770  BigReal m_cm = 0.0;
4771  for (int j=0;j < hgs;++j) {
4772  m_cm += a[i+j].mass;
4773  pos_cm += a[i+j].mass*a[i+j].position;
4774  vel_cm += a[i+j].mass*a[i+j].velocity;
4775  }
4776  pos_cm /= m_cm;
4777  vel_cm /= m_cm;
4778  pos_cm -= origin;
4779  Position dpos = posScale*pos_cm;
4780  Velocity dvel = velScale*vel_cm;
4781  for (int j=0;j < hgs;++j) {
4782  a[i+j].position += dpos;
4783  a[i+j].velocity += dvel;
4784  }
4785  }
4786  } else {
4787  for ( int i = 0; i < numAtoms; i++) {
4788  a[i].position += posScale*(a[i].position-origin);
4789  a[i].velocity = velScale*a[i].velocity;
4790  }
4791  }
4792 }
4793 
4794 void Sequencer::multigratorPressure(int step, int callNumber) {
4795 // Calculate new positions, momenta, and volume using positionRescaleFactor and
4796 // velocityRescaleTensor values returned from Controller::multigratorPressureCalcScale()
// NOTE(review): an opening guard line (doxygen 4797, presumably
// "if (simParams->multigratorOn) {") was lost in this extraction; the
// extra closing brace at the end of this function matches it.
4798  FullAtom *a = patch->atom.begin();
4799  int numAtoms = patch->numAtoms;
4800 
4801  // Blocking receive (get) scaling factors from Controller
4802  Tensor scaleTensor = (callNumber == 1) ? broadcast->positionRescaleFactor.get(step) : broadcast->positionRescaleFactor2.get(step);
4803  Tensor velScaleTensor = (callNumber == 1) ? broadcast->velocityRescaleTensor.get(step) : broadcast->velocityRescaleTensor2.get(step);
4804  Tensor posScaleTensor = scaleTensor;
4805  posScaleTensor -= Tensor::identity();
4806  if (simParams->useGroupPressure) {
4807  velScaleTensor -= Tensor::identity();
4808  }
4809 
4810  // Scale volume
4811  patch->lattice.rescale(scaleTensor);
4812  // Scale positions and velocities
4813  scalePositionsVelocities(posScaleTensor, velScaleTensor);
4814 
4815  if (!patch->flags.doFullElectrostatics) NAMD_bug("Sequencer::multigratorPressure, doFullElectrostatics must be true");
4816 
4817  // Calculate new forces
4818  // NOTE: We should not need to migrate here since any migration should have happened in the
4819  // previous call to runComputeObjects inside the MD loop in Sequencer::integrate()
4820  const int numberOfSteps = simParams->N;
4821  const int stepsPerCycle = simParams->stepsPerCycle;
4822  runComputeObjects(0 , step<numberOfSteps, 1);
4823 
4824  reduction->item(REDUCTION_ATOM_CHECKSUM) += numAtoms;
// NOTE(review): a reduction line (doxygen 4825) is missing from this
// extraction.
4826 
4827  // Virials etc.
4828  Tensor virialNormal;
4829  Tensor momentumSqrSum;
4830  BigReal kineticEnergy = 0;
4831  if ( simParams->pairInteractionOn ) {
4832  if ( simParams->pairInteractionSelf ) {
4833  for ( int i = 0; i < numAtoms; ++i ) {
4834  if ( a[i].partition != 1 ) continue;
4835  kineticEnergy += a[i].mass * a[i].velocity.length2();
4836  virialNormal.outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
4837  }
4838  }
4839  } else {
4840  for ( int i = 0; i < numAtoms; ++i ) {
// Skip (near-)massless particles, e.g. lone pairs.
4841  if (a[i].mass < 0.01) continue;
4842  kineticEnergy += a[i].mass * a[i].velocity.length2();
4843  virialNormal.outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
4844  }
4845  }
4846  if (!simParams->useGroupPressure) momentumSqrSum = virialNormal;
4847  kineticEnergy *= 0.5;
// NOTE(review): a kinetic-energy reduction line (doxygen 4848) is missing
// from this extraction.
4849  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NORMAL, virialNormal);
4850 
4851  if ( simParams->fixedAtomsOn ) {
4852  Tensor fixVirialNormal;
4853  Tensor fixVirialNbond;
4854  Tensor fixVirialSlow;
4855  Vector fixForceNormal = 0;
4856  Vector fixForceNbond = 0;
4857  Vector fixForceSlow = 0;
4858 
4859  calcFixVirial(fixVirialNormal, fixVirialNbond, fixVirialSlow, fixForceNormal, fixForceNbond, fixForceSlow);
4860 
4861  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NORMAL, fixVirialNormal);
4862  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NBOND, fixVirialNbond);
4863  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_SLOW, fixVirialSlow);
4864  ADD_VECTOR_OBJECT(reduction, REDUCTION_EXT_FORCE_NORMAL, fixForceNormal);
4865  ADD_VECTOR_OBJECT(reduction, REDUCTION_EXT_FORCE_NBOND, fixForceNbond);
4866  ADD_VECTOR_OBJECT(reduction, REDUCTION_EXT_FORCE_SLOW, fixForceSlow);
4867  }
4868 
4869  // Internal virial and group momentum
4870  Tensor intVirialNormal;
4871  Tensor intVirialNormal2;
4872  Tensor intVirialNbond;
4873  Tensor intVirialSlow;
4874  int hgs;
// Walk hydrogen groups: accumulate per-group COM quantities, then the
// internal virial contributions of each member relative to the COM.
4875  for ( int i = 0; i < numAtoms; i += hgs ) {
4876  hgs = a[i].hydrogenGroupSize;
4877  int j;
4878  BigReal m_cm = 0;
4879  Position x_cm(0,0,0);
4880  Velocity v_cm(0,0,0);
4881  for ( j = i; j < (i+hgs); ++j ) {
4882  m_cm += a[j].mass;
4883  x_cm += a[j].mass * a[j].position;
4884  v_cm += a[j].mass * a[j].velocity;
4885  }
4886  if (simParams->useGroupPressure) momentumSqrSum.outerAdd(1.0/m_cm, v_cm, v_cm);
4887  x_cm /= m_cm;
4888  v_cm /= m_cm;
4889  if (simParams->fixedAtomsOn) NAMD_bug("Sequencer::multigratorPressure, simParams->fixedAtomsOn not implemented yet");
4890  if ( simParams->pairInteractionOn ) {
4891  if ( simParams->pairInteractionSelf ) {
4892  NAMD_bug("Sequencer::multigratorPressure, this part needs to be implemented correctly");
4893  for ( j = i; j < (i+hgs); ++j ) {
4894  if ( a[j].partition != 1 ) continue;
4895  BigReal mass = a[j].mass;
4896  Vector v = a[j].velocity;
4897  Vector dv = v - v_cm;
4898  intVirialNormal2.outerAdd (mass, v, dv);
4899  Vector dx = a[j].position - x_cm;
4900  intVirialNormal.outerAdd(1.0, patch->f[Results::normal][j], dx);
4901  intVirialNbond.outerAdd(1.0, patch->f[Results::nbond][j], dx);
4902  intVirialSlow.outerAdd(1.0, patch->f[Results::slow][j], dx);
4903  }
4904  }
4905  } else {
4906  for ( j = i; j < (i+hgs); ++j ) {
4907  BigReal mass = a[j].mass;
4908  Vector v = a[j].velocity;
4909  Vector dv = v - v_cm;
4910  intVirialNormal2.outerAdd(mass, v, dv);
4911  Vector dx = a[j].position - x_cm;
4912  intVirialNormal.outerAdd(1.0, patch->f[Results::normal][j], dx);
4913  intVirialNbond.outerAdd(1.0, patch->f[Results::nbond][j], dx);
4914  intVirialSlow.outerAdd(1.0, patch->f[Results::slow][j], dx);
4915  }
4916  }
4917  }
4918 
4919  ADD_TENSOR_OBJECT(reduction, REDUCTION_INT_VIRIAL_NORMAL, intVirialNormal);
4920  ADD_TENSOR_OBJECT(reduction, REDUCTION_INT_VIRIAL_NORMAL, intVirialNormal2);
4921  ADD_TENSOR_OBJECT(reduction, REDUCTION_INT_VIRIAL_NBOND, intVirialNbond);
4922  ADD_TENSOR_OBJECT(reduction, REDUCTION_INT_VIRIAL_SLOW, intVirialSlow);
4923  ADD_TENSOR_OBJECT(reduction, REDUCTION_MOMENTUM_SQUARED, momentumSqrSum);
4924 
4925  reduction->submit();
4926  }
4927 }
4928 
4929 void Sequencer::scaleVelocities(const BigReal velScale) {
4930  FullAtom *a = patch->atom.begin();
4931  int numAtoms = patch->numAtoms;
4932  for ( int i = 0; i < numAtoms; i++) {
4933  a[i].velocity *= velScale;
4934  }
4935 }
4936 
// NOTE(review): the signature line (doxygen 4937) was lost in this
// extraction; presumably BigReal Sequencer::calcKineticEnergy()
// -- confirm. Returns 0.5 * sum(m*v^2) over the patch. In pair-interaction
// self mode only partition-1 atoms contribute; in pair-interaction mode
// without self, no atoms are summed and zero is returned.
4938  FullAtom *a = patch->atom.begin();
4939  int numAtoms = patch->numAtoms;
4940  BigReal kineticEnergy = 0.0;
4941  if ( simParams->pairInteractionOn ) {
4942  if ( simParams->pairInteractionSelf ) {
4943  for (int i = 0; i < numAtoms; ++i ) {
4944  if ( a[i].partition != 1 ) continue;
4945  kineticEnergy += a[i].mass * a[i].velocity.length2();
4946  }
4947  }
4948  } else {
4949  for (int i = 0; i < numAtoms; ++i ) {
4950  kineticEnergy += a[i].mass * a[i].velocity.length2();
4951  }
4952  }
4953  kineticEnergy *= 0.5;
4954  return kineticEnergy;
4955  }
4956 
4957 void Sequencer::multigratorTemperature(int step, int callNumber) {
// NOTE(review): a guard line (doxygen 4958, presumably
// "if (simParams->multigratorOn) {") was lost in this extraction; the
// extra closing brace at the end of this function matches it.
4959  // Blocking receive (get) velocity scaling factor.
4960  BigReal velScale = (callNumber == 1) ? broadcast->velocityRescaleFactor.get(step) : broadcast->velocityRescaleFactor2.get(step);
4961  scaleVelocities(velScale);
4962  // Calculate new kineticEnergy
4963  BigReal kineticEnergy = calcKineticEnergy();
// NOTE(review): a reduction line (doxygen 4964) recording kineticEnergy is
// missing from this extraction.
4965  if (callNumber == 1 && !(step % simParams->multigratorPressureFreq)) {
4966  // If this is a pressure cycle, calculate new momentum squared sum
4967  FullAtom *a = patch->atom.begin();
4968  int numAtoms = patch->numAtoms;
4969  Tensor momentumSqrSum;
4970  if (simParams->useGroupPressure) {
// Group pressure: one momentum-squared contribution per hydrogen group,
// computed from the group's total momentum and mass.
4971  int hgs;
4972  for ( int i = 0; i < numAtoms; i += hgs ) {
4973  hgs = a[i].hydrogenGroupSize;
4974  int j;
4975  BigReal m_cm = 0;
4976  Position x_cm(0,0,0);
4977  Velocity v_cm(0,0,0);
4978  for ( j = i; j < (i+hgs); ++j ) {
4979  m_cm += a[j].mass;
4980  x_cm += a[j].mass * a[j].position;
4981  v_cm += a[j].mass * a[j].velocity;
4982  }
4983  momentumSqrSum.outerAdd(1.0/m_cm, v_cm, v_cm);
4984  }
4985  } else {
4986  for ( int i = 0; i < numAtoms; i++) {
4987  momentumSqrSum.outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
4988  }
4989  }
4990  ADD_TENSOR_OBJECT(multigratorReduction, MULTIGRATOR_REDUCTION_MOMENTUM_SQUARED, momentumSqrSum);
4991  }
4992  // Submit reductions (kineticEnergy and, if applicable, momentumSqrSum)
// NOTE(review): the reduction submit line (doxygen 4993) is missing from
// this extraction.
4994 
4995  }
4996 }
4997 // --------- End Multigrator ---------
4998 
4999 //
5000 // DJH: Calls one or more addForceToMomentum which in turn calls HomePatch
5001 // versions. We should inline to reduce the number of function calls.
5002 //
5003 void Sequencer::newtonianVelocities(BigReal stepscale, const BigReal timestep,
5004  const BigReal nbondstep,
5005  const BigReal slowstep,
5006  const int staleForces,
5007  const int doNonbonded,
5008  const int doFullElectrostatics)
5009 {
5010  NAMD_EVENT_RANGE_2(patch->flags.event_on,
5011  NamdProfileEvent::NEWTONIAN_VELOCITIES);
5012 
5013  // Deterministic velocity update, account for multigrator
5014  if (staleForces || (doNonbonded && doFullElectrostatics)) {
5015  addForceToMomentum3(stepscale*timestep, Results::normal, 0,
5016  stepscale*nbondstep, Results::nbond, staleForces,
5017  stepscale*slowstep, Results::slow, staleForces);
5018  } else {
5019  addForceToMomentum(stepscale*timestep);
5020  if (staleForces || doNonbonded)
5021  addForceToMomentum(stepscale*nbondstep, Results::nbond, staleForces);
5022  if (staleForces || doFullElectrostatics)
5023  addForceToMomentum(stepscale*slowstep, Results::slow, staleForces);
5024  }
5025 }
5026 
// NOTE(review): the signature line (doxygen 5027) was lost in this
// extraction; presumably void Sequencer::langevinVelocities(BigReal dt_fs)
// -- confirm.
5028 {
5029 // This routine is used for the BAOAB integrator,
5030 // Ornstein-Uhlenbeck exact solve for the O-part.
5031 // See B. Leimkuhler and C. Matthews, AMRX (2012)
5032 // Routine originally written by JPhillips, with fresh errors by CMatthews June2012
// NOTE(review): the enclosing condition line (doxygen 5034, presumably
// testing simParams->langevinOn) is missing from this extraction.
5035  {
5036  FullAtom *a = patch->atom.begin();
5037  int numAtoms = patch->numAtoms;
5038  Molecule *molecule = Node::Object()->molecule;
5039  BigReal dt = dt_fs * 0.001; // convert to ps
// NOTE(review): lines 5040-5041 (the default initialization of kbT and
// the adaptive-tempering condition guarding the assignment below) are
// missing from this extraction.
5042  {
5043  kbT = BOLTZMANN*adaptTempT;
5044  }
5045 
5046  int lesReduceTemp = simParams->lesOn && simParams->lesReduceTemp;
5047  BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
5048 
// Exact Ornstein-Uhlenbeck step: v <- f1*v + f2*xi, with f1=exp(-gamma*dt)
// and f2 set so the stationary distribution matches kbT for this atom.
5049  for ( int i = 0; i < numAtoms; ++i )
5050  {
5051  BigReal dt_gamma = dt * a[i].langevinParam;
5052  if ( ! dt_gamma ) continue;
5053 
5054  BigReal f1 = exp( -dt_gamma );
5055  BigReal f2 = sqrt( ( 1. - f1*f1 ) * kbT *
5056  ( a[i].partition ? tempFactor : 1.0 ) *
5057  a[i].recipMass );
5058  a[i].velocity *= f1;
5059  a[i].velocity += f2 * random->gaussian_vector();
5060  }
5061  }
5062 }
5063 
// NOTE(review): the signature line (doxygen 5064) was lost in this
// extraction; presumably
// void Sequencer::langevinVelocitiesBBK1(BigReal dt_fs) -- confirm.
// First (deterministic damping) half of the BBK Langevin update:
// v *= (1 - 0.5*gamma*dt), with special COM/bond handling for Drude pairs.
5065 {
5066  NAMD_EVENT_RANGE_2(patch->flags.event_on,
5067  NamdProfileEvent::LANGEVIN_VELOCITIES_BBK1);
// NOTE(review): the enclosing condition line (doxygen 5068, presumably
// testing simParams->langevinOn) is missing from this extraction.
5069  {
5070  FullAtom *a = patch->atom.begin();
5071  int numAtoms = patch->numAtoms;
5072  Molecule *molecule = Node::Object()->molecule;
5073  BigReal dt = dt_fs * 0.001; // convert to ps
5074  int i;
5075 
5076  if (simParams->drudeOn) {
5077  for (i = 0; i < numAtoms; i++) {
5078 
// An atom with mass in (0.05, 1.0) following atom i is treated as the
// Drude particle bonded to parent i.
5079  if (i < numAtoms-1 &&
5080  a[i+1].mass < 1.0 && a[i+1].mass > 0.05) {
5081  //printf("*** Found Drude particle %d\n", a[i+1].id);
5082  // i+1 is a Drude particle with parent i
5083 
5084  // convert from Cartesian coordinates to (COM,bond) coordinates
5085  BigReal m = a[i+1].mass / (a[i].mass + a[i+1].mass); // mass ratio
5086  Vector v_bnd = a[i+1].velocity - a[i].velocity; // vel of bond
5087  Vector v_com = a[i].velocity + m * v_bnd; // vel of COM
5088  BigReal dt_gamma;
5089 
5090  // use Langevin damping factor i for v_com
5091  dt_gamma = dt * a[i].langevinParam;
5092  if (dt_gamma != 0.0) {
5093  v_com *= ( 1. - 0.5 * dt_gamma );
5094  }
5095 
5096  // use Langevin damping factor i+1 for v_bnd
5097  dt_gamma = dt * a[i+1].langevinParam;
5098  if (dt_gamma != 0.0) {
5099  v_bnd *= ( 1. - 0.5 * dt_gamma );
5100  }
5101 
5102  // convert back
5103  a[i].velocity = v_com - m * v_bnd;
5104  a[i+1].velocity = v_bnd + a[i].velocity;
5105 
5106  i++; // +1 from loop, we've updated both particles
5107  }
5108  else {
5109  BigReal dt_gamma = dt * a[i].langevinParam;
5110  if ( ! dt_gamma ) continue;
5111 
5112  a[i].velocity *= ( 1. - 0.5 * dt_gamma );
5113  }
5114 
5115  } // end for
5116  } // end if drudeOn
5117  else {
5118 
5119  //
5120  // DJH: The conditional inside loop prevents vectorization and doesn't
5121  // avoid much work since addition and multiplication are cheap.
5122  //
5123  for ( i = 0; i < numAtoms; ++i )
5124  {
5125  BigReal dt_gamma = dt * a[i].langevinParam;
5126  if ( ! dt_gamma ) continue;
5127 
5128  a[i].velocity *= ( 1. - 0.5 * dt_gamma );
5129  }
5130 
5131  } // end else
5132 
5133  } // end if langevinOn
5134 }
5135 
5136 
// NOTE(review): the signature line (doxygen 5137) was lost in this
// extraction; presumably
// void Sequencer::langevinVelocitiesBBK2(BigReal dt_fs) -- confirm.
// Second (stochastic) half of the BBK Langevin update: adds the random
// kick and divides by (1 + 0.5*gamma*dt), with Drude COM/bond handling.
5138 {
5139  NAMD_EVENT_RANGE_2(patch->flags.event_on,
5140  NamdProfileEvent::LANGEVIN_VELOCITIES_BBK2);
// NOTE(review): the enclosing condition line (doxygen 5141, presumably
// testing simParams->langevinOn) is missing from this extraction.
5142  {
5143  //
5144  // DJH: This call is expensive. Avoid calling when gammas don't differ.
5145  // Set flag in SimParameters and make this call conditional.
5146  //
5147  TIMER_START(patch->timerSet, RATTLE1);
5148  rattle1(dt_fs,1); // conserve momentum if gammas differ
5149  TIMER_STOP(patch->timerSet, RATTLE1);
5150 
5151  FullAtom *a = patch->atom.begin();
5152  int numAtoms = patch->numAtoms;
5153  Molecule *molecule = Node::Object()->molecule;
5154  BigReal dt = dt_fs * 0.001; // convert to ps
// NOTE(review): lines 5155-5156 (the default initialization of kbT and
// the adaptive-tempering condition guarding the assignment below) are
// missing from this extraction.
5157  {
5158  kbT = BOLTZMANN*adaptTempT;
5159  }
5160  int lesReduceTemp = simParams->lesOn && simParams->lesReduceTemp;
5161  BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
5162  int i;
5163 
5164  if (simParams->drudeOn) {
5165  BigReal kbT_bnd = BOLTZMANN*(simParams->drudeTemp); // drude bond Temp
5166 
5167  for (i = 0; i < numAtoms; i++) {
5168 
// An atom with mass in (0.05, 1.0) following atom i is treated as the
// Drude particle bonded to parent i.
5169  if (i < numAtoms-1 &&
5170  a[i+1].mass < 1.0 && a[i+1].mass > 0.05) {
5171  //printf("*** Found Drude particle %d\n", a[i+1].id);
5172  // i+1 is a Drude particle with parent i
5173 
5174  // convert from Cartesian coordinates to (COM,bond) coordinates
5175  BigReal m = a[i+1].mass / (a[i].mass + a[i+1].mass); // mass ratio
5176  Vector v_bnd = a[i+1].velocity - a[i].velocity; // vel of bond
5177  Vector v_com = a[i].velocity + m * v_bnd; // vel of COM
5178  BigReal dt_gamma;
5179 
5180  // use Langevin damping factor i for v_com
5181  dt_gamma = dt * a[i].langevinParam;
5182  if (dt_gamma != 0.0) {
5183  BigReal mass = a[i].mass + a[i+1].mass;
5184  v_com += random->gaussian_vector() *
5185  sqrt( 2 * dt_gamma * kbT *
5186  ( a[i].partition ? tempFactor : 1.0 ) / mass );
5187  v_com /= ( 1. + 0.5 * dt_gamma );
5188  }
5189 
5190  // use Langevin damping factor i+1 for v_bnd
5191  dt_gamma = dt * a[i+1].langevinParam;
5192  if (dt_gamma != 0.0) {
// Reduced mass of the Drude bond oscillator.
5193  BigReal mass = a[i+1].mass * (1. - m);
5194  v_bnd += random->gaussian_vector() *
5195  sqrt( 2 * dt_gamma * kbT_bnd *
5196  ( a[i+1].partition ? tempFactor : 1.0 ) / mass );
5197  v_bnd /= ( 1. + 0.5 * dt_gamma );
5198  }
5199 
5200  // convert back
5201  a[i].velocity = v_com - m * v_bnd;
5202  a[i+1].velocity = v_bnd + a[i].velocity;
5203 
5204  i++; // +1 from loop, we've updated both particles
5205  }
5206  else {
5207  BigReal dt_gamma = dt * a[i].langevinParam;
5208  if ( ! dt_gamma ) continue;
5209 
5210  a[i].velocity += random->gaussian_vector() *
5211  sqrt( 2 * dt_gamma * kbT *
5212  ( a[i].partition ? tempFactor : 1.0 ) * a[i].recipMass );
5213  a[i].velocity /= ( 1. + 0.5 * dt_gamma );
5214  }
5215 
5216  } // end for
5217  } // end if drudeOn
5218  else {
5219 
5220  //
5221  // DJH: For case using same gamma (the Langevin parameter),
5222  // no partitions (e.g. FEP), and no adaptive tempering (adaptTempMD),
5223  // we can precompute constants. Then by lifting the RNG from the
5224  // loop (filling up an array of random numbers), we can vectorize
5225  // loop and simplify arithmetic to just addition and multiplication.
5226  //
5227  for ( i = 0; i < numAtoms; ++i )
5228  {
5229  BigReal dt_gamma = dt * a[i].langevinParam;
5230  if ( ! dt_gamma ) continue;
5231 
5232  a[i].velocity += random->gaussian_vector() *
5233  sqrt( 2 * dt_gamma * kbT *
5234  ( a[i].partition ? tempFactor : 1.0 ) * a[i].recipMass );
5235  a[i].velocity /= ( 1. + 0.5 * dt_gamma );
5236  }
5237 
5238  } // end else
5239 
5240  } // end if langevinOn
5241 }
5242 
5243 
// NOTE(review): the signature line (doxygen 5244) was lost in this
// extraction; presumably void Sequencer::berendsenPressure(int step)
// -- confirm. Every berendsenPressureFreq calls, rescales the lattice and
// atom positions by the Controller's factor, keeping hydrogen groups
// rigid when group pressure is enabled and pinning fixed atoms.
5245 {
5246  if ( simParams->berendsenPressureOn ) {
// NOTE(review): a counter line (doxygen 5247, presumably incrementing
// berendsenPressure_count) is missing from this extraction.
5248  const int freq = simParams->berendsenPressureFreq;
5249  if ( ! (berendsenPressure_count % freq ) ) {
// NOTE(review): a line (doxygen 5250, presumably resetting
// berendsenPressure_count) is missing from this extraction.
5251  FullAtom *a = patch->atom.begin();
5252  int numAtoms = patch->numAtoms;
5253  // Blocking receive for the updated lattice scaling factor.
5254  Tensor factor = broadcast->positionRescaleFactor.get(step);
5255  patch->lattice.rescale(factor);
5256  if ( simParams->useGroupPressure )
5257  {
5258  int hgs;
5259  for ( int i = 0; i < numAtoms; i += hgs ) {
5260  int j;
5261  hgs = a[i].hydrogenGroupSize;
5262  if ( simParams->fixedAtomsOn && a[i].groupFixed ) {
5263  for ( j = i; j < (i+hgs); ++j ) {
// NOTE(review): the first line of this statement (doxygen 5264, presumably
// restoring the position from the fixed reference via a lattice
// transform) is missing from this extraction.
5265  a[j].fixedPosition,a[j].transform);
5266  }
5267  continue;
5268  }
// Rescale the whole hydrogen group rigidly about its center of mass.
5269  BigReal m_cm = 0;
5270  Position x_cm(0,0,0);
5271  for ( j = i; j < (i+hgs); ++j ) {
5272  if ( simParams->fixedAtomsOn && a[j].atomFixed ) continue;
5273  m_cm += a[j].mass;
5274  x_cm += a[j].mass * a[j].position;
5275  }
5276  x_cm /= m_cm;
5277  Position new_x_cm = x_cm;
5278  patch->lattice.rescale(new_x_cm,factor);
5279  Position delta_x_cm = new_x_cm - x_cm;
5280  for ( j = i; j < (i+hgs); ++j ) {
5281  if ( simParams->fixedAtomsOn && a[j].atomFixed ) {
// NOTE(review): the first line of this statement (doxygen 5282) is missing
// from this extraction (same fixed-position restore as above).
5283  a[j].fixedPosition,a[j].transform);
5284  continue;
5285  }
5286  a[j].position += delta_x_cm;
5287  }
5288  }
5289  }
5290  else
5291  {
5292  for ( int i = 0; i < numAtoms; ++i )
5293  {
5294  if ( simParams->fixedAtomsOn && a[i].atomFixed ) {
// NOTE(review): the first line of this statement (doxygen 5295) is missing
// from this extraction (same fixed-position restore as above).
5296  a[i].fixedPosition,a[i].transform);
5297  continue;
5298  }
5299  patch->lattice.rescale(a[i].position,factor);
5300  }
5301  }
5302  }
5303  } else {
// NOTE(review): the else-branch body line (doxygen 5304, presumably
// resetting berendsenPressure_count) is missing from this extraction.
5305  }
5306 }
5307 
// NOTE(review): the signature line (doxygen 5308) was lost in this
// extraction; presumably void Sequencer::langevinPiston(int step)
// -- confirm. On piston steps, rescales the lattice, positions, and
// velocities by the Controller's factors; hydrogen groups move rigidly
// under group pressure, and fixed or pressure-excluded atoms are skipped.
5309 {
5310  if ( simParams->langevinPistonOn && ! ( (step-1-slowFreq/2) % slowFreq ) )
5311  {
5312  //
5313  // DJH: Loops below simplify if we lift out special cases of fixed atoms
5314  // and pressure excluded atoms and make them their own branch.
5315  //
5316  FullAtom *a = patch->atom.begin();
5317  int numAtoms = patch->numAtoms;
5318  // Blocking receive for the updated lattice scaling factor.
5319  Tensor factor = broadcast->positionRescaleFactor.get(step);
5320  TIMER_START(patch->timerSet, PISTON);
5321  // JCP FIX THIS!!!
// Velocities scale by the reciprocal of the diagonal position factors.
5322  Vector velFactor(1/factor.xx,1/factor.yy,1/factor.zz);
5323  patch->lattice.rescale(factor);
5324  Molecule *mol = Node::Object()->molecule;
5325  if ( simParams->useGroupPressure )
5326  {
5327  int hgs;
5328  for ( int i = 0; i < numAtoms; i += hgs ) {
5329  int j;
5330  hgs = a[i].hydrogenGroupSize;
5331  if ( simParams->fixedAtomsOn && a[i].groupFixed ) {
5332  for ( j = i; j < (i+hgs); ++j ) {
// NOTE(review): the first line of this statement (doxygen 5333, presumably
// restoring the position from the fixed reference via a lattice
// transform) is missing from this extraction.
5334  a[j].fixedPosition,a[j].transform);
5335  }
5336  continue;
5337  }
// Rigid-group update: shift positions by the COM displacement and
// velocities by the COM velocity change.
5338  BigReal m_cm = 0;
5339  Position x_cm(0,0,0);
5340  Velocity v_cm(0,0,0);
5341  for ( j = i; j < (i+hgs); ++j ) {
5342  if ( simParams->fixedAtomsOn && a[j].atomFixed ) continue;
5343  m_cm += a[j].mass;
5344  x_cm += a[j].mass * a[j].position;
5345  v_cm += a[j].mass * a[j].velocity;
5346  }
5347  x_cm /= m_cm;
5348  Position new_x_cm = x_cm;
5349  patch->lattice.rescale(new_x_cm,factor);
5350  Position delta_x_cm = new_x_cm - x_cm;
5351  v_cm /= m_cm;
5352  Velocity delta_v_cm;
5353  delta_v_cm.x = ( velFactor.x - 1 ) * v_cm.x;
5354  delta_v_cm.y = ( velFactor.y - 1 ) * v_cm.y;
5355  delta_v_cm.z = ( velFactor.z - 1 ) * v_cm.z;
5356  for ( j = i; j < (i+hgs); ++j ) {
5357  if ( simParams->fixedAtomsOn && a[j].atomFixed ) {
// NOTE(review): the first line of this statement (doxygen 5358) is missing
// from this extraction (same fixed-position restore as above).
5359  a[j].fixedPosition,a[j].transform);
5360  continue;
5361  }
5362  if ( mol->is_atom_exPressure(a[j].id) ) continue;
5363  a[j].position += delta_x_cm;
5364  a[j].velocity += delta_v_cm;
5365  }
5366  }
5367  }
5368  else
5369  {
5370  for ( int i = 0; i < numAtoms; ++i )
5371  {
5372  if ( simParams->fixedAtomsOn && a[i].atomFixed ) {
// NOTE(review): the first line of this statement (doxygen 5373) is missing
// from this extraction (same fixed-position restore as above).
5374  a[i].fixedPosition,a[i].transform);
5375  continue;
5376  }
5377  if ( mol->is_atom_exPressure(a[i].id) ) continue;
5378  patch->lattice.rescale(a[i].position,factor);
5379  a[i].velocity.x *= velFactor.x;
5380  a[i].velocity.y *= velFactor.y;
5381  a[i].velocity.z *= velFactor.z;
5382  }
5383  }
5384  TIMER_STOP(patch->timerSet, PISTON);
5385  }
5386 }
5387 
// NOTE(review): the signature line (doxygen 5388) was lost in this
// extraction; presumably void Sequencer::rescaleVelocities(int step)
// -- confirm. Every rescaleFreq temperature samples, multiplies all
// velocities by the Controller's rescaling factor.
5389 {
5390  const int rescaleFreq = simParams->rescaleFreq;
5391  if ( rescaleFreq > 0 ) {
5392  FullAtom *a = patch->atom.begin();
5393  int numAtoms = patch->numAtoms;
// NOTE(review): a counter line (doxygen 5394, presumably incrementing
// rescaleVelocities_numTemps) is missing from this extraction.
5395  if ( rescaleVelocities_numTemps == rescaleFreq ) {
5396  // Blocking receive for the velcity scaling factor.
5397  BigReal factor = broadcast->velocityRescaleFactor.get(step);
5398  for ( int i = 0; i < numAtoms; ++i )
5399  {
5400  a[i].velocity *= factor;
5401  }
// NOTE(review): a line (doxygen 5402, presumably resetting
// rescaleVelocities_numTemps) is missing from this extraction.
5403  }
5404  }
5405 }
5406 
5407 void Sequencer::rescaleaccelMD (int step, int doNonbonded, int doFullElectrostatics)
5408 {
5409  if (!simParams->accelMDOn) return;
5410  if ((step < simParams->accelMDFirstStep) || ( simParams->accelMDLastStep >0 && step > simParams->accelMDLastStep)) return;
5411 
5412  // Blocking receive for the Accelerated MD scaling factors.
5413  Vector accelMDfactor = broadcast->accelMDRescaleFactor.get(step);
5414  const BigReal factor_dihe = accelMDfactor[0];
5415  const BigReal factor_tot = accelMDfactor[1];
5416  const int numAtoms = patch->numAtoms;
5417 
5418  if (simParams->accelMDdihe && factor_tot <1 )
5419  NAMD_die("accelMD broadcasting error!\n");
5420  if (!simParams->accelMDdihe && !simParams->accelMDdual && factor_dihe <1 )
5421  NAMD_die("accelMD broadcasting error!\n");
5422 
5423  if (simParams->accelMDdihe && factor_dihe < 1) {
5424  for (int i = 0; i < numAtoms; ++i)
5425  if (patch->f[Results::amdf][i][0] || patch->f[Results::amdf][i][1] || patch->f[Results::amdf][i][2])
5426  patch->f[Results::normal][i] += patch->f[Results::amdf][i]*(factor_dihe - 1);
5427  }
5428 
5429  if ( !simParams->accelMDdihe && factor_tot < 1) {
5430  for (int i = 0; i < numAtoms; ++i)
5431  patch->f[Results::normal][i] *= factor_tot;
5432  if (doNonbonded) {
5433  for (int i = 0; i < numAtoms; ++i)
5434  patch->f[Results::nbond][i] *= factor_tot;
5435  }
5436  if (doFullElectrostatics) {
5437  for (int i = 0; i < numAtoms; ++i)
5438  patch->f[Results::slow][i] *= factor_tot;
5439  }
5440  }
5441 
5442  if (simParams->accelMDdual && factor_dihe < 1) {
5443  for (int i = 0; i < numAtoms; ++i)
5444  if (patch->f[Results::amdf][i][0] || patch->f[Results::amdf][i][1] || patch->f[Results::amdf][i][2])
5445  patch->f[Results::normal][i] += patch->f[Results::amdf][i]*(factor_dihe - factor_tot);
5446  }
5447 
5448 }
5449 
// NOTE(review): the signature line (doxygen 5450) was lost in this
// extraction; presumably void Sequencer::adaptTempUpdate(int step)
// -- confirm.
5451 {
5452  //check if adaptive tempering is enabled and in the right timestep range
5453  if (!simParams->adaptTempOn) return;
5454  if ( (step < simParams->adaptTempFirstStep ) ||
// NOTE(review): the continuation of the condition above (doxygen 5455) and
// the statement restoring the Langevin temperature (doxygen 5457) are
// missing from this extraction.
5456  if (simParams->langevinOn) // restore langevin temperature
5458  return;
5459  }
5460  // Get Updated Temperature
5461  if ( !(step % simParams->adaptTempFreq ) && (step > simParams->firstTimestep ))
5462  // Blocking receive for the updated adaptive tempering temperature.
// NOTE(review): the receive/assignment statement (doxygen 5463, presumably
// updating adaptTempT from a broadcast) is missing from this extraction.
5464 }
5465 
5466 void Sequencer::reassignVelocities(BigReal timestep, int step)
5467 {
5468  const int reassignFreq = simParams->reassignFreq;
5469  if ( ( reassignFreq > 0 ) && ! ( step % reassignFreq ) ) {
5470  FullAtom *a = patch->atom.begin();
5471  int numAtoms = patch->numAtoms;
5472  BigReal newTemp = simParams->reassignTemp;
5473  newTemp += ( step / reassignFreq ) * simParams->reassignIncr;
5474  if ( simParams->reassignIncr > 0.0 ) {
5475  if ( newTemp > simParams->reassignHold && simParams->reassignHold > 0.0 )
5476  newTemp = simParams->reassignHold;
5477  } else {
5478  if ( newTemp < simParams->reassignHold )
5479  newTemp = simParams->reassignHold;
5480  }
5481  BigReal kbT = BOLTZMANN * newTemp;
5482 
5483  int lesReduceTemp = simParams->lesOn && simParams->lesReduceTemp;
5484  BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
5485 
5486  for ( int i = 0; i < numAtoms; ++i )
5487  {
5488  a[i].velocity = ( ( simParams->fixedAtomsOn &&
5489  a[i].atomFixed && a[i].mass > 0.) ? Vector(0,0,0) :
5490  sqrt(kbT * (a[i].partition ? tempFactor : 1.0) * a[i].recipMass) *
5491  random->gaussian_vector() );
5492  }
5493  } else {
5494  NAMD_bug("Sequencer::reassignVelocities called improperly!");
5495  }
5496 }
5497 
// NOTE(review): the signature line (doxygen 5498) was lost in this
// extraction; presumably void Sequencer::reinitVelocities(void)
// -- confirm. Draws fresh Maxwell-Boltzmann velocities at initialTemp;
// fixed or non-positive-mass atoms get zero velocity, and each Drude
// particle copies its parent's velocity.
5499 {
5500  FullAtom *a = patch->atom.begin();
5501  int numAtoms = patch->numAtoms;
5502  BigReal newTemp = simParams->initialTemp;
5503  BigReal kbT = BOLTZMANN * newTemp;
5504 
5505  int lesReduceTemp = simParams->lesOn && simParams->lesReduceTemp;
5506  BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
5507 
5508  for ( int i = 0; i < numAtoms; ++i )
5509  {
5510  a[i].velocity = ( ( (simParams->fixedAtomsOn && a[i].atomFixed) ||
5511  a[i].mass <= 0.) ? Vector(0,0,0) :
5512  sqrt(kbT * (a[i].partition ? tempFactor : 1.0) * a[i].recipMass) *
5513  random->gaussian_vector() );
// Drude particle (mass in (0.05, 1.0)) immediately follows its parent.
5514  if ( simParams->drudeOn && i+1 < numAtoms && a[i+1].mass < 1.0 && a[i+1].mass > 0.05 ) {
5515  a[i+1].velocity = a[i].velocity; // zero is good enough
5516  ++i;
5517  }
5518  }
5519 }
5520 
// NOTE(review): the signature line (doxygen 5521) was lost in this
// extraction; presumably
// void Sequencer::rescaleVelocitiesByFactor(BigReal factor) -- confirm.
// Multiplies every atom's velocity by the given factor.
5522 {
5523  FullAtom *a = patch->atom.begin();
5524  int numAtoms = patch->numAtoms;
5525  for ( int i = 0; i < numAtoms; ++i )
5526  {
5527  a[i].velocity *= factor;
5528  }
5529 }
5530 
// NOTE(review): the signature line (doxygen 5531) was lost in this
// extraction; presumably void Sequencer::reloadCharges() -- confirm.
// Restores each atom's charge from the master copy held by Molecule
// (undoes any scaling applied elsewhere, e.g. rescaleSoluteCharges).
5532 {
5533  FullAtom *a = patch->atom.begin();
5534  int numAtoms = patch->numAtoms;
5535  Molecule *molecule = Node::Object()->molecule;
5536  for ( int i = 0; i < numAtoms; ++i )
5537  {
5538  a[i].charge = molecule->atomcharge(a[i].id);
5539  }
5540 }
5541 
5542 // REST2 solute charge scaling
// NOTE(review): the signature line (doxygen 5543) was lost in this
// extraction; presumably
// void Sequencer::rescaleSoluteCharges(BigReal factor) -- confirm.
// Sets each marked solute atom's charge to sqrt(factor) times its
// original charge from Molecule, so solute-solute electrostatic energies
// scale by factor; mirrors the change into the SOA array when the SOA
// integrator is active.
5544 {
5545  FullAtom *a = patch->atom.begin();
5546  int numAtoms = patch->numAtoms;
5547  Molecule *molecule = Node::Object()->molecule;
5548  BigReal sqrt_factor = sqrt(factor);
5549  // apply scaling to the original charge (stored in molecule)
5550  // of just the marked solute atoms
5551  for ( int i = 0; i < numAtoms; ++i ) {
5552  if (molecule->get_ss_type(a[i].id)) {
5553  a[i].charge = sqrt_factor * molecule->atomcharge(a[i].id);
5554  if (simParams->SOAintegrateOn) patch->patchDataSOA.charge[i] = a[i].charge;
5555  }
5556  }
5557 }
5558 
// NOTE(review): the signature line (doxygen 5559) was lost in this
// extraction; presumably
// void Sequencer::tcoupleVelocities(BigReal dt_fs, int step) -- confirm.
// Temperature-coupling thermostat: scales each velocity by
// exp(coefficient * dt * langevinParam), where the coefficient comes from
// the Controller via a blocking broadcast receive.
5560 {
5561  if ( simParams->tCoupleOn )
5562  {
5563  FullAtom *a = patch->atom.begin();
5564  int numAtoms = patch->numAtoms;
5565  // Blocking receive for the temperature coupling coefficient.
5566  BigReal coefficient = broadcast->tcoupleCoefficient.get(step);
5567  Molecule *molecule = Node::Object()->molecule;
5568  BigReal dt = dt_fs * 0.001; // convert to ps
5569  coefficient *= dt;
5570  for ( int i = 0; i < numAtoms; ++i )
5571  {
5572  BigReal f1 = exp( coefficient * a[i].langevinParam );
5573  a[i].velocity *= f1;
5574  }
5575  }
5576 }
5577 
// NOTE(review): the signature and doc lines (doxygen 5578-5582) were lost
// in this extraction; presumably
// void Sequencer::stochRescaleVelocities(int step) -- confirm.
// Stochastic velocity rescaling thermostat: applies the Controller's
// coefficient to all velocities and resets the rescale counter.
5583 {
// NOTE(review): lines 5584-5585 (presumably the stochRescale_count
// increment and the condition triggering the rescale) are missing from
// this extraction.
5586  FullAtom *a = patch->atom.begin();
5587  int numAtoms = patch->numAtoms;
5588  // Blocking receive for the temperature coupling coefficient.
5589  BigReal velrescaling = broadcast->stochRescaleCoefficient.get(step);
5590  DebugM(4, "stochastically rescaling velocities at step " << step << " by " << velrescaling << "\n");
5591  for ( int i = 0; i < numAtoms; ++i ) {
5592  a[i].velocity *= velrescaling;
5593  }
5594  stochRescale_count = 0;
5595  }
5596 }
5597 
5598 void Sequencer::saveForce(const int ftag)
5599 {
5600  patch->saveForce(ftag);
5601 }
5602 
5603 //
5604 // DJH: Need to change division by TIMEFACTOR into multiplication by
5605 // reciprocal of TIMEFACTOR. Done several times for each iteration of
5606 // the integrate() loop.
5607 //
5608 
// NOTE(review): the opening of the signature (original 5609,
// "void Sequencer::addForceToMomentum(") is missing from this extract.
// Performs a half-kick: converts the timestep to internal units and
// delegates the per-atom velocity update to HomePatch::addForceToMomentum,
// using either the live force array or the saved copy (useSaved).
5610  BigReal timestep, const int ftag, const int useSaved
5611  ) {
5612  NAMD_EVENT_RANGE_2(patch->flags.event_on,
5613  NamdProfileEvent::ADD_FORCE_TO_MOMENTUM);
5614 #if CMK_BLUEGENEL
5615  CmiNetworkProgressAfter (0);
5616 #endif
5617  const BigReal dt = timestep / TIMEFACTOR;
5618  FullAtom *atom_arr = patch->atom.begin();
5619  ForceList *f_use = (useSaved ? patch->f_saved : patch->f);
5620  const Force *force_arr = f_use[ftag].const_begin();
5621  patch->addForceToMomentum(atom_arr, force_arr, dt, patch->numAtoms);
5622 }
5623 
// NOTE(review): the opening of the signature (original 5624,
// "void Sequencer::addForceToMomentum3(") is missing from this extract.
// Fused triple kick for multiple-timestepping: applies three force arrays
// (each with its own timestep and live/saved selection) in one call to
// HomePatch::addForceToMomentum3.
5625  const BigReal timestep1, const int ftag1, const int useSaved1,
5626  const BigReal timestep2, const int ftag2, const int useSaved2,
5627  const BigReal timestep3, const int ftag3, const int useSaved3
5628  ) {
5629  NAMD_EVENT_RANGE_2(patch->flags.event_on,
5630  NamdProfileEvent::ADD_FORCE_TO_MOMENTUM);
5631 #if CMK_BLUEGENEL
5632  CmiNetworkProgressAfter (0);
5633 #endif
5634  const BigReal dt1 = timestep1 / TIMEFACTOR;
5635  const BigReal dt2 = timestep2 / TIMEFACTOR;
5636  const BigReal dt3 = timestep3 / TIMEFACTOR;
5637  ForceList *f_use1 = (useSaved1 ? patch->f_saved : patch->f);
5638  ForceList *f_use2 = (useSaved2 ? patch->f_saved : patch->f);
5639  ForceList *f_use3 = (useSaved3 ? patch->f_saved : patch->f);
5640  FullAtom *atom_arr = patch->atom.begin();
5641  const Force *force_arr1 = f_use1[ftag1].const_begin();
5642  const Force *force_arr2 = f_use2[ftag2].const_begin();
5643  const Force *force_arr3 = f_use3[ftag3].const_begin();
5644  patch->addForceToMomentum3 (atom_arr, force_arr1, force_arr2, force_arr3,
5645  dt1, dt2, dt3, patch->numAtoms);
5646 }
5647 
// NOTE(review): the signature line (original 5648) is missing from this
// extract; the body is the drift step -- it converts the timestep to
// internal units and delegates the per-atom position update to
// HomePatch::addVelocityToPosition.
5649 {
5650  NAMD_EVENT_RANGE_2(patch->flags.event_on,
5651  NamdProfileEvent::ADD_VELOCITY_TO_POSITION);
5652 #if CMK_BLUEGENEL
5653  CmiNetworkProgressAfter (0);
5654 #endif
5655  const BigReal dt = timestep / TIMEFACTOR;
5656  FullAtom *atom_arr = patch->atom.begin();
5657  patch->addVelocityToPosition(atom_arr, dt, patch->numAtoms);
5658 }
5659 
// Apply the Drude hard-wall constraint on the patch; on failure report the
// error and terminate. The virial is accumulated only when the caller
// requests pressure (vp is null otherwise).
5660 void Sequencer::hardWallDrude(BigReal dt, int pressure)
5661 {
5662  if ( simParams->drudeHardWallOn ) {
5663  Tensor virial;
5664  Tensor *vp = ( pressure ? &virial : 0 );
// nonzero return from HomePatch::hardWallDrude signals constraint failure
5665  if ( patch->hardWallDrude(dt, vp, pressureProfileReduction) ) {
5666  iout << iERROR << "Constraint failure in HardWallDrude(); "
5667  << "simulation may become unstable.\n" << endi;
// NOTE(review): original line 5668 is missing from this extract --
// likely an early-exit notification preceding terminate(); confirm
// against the full source.
5669  terminate();
5670  }
5671  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,virial);
5672  }
5673 }
5674 
// First RATTLE stage: enforce rigid-bond constraints on the patch and
// accumulate the constraint virial. On failure report the error and
// terminate. The #if 0 sections are retained debugging dumps.
5675 void Sequencer::rattle1(BigReal dt, int pressure)
5676 {
5677  NAMD_EVENT_RANGE_2(patch->flags.event_on, NamdProfileEvent::RATTLE1);
5678  if ( simParams->rigidBonds != RIGID_NONE ) {
5679  Tensor virial;
// only compute the virial when the caller needs pressure
5680  Tensor *vp = ( pressure ? &virial : 0 );
5681  if ( patch->rattle1(dt, vp, pressureProfileReduction) ) {
5682  iout << iERROR <<
5683  "Constraint failure; simulation has become unstable.\n" << endi;
// NOTE(review): original line 5684 is missing from this extract --
// likely an early-exit notification preceding terminate(); confirm.
5685  terminate();
5686  }
5687 #if 0
5688  printf("virial = %g %g %g %g %g %g %g %g %g\n",
5689  virial.xx, virial.xy, virial.xz,
5690  virial.yx, virial.yy, virial.yz,
5691  virial.zx, virial.zy, virial.zz);
5692 #endif
5693  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,virial);
5694 #if 0
5695  {
5696  const FullAtom *a = patch->atom.const_begin();
5697  for (int n=0; n < patch->numAtoms; n++) {
5698  printf("pos[%d] = %g %g %g\n", n,
5699  a[n].position.x, a[n].position.y, a[n].position.z);
5700  }
5701  for (int n=0; n < patch->numAtoms; n++) {
5702  printf("vel[%d] = %g %g %g\n", n,
5703  a[n].velocity.x, a[n].velocity.y, a[n].velocity.z);
5704  }
5705  if (pressure) {
5706  for (int n=0; n < patch->numAtoms; n++) {
5707  printf("force[%d] = %g %g %g\n", n,
5708  patch->f[Results::normal][n].x,
5709  patch->f[Results::normal][n].y,
5710  patch->f[Results::normal][n].z);
5711  }
5712  }
5713  }
5714 #endif
5715  }
5716 }
5717 
5718 // void Sequencer::rattle2(BigReal dt, int step)
5719 // {
5720 // if ( simParams->rigidBonds != RIGID_NONE ) {
5721 // Tensor virial;
5722 // patch->rattle2(dt, &virial);
5723 // ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,virial);
5724 // // we need to add to alt and int virial because not included in forces
5725 // #ifdef ALTVIRIAL
5726 // ADD_TENSOR_OBJECT(reduction,REDUCTION_ALT_VIRIAL_NORMAL,virial);
5727 // #endif
5728 // ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NORMAL,virial);
5729 // }
5730 // }
5731 
// NOTE(review): the signature line (original 5732) is missing from this
// extract; the cross-reference style of this file suggests
// Sequencer::maximumMove(BigReal timestep) -- confirm.
// With maximumMove set: clamp any velocity that would move an atom more
// than the configured distance in one step. Otherwise: detect atoms
// moving faster than cutoff/dt, report each offender, and terminate.
5733 {
5734  NAMD_EVENT_RANGE_2(patch->flags.event_on, NamdProfileEvent::MAXIMUM_MOVE);
5735 
5736  FullAtom *a = patch->atom.begin();
5737  int numAtoms = patch->numAtoms;
5738  if ( simParams->maximumMove ) {
5739  const BigReal dt = timestep / TIMEFACTOR;
5740  const BigReal maxvel = simParams->maximumMove / dt;
5741  const BigReal maxvel2 = maxvel * maxvel;
5742  for ( int i=0; i<numAtoms; ++i ) {
5743  if ( a[i].velocity.length2() > maxvel2 ) {
// rescale to exactly the maximum allowed speed, preserving direction
5744  a[i].velocity *= ( maxvel / a[i].velocity.length() );
5745  }
5746  }
5747  } else {
5748  const BigReal dt = timestep / TIMEFACTOR;
5749  const BigReal maxvel = simParams->cutoff / dt;
5750  const BigReal maxvel2 = maxvel * maxvel;
5751  int killme = 0;
// first pass: cheap detection of any offender
5752  for ( int i=0; i<numAtoms; ++i ) {
5753  killme = killme || ( a[i].velocity.length2() > maxvel2 );
5754  }
5755  if ( killme ) {
// second pass: report every offending atom before terminating
5756  killme = 0;
5757  for ( int i=0; i<numAtoms; ++i ) {
5758  if ( a[i].velocity.length2() > maxvel2 ) {
5759  ++killme;
5760  iout << iERROR << "Atom " << (a[i].id + 1) << " velocity is "
5761  << ( PDBVELFACTOR * a[i].velocity ) << " (limit is "
5762  << ( PDBVELFACTOR * maxvel ) << ", atom "
5763  << i << " of " << numAtoms << " on patch "
5764  << patch->patchID << " pe " << CkMyPe() << ")\n" << endi;
5765  }
5766  }
5767  iout << iERROR <<
5768  "Atoms moving too fast; simulation has become unstable ("
5769  << killme << " atoms on patch " << patch->patchID
5770  << " pe " << CkMyPe() << ").\n" << endi;
// NOTE(review): original line 5771 is missing from this extract --
// likely an early-exit notification preceding terminate(); confirm.
5772  terminate();
5773  }
5774  }
5775 }
5776 
// NOTE(review): the signature line (original 5777) is missing from this
// extract; the cross-reference index identifies it as
// void Sequencer::minimizationQuenchVelocity(void).
// During minimization, zero all velocities on the patch.
5778 {
5779  if ( simParams->minimizeOn ) {
5780  FullAtom *a = patch->atom.begin();
5781  int numAtoms = patch->numAtoms;
5782  for ( int i=0; i<numAtoms; ++i ) {
5783  a[i].velocity = 0.;
5784  }
5785  }
5786 }
5787 
// NOTE(review): the signature line (original 5788) is missing from this
// extract, and interior lines 5803, 5823, 5827, 5835, 5889, 5909 and 5935
// are absent (visible as jumps in the embedded numbering) -- several of
// the braces below therefore close guards that are not shown. Confirm
// against the full source before editing.
// Submits velocity-dependent half-step quantities to the reduction system:
// half-step kinetic energy and virial, pressure-profile kinetic
// partitions, and internal (group-relative) kinetic energy/virial.
// Positions are NOT at the half step when this is called.
5789 {
5790  NAMD_EVENT_RANGE_2(patch->flags.event_on, NamdProfileEvent::SUBMIT_HALFSTEP);
5791 
5792  // velocity-dependent quantities *** ONLY ***
5793  // positions are not at half-step when called
5794  FullAtom *a = patch->atom.begin();
5795  int numAtoms = patch->numAtoms;
5796 
5797 #if CMK_BLUEGENEL
5798  CmiNetworkProgressAfter (0);
5799 #endif
5800 
5801  // For non-Multigrator doKineticEnergy = 1 always
5802  Tensor momentumSqrSum;
5804  {
5805  BigReal kineticEnergy = 0;
5806  Tensor virial;
5807  if ( simParams->pairInteractionOn ) {
5808  if ( simParams->pairInteractionSelf ) {
// pair-interaction self mode: only atoms in partition 1 contribute
5809  for ( int i = 0; i < numAtoms; ++i ) {
5810  if ( a[i].partition != 1 ) continue;
5811  kineticEnergy += a[i].mass * a[i].velocity.length2();
5812  virial.outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
5813  }
5814  }
5815  } else {
// skip massless (e.g. lone-pair) particles
5816  for ( int i = 0; i < numAtoms; ++i ) {
5817  if (a[i].mass < 0.01) continue;
5818  kineticEnergy += a[i].mass * a[i].velocity.length2();
5819  virial.outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
5820  }
5821  }
5822 
5824  momentumSqrSum = virial;
5825  }
// factor 0.5 for KE definition times 0.5 because this routine runs
// twice per timestep (see the comment block further below)
5826  kineticEnergy *= 0.5 * 0.5;
5828  virial *= 0.5;
5829  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,virial);
5830 #ifdef ALTVIRIAL
5831  ADD_TENSOR_OBJECT(reduction,REDUCTION_ALT_VIRIAL_NORMAL,virial);
5832 #endif
5833  }
5834 
5836  int nslabs = simParams->pressureProfileSlabs;
5837  const Lattice &lattice = patch->lattice;
5838  BigReal idz = nslabs/lattice.c().z;
5839  BigReal zmin = lattice.origin().z - 0.5*lattice.c().z;
5840  int useGroupPressure = simParams->useGroupPressure;
5841 
5842  // Compute kinetic energy partition, possibly subtracting off
5843  // internal kinetic energy if group pressure is enabled.
5844  // Since the regular pressure is 1/2 mvv and the internal kinetic
5845  // term that is subtracted off for the group pressure is
5846  // 1/2 mv (v-v_cm), the group pressure kinetic contribution is
5847  // 1/2 m * v * v_cm. The factor of 1/2 is because submitHalfstep
5848  // gets called twice per timestep.
5849  int hgs;
5850  for (int i=0; i<numAtoms; i += hgs) {
5851  int j, ppoffset;
5852  hgs = a[i].hydrogenGroupSize;
5853  int partition = a[i].partition;
5854 
5855  BigReal m_cm = 0;
5856  Velocity v_cm(0,0,0);
// accumulate hydrogen-group total mass and center-of-mass velocity
5857  for (j=i; j< i+hgs; ++j) {
5858  m_cm += a[j].mass;
5859  v_cm += a[j].mass * a[j].velocity;
5860  }
5861  v_cm /= m_cm;
5862  for (j=i; j < i+hgs; ++j) {
5863  BigReal mass = a[j].mass;
// with group pressure, the slab is chosen once per group (by its
// first atom); otherwise per atom
5864  if (! (useGroupPressure && j != i)) {
5865  BigReal z = a[j].position.z;
5866  int slab = (int)floor((z-zmin)*idz);
5867  if (slab < 0) slab += nslabs;
5868  else if (slab >= nslabs) slab -= nslabs;
5869  ppoffset = 3*(slab + partition*nslabs);
5870  }
5871  BigReal wxx, wyy, wzz;
5872  if (useGroupPressure) {
5873  wxx = 0.5*mass * a[j].velocity.x * v_cm.x;
5874  wyy = 0.5*mass * a[j].velocity.y * v_cm.y;
5875  wzz = 0.5*mass * a[j].velocity.z * v_cm.z;
5876  } else {
5877  wxx = 0.5*mass * a[j].velocity.x * a[j].velocity.x;
5878  wyy = 0.5*mass * a[j].velocity.y * a[j].velocity.y;
5879  wzz = 0.5*mass * a[j].velocity.z * a[j].velocity.z;
5880  }
5881  pressureProfileReduction->item(ppoffset ) += wxx;
5882  pressureProfileReduction->item(ppoffset+1) += wyy;
5883  pressureProfileReduction->item(ppoffset+2) += wzz;
5884  }
5885  }
5886  }
5887 
5888  // For non-Multigrator doKineticEnergy = 1 always
5890  {
5891  BigReal intKineticEnergy = 0;
5892  Tensor intVirialNormal;
5893 
5894  int hgs;
5895  for ( int i = 0; i < numAtoms; i += hgs ) {
5896 
5897 #if CMK_BLUEGENEL
5898  CmiNetworkProgress ();
5899 #endif
5900 
5901  hgs = a[i].hydrogenGroupSize;
5902  int j;
5903  BigReal m_cm = 0;
5904  Velocity v_cm(0,0,0);
5905  for ( j = i; j < (i+hgs); ++j ) {
5906  m_cm += a[j].mass;
5907  v_cm += a[j].mass * a[j].velocity;
5908  }
// NOTE(review): original line 5909 is missing -- the closing brace on
// 5911 implies a dropped guard around this momentum-squared update.
5910  momentumSqrSum.outerAdd(1.0/m_cm, v_cm, v_cm);
5911  }
5912  v_cm /= m_cm;
5913  if ( simParams->pairInteractionOn ) {
5914  if ( simParams->pairInteractionSelf ) {
5915  for ( j = i; j < (i+hgs); ++j ) {
5916  if ( a[j].partition != 1 ) continue;
5917  BigReal mass = a[j].mass;
5918  Vector v = a[j].velocity;
5919  Vector dv = v - v_cm;
5920  intKineticEnergy += mass * (v * dv);
5921  intVirialNormal.outerAdd (mass, v, dv);
5922  }
5923  }
5924  } else {
5925  for ( j = i; j < (i+hgs); ++j ) {
5926  BigReal mass = a[j].mass;
5927  Vector v = a[j].velocity;
5928  Vector dv = v - v_cm;
5929  intKineticEnergy += mass * (v * dv);
5930  intVirialNormal.outerAdd(mass, v, dv);
5931  }
5932  }
5933  }
5934  intKineticEnergy *= 0.5 * 0.5;
5936  intVirialNormal *= 0.5;
5937  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NORMAL,intVirialNormal);
5938  if ( simParams->multigratorOn) {
5939  momentumSqrSum *= 0.5;
5940  ADD_TENSOR_OBJECT(reduction,REDUCTION_MOMENTUM_SQUARED,momentumSqrSum);
5941  }
5942  }
5943 
5944 }
5945 
5946 void Sequencer::calcFixVirial(Tensor& fixVirialNormal, Tensor& fixVirialNbond, Tensor& fixVirialSlow,
5947  Vector& fixForceNormal, Vector& fixForceNbond, Vector& fixForceSlow) {
5948 
5949  FullAtom *a = patch->atom.begin();
5950  int numAtoms = patch->numAtoms;
5951 
5952  for ( int j = 0; j < numAtoms; j++ ) {
5953  if ( simParams->fixedAtomsOn && a[j].atomFixed ) {
5954  Vector dx = a[j].fixedPosition;
5955  // all negative because fixed atoms cancels these forces
5956  fixVirialNormal.outerAdd(-1.0, patch->f[Results::normal][j], dx);
5957  fixVirialNbond.outerAdd(-1.0, patch->f[Results::nbond][j], dx);
5958  fixVirialSlow.outerAdd(-1.0, patch->f[Results::slow][j], dx);
5959  fixForceNormal -= patch->f[Results::normal][j];
5960  fixForceNbond -= patch->f[Results::nbond][j];
5961  fixForceSlow -= patch->f[Results::slow][j];
5962  }
5963  }
5964 }
5965 
// NOTE(review): the signature line (original 5966) is missing from this
// extract (presumably Sequencer::submitReductions -- confirm), and
// interior lines 5980, 5984, 6034, 6036, 6043, 6074, 6138 and 6220 are
// absent, so some braces below close guards that are not shown and two
// reduction-item targets for the Drude energies are incomplete.
// Submits full-step quantities to the reduction system: atom checksum,
// kinetic energy, momentum, angular momentum, optional Drude COM/bond
// kinetic energies, internal virials, and fixed-atom corrections; finally
// calls reduction->submit().
5967 {
5968 #ifndef UPPER_BOUND
5969  NAMD_EVENT_RANGE_2(patch->flags.event_on,
5970  NamdProfileEvent::SUBMIT_REDUCTIONS);
5971  FullAtom *a = patch->atom.begin();
5972 #endif
5973  int numAtoms = patch->numAtoms;
5974 
5975 #if CMK_BLUEGENEL
5976  CmiNetworkProgressAfter(0);
5977 #endif
5978 
5979  reduction->item(REDUCTION_ATOM_CHECKSUM) += numAtoms;
5981 
5982 #ifndef UPPER_BOUND
5983  // For non-Multigrator doKineticEnergy = 1 always
5985  {
5986  BigReal kineticEnergy = 0;
5987  Vector momentum = 0;
5988  Vector angularMomentum = 0;
5989  Vector o = patch->lattice.origin();
5990  int i;
5991  if ( simParams->pairInteractionOn ) {
5992  if ( simParams->pairInteractionSelf ) {
5993  for (i = 0; i < numAtoms; ++i ) {
5994  if ( a[i].partition != 1 ) continue;
5995  kineticEnergy += a[i].mass * a[i].velocity.length2();
5996  momentum += a[i].mass * a[i].velocity;
5997  angularMomentum += cross(a[i].mass,a[i].position-o,a[i].velocity);
5998  }
5999  }
6000  } else {
6001  for (i = 0; i < numAtoms; ++i ) {
6002  kineticEnergy += a[i].mass * a[i].velocity.length2();
6003  momentum += a[i].mass * a[i].velocity;
6004  angularMomentum += cross(a[i].mass,a[i].position-o,a[i].velocity);
6005  }
6006  if (simParams->drudeOn) {
6007  BigReal drudeComKE = 0.;
6008  BigReal drudeBondKE = 0.;
6009 
6010  for (i = 0; i < numAtoms; i++) {
// a light particle (0.05 < mass < 1.0) following atom i is taken
// to be the Drude particle bonded to parent i
6011  if (i < numAtoms-1 &&
6012  a[i+1].mass < 1.0 && a[i+1].mass > 0.05) {
6013  // i+1 is a Drude particle with parent i
6014 
6015  // convert from Cartesian coordinates to (COM,bond) coordinates
6016  BigReal m_com = (a[i].mass + a[i+1].mass); // mass of COM
6017  BigReal m = a[i+1].mass / m_com; // mass ratio
6018  BigReal m_bond = a[i+1].mass * (1. - m); // mass of bond
6019  Vector v_bond = a[i+1].velocity - a[i].velocity; // vel of bond
6020  Vector v_com = a[i].velocity + m * v_bond; // vel of COM
6021 
6022  drudeComKE += m_com * v_com.length2();
6023  drudeBondKE += m_bond * v_bond.length2();
6024 
6025  i++; // +1 from loop, we've updated both particles
6026  }
6027  else {
6028  drudeComKE += a[i].mass * a[i].velocity.length2();
6029  }
6030  } // end for
6031 
6032  drudeComKE *= 0.5;
6033  drudeBondKE *= 0.5;
// NOTE(review): the reduction->item(...) targets for these two
// accumulations (original lines 6034 and 6036) are missing here.
6035  += drudeComKE;
6037  += drudeBondKE;
6038  } // end drudeOn
6039 
6040  } // end else
6041 
6042  kineticEnergy *= 0.5;
6044  ADD_VECTOR_OBJECT(reduction,REDUCTION_MOMENTUM,momentum);
6045  ADD_VECTOR_OBJECT(reduction,REDUCTION_ANGULAR_MOMENTUM,angularMomentum);
6046  }
6047 
6048 #ifdef ALTVIRIAL
6049  // THIS IS NOT CORRECTED FOR PAIR INTERACTIONS
6050  {
6051  Tensor altVirial;
6052  for ( int i = 0; i < numAtoms; ++i ) {
6053  altVirial.outerAdd(1.0, patch->f[Results::normal][i], a[i].position);
6054  }
6055  ADD_TENSOR_OBJECT(reduction,REDUCTION_ALT_VIRIAL_NORMAL,altVirial);
6056  }
6057  {
6058  Tensor altVirial;
6059  for ( int i = 0; i < numAtoms; ++i ) {
6060  altVirial.outerAdd(1.0, patch->f[Results::nbond][i], a[i].position);
6061  }
6062  ADD_TENSOR_OBJECT(reduction,REDUCTION_ALT_VIRIAL_NBOND,altVirial);
6063  }
6064  {
6065  Tensor altVirial;
6066  for ( int i = 0; i < numAtoms; ++i ) {
6067  altVirial.outerAdd(1.0, patch->f[Results::slow][i], a[i].position);
6068  }
6069  ADD_TENSOR_OBJECT(reduction,REDUCTION_ALT_VIRIAL_SLOW,altVirial);
6070  }
6071 #endif
6072 
6073  // For non-Multigrator doKineticEnergy = 1 always
6075  {
6076  BigReal intKineticEnergy = 0;
6077  Tensor intVirialNormal;
6078  Tensor intVirialNbond;
6079  Tensor intVirialSlow;
6080 
6081  int hgs;
// iterate hydrogen group by hydrogen group
6082  for ( int i = 0; i < numAtoms; i += hgs ) {
6083 #if CMK_BLUEGENEL
6084  CmiNetworkProgress();
6085 #endif
6086  hgs = a[i].hydrogenGroupSize;
6087  int j;
6088  BigReal m_cm = 0;
6089  Position x_cm(0,0,0);
6090  Velocity v_cm(0,0,0);
6091  for ( j = i; j < (i+hgs); ++j ) {
6092  m_cm += a[j].mass;
6093  x_cm += a[j].mass * a[j].position;
6094  v_cm += a[j].mass * a[j].velocity;
6095  }
6096  x_cm /= m_cm;
6097  v_cm /= m_cm;
6098  int fixedAtomsOn = simParams->fixedAtomsOn;
6099  if ( simParams->pairInteractionOn ) {
6100  int pairInteractionSelf = simParams->pairInteractionSelf;
6101  for ( j = i; j < (i+hgs); ++j ) {
6102  if ( a[j].partition != 1 &&
6103  ( pairInteractionSelf || a[j].partition != 2 ) ) continue;
6104  // net force treated as zero for fixed atoms
6105  if ( fixedAtomsOn && a[j].atomFixed ) continue;
6106  BigReal mass = a[j].mass;
6107  Vector v = a[j].velocity;
6108  Vector dv = v - v_cm;
6109  intKineticEnergy += mass * (v * dv);
6110  Vector dx = a[j].position - x_cm;
6111  intVirialNormal.outerAdd(1.0, patch->f[Results::normal][j], dx);
6112  intVirialNbond.outerAdd(1.0, patch->f[Results::nbond][j], dx);
6113  intVirialSlow.outerAdd(1.0, patch->f[Results::slow][j], dx);
6114  }
6115  } else {
6116  for ( j = i; j < (i+hgs); ++j ) {
6117  // net force treated as zero for fixed atoms
6118  if ( fixedAtomsOn && a[j].atomFixed ) continue;
6119  BigReal mass = a[j].mass;
6120  Vector v = a[j].velocity;
6121  Vector dv = v - v_cm;
6122  intKineticEnergy += mass * (v * dv);
6123  Vector dx = a[j].position - x_cm;
6124  intVirialNormal.outerAdd(1.0, patch->f[Results::normal][j], dx);
6125  intVirialNbond.outerAdd(1.0, patch->f[Results::nbond][j], dx);
6126  intVirialSlow.outerAdd(1.0, patch->f[Results::slow][j], dx);
6127  }
6128  }
6129  }
6130 
6131  intKineticEnergy *= 0.5;
6133  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NORMAL,intVirialNormal);
6134  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NBOND,intVirialNbond);
6135  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_SLOW,intVirialSlow);
6136  }
6137 
6139  // subtract off internal virial term, calculated as for intVirial.
6140  int nslabs = simParams->pressureProfileSlabs;
6141  const Lattice &lattice = patch->lattice;
6142  BigReal idz = nslabs/lattice.c().z;
6143  BigReal zmin = lattice.origin().z - 0.5*lattice.c().z;
6144  int useGroupPressure = simParams->useGroupPressure;
6145 
6146  int hgs;
6147  for (int i=0; i<numAtoms; i += hgs) {
6148  int j;
6149  hgs = a[i].hydrogenGroupSize;
6150  BigReal m_cm = 0;
6151  Position x_cm(0,0,0);
6152  for (j=i; j< i+hgs; ++j) {
6153  m_cm += a[j].mass;
6154  x_cm += a[j].mass * a[j].position;
6155  }
6156  x_cm /= m_cm;
6157 
// slab is selected by the first atom of the hydrogen group
6158  BigReal z = a[i].position.z;
6159  int slab = (int)floor((z-zmin)*idz);
6160  if (slab < 0) slab += nslabs;
6161  else if (slab >= nslabs) slab -= nslabs;
6162  int partition = a[i].partition;
6163  int ppoffset = 3*(slab + nslabs*partition);
6164  for (j=i; j < i+hgs; ++j) {
6165  BigReal mass = a[j].mass;
6166  Vector dx = a[j].position - x_cm;
6167  const Vector &fnormal = patch->f[Results::normal][j];
6168  const Vector &fnbond = patch->f[Results::nbond][j];
6169  const Vector &fslow = patch->f[Results::slow][j];
6170  BigReal wxx = (fnormal.x + fnbond.x + fslow.x) * dx.x;
6171  BigReal wyy = (fnormal.y + fnbond.y + fslow.y) * dx.y;
6172  BigReal wzz = (fnormal.z + fnbond.z + fslow.z) * dx.z;
6173  pressureProfileReduction->item(ppoffset ) -= wxx;
6174  pressureProfileReduction->item(ppoffset+1) -= wyy;
6175  pressureProfileReduction->item(ppoffset+2) -= wzz;
6176  }
6177  }
6178  }
6179 
6180  // For non-Multigrator doVirial = 1 always
6181  if (patch->flags.doVirial)
6182  {
6183  if ( simParams->fixedAtomsOn ) {
6184  Tensor fixVirialNormal;
6185  Tensor fixVirialNbond;
6186  Tensor fixVirialSlow;
6187  Vector fixForceNormal = 0;
6188  Vector fixForceNbond = 0;
6189  Vector fixForceSlow = 0;
6190 
6191  calcFixVirial(fixVirialNormal, fixVirialNbond, fixVirialSlow, fixForceNormal, fixForceNbond, fixForceSlow);
6192 
6193 #if 0
6194  auto printTensor = [](const Tensor& t, const std::string& name){
6195  CkPrintf("%s", name.c_str());
6196  CkPrintf("\n%12.5lf %12.5lf %12.5lf\n"
6197  "%12.5lf %12.5lf %12.5lf\n"
6198  "%12.5lf %12.5lf %12.5lf\n",
6199  t.xx, t.xy, t.xz,
6200  t.yx, t.yy, t.yz,
6201  t.zx, t.zy, t.zz);
6202  };
6203  printTensor(fixVirialNormal, "fixVirialNormal = ");
6204  printTensor(fixVirialNbond, "fixVirialNbond = ");
6205  printTensor(fixVirialSlow, "fixVirialSlow = ");
6206 #endif
6207 
6208  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,fixVirialNormal);
6209  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NBOND,fixVirialNbond);
6210  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_SLOW,fixVirialSlow);
6211  ADD_VECTOR_OBJECT(reduction,REDUCTION_EXT_FORCE_NORMAL,fixForceNormal);
6212  ADD_VECTOR_OBJECT(reduction,REDUCTION_EXT_FORCE_NBOND,fixForceNbond);
6213  ADD_VECTOR_OBJECT(reduction,REDUCTION_EXT_FORCE_SLOW,fixForceSlow);
6214  }
6215  }
6216 #endif // UPPER_BOUND
6217 
6218  reduction->submit();
6219 #ifndef UPPER_BOUND
6221 #endif
6222 }
6223 
// NOTE(review): the signature line (original 6224) is missing from this
// extract; the body references a parameter fmax2 (a squared force
// threshold), so this is presumably Sequencer::submitMinimizeReductions
// -- confirm. Interior lines 6315-6318 are also missing (likely the
// reduction items receiving fdotf/fdotv/vdotv).
// Minimization bookkeeping: sums per-atom forces into f[normal], moves
// Drude particles to their local energy minimum (with a hard-wall bond
// length cap), rescales pathologically huge forces, accumulates
// dot-products for the line search, and submits virials.
6225 {
6226  FullAtom *a = patch->atom.begin();
6227  Force *f1 = patch->f[Results::normal].begin();
6228  Force *f2 = patch->f[Results::nbond].begin();
6229  Force *f3 = patch->f[Results::slow].begin();
6230  const bool fixedAtomsOn = simParams->fixedAtomsOn;
6231  const bool drudeHardWallOn = simParams->drudeHardWallOn;
6232  const double drudeBondLen = simParams->drudeBondLen;
6233  const double drudeBondLen2 = drudeBondLen * drudeBondLen;
6234  const double drudeStep = 0.1/(TIMEFACTOR*TIMEFACTOR);
6235  const double drudeMove = 0.01;
6236  const double drudeStep2 = drudeStep * drudeStep;
6237  const double drudeMove2 = drudeMove * drudeMove;
6238  int numAtoms = patch->numAtoms;
6239 
6240  reduction->item(REDUCTION_ATOM_CHECKSUM) += numAtoms;
6241 
6242  for ( int i = 0; i < numAtoms; ++i ) {
6243 #if 0
6244  printf("ap[%2d]= %f %f %f\n", i, a[i].position.x, a[i].position.y, a[i].position.z);
6245  printf("f1[%2d]= %f %f %f\n", i, f1[i].x, f1[i].y, f1[i].z);
6246  printf("f2[%2d]= %f %f %f\n", i, f2[i].x, f2[i].y, f2[i].z);
6247  //printf("f3[%2d]= %f %f %f\n", i, f3[i].x, f3[i].y, f3[i].z);
6248 #endif
6249  f1[i] += f2[i] + f3[i]; // add all forces
// a light particle (0.05 < mass < 1.0) that is not the first atom is
// treated as a Drude particle attached to atom i-1
6250  if ( drudeHardWallOn && i && (a[i].mass > 0.05) && ((a[i].mass < 1.0)) ) { // drude particle
6251  if ( ! fixedAtomsOn || ! a[i].atomFixed ) {
// move along the force, but never farther than drudeMove
6252  if ( drudeStep2 * f1[i].length2() > drudeMove2 ) {
6253  a[i].position += drudeMove * f1[i].unit();
6254  } else {
6255  a[i].position += drudeStep * f1[i];
6256  }
// clamp the Drude bond back to drudeBondLen if it overstretched
6257  if ( (a[i].position - a[i-1].position).length2() > drudeBondLen2 ) {
6258  a[i].position = a[i-1].position + drudeBondLen * (a[i].position - a[i-1].position).unit();
6259  }
6260  }
// fold the Drude particle's force onto its parent atom
6261  Vector netf = f1[i-1] + f1[i];
6262  if ( fixedAtomsOn && a[i-1].atomFixed ) netf = 0;
6263  f1[i-1] = netf;
6264  f1[i] = 0.;
6265  }
6266  if ( fixedAtomsOn && a[i].atomFixed ) f1[i] = 0;
6267  }
6268 
6269  f2 = f3 = 0; // included in f1
6270 
6271  BigReal maxv2 = 0.;
6272 
// maxv2 = largest of velocity^2, falling back to force^2 for atoms at rest
6273  for ( int i = 0; i < numAtoms; ++i ) {
6274  BigReal v2 = a[i].velocity.length2();
6275  if ( v2 > 0. ) {
6276  if ( v2 > maxv2 ) maxv2 = v2;
6277  } else {
6278  v2 = f1[i].length2();
6279  if ( v2 > maxv2 ) maxv2 = v2;
6280  }
6281  }
6282 
6283  if ( fmax2 > 10. * TIMEFACTOR * TIMEFACTOR * TIMEFACTOR * TIMEFACTOR )
6284  { Tensor virial; patch->minimize_rattle2( 0.1 * TIMEFACTOR / sqrt(maxv2), &virial, true /* forces */); }
6285 
6286  BigReal fdotf = 0;
6287  BigReal fdotv = 0;
6288  BigReal vdotv = 0;
6289  int numHuge = 0;
6290  for ( int i = 0; i < numAtoms; ++i ) {
6291  if ( simParams->fixedAtomsOn && a[i].atomFixed ) continue;
6292  if ( drudeHardWallOn && (a[i].mass > 0.05) && ((a[i].mass < 1.0)) ) continue; // drude particle
6293  Force f = f1[i];
6294  BigReal ff = f * f;
6295  if ( ff > fmax2 ) {
6296  if (simParams->printBadContacts) {
6297  CkPrintf("STEP(%i) MIN_HUGE[%i] f=%e kcal/mol/A\n",patch->flags.sequence,patch->pExt[i].id,ff);
6298  }
6299  ++numHuge;
6300  // pad scaling so minimizeMoveDownhill() doesn't miss them
6301  BigReal fmult = 1.01 * sqrt(fmax2/ff);
6302  f *= fmult; ff = f * f;
6303  f1[i] *= fmult;
6304  }
6305  fdotf += ff;
6306  fdotv += f * a[i].velocity;
6307  vdotv += a[i].velocity * a[i].velocity;
6308  }
6309 
6310 #if 0
6311  printf("fdotf = %f\n", fdotf);
6312  printf("fdotv = %f\n", fdotv);
6313  printf("vdotv = %f\n", vdotv);
6314 #endif
// NOTE(review): original lines 6315-6318 are missing from this extract;
// the accumulators above (fdotf, fdotv, vdotv, numHuge) appear unused in
// what remains, so those lines likely submitted them to the reduction.
6319 
6320  {
6321  Tensor intVirialNormal;
6322  Tensor intVirialNbond;
6323  Tensor intVirialSlow;
6324 
6325  int hgs;
// internal virial per hydrogen group, relative to group center of mass
6326  for ( int i = 0; i < numAtoms; i += hgs ) {
6327  hgs = a[i].hydrogenGroupSize;
6328  int j;
6329  BigReal m_cm = 0;
6330  Position x_cm(0,0,0);
6331  for ( j = i; j < (i+hgs); ++j ) {
6332  m_cm += a[j].mass;
6333  x_cm += a[j].mass * a[j].position;
6334  }
6335  x_cm /= m_cm;
6336  for ( j = i; j < (i+hgs); ++j ) {
6337  BigReal mass = a[j].mass;
6338  // net force treated as zero for fixed atoms
6339  if ( simParams->fixedAtomsOn && a[j].atomFixed ) continue;
6340  Vector dx = a[j].position - x_cm;
6341  intVirialNormal.outerAdd(1.0, patch->f[Results::normal][j], dx);
6342  intVirialNbond.outerAdd(1.0, patch->f[Results::nbond][j], dx);
6343  intVirialSlow.outerAdd(1.0, patch->f[Results::slow][j], dx);
6344  }
6345  }
6346 
6347  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NORMAL,intVirialNormal);
6348  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NBOND,intVirialNbond);
6349  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_SLOW,intVirialSlow);
6350  }
6351 
6352  if ( simParams->fixedAtomsOn ) {
6353  Tensor fixVirialNormal;
6354  Tensor fixVirialNbond;
6355  Tensor fixVirialSlow;
6356  Vector fixForceNormal = 0;
6357  Vector fixForceNbond = 0;
6358  Vector fixForceSlow = 0;
6359 
6360  calcFixVirial(fixVirialNormal, fixVirialNbond, fixVirialSlow, fixForceNormal, fixForceNbond, fixForceSlow);
6361 
6362  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,fixVirialNormal);
6363  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NBOND,fixVirialNbond);
6364  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_SLOW,fixVirialSlow);
6365  ADD_VECTOR_OBJECT(reduction,REDUCTION_EXT_FORCE_NORMAL,fixForceNormal);
6366  ADD_VECTOR_OBJECT(reduction,REDUCTION_EXT_FORCE_NBOND,fixForceNbond);
6367  ADD_VECTOR_OBJECT(reduction,REDUCTION_EXT_FORCE_SLOW,fixForceSlow);
6368  }
6369 
6370  reduction->submit();
6371 }
6372 
// Submit positions/velocities/forces to the collection system for output
// at this step, each at the precision that Output says is needed.
// NOTE(review): original lines 6383, 6385 and 6387 are missing from this
// extract, so the IMD condition below is incomplete and its body is gone.
6373 void Sequencer::submitCollections(int step, int zeroVel)
6374 {
6375  //
6376  // DJH: Copy updates of SOA back into AOS.
6377  // Do we need to update everything or is it safe to just update
6378  // positions and velocities separately, as needed?
6379  //
6380  //patch->copy_updates_to_AOS();
6381 
6382  if (simParams->IMDon &&
6384  !(step % simParams->IMDfreq) &&
6386  (step != simParams->firstTimestep)) {
6388  }
6389 
6390  NAMD_EVENT_RANGE_2(patch->flags.event_on,
6391  NamdProfileEvent::SUBMIT_COLLECTIONS);
6392  int prec;
6393  int dcdSelectionIndex;
// coordinateNeeded returns both the precision and which DCD atom
// selection applies at this step
6394  std::tie (prec, dcdSelectionIndex) = Output::coordinateNeeded(step);
6395  if ( prec ) {
6396  collection->submitPositions(step,patch->atom,patch->lattice,prec,dcdSelectionIndex);
6397  }
6398  prec = Output::velocityNeeded(step);
6399  if ( prec ) {
6400  collection->submitVelocities(step,zeroVel,patch->atom,prec);
6401  }
6402  prec = Output::forceNeeded(step);
6403  if ( prec ) {
// forces beyond the slow class are not written out
6404  int maxForceUsed = patch->flags.maxForceUsed;
6405  if ( maxForceUsed > Results::slow ) maxForceUsed = Results::slow;
6406  collection->submitForces(step,patch->atom,maxForceUsed,patch->f,prec);
6407  }
6408 }
6409 
// Drive one force evaluation: manage pairlist validity/aging, reposition
// virtual sites, hand positions to the compute objects (positionsReady),
// suspend until force deposit boxes close, then redistribute virtual-site
// forces (lone pairs / TIP4 / SWM4 / single-topology alchemy) and apply
// MOLLY. NOTE(review): many interior lines are missing from this extract
// (6415, 6420, 6425-6426, 6446, 6450, 6453, 6477, 6496, 6512, 6528,
// 6540, 6554) -- several conditions below are therefore truncated;
// confirm against the full source before editing.
6410 void Sequencer::runComputeObjects(int migration, int pairlists, int pressureStep)
6411 {
6412  if ( migration ) pairlistsAreValid = 0;
6413 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC)
// expire pairlists that have outlived their age limit
6414  if ( pairlistsAreValid &&
6416  && ( pairlistsAge > pairlistsAgeLimit ) ) {
6417  pairlistsAreValid = 0;
6418  }
6419 #else
6421  pairlistsAreValid = 0;
6422  }
6423 #endif
6424  if ( ! simParams->usePairlists ) pairlists = 0;
6425  patch->flags.usePairlists = pairlists || pairlistsAreValid;
6427  pairlists && ! pairlistsAreValid;
6428 
// virtual sites must be placed before positions are published
6429  if ( simParams->singleTopology ) patch->reposition_all_alchpairs();
6430  if ( simParams->lonepairs ) patch->reposition_all_lonepairs();
6431 
6432  //
6433  // DJH: Copy updates of SOA back into AOS.
6434  // The positionsReady() routine starts force computation and atom migration.
6435  //
6436  // We could reduce amount of copying here by checking migration status
6437  // and copying velocities only when migrating. Some types of simulation
6438  // always require velocities, such as Lowe-Anderson.
6439  //
6440  //patch->copy_updates_to_AOS();
6441 
6442  patch->positionsReady(migration); // updates flags.sequence
6443 
6444  int seq = patch->flags.sequence;
6445  int basePriority = ( (seq & 0xffff) << 15 )
// GBIS runs in three phases, each with its own priority and suspend
6447  if ( patch->flags.doGBIS && patch->flags.doNonbonded) {
6448  priority = basePriority + GB1_COMPUTE_HOME_PRIORITY;
6449  suspend(); // until all deposit boxes close
6451  priority = basePriority + GB2_COMPUTE_HOME_PRIORITY;
6452  suspend();
6454  priority = basePriority + COMPUTE_HOME_PRIORITY;
6455  suspend();
6456  } else {
6457  priority = basePriority + COMPUTE_HOME_PRIORITY;
6458  suspend(); // until all deposit boxes close
6459  }
6460 
6461  //
6462  // DJH: Copy all data into SOA from AOS.
6463  //
6464  // We need everything copied after atom migration.
6465  // When doing force computation without atom migration,
6466  // all data except forces will already be up-to-date in SOA
6467  // (except maybe for some special types of simulation).
6468  //
6469  //patch->copy_all_to_SOA();
6470 
6471  //
6472  // DJH: Copy forces to SOA.
6473  // Force available after suspend() has returned.
6474  //
6475  //patch->copy_forces_to_SOA();
6476 
// a freshly built pairlist starts its age at zero
6478  pairlistsAreValid = 1;
6479  pairlistsAge = 0;
6480  }
6481  // For multigrator, do not age pairlist during pressure step
6482  // NOTE: for non-multigrator pressureStep = 0 always
6483  if ( pairlistsAreValid && !pressureStep ) ++pairlistsAge;
6484 
// project forces on massless virtual sites back onto their parent atoms,
// accumulating the virial correction for each force class
6485  if (simParams->lonepairs) {
6486  {
6487  Tensor virial;
6488  patch->redistrib_lonepair_forces(Results::normal, &virial);
6489  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NORMAL, virial);
6490  }
6491  if (patch->flags.doNonbonded) {
6492  Tensor virial;
6493  patch->redistrib_lonepair_forces(Results::nbond, &virial);
6494  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NBOND, virial);
6495  }
6497  Tensor virial;
6498  patch->redistrib_lonepair_forces(Results::slow, &virial);
6499  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_SLOW, virial);
6500  }
6501  } else if (simParams->watmodel == WaterModel::TIP4) {
6502  {
6503  Tensor virial;
6504  patch->redistrib_tip4p_forces(Results::normal, &virial);
6505  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NORMAL, virial);
6506  }
6507  if (patch->flags.doNonbonded) {
6508  Tensor virial;
6509  patch->redistrib_tip4p_forces(Results::nbond, &virial);
6510  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NBOND, virial);
6511  }
6513  Tensor virial;
6514  patch->redistrib_tip4p_forces(Results::slow, &virial);
6515  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_SLOW, virial);
6516  }
6517  } else if (simParams->watmodel == WaterModel::SWM4) {
6518  {
6519  Tensor virial;
6520  patch->redistrib_swm4_forces(Results::normal, &virial);
6521  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NORMAL, virial);
6522  }
6523  if (patch->flags.doNonbonded) {
6524  Tensor virial;
6525  patch->redistrib_swm4_forces(Results::nbond, &virial);
6526  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NBOND, virial);
6527  }
6529  Tensor virial;
6530  patch->redistrib_swm4_forces(Results::slow, &virial);
6531  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_SLOW, virial);
6532  }
6533  }
6534 
6535  if (simParams->singleTopology) {
6536  patch->redistrib_alchpair_forces(Results::normal);
6537  if (patch->flags.doNonbonded) {
6538  patch->redistrib_alchpair_forces(Results::nbond);
6539  }
6541  patch->redistrib_alchpair_forces(Results::slow);
6542  }
6543  }
6544 
6545  if ( patch->flags.doMolly ) {
6546  Tensor virial;
6547  patch->mollyMollify(&virial);
6548  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_SLOW,virial);
6549  }
6550 
6551 
6552  // BEGIN LA
6553  if (patch->flags.doLoweAndersen) {
6555  }
6556  // END LA
6557 //TODO:HIP check if this applies to HIP
6558 #ifdef NAMD_CUDA_XXX
6559  int numAtoms = patch->numAtoms;
6560  FullAtom *a = patch->atom.begin();
6561  for ( int i=0; i<numAtoms; ++i ) {
6562  CkPrintf("%d %g %g %g\n", a[i].id,
6563  patch->f[Results::normal][i].x +
6564  patch->f[Results::nbond][i].x +
6565  patch->f[Results::slow][i].x,
6566  patch->f[Results::normal][i].y +
6567  patch->f[Results::nbond][i].y +
6568  patch->f[Results::slow][i].y,
6569  patch->f[Results::normal][i].z +
6570  patch->f[Results::nbond][i].z +
6571  patch->f[Results::slow][i].z);
6572  CkPrintf("%d %g %g %g\n", a[i].id,
6573  patch->f[Results::normal][i].x,
6574  patch->f[Results::nbond][i].x,
6575  patch->f[Results::slow][i].x);
6576  CkPrintf("%d %g %g %g\n", a[i].id,
6577  patch->f[Results::normal][i].y,
6578  patch->f[Results::nbond][i].y,
6579  patch->f[Results::slow][i].y);
6580  CkPrintf("%d %g %g %g\n", a[i].id,
6581  patch->f[Results::normal][i].z,
6582  patch->f[Results::nbond][i].z,
6583  patch->f[Results::slow][i].z);
6584  }
6585 #endif
6586 
6587 //#undef PRINT_FORCES
6588 //#define PRINT_FORCES 1
6589 #if PRINT_FORCES
6590  int numAtoms = patch->numAtoms;
6591  FullAtom *a = patch->atom.begin();
6592  for ( int i=0; i<numAtoms; ++i ) {
6593  float fxNo = patch->f[Results::normal][i].x;
6594  float fxNb = patch->f[Results::nbond][i].x;
6595  float fxSl = patch->f[Results::slow][i].x;
6596  float fyNo = patch->f[Results::normal][i].y;
6597  float fyNb = patch->f[Results::nbond][i].y;
6598  float fySl = patch->f[Results::slow][i].y;
6599  float fzNo = patch->f[Results::normal][i].z;
6600  float fzNb = patch->f[Results::nbond][i].z;
6601  float fzSl = patch->f[Results::slow][i].z;
6602  float fx = fxNo+fxNb+fxSl;
6603  float fy = fyNo+fyNb+fySl;
6604  float fz = fzNo+fzNb+fzSl;
6605 
6606  float f = sqrt(fx*fx+fy*fy+fz*fz);
6607  int id = patch->pExt[i].id;
6608  int seq = patch->flags.sequence;
6609  float x = patch->p[i].position.x;
6610  float y = patch->p[i].position.y;
6611  float z = patch->p[i].position.z;
6612  //CkPrintf("FORCE(%04i)[%04i] = <% .4e, % .4e, % .4e> <% .4e, % .4e, % .4e> <% .4e, % .4e, % .4e> <<% .4e, % .4e, % .4e>>\n", seq,id,
6613  CkPrintf("FORCE(%04i)[%04i] = % .9e % .9e % .9e\n", seq,id,
6614  //CkPrintf("FORCE(%04i)[%04i] = <% .4e, % .4e, % .4e> <% .4e, % .4e, % .4e> <% .4e, % .4e, % .4e>\n", seq,id,
6615 //fxNo,fyNo,fzNo,
6616 fxNb,fyNb,fzNb
6617 //fxSl,fySl,fzSl,
6618 //fx,fy,fz
6619 );
6620  }
6621 #endif
6622 }
6623 
// Load-balancing hook: when the countdown reaches zero, submit this
// patch's load statistics and ask the load-balance coordinator to
// rebalance; migration invalidates the pairlists.
// NOTE(review): original line 6626 (the body of the first "if") is
// missing from this extract -- presumably it re-initializes ldbSteps;
// confirm against the full source.
6624 void Sequencer::rebalanceLoad(int timestep) {
6625  if ( ! ldbSteps ) {
6627  }
6628  if ( ! --ldbSteps ) {
6629  patch->submitLoadStats(timestep);
6630  ldbCoordinator->rebalance(this,patch->getPatchID());
// rebalancing may move atoms/computes, so cached pairlists are stale
6631  pairlistsAreValid = 0;
6632  }
6633 }
6634 
6635 void Sequencer::cycleBarrier(int doBarrier, int step) {
6636 #if USE_BARRIER
6637  if (doBarrier)
6638  // Blocking receive for the cycle barrier.
6639  broadcast->cycleBarrier.get(step);
6640 #endif
6641 }
6642 
// NOTE(review): the signature line (original 6643) is missing from this
// extract; this is the body of Sequencer::traceBarrier(int step), which
// blocks until the trace-barrier broadcast for this step arrives.
6644  // Blocking receive for the trace barrier.
6645  broadcast->traceBarrier.get(step);
6646 }
6647 
#ifdef MEASURE_NAMD_WITH_PAPI
// Block until the PAPI-measurement barrier value for this step has been
// broadcast; compiled in only for PAPI-instrumented builds.
void Sequencer::papiMeasureBarrier(int step){
  broadcast->papiMeasureBarrier.get(step);
}
#endif
6654 
// NOTE(review): the signature (original lines 6655-6656) is missing from
// this extract; the cross-reference index identifies this as
// void Sequencer::terminate(void). It frees this sequencer's Charm++
// thread and suspends execution permanently.
6657  CthFree(thread);
6658  CthSuspend();
6659 }
static Node * Object()
Definition: Node.h:86
HomePatch *const patch
Definition: Sequencer.h:323
Real atomcharge(int anum) const
Definition: Molecule.h:1124
SubmitReduction * multigratorReduction
Definition: Sequencer.h:309
Vector gaussian_vector(void)
Definition: Random.h:219
void rescaleVelocities(int)
Definition: Sequencer.C:5388
void finishReduction(bool doEnergyVirial)
double * vel_y
Definition: NamdTypes.h:397
int doKineticEnergy
Definition: Sequencer.h:310
void minimizationQuenchVelocity(void)
Definition: Sequencer.C:5777
int period
period for some step dependent event (e.g. stepsPerCycle)
Definition: Sequencer.C:143
NAMD_HOST_DEVICE void rescale(Tensor factor)
Definition: Lattice.h:60
void max(int i, BigReal v)
Definition: ReductionMgr.h:349
int init(int initstep, int initperiod, int delta=0)
Definition: Sequencer.C:159
#define RECIP_TIMEFACTOR
Definition: common.h:61
DCDParams dcdSelectionParams[16]
Definition: Molecule.h:482
void barrier(const SynchronousCollectiveScope scope)
BigReal zy
Definition: Tensor.h:19
Real langevinParam
Definition: NamdTypes.h:220
Bool berendsenPressureOn
void tcoupleVelocities(BigReal, int)
Definition: Sequencer.C:5559
void addMovDragToPosition(BigReal)
Definition: Sequencer.C:4397
void terminate(void)
Definition: Sequencer.C:6655
BigReal soluteScalingFactorCharge
void submitForces(int seq, FullAtomList &a, int maxForceUsed, ForceList *f, int prec)
virtual void algorithm(void)
Definition: Sequencer.C:289
void get_rotdrag_params(BigReal &v, Vector &a, Vector &p, int atomnum) const
Definition: Molecule.h:1419
void langevinVelocitiesBBK2_SOA(BigReal timestep)
Definition: Sequencer.C:3326
#define NAMD_EVENT_STOP(eon, id)
int frequency
Definition: common.h:255
Bool is_atom_movdragged(int atomnum) const
Definition: Molecule.h:1289
SubmitReduction * pressureProfileReduction
Definition: Sequencer.h:325
void suspendULTs()
int getNumAtoms() const
Definition: Patch.h:105
void minimize_rattle2(const BigReal, Tensor *virial, bool forces=false)
Definition: HomePatch.C:4382
void integrate(int)
Definition: Sequencer.C:3870
friend class SequencerCUDA
Definition: Sequencer.h:49
HomePatch * patch
Definition: HomePatchList.h:23
Definition: PDB.h:36
void scaleVelocities(const BigReal velScale)
Definition: Sequencer.C:4929
void positionsReady_SOA(int doMigration=0)
Definition: HomePatch.C:971
#define GB1_COMPUTE_HOME_PRIORITY
Definition: Priorities.h:56
void addVelocityToPosition(BigReal)
Definition: Sequencer.C:5648
SubmitReduction * reduction
Definition: Sequencer.h:324
NAMD_HOST_DEVICE Vector c() const
Definition: Lattice.h:270
BigReal xz
Definition: Tensor.h:17
SubmitReduction * min_reduction
Definition: Sequencer.h:228
std::shared_ptr< CudaGlobalMasterServer > getCudaGlobalMaster()
Bool is_atom_exPressure(int atomnum) const
Definition: Molecule.h:1544
SimpleBroadcastObject< int > traceBarrier
Definition: Broadcasts.h:89
BigReal accelMDLastStep
void maximumMove(BigReal)
Definition: Sequencer.C:5732
Bool monteCarloPressureOn
int marginViolations
Definition: HomePatch.h:401
Definition: common.h:275
#define BOLTZMANN
Definition: common.h:54
Definition: Node.h:78
double * f_normal_z
Definition: NamdTypes.h:430
const GlobalMasterIMD * getIMD()
Definition: IMDOutput.h:43
void cycleBarrier(int, int)
Definition: Sequencer.C:6635
#define FILE_OUTPUT
Definition: Output.h:25
IMDOutput * imd
Definition: Node.h:186
double * f_normal_y
Definition: NamdTypes.h:429
Position fixedPosition
Definition: NamdTypes.h:212
Lattice & lattice
Definition: Patch.h:127
void submitCollections_SOA(int step, int zeroVel=0)
Definition: Sequencer.C:3173
Bool globalMasterScaleByFrequency
static void partition(int *order, const FullAtom *atoms, int begin, int end)
Definition: SortAtoms.C:45
SimpleBroadcastObject< Vector > momentumCorrection
Definition: Broadcasts.h:82
void addRotDragToPosition(BigReal)
Definition: Sequencer.C:4416
static PatchMap * Object()
Definition: PatchMap.h:27
void saveForce(const int ftag=Results::normal)
Definition: Sequencer.C:5598
void registerIDsFullAtom(const FullAtom *begin, const FullAtom *end)
Definition: AtomMap.C:50
CmiNodeLock printlock
Definition: PatchData.h:163
#define EVAL_MEASURE
Definition: Output.h:27
double * f_slow_y
Definition: NamdTypes.h:435
Definition: Vector.h:72
void langevinVelocitiesBBK2(BigReal)
Definition: Sequencer.C:5137
void monteCarloPressureControl(const int step, const int doMigration, const int doEnergy, const int doVirial, const int maxForceNumber, const int doGlobal)
virtual void submit(void)=0
Output * output
Definition: Node.h:185
#define ADD_TENSOR_OBJECT(R, RL, D)
Definition: ReductionMgr.h:44
SimParameters * simParameters
Definition: Node.h:181
int slowFreq
Definition: Sequencer.h:297
void addForceToMomentum(FullAtom *__restrict atom_arr, const Force *__restrict force_arr, const BigReal dt, int num_atoms) __attribute__((__noinline__))
Definition: HomePatch.C:3319
void newMinimizeDirection(BigReal)
Definition: Sequencer.C:4607
void newMinimizePosition(BigReal)
Definition: Sequencer.C:4666
double stochRescaleCoefficient()
Definition: Controller.C:1784
Bool CUDASOAintegrateMode
int rattle1(const BigReal, Tensor *virial, SubmitReduction *)
Definition: HomePatch.C:3788
int savePairlists
Definition: PatchTypes.h:41
int nextstep
next step value
Definition: Sequencer.C:144
bool masterThread
Definition: Sequencer.h:331
BigReal reassignTemp
BigReal & item(int i)
Definition: ReductionMgr.h:336
void gbisComputeAfterP2()
Definition: HomePatch.C:4943
#define DebugM(x, y)
Definition: Debug.h:75
void startWork(const LDObjHandle &handle)
HomePatchList * homePatchList()
Definition: PatchMap.C:438
void langevinVelocitiesBBK1(BigReal)
Definition: Sequencer.C:5064
std::ostream & endi(std::ostream &s)
Definition: InfoStream.C:54
BigReal z
Definition: Vector.h:74
char const *const NamdProfileEventStr[]
int getNumDevice()
Definition: DeviceCUDA.h:125
int usePairlists
Definition: PatchTypes.h:40
Position position
Definition: NamdTypes.h:78
BigReal rotDragGlobVel
BigReal yz
Definition: Tensor.h:18
void updateDevicePatchMap(int startup)
int berendsenPressureFreq
SubmitReduction * willSubmit(int setID, int size=-1)
Definition: ReductionMgr.C:368
void rattle1(BigReal, int)
Definition: Sequencer.C:5675
void saveTotalForces(HomePatch *)
SimpleBroadcastObject< BigReal > adaptTemperature
Definition: Broadcasts.h:92
unsigned char get_ss_type(int anum) const
Definition: Molecule.h:1448
void rebalanceLoad(int timestep)
Definition: Sequencer.C:6624
void submitHalfstep(int)
Definition: Sequencer.C:5788
Bool globalMasterStaleForces
static ReductionMgr * Object(void)
Definition: ReductionMgr.h:290
#define iout
Definition: InfoStream.h:51
void addForceToMomentum_SOA(const double scaling, double dt_normal, double dt_nbond, double dt_slow, int maxForceNumber)
Definition: Sequencer.C:2726
int doLoweAndersen
Definition: PatchTypes.h:28
Velocity velocity
Definition: NamdTypes.h:211
int pressureProfileSlabs
void minimizeMoveDownhill(BigReal fmax2)
Definition: Sequencer.C:4585
Patch * patch(PatchID pid)
Definition: PatchMap.h:244
void addForceToMomentum(BigReal, const int ftag=Results::normal, const int useSaved=0)
Definition: Sequencer.C:5609
void submitReductions_SOA()
Definition: Sequencer.C:2968
std::vector< PatchRecord > & getPatches()
static PatchMap * ObjectOnPe(int pe)
Definition: PatchMap.h:28
float * langScalVelBBK2
derived from langevinParam
Definition: NamdTypes.h:419
uint32 groupFixed
Definition: NamdTypes.h:163
void pauseWork(const LDObjHandle &handle)
void langevinPiston(int)
Definition: Sequencer.C:5308
SimpleBroadcastObject< BigReal > tcoupleCoefficient
Definition: Broadcasts.h:79
int NAMD_gcd(int a, int b)
Definition: common.C:102
void exchangeCheckpoint(int scriptTask, int &bpc)
Definition: HomePatch.C:5263
AtomMapper * atomMapper
Definition: Patch.h:159
Bool pairInteractionOn
float * gaussrand_y
Definition: NamdTypes.h:424
Molecule stores the structural information for the system.
Definition: Molecule.h:174
LDObjHandle ldObjHandle
Definition: HomePatch.h:554
void wakeULTs()
double * pos_y
Definition: NamdTypes.h:378
void split(int iStream, int numStreams)
Definition: Random.h:77
static NAMD_HOST_DEVICE Tensor identity(BigReal v1=1.0)
Definition: Tensor.h:31
void addForceToMomentum3(const BigReal timestep1, const int ftag1, const int useSaved1, const BigReal timestep2, const int ftag2, const int useSaved2, const BigReal timestep3, const int ftag3, const int useSaved3)
Definition: Sequencer.C:5624
void positionsReady(int doMigration=0)
Definition: HomePatch.C:1895
Definition: Patch.h:35
Bool useDeviceMigration
float * mass
Definition: NamdTypes.h:405
Flags flags
Definition: Patch.h:128
void submitHalfstep_SOA()
Definition: Sequencer.C:2868
WaterModel watmodel
SimpleBroadcastObject< BigReal > stochRescaleCoefficient
Definition: Broadcasts.h:80
SimpleBroadcastObject< int > monteCarloBarostatAcceptance
Definition: Broadcasts.h:84
double * f_nbond_y
Definition: NamdTypes.h:432
uint32 id
Definition: NamdTypes.h:160
void revert(void)
Definition: HomePatch.C:5232
void submitCollections(int step, int zeroVel=0)
Definition: Sequencer.C:6373
void stochRescaleVelocities_SOA(int step)
Definition: Sequencer.C:3840
static void print_vel_SOA(const double *vel_x, const double *vel_y, const double *vel_z, int ilo=0, int ihip1=1)
Definition: Sequencer.C:107
Charge charge
Definition: NamdTypes.h:79
void runComputeObjects_SOA(int migration, int pairlists, int step)
Definition: Sequencer.C:3654
BigReal calcKineticEnergy()
Definition: Sequencer.C:4937
#define SEQ_STK_SZ
Definition: Thread.h:11
void adaptTempUpdate(int)
Definition: Sequencer.C:5450
double * f_nbond_z
Definition: NamdTypes.h:433
void positionsReady_GPU(int doMigration=0, int startup=0)
Bool langevin_useBAOAB
int32 * hydrogenGroupSize
Definition: NamdTypes.h:385
#define TIMER_START(T, TYPE)
Definition: HomePatch.h:264
#define NAIVE
Definition: SimParameters.h:52
Definition: Output.h:35
int rattle1_SOA(const BigReal, Tensor *virial, SubmitReduction *)
Definition: HomePatch.C:4659
double * f_normal_x
Definition: NamdTypes.h:428
void calcFixVirial(Tensor &fixVirialNormal, Tensor &fixVirialNbond, Tensor &fixVirialSlow, Vector &fixForceNormal, Vector &fixForceNbond, Vector &fixForceSlow)
Definition: Sequencer.C:5946
float * langevinParam
Definition: NamdTypes.h:406
Definition: Random.h:37
#define NAMD_PROFILE_START()
float * gaussrand_x
fill with Gaussian distributed random numbers
Definition: NamdTypes.h:423
static __device__ __host__ __forceinline__ int computeAtomPad(const int numAtoms, const int tilesize=WARPSIZE)
int numPatches(void) const
Definition: PatchMap.h:59
void awaken(void)
Definition: Sequencer.h:55
static std::pair< int, int > coordinateNeeded(int timestep)
Check if the step requires to output the coordinates.
Definition: Output.C:185
#define NAMD_EVENT_START(eon, id)
int pairlistsAge
Definition: Sequencer.h:232
void stochRescaleVelocities(int)
Definition: Sequencer.C:5582
void rattle1_SOA(BigReal, int)
Definition: Sequencer.C:3637
#define COMPUTE_HOME_PRIORITY
Definition: Priorities.h:76
void constructDevicePatchMap()
static void print_tensor(const Tensor &t)
Definition: Sequencer.C:120
NAMD_HOST_DEVICE BigReal length(void) const
Definition: Vector.h:202
int getMasterPe()
Definition: DeviceCUDA.h:137
NAMD_HOST_DEVICE Position apply_transform(Position data, const Transform &t) const
Definition: Lattice.h:137
BigReal rescaleTemp
void NAMD_bug(const char *err_msg)
Definition: common.C:195
float * gaussrand_z
Definition: NamdTypes.h:425
#define TIMER_REPORT(T)
Definition: HomePatch.h:267
void multigratorPressure(int step, int callNumber)
Definition: Sequencer.C:4794
int doEnergy
Definition: PatchTypes.h:20
static ComputeCUDAMgr * getComputeCUDAMgr()
void berendsenPressure(int)
Definition: Sequencer.C:5244
void submitMomentum(int step)
Definition: Sequencer.C:4703
int doFullElectrostatics
Definition: PatchTypes.h:23
BigReal yx
Definition: Tensor.h:18
Bool adaptTempLangevin
int rescaleVelocities_numTemps
Definition: Sequencer.h:277
double * vel_x
Jim recommends double precision velocity.
Definition: NamdTypes.h:396
int32 * id
Definition: NamdTypes.h:390
void submitLoadStats(int timestep)
Definition: HomePatch.C:5428
void mollyMollify(Tensor *virial)
Definition: HomePatch.C:5159
void runComputeObjects(int migration=1, int pairlists=0, int pressureStep=0)
Definition: Sequencer.C:6410
void awaken(void)
Definition: Controller.C:371
void rebalance(Sequencer *seq, PatchID id)
void rescaleaccelMD(int, int, int)
Definition: Sequencer.C:5407
SimpleBroadcastObject< Tensor > velocityRescaleTensor2
Definition: Broadcasts.h:75
float * charge
Definition: NamdTypes.h:381
int Bool
Definition: common.h:142
BigReal drudeBondLen
CompAtomList p
Definition: Patch.h:153
SimpleBroadcastObject< int > IMDTimeEnergyBarrier
Definition: Broadcasts.h:90
Sequencer(HomePatch *p)
Definition: Sequencer.C:171
BigReal langevinTemp
int time_switch
Definition: imd.h:62
void clearDevicePatchMap()
NAMD_HOST_DEVICE BigReal length2(void) const
Definition: Vector.h:206
int ldbSteps
Definition: Sequencer.h:330
int numAtoms
Definition: Patch.h:151
MTSChoices MTSAlgorithm
#define NAMD_EVENT_RANGE_2(eon, id)
void run(void)
Definition: Sequencer.C:269
SimpleBroadcastObject< int > scriptBarrier
Definition: Broadcasts.h:88
uint8 partition
Definition: NamdTypes.h:81
bool getIsGlobalDevice() const
Definition: DeviceCUDA.h:172
BigReal scriptArg1
BigReal x
Definition: Vector.h:74
uint8 hydrogenGroupSize
Definition: NamdTypes.h:89
const_iterator const_begin(void) const
Definition: ResizeArray.h:39
PatchID getPatchID() const
Definition: Patch.h:114
void scalePositionsVelocities(const Tensor &posScale, const Tensor &velScale)
Definition: Sequencer.C:4757
int monteCarloPressureFreq
int getPesSharingDevice(const int i)
Definition: DeviceCUDA.h:139
BigReal adaptTempT
Definition: Sequencer.h:272
int maxForceUsed
Definition: PatchTypes.h:33
SimpleBroadcastObject< BigReal > velocityRescaleFactor2
Definition: Broadcasts.h:76
int sequence
Definition: PatchTypes.h:18
Bool is_atom_rotdragged(int atomnum) const
Definition: Molecule.h:1305
#define D_MSG(t)
Definition: Debug.h:165
int eventEndOfTimeStep
Definition: Node.C:296
void doMigrationGPU(const int startup, const int doGlobal, const int updatePatchMap)
void langevinPiston_SOA(int step)
Definition: Sequencer.C:3508
#define END_OF_RUN
Definition: Output.h:26
void gbisComputeAfterP1()
Definition: HomePatch.C:4915
void integrate_SOA(int)
Definition: Sequencer.C:2051
void traceBarrier(int)
Definition: Sequencer.C:6643
int doNonbonded
Definition: PatchTypes.h:22
void NAMD_die(const char *err_msg)
Definition: common.C:147
PDB * pdb
Definition: Node.h:183
static LdbCoordinator * Object()
BigReal reassignIncr
void gaussian_array_f(float *a, int n)
Definition: Random.h:258
#define TIMER_INIT_WIDTH(T, TYPE, WIDTH)
Definition: HomePatch.h:263
int getForceSendActive() const
Definition: ComputeGlobal.h:46
static int forceNeeded(int timestep)
Check if the step requires to output the forces.
Definition: Output.C:619
int berendsenPressure_count
Definition: Sequencer.h:294
SimpleBroadcastObject< BigReal > velocityRescaleFactor
Definition: Broadcasts.h:71
void publish(int tag, const T &t)
SimpleBroadcastObject< BigReal > minimizeCoefficient
Definition: Broadcasts.h:81
void reassignVelocities(BigReal, int)
Definition: Sequencer.C:5466
Bool LJPMESerialRealSpaceOn
void langevinVelocitiesBBK1_SOA(BigReal timestep)
Definition: Sequencer.C:3280
SimpleBroadcastObject< Vector > accelMDRescaleFactor
Definition: Broadcasts.h:91
int hardWallDrude(const BigReal, Tensor *virial, SubmitReduction *)
Definition: HomePatch.C:3410
ComputeGlobal * computeGlobalObject
Definition: ComputeMgr.h:160
Elem & item(int i)
Definition: ResizeArray.h:119
void saveForce(const int ftag=Results::normal)
Definition: HomePatch.C:2315
Random * random
Definition: Sequencer.h:321
void runComputeObjectsCUDA(int doMigration, int doGlobal, int pairlists, int nstep, int startup)
BigReal xx
Definition: Tensor.h:17
SimpleBroadcastObject< Tensor > positionRescaleFactor
Definition: Broadcasts.h:72
void buildRattleList_SOA()
Definition: HomePatch.C:4520
int getDeviceID()
Definition: DeviceCUDA.h:144
void langevinVelocities(BigReal)
Definition: Sequencer.C:5027
IMDSessionInfo IMDsendsettings
static CollectionMaster * Object()
void hardWallDrude(BigReal, int)
Definition: Sequencer.C:5660
NodeBroadcast * nodeBroadcast
Definition: PatchData.h:141
void checkpoint(void)
Definition: HomePatch.C:5222
BigReal zz
Definition: Tensor.h:19
#define TIMER_STOP(T, TYPE)
Definition: HomePatch.h:265
void suspend(void)
Definition: Sequencer.C:279
void multigratorTemperature(int step, int callNumber)
Definition: Sequencer.C:4957
static constexpr int num_inline_peer
Definition: CudaRecord.h:36
unsigned int randomSeed
double * recipMass
derived from mass
Definition: NamdTypes.h:404
BigReal initialTemp
void reinitVelocities(void)
Definition: Sequencer.C:5498
int pressureProfileAtomTypes
int checkpoint_berendsenPressure_count
Definition: Sequencer.h:295
#define simParams
Definition: Output.C:131
ControllerBroadcasts * broadcast
Definition: Sequencer.h:328
#define NAMD_EVENT_START_EX(eon, id, str)
iterator begin(void)
Definition: ResizeArray.h:36
void maximumMove_SOA(const double dt, const double maxvel2)
Definition: Sequencer.C:3222
double * pos_z
Definition: NamdTypes.h:379
double * f_slow_x
Definition: NamdTypes.h:434
CollectionMgr *const collection
Definition: Sequencer.h:327
void updateDeviceData(const int startup, const int maxForceUsed, const int doGlobal)
const PatchID patchID
Definition: Patch.h:150
#define GB2_COMPUTE_HOME_PRIORITY
Definition: Priorities.h:64
int numHomePatches(void)
Definition: PatchMap.C:432
Definition: Tensor.h:15
BigReal xy
Definition: Tensor.h:17
iterator end(void)
Definition: ResizeArray.h:37
bool rattleListValid_SOA
Definition: HomePatch.h:454
#define NAMD_PROFILE_STOP()
Bool langevinGammasDiffer
double * pos_x
Definition: NamdTypes.h:377
int doVirial
Definition: PatchTypes.h:21
BigReal y
Definition: Vector.h:74
virtual ~Sequencer(void)
Definition: Sequencer.C:245
BigReal movDragGlobVel
int getNumStepsToRun(void)
bool getIsPmeDevice()
Definition: DeviceCUDA.h:168
int doLCPO
Definition: PatchTypes.h:31
void resetMovingAverage()
Definition: Controller.C:656
void newtonianVelocities(BigReal, const BigReal, const BigReal, const BigReal, const int, const int, const int)
Definition: Sequencer.C:5003
static void print_vel_AOS(const FullAtom *a, int ilo=0, int ihip1=1)
Definition: Sequencer.C:95
void rescaleSoluteCharges(BigReal)
Definition: Sequencer.C:5543
void addVelocityToPosition_SOA(const double dt)
Definition: Sequencer.C:2829
double * vel_z
Definition: NamdTypes.h:398
void setVal(const NodeReduction *other)
Definition: ReductionMgr.C:681
#define SOA_SIMPLIFY_PARAMS
Definition: Sequencer.h:31
Mass mass
Definition: NamdTypes.h:218
void submitVelocities(int seq, int zero, FullAtomList &a, int prec)
Bool pressureProfileOn
void submitMinimizeReductions(int, BigReal fmax2)
Definition: Sequencer.C:6224
#define ADD_VECTOR_OBJECT(R, RL, D)
Definition: ReductionMgr.h:28
BigReal yy
Definition: Tensor.h:18
int doMomenta
Definition: Sequencer.h:311
#define TIMER_DONE(T)
Definition: HomePatch.h:266
#define PDBVELFACTOR
Definition: common.h:57
CudaComputeNonbonded * getCudaComputeNonbonded()
#define TIMEFACTOR
Definition: common.h:55
Bool pairInteractionSelf
int multigratorPressureFreq
static int velocityNeeded(int timestep)
Check if the step requires to output the velocities.
Definition: Output.C:510
int numPatchesOnNode(int node)
Definition: PatchMap.h:60
int bufferOffsetNBPad
Definition: CudaRecord.h:39
double * f_nbond_x
Definition: NamdTypes.h:431
int getDeviceIndex()
Definition: DeviceCUDA.h:166
BigReal maximumMove
#define SPECIAL_PATCH_ID
Definition: Sequencer.C:88
void submitPositions(int seq, FullAtomList &a, Lattice l, int prec, int dcdSelectionIndex)
#define cudaCheck(stmt)
Definition: CudaUtils.h:242
void correctMomentum(int step, BigReal drifttime)
Definition: Sequencer.C:4726
bool getIsMasterDevice()
Definition: DeviceCUDA.C:646
int pairlistsAgeLimit
Definition: Sequencer.h:233
NAMD_HOST_DEVICE void outerAdd(BigReal scale, const Vector &v1, const Vector &v2)
Definition: Tensor.h:255
int pairlistsAreValid
Definition: Sequencer.h:231
int doGBIS
Definition: PatchTypes.h:30
int stochRescale_count
Definition: Sequencer.h:290
int doFullDispersion
Definition: PatchTypes.h:24
std::ostream & iERROR(std::ostream &s)
Definition: InfoStream.C:83
int check(int step)
Definition: Sequencer.C:149
ComputeMgr * computeMgr
Definition: Node.h:172
int maxForceMerged
Definition: PatchTypes.h:34
BigReal reassignHold
bool getIsPmeDevice()
Definition: GlobalGPUMgr.C:100
void addForceToMomentum3(FullAtom *__restrict atom_arr, const Force *__restrict force_arr1, const Force *__restrict force_arr2, const Force *__restrict force_arr3, const BigReal dt1, const BigReal dt2, const BigReal dt3, int num_atoms) __attribute__((__noinline__))
Definition: HomePatch.C:3348
void addVelocityToPosition(FullAtom *__restrict atom_arr, const BigReal dt, int num_atoms) __attribute__((__noinline__))
Definition: HomePatch.C:3387
void quenchVelocities()
Definition: Sequencer.C:4694
ForceList f[Results::maxNumForces]
Definition: Patch.h:214
float * langScalRandBBK2
from langevinParam and recipMass
Definition: NamdTypes.h:420
void get_movdrag_params(Vector &v, int atomnum) const
Definition: Molecule.h:1413
static GlobalGPUMgr * Object()
Definition: GlobalGPUMgr.h:61
void enableEarlyExit(void)
Definition: Node.C:1461
void submitReductions(int)
Definition: Sequencer.C:5966
#define namd_reciprocal(x)
Definition: Vector.h:69
SimpleBroadcastObject< Tensor > positionRescaleFactor2
Definition: Broadcasts.h:77
void integrate_CUDA_SOA(int scriptTask)
#define RIGID_NONE
Definition: SimParameters.h:80
void loweAndersenFinish()
Definition: HomePatch.C:4881
uint32 atomFixed
Definition: NamdTypes.h:162
int getNumPesSharingDevice()
Definition: DeviceCUDA.h:138
SimParameters *const simParams
Definition: Sequencer.h:322
SimpleBroadcastObject< Tensor > velocityRescaleTensor
Definition: Broadcasts.h:74
NAMD_HOST_DEVICE Vector unit(void) const
Definition: Vector.h:215
BigReal zx
Definition: Tensor.h:19
CompAtomExtList pExt
Definition: Patch.h:181
int energies_switch
Definition: imd.h:63
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:23
Molecule * molecule
Definition: Node.h:179
NAMD_HOST_DEVICE Vector origin() const
Definition: Lattice.h:278
void rescaleVelocitiesByFactor(BigReal)
Definition: Sequencer.C:5521
double * f_slow_z
Definition: NamdTypes.h:436
int doMolly
Definition: PatchTypes.h:25
int multigratorTemperatureFreq
void reloadCharges()
Definition: Sequencer.C:5531
int doMinimize
Definition: PatchTypes.h:26
#define FORCE_OUTPUT
Definition: Output.h:28
int globalMasterFrequency
double BigReal
Definition: common.h:123
void minimize()
Definition: Sequencer.C:4439
static SynchronousCollectives * Object()
CudaPmeOneDevice * createCudaPmeOneDevice()
int step
Definition: PatchTypes.h:16
#define PATCH_PRIORITY(PID)
Definition: Priorities.h:25
CudaPmeOneDevice * getCudaPmeOneDevice()
void updatePatchOrder(const std::vector< CudaLocalRecord > &data)
for(int i=0;i< n1;++i)
void berendsenPressure_SOA(int step)
Definition: Sequencer.C:3409
int32 numAtoms
number of atoms
Definition: NamdTypes.h:456
void printDevicePatchMap()
BigReal drudeTemp
void compute(const Lattice &lattice, int doEnergyVirial, int step)
void exchangeAtoms(int scriptTask)
Definition: HomePatch.C:5370
T get(int tag, const int expected=-1)