NAMD
SequencerCUDAKernel.h
Go to the documentation of this file.
1 #ifndef SEQUENCERCUDAKERNEL_H
2 #define SEQUENCERCUDAKERNEL_H
3 
4 #include <cstddef>
5 #ifdef NAMD_CUDA
6 #include <curand.h>
7 #endif
8 
9 #ifdef NAMD_HIP
10 #include <hiprand/hiprand.h>
11 #endif
12 
13 #include <vector>
14 
15 #include "NamdTypes.h"
16 #include "CudaUtils.h"
17 #include "MShakeKernel.h"
18 #include "CudaTileListKernel.h" // for CudaPatchRecord
19 #include "CudaTileListKernel.hip.h" // for CudaPatchRecord
20 #include "CudaRecord.h"
21 #include "Lattice.h"
22 #include "HipDefines.h"
23 #include "structures.h"
24 
25 #ifdef NODEGROUP_FORCE_REGISTER
26 
// NOTE(review): presumably the conversion factor between the integrator's
// internal time unit and femtoseconds -- confirm against the code that uses it;
// no user of this macro is visible in this header.
27 #define TIMEFACTOR 48.88821
// NOTE(review): presumably a sentinel meaning "no valid value"; its users are
// not visible in this header -- confirm before changing the number.
28 #define NOT_AVAILABLE 999
29 
// NOTE(review): the names suggest launch-configuration sizes (blocks used for
// per-patch and per-atom work); neither macro is referenced in this header, so
// confirm their meaning in the implementation file.
30 #define PATCH_BLOCKS 512
31 #define ATOM_BLOCKS 128
32 
33 
// SequencerCUDAKernel
//
// Declaration-only interface: this header contains no method bodies. The class
// is compiled only when NODEGROUP_FORCE_REGISTER is defined (see the enclosing
// #ifdef). Every method other than the constructor and destructor takes a
// cudaStream_t as its final parameter.
// NOTE(review): presumably each method enqueues GPU work on that stream and
// returns without synchronizing -- confirm in the implementation file.
// NOTE(review): parameter names prefixed with "d_" / "h_" presumably denote
// device-side and host-side buffers respectively -- the header does not state
// this, so verify against the callers before relying on it.
// All data members are public; the class declares a destructor but no copy or
// move operations.
34 class SequencerCUDAKernel{
35 
36 public:
 // NOTE(review): rattle1() below has parameters named sp, nSettle and nRattle
 // that shadow these members inside that method -- keep the two apart when
 // reading the implementation.
37  SettleParameters *sp;
38  bool firstRattleDone;
39  bool intConstInit;
40  int nSettle, nRattle;
41 
42 // Size 1
43  int* d_nHG;
44  int* d_nSettles;
45  int* d_nRattles;
46 
 // Buffer pointer paired with a size field.
 // NOTE(review): the unit of hgi_size (elements vs. bytes) is not visible here.
47  int* hgi;
48  size_t hgi_size;
49 
50  // Other list sizes are bounded by the number of atoms, ok to use 32-bit int.
51  // However, storage size in bytes could overflow 32-bit int, so use size_t.
52  char* d_rattleList_temp_storage;
53  size_t temp_storage_bytes;
54 
 // Buffer pointer paired with a size field.
 // NOTE(review): the unit of rattleIndexes_size is not visible here.
55  int* rattleIndexes;
56  size_t rattleIndexes_size;
57 
 // Constructor / destructor; definitions are not in this header.
58  SequencerCUDAKernel();
59  ~SequencerCUDAKernel();
60 
 // Inputs that are const: recipMass and the normal / nbond / slow force
 // components. vel_x/y/z are the only writable arrays.
 // NOTE(review): presumably applies the forces to the velocities using the
 // three dt values; maxForceNumber presumably selects how many of the force
 // sets take part -- confirm in the implementation.
61  void addForceToMomentum(
62  bool doFixedAtoms,
63  const double scaling,
64  double dt_normal,
65  double dt_nbond,
66  double dt_slow,
67  double velrescaling,
68  const double *recipMass,
69  const double *f_normal_x,
70  const double *f_normal_y,
71  const double *f_normal_z,
72  const double *f_nbond_x,
73  const double *f_nbond_y,
74  const double *f_nbond_z,
75  const double *f_slow_x,
76  const double *f_slow_y,
77  const double *f_slow_z,
78  double *vel_x,
79  double *vel_y,
80  double *vel_z,
81  const int *atomFixed,
82  int numAtoms,
83  int maxForceNumber,
84  cudaStream_t stream);
 // Velocities are read-only; killme is the only writable pointer.
 // NOTE(review): presumably flags velocities whose squared magnitude exceeds
 // maxvel2 -- confirm in the implementation.
85  void maximumMove(
86  const double maxvel2,
87  const double *vel_x,
88  const double *vel_y,
89  const double *vel_z,
90  int *killme,
91  const int numAtoms,
92  cudaStream_t stream);
 // Velocities and atomFixed are read-only; pos_* and h_pos_* are writable.
 // NOTE(review): copyPositions presumably controls whether h_pos_* is filled
 // -- confirm in the implementation.
93  void addVelocityToPosition(
94  bool doFixedAtoms,
95  const double dt,
96  const int *atomFixed,
97  const double *vel_x,
98  const double *vel_y,
99  const double *vel_z,
100  double *pos_x,
101  double *pos_y,
102  double *pos_z,
103  double *h_pos_x,
104  double *h_pos_y,
105  double *h_pos_z,
106  int numAtoms,
107  bool copyPositions,
108  cudaStream_t stream);
 // Takes velocities, positions, host position copies, all three force sets
 // and a host-side kill flag in a single call. Note that, unlike
 // addForceToMomentum, the force pointers here are not const.
 // NOTE(review): presumably a fused form of the three methods above --
 // confirm in the implementation.
109  void velocityVerlet1(
110  const bool doFixedAtoms,
111  const int step,
112  const double scaling,
113  const double dt_normal,
114  const double dt_nbond,
115  const double dt_slow,
116  const double velrescaling,
117  const double* recipMass,
118  double* vel_x,
119  double* vel_y,
120  double* vel_z,
121  const double maxvel2,
122  int* h_killme,
123  double* pos_x,
124  double* pos_y,
125  double* pos_z,
126  double* h_pos_x,
127  double* h_pos_y,
128  double* h_pos_z,
129  double* f_normal_x,
130  double* f_normal_y,
131  double* f_normal_z,
132  double* f_nbond_x,
133  double* f_nbond_y,
134  double* f_nbond_z,
135  double* f_slow_x,
136  double* f_slow_y,
137  double* f_slow_z,
138  const int* atomFixed,
139  const int numAtoms,
140  const int maxForceNumber,
141  cudaStream_t stream);
142 
143 
 // coor_*, mass and hydrogenGroupSize are read-only; cm_x/y/z are the
 // writable outputs.
 // NOTE(review): presumably one center of mass per hydrogen group -- the
 // layout of cm_* is not visible here.
144  void centerOfMass(
145  const double *coor_x,
146  const double *coor_y,
147  const double *coor_z,
148  double *cm_x,
149  double *cm_y,
150  double *cm_z,
151  const float* mass,
152  const int* hydrogenGroupSize,
153  const int numAtoms,
154  cudaStream_t stream
155  );
156 
 // vel_* and pos_* are read-only; velNew_* and posNew_* are the writable
 // outputs.
157  void updateRigidArrays(
158  const bool doFixedAtoms,
159  const double dt,
160  const int *atomFixed,
161  const double *vel_x,
162  const double *vel_y,
163  const double *vel_z,
164  const double *pos_x,
165  const double *pos_y,
166  const double *pos_z,
167  double* velNew_x,
168  double* velNew_y,
169  double* velNew_z,
170  double* posNew_x,
171  double* posNew_y,
172  double* posNew_z,
173  int numAtoms,
174  cudaStream_t stream);
 // Velocities, vcm_* and mass are read-only. Each reduction output appears
 // twice: once plain and once with an "h_" prefix.
 // NOTE(review): tbcatomic is presumably a counter used to detect the last
 // thread block of a reduction -- confirm in the implementation.
175  void submitHalf(
176  const bool doFixedAtoms,
177  const double *vel_x,
178  const double *vel_y,
179  const double *vel_z,
180  const double *vcm_x,
181  const double *vcm_y,
182  const double *vcm_z,
183  const float *mass,
184  BigReal *kineticEnergy,
185  BigReal *intKineticEnergy,
186  cudaTensor *virial,
187  cudaTensor *intVirialNormal,
188  BigReal *h_kineticEnergy,
189  BigReal *h_intKineticEnergy,
190  cudaTensor *h_virial,
191  cudaTensor *h_intVirialNormal,
192  int *hydrogenGroupSize,
193  int numAtoms,
194  unsigned int* tbcatomic,
195  cudaStream_t stream);
196 
 // pos_* are writable; factor and origin are passed by value.
 // NOTE(review): useGroupPressure presumably switches between per-atom and
 // per-hydrogen-group scaling -- confirm in the implementation.
197  void scaleCoordinateWithFactor(
198  double *pos_x,
199  double *pos_y,
200  double *pos_z,
201  float *mass,
202  int *hydrogenGroupSize,
203  cudaTensor factor,
204  cudaVector origin,
205  int useGroupPressure,
206  int numAtoms,
207  cudaStream_t stream);
208 
209  // Maps the global atom index to local
 // Neither id nor idOrder is const, so the header does not show which one
 // is written.
210  void SetAtomIndexOrder(
211  int *id,
212  int *idOrder,
213  int numAtoms,
214  cudaStream_t stream);
215 
216  // scale the coordinate using Molecule's geometric center
 // pos_* are the only writable arrays; both lattices are passed by value.
217  void scaleCoordinateUsingGC(
218  double *pos_x,
219  double *pos_y,
220  double *pos_z,
221  const int *idOrder,
222  const int *moleculeStartIndex,
223  const int *moleculeAtom,
224  const cudaTensor factor,
225  const cudaVector origin,
226  const Lattice oldLattice,
227  const Lattice newLattice,
228  const char3 *transform,
229  const int numMolecules,
230  const int numLargeMolecules,
231  cudaStream_t stream);
232 
 // Fixed-atom data, transform and lattice are read-only; pos_* and vel_*
 // are writable. Separate velocity factors are given per axis.
233  void langevinPiston(
234  const bool doFixedAtoms,
235  const int* atomFixed,
236  const int* groupFixed,
237  const char3* transform,
238  const Lattice lattice,
239  const double* fixedPosition_x,
240  const double* fixedPosition_y,
241  const double* fixedPosition_z,
242  double *pos_x,
243  double *pos_y,
244  double *pos_z,
245  double *vel_x,
246  double *vel_y,
247  double *vel_z,
248  float *mass,
249  int *hydrogenGroupSize,
250  cudaTensor factor,
251  cudaVector origin,
252  double velFactor_x,
253  double velFactor_y,
254  double velFactor_z,
255  int useGroupPressure,
256  int numAtoms,
257  cudaStream_t stream);
 // Outputs are kinetic energy, momentum and angular momentum, each with a
 // matching "h_" pointer. Note that pos_*, vel_* and mass are not declared
 // const here even though they look like inputs.
258  void submitReduction1(
259  double *pos_x,
260  double *pos_y,
261  double *pos_z,
262  double *vel_x,
263  double *vel_y,
264  double *vel_z,
265  float *mass,
266  BigReal *kineticEnergy,
267  BigReal *momentum_x,
268  BigReal *momentum_y,
269  BigReal *momentum_z,
270  BigReal *angularMomentum_x,
271  BigReal *angularMomentum_y,
272  BigReal *angularMomentum_z,
273  BigReal origin_x,
274  BigReal origin_y,
275  BigReal origin_z,
276  BigReal *h_kineticEnergy,
277  BigReal *h_momentum_x,
278  BigReal *h_momentum_y,
279  BigReal *h_momentum_z,
280  BigReal *h_angularMomentum_x,
281  BigReal *h_angularMomentum_y,
282  BigReal *h_angularMomentum_z,
283  unsigned int* tbcatomic,
284  int numAtoms,
285  cudaStream_t stream);
 // Positions, velocities, rcm_*/vcm_* and all force components are
 // read-only. Outputs are kinetic energies plus internal and rigid virial
 // tensors, each with a matching "h_" pointer.
286  void submitReduction2(
287  const bool doFixedAtoms,
288  const int* atomFixed,
289  const double *pos_x,
290  const double *pos_y,
291  const double *pos_z,
292  const double *vel_x,
293  const double *vel_y,
294  const double *vel_z,
295  const double *rcm_x,
296  const double *rcm_y,
297  const double *rcm_z,
298  const double *vcm_x,
299  const double *vcm_y,
300  const double *vcm_z,
301  const double *f_normal_x,
302  const double *f_normal_y,
303  const double *f_normal_z,
304  const double *f_nbond_x,
305  const double *f_nbond_y,
306  const double *f_nbond_z,
307  const double *f_slow_x,
308  const double *f_slow_y,
309  const double *f_slow_z,
310  float *mass,
311  int *hydrogenGroupSize,
312  BigReal *kineticEnergy,
313  BigReal *h_kineticEnergy,
314  BigReal *intKineticEnergy,
315  BigReal *h_intKineticEnergy,
316  cudaTensor *intVirialNormal,
317  cudaTensor *intVirialNbond,
318  cudaTensor *intVirialSlow,
319  cudaTensor *h_intVirialNormal,
320  cudaTensor *h_intVirialNbond,
321  cudaTensor *h_intVirialSlow,
322  cudaTensor *rigidVirial,
323  cudaTensor *h_rigidVirial,
324  unsigned int* tbcatomic,
325  int numAtoms,
326  int maxForceNumber,
327  const bool doMTS,
328  cudaStream_t stream);
 // langevinParam is read-only; vel_* are the only writable arrays.
329  void langevinVelocitiesBBK1(
330  BigReal timestep,
331  const float *langevinParam,
332  double *vel_x,
333  double *vel_y,
334  double *vel_z,
335  int numAtoms,
336  cudaStream_t stream);
 // The two scaling arrays are read-only; gaussrand_* and vel_* are
 // writable. A curandGenerator_t is passed in by value.
 // NOTE(review): presumably gaussrand_* are filled from gen before being
 // used; the meaning of numAtomsGlobal and stride is not visible here.
337  void langevinVelocitiesBBK2(
338  BigReal timestep,
339  const float *langScalVelBBK2,
340  const float *langScalRandBBK2,
341  float *gaussrand_x,
342  float *gaussrand_y,
343  float *gaussrand_z,
344  double *vel_x,
345  double *vel_y,
346  double *vel_z,
347  const int numAtoms,
348  const int numAtomsGlobal,
349  const int stride,
350  curandGenerator_t gen,
351  cudaStream_t stream);
352 
 // atomFixed and d_recipMass are read-only; gaussrand_* and vel_* are
 // writable. Takes the same generator / numAtomsGlobal / stride trio as
 // langevinVelocitiesBBK2.
353  void reassignVelocities(
354  const BigReal timestep,
355  const bool doFixedAtoms,
356  const int* atomFixed,
357  float *gaussrand_x,
358  float *gaussrand_y,
359  float *gaussrand_z,
360  double *vel_x,
361  double *vel_y,
362  double *vel_z,
363  const double *d_recipMass,
364  const BigReal kbT,
365  const int numAtoms,
366  const int numAtomsGlobal,
367  const int stride,
368  curandGenerator_t gen,
369  cudaStream_t stream);
370 
 // settleList, consFailure and rattleList are passed as pointer-to-pointer
 // together with a size_t& size, so the callee is able to replace the buffer
 // and update its recorded size.
 // NOTE(review): presumably this is used to grow the lists on demand --
 // confirm in the implementation.
 // The parameters nSettle, nRattle and sp shadow the class members of the
 // same names.
371  void rattle1(
372  const bool doFixedAtoms,
373  const bool doEnergy,
374  const bool pressure,
375  const int numAtoms,
376  const double dt,
377  const double invdt,
378  const double tol2,
379  double *vel_x,
380  double *vel_y,
381  double *vel_z,
382  double *pos_x,
383  double *pos_y,
384  double *pos_z,
385  double *velNew_x,
386  double *velNew_y,
387  double *velNew_z,
388  double *posNew_x,
389  double *posNew_y,
390  double *posNew_z,
391  double *f_normal_x,
392  double *f_normal_y,
393  double *f_normal_z,
394  const int *hydrogenGroupSize,
395  const float *rigidBondLength,
396  const float *mass,
397  const int *atomFixed,
398  int **settleList,
399  size_t& settleListSize,
400  int **consFailure,
401  size_t& consFailureSize,
402  CudaRattleElem **rattleList,
403  size_t& rattleListSize,
404  int *nSettle,
405  int *nRattle,
406  cudaTensor *virial,
407  cudaTensor *h_virial,
408  unsigned int* tbcatomic,
409  int migration,
410  SettleParameters *sp,
411  bool first,
412  int* h_consFailure,
413  const WaterModel water_model,
414  cudaStream_t stream);
415 
 // Takes float4 force buffers (f_nbond, f_nbond_slow) and per-component
 // double arrays; the patch index arrays are read-only.
 // NOTE(review): presumably converts the float4 buffers into the
 // per-component arrays -- confirm direction in the implementation.
416  void copy_nbond_forces(int numPatches, float4 *f_nbond,
417  float4* f_nbond_slow,
418  double* f_nbond_x,
419  double* f_nbond_y,
420  double* f_nbond_z,
421  double* f_slow_x,
422  double* f_slow_y,
423  double* f_slow_z,
424  const int* patchIDS,
425  const int* patchoffsets,
426  const int* patchUnsortOrder,
427  const CudaPatchRecord* nbondIndexPerPatch,
428  const bool doSlow,
429  cudaStream_t stream);
430 
 // Takes three bonded force buffers plus per-component bond / nbond / slow
 // arrays. None of the double pointers is const, so the header does not
 // show which side is the source.
431  void copy_bond_forces(int numPatches,
432  double *f_bond,
433  double *f_bond_nbond,
434  double *f_bond_slow,
435  double *f_bond_x,
436  double *f_bond_y,
437  double *f_bond_z,
438  double *f_nbond_x,
439  double *f_nbond_y,
440  double *f_nbond_z,
441  double *f_slow_x,
442  double *f_slow_y,
443  double *f_slow_z,
444  int forceStride, //if stridedForces
445  PatchRecord *pr,
446  const int *patchIDs,
447  const int *patchOffsets,
448  bool doNbond,
449  bool doSlow,
450  cudaStream_t stream);
451 
 // f_slow (CudaForce) is read-only; f_slow_x/y/z are writable. Note that
 // lattices is passed as a pointer here, whereas other methods take a
 // Lattice by value.
452  void copy_slow_forces(int numPatches,
453  const CudaForce* f_slow,
454  double* f_slow_x,
455  double* f_slow_y,
456  double* f_slow_z,
457  const int* d_patchOffsets,
458  const Lattice *lattices,
459  cudaStream_t stream);
460 
 // The bonded, nonbonded and slow force buffers are read-only; the
 // d_f_global / d_f_normal / d_f_nbond / d_f_slow component arrays are
 // writable.
 // NOTE(review): deviceQueues and queueCounters are presumably used for
 // multi-device coordination (nDevices) -- confirm in the implementation.
461  void accumulateForceToSOA(
462  const int doGlobal,
463  const int doCudaGlobal,
464  const int maxForceNumber,
465  const int numPatches,
466  const int nDevices,
467  CudaLocalRecord* localRecords,
468  const double* f_bond,
469  const double* f_bond_nbond,
470  const double* f_bond_slow,
471  int forceStride,
472  const float4* f_nbond,
473  const float4* f_nbond_slow,
474  const CudaForce* f_slow,
475  double* d_f_global_x,
476  double* d_f_global_y,
477  double* d_f_global_z,
478  double* d_f_normal_x,
479  double* d_f_normal_y,
480  double* d_f_normal_z,
481  double* d_f_nbond_x,
482  double* d_f_nbond_y,
483  double* d_f_nbond_z,
484  double* d_f_slow_x,
485  double* d_f_slow_y,
486  double* d_f_slow_z,
487  const int* patchUnsortOrder,
488  const Lattice lattice,
489  unsigned int** deviceQueues,
490  unsigned int* queueCounters,
491  unsigned int* tbcatomic,
492  cudaStream_t stream);
493 
 // Same force inputs and outputs as accumulateForceToSOA, plus writable
 // velocities, recipMass, atomFixed and the dt / scaling values. It has no
 // nDevices, queue or tbcatomic parameters.
 // NOTE(review): presumably accumulates forces and applies the velocity
 // update in one pass -- confirm in the implementation.
494  void accumulate_force_kick(
495  const bool doFixedAtoms,
496  const int doGlobal,
497  const int doCudaGlobal,
498  const int maxForceNumber,
499  const int numPatches,
500  CudaLocalRecord* localRecords,
501  const double* f_bond,
502  const double* f_bond_nbond,
503  const double* f_bond_slow,
504  int forceStride,
505  const float4* f_nbond,
506  const float4* f_nbond_slow,
507  const CudaForce* f_slow,
508  double* d_f_global_x,
509  double* d_f_global_y,
510  double* d_f_global_z,
511  double* d_f_normal_x,
512  double* d_f_normal_y,
513  double* d_f_normal_z,
514  double* d_f_nbond_x,
515  double* d_f_nbond_y,
516  double* d_f_nbond_z,
517  double* d_f_slow_x,
518  double* d_f_slow_y,
519  double* d_f_slow_z,
520  double* d_vel_x,
521  double* d_vel_y,
522  double* d_vel_z,
523  const double* recipMass,
524  const int* d_atomFixed,
525  const double dt_normal,
526  const double dt_nbond,
527  const double dt_slow,
528  const double scaling,
529  const int* patchUnsortOrder,
530  const Lattice lattice,
531  cudaStream_t stream);
532 
 // Positions, charges, partition and patch metadata are read-only;
 // nb_atoms, b_atoms, s_atoms and s_partition are writable. The five peer
 // pointer parameters exist only when NAMD_NCCL_ALLREDUCE is NOT defined, so
 // the signature changes with that macro and callers must be built with the
 // same setting. atomCounts is taken by non-const reference.
533  void set_compute_positions(
534  const int devID,
535  const bool isPmeDevice,
536  const int nDev,
537  const int numPatchesHomeAndProxy,
538  const int numPatchesHome,
539  const bool doNbond,
540  const bool doSlow,
541  const bool doFEP,
542  const bool doTI,
543  const bool doAlchDecouple,
544  const bool doAlchSoftCore,
545  const bool handleBoundary,
546  const double* pos_x,
547  const double* pos_y,
548  const double* pos_z,
549 #ifndef NAMD_NCCL_ALLREDUCE
550  double** peer_pos_x,
551  double** peer_pos_y,
552  double** peer_pos_z,
553  float** d_peer_charge,
554  int** d_peer_partition,
555 #endif
556  const float* charges,
557  const int* partition,
558  const double charge_scaling,
559  const double3* patchCenter,
560  const int* s_patchPositions,
561  const int* s_pencilPatchIndex,
562  const int* s_patchIDs,
563  const int* patchSortOrder,
564  const Lattice lattice,
565  float4* nb_atoms,
566  float4* b_atoms,
567  float4* s_atoms,
568  int* s_partition,
569  int numTotalAtoms,
570  CudaLocalRecord* localRecords,
571  CudaPeerRecord* peerRecords,
572  std::vector<int>& atomCounts,
573  cudaStream_t stream);
574 
 // Same parameter types and order as set_compute_positions; only some
 // parameter names differ (d_pos_*, d_peer_pos_*). The peer pointer
 // parameters are again present only when NAMD_NCCL_ALLREDUCE is NOT
 // defined.
 // NOTE(review): how the two methods differ in behavior is not visible in
 // this header.
575  void set_pme_positions(
576  const int devID,
577  const bool isPmeDevice,
578  const int nDev,
579  const int numPatchesHomeAndProxy,
580  const int numPatchesHome,
581  const bool doNbond,
582  const bool doSlow,
583  const bool doFEP,
584  const bool doTI,
585  const bool doAlchDecouple,
586  const bool doAlchSoftCore,
587  const bool handleBoundary,
588  const double* d_pos_x,
589  const double* d_pos_y,
590  const double* d_pos_z,
591  #ifndef NAMD_NCCL_ALLREDUCE
592  double** d_peer_pos_x,
593  double** d_peer_pos_y,
594  double** d_peer_pos_z,
595  float** d_peer_charge,
596  int** d_peer_partition,
597  #endif
598  const float* charges,
599  const int* partition,
600  const double charge_scaling,
601  const double3* patchCenter,
602  const int* s_patchPositions,
603  const int* s_pencilPatchIndex,
604  const int* s_patchIDs,
605  const int* patchSortOrder,
606  const Lattice lattice,
607  float4* nb_atoms,
608  float4* b_atoms,
609  float4* s_atoms,
610  int* s_partition,
611  int numTotalAtoms,
612  CudaLocalRecord* localRecords,
613  CudaPeerRecord* peerRecords,
614  std::vector<int>& atomCounts,
615  cudaStream_t stream);
616 
 // Current and old positions, both lattices and the patch geometry arrays
 // are read-only. Writable outputs are patchMaxAtomMovement and
 // patchNewTolerance (each with an "h_" counterpart), h_marginViolations
 // and h_periodicCellSmall.
617  void PairListMarginCheck(const int numPatches,
618  CudaLocalRecord* localRecords,
619  const double* pos_x,
620  const double* pos_y,
621  const double* pos_z,
622  const double* pos_old_x,
623  const double* pos_old_y,
624  const double* pos_old_z,
625  const double3* awayDists, // for margin check
626  const Lattice lattice,
627  const Lattice lattice_old,
628  const double3* patchMins,
629  const double3* patchMaxes,
630  const double3* patchCenter,
631  const CudaMInfo* mInfo,
632  unsigned int* tbcatomic,
633  const double pairlistTrigger,
634  const double pairlistGrow,
635  const double pairlistShrink,
636  double* patchMaxAtomMovement,
637  double* h_patchMaxAtomMovement,
638  double* patchNewTolerance,
639  double* h_patchNewTolerance,
640  const double minSize,
641  const double cutoff,
642  const double sysdima,
643  const double sysdimb,
644  const double sysdimc,
645  unsigned int* h_marginViolations,
646  unsigned int* h_periodicCellSmall,
647  const bool rescalePairlistTolerance,
648  const bool isPeriodic,
649  cudaStream_t stream);
650 
 // Charges, positions and transform are read-only; f_normal_* are
 // writable. External force, virial and energy each have a "d_" and an
 // "h_" output pointer.
 // NOTE(review): eFieldOmega, eFieldPhi and t presumably describe a
 // time-dependent field -- units and formula are not visible here.
651  void apply_Efield(
652  const int numAtoms,
653  const bool normalized,
654  const bool doEnergy,
655  const double3 eField,
656  const double eFieldOmega,
657  const double eFieldPhi,
658  const double t,
659  const Lattice lat,
660  const char3* transform,
661  const float* charges,
662  const double* pos_x,
663  const double* pos_y,
664  const double* pos_z,
665  double* f_normal_x,
666  double* f_normal_y,
667  double* f_normal_z,
668  double3* d_extForce,
669  cudaTensor* d_extVirial,
670  double* d_extEnergy,
671  double3* h_extForce,
672  cudaTensor* h_extVirial,
673  double* h_extEnergy,
674  unsigned int* tbcatomic,
675  cudaStream_t stream
676  );
677 
 // Force buffers are passed as double** (an array of pointers).
 // NOTE(review): presumably one entry per device -- confirm the indexing
 // in the implementation. pmeForces is read-only.
678  void mergeForcesFromPeers(
679  const int devID,
680  const int maxForceNumber,
681  const Lattice lat,
682  const int numPatchesHomeAndProxy,
683  const int numPatchesHome,
684  // ------- Force buffers to be merged - ----- //
685  double** f_normal_x,
686  double** f_normal_y,
687  double** f_normal_z,
688  double** f_nbond_x,
689  double** f_nbond_y,
690  double** f_nbond_z,
691  double** f_slow_x,
692  double** f_slow_y,
693  double** f_slow_z,
694  const CudaForce* pmeForces,
695  CudaLocalRecord* localRecords,
696  CudaPeerRecord* peerRecords,
697  std::vector<int>& atomCounts,
698  cudaStream_t stream
699  );
700 
 // All force component arrays are read-only; d_HostPatchDataSOA is the
 // only writable data pointer besides localRecords.
701  void copyForcesToHostSOA(
702  const int numPatches,
703  CudaLocalRecord* localRecords,
704  const int maxForceNumber,
705  const double* d_f_normal_x,
706  const double* d_f_normal_y,
707  const double* d_f_normal_z,
708  const double* d_f_nbond_x,
709  const double* d_f_nbond_y,
710  const double* d_f_nbond_z,
711  const double* d_f_slow_x,
712  const double* d_f_slow_y,
713  const double* d_f_slow_z,
714  PatchDataSOA* d_HostPatchDataSOA,
715  const bool doGlobal,
716  const bool doForcesOutput,
717  cudaStream_t stream
718  );
719 
 // The nbond / slow force arrays are read-only; the d_f_saved_* arrays are
 // the writable destinations. There are no parameters for the normal force
 // set.
720  void copyForcesToDevice(
721  const int numAtoms,
722  const double* d_f_nbond_x,
723  const double* d_f_nbond_y,
724  const double* d_f_nbond_z,
725  const double* d_f_slow_x,
726  const double* d_f_slow_y,
727  const double* d_f_slow_z,
728  double* d_f_saved_nbond_x,
729  double* d_f_saved_nbond_y,
730  double* d_f_saved_nbond_z,
731  double* d_f_saved_slow_x,
732  double* d_f_saved_slow_y,
733  double* d_f_saved_slow_z,
734  const int maxForceNumber,
735  cudaStream_t stream
736  );
737 
 // pos_* are read-only; d_HostPatchDataSOA is the only writable data
 // pointer besides localRecords.
738  void copyPositionsToHostSOA(
739  const int numPatches,
740  CudaLocalRecord* localRecords,
741  const double* pos_x,
742  const double* pos_y,
743  const double* pos_z,
744  PatchDataSOA* d_HostPatchDataSOA,
745  cudaStream_t stream
746  );
747 
 // All three force sets and the three virial tensors are writable;
 // positions and masses are read-only.
 // NOTE(review): doVirial presumably enables the virial outputs -- confirm
 // in the implementation.
748  void redistributeTip4pForces(
749  double* d_f_normal_x,
750  double* d_f_normal_y,
751  double* d_f_normal_z,
752  double* d_f_nbond_x,
753  double* d_f_nbond_y,
754  double* d_f_nbond_z,
755  double* d_f_slow_x,
756  double* d_f_slow_y,
757  double* d_f_slow_z,
758  cudaTensor* d_virial_normal,
759  cudaTensor* d_virial_nbond,
760  cudaTensor* d_virial_slow,
761  const double* d_pos_x,
762  const double* d_pos_y,
763  const double* d_pos_z,
764  const float* d_mass,
765  const int numAtoms,
766  const int doVirial,
767  const int maxForceNumber,
768  cudaStream_t stream
769  );
770 
 // atomFixed, fixed positions and all force components are read-only.
 // Writable outputs are three virial tensors and three external-force
 // vectors, one per force set.
771  void calcFixVirial(
772  const int maxForceNumber,
773  const int numAtoms,
774  const int* d_atomFixed,
775  const double* d_fixedPosition_x,
776  const double* d_fixedPosition_y,
777  const double* d_fixedPosition_z,
778  const double* d_f_normal_x,
779  const double* d_f_normal_y,
780  const double* d_f_normal_z,
781  const double* d_f_nbond_x,
782  const double* d_f_nbond_y,
783  const double* d_f_nbond_z,
784  const double* d_f_slow_x,
785  const double* d_f_slow_y,
786  const double* d_f_slow_z,
787  cudaTensor* d_virial_normal,
788  cudaTensor* d_virial_nbond,
789  cudaTensor* d_virial_slow,
790  double3* d_extForce_normal,
791  double3* d_extForce_nbond,
792  double3* d_extForce_slow,
793  cudaStream_t stream
794  );
795 };
796 
797 #endif // NODEGROUP_FORCE_REGISTER
798 #endif // SEQUENCERCUDAKERNEL_H
static void partition(int *order, const FullAtom *atoms, int begin, int end)
Definition: SortAtoms.C:45
WaterModel
Definition: common.h:221
double BigReal
Definition: common.h:123