1 #ifndef SEQUENCERCUDAKERNEL_H 2 #define SEQUENCERCUDAKERNEL_H 10 #include <hiprand/hiprand.h> 25 #ifdef NODEGROUP_FORCE_REGISTER 27 #define TIMEFACTOR 48.88821 28 #define NOT_AVAILABLE 999 30 #define PATCH_BLOCKS 512 31 #define ATOM_BLOCKS 128 34 class SequencerCUDAKernel{
// Raw byte pointer used as temporary storage for the rattle list.
// NOTE(review): the d_ prefix suggests device memory — confirm against the
// implementation file, which is not visible from this header.
52 char* d_rattleList_temp_storage;
// NOTE(review): presumably the size in bytes of d_rattleList_temp_storage — confirm.
53 size_t temp_storage_bytes;
// NOTE(review): presumably the size of a rattle index buffer declared outside
// this view; whether it counts elements or bytes cannot be told from here.
56 size_t rattleIndexes_size;
// Default constructor (takes no parameters). Only the declaration is visible
// in this header.
58 SequencerCUDAKernel();
// Destructor. NOTE(review): presumably releases buffers owned by this object
// (e.g. d_rattleList_temp_storage) — confirm in the implementation.
59 ~SequencerCUDAKernel();
61 void addForceToMomentum(
68 const double *recipMass,
69 const double *f_normal_x,
70 const double *f_normal_y,
71 const double *f_normal_z,
72 const double *f_nbond_x,
73 const double *f_nbond_y,
74 const double *f_nbond_z,
75 const double *f_slow_x,
76 const double *f_slow_y,
77 const double *f_slow_z,
93 void addVelocityToPosition(
108 cudaStream_t stream);
109 void velocityVerlet1(
110 const bool doFixedAtoms,
112 const double scaling,
113 const double dt_normal,
114 const double dt_nbond,
115 const double dt_slow,
116 const double velrescaling,
117 const double* recipMass,
121 const double maxvel2,
138 const int* atomFixed,
140 const int maxForceNumber,
141 cudaStream_t stream);
145 const double *coor_x,
146 const double *coor_y,
147 const double *coor_z,
152 const int* hydrogenGroupSize,
157 void updateRigidArrays(
158 const bool doFixedAtoms,
160 const int *atomFixed,
174 cudaStream_t stream);
176 const bool doFixedAtoms,
192 int *hydrogenGroupSize,
194 unsigned int* tbcatomic,
195 cudaStream_t stream);
197 void scaleCoordinateWithFactor(
202 int *hydrogenGroupSize,
205 int useGroupPressure,
207 cudaStream_t stream);
210 void SetAtomIndexOrder(
214 cudaStream_t stream);
217 void scaleCoordinateUsingGC(
222 const int *moleculeStartIndex,
223 const int *moleculeAtom,
228 const char3 *transform,
229 const int numMolecules,
230 const int numLargeMolecules,
231 cudaStream_t stream);
234 const bool doFixedAtoms,
235 const int* atomFixed,
236 const int* groupFixed,
237 const char3* transform,
239 const double* fixedPosition_x,
240 const double* fixedPosition_y,
241 const double* fixedPosition_z,
249 int *hydrogenGroupSize,
255 int useGroupPressure,
257 cudaStream_t stream);
258 void submitReduction1(
283 unsigned int* tbcatomic,
285 cudaStream_t stream);
286 void submitReduction2(
287 const bool doFixedAtoms,
288 const int* atomFixed,
301 const double *f_normal_x,
302 const double *f_normal_y,
303 const double *f_normal_z,
304 const double *f_nbond_x,
305 const double *f_nbond_y,
306 const double *f_nbond_z,
307 const double *f_slow_x,
308 const double *f_slow_y,
309 const double *f_slow_z,
311 int *hydrogenGroupSize,
324 unsigned int* tbcatomic,
328 cudaStream_t stream);
329 void langevinVelocitiesBBK1(
331 const float *langevinParam,
336 cudaStream_t stream);
337 void langevinVelocitiesBBK2(
339 const float *langScalVelBBK2,
340 const float *langScalRandBBK2,
348 const int numAtomsGlobal,
350 curandGenerator_t gen,
351 cudaStream_t stream);
353 void reassignVelocities(
355 const bool doFixedAtoms,
356 const int* atomFixed,
363 const double *d_recipMass,
366 const int numAtomsGlobal,
368 curandGenerator_t gen,
369 cudaStream_t stream);
372 const bool doFixedAtoms,
394 const int *hydrogenGroupSize,
395 const float *rigidBondLength,
397 const int *atomFixed,
399 size_t& settleListSize,
401 size_t& consFailureSize,
403 size_t& rattleListSize,
408 unsigned int* tbcatomic,
414 cudaStream_t stream);
416 void copy_nbond_forces(
int numPatches, float4 *f_nbond,
417 float4* f_nbond_slow,
425 const int* patchoffsets,
426 const int* patchUnsortOrder,
429 cudaStream_t stream);
431 void copy_bond_forces(
int numPatches,
433 double *f_bond_nbond,
447 const int *patchOffsets,
450 cudaStream_t stream);
452 void copy_slow_forces(
int numPatches,
457 const int* d_patchOffsets,
459 cudaStream_t stream);
461 void accumulateForceToSOA(
463 const int doCudaGlobal,
464 const int maxForceNumber,
465 const int numPatches,
468 const double* f_bond,
469 const double* f_bond_nbond,
470 const double* f_bond_slow,
472 const float4* f_nbond,
473 const float4* f_nbond_slow,
475 double* d_f_global_x,
476 double* d_f_global_y,
477 double* d_f_global_z,
478 double* d_f_normal_x,
479 double* d_f_normal_y,
480 double* d_f_normal_z,
487 const int* patchUnsortOrder,
489 unsigned int** deviceQueues,
490 unsigned int* queueCounters,
491 unsigned int* tbcatomic,
492 cudaStream_t stream);
494 void accumulate_force_kick(
495 const bool doFixedAtoms,
497 const int doCudaGlobal,
498 const int maxForceNumber,
499 const int numPatches,
501 const double* f_bond,
502 const double* f_bond_nbond,
503 const double* f_bond_slow,
505 const float4* f_nbond,
506 const float4* f_nbond_slow,
508 double* d_f_global_x,
509 double* d_f_global_y,
510 double* d_f_global_z,
511 double* d_f_normal_x,
512 double* d_f_normal_y,
513 double* d_f_normal_z,
523 const double* recipMass,
524 const int* d_atomFixed,
525 const double dt_normal,
526 const double dt_nbond,
527 const double dt_slow,
528 const double scaling,
529 const int* patchUnsortOrder,
531 cudaStream_t stream);
533 void set_compute_positions(
535 const bool isPmeDevice,
537 const int numPatchesHomeAndProxy,
538 const int numPatchesHome,
543 const bool doAlchDecouple,
544 const bool doAlchSoftCore,
545 const bool handleBoundary,
549 #ifndef NAMD_NCCL_ALLREDUCE
553 float** d_peer_charge,
554 int** d_peer_partition,
556 const float* charges,
558 const double charge_scaling,
559 const double3* patchCenter,
560 const int* s_patchPositions,
561 const int* s_pencilPatchIndex,
562 const int* s_patchIDs,
563 const int* patchSortOrder,
572 std::vector<int>& atomCounts,
573 cudaStream_t stream);
575 void set_pme_positions(
577 const bool isPmeDevice,
579 const int numPatchesHomeAndProxy,
580 const int numPatchesHome,
585 const bool doAlchDecouple,
586 const bool doAlchSoftCore,
587 const bool handleBoundary,
588 const double* d_pos_x,
589 const double* d_pos_y,
590 const double* d_pos_z,
591 #ifndef NAMD_NCCL_ALLREDUCE
592 double** d_peer_pos_x,
593 double** d_peer_pos_y,
594 double** d_peer_pos_z,
595 float** d_peer_charge,
596 int** d_peer_partition,
598 const float* charges,
600 const double charge_scaling,
601 const double3* patchCenter,
602 const int* s_patchPositions,
603 const int* s_pencilPatchIndex,
604 const int* s_patchIDs,
605 const int* patchSortOrder,
614 std::vector<int>& atomCounts,
615 cudaStream_t stream);
617 void PairListMarginCheck(
const int numPatches,
622 const double* pos_old_x,
623 const double* pos_old_y,
624 const double* pos_old_z,
625 const double3* awayDists,
628 const double3* patchMins,
629 const double3* patchMaxes,
630 const double3* patchCenter,
632 unsigned int* tbcatomic,
633 const double pairlistTrigger,
634 const double pairlistGrow,
635 const double pairlistShrink,
636 double* patchMaxAtomMovement,
637 double* h_patchMaxAtomMovement,
638 double* patchNewTolerance,
639 double* h_patchNewTolerance,
640 const double minSize,
642 const double sysdima,
643 const double sysdimb,
644 const double sysdimc,
645 unsigned int* h_marginViolations,
646 unsigned int* h_periodicCellSmall,
647 const bool rescalePairlistTolerance,
648 const bool isPeriodic,
649 cudaStream_t stream);
653 const bool normalized,
655 const double3 eField,
656 const double eFieldOmega,
657 const double eFieldPhi,
660 const char3* transform,
661 const float* charges,
674 unsigned int* tbcatomic,
678 void mergeForcesFromPeers(
680 const int maxForceNumber,
682 const int numPatchesHomeAndProxy,
683 const int numPatchesHome,
697 std::vector<int>& atomCounts,
701 void copyForcesToHostSOA(
702 const int numPatches,
704 const int maxForceNumber,
705 const double* d_f_normal_x,
706 const double* d_f_normal_y,
707 const double* d_f_normal_z,
708 const double* d_f_nbond_x,
709 const double* d_f_nbond_y,
710 const double* d_f_nbond_z,
711 const double* d_f_slow_x,
712 const double* d_f_slow_y,
713 const double* d_f_slow_z,
716 const bool doForcesOutput,
720 void copyForcesToDevice(
722 const double* d_f_nbond_x,
723 const double* d_f_nbond_y,
724 const double* d_f_nbond_z,
725 const double* d_f_slow_x,
726 const double* d_f_slow_y,
727 const double* d_f_slow_z,
728 double* d_f_saved_nbond_x,
729 double* d_f_saved_nbond_y,
730 double* d_f_saved_nbond_z,
731 double* d_f_saved_slow_x,
732 double* d_f_saved_slow_y,
733 double* d_f_saved_slow_z,
734 const int maxForceNumber,
738 void copyPositionsToHostSOA(
739 const int numPatches,
748 void redistributeTip4pForces(
749 double* d_f_normal_x,
750 double* d_f_normal_y,
751 double* d_f_normal_z,
761 const double* d_pos_x,
762 const double* d_pos_y,
763 const double* d_pos_z,
767 const int maxForceNumber,
772 const int maxForceNumber,
774 const int* d_atomFixed,
775 const double* d_fixedPosition_x,
776 const double* d_fixedPosition_y,
777 const double* d_fixedPosition_z,
778 const double* d_f_normal_x,
779 const double* d_f_normal_y,
780 const double* d_f_normal_z,
781 const double* d_f_nbond_x,
782 const double* d_f_nbond_y,
783 const double* d_f_nbond_z,
784 const double* d_f_slow_x,
785 const double* d_f_slow_y,
786 const double* d_f_slow_z,
790 double3* d_extForce_normal,
791 double3* d_extForce_nbond,
792 double3* d_extForce_slow,
797 #endif // NODEGROUP_FORCE_REGISTER 798 #endif // SEQUENCERCUDAKERNEL_H
static void partition(int *order, const FullAtom *atoms, int begin, int end)