// NOTE(review): extraction has fused the original file's own line numbers
// (1, 2, 10, 32, 50, ...) into the text below and dropped intervening lines.
// Code is left byte-identical; only comments are added.
// Include guard and hipRAND header (HIP/ROCm RNG); the member declarations
// that follow are compiled only under NODEGROUP_FORCE_REGISTER.
1 #ifndef SEQUENCERCUDA_H 2 #define SEQUENCERCUDA_H 10 #include <hiprand/hiprand.h> 32 #ifdef NODEGROUP_FORCE_REGISTER 50 inline static SequencerCUDA *Object() {
// Per-PE singleton: fetched from a Charm++ Ckpv (per-processor) variable.
return CkpvAccess(SequencerCUDA_instance); }
// Access another PE's instance; CmiRankOf maps the PE id to its node-local rank.
51 inline static SequencerCUDA *ObjectOnPe(
int pe) {
return CkpvAccessOther(SequencerCUDA_instance, CmiRankOf(pe)); }
// Count of patches that have checked in so far -- presumably gates a
// synchronization phase; TODO confirm against the .C implementation.
53 int numPatchesCheckedIn;
// Charm++ threads (CthThread handles) suspended here, resumed later.
55 std::vector<CthThread> waitingThreads;
// Set while the master thread is suspended -- name-based inference; verify.
57 bool masterThreadSleeping =
false;
// When true, the suspend/resume cycle is broken out of -- name-based inference.
58 bool breakSuspends =
false;
// --- Buffer (re)allocation and host<->device copy API. Declarations only; ---
// --- bodies live in the corresponding .C/.cu translation unit.           ---
// Reallocate per-atom arrays for the given home / home+proxy atom counts.
// Returns bool, presumably whether a reallocation actually occurred -- confirm.
63 bool reallocateArrays(
int in_numAtomsHome,
int in_numAtomsHomeAndProxy);
64 void reallocateMigrationDestination();
65 void deallocateArrays();
66 void deallocateStaticArrays();
// AoS = array-of-structures atom layout; SOA variants appear further below.
67 void copyAoSDataToHost();
68 void copyPatchDataToHost();
69 void copyAtomDataToDeviceAoS();
71 void copyAtomDataToDevice(
bool copyForces,
int maxForceNumber);
73 bool copyPatchData(
const bool copyIn,
const bool startup);
// Exchange data with peer devices (multi-GPU path) -- confirm direction semantics.
74 void copyDataToPeers(
const bool copyIn);
// --- Atom-migration pipeline. Ordering suggested by the names (init -> ---
// --- perform -> post -> updates); verify the actual call sequence.     ---
75 void migrationLocalInit();
76 void migrationPerform();
77 void migrationLocalPost(
int startup);
78 void migrationUpdateAdvancedFeatures(
const int startup);
79 void migrationUpdateAtomCounts();
80 void migrationUpdateAtomOffsets();
81 void migrationUpdateRemoteOffsets();
82 void migrationUpdateProxyDestination();
83 void migrationUpdateDestination();
84 void migrationSortAtomsNonbonded();
// Pull migration bookkeeping for one home patch into this object's tables.
87 void copyMigrationInfo(
HomePatch *p,
int patchIndex);
89 void assembleOrderedPatchList();
// --- Monte Carlo pressure-control steps (declarations only). ---
// NOTE(review): "doGloblal" below looks like a typo for "doGlobal". A parameter
// name in a declaration is non-binding in C++, so this is harmless to callers,
// but it should be spelled consistently with the out-of-line definition.
94 void monteCarloPressure_part2(
int step,
int maxForceNumber,
95 const bool doEnergy,
const bool doGloblal,
const bool doVirial);
// Undo a rejected MC volume move; the lattice is passed back by reference.
98 void monteCarloPressure_reject(
Lattice &lattice);
100 void monteCarloPressure_accept(
const int doMigration);
// NOTE(review): several declarations in this span lost their leading tokens to
// extraction -- the orphaned parameter lists below (maxvel2, *PistonStep,
// savePairlists, doEnergy, ...) belong to integrator/launch methods whose names
// fell in the missing original lines (105-115, 128-133, 146-147, 160-168, ...).
// The body of tensor_enforce_symmetry is likewise in the missing lines 105+.
// Do not infer complete signatures from what is shown here.
104 inline static void tensor_enforce_symmetry(
Tensor& t) {
116 const double maxvel2,
120 int reassignVelocitiesStep,
121 int langevinPistonStep,
122 int berendsenPressureStep,
125 const int savePairlists,
126 const int usePairlists,
127 const bool doEnergy);
134 const double maxvel2,
138 int langevinPistonStep,
141 const int savePairlists,
142 const int usePairlists,
143 const bool doEnergy);
145 void launch_set_compute_positions();
148 const int doMCPressure,
155 const int langevinPistonStep,
159 const bool doEnergy);
162 const int doMCPressure,
169 const bool requestGlobalForces,
170 const int doGlobalStaleForces,
171 const bool forceRequestedGPU,
175 const bool requestForcesOutput);
178 void copyGlobalForcesToDevice();
// Copy SETTLE (rigid-water constraint) parameters -- destination inferred
// from the surrounding copy API; confirm in the implementation.
180 void copySettleParameter();
181 void finish_part1(
const int copyIn,
182 const int savePairlists,
183 const int usePairlists);
185 void update_patch_flags();
186 void finish_patch_flags(
int isMigration);
// Update pairlist bookkeeping; doMigration selects the migration-step path.
187 void updatePairlistFlags(
const int doMigration);
188 void updateDeviceKernels();
// Setter for the rescalePairlistTolerance flag declared further below.
189 void setRescalePairlistTolerance(
const bool val);
192 void allocateGPUSavedForces();
// CUDA streams and events; stream2 with its CopyDone/CopyAfter events suggests
// a second stream dedicated to overlapped copies -- confirm usage sites.
193 cudaStream_t stream, stream2;
203 cudaEvent_t stream2CopyDone, stream2CopyAfter;
208 std::vector<AtomMap*> atomMapList;
// --- Device-side buffers. The d_ prefix follows the usual device-pointer ---
// --- naming convention; allocation sites are outside this chunk.         ---
// Migration scratch: solute sort order, per-atom destinations, group layout.
215 int *d_sortSoluteIndex;
216 int4 *d_migrationDestination;
218 int *d_migrationGroupSize;
219 int *d_migrationGroupIndex;
// SoA force components split by force class (normal, nonbonded, slow).
234 double *d_f_normal_x, *d_f_normal_y, *d_f_normal_z;
235 double *d_f_nbond_x, *d_f_nbond_y, *d_f_nbond_z;
236 double *d_f_slow_x, *d_f_slow_y, *d_f_slow_z;
// SoA velocities and positions.
237 double *d_vel_x, *d_vel_y, *d_vel_z;
239 double *d_pos_x, *d_pos_y, *d_pos_z;
241 double *d_fixedPosition_x, *d_fixedPosition_y, *d_fixedPosition_z;
// Saved force copies -- cf. allocateGPUSavedForces(); exact purpose: verify.
243 double *d_f_saved_nbond_x, *d_f_saved_nbond_y, *d_f_saved_nbond_z;
244 double *d_f_saved_slow_x, *d_f_saved_slow_y, *d_f_saved_slow_z;
// d_posNew_raw appears to back the d_posNew_{x,y,z} views -- confirm allocation.
246 double *d_posNew_raw;
247 double *d_posNew_x, *d_posNew_y, *d_posNew_z;
249 double *d_f_global_x, *d_f_global_y, *d_f_global_z;
// rcm/vcm: likely center-of-mass position/velocity accumulators -- verify.
251 double *d_rcm_x, *d_rcm_y, *d_rcm_z;
252 double *d_vcm_x, *d_vcm_y, *d_vcm_z;
// MC-suffixed buffers: snapshots used by the Monte Carlo pressure moves.
257 double *d_f_normalMC_x, *d_f_normalMC_y, *d_f_normalMC_z;
258 double *d_f_nbondMC_x, *d_f_nbondMC_y, *d_f_nbondMC_z;
259 double *d_f_slowMC_x, *d_f_slowMC_y, *d_f_slowMC_z;
261 double *d_posMC_x, *d_posMC_y, *d_posMC_z;
265 int *d_moleculeStartIndex;
269 double *d_velNew_x, *d_velNew_y, *d_velNew_z;
270 double *d_posSave_x, *d_posSave_y, *d_posSave_z;
273 int *d_patchOffsetTemp;
// Rigid-bond (SETTLE/RATTLE) and Langevin BBK per-atom parameters.
275 float *d_rigidBondLength;
281 float *d_langevinParam;
282 float *d_langScalVelBBK2;
283 float *d_langScalRandBBK2;
284 float *d_gaussrand_x, *d_gaussrand_y, *d_gaussrand_z;
285 int *d_hydrogenGroupSize;
287 size_t d_consFailureSize;
289 size_t settleListSize;
291 size_t rattleListSize;
// Patch-level lookup tables and geometry.
292 int* d_globalToLocalID;
293 int* d_patchToDeviceMap;
294 double3* d_patchCenter;
295 double3* d_awayDists;
298 double* d_patchMaxAtomMovement;
299 double* d_patchNewTolerance;
// "tbc" likely = thread-block-counting atomics scratch -- name-based; verify.
300 unsigned int* d_tbcatomic;
// --- Host-side counterparts of the device SoA buffers (no d_ prefix). ---
306 double *f_global_x, *f_global_y, *f_global_z;
307 double *f_normal_x, *f_normal_y, *f_normal_z;
308 double *f_nbond_x, *f_nbond_y, *f_nbond_z;
309 double *f_slow_x, *f_slow_y, *f_slow_z;
310 double *vel_x, *vel_y, *vel_z;
311 double *pos_x, *pos_y, *pos_z;
317 float *langevinParam;
318 float *langScalVelBBK2;
319 float *langScalRandBBK2;
321 int *hydrogenGroupSize;
322 float *rigidBondLength;
326 double* fixedPosition_x;
327 double* fixedPosition_y;
328 double* fixedPosition_z;
329 int* globalToLocalID;
330 int* patchToDeviceMap;
332 double3* patchCenter;
336 double* patchMaxAtomMovement;
337 double* patchNewTolerance;
338 int* computeNbondPosition;
// Current pairlist tolerance -- presumably maintained by updatePairlistFlags().
342 double pairlist_newTolerance;
351 BigReal* intKineticEnergy_half;
// h_ prefix suggests host-side (possibly pinned) counters mirrored from the
// device -- confirm the allocation call.
375 unsigned int *h_marginViolations;
376 unsigned int *h_periodicCellSmall;
378 unsigned int totalMarginViolations;
381 bool buildRigidLists;
// Accumulated forces on fixed atoms, per force class -- name-based; verify.
412 double3 *d_fixForceNormal;
413 double3 *d_fixForceNbond;
414 double3 *d_fixForceSlow;
// Atom/patch counts and allocated capacities used by reallocateArrays().
420 int numAtomsHomePrev;
421 int numAtomsHomeAllocated;
422 int numAtomsHomeAndProxy;
423 int numAtomsHomeAndProxyAllocated;
425 int numPatchesGlobal;
426 int numPatchesHomeAndProxy;
429 int marginViolations;
430 bool rescalePairlistTolerance;
// Counts of SETTLE- and RATTLE-constrained entries.
431 int nSettle, nRattle;
// Charm++ node-level lock, presumably serializing debug printing.
436 CmiNodeLock printlock;
// CUDA events plus accumulated per-phase timings (t_*) for profiling.
438 cudaEvent_t eventStart, eventStop;
443 float t_pairlistCheck;
444 float t_setComputePositions;
447 float t_accumulateForceKick;
450 float t_submitReductions1;
451 float t_submitReductions2;
// Home patches handled here, and the home+proxy superset.
456 std::vector<HomePatch*> patchList;
460 std::vector<HomePatch*> patchListHomeAndProxy;
464 unsigned long long int d_ullmaxtol;
// Kernel-wrapper objects for the GPU integration/migration/restraint work.
465 SequencerCUDAKernel *CUDASequencerKernel;
466 MigrationCUDAKernel *CUDAMigrationKernel;
467 ComputeRestraintsCUDA *restraintsKernel;
468 ComputeSMDCUDA *SMDKernel;
469 ComputeGroupRestraintsCUDA *groupRestraintsKernel;
470 ComputeGridForceCUDA *gridForceKernel;
// RNG handle. The header includes hiprand, so "curand" names here are likely
// mapped via a CUDA/HIP compatibility shim -- confirm against the build.
472 curandGenerator_t curandGen;
473 ComputeConsForceCUDA *consForceKernel;
475 size_t num_used_grids;
476 std::vector<int> used_grids;
479 unsigned int* deviceQueue;
// --- Peer-device pointer tables (one entry per peer GPU) for direct     ---
// --- multi-GPU access. fb/fn/fs presumably mirror the normal(bonded)/   ---
// --- nonbonded/slow force classes -- verify against copyDataToPeers().  ---
482 double** d_peer_pos_x;
483 double** d_peer_pos_y;
484 double** d_peer_pos_z;
485 float** d_peer_charge;
486 int** d_peer_partition;
487 double** d_peer_vel_x;
488 double** d_peer_vel_y;
489 double** d_peer_vel_z;
490 double** d_peer_fb_x;
491 double** d_peer_fb_y;
492 double** d_peer_fb_z;
493 double** d_peer_fn_x;
494 double** d_peer_fn_y;
495 double** d_peer_fn_z;
496 double** d_peer_fs_x;
497 double** d_peer_fs_y;
498 double** d_peer_fs_z;
499 bool** h_patchRecordHasForces;
500 bool** d_patchRecordHasForces;
505 int4** d_peer_migrationDestination;
506 int** d_peer_sortSoluteIndex;
509 int** d_peer_vdwType;
510 int** d_peer_sortOrder;
511 int** d_peer_unsortOrder;
512 double3** d_peer_patchCenter;
// NOTE(review): the leading parameters below (maxvel2 ... doEnergy) belong to a
// method whose name fell in missing original lines (~513-522); several of the
// following declarations are similarly truncated by extraction gaps. Code is
// left byte-identical.
520 const double maxvel2,
523 int numAtoms,
int part,
524 const bool doEnergy);
525 void submitReductions(
529 int marginViolations,
535 void submitReductionValues();
// Copy positions/velocities back to the host arrays; doGlobal presumably gates
// the global-forces path -- confirm in the implementation.
536 void copyPositionsAndVelocitiesToHost(
bool copyOut,
const int doGlobal);
537 void copyPositionsToHost();
538 void startRun1(
int maxForceNumber,
const Lattice& lat);
551 const bool requestGlobalForces,
552 int doGlobalMasterStateForces,
553 const bool requestForcesOutput,
554 const bool requestGlobalForcesGPU,
// TIP4P-style force redistribution -- name-based; verify the water-model path.
558 void redistributeTip4pForces(
559 const int maxForceNumber,
// Debug dumps of the SOA force/position/velocity arrays.
562 void printSOAForces(
char *);
563 void printSOAPositionsAndVelocities();
564 void registerSOAPointersToHost();
565 void copySOAHostRegisterToDevice();
568 void calculateExternalForces(
570 const int maxForceNumber,
574 void atomUpdatePme();
576 void updateHostPatchDataSOA();
577 void saveForceCUDASOA_direct(
579 const bool doForcesOutput,
580 const int maxForceNumber);
581 void copyPositionsToHost_direct();
// Trivial inline accessors. The members returned (numPatchesHome, patchMin,
// patchMax, awayDists) are declared outside this visible chunk.
583 int getNumPatchesHome() {
return numPatchesHome; }
585 double3* getHostPatchMin() {
return patchMin; }
586 double3* getHostPatchMax() {
return patchMax; }
587 double3* getHostAwayDists() {
return awayDists; }
591 #endif // SEQUENCERCUDA_H
friend class SequencerCUDA
static void partition(int *order, const FullAtom *atoms, int begin, int end)
// A class for copying atom information from SequencerCUDA to CudaGlobalMasterClient.