#ifndef SEQUENCERCUDA_H
#define SEQUENCERCUDA_H

// ... (other includes elided in this excerpt)
#include <hiprand/hiprand.h>

#ifdef NODEGROUP_FORCE_REGISTER

class SequencerCUDA {
  // (base class list, access specifiers, and earlier members elided in this
  // excerpt)

  inline static SequencerCUDA *Object() {
    return CkpvAccess(SequencerCUDA_instance);
  }
  inline static SequencerCUDA *ObjectOnPe(int pe) {
    return CkpvAccessOther(SequencerCUDA_instance, CmiRankOf(pe));
  }
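  // Charm++ Ckpv pattern: each PE holds its own SequencerCUDA_instance, so
  // Object() returns the calling PE's instance, while ObjectOnPe(pe) reaches
  // a same-node peer via its node-local rank. Usage sketch (assuming pe is a
  // valid PE on this node):
  //   SequencerCUDA *seq = SequencerCUDA::ObjectOnPe(pe);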
  int numPatchesCheckedIn;
  std::vector<CthThread> waitingThreads;
  bool masterThreadSleeping = false;
  bool breakSuspends = false;
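  // These members appear to coordinate suspend/resume of the per-device
  // master thread: threads queue in waitingThreads until all patches check
  // in, and breakSuspends presumably forces a wakeup (inferred from the
  // names; the suspend logic itself is elided from this excerpt).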
  bool reallocateArrays(int in_numAtomsHome, int in_numAtomsHomeAndProxy);
  void reallocateMigrationDestination();
  void deallocateArrays();
  void deallocateStaticArrays();
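  // reallocateArrays takes both atom counts because home atoms and
  // home-plus-proxy atoms are sized separately (see numAtomsHomeAllocated and
  // numAtomsHomeAndProxyAllocated below); the bool return presumably reports
  // whether the buffers were actually regrown (an assumption from the
  // signature, not confirmed by this excerpt).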
  void copyAoSDataToHost();
  void copyPatchDataToHost();
  void copyAtomDataToDeviceAoS();
  void copyAtomDataToDevice(bool copyForces, int maxForceNumber);
  bool copyPatchData(const bool copyIn, const bool startup);
  void copyDataToPeers(const bool copyIn);
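  // The AoS variants move atoms in the host's array-of-structures layout,
  // while the plain copies use the structure-of-arrays buffers declared
  // below; maxForceNumber presumably selects how many force levels
  // (normal/nbond/slow) to transfer, matching the force arrays further down
  // (an inference from the member names).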
  void migrationLocalInit();
  void migrationPerform();
  void migrationLocalPost(int startup);
  void migrationUpdateAdvancedFeatures(const int startup);
  void migrationUpdateAtomCounts();
  void migrationUpdateAtomOffsets();
  void migrationUpdateRemoteOffsets();
  void migrationUpdateProxyDestination();
  void migrationUpdateDestination();
  void migrationSortAtomsNonbonded();
  void copyMigrationInfo(HomePatch *p, int patchIndex);
  void assembleOrderedPatchList();
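  // The migration* methods appear to be the phases of GPU-resident atom
  // migration: compute destinations, exchange counts/offsets with peer
  // devices, then sort atoms into nonbonded order (inferred from the names;
  // the call order lives in the implementation file, which is elided here).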
  // Fragment (opening of this declaration elided in this excerpt):
  //     const bool doEnergy,
  //     const bool doGlobal,
  //     const bool doVirial);
  void monteCarloPressure_reject(Lattice &lattice);
  inline static void tensor_enforce_symmetry(Tensor& t) {
    // ... (body elided in this excerpt)
  }
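  // A minimal sketch of the symmetrization the name implies, assuming NAMD's
  // Tensor exposes xx..zz components (the real body is elided above):
  //   t.xy = t.yx = 0.5 * (t.xy + t.yx);
  //   t.xz = t.zx = 0.5 * (t.xz + t.zx);
  //   t.yz = t.zy = 0.5 * (t.yz + t.zy);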
  // Fragment (opening of this declaration and some parameters elided):
  //     ...
  //     const double maxvel2,
  //     ...
  //     int reassignVelocitiesStep,
  //     int langevinPistonStep,
  //     int berendsenPressureStep,
  //     ...
  //     const int savePairlists,
  //     const int usePairlists,
  //     const bool doEnergy);

  // Fragment (opening of another declaration and some parameters elided):
  //     ...
  //     const double maxvel2,
  //     ...
  //     int langevinPistonStep,
  //     ...
  //     const int savePairlists,
  //     const int usePairlists,
  //     const bool doEnergy);

  void launch_set_compute_positions();

  // Fragment (opening elided):
  //     const int doMCPressure,
  //     ...
  //     const int langevinPistonStep,
  //     ...
  //     const bool doEnergy);

  // Fragment (opening elided):
  //     const int doMCPressure,
  //     ...
  //     const bool requestGlobalForces,
  //     const int doGlobalStaleForces,
  //     const bool forceRequestedGPU,
  //     ...
  //     const bool requestForcesOutput);
  void copyGlobalForcesToDevice();
  void copySettleParameter();
  void finish_part1(
      const int copyIn,
      const int savePairlists,
      const int usePairlists,
      /* ... (remaining parameters elided) */);
  void update_patch_flags();
  void finish_patch_flags(int isMigration);
  void updatePairlistFlags(const int doMigration);
  void updateDeviceKernels();
  void allocateGPUSavedForces();
  cudaStream_t stream, stream2;
  cudaEvent_t stream2CopyDone, stream2CopyAfter;
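  // Two CUDA streams plus completion events, presumably so that host/device
  // copies on stream2 can overlap kernel work on stream and be ordered with
  // cudaStreamWaitEvent (an inference; the synchronization code is elided).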
  std::vector<AtomMap*> atomMapList;

  int *d_sortSoluteIndex;
  int4 *d_migrationDestination;
  int *d_migrationGroupSize;
  int *d_migrationGroupIndex;
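  // Naming convention for the buffers in this class (inferred from the
  // declarations): a d_ prefix marks device memory, and per-atom vector
  // quantities are stored structure-of-arrays as separate _x/_y/_z
  // component arrays.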
  double *d_f_normal_x, *d_f_normal_y, *d_f_normal_z;
  double *d_f_nbond_x, *d_f_nbond_y, *d_f_nbond_z;
  double *d_f_slow_x, *d_f_slow_y, *d_f_slow_z;
  double *d_vel_x, *d_vel_y, *d_vel_z;
  double *d_pos_x, *d_pos_y, *d_pos_z;
  double *d_fixedPosition_x, *d_fixedPosition_y, *d_fixedPosition_z;
  double *d_f_saved_nbond_x, *d_f_saved_nbond_y, *d_f_saved_nbond_z;
  double *d_f_saved_slow_x, *d_f_saved_slow_y, *d_f_saved_slow_z;
  double *d_posNew_raw;
  double *d_posNew_x, *d_posNew_y, *d_posNew_z;
  double *d_f_global_x, *d_f_global_y, *d_f_global_z;
  double *d_rcm_x, *d_rcm_y, *d_rcm_z;
  double *d_vcm_x, *d_vcm_y, *d_vcm_z;
  double *d_f_normalMC_x, *d_f_normalMC_y, *d_f_normalMC_z;
  double *d_f_nbondMC_x, *d_f_nbondMC_y, *d_f_nbondMC_z;
  double *d_f_slowMC_x, *d_f_slowMC_y, *d_f_slowMC_z;
  double *d_posMC_x, *d_posMC_y, *d_posMC_z;
  int *d_moleculeStartIndex;
  double *d_velNew_x, *d_velNew_y, *d_velNew_z;
  double *d_posSave_x, *d_posSave_y, *d_posSave_z;
  int *d_patchOffsetTemp;
  float *d_rigidBondLength;
  float *d_langevinParam;
  float *d_langScalVelBBK2;
  float *d_langScalRandBBK2;
  float *d_gaussrand_x, *d_gaussrand_y, *d_gaussrand_z;
  int *d_hydrogenGroupSize;
  size_t d_consFailureSize;
  size_t settleListSize;
  size_t rattleListSize;
  int* d_globalToLocalID;
  int* d_patchToDeviceMap;
  double3* d_patchCenter;
  double3* d_awayDists;
  double* d_patchMaxAtomMovement;
  double* d_patchNewTolerance;
  unsigned int* d_tbcatomic;
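  // Host-side counterparts of the device buffers above (same names, no d_
  // prefix), presumably allocated pinned so the stream2 transfers can run
  // asynchronously (the allocation code is elided, so pinning is an
  // assumption):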
  double *f_global_x, *f_global_y, *f_global_z;
  double *f_normal_x, *f_normal_y, *f_normal_z;
  double *f_nbond_x, *f_nbond_y, *f_nbond_z;
  double *f_slow_x, *f_slow_y, *f_slow_z;
  double *vel_x, *vel_y, *vel_z;
  double *pos_x, *pos_y, *pos_z;
  float *langevinParam;
  float *langScalVelBBK2;
  float *langScalRandBBK2;
  int *hydrogenGroupSize;
  float *rigidBondLength;
  double* fixedPosition_x;
  double* fixedPosition_y;
  double* fixedPosition_z;
  int* globalToLocalID;
  int* patchToDeviceMap;
  double3* patchCenter;
  double* patchMaxAtomMovement;
  double* patchNewTolerance;
  int* computeNbondPosition;
  double pairlist_newTolerance;
  BigReal* intKineticEnergy_half;
  unsigned int *h_marginViolations;
  unsigned int *h_periodicCellSmall;
  unsigned int totalMarginViolations;
  bool buildRigidLists;

  double3 *d_fixForceNormal;
  double3 *d_fixForceNbond;
  double3 *d_fixForceSlow;

  int numAtomsHomePrev;
  int numAtomsHomeAllocated;
  int numAtomsHomeAndProxy;
  int numAtomsHomeAndProxyAllocated;
  int numPatchesGlobal;
  int numPatchesHomeAndProxy;
  int marginViolations;
  bool rescalePairlistTolerance;
  int nSettle, nRattle;

  CmiNodeLock printlock;
  cudaEvent_t eventStart, eventStop;
  float t_pairlistCheck;
  float t_setComputePositions;
  float t_accumulateForceKick;
  float t_submitReductions1;
  float t_submitReductions2;
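  // The t_* floats above presumably accumulate per-phase GPU times by
  // bracketing launches with eventStart/eventStop and cudaEventElapsedTime
  // (an inference from the event members; the timing code is elided).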
  std::vector<HomePatch*> patchList;
  std::vector<HomePatch*> patchListHomeAndProxy;

  unsigned long long int d_ullmaxtol;
  SequencerCUDAKernel *CUDASequencerKernel;
  MigrationCUDAKernel *CUDAMigrationKernel;
  ComputeRestraintsCUDA *restraintsKernel;
  ComputeSMDCUDA *SMDKernel;
  ComputeGroupRestraintsCUDA *groupRestraintsKernel;
  ComputeGridForceCUDA *gridForceKernel;
  curandGenerator_t curandGen;
  ComputeConsForceCUDA *consForceKernel;
  size_t num_used_grids;
  std::vector<int> used_grids;
  unsigned int* deviceQueue;
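  // Peer-device pointer tables: each d_peer_* member is a per-device array of
  // raw device pointers into other GPUs' buffers, which copyDataToPeers and
  // the migrationUpdateRemote* methods above presumably traverse for direct
  // GPU-to-GPU access (a hedged inference from the names).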
  double** d_peer_pos_x;
  double** d_peer_pos_y;
  double** d_peer_pos_z;
  float** d_peer_charge;
  int** d_peer_partition;
  double** d_peer_vel_x;
  double** d_peer_vel_y;
  double** d_peer_vel_z;
  double** d_peer_fb_x;
  double** d_peer_fb_y;
  double** d_peer_fb_z;
  double** d_peer_fn_x;
  double** d_peer_fn_y;
  double** d_peer_fn_z;
  double** d_peer_fs_x;
  double** d_peer_fs_y;
  double** d_peer_fs_z;
  bool** h_patchRecordHasForces;
  bool** d_patchRecordHasForces;
  int4** d_peer_migrationDestination;
  int** d_peer_sortSoluteIndex;
  int** d_peer_vdwType;
  int** d_peer_sortOrder;
  int** d_peer_unsortOrder;
  double3** d_peer_patchCenter;
  // Fragment (opening of this declaration and some parameters elided):
  //     ...
  //     const double maxvel2,
  //     ...
  //     int numAtoms,
  //     int part,
  //     const bool doEnergy);
  void submitReductions(
      /* ... */
      int marginViolations,
      /* ... (remaining parameters elided) */);
  void submitReductionValues();
  void copyPositionsAndVelocitiesToHost(bool copyOut, const int doGlobal);
  void copyPositionsToHost();
  void startRun1(int maxForceNumber, const Lattice& lat);
  // Fragment (opening of this declaration elided):
  //     ...
  //     const bool requestGlobalForces,
  //     int doGlobalMasterStateForces,
  //     const bool requestForcesOutput,
  //     const bool requestGlobalForcesGPU,
  //     ...
  void redistributeTip4pForces(
      /* ... */
      const int maxForceNumber,
      /* ... (remaining parameters elided) */);
  void printSOAForces(char *);
  void printSOAPositionsAndVelocities();
  void registerSOAPointersToHost();
  void copySOAHostRegisterToDevice();
  void calculateExternalForces(
      /* ... */
      const int maxForceNumber,
      /* ... (remaining parameters elided) */);
  void atomUpdatePme();
  void updateHostPatchDataSOA();
  void saveForceCUDASOA_direct(
      /* ... (one parameter elided) */
      const bool doForcesOutput,
      const int maxForceNumber);
  void copyPositionsToHost_direct();
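  // Accessors for host-side patch geometry buffers; numPatchesHome, patchMin,
  // patchMax, and awayDists are declared in portions of the header elided
  // from this excerpt: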
  int getNumPatchesHome() { return numPatchesHome; }
  double3* getHostPatchMin() { return patchMin; }
  double3* getHostPatchMax() { return patchMax; }
  double3* getHostAwayDists() { return awayDists; }
  // ... (remaining members elided)
};

// Elsewhere in this header (context elided): a class for copying atom
// information from SequencerCUDA to CudaGlobalMasterClient. Surviving
// fragments from those elided sections:
//   friend class SequencerCUDA;
//   SubmitReduction *reduction;
//   static void partition(int *order, const FullAtom *atoms, int begin, int end);

#endif // NODEGROUP_FORCE_REGISTER
#endif // SEQUENCERCUDA_H