30 #define PRINT_FORCES 0 62 #define MIN_DEBUG_LEVEL 3 72 #define START_HPM_STEP 1000 73 #define STOP_HPM_STEP 1500 77 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 79 #define __thread __declspec(thread) 88 #define SPECIAL_PATCH_ID 91 97 int ilo=0,
int ihip1=1
99 printf(
"AOS Velocities:\n");
100 for (
int i=ilo; i < ihip1; i++) {
101 printf(
"%d %g %g %g\n", i,
111 int ilo=0,
int ihip1=1
113 printf(
"SOA Velocities:\n");
114 for (
int i=ilo; i < ihip1; i++) {
115 printf(
"%d %g %g %g\n", i, vel_x[i], vel_y[i], vel_z[i]);
121 printf(
"%g %g %g %g %g %g %g %g %g\n",
159 inline int init(
int initstep,
int initperiod,
int delta=0) {
176 pairlistsAreValid(0),
183 #
if defined(NAMD_CUDA) || defined(NAMD_HIP)
185 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
186 PatchData* patchData = cpdata.ckLocalBranch();
189 #endif // defined(NAMD_CUDA) || defined(NAMD_HIP) 228 #if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(SEQUENCER_SOA) && defined(NODEGROUP_FORCE_REGISTER) 253 #if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(SEQUENCER_SOA) && defined(NODEGROUP_FORCE_REGISTER) 255 delete CUDASequencer;
262 void Sequencer::threadRun(
Sequencer* arg)
272 DebugM(4,
"::run() - this = " <<
this <<
"\n" );
273 thread = CthCreate((CthVoidFn)&(threadRun),(
void*)(
this),
SEQ_STK_SZ);
274 CthSetStrategyDefault(thread);
295 switch ( scriptTask ) {
340 NAMD_die(
"Minimization is currently not supported on the GPU integrator\n");
355 #ifdef NODEGROUP_FORCE_REGISTER 361 #ifdef NODEGROUP_FORCE_REGISTER 370 NAMD_bug(
"Unknown task in Sequencer::algorithm");
385 #if defined(NODEGROUP_FORCE_REGISTER) 389 CUDASequencer->numPatchesCheckedIn += 1;
390 if (CUDASequencer->numPatchesCheckedIn < patchMap->
numPatchesOnNode(CkMyPe())) {
392 CUDASequencer->waitingThreads.push_back(CthSelf());
399 int lastStep = CUDASequencer->patchData->flags.step;
401 if (CUDASequencer->breakSuspends)
break;
407 CUDASequencer->numPatchesCheckedIn += 1;
408 CUDASequencer->waitingThreads.push_back(CthSelf());
409 if(CUDASequencer->numPatchesCheckedIn == patchMap->
numPatchesOnNode(CkMyPe()) - 1 &&
410 CUDASequencer->masterThreadSleeping){
411 CUDASequencer->masterThreadSleeping =
false;
412 CthAwaken(CUDASequencer->masterThread);
419 CUDASequencer->numPatchesCheckedIn = 0;
420 for (CthThread t : CUDASequencer->waitingThreads) {
423 CUDASequencer->waitingThreads.clear();
440 CUDASequencer->sync();
452 ComputeBondedCUDA* cudaBond = cudaMgr->getComputeBondedCUDA();
460 if (isMaster && cudaGlobal && doMigration) cudaGlobal->setStep(static_cast<int64_t>(
patch->
flags.
step));
480 cudaBond->atomUpdate();
487 cudaGlobal->updateAtomMaps();
488 cudaGlobal->communicateToClients(&(this->
patch->
lattice));
496 CUDASequencer->launch_set_compute_positions();
497 CUDASequencer->sync();
505 for(
int i = 0 ; i < CkNumPes(); i++){
508 ComputeBondedCUDA* b = CUDASequencer->patchData->cudaBondedList[i];
510 if (b == NULL)
continue;
511 auto list = std::find(std::begin(b->getBondedPes()), std::end(b->getBondedPes()), CkMyPe());
512 if( list != std::end(b->getBondedPes()) ){
513 b->openBoxesOnPe(startup);
528 cudaBond->loadTuplesOnPe(startup);
534 cudaBond->copyTupleDataGPU(startup);
536 cudaBond->copyTupleDataSN();
546 cudaBond->launchWork();
548 if (cudaPme && computePme) {
549 cudaPme->
compute(*(CUDASequencer->patchData->lat), reducePme, this->patch->flags.step);
554 cudaGlobal->calculate();
570 for(
int i = 0; i < numhp; ++i) {
572 hp->zero_global_forces_SOA();
587 CUDASequencer->copyGlobalForcesToDevice();
594 cudaBond->finishPatches();
602 cudaBond->finishReductions();
603 if (cudaGlobal) cudaGlobal->finishReductions();
612 if (cudaPme && computePme) {
613 cudaPme->
compute(*(CUDASequencer->patchData->lat), reducePme, this->patch->flags.step);
619 cudaGlobal->calculate();
631 for(
int i = 0; i < numhp; ++i) {
633 hp->zero_global_forces_SOA();
648 CUDASequencer->copyGlobalForcesToDevice();
654 cudaBond->finishPatches();
659 if (cudaGlobal) cudaGlobal->finishReductions();
677 const int doMigration,
680 const int maxForceNumber,
685 Controller *c_out = CUDASequencer->patchData->c_out;
686 bool mGpuOn = CUDASequencer->mGpuOn;
692 CUDASequencer->submitReductionValues();
695 CUDASequencer->patchData->reductionBackendSave->setVal(
reduction);
699 c_out->mcPressure_prepare(step);
706 CUDASequencer->monteCarloPressure_part1(factor, origin, oldLattice);
711 CUDASequencer->patchData->lat = &(this->
patch->
lattice);
712 CUDASequencer->patchData->factor = &(factor);
714 CUDASequencer->patchData->flags.copyIntFlags(this->
patch->
flags);
725 CUDASequencer->update_patch_flags();
736 CUDASequencer->monteCarloPressure_part2(step, maxForceNumber,
737 doEnergy, doGlobal, doVirial);
738 CUDASequencer->submitReductionValues();
742 c_out->mcPressure_accept(step);
748 CUDASequencer->monteCarloPressure_accept(doMigration);
753 CUDASequencer->patchData->lat = &(this->
patch->
lattice);
755 CUDASequencer->patchData->flags.copyIntFlags(this->
patch->
flags);
759 CUDASequencer->monteCarloPressure_reject(this->
patch->
lattice);
762 reduction->
setVal(CUDASequencer->patchData->reductionBackendSave);
768 if(isMasterPe && !accepted){
770 CUDASequencer->update_patch_flags();
775 const int updatePatchMap) {
778 const bool updatePatchData = startup || doGlobal || updatePatchMap;
781 bool realloc =
false;
790 if(CUDASequencer->patchData->atomReallocationFlagPerDevice[i] != 0) {
797 CUDASequencer->reallocateMigrationDestination();
798 CUDASequencer->registerSOAPointersToHost();
802 CUDASequencer->copySOAHostRegisterToDevice();
811 CUDASequencer->migrationLocalInit();
817 CUDASequencer->migrationPerform();
823 CUDASequencer->migrationUpdateAtomCounts();
829 CUDASequencer->migrationUpdateAtomOffsets();
835 CUDASequencer->copyPatchDataToHost();
843 realloc = CUDASequencer->copyPatchData(
true,
false);
850 if(CUDASequencer->patchData->atomReallocationFlagPerDevice[i] != 0) {
857 CUDASequencer->registerSOAPointersToHost();
861 CUDASequencer->copySOAHostRegisterToDevice();
867 CUDASequencer->migrationLocalPost(0);
868 CUDASequencer->migrationSortAtomsNonbonded();
873 if (!updatePatchData) {
876 if(CUDASequencer->numPatchesCheckedIn < patchMap->
numPatchesOnNode(CkMyPe()) -1 ) {
877 CUDASequencer->masterThreadSleeping =
true;
878 CUDASequencer->masterThread = CthSelf();
884 CUDASequencer->sync();
889 CUDASequencer->migrationUpdateDestination();
894 CUDASequencer->migrationUpdateProxyDestination();
899 CUDASequencer->migrationUpdateRemoteOffsets();
904 CUDASequencer->copyDataToPeers(
true);
908 if (updatePatchData) {
912 for(
int i = 0; i < numhp; ++i) {
921 CUDASequencer->copyAoSDataToHost();
926 if(CUDASequencer->numPatchesCheckedIn < patchMap->
numPatchesOnNode(CkMyPe()) -1 ) {
927 CUDASequencer->masterThreadSleeping =
true;
928 CUDASequencer->masterThread = CthSelf();
935 CUDASequencer->updateHostPatchDataSOA();
943 CUDASequencer->migrationUpdateAdvancedFeatures(
false);
951 #ifdef TIMER_COLLECTION 952 TimerSet& t =
patch->timerSet;
969 Controller *c_out = CUDASequencer->patchData->c_out;
994 doNonbonded = (step >= numberOfSteps) ||
1011 doFullElectrostatics = (dofull &&
1012 ((step >= numberOfSteps) ||
1024 doEnergy = energyFrequency.
init(step, newComputeEnergies);
1037 int doGlobal = (doTcl || doColvars || doIMD);
1040 bool globalMasterStep=
false;
1041 int doGlobalObjects=0;
1042 int doGlobalStaleForces = 0;
1047 globalMasterStep = globalMasterFrequency.
check(step);
1048 doGlobalObjects = globalMasterStep? 1:0;
1052 doGlobalStaleForces=1;
1060 doGlobalStaleForces=doGlobalObjects;
1064 doGlobalStaleForces=doGlobalObjects;
1069 doGlobalStaleForces = 0;
1070 doGlobalObjects = 0;
1086 langevinPistonFrequency.
init(step,
1118 patch->copy_atoms_to_SOA();
1134 CUDASequencer->breakSuspends =
false;
1139 if (CUDASequencer->patchData->ptrCollectionMaster == NULL) {
1142 CUDASequencer->patchData->ptrCollectionMaster = pcm;
1145 if (CUDASequencer->patchData->ptrOutput == NULL) {
1148 CUDASequencer->patchData->ptrOutput = pout;
1151 if (CUDASequencer->patchData->pdb == NULL) {
1154 CUDASequencer->patchData->pdb = pdb;
1157 if (CUDASequencer->patchData->imd == NULL) {
1160 CUDASequencer->patchData->imd = imd;
1170 CUDASequencer->patchData->cudaBondedList[CkMyPe()] = NULL;
1171 CUDASequencer->patchData->cudaNonbondedList[CkMyPe()] = NULL;
1201 if(patchData->updateCounter.load()>0)
1203 CUDASequencer->updateDeviceKernels();
1207 CUDASequencer->startRun1(maxForceUsed, this->
patch->
lattice);
1210 CUDASequencer->patchData->lat = &(this->
patch->
lattice);
1211 CUDASequencer->patchData->flags.copyIntFlags(this->
patch->
flags);
1215 const bool addCudaGlobalForces =
1216 (cudaGlobalMasterObject !=
nullptr) ?
1217 cudaGlobalMasterObject->willAddGlobalForces() :
1219 if (addCudaGlobalForces) {
1220 CUDASequencer->allocateGPUSavedForces();
1228 if(CUDASequencer->numPatchesCheckedIn < patchMap->
numPatchesOnNode(CkMyPe()) -1 ) {
1229 CUDASequencer->masterThreadSleeping =
true;
1230 CUDASequencer->masterThread = CthSelf();
1252 CUDASequencer->setRescalePairlistTolerance(step < numberOfSteps);
1263 CUDASequencer->finish_patch_flags(
true);
1266 const bool addCudaGlobalForces =
1267 (cudaGlobalMasterObject !=
nullptr) ?
1268 cudaGlobalMasterObject->willAddGlobalForces() :
1270 CUDASequencer->startRun2(timestep,
1272 doGlobal || addCudaGlobalForces, maxForceUsed);
1276 const bool requestTotalForces = computeGlobal ? computeGlobal->
getForceSendActive() :
false;
1279 const bool requestGPUTotalForces =
1280 (cudaGlobalMasterObject !=
nullptr) ?
1281 cudaGlobalMasterObject->requestedTotalForces() :
1283 CUDASequencer->startRun3(timestep,
1285 requestTotalForces, doGlobalStaleForces,
1287 requestGPUTotalForces,
1298 for(
int i = 0; i < numhp; ++i) {
1304 CUDASequencer->submitReductionValues();
1311 CUDASequencer->patchData->flags.copyIntFlags(this->
patch->
flags);
1313 c_out->printStep(step);
1319 double velrescaling = 1;
1321 for( ++step; step <= numberOfSteps; ++step ){
1322 const int imdStep = imdFrequency.
check(step);
1323 const int isForcesOutputStep = forceDcdFrequency.
check(step) + (doIMD ? imdStep : 0);
1324 int dcdSelectionChecks=0;
1326 for(
int dcdindex=0; dcdindex<16;++dcdindex)
1329 if(dcdSelectionFrequency && step % dcdSelectionFrequency==0)
1330 dcdSelectionChecks++;
1332 const int isCollection = restartFrequency.
check(step) +
1333 dcdFrequency.
check(step) + velDcdFrequency.
check(step) +
1334 imdStep + dcdSelectionChecks;
1335 int isMigration =
false;
1336 const int doVelocityRescale = stochRescaleFrequency.
check(step);
1337 const int doMCPressure = monteCarloPressureFrequency.
check(step);
1339 doEnergy = energyFrequency.
check(step) || doVelocityRescale || doMCPressure;
1340 int langevinPistonStep = langevinPistonFrequency.
check(step);
1342 int reassignVelocityStep = reassignVelocityFrequency.
check(step);
1345 int berendsenPressureStep = 0;
1350 berendsenPressureStep = 1;
1353 if(patchData->updateCounter.load()>0)
1355 CUDASequencer->updateDeviceKernels();
1360 globalMasterStep = globalMasterFrequency.
check(step);
1361 doGlobalObjects = globalMasterStep? 1:0;
1365 doGlobalStaleForces=1;
1373 doGlobalStaleForces=doGlobalObjects;
1377 doGlobalStaleForces=doGlobalObjects;
1382 doGlobalStaleForces = 0;
1383 doGlobalObjects = 0;
1384 globalMasterStep =
false;
1388 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED) 1403 c_out->piston1(step);
1407 c_out->berendsenPressureController(step);
1413 if (isMasterPe)
cudaCheck(cudaDeviceSynchronize());
1416 if (langevinPistonStep || berendsenPressureStep) {
1421 CUDASequencer->patchData->lat = &(this->
patch->
lattice);
1422 CUDASequencer->patchData->factor = &(factor);
1428 int previousMaxForceUsed;
1431 previousMaxForceUsed = maxForceUsed;
1435 doNonbonded = nonbondedFrequency.
check(step);
1437 doFullElectrostatics = (dofull && fullElectFrequency.
check(step));
1453 CUDASequencer->launch_part1(
1455 timestep, nbondstep, slowstep, velrescaling, maxvel2,
1456 *(CUDASequencer->patchData->factor),
1459 (langevinPistonStep || berendsenPressureStep) ? *(CUDASequencer->patchData->lat) : this->patch->lattice,
1460 reassignVelocityStep,
1462 berendsenPressureStep,
1463 previousMaxForceUsed,
1465 this->patch->flags.savePairlists,
1466 this->patch->flags.usePairlists,
1471 if (reassignVelocityStep)
1481 CUDASequencer->launch_part11(
1482 timestep, nbondstep, slowstep, velrescaling, maxvel2,
1483 *(CUDASequencer->patchData->factor),
1486 (langevinPistonStep || berendsenPressureStep) ? *(CUDASequencer->patchData->lat) : this->patch->lattice,
1488 previousMaxForceUsed,
1490 this->patch->flags.savePairlists,
1491 this->patch->flags.usePairlists,
1502 if(CUDASequencer->patchData->migrationFlagPerDevice[i] != 0) {
1511 CUDASequencer->launch_set_compute_positions();
1519 CUDASequencer->updatePairlistFlags(isMigration);
1521 CUDASequencer->copyPositionsAndVelocitiesToHost(isMigration, doGlobalObjects);
1530 cudaGlobal->communicateToClients(&(this->
patch->
lattice));
1541 if(CUDASequencer->numPatchesCheckedIn < patchMap->
numPatchesOnNode(CkMyPe()) -1 ) {
1542 CUDASequencer->masterThreadSleeping =
true;
1543 CUDASequencer->masterThread = CthSelf();
1560 CUDASequencer->finish_patch_flags(isMigration);
1569 const bool addCudaGlobalForces =
1570 (cudaGlobalMasterObject !=
nullptr) ?
1571 cudaGlobalMasterObject->willAddGlobalForces() :
1573 CUDASequencer->launch_part2(doMCPressure,
1574 timestep, nbondstep, slowstep,
1581 doGlobalStaleForces || addCudaGlobalForces,
1593 const bool requestTotalForces = (computeGlobal ? computeGlobal->
getForceSendActive() :
false) && doGlobalObjects;
1598 const bool requestGPUTotalForces =
1599 (CudaGlobalMasterObject !=
nullptr) ?
1600 CudaGlobalMasterObject->requestedTotalForces() :
1602 CUDASequencer->launch_part3(doMCPressure,
1603 timestep, nbondstep, slowstep,
1608 doGlobalStaleForces,
1609 requestGPUTotalForces,
1613 isForcesOutputStep);
1620 if (requestTotalForces) {
1625 for(
int i = 0; i < numhp; ++i) {
1631 CUDASequencer->submitReductionValues();
1638 c_out->printStep(step);
1642 if (doVelocityRescale) {
1649 if (doVelocityRescale) {
1660 CUDASequencer->copyAoSDataToHost();
1667 for (
auto i= hplist->
begin(); i != hplist->
end(); i++) {
1680 CUDASequencer->copyAoSDataToHost();
1681 CUDASequencer->updateHostPatchDataSOA();
1682 CUDASequencer->saveForceCUDASOA_direct(
false,
true, maxForceUsed);
1688 for (
auto i= hplist->
begin(); i != hplist->
end(); i++) {
1690 hp->sort_solvent_atoms();
1691 hp->copy_atoms_to_SOA();
1692 hp->copy_forces_to_AOS();
1696 CUDASequencer->updateHostPatchDataSOA();
1697 CUDASequencer->saveForceCUDASOA_direct(
false,
true, maxForceUsed);
1699 if(isMasterPe) CUDASequencer->copyPositionsAndVelocitiesToHost(
true,doGlobal);
1702 for (
auto i= hplist->
begin(); i != hplist->
end(); i++) {
1704 hp->copy_updates_to_AOS();
1705 hp->copy_forces_to_AOS();
1711 CUDASequencer->breakSuspends =
true;
1725 CUDASequencer->copyPatchData(
true, startup);
1727 CUDASequencer->reallocateMigrationDestination();
1728 CUDASequencer->copyAtomDataToDeviceAoS();
1730 CUDASequencer->copyAtomDataToDevice(startup, maxForceUsed);
1732 CUDASequencer->migrationLocalPost(startup);
1733 CUDASequencer->migrationUpdateAdvancedFeatures(startup);
1735 CUDASequencer->registerSOAPointersToHost();
1739 CUDASequencer->copySOAHostRegisterToDevice();
1745 CUDASequencer->updateHostPatchDataSOA();
1759 ComputeBondedCUDA* cudaBond = cudaMgr->getComputeBondedCUDA();
1762 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1763 patchData = cpdata.ckLocalBranch();
1768 if (patchData->devicePatchMapFlag[CkMyPe()])
return;
1769 patchData->devicePatchMapFlag[CkMyPe()] = 1;
1780 std::vector<NBPatchRecord>& nonbondedPatches = cudaNbond->
getPatches();
1781 std::vector<HomePatch*>& homePatches = patchData->devData[deviceIndex].patches;
1782 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1788 homePatches.begin(),
1796 for (
int i = 0; i < nonbondedPatches.size(); i++) {
1798 record.
patchID = nonbondedPatches[i].patchID;
1801 const int targetPatchID = record.
patchID;
1802 auto result = std::find_if(
1803 homePatches.begin(),
1806 return (p->getPatchID() == targetPatchID);
1809 record.
isProxy = (result == homePatches.end());
1810 localPatches.push_back(record);
1817 localPatches.begin(),
1820 return (a.
isProxy < b.isProxy);
1825 cudaBond->updatePatchOrder(localPatches);
1827 patchData->devData[deviceIndex].numPatchesHome = homePatches.size();
1828 patchData->devData[deviceIndex].numPatchesHomeAndProxy = localPatches.size();
1840 std::vector<CudaPeerRecord>& myPeerPatches = patchData->devData[deviceIndex].h_peerPatches;
1841 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1843 for (
int i = 0; i < localPatches.size(); i++) {
1844 std::vector<CudaPeerRecord> tempPeers;
1845 const int targetPatchID = localPatches[i].patchID;
1846 const int targetIsProxy = localPatches[i].isProxy;
1849 if (devIdx == deviceIndex)
continue;
1850 std::vector<CudaLocalRecord>& peerPatches = patchData->devData[devIdx].h_localPatches;
1854 for (
int j = 0; j < patchData->devData[devIdx].numPatchesHomeAndProxy; j++) {
1856 if (peer.
patchID == targetPatchID && peer.
isProxy != targetIsProxy) {
1860 tempPeers.push_back(peerRecord);
1868 localPatches[i].numPeerRecord = tempPeers.size();
1869 if (!tempPeers.empty()) {
1870 localPatches[i].peerRecordStartIndex = myPeerPatches.size();
1871 myPeerPatches.insert(myPeerPatches.end(), tempPeers.begin(), tempPeers.end());
1879 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1880 patchData = cpdata.ckLocalBranch();
1886 const int numPatchesHome = patchData->devData[deviceIndex].numPatchesHome;
1887 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1890 CkPrintf(
"PE: %d\n", CkMyPe());
1892 CkPrintf(
"[%d] Home patches %d Local patches %d\n", CkMyPe(), numPatchesHome, localPatches.size());
1894 CkPrintf(
"Home Patches: ");
1895 for (
int i = 0; i < numPatchesHome; i++) {
1896 CkPrintf(
"%d ", localPatches[i].patchID);
1900 CkPrintf(
"Proxy Patches: ");
1901 for (
int i = numPatchesHome; i < localPatches.size(); i++) {
1902 CkPrintf(
"%d ", localPatches[i].patchID);
1912 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1913 patchData = cpdata.ckLocalBranch();
1918 if (!patchData->devicePatchMapFlag[CkMyPe()])
return;
1919 patchData->devicePatchMapFlag[CkMyPe()] = 0;
1926 std::vector<HomePatch*>& homePatches = patchData->devData[deviceIndex].patches;
1927 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1928 std::vector<CudaPeerRecord>& peerPatches = patchData->devData[deviceIndex].h_peerPatches;
1930 homePatches.clear();
1931 localPatches.clear();
1932 peerPatches.clear();
1945 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1946 patchData = cpdata.ckLocalBranch();
1952 const int numPatchesHome = patchData->devData[deviceIndex].numPatchesHome;
1953 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1958 int max_atom_count = 0;
1959 int total_atom_count = 0;
1962 for (
int i = 0; i < numPatchesHome; i++) {
1967 if (
patch != NULL)
break;
1969 if (
patch == NULL)
NAMD_die(
"Sequencer: Failed to find patch in updateDevicePatchMap");
1974 if (localPatches[i].numAtoms > max_atom_count) max_atom_count = localPatches[i].numAtoms;
1975 total_atom_count += localPatches[i].numAtoms;
1983 const int numPatchesHome = patchData->devData[deviceIndex].numPatchesHome;
1984 const int numPatchesHomeAndProxy = patchData->devData[deviceIndex].numPatchesHomeAndProxy;
1985 std::vector<CudaPeerRecord>& peerPatches = patchData->devData[deviceIndex].h_peerPatches;
1986 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1988 for (
int i = numPatchesHome; i < numPatchesHomeAndProxy; i++) {
1989 const int index = localPatches[i].peerRecordStartIndex;
1990 const int devIdx = peerPatches[index].deviceIndex;
1991 const int peerIdx = peerPatches[index].patchIndex;
1992 const CudaLocalRecord peer = patchData->devData[devIdx].h_localPatches[peerIdx];
1994 localPatches[i].numAtoms = peer.
numAtoms;
2003 const int numPatchesHomeAndProxy = patchData->devData[deviceIndex].numPatchesHomeAndProxy;
2004 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
2006 int runningOffset = 0;
2007 int runningOffsetNBPad = 0;
2009 for (
int i = 0; i < numPatchesHomeAndProxy; i++) {
2010 localPatches[i].bufferOffset = runningOffset;
2011 localPatches[i].bufferOffsetNBPad = runningOffsetNBPad;
2012 runningOffset += localPatches[i].numAtoms;
2013 runningOffsetNBPad += localPatches[i].numAtomsNBPad;
2021 const int numPatchesHomeAndProxy = patchData->devData[deviceIndex].numPatchesHomeAndProxy;
2022 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
2023 std::vector<CudaPeerRecord>& peerPatches = patchData->devData[deviceIndex].h_peerPatches;
2026 for (
int i = 0; i < peerPatches.size(); i++) {
2027 const int devIdx = peerPatches[i].deviceIndex;
2028 const int peerIdx = peerPatches[i].patchIndex;
2029 const CudaLocalRecord peer = patchData->devData[devIdx].h_localPatches[peerIdx];
2036 for (
int i = 0; i < numPatchesHomeAndProxy; i++) {
2037 const int numPeerRecord = localPatches[i].numPeerRecord;
2038 const int peerOffset = localPatches[i].peerRecordStartIndex;
2041 localPatches[i].inline_peers[j] = peerPatches[peerOffset+j];
2058 #ifdef TIMER_COLLECTION 2059 TimerSet& t =
patch->timerSet;
2098 doNonbonded = (step >= numberOfSteps) ||
2115 doFullElectrostatics = (dofull &&
2116 ((step >= numberOfSteps) ||
2128 doEnergy = energyFrequency.
init(step, newComputeEnergies);
2133 int doGlobal = doTcl || doColvars;
2151 langevinPistonFrequency.
init(step,
2318 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED) 2322 int beginStep =
simParams->beginEventStep;
2327 for ( ++step; step <= numberOfSteps; ++step ) {
2328 int dcdSelectionChecks=0;
2330 for(
int dcdindex=0; dcdindex<16;++dcdindex)
2333 if(dcdSelectionFrequency && step % dcdSelectionFrequency==0)
2334 dcdSelectionChecks++;
2336 const int isCollection = restartFrequency.
check(step) +
2337 dcdFrequency.
check(step) + velDcdFrequency.
check(step) +
2338 forceDcdFrequency.
check(step) + imdFrequency.
check(step) +
2340 const int isMigration = stepsPerCycle.
check(step);
2341 doEnergy = energyFrequency.
check(step);
2342 DebugM(3,
"doGlobal now "<< doGlobal<<
"\n"<<
endi);
2344 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED) 2345 eon = epid && (beginStep < step && step <= endStep);
2347 if (controlProfiling && step == beginStep) {
2350 if (controlProfiling && step == endStep) {
2417 if ( langevinPistonFrequency.
check(step) ) {
2504 doNonbonded = nonbondedFrequency.
check(step);
2506 doFullElectrostatics = (dofull && fullElectFrequency.
check(step));
2662 fprintf(stderr,
"Patch %d has %d atoms\n", pid, n);
2663 fprintf(stderr,
"%3s %8s %12s %12s %12s\n",
2664 "",
"id",
"fnormal_x",
"fnbond_x",
"fslow_x");
2665 for (
int i=0; i < n; i++) {
2666 int index = p.
id[i];
2667 fprintf(stderr,
"%3d %8d %12.8f %12.8f %12.8f\n",
2674 for (
int i=0; i < n; i++) {
2685 TestArray_write<double>(
2686 "f_normal_good.bin",
"f_normal good", (
double*)f_normal, 3*n);
2687 TestArray_write<double>(
2688 "f_nbond_good.bin",
"f_nbond good", (
double*)f_nbond, 3*n);
2689 TestArray_write<double>(
2690 "f_slow_good.bin",
"f_slow good", (
double*)f_slow, 3*n);
2712 patch->copy_updates_to_AOS();
2716 printf(
"Timer collection reporting in microseconds for " 2727 const double scaling,
2732 const double * __restrict recipMass,
2733 const double * __restrict f_normal_x,
2734 const double * __restrict f_normal_y,
2735 const double * __restrict f_normal_z,
2736 const double * __restrict f_nbond_x,
2737 const double * __restrict f_nbond_y,
2738 const double * __restrict f_nbond_z,
2739 const double * __restrict f_slow_x,
2740 const double * __restrict f_slow_y,
2741 const double * __restrict f_slow_z,
2742 double * __restrict vel_x,
2743 double * __restrict vel_y,
2744 double * __restrict vel_z,
2750 NamdProfileEvent::ADD_FORCE_TO_MOMENTUM_SOA);
2752 #ifdef SOA_SIMPLIFY_PARAMS 2753 const double * __restrict recipMass =
patch->patchDataSOA.
recipMass;
2755 const double * __restrict f_normal_x =
patch->patchDataSOA.
f_normal_x;
2756 const double * __restrict f_normal_y =
patch->patchDataSOA.
f_normal_y;
2757 const double * __restrict f_normal_z =
patch->patchDataSOA.
f_normal_z;
2759 const double * __restrict f_nbond_x =
patch->patchDataSOA.
f_nbond_x;
2760 const double * __restrict f_nbond_y =
patch->patchDataSOA.
f_nbond_y;
2761 const double * __restrict f_nbond_z =
patch->patchDataSOA.
f_nbond_z;
2763 const double * __restrict f_slow_x =
patch->patchDataSOA.
f_slow_x;
2764 const double * __restrict f_slow_y =
patch->patchDataSOA.
f_slow_y;
2765 const double * __restrict f_slow_z =
patch->patchDataSOA.
f_slow_z;
2766 double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
2767 double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
2768 double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
2783 if(this->patch->getPatchID() == 538){
2791 fprintf(stderr,
"Old Velocities %lf %lf %lf\n", vel_x[0], vel_y[0], vel_z[ 0]);
2792 fprintf(stderr,
"Adding forces %lf %lf %lf %lf %lf %lf %lf %lf %lf\n",
2793 f_slow_x[43], f_slow_y[43], f_slow_z[43],
2794 f_nbond_x[43], f_nbond_y[43], f_nbond_z[43],
2795 f_normal_x[43], f_normal_y[43], f_normal_z[43]);
2798 switch (maxForceNumber) {
2801 for (
int i=0; i < numAtoms; i++) {
2802 vel_x[i] += f_slow_x[i] * recipMass[i] * dt_slow;
2803 vel_y[i] += f_slow_y[i] * recipMass[i] * dt_slow;
2804 vel_z[i] += f_slow_z[i] * recipMass[i] * dt_slow;
2808 dt_nbond *= scaling;
2809 for (
int i=0; i < numAtoms; i++) {
2810 vel_x[i] += f_nbond_x[i] * recipMass[i] * dt_nbond;
2811 vel_y[i] += f_nbond_y[i] * recipMass[i] * dt_nbond;
2812 vel_z[i] += f_nbond_z[i] * recipMass[i] * dt_nbond;
2816 dt_normal *= scaling;
2817 for (
int i=0; i < numAtoms; i++) {
2818 vel_x[i] += f_normal_x[i] * recipMass[i] * dt_normal;
2819 vel_y[i] += f_normal_y[i] * recipMass[i] * dt_normal;
2820 vel_z[i] += f_normal_z[i] * recipMass[i] * dt_normal;
2833 const double * __restrict vel_x,
2834 const double * __restrict vel_y,
2835 const double * __restrict vel_z,
2836 double * __restrict pos_x,
2837 double * __restrict pos_y,
2838 double * __restrict pos_z,
2843 NamdProfileEvent::ADD_VELOCITY_TO_POSITION_SOA);
2844 #ifdef SOA_SIMPLIFY_PARAMS 2845 const double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
2846 const double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
2847 const double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
2848 double * __restrict pos_x =
patch->patchDataSOA.
pos_x;
2849 double * __restrict pos_y =
patch->patchDataSOA.
pos_y;
2850 double * __restrict pos_z =
patch->patchDataSOA.
pos_z;
2853 for (
int i=0; i < numAtoms; i++) {
2854 pos_x[i] += vel_x[i] * dt;
2855 pos_y[i] += vel_y[i] * dt;
2856 pos_z[i] += vel_z[i] * dt;
2859 if(this->patch->getPatchID() == 538){
2860 fprintf(stderr,
"New Positions %lf %lf %lf\n", pos_x[43], pos_y[43], pos_z[43]);
2861 fprintf(stderr,
"New Velocities %lf %lf %lf\n", vel_x[43], vel_y[43], vel_z[43]);
2870 const int * __restrict hydrogenGroupSize,
2871 const float * __restrict mass,
2872 const double * __restrict vel_x,
2873 const double * __restrict vel_y,
2874 const double * __restrict vel_z,
2879 NamdProfileEvent::SUBMIT_HALFSTEP_SOA);
2880 #ifdef SOA_SIMPLIFY_PARAMS 2882 const float * __restrict mass =
patch->patchDataSOA.
mass;
2883 const double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
2884 const double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
2885 const double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
2891 for (
int i=0; i < numAtoms; i++) {
2893 kineticEnergy += mass[i] *
2894 (vel_x[i]*vel_x[i] + vel_y[i]*vel_y[i] + vel_z[i]*vel_z[i]);
2896 virial.
xx += mass[i] * vel_x[i] * vel_x[i];
2897 virial.
xy += mass[i] * vel_x[i] * vel_y[i];
2898 virial.
xz += mass[i] * vel_x[i] * vel_z[i];
2899 virial.
yx += mass[i] * vel_y[i] * vel_x[i];
2900 virial.
yy += mass[i] * vel_y[i] * vel_y[i];
2901 virial.
yz += mass[i] * vel_y[i] * vel_z[i];
2902 virial.
zx += mass[i] * vel_z[i] * vel_x[i];
2903 virial.
zy += mass[i] * vel_z[i] * vel_y[i];
2904 virial.
zz += mass[i] * vel_z[i] * vel_z[i];
2906 kineticEnergy *= 0.5 * 0.5;
2917 for (
int i=0; i < numAtoms; i += hgs) {
2920 hgs = hydrogenGroupSize[i];
2925 for (
int j = i; j < (i+hgs); j++) {
2927 v_cm_x += mass[j] * vel_x[j];
2928 v_cm_y += mass[j] * vel_y[j];
2929 v_cm_z += mass[j] * vel_z[j];
2931 BigReal recip_m_cm = 1.0 / m_cm;
2932 v_cm_x *= recip_m_cm;
2933 v_cm_y *= recip_m_cm;
2934 v_cm_z *= recip_m_cm;
2936 for (
int j = i; j < (i+hgs); j++) {
2937 BigReal dv_x = vel_x[j] - v_cm_x;
2938 BigReal dv_y = vel_y[j] - v_cm_y;
2939 BigReal dv_z = vel_z[j] - v_cm_z;
2941 intKineticEnergy += mass[j] *
2942 (vel_x[j] * dv_x + vel_y[j] * dv_y + vel_z[j] * dv_z);
2944 intVirialNormal.
xx += mass[j] * vel_x[j] * dv_x;
2945 intVirialNormal.
xy += mass[j] * vel_x[j] * dv_y;
2946 intVirialNormal.
xz += mass[j] * vel_x[j] * dv_z;
2947 intVirialNormal.
yx += mass[j] * vel_y[j] * dv_x;
2948 intVirialNormal.
yy += mass[j] * vel_y[j] * dv_y;
2949 intVirialNormal.
yz += mass[j] * vel_y[j] * dv_z;
2950 intVirialNormal.
zx += mass[j] * vel_z[j] * dv_x;
2951 intVirialNormal.
zy += mass[j] * vel_z[j] * dv_y;
2952 intVirialNormal.
zz += mass[j] * vel_z[j] * dv_z;
2955 intKineticEnergy *= 0.5 * 0.5;
2956 intVirialNormal *= 0.5;
2958 += intKineticEnergy;
2970 const int * __restrict hydrogenGroupSize,
2971 const float * __restrict mass,
2972 const double * __restrict pos_x,
2973 const double * __restrict pos_y,
2974 const double * __restrict pos_z,
2975 const double * __restrict vel_x,
2976 const double * __restrict vel_y,
2977 const double * __restrict vel_z,
2978 const double * __restrict f_normal_x,
2979 const double * __restrict f_normal_y,
2980 const double * __restrict f_normal_z,
2981 const double * __restrict f_nbond_x,
2982 const double * __restrict f_nbond_y,
2983 const double * __restrict f_nbond_z,
2984 const double * __restrict f_slow_x,
2985 const double * __restrict f_slow_y,
2986 const double * __restrict f_slow_z,
2991 NamdProfileEvent::SUBMIT_REDUCTIONS_SOA);
2992 #ifdef SOA_SIMPLIFY_PARAMS 2994 const float * __restrict mass =
patch->patchDataSOA.
mass;
2995 const double * __restrict pos_x =
patch->patchDataSOA.
pos_x;
2996 const double * __restrict pos_y =
patch->patchDataSOA.
pos_y;
2997 const double * __restrict pos_z =
patch->patchDataSOA.
pos_z;
2998 const double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
2999 const double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
3000 const double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
3001 const double * __restrict f_normal_x =
patch->patchDataSOA.
f_normal_x;
3002 const double * __restrict f_normal_y =
patch->patchDataSOA.
f_normal_y;
3003 const double * __restrict f_normal_z =
patch->patchDataSOA.
f_normal_z;
3004 const double * __restrict f_nbond_x =
patch->patchDataSOA.
f_nbond_x;
3005 const double * __restrict f_nbond_y =
patch->patchDataSOA.
f_nbond_y;
3006 const double * __restrict f_nbond_z =
patch->patchDataSOA.
f_nbond_z;
3007 const double * __restrict f_slow_x =
patch->patchDataSOA.
f_slow_x;
3008 const double * __restrict f_slow_y =
patch->patchDataSOA.
f_slow_y;
3009 const double * __restrict f_slow_z =
patch->patchDataSOA.
f_slow_z;
3021 BigReal angularMomentum_x = 0;
3022 BigReal angularMomentum_y = 0;
3023 BigReal angularMomentum_z = 0;
3030 for (
int i=0; i < numAtoms; i++) {
3033 kineticEnergy += mass[i] *
3034 (vel_x[i]*vel_x[i] + vel_y[i]*vel_y[i] + vel_z[i]*vel_z[i]);
3037 momentum_x += mass[i] * vel_x[i];
3038 momentum_y += mass[i] * vel_y[i];
3039 momentum_z += mass[i] * vel_z[i];
3042 BigReal dpos_x = pos_x[i] - origin_x;
3043 BigReal dpos_y = pos_y[i] - origin_y;
3044 BigReal dpos_z = pos_z[i] - origin_z;
3047 angularMomentum_x += mass[i] * (dpos_y*vel_z[i] - dpos_z*vel_y[i]);
3048 angularMomentum_y += mass[i] * (dpos_z*vel_x[i] - dpos_x*vel_z[i]);
3049 angularMomentum_z += mass[i] * (dpos_x*vel_y[i] - dpos_y*vel_x[i]);
3054 kineticEnergy *= 0.5;
3055 Vector momentum(momentum_x, momentum_y, momentum_z);
3056 Vector angularMomentum(angularMomentum_x, angularMomentum_y,
3071 for (
int i=0; i < numAtoms; i += hgs) {
3072 hgs = hydrogenGroupSize[i];
3081 for ( j = i; j < (i+hgs); ++j ) {
3083 r_cm_x += mass[j] * pos_x[j];
3084 r_cm_y += mass[j] * pos_y[j];
3085 r_cm_z += mass[j] * pos_z[j];
3086 v_cm_x += mass[j] * vel_x[j];
3087 v_cm_y += mass[j] * vel_y[j];
3088 v_cm_z += mass[j] * vel_z[j];
3099 for ( j = i; j < (i+hgs); ++j ) {
3113 intKineticEnergy += mass[j] *
3114 (v_x * dv_x + v_y * dv_y + v_z * dv_z);
3117 BigReal dr_x = pos_x[j] - r_cm_x;
3118 BigReal dr_y = pos_y[j] - r_cm_y;
3119 BigReal dr_z = pos_z[j] - r_cm_z;
3122 intVirialNormal.
xx += f_normal_x[j] * dr_x;
3123 intVirialNormal.
xy += f_normal_x[j] * dr_y;
3124 intVirialNormal.
xz += f_normal_x[j] * dr_z;
3125 intVirialNormal.
yx += f_normal_y[j] * dr_x;
3126 intVirialNormal.
yy += f_normal_y[j] * dr_y;
3127 intVirialNormal.
yz += f_normal_y[j] * dr_z;
3128 intVirialNormal.
zx += f_normal_z[j] * dr_x;
3129 intVirialNormal.
zy += f_normal_z[j] * dr_y;
3130 intVirialNormal.
zz += f_normal_z[j] * dr_z;
3133 intVirialNbond.
xx += f_nbond_x[j] * dr_x;
3134 intVirialNbond.
xy += f_nbond_x[j] * dr_y;
3135 intVirialNbond.
xz += f_nbond_x[j] * dr_z;
3136 intVirialNbond.
yx += f_nbond_y[j] * dr_x;
3137 intVirialNbond.
yy += f_nbond_y[j] * dr_y;
3138 intVirialNbond.
yz += f_nbond_y[j] * dr_z;
3139 intVirialNbond.
zx += f_nbond_z[j] * dr_x;
3140 intVirialNbond.
zy += f_nbond_z[j] * dr_y;
3141 intVirialNbond.
zz += f_nbond_z[j] * dr_z;
3144 intVirialSlow.
xx += f_slow_x[j] * dr_x;
3145 intVirialSlow.
xy += f_slow_x[j] * dr_y;
3146 intVirialSlow.
xz += f_slow_x[j] * dr_z;
3147 intVirialSlow.
yx += f_slow_y[j] * dr_x;
3148 intVirialSlow.
yy += f_slow_y[j] * dr_y;
3149 intVirialSlow.
yz += f_slow_y[j] * dr_z;
3150 intVirialSlow.
zx += f_slow_z[j] * dr_x;
3151 intVirialSlow.
zy += f_slow_z[j] * dr_y;
3152 intVirialSlow.
zz += f_slow_z[j] * dr_z;
3156 intKineticEnergy *= 0.5;
3181 NamdProfileEvent::SUBMIT_COLLECTIONS_SOA);
3196 if ( is_pos_needed || is_vel_needed ) {
3197 patch->copy_updates_to_AOS();
3206 patch->copy_forces_to_AOS();
3208 if ( is_pos_needed ) {
3211 if ( is_vel_needed ) {
3214 if ( is_f_needed ) {
3224 const double maxvel2
3227 const double * __restrict vel_x,
3228 const double * __restrict vel_y,
3229 const double * __restrict vel_z,
3234 #ifdef SOA_SIMPLIFY_PARAMS 3235 const double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
3236 const double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
3237 const double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
3245 for (
int i=0; i < numAtoms; i++) {
3247 vel_x[i] * vel_x[i] + vel_y[i] * vel_y[i] + vel_z[i] * vel_z[i];
3248 killme = killme + ( vel2 > maxvel2 );
3255 for (
int i=0; i < numAtoms; i++) {
3257 vel_x[i] * vel_x[i] + vel_y[i] * vel_y[i] + vel_z[i] * vel_z[i];
3258 if (vel2 > maxvel2) {
3260 const Vector vel(vel_x[i], vel_y[i], vel_z[i]);
3261 const BigReal maxvel = sqrt(maxvel2);
3263 iout <<
iERROR <<
"Atom " << (a[i].
id + 1) <<
" velocity is " 3266 << i <<
" of " << numAtoms <<
" on patch " 3271 "Atoms moving too fast; simulation has become unstable (" 3273 <<
" pe " << CkMyPe() <<
").\n" <<
endi;
3284 const float * __restrict langevinParam,
3285 double * __restrict vel_x,
3286 double * __restrict vel_y,
3287 double * __restrict vel_z,
3292 NamdProfileEvent::LANGEVIN_VELOCITIES_BBK1_SOA);
3293 #ifdef SOA_SIMPLIFY_PARAMS 3295 double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
3296 double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
3297 double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
3313 for (
int i=0; i < numAtoms; i++) {
3314 BigReal dt_gamma = dt * langevinParam[i];
3317 BigReal scaling = 1. - 0.5 * dt_gamma;
3318 vel_x[i] *= scaling;
3319 vel_y[i] *= scaling;
3320 vel_z[i] *= scaling;
3330 const float * __restrict langevinParam,
3331 const float * __restrict langScalVelBBK2,
3332 const float * __restrict langScalRandBBK2,
3333 float * __restrict gaussrand_x,
3334 float * __restrict gaussrand_y,
3335 float * __restrict gaussrand_z,
3336 double * __restrict vel_x,
3337 double * __restrict vel_y,
3338 double * __restrict vel_z,
3344 NamdProfileEvent::LANGEVIN_VELOCITIES_BBK2_SOA);
3345 #ifdef SOA_SIMPLIFY_PARAMS 3352 double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
3353 double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
3354 double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
3382 for (
int i=0; i < numAtoms; i++) {
3385 gaussrand_x[i] = float(rg.
x);
3386 gaussrand_y[i] = float(rg.
y);
3387 gaussrand_z[i] = float(rg.
z);
3398 for (
int i=0; i < numAtoms; i++) {
3399 vel_x[i] += gaussrand_x[i] * langScalRandBBK2[i];
3400 vel_y[i] += gaussrand_y[i] * langScalRandBBK2[i];
3401 vel_z[i] += gaussrand_z[i] * langScalRandBBK2[i];
3402 vel_x[i] *= langScalVelBBK2[i];
3403 vel_y[i] *= langScalVelBBK2[i];
3404 vel_z[i] *= langScalVelBBK2[i];
3411 const int * __restrict hydrogenGroupSize,
3412 const float * __restrict mass,
3413 double * __restrict pos_x,
3414 double * __restrict pos_y,
3415 double * __restrict pos_z,
3420 #ifdef SOA_SIMPLIFY_PARAMS 3422 const float * __restrict mass =
patch->patchDataSOA.
mass;
3423 double * __restrict pos_x =
patch->patchDataSOA.
pos_x;
3424 double * __restrict pos_y =
patch->patchDataSOA.
pos_y;
3425 double * __restrict pos_z =
patch->patchDataSOA.
pos_z;
3444 for (
int i = 0; i < numAtoms; i += hgs) {
3446 hgs = hydrogenGroupSize[i];
3453 for ( j = i; j < (i+hgs); ++j ) {
3455 r_cm_x += mass[j] * pos_x[j];
3456 r_cm_y += mass[j] * pos_y[j];
3457 r_cm_z += mass[j] * pos_z[j];
3465 double tx = r_cm_x - origin.
x;
3466 double ty = r_cm_y - origin.
y;
3467 double tz = r_cm_z - origin.
z;
3469 double new_r_cm_x = factor.
xx*tx + factor.
xy*ty + factor.
xz*tz;
3470 double new_r_cm_y = factor.
yx*tx + factor.
yy*ty + factor.
yz*tz;
3471 double new_r_cm_z = factor.
zx*tx + factor.
zy*ty + factor.
zz*tz;
3473 new_r_cm_x += origin.
x;
3474 new_r_cm_y += origin.
y;
3475 new_r_cm_z += origin.
z;
3477 double delta_r_cm_x = new_r_cm_x - r_cm_x;
3478 double delta_r_cm_y = new_r_cm_y - r_cm_y;
3479 double delta_r_cm_z = new_r_cm_z - r_cm_z;
3481 for (j = i; j < (i+hgs); ++j) {
3482 pos_x[j] += delta_r_cm_x;
3483 pos_y[j] += delta_r_cm_y;
3484 pos_z[j] += delta_r_cm_z;
3488 for (
int i = 0; i < numAtoms; ++i) {
3492 double tx = pos_x[i] - origin.
x;
3493 double ty = pos_y[i] - origin.
y;
3494 double tz = pos_z[i] - origin.
z;
3496 double ftx = factor.
xx*tx + factor.
xy*ty + factor.
xz*tz;
3497 double fty = factor.
yx*tx + factor.
yy*ty + factor.
yz*tz;
3498 double ftz = factor.
zx*tx + factor.
zy*ty + factor.
zz*tz;
3500 pos_x[i] = ftx + origin.
x;
3501 pos_y[i] = fty + origin.
y;
3502 pos_z[i] = ftz + origin.
z;
3510 const int * __restrict hydrogenGroupSize,
3511 const float * __restrict mass,
3512 double * __restrict pos_x,
3513 double * __restrict pos_y,
3514 double * __restrict pos_z,
3515 double * __restrict vel_x,
3516 double * __restrict vel_y,
3517 double * __restrict vel_z,
3523 #ifdef SOA_SIMPLIFY_PARAMS 3525 const float * __restrict mass =
patch->patchDataSOA.
mass;
3526 double * __restrict pos_x =
patch->patchDataSOA.
pos_x;
3527 double * __restrict pos_y =
patch->patchDataSOA.
pos_y;
3528 double * __restrict pos_z =
patch->patchDataSOA.
pos_z;
3529 double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
3530 double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
3531 double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
3553 for (
int i=0; i < numAtoms; i += hgs) {
3555 hgs = hydrogenGroupSize[i];
3564 for ( j = i; j < (i+hgs); ++j ) {
3566 r_cm_x += mass[j] * pos_x[j];
3567 r_cm_y += mass[j] * pos_y[j];
3568 r_cm_z += mass[j] * pos_z[j];
3569 v_cm_x += mass[j] * vel_x[j];
3570 v_cm_y += mass[j] * vel_y[j];
3571 v_cm_z += mass[j] * vel_z[j];
3578 double tx = r_cm_x - origin.
x;
3579 double ty = r_cm_y - origin.
y;
3580 double tz = r_cm_z - origin.
z;
3581 double new_r_cm_x = factor.
xx*tx + factor.
xy*ty + factor.
xz*tz;
3582 double new_r_cm_y = factor.
yx*tx + factor.
yy*ty + factor.
yz*tz;
3583 double new_r_cm_z = factor.
zx*tx + factor.
zy*ty + factor.
zz*tz;
3584 new_r_cm_x += origin.
x;
3585 new_r_cm_y += origin.
y;
3586 new_r_cm_z += origin.
z;
3588 double delta_r_cm_x = new_r_cm_x - r_cm_x;
3589 double delta_r_cm_y = new_r_cm_y - r_cm_y;
3590 double delta_r_cm_z = new_r_cm_z - r_cm_z;
3594 double delta_v_cm_x = ( velFactor_x - 1 ) * v_cm_x;
3595 double delta_v_cm_y = ( velFactor_y - 1 ) * v_cm_y;
3596 double delta_v_cm_z = ( velFactor_z - 1 ) * v_cm_z;
3597 for (j = i; j < (i+hgs); j++) {
3598 pos_x[j] += delta_r_cm_x;
3599 pos_y[j] += delta_r_cm_y;
3600 pos_z[j] += delta_r_cm_z;
3601 vel_x[j] += delta_v_cm_x;
3602 vel_y[j] += delta_v_cm_y;
3603 vel_z[j] += delta_v_cm_z;
3612 for (
int i=0; i < numAtoms; i++) {
3613 double tx = pos_x[i] - origin.
x;
3614 double ty = pos_y[i] - origin.
y;
3615 double tz = pos_z[i] - origin.
z;
3616 double ftx = factor.
xx*tx + factor.
xy*ty + factor.
xz*tz;
3617 double fty = factor.
yx*tx + factor.
yy*ty + factor.
yz*tz;
3618 double ftz = factor.
zx*tx + factor.
zy*ty + factor.
zz*tz;
3619 pos_x[i] = ftx + origin.
x;
3620 pos_y[i] = fty + origin.
y;
3621 pos_z[i] = ftz + origin.
z;
3622 vel_x[i] *= velFactor_x;
3623 vel_y[i] *= velFactor_y;
3624 vel_z[i] *= velFactor_z;
3642 Tensor *vp = ( pressure ? &virial : 0 );
3646 "Constraint failure; simulation has become unstable.\n" <<
endi;
3657 #if (defined(NAMD_CUDA) || defined(NAMD_HIP)) || defined(NAMD_MIC) 3672 #if defined(NTESTPID) 3676 double *xyzq =
new double[4*numAtoms];
3681 for (
int i=0; i < numAtoms; i++) {
3687 char fname[128], remark[128];
3688 sprintf(fname,
"xyzq_soa_pid%d_step%d.bin", NTESTPID, step);
3689 sprintf(remark,
"SOA xyzq, patch %d, step %d", NTESTPID, step);
3690 TestArray_write<double>(fname, remark, xyzq, 4*numAtoms);
3695 patch->zero_global_forces_SOA();
3699 int basePriority = ( (seq & 0xffff) << 15 )
3710 #ifdef NODEGROUP_FORCE_REGISTER 3712 patch->copy_forces_to_SOA();
3715 patch->copy_forces_to_SOA();
3718 #if defined(NTESTPID) 3724 double *fxyz =
new double[3*numAtoms];
3728 for (
int i=0; i < numAtoms; i++) {
3730 fxyz[3*i+1] = fy[i];
3731 fxyz[3*i+2] = fz[i];
3733 sprintf(fname,
"fxyz_normal_soa_pid%d_step%d.bin", NTESTPID, step);
3734 sprintf(remark,
"SOA fxyz normal, patch %d, step %d", NTESTPID, step);
3735 TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3739 for (
int i=0; i < numAtoms; i++) {
3741 fxyz[3*i+1] = fy[i];
3742 fxyz[3*i+2] = fz[i];
3744 sprintf(fname,
"fxyz_nbond_soa_pid%d_step%d.bin", NTESTPID, step);
3745 sprintf(remark,
"SOA fxyz nonbonded, patch %d, step %d", NTESTPID, step);
3746 TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3750 for (
int i=0; i < numAtoms; i++) {
3752 fxyz[3*i+1] = fy[i];
3753 fxyz[3*i+2] = fz[i];
3755 sprintf(fname,
"fxyz_slow_soa_pid%d_step%d.bin", NTESTPID, step);
3756 sprintf(remark,
"SOA fxyz slow, patch %d, step %d", NTESTPID, step);
3757 TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3765 double *fxyz =
new double[3*numAtoms];
3766 double *fx, *fy, *fz;
3767 char fname[64], remark[128];
3773 for (
int i=0; i < numAtoms; i++) {
3775 fxyz[3*i+1] = fy[i];
3776 fxyz[3*i+2] = fz[i];
3778 sprintf(fname,
"fslow_soa_%d.bin", step);
3779 sprintf(remark,
"SOA slow forces, step %d\n", step);
3780 TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3785 for (
int i=0; i < numAtoms; i++) {
3787 fxyz[3*i+1] = fy[i];
3788 fxyz[3*i+2] = fz[i];
3790 sprintf(fname,
"fnbond_soa_%d.bin", step);
3791 sprintf(remark,
"SOA nonbonded forces, step %d\n", step);
3792 TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3797 for (
int i=0; i < numAtoms; i++) {
3799 fxyz[3*i+1] = fy[i];
3800 fxyz[3*i+2] = fz[i];
3802 sprintf(fname,
"fnormal_soa_%d.bin", step);
3803 sprintf(remark,
"SOA normal forces, step %d\n", step);
3804 TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3813 fprintf(stderr,
"CPU force arrays for alanin\n" );
3815 fprintf(stderr,
"f[%i] = %lf %lf %lf | %lf %lf %lf | %lf %lf %lf\n", i,
3844 double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
3845 double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
3846 double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
3850 DebugM(4,
"stochastically rescaling velocities at step " << step <<
" by " << velrescaling <<
"\n");
3851 for (
int i = 0; i < numAtoms; ++i ) {
3852 vel_x[i] *= velrescaling;
3853 vel_y[i] *= velrescaling;
3854 vel_z[i] *= velrescaling;
3865 #endif // SEQUENCER_SOA 3872 char tracePrefix[20];
3882 #ifdef TIMER_COLLECTION 3883 TimerSet& t =
patch->timerSet;
3919 const BigReal nbondstep = timestep * (staleForces?1:nonbondedFrequency);
3921 doNonbonded = (step >= numberOfSteps) || !(step%nonbondedFrequency);
3928 if ( dofull )
slowFreq = fullElectFrequency;
3929 const BigReal slowstep = timestep * (staleForces?1:fullElectFrequency);
3931 doFullElectrostatics = (dofull && ((step >= numberOfSteps) || !(step%fullElectFrequency)));
3946 if ( accelMDOn && (accelMDdihe || accelMDdual)) maxForceUsed =
Results::amdf;
3976 int doGlobal = doTcl || doColvars;
3982 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 4010 if ( !commOnly && ( reassignFreq>0 ) && ! (step%reassignFreq) ) {
4015 doEnergy = ! ( step % energyFrequency );
4017 if ( accelMDOn && !accelMDdihe ) doEnergy=1;
4019 if ( adaptTempOn ) doEnergy=1;
4022 D_MSG(
"runComputeObjects()");
4030 if ( staleForces || doGlobal ) {
4036 D_MSG(
"newtonianVelocities()");
4050 D_MSG(
"submitHalfstep()");
4056 D_MSG(
"newtonianVelocities()");
4069 D_MSG(
"submitHalfstep()");
4073 if ( zeroMomentum && doFullElectrostatics )
submitMomentum(step);
4075 D_MSG(
"newtonianVelocities()");
4082 D_MSG(
"submitReductions()");
4090 sprintf(traceNote,
"%s%d",tracePrefix,step);
4091 traceUserSuppliedNote(traceNote);
4099 bool doMultigratorRattle =
false;
4113 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED) 4117 int beginStep =
simParams->beginEventStep;
4122 for ( ++step; step <= numberOfSteps; ++step )
4124 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED) 4125 eon = epid && (beginStep < step && step <= endStep);
4127 if (controlProfiling && step == beginStep) {
4130 if (controlProfiling && step == endStep) {
4137 DebugM(3,
"for step "<<step<<
" dGlobal " << doGlobal<<
"\n"<<
endi);
4148 newtonianVelocities(0.5,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4153 if (doMultigratorRattle)
rattle1(timestep, 1);
4206 #endif // UPPER_BOUND 4208 doNonbonded = !(step%nonbondedFrequency);
4209 doFullElectrostatics = (dofull && !(step%fullElectFrequency));
4216 if ( zeroMomentum && doFullElectrostatics ) {
4235 if ( accelMDOn && (accelMDdihe || accelMDdual)) maxForceUsed =
Results::amdf;
4238 doEnergy = ! ( step % energyFrequency );
4239 if ( accelMDOn && !accelMDdihe ) doEnergy=1;
4240 if ( adaptTempOn ) doEnergy=1;
4264 if ( staleForces || doGlobal ) {
4270 if ( !commOnly && ( reassignFreq>0 ) && ! (step%reassignFreq) ) {
4272 newtonianVelocities(-0.5,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4281 newtonianVelocities(1.0,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4301 if ( zeroMomentum && doFullElectrostatics )
submitMomentum(step);
4305 newtonianVelocities(-0.5,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4344 if(step == bstep || step == estep){
4349 #ifdef MEASURE_NAMD_WITH_PAPI 4351 int bstep =
simParams->papiMeasureStartStep;
4352 int estep = bstep +
simParams->numPapiMeasureSteps;
4353 if(step == bstep || step==estep) {
4354 papiMeasureBarrier(step);
4361 sprintf(traceNote,
"%s%d",tracePrefix,step);
4362 traceUserSuppliedNote(traceNote);
4364 #endif // UPPER_BOUND 4369 cycleBarrier(dofull && !((step+1)%fullElectFrequency),step);
4373 if(step == START_HPM_STEP)
4374 (CProxy_Node(CkpvAccess(BOCclass_group).node)).startHPM();
4376 if(step == STOP_HPM_STEP)
4377 (CProxy_Node(CkpvAccess(BOCclass_group).node)).stopHPM();
4383 #ifdef TIMER_COLLECTION 4385 printf(
"Timer collection reporting in microseconds for " 4389 #endif // TIMER_COLLECTION 4403 Vector movDragVel, dragIncrement;
4404 for (
int i = 0; i < numAtoms; ++i )
4410 dragIncrement = movDragGlobVel * movDragVel * dt;
4424 Vector rotDragAxis, rotDragPivot, dragIncrement;
4425 for (
int i = 0; i < numAtoms; ++i )
4431 dAngle = rotDragGlobVel * rotDragVel * dt;
4432 rotDragAxis /= rotDragAxis.
length();
4433 atomRadius = atom[i].
position - rotDragPivot;
4434 dragIncrement = cross(rotDragAxis, atomRadius) * dAngle;
4448 #if 0 && defined(NODEGROUP_FORCE_REGISTER) 4451 const int stepsPerCycle_save = stepsPerCycle;
4467 doFullElectrostatics = dofull;
4489 int doGlobal = doTcl || doColvars;
4508 #ifdef DEBUG_MINIMIZE 4509 printf(
"doTcl = %d doColvars = %d\n", doTcl, doColvars);
4515 #ifdef DEBUG_MINIMIZE 4516 else { printf(
"No computeGlobal\n"); }
4526 for ( ++step; step <= numberOfSteps; ++step ) {
4569 patch->copy_atoms_to_SOA();
4573 #if 0 && defined(NODEGROUP_FORCE_REGISTER) 4591 for (
int i = 0; i < numAtoms; ++i ) {
4597 for (
int j=1; j<hgs; ++j ) {
4615 for (
int i = 0; i < numAtoms; ++i ) {
4618 if ( drudeHardWallOn && i && (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) {
4621 if ( fixedAtomsOn && a[i].atomFixed ) a[i].
velocity = 0;
4623 if ( v2 > maxv2 ) maxv2 = v2;
4629 for (
int i = 0; i < numAtoms; ++i ) {
4630 if ( drudeHardWallOn && i && (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) {
4633 if ( fixedAtomsOn && a[i].atomFixed ) a[i].
velocity = 0;
4635 if ( v2 > maxv2 ) maxv2 = v2;
4645 for (
int i = 0; i < numAtoms; i += hgs ) {
4648 if ( minChildVel < fmax2 )
continue;
4649 int adjustChildren = 1;
4650 for (
int j = i+1; j < (i+hgs); ++j ) {
4651 if ( a[j].velocity.length2() > minChildVel ) adjustChildren = 0;
4653 if ( adjustChildren ) {
4655 for (
int j = i+1; j < (i+hgs); ++j ) {
4656 if (a[i].mass < 0.01)
continue;
4670 for (
int i = 0; i < numAtoms; ++i ) {
4675 for (
int i = 1; i < numAtoms; ++i ) {
4676 if ( (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) {
4685 for (
int i = 1; i < numAtoms; ++i ) {
4686 if ( (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) {
4698 for (
int i = 0; i < numAtoms; ++i ) {
4711 for (
int i = 0; i < numAtoms; ++i ) {
4716 for (
int i = 0; i < numAtoms; ++i ) {
4732 NAMD_die(
"Cannot zero momentum when fixed atoms are present.");
4743 for (
int i = 0; i < numAtoms; ++i ) {
4744 a[i].velocity += dv * a[i].recipMass;
4745 a[i].position += dx * a[i].recipMass;
4748 for (
int i = 0; i < numAtoms; ++i ) {
4749 a[i].velocity += dv;
4750 a[i].position += dx;
4762 NAMD_bug(
"Sequencer::scalePositionsVelocities, fixed atoms not implemented");
4766 for (
int i = 0; i < numAtoms; i += hgs ) {
4771 for (
int j=0;j < hgs;++j) {
4772 m_cm += a[i+j].
mass;
4781 for (
int j=0;j < hgs;++j) {
4787 for (
int i = 0; i < numAtoms; i++) {
4804 Tensor posScaleTensor = scaleTensor;
4833 for (
int i = 0; i < numAtoms; ++i ) {
4836 virialNormal.
outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
4840 for (
int i = 0; i < numAtoms; ++i ) {
4841 if (a[i].mass < 0.01)
continue;
4843 virialNormal.
outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
4847 kineticEnergy *= 0.5;
4855 Vector fixForceNormal = 0;
4856 Vector fixForceNbond = 0;
4859 calcFixVirial(fixVirialNormal, fixVirialNbond, fixVirialSlow, fixForceNormal, fixForceNbond, fixForceSlow);
4875 for (
int i = 0; i < numAtoms; i += hgs ) {
4881 for ( j = i; j < (i+hgs); ++j ) {
4892 NAMD_bug(
"Sequencer::multigratorPressure, this part needs to be implemented correctly");
4893 for ( j = i; j < (i+hgs); ++j ) {
4898 intVirialNormal2.
outerAdd (mass, v, dv);
4906 for ( j = i; j < (i+hgs); ++j ) {
4910 intVirialNormal2.
outerAdd(mass, v, dv);
4932 for (
int i = 0; i < numAtoms; i++) {
4943 for (
int i = 0; i < numAtoms; ++i ) {
4949 for (
int i = 0; i < numAtoms; ++i ) {
4953 kineticEnergy *= 0.5;
4954 return kineticEnergy;
4972 for (
int i = 0; i < numAtoms; i += hgs ) {
4978 for ( j = i; j < (i+hgs); ++j ) {
4983 momentumSqrSum.
outerAdd(1.0/m_cm, v_cm, v_cm);
4986 for (
int i = 0; i < numAtoms; i++) {
4987 momentumSqrSum.
outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
5006 const int staleForces,
5007 const int doNonbonded,
5008 const int doFullElectrostatics)
5011 NamdProfileEvent::NEWTONIAN_VELOCITIES);
5014 if (staleForces || (doNonbonded && doFullElectrostatics)) {
5020 if (staleForces || doNonbonded)
5022 if (staleForces || doFullElectrostatics)
5049 for (
int i = 0; i < numAtoms; ++i )
5052 if ( ! dt_gamma )
continue;
5054 BigReal f1 = exp( -dt_gamma );
5055 BigReal f2 = sqrt( ( 1. - f1*f1 ) * kbT *
5067 NamdProfileEvent::LANGEVIN_VELOCITIES_BBK1);
5077 for (i = 0; i < numAtoms; i++) {
5079 if (i < numAtoms-1 &&
5080 a[i+1].mass < 1.0 && a[i+1].mass > 0.05) {
5092 if (dt_gamma != 0.0) {
5093 v_com *= ( 1. - 0.5 * dt_gamma );
5098 if (dt_gamma != 0.0) {
5099 v_bnd *= ( 1. - 0.5 * dt_gamma );
5110 if ( ! dt_gamma )
continue;
5112 a[i].
velocity *= ( 1. - 0.5 * dt_gamma );
5123 for ( i = 0; i < numAtoms; ++i )
5126 if ( ! dt_gamma )
continue;
5128 a[i].
velocity *= ( 1. - 0.5 * dt_gamma );
5140 NamdProfileEvent::LANGEVIN_VELOCITIES_BBK2);
5167 for (i = 0; i < numAtoms; i++) {
5169 if (i < numAtoms-1 &&
5170 a[i+1].mass < 1.0 && a[i+1].mass > 0.05) {
5182 if (dt_gamma != 0.0) {
5185 sqrt( 2 * dt_gamma * kbT *
5186 ( a[i].
partition ? tempFactor : 1.0 ) / mass );
5187 v_com /= ( 1. + 0.5 * dt_gamma );
5192 if (dt_gamma != 0.0) {
5195 sqrt( 2 * dt_gamma * kbT_bnd *
5196 ( a[i+1].
partition ? tempFactor : 1.0 ) / mass );
5197 v_bnd /= ( 1. + 0.5 * dt_gamma );
5208 if ( ! dt_gamma )
continue;
5211 sqrt( 2 * dt_gamma * kbT *
5212 ( a[i].
partition ? tempFactor : 1.0 ) * a[i].recipMass );
5213 a[i].
velocity /= ( 1. + 0.5 * dt_gamma );
5227 for ( i = 0; i < numAtoms; ++i )
5230 if ( ! dt_gamma )
continue;
5233 sqrt( 2 * dt_gamma * kbT *
5234 ( a[i].
partition ? tempFactor : 1.0 ) * a[i].recipMass );
5235 a[i].
velocity /= ( 1. + 0.5 * dt_gamma );
5259 for (
int i = 0; i < numAtoms; i += hgs ) {
5263 for ( j = i; j < (i+hgs); ++j ) {
5265 a[j].fixedPosition,a[j].transform);
5271 for ( j = i; j < (i+hgs); ++j ) {
5279 Position delta_x_cm = new_x_cm - x_cm;
5280 for ( j = i; j < (i+hgs); ++j ) {
5283 a[j].fixedPosition,a[j].transform);
5292 for (
int i = 0; i < numAtoms; ++i )
5296 a[i].fixedPosition,a[i].transform);
5328 for (
int i = 0; i < numAtoms; i += hgs ) {
5332 for ( j = i; j < (i+hgs); ++j ) {
5334 a[j].fixedPosition,a[j].transform);
5341 for ( j = i; j < (i+hgs); ++j ) {
5350 Position delta_x_cm = new_x_cm - x_cm;
5353 delta_v_cm.
x = ( velFactor.
x - 1 ) * v_cm.
x;
5354 delta_v_cm.
y = ( velFactor.
y - 1 ) * v_cm.
y;
5355 delta_v_cm.
z = ( velFactor.
z - 1 ) * v_cm.
z;
5356 for ( j = i; j < (i+hgs); ++j ) {
5359 a[j].fixedPosition,a[j].transform);
5370 for (
int i = 0; i < numAtoms; ++i )
5374 a[i].fixedPosition,a[i].transform);
5391 if ( rescaleFreq > 0 ) {
5398 for (
int i = 0; i < numAtoms; ++i )
5414 const BigReal factor_dihe = accelMDfactor[0];
5415 const BigReal factor_tot = accelMDfactor[1];
5419 NAMD_die(
"accelMD broadcasting error!\n");
5421 NAMD_die(
"accelMD broadcasting error!\n");
5424 for (
int i = 0; i < numAtoms; ++i)
5430 for (
int i = 0; i < numAtoms; ++i)
5433 for (
int i = 0; i < numAtoms; ++i)
5436 if (doFullElectrostatics) {
5437 for (
int i = 0; i < numAtoms; ++i)
5443 for (
int i = 0; i < numAtoms; ++i)
5454 if ( (step < simParams->adaptTempFirstStep ) ||
5469 if ( ( reassignFreq > 0 ) && ! ( step % reassignFreq ) ) {
5478 if ( newTemp < simParams->reassignHold )
5486 for (
int i = 0; i < numAtoms; ++i )
5490 sqrt(kbT * (a[i].
partition ? tempFactor : 1.0) * a[i].recipMass) *
5494 NAMD_bug(
"Sequencer::reassignVelocities called improperly!");
5508 for (
int i = 0; i < numAtoms; ++i )
5511 a[i].mass <= 0.) ?
Vector(0,0,0) :
5512 sqrt(kbT * (a[i].
partition ? tempFactor : 1.0) * a[i].recipMass) *
5525 for (
int i = 0; i < numAtoms; ++i )
5536 for (
int i = 0; i < numAtoms; ++i )
5548 BigReal sqrt_factor = sqrt(factor);
5551 for (
int i = 0; i < numAtoms; ++i ) {
5570 for (
int i = 0; i < numAtoms; ++i )
5572 BigReal f1 = exp( coefficient * a[i].langevinParam );
5590 DebugM(4,
"stochastically rescaling velocities at step " << step <<
" by " << velrescaling <<
"\n");
5591 for (
int i = 0; i < numAtoms; ++i ) {
5610 BigReal timestep,
const int ftag,
const int useSaved
5613 NamdProfileEvent::ADD_FORCE_TO_MOMENTUM);
5615 CmiNetworkProgressAfter (0);
5625 const BigReal timestep1,
const int ftag1,
const int useSaved1,
5626 const BigReal timestep2,
const int ftag2,
const int useSaved2,
5627 const BigReal timestep3,
const int ftag3,
const int useSaved3
5630 NamdProfileEvent::ADD_FORCE_TO_MOMENTUM);
5632 CmiNetworkProgressAfter (0);
5651 NamdProfileEvent::ADD_VELOCITY_TO_POSITION);
5653 CmiNetworkProgressAfter (0);
5664 Tensor *vp = ( pressure ? &virial : 0 );
5666 iout <<
iERROR <<
"Constraint failure in HardWallDrude(); " 5667 <<
"simulation may become unstable.\n" <<
endi;
5680 Tensor *vp = ( pressure ? &virial : 0 );
5683 "Constraint failure; simulation has become unstable.\n" <<
endi;
5688 printf(
"virial = %g %g %g %g %g %g %g %g %g\n",
5689 virial.
xx, virial.
xy, virial.
xz,
5690 virial.
yx, virial.
yy, virial.
yz,
5691 virial.
zx, virial.
zy, virial.
zz);
5698 printf(
"pos[%d] = %g %g %g\n", n,
5702 printf(
"vel[%d] = %g %g %g\n", n,
5707 printf(
"force[%d] = %g %g %g\n", n,
5741 const BigReal maxvel2 = maxvel * maxvel;
5742 for (
int i=0; i<numAtoms; ++i ) {
5743 if ( a[i].velocity.length2() > maxvel2 ) {
5750 const BigReal maxvel2 = maxvel * maxvel;
5752 for (
int i=0; i<numAtoms; ++i ) {
5757 for (
int i=0; i<numAtoms; ++i ) {
5758 if ( a[i].velocity.length2() > maxvel2 ) {
5760 iout <<
iERROR <<
"Atom " << (a[i].
id + 1) <<
" velocity is " 5763 << i <<
" of " << numAtoms <<
" on patch " 5768 "Atoms moving too fast; simulation has become unstable (" 5770 <<
" pe " << CkMyPe() <<
").\n" <<
endi;
5782 for (
int i=0; i<numAtoms; ++i ) {
5798 CmiNetworkProgressAfter (0);
5809 for (
int i = 0; i < numAtoms; ++i ) {
5812 virial.
outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
5816 for (
int i = 0; i < numAtoms; ++i ) {
5817 if (a[i].mass < 0.01)
continue;
5819 virial.
outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
5824 momentumSqrSum = virial;
5826 kineticEnergy *= 0.5 * 0.5;
5850 for (
int i=0; i<numAtoms; i += hgs) {
5857 for (j=i; j< i+hgs; ++j) {
5862 for (j=i; j < i+hgs; ++j) {
5864 if (! (useGroupPressure && j != i)) {
5866 int slab = (int)floor((z-zmin)*idz);
5867 if (slab < 0) slab += nslabs;
5868 else if (slab >= nslabs) slab -= nslabs;
5872 if (useGroupPressure) {
5895 for (
int i = 0; i < numAtoms; i += hgs ) {
5898 CmiNetworkProgress ();
5905 for ( j = i; j < (i+hgs); ++j ) {
5910 momentumSqrSum.
outerAdd(1.0/m_cm, v_cm, v_cm);
5915 for ( j = i; j < (i+hgs); ++j ) {
5920 intKineticEnergy += mass * (v * dv);
5921 intVirialNormal.
outerAdd (mass, v, dv);
5925 for ( j = i; j < (i+hgs); ++j ) {
5929 intKineticEnergy += mass * (v * dv);
5930 intVirialNormal.
outerAdd(mass, v, dv);
5934 intKineticEnergy *= 0.5 * 0.5;
5936 intVirialNormal *= 0.5;
5939 momentumSqrSum *= 0.5;
5952 for (
int j = 0; j < numAtoms; j++ ) {
5970 NamdProfileEvent::SUBMIT_REDUCTIONS);
5976 CmiNetworkProgressAfter(0);
5988 Vector angularMomentum = 0;
5993 for (i = 0; i < numAtoms; ++i ) {
5997 angularMomentum += cross(a[i].mass,a[i].position-o,a[i].velocity);
6001 for (i = 0; i < numAtoms; ++i ) {
6004 angularMomentum += cross(a[i].mass,a[i].position-o,a[i].velocity);
6010 for (i = 0; i < numAtoms; i++) {
6011 if (i < numAtoms-1 &&
6012 a[i+1].mass < 1.0 && a[i+1].mass > 0.05) {
6022 drudeComKE += m_com * v_com.
length2();
6023 drudeBondKE += m_bond * v_bond.length2();
6042 kineticEnergy *= 0.5;
6052 for (
int i = 0; i < numAtoms; ++i ) {
6059 for (
int i = 0; i < numAtoms; ++i ) {
6066 for (
int i = 0; i < numAtoms; ++i ) {
6082 for (
int i = 0; i < numAtoms; i += hgs ) {
6084 CmiNetworkProgress();
6091 for ( j = i; j < (i+hgs); ++j ) {
6101 for ( j = i; j < (i+hgs); ++j ) {
6103 ( pairInteractionSelf || a[j].
partition != 2 ) )
continue;
6105 if ( fixedAtomsOn && a[j].atomFixed )
continue;
6109 intKineticEnergy += mass * (v * dv);
6116 for ( j = i; j < (i+hgs); ++j ) {
6118 if ( fixedAtomsOn && a[j].atomFixed )
continue;
6122 intKineticEnergy += mass * (v * dv);
6131 intKineticEnergy *= 0.5;
6147 for (
int i=0; i<numAtoms; i += hgs) {
6152 for (j=i; j< i+hgs; ++j) {
6159 int slab = (int)floor((z-zmin)*idz);
6160 if (slab < 0) slab += nslabs;
6161 else if (slab >= nslabs) slab -= nslabs;
6163 int ppoffset = 3*(slab + nslabs*
partition);
6164 for (j=i; j < i+hgs; ++j) {
6170 BigReal wxx = (fnormal.
x + fnbond.
x + fslow.
x) * dx.
x;
6171 BigReal wyy = (fnormal.
y + fnbond.
y + fslow.
y) * dx.
y;
6172 BigReal wzz = (fnormal.
z + fnbond.
z + fslow.
z) * dx.
z;
6187 Vector fixForceNormal = 0;
6188 Vector fixForceNbond = 0;
6191 calcFixVirial(fixVirialNormal, fixVirialNbond, fixVirialSlow, fixForceNormal, fixForceNbond, fixForceSlow);
6194 auto printTensor = [](
const Tensor& t,
const std::string& name){
6195 CkPrintf(
"%s", name.c_str());
6196 CkPrintf(
"\n%12.5lf %12.5lf %12.5lf\n" 6197 "%12.5lf %12.5lf %12.5lf\n" 6198 "%12.5lf %12.5lf %12.5lf\n",
6203 printTensor(fixVirialNormal,
"fixVirialNormal = ");
6204 printTensor(fixVirialNbond,
"fixVirialNbond = ");
6205 printTensor(fixVirialSlow,
"fixVirialSlow = ");
6216 #endif // UPPER_BOUND 6233 const double drudeBondLen2 = drudeBondLen * drudeBondLen;
6235 const double drudeMove = 0.01;
6236 const double drudeStep2 = drudeStep * drudeStep;
6237 const double drudeMove2 = drudeMove * drudeMove;
6242 for (
int i = 0; i < numAtoms; ++i ) {
6245 printf(
"f1[%2d]= %f %f %f\n", i, f1[i].x, f1[i].y, f1[i].z);
6246 printf(
"f2[%2d]= %f %f %f\n", i, f2[i].x, f2[i].y, f2[i].z);
6249 f1[i] += f2[i] + f3[i];
6250 if ( drudeHardWallOn && i && (a[i].mass > 0.05) && ((a[i].mass < 1.0)) ) {
6251 if ( ! fixedAtomsOn || ! a[i].atomFixed ) {
6252 if ( drudeStep2 * f1[i].length2() > drudeMove2 ) {
6255 a[i].
position += drudeStep * f1[i];
6257 if ( (a[i].position - a[i-1].position).length2() > drudeBondLen2 ) {
6261 Vector netf = f1[i-1] + f1[i];
6262 if ( fixedAtomsOn && a[i-1].atomFixed ) netf = 0;
6266 if ( fixedAtomsOn && a[i].atomFixed ) f1[i] = 0;
6273 for (
int i = 0; i < numAtoms; ++i ) {
6276 if ( v2 > maxv2 ) maxv2 = v2;
6279 if ( v2 > maxv2 ) maxv2 = v2;
6290 for (
int i = 0; i < numAtoms; ++i ) {
6292 if ( drudeHardWallOn && (a[i].mass > 0.05) && ((a[i].mass < 1.0)) )
continue;
6301 BigReal fmult = 1.01 * sqrt(fmax2/ff);
6302 f *= fmult; ff = f * f;
6311 printf(
"fdotf = %f\n", fdotf);
6312 printf(
"fdotv = %f\n", fdotv);
6313 printf(
"vdotv = %f\n", vdotv);
6326 for (
int i = 0; i < numAtoms; i += hgs ) {
6331 for ( j = i; j < (i+hgs); ++j ) {
6336 for ( j = i; j < (i+hgs); ++j ) {
6356 Vector fixForceNormal = 0;
6357 Vector fixForceNbond = 0;
6360 calcFixVirial(fixVirialNormal, fixVirialNbond, fixVirialSlow, fixForceNormal, fixForceNbond, fixForceSlow);
6391 NamdProfileEvent::SUBMIT_COLLECTIONS);
6393 int dcdSelectionIndex;
6413 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC) 6445 int basePriority = ( (seq & 0xffff) << 15 )
6558 #ifdef NAMD_CUDA_XXX 6561 for (
int i=0; i<numAtoms; ++i ) {
6562 CkPrintf(
"%d %g %g %g\n", a[i].
id,
6572 CkPrintf(
"%d %g %g %g\n", a[i].
id,
6576 CkPrintf(
"%d %g %g %g\n", a[i].
id,
6580 CkPrintf(
"%d %g %g %g\n", a[i].
id,
6592 for (
int i=0; i<numAtoms; ++i ) {
6602 float fx = fxNo+fxNb+fxSl;
6603 float fy = fyNo+fyNb+fySl;
6604 float fz = fzNo+fzNb+fzSl;
6606 float f = sqrt(fx*fx+fy*fy+fz*fz);
6609 float x =
patch->
p[i].position.x;
6610 float y =
patch->
p[i].position.y;
6611 float z =
patch->
p[i].position.z;
6613 CkPrintf(
"FORCE(%04i)[%04i] = % .9e % .9e % .9e\n", seq,
id,
6648 #ifdef MEASURE_NAMD_WITH_PAPI 6649 void Sequencer::papiMeasureBarrier(
int step){
6651 broadcast->papiMeasureBarrier.get(step);
Real atomcharge(int anum) const
SubmitReduction * multigratorReduction
Vector gaussian_vector(void)
void rescaleVelocities(int)
void finishReduction(bool doEnergyVirial)
void minimizationQuenchVelocity(void)
int period
period for some step dependent event (e.g. stepsPerCycle)
NAMD_HOST_DEVICE void rescale(Tensor factor)
void max(int i, BigReal v)
int init(int initstep, int initperiod, int delta=0)
DCDParams dcdSelectionParams[16]
void barrier(const SynchronousCollectiveScope scope)
void tcoupleVelocities(BigReal, int)
void addMovDragToPosition(BigReal)
BigReal soluteScalingFactorCharge
void submitForces(int seq, FullAtomList &a, int maxForceUsed, ForceList *f, int prec)
virtual void algorithm(void)
void get_rotdrag_params(BigReal &v, Vector &a, Vector &p, int atomnum) const
void langevinVelocitiesBBK2_SOA(BigReal timestep)
#define NAMD_EVENT_STOP(eon, id)
Bool is_atom_movdragged(int atomnum) const
SubmitReduction * pressureProfileReduction
void minimize_rattle2(const BigReal, Tensor *virial, bool forces=false)
friend class SequencerCUDA
void scaleVelocities(const BigReal velScale)
void positionsReady_SOA(int doMigration=0)
#define GB1_COMPUTE_HOME_PRIORITY
void addVelocityToPosition(BigReal)
SubmitReduction * reduction
NAMD_HOST_DEVICE Vector c() const
SubmitReduction * min_reduction
std::shared_ptr< CudaGlobalMasterServer > getCudaGlobalMaster()
Bool is_atom_exPressure(int atomnum) const
SimpleBroadcastObject< int > traceBarrier
void maximumMove(BigReal)
Bool monteCarloPressureOn
const GlobalMasterIMD * getIMD()
void cycleBarrier(int, int)
void submitCollections_SOA(int step, int zeroVel=0)
Bool globalMasterScaleByFrequency
static void partition(int *order, const FullAtom *atoms, int begin, int end)
SimpleBroadcastObject< Vector > momentumCorrection
void addRotDragToPosition(BigReal)
static PatchMap * Object()
void saveForce(const int ftag=Results::normal)
void registerIDsFullAtom(const FullAtom *begin, const FullAtom *end)
void langevinVelocitiesBBK2(BigReal)
void monteCarloPressureControl(const int step, const int doMigration, const int doEnergy, const int doVirial, const int maxForceNumber, const int doGlobal)
virtual void submit(void)=0
#define ADD_TENSOR_OBJECT(R, RL, D)
SimParameters * simParameters
void addForceToMomentum(FullAtom *__restrict atom_arr, const Force *__restrict force_arr, const BigReal dt, int num_atoms) __attribute__((__noinline__))
void newMinimizeDirection(BigReal)
void newMinimizePosition(BigReal)
double stochRescaleCoefficient()
Bool CUDASOAintegrateMode
int rattle1(const BigReal, Tensor *virial, SubmitReduction *)
int nextstep
next step value
void gbisComputeAfterP2()
void startWork(const LDObjHandle &handle)
HomePatchList * homePatchList()
void langevinVelocitiesBBK1(BigReal)
std::ostream & endi(std::ostream &s)
char const *const NamdProfileEventStr[]
void updateDevicePatchMap(int startup)
int berendsenPressureFreq
SubmitReduction * willSubmit(int setID, int size=-1)
void rattle1(BigReal, int)
void saveTotalForces(HomePatch *)
SimpleBroadcastObject< BigReal > adaptTemperature
unsigned char get_ss_type(int anum) const
void rebalanceLoad(int timestep)
Bool globalMasterStaleForces
static ReductionMgr * Object(void)
void addForceToMomentum_SOA(const double scaling, double dt_normal, double dt_nbond, double dt_slow, int maxForceNumber)
void minimizeMoveDownhill(BigReal fmax2)
Patch * patch(PatchID pid)
void addForceToMomentum(BigReal, const int ftag=Results::normal, const int useSaved=0)
void submitReductions_SOA()
std::vector< PatchRecord > & getPatches()
static PatchMap * ObjectOnPe(int pe)
float * langScalVelBBK2
derived from langevinParam
void pauseWork(const LDObjHandle &handle)
SimpleBroadcastObject< BigReal > tcoupleCoefficient
int NAMD_gcd(int a, int b)
void exchangeCheckpoint(int scriptTask, int &bpc)
Molecule stores the structural information for the system.
void split(int iStream, int numStreams)
static NAMD_HOST_DEVICE Tensor identity(BigReal v1=1.0)
void addForceToMomentum3(const BigReal timestep1, const int ftag1, const int useSaved1, const BigReal timestep2, const int ftag2, const int useSaved2, const BigReal timestep3, const int ftag3, const int useSaved3)
void positionsReady(int doMigration=0)
void submitHalfstep_SOA()
SimpleBroadcastObject< BigReal > stochRescaleCoefficient
SimpleBroadcastObject< int > monteCarloBarostatAcceptance
void submitCollections(int step, int zeroVel=0)
void stochRescaleVelocities_SOA(int step)
static void print_vel_SOA(const double *vel_x, const double *vel_y, const double *vel_z, int ilo=0, int ihip1=1)
void runComputeObjects_SOA(int migration, int pairlists, int step)
BigReal calcKineticEnergy()
void adaptTempUpdate(int)
void positionsReady_GPU(int doMigration=0, int startup=0)
int32 * hydrogenGroupSize
#define TIMER_START(T, TYPE)
int rattle1_SOA(const BigReal, Tensor *virial, SubmitReduction *)
void calcFixVirial(Tensor &fixVirialNormal, Tensor &fixVirialNbond, Tensor &fixVirialSlow, Vector &fixForceNormal, Vector &fixForceNbond, Vector &fixForceSlow)
#define NAMD_PROFILE_START()
float * gaussrand_x
fill with Gaussian distributed random numbers
static __device__ __host__ __forceinline__ int computeAtomPad(const int numAtoms, const int tilesize=WARPSIZE)
int numPatches(void) const
static std::pair< int, int > coordinateNeeded(int timestep)
Check if the step requires to output the coordinates.
#define NAMD_EVENT_START(eon, id)
void stochRescaleVelocities(int)
void rattle1_SOA(BigReal, int)
#define COMPUTE_HOME_PRIORITY
void constructDevicePatchMap()
static void print_tensor(const Tensor &t)
NAMD_HOST_DEVICE BigReal length(void) const
NAMD_HOST_DEVICE Position apply_transform(Position data, const Transform &t) const
void NAMD_bug(const char *err_msg)
void multigratorPressure(int step, int callNumber)
static ComputeCUDAMgr * getComputeCUDAMgr()
void berendsenPressure(int)
void submitMomentum(int step)
int rescaleVelocities_numTemps
double * vel_x
Jim recommends double precision velocity.
void submitLoadStats(int timestep)
void mollyMollify(Tensor *virial)
void runComputeObjects(int migration=1, int pairlists=0, int pressureStep=0)
void rebalance(Sequencer *seq, PatchID id)
void rescaleaccelMD(int, int, int)
SimpleBroadcastObject< Tensor > velocityRescaleTensor2
SimpleBroadcastObject< int > IMDTimeEnergyBarrier
void clearDevicePatchMap()
NAMD_HOST_DEVICE BigReal length2(void) const
#define NAMD_EVENT_RANGE_2(eon, id)
SimpleBroadcastObject< int > scriptBarrier
bool getIsGlobalDevice() const
const_iterator const_begin(void) const
PatchID getPatchID() const
void scalePositionsVelocities(const Tensor &posScale, const Tensor &velScale)
int monteCarloPressureFreq
int getPesSharingDevice(const int i)
SimpleBroadcastObject< BigReal > velocityRescaleFactor2
Bool is_atom_rotdragged(int atomnum) const
void doMigrationGPU(const int startup, const int doGlobal, const int updatePatchMap)
void langevinPiston_SOA(int step)
void gbisComputeAfterP1()
void NAMD_die(const char *err_msg)
static LdbCoordinator * Object()
void gaussian_array_f(float *a, int n)
#define TIMER_INIT_WIDTH(T, TYPE, WIDTH)
int getForceSendActive() const
static int forceNeeded(int timestep)
Check if the step requires to output the forces.
int berendsenPressure_count
SimpleBroadcastObject< BigReal > velocityRescaleFactor
void publish(int tag, const T &t)
SimpleBroadcastObject< BigReal > minimizeCoefficient
void reassignVelocities(BigReal, int)
Bool LJPMESerialRealSpaceOn
void langevinVelocitiesBBK1_SOA(BigReal timestep)
SimpleBroadcastObject< Vector > accelMDRescaleFactor
int hardWallDrude(const BigReal, Tensor *virial, SubmitReduction *)
ComputeGlobal * computeGlobalObject
void saveForce(const int ftag=Results::normal)
void runComputeObjectsCUDA(int doMigration, int doGlobal, int pairlists, int nstep, int startup)
SimpleBroadcastObject< Tensor > positionRescaleFactor
void buildRattleList_SOA()
void langevinVelocities(BigReal)
IMDSessionInfo IMDsendsettings
static CollectionMaster * Object()
void hardWallDrude(BigReal, int)
NodeBroadcast * nodeBroadcast
#define TIMER_STOP(T, TYPE)
void multigratorTemperature(int step, int callNumber)
static constexpr int num_inline_peer
double * recipMass
derived from mass
void reinitVelocities(void)
int pressureProfileAtomTypes
int checkpoint_berendsenPressure_count
ControllerBroadcasts * broadcast
#define NAMD_EVENT_START_EX(eon, id, str)
void maximumMove_SOA(const double dt, const double maxvel2)
CollectionMgr *const collection
void updateDeviceData(const int startup, const int maxForceUsed, const int doGlobal)
#define GB2_COMPUTE_HOME_PRIORITY
#define NAMD_PROFILE_STOP()
Bool langevinGammasDiffer
int getNumStepsToRun(void)
void resetMovingAverage()
void newtonianVelocities(BigReal, const BigReal, const BigReal, const BigReal, const int, const int, const int)
static void print_vel_AOS(const FullAtom *a, int ilo=0, int ihip1=1)
void rescaleSoluteCharges(BigReal)
void addVelocityToPosition_SOA(const double dt)
void setVal(const NodeReduction *other)
#define SOA_SIMPLIFY_PARAMS
void submitVelocities(int seq, int zero, FullAtomList &a, int prec)
void submitMinimizeReductions(int, BigReal fmax2)
#define ADD_VECTOR_OBJECT(R, RL, D)
CudaComputeNonbonded * getCudaComputeNonbonded()
int multigratorPressureFreq
static int velocityNeeded(int timestep)
Check if the step requires to output the velocities.
int numPatchesOnNode(int node)
void submitPositions(int seq, FullAtomList &a, Lattice l, int prec, int dcdSelectionIndex)
void correctMomentum(int step, BigReal drifttime)
NAMD_HOST_DEVICE void outerAdd(BigReal scale, const Vector &v1, const Vector &v2)
std::ostream & iERROR(std::ostream &s)
void addForceToMomentum3(FullAtom *__restrict atom_arr, const Force *__restrict force_arr1, const Force *__restrict force_arr2, const Force *__restrict force_arr3, const BigReal dt1, const BigReal dt2, const BigReal dt3, int num_atoms) __attribute__((__noinline__))
void addVelocityToPosition(FullAtom *__restrict atom_arr, const BigReal dt, int num_atoms) __attribute__((__noinline__))
ForceList f[Results::maxNumForces]
float * langScalRandBBK2
from langevinParam and recipMass
void get_movdrag_params(Vector &v, int atomnum) const
static GlobalGPUMgr * Object()
void enableEarlyExit(void)
void submitReductions(int)
#define namd_reciprocal(x)
SimpleBroadcastObject< Tensor > positionRescaleFactor2
void integrate_CUDA_SOA(int scriptTask)
void loweAndersenFinish()
int getNumPesSharingDevice()
SimParameters *const simParams
SimpleBroadcastObject< Tensor > velocityRescaleTensor
NAMD_HOST_DEVICE Vector unit(void) const
__thread DeviceCUDA * deviceCUDA
NAMD_HOST_DEVICE Vector origin() const
virtual void atomUpdate()
void rescaleVelocitiesByFactor(BigReal)
int multigratorTemperatureFreq
int globalMasterFrequency
static SynchronousCollectives * Object()
CudaPmeOneDevice * createCudaPmeOneDevice()
#define PATCH_PRIORITY(PID)
CudaPmeOneDevice * getCudaPmeOneDevice()
void updatePatchOrder(const std::vector< CudaLocalRecord > &data)
void berendsenPressure_SOA(int step)
int32 numAtoms
number of atoms
void printDevicePatchMap()
void compute(const Lattice &lattice, int doEnergyVirial, int step)
void exchangeAtoms(int scriptTask)
T get(int tag, const int expected=-1)