#define PRINT_FORCES 0
#define MIN_DEBUG_LEVEL 3
#define START_HPM_STEP 1000
#define STOP_HPM_STEP 1500

#if defined(NAMD_CUDA) || defined(NAMD_HIP)
#define __thread __declspec(thread)

#define SPECIAL_PATCH_ID 91

int ilo=0, ihip1=1;
printf("AOS Velocities:\n");
for (int i=ilo; i < ihip1; i++) {
  printf("%d %g %g %g\n", i,

int ilo=0, ihip1=1;
printf("SOA Velocities:\n");
for (int i=ilo; i < ihip1; i++) {
  printf("%d %g %g %g\n", i, vel_x[i], vel_y[i], vel_z[i]);
}

printf("%g %g %g %g %g %g %g %g %g\n",
inline int init(int initstep, int initperiod, int delta=0) {

pairlistsAreValid(0),
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
PatchData* patchData = cpdata.ckLocalBranch();
#endif // defined(NAMD_CUDA) || defined(NAMD_HIP)

#if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(SEQUENCER_SOA) && defined(NODEGROUP_FORCE_REGISTER)

#if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(SEQUENCER_SOA) && defined(NODEGROUP_FORCE_REGISTER)
delete CUDASequencer;
void Sequencer::threadRun(Sequencer* arg)

DebugM(4, "::run() - this = " << this << "\n");
thread = CthCreate((CthVoidFn)&(threadRun), (void*)(this), SEQ_STK_SZ);
CthSetStrategyDefault(thread);

switch ( scriptTask ) {

NAMD_die("Minimization is currently not supported on the GPU integrator\n");

#ifdef NODEGROUP_FORCE_REGISTER
#ifdef NODEGROUP_FORCE_REGISTER

NAMD_bug("Unknown task in Sequencer::algorithm");
#if defined(NODEGROUP_FORCE_REGISTER)
CUDASequencer->numPatchesCheckedIn += 1;
if (CUDASequencer->numPatchesCheckedIn < patchMap->numPatchesOnNode(CkMyPe())) {
  CUDASequencer->waitingThreads.push_back(CthSelf());

int lastStep = CUDASequencer->patchData->flags.step;

if (CUDASequencer->breakSuspends) break;

CUDASequencer->numPatchesCheckedIn += 1;
CUDASequencer->waitingThreads.push_back(CthSelf());
if (CUDASequencer->numPatchesCheckedIn == patchMap->numPatchesOnNode(CkMyPe()) - 1 &&
    CUDASequencer->masterThreadSleeping) {
  CUDASequencer->masterThreadSleeping = false;
  CthAwaken(CUDASequencer->masterThread);
}

CUDASequencer->numPatchesCheckedIn = 0;
for (CthThread t : CUDASequencer->waitingThreads) {

CUDASequencer->waitingThreads.clear();
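// Node-level check-in barrier over Charm++ user-level threads: each patch's
// sequencer thread registers and suspends; the last arrival wakes the
// sleeping master thread, which then re-awakens every thread on the waiting
// list before clearing it for the next step.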
CUDASequencer->sync();
ComputeBondedCUDA* cudaBond = cudaMgr->getComputeBondedCUDA();

if (isMaster && cudaGlobal && doMigration) cudaGlobal->setStep(static_cast<int64_t>(patch->flags.step));

cudaBond->atomUpdate();

cudaGlobal->updateAtomMaps();
cudaGlobal->communicateToClients(&(this->patch->lattice));

CUDASequencer->launch_set_compute_positions();
CUDASequencer->sync();
for (int i = 0; i < CkNumPes(); i++) {

  ComputeBondedCUDA* b = CUDASequencer->patchData->cudaBondedList[i];

  if (b == NULL) continue;
  auto list = std::find(std::begin(b->getBondedPes()), std::end(b->getBondedPes()), CkMyPe());
  if ( list != std::end(b->getBondedPes()) ) {
    b->openBoxesOnPe(startup);

cudaBond->loadTuplesOnPe(startup);

cudaBond->copyTupleDataGPU(startup);

cudaBond->copyTupleDataSN();

cudaBond->launchWork();
if (cudaPme && computePme) {
  cudaPme->compute(*(CUDASequencer->patchData->lat), reducePme, this->patch->flags.step);

cudaGlobal->calculate();

for (int i = 0 ; i < numhp; ++i) {

  hp->zero_global_forces_SOA();

CUDASequencer->copyGlobalForcesToDevice();

cudaBond->finishPatches();

cudaBond->finishReductions();
if (cudaGlobal) cudaGlobal->finishReductions();
if (cudaPme && computePme) {
  cudaPme->compute(*(CUDASequencer->patchData->lat), reducePme, this->patch->flags.step);

cudaGlobal->calculate();

for (int i = 0; i < numhp; ++i) {

  hp->zero_global_forces_SOA();

CUDASequencer->copyGlobalForcesToDevice();

cudaBond->finishPatches();

if (cudaGlobal) cudaGlobal->finishReductions();
const int doMigration,

const int maxForceNumber,

Controller *c_out = CUDASequencer->patchData->c_out;
bool mGpuOn = CUDASequencer->mGpuOn;

CUDASequencer->submitReductionValues();

CUDASequencer->patchData->reductionBackendSave->setVal(reduction);

c_out->mcPressure_prepare(step);

CUDASequencer->monteCarloPressure_part1(factor, origin, oldLattice);

CUDASequencer->patchData->lat = &(this->patch->lattice);
CUDASequencer->patchData->factor = &(factor);

CUDASequencer->patchData->flags.copyIntFlags(this->patch->flags);

CUDASequencer->update_patch_flags();

CUDASequencer->monteCarloPressure_part2(step, maxForceNumber,
    doEnergy, doGlobal, doVirial);
CUDASequencer->submitReductionValues();

c_out->mcPressure_accept(step);

CUDASequencer->monteCarloPressure_accept(doMigration);

CUDASequencer->patchData->lat = &(this->patch->lattice);

CUDASequencer->patchData->flags.copyIntFlags(this->patch->flags);

CUDASequencer->monteCarloPressure_reject(this->patch->lattice);

reduction->setVal(CUDASequencer->patchData->reductionBackendSave);

if (isMasterPe && !accepted) {

  CUDASequencer->update_patch_flags();
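// Monte Carlo barostat flow: reductions are saved to reductionBackendSave
// before the trial volume move, forces are recomputed under the trial
// lattice, and on rejection the saved reductions and original lattice are
// restored so the step proceeds as if the move never happened.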
const int updatePatchMap) {

const bool updatePatchData = startup || doGlobal || updatePatchMap;

bool realloc = false;

if (CUDASequencer->patchData->atomReallocationFlagPerDevice[i] != 0) {

CUDASequencer->reallocateMigrationDestination();
CUDASequencer->registerSOAPointersToHost();

CUDASequencer->copySOAHostRegisterToDevice();

CUDASequencer->migrationLocalInit();

CUDASequencer->migrationPerform();

CUDASequencer->migrationUpdateAtomCounts();

CUDASequencer->migrationUpdateAtomOffsets();

CUDASequencer->copyPatchDataToHost();

realloc = CUDASequencer->copyPatchData(true, false);

if (CUDASequencer->patchData->atomReallocationFlagPerDevice[i] != 0) {

CUDASequencer->registerSOAPointersToHost();

CUDASequencer->copySOAHostRegisterToDevice();

CUDASequencer->migrationLocalPost(0);
CUDASequencer->migrationSortAtomsNonbonded();

if (!updatePatchData) {

if (CUDASequencer->numPatchesCheckedIn < patchMap->numPatchesOnNode(CkMyPe()) - 1) {
  CUDASequencer->masterThreadSleeping = true;
  CUDASequencer->masterThread = CthSelf();

CUDASequencer->sync();

CUDASequencer->migrationUpdateDestination();

CUDASequencer->migrationUpdateProxyDestination();

CUDASequencer->migrationUpdateRemoteOffsets();

CUDASequencer->copyDataToPeers(true);

if (updatePatchData) {

for (int i = 0; i < numhp; ++i) {

CUDASequencer->copyAoSDataToHost();

if (CUDASequencer->numPatchesCheckedIn < patchMap->numPatchesOnNode(CkMyPe()) - 1) {
  CUDASequencer->masterThreadSleeping = true;
  CUDASequencer->masterThread = CthSelf();

CUDASequencer->updateHostPatchDataSOA();

CUDASequencer->migrationUpdateAdvancedFeatures(false);
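// GPU migration pipeline: local init and perform, atom count/offset updates,
// patch data copied back to the host, pointer re-registration wherever a
// device reallocated its buffers, then destination/proxy/remote-offset
// updates and a peer copy so all devices share a consistent patch map.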
#ifdef TIMER_COLLECTION
TimerSet& t = patch->timerSet;
Controller *c_out = CUDASequencer->patchData->c_out;

doNonbonded = (step >= numberOfSteps) ||

doFullElectrostatics = (dofull &&
    ((step >= numberOfSteps) ||

doEnergy = energyFrequency.init(step, newComputeEnergies);

int doGlobal = (doTcl || doColvars || doIMD);

bool globalMasterStep = false;
int doGlobalObjects = 0;
int doGlobalStaleForces = 0;

globalMasterStep = globalMasterFrequency.check(step);
doGlobalObjects = globalMasterStep ? 1 : 0;

doGlobalStaleForces = 1;

doGlobalStaleForces = doGlobalObjects;

doGlobalStaleForces = doGlobalObjects;

doGlobalStaleForces = 0;
doGlobalObjects = 0;

langevinPistonFrequency.init(step,

patch->copy_atoms_to_SOA();

CUDASequencer->breakSuspends = false;
if (CUDASequencer->patchData->ptrCollectionMaster == NULL) {

  CUDASequencer->patchData->ptrCollectionMaster = pcm;

if (CUDASequencer->patchData->ptrOutput == NULL) {

  CUDASequencer->patchData->ptrOutput = pout;

if (CUDASequencer->patchData->pdb == NULL) {

  CUDASequencer->patchData->pdb = pdb;

if (CUDASequencer->patchData->imd == NULL) {

  CUDASequencer->patchData->imd = imd;

CUDASequencer->patchData->cudaBondedList[CkMyPe()] = NULL;
CUDASequencer->patchData->cudaNonbondedList[CkMyPe()] = NULL;
if (patchData->updateCounter.load() > 0)

CUDASequencer->updateDeviceKernels();

CUDASequencer->startRun1(maxForceUsed, this->patch->lattice);

CUDASequencer->patchData->lat = &(this->patch->lattice);
CUDASequencer->patchData->flags.copyIntFlags(this->patch->flags);

const bool addCudaGlobalForces =
    (cudaGlobalMasterObject != nullptr) ?
    cudaGlobalMasterObject->willAddGlobalForces() : false;
if (addCudaGlobalForces) {
  CUDASequencer->allocateGPUSavedForces();

if (CUDASequencer->numPatchesCheckedIn < patchMap->numPatchesOnNode(CkMyPe()) - 1) {
  CUDASequencer->masterThreadSleeping = true;
  CUDASequencer->masterThread = CthSelf();

CUDASequencer->setRescalePairlistTolerance(step < numberOfSteps);

CUDASequencer->finish_patch_flags(true);
const bool addCudaGlobalForces =
    (cudaGlobalMasterObject != nullptr) ?
    cudaGlobalMasterObject->willAddGlobalForces() : false;

CUDASequencer->startRun2(timestep,
    doGlobal || addCudaGlobalForces, maxForceUsed);

const bool requestTotalForces = computeGlobal ? computeGlobal->getForceSendActive() : false;

const bool requestGPUTotalForces =
    (cudaGlobalMasterObject != nullptr) ?
    cudaGlobalMasterObject->requestedTotalForces() : false;

CUDASequencer->startRun3(timestep,
    requestTotalForces, doGlobalStaleForces,
    requestGPUTotalForces,
for (int i = 0; i < numhp; ++i) {

CUDASequencer->submitReductionValues();

CUDASequencer->patchData->flags.copyIntFlags(this->patch->flags);

c_out->printStep(step);

double velrescaling = 1;
for ( ++step; step <= numberOfSteps; ++step ) {
  const int isForcesOutputStep = forceDcdFrequency.check(step) + imdFrequency.check(step);
  int dcdSelectionChecks = 0;

  for (int dcdindex = 0; dcdindex < 16; ++dcdindex)

    if (dcdSelectionFrequency && step % dcdSelectionFrequency == 0)
      dcdSelectionChecks++;

  const int isCollection = restartFrequency.check(step) +
      dcdFrequency.check(step) + velDcdFrequency.check(step) +
      isForcesOutputStep + dcdSelectionChecks;
  int isMigration = false;
  const int doVelocityRescale = stochRescaleFrequency.check(step);
  const int doMCPressure = monteCarloPressureFrequency.check(step);

  doEnergy = energyFrequency.check(step) || doVelocityRescale || doMCPressure;
  int langevinPistonStep = langevinPistonFrequency.check(step);

  int reassignVelocityStep = reassignVelocityFrequency.check(step);

  int berendsenPressureStep = 0;

  berendsenPressureStep = 1;
if (patchData->updateCounter.load() > 0)

CUDASequencer->updateDeviceKernels();

globalMasterStep = globalMasterFrequency.check(step);
doGlobalObjects = globalMasterStep ? 1 : 0;

doGlobalStaleForces = 1;

doGlobalStaleForces = doGlobalObjects;

doGlobalStaleForces = doGlobalObjects;

doGlobalStaleForces = 0;
doGlobalObjects = 0;
globalMasterStep = false;
#if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)

c_out->piston1(step);

c_out->berendsenPressureController(step);

if (isMasterPe) cudaCheck(cudaDeviceSynchronize());

if (langevinPistonStep || berendsenPressureStep) {

  CUDASequencer->patchData->lat = &(this->patch->lattice);
  CUDASequencer->patchData->factor = &(factor);
int previousMaxForceUsed;

previousMaxForceUsed = maxForceUsed;

doNonbonded = nonbondedFrequency.check(step);

doFullElectrostatics = (dofull && fullElectFrequency.check(step));

CUDASequencer->launch_part1(
    timestep, nbondstep, slowstep, velrescaling, maxvel2,
    *(CUDASequencer->patchData->factor),
    (langevinPistonStep || berendsenPressureStep) ? *(CUDASequencer->patchData->lat) : this->patch->lattice,
    reassignVelocityStep,
    berendsenPressureStep,
    previousMaxForceUsed,
    this->patch->flags.savePairlists,
    this->patch->flags.usePairlists,
if (reassignVelocityStep)

CUDASequencer->launch_part11(
    timestep, nbondstep, slowstep, velrescaling, maxvel2,
    *(CUDASequencer->patchData->factor),
    (langevinPistonStep || berendsenPressureStep) ? *(CUDASequencer->patchData->lat) : this->patch->lattice,
    previousMaxForceUsed,
    this->patch->flags.savePairlists,
    this->patch->flags.usePairlists,
if (CUDASequencer->patchData->migrationFlagPerDevice[i] != 0) {

CUDASequencer->launch_set_compute_positions();

CUDASequencer->updatePairlistFlags(isMigration);

CUDASequencer->copyPositionsAndVelocitiesToHost(isMigration, doGlobalObjects);

cudaGlobal->communicateToClients(&(this->patch->lattice));

if (CUDASequencer->numPatchesCheckedIn < patchMap->numPatchesOnNode(CkMyPe()) - 1) {
  CUDASequencer->masterThreadSleeping = true;
  CUDASequencer->masterThread = CthSelf();
CUDASequencer->finish_patch_flags(isMigration);

const bool addCudaGlobalForces =
    (cudaGlobalMasterObject != nullptr) ?
    cudaGlobalMasterObject->willAddGlobalForces() : false;

CUDASequencer->launch_part2(doMCPressure,
    timestep, nbondstep, slowstep,
    doGlobalStaleForces || addCudaGlobalForces,

const bool requestTotalForces = (computeGlobal ? computeGlobal->getForceSendActive() : false) && doGlobalObjects;

const bool requestGPUTotalForces =
    (cudaGlobalMasterObject != nullptr) ?
    cudaGlobalMasterObject->requestedTotalForces() : false;

CUDASequencer->launch_part3(doMCPressure,
    timestep, nbondstep, slowstep,
    doGlobalStaleForces,
    requestGPUTotalForces,
    isForcesOutputStep);
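// The GPU step is split across launch_part1/part11/part2/part3; judging from
// the arguments, part1/part11 cover the integration update under the current
// (or barostat-rescaled) lattice, while part2/part3 cover force work,
// reductions, total-force requests, and output staging.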
if (requestTotalForces) {

for (int i = 0; i < numhp; ++i) {

CUDASequencer->submitReductionValues();

c_out->printStep(step);

if (doVelocityRescale) {

if (doVelocityRescale) {
CUDASequencer->copyAoSDataToHost();

for (auto i = hplist->begin(); i != hplist->end(); i++) {

CUDASequencer->copyAoSDataToHost();
CUDASequencer->saveForceCUDASOA_direct(false, true, maxForceUsed);

for (auto i = hplist->begin(); i != hplist->end(); i++) {

  hp->sort_solvent_atoms();
  hp->copy_atoms_to_SOA();
  hp->copy_forces_to_AOS();
CUDASequencer->updateHostPatchDataSOA();
CUDASequencer->saveForceCUDASOA_direct(false, true, maxForceUsed);

if (isMasterPe) CUDASequencer->copyPositionsAndVelocitiesToHost(true, doGlobal);

for (auto i = hplist->begin(); i != hplist->end(); i++) {

  hp->copy_updates_to_AOS();
  hp->copy_forces_to_AOS();

CUDASequencer->breakSuspends = true;
CUDASequencer->copyPatchData(true, startup);

CUDASequencer->reallocateMigrationDestination();
CUDASequencer->copyAtomDataToDeviceAoS();

CUDASequencer->copyAtomDataToDevice(startup, maxForceUsed);

CUDASequencer->migrationLocalPost(startup);
CUDASequencer->migrationUpdateAdvancedFeatures(startup);

CUDASequencer->registerSOAPointersToHost();

CUDASequencer->copySOAHostRegisterToDevice();

CUDASequencer->updateHostPatchDataSOA();
ComputeBondedCUDA* cudaBond = cudaMgr->getComputeBondedCUDA();

CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
patchData = cpdata.ckLocalBranch();

if (patchData->devicePatchMapFlag[CkMyPe()]) return;
patchData->devicePatchMapFlag[CkMyPe()] = 1;

std::vector<NBPatchRecord>& nonbondedPatches = cudaNbond->getPatches();
std::vector<HomePatch*>& homePatches = patchData->devData[deviceIndex].patches;
std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;

homePatches.begin(),

for (int i = 0; i < nonbondedPatches.size(); i++) {

  record.patchID = nonbondedPatches[i].patchID;

  const int targetPatchID = record.patchID;
  auto result = std::find_if(
      homePatches.begin(),
        return (p->getPatchID() == targetPatchID);

  record.isProxy = (result == homePatches.end());
  localPatches.push_back(record);

  localPatches.begin(),
      return (a.isProxy < b.isProxy);

cudaBond->updatePatchOrder(localPatches);

patchData->devData[deviceIndex].numPatchesHome = homePatches.size();
patchData->devData[deviceIndex].numPatchesHomeAndProxy = localPatches.size();
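// The comparator above orders records with isProxy == false first, so
// indices [0, numPatchesHome) of localPatches are home patches and the
// remainder are proxies.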
std::vector<CudaPeerRecord>& myPeerPatches = patchData->devData[deviceIndex].h_peerPatches;
std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;

for (int i = 0; i < localPatches.size(); i++) {
  std::vector<CudaPeerRecord> tempPeers;
  const int targetPatchID = localPatches[i].patchID;
  const int targetIsProxy = localPatches[i].isProxy;

  if (devIdx == deviceIndex) continue;
  std::vector<CudaLocalRecord>& peerPatches = patchData->devData[devIdx].h_localPatches;

  for (int j = 0; j < patchData->devData[devIdx].numPatchesHomeAndProxy; j++) {

    if (peer.patchID == targetPatchID && peer.isProxy != targetIsProxy) {

      tempPeers.push_back(peerRecord);

  localPatches[i].numPeerRecord = tempPeers.size();
  if (!tempPeers.empty()) {
    localPatches[i].peerRecordStartIndex = myPeerPatches.size();
    myPeerPatches.insert(myPeerPatches.end(), tempPeers.begin(), tempPeers.end());
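// A CudaPeerRecord links this device's record for a patch to the opposite
// copy (home vs. proxy) of the same patchID on another device; each patch's
// peers occupy a contiguous slice of the flat h_peerPatches array described
// by peerRecordStartIndex and numPeerRecord.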
CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
patchData = cpdata.ckLocalBranch();

const int numPatchesHome = patchData->devData[deviceIndex].numPatchesHome;
std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;

CkPrintf("PE: %d\n", CkMyPe());

CkPrintf("[%d] Home patches %d Local patches %d\n", CkMyPe(), numPatchesHome, localPatches.size());

CkPrintf("Home Patches: ");
for (int i = 0; i < numPatchesHome; i++) {
  CkPrintf("%d ", localPatches[i].patchID);

CkPrintf("Proxy Patches: ");
for (int i = numPatchesHome; i < localPatches.size(); i++) {
  CkPrintf("%d ", localPatches[i].patchID);
CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
patchData = cpdata.ckLocalBranch();

if (!patchData->devicePatchMapFlag[CkMyPe()]) return;
patchData->devicePatchMapFlag[CkMyPe()] = 0;

std::vector<HomePatch*>& homePatches = patchData->devData[deviceIndex].patches;
std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
std::vector<CudaPeerRecord>& peerPatches = patchData->devData[deviceIndex].h_peerPatches;

homePatches.clear();
localPatches.clear();
peerPatches.clear();
CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
patchData = cpdata.ckLocalBranch();

const int numPatchesHome = patchData->devData[deviceIndex].numPatchesHome;
std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;

int max_atom_count = 0;
int total_atom_count = 0;

for (int i = 0; i < numPatchesHome; i++) {

  if (patch != NULL) break;

  if (patch == NULL) NAMD_die("Sequencer: Failed to find patch in updateDevicePatchMap");

  if (localPatches[i].numAtoms > max_atom_count) max_atom_count = localPatches[i].numAtoms;
  total_atom_count += localPatches[i].numAtoms;
const int numPatchesHome = patchData->devData[deviceIndex].numPatchesHome;
const int numPatchesHomeAndProxy = patchData->devData[deviceIndex].numPatchesHomeAndProxy;
std::vector<CudaPeerRecord>& peerPatches = patchData->devData[deviceIndex].h_peerPatches;
std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;

for (int i = numPatchesHome; i < numPatchesHomeAndProxy; i++) {
  const int index = localPatches[i].peerRecordStartIndex;
  const int devIdx = peerPatches[index].deviceIndex;
  const int peerIdx = peerPatches[index].patchIndex;
  const CudaLocalRecord peer = patchData->devData[devIdx].h_localPatches[peerIdx];

  localPatches[i].numAtoms = peer.numAtoms;
const int numPatchesHomeAndProxy = patchData->devData[deviceIndex].numPatchesHomeAndProxy;
std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;

int runningOffset = 0;
int runningOffsetNBPad = 0;

for (int i = 0; i < numPatchesHomeAndProxy; i++) {
  localPatches[i].bufferOffset = runningOffset;
  localPatches[i].bufferOffsetNBPad = runningOffsetNBPad;
  runningOffset += localPatches[i].numAtoms;
  runningOffsetNBPad += localPatches[i].numAtomsNBPad;
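// Exclusive prefix sum over per-patch atom counts: each patch receives its
// starting offset into the packed device arrays, computed once for the plain
// layout and once for the nonbonded-padded (NBPad) layout.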
const int numPatchesHomeAndProxy = patchData->devData[deviceIndex].numPatchesHomeAndProxy;
std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
std::vector<CudaPeerRecord>& peerPatches = patchData->devData[deviceIndex].h_peerPatches;

for (int i = 0; i < peerPatches.size(); i++) {
  const int devIdx = peerPatches[i].deviceIndex;
  const int peerIdx = peerPatches[i].patchIndex;
  const CudaLocalRecord peer = patchData->devData[devIdx].h_localPatches[peerIdx];

for (int i = 0; i < numPatchesHomeAndProxy; i++) {
  const int numPeerRecord = localPatches[i].numPeerRecord;
  const int peerOffset = localPatches[i].peerRecordStartIndex;

  localPatches[i].inline_peers[j] = peerPatches[peerOffset+j];
#ifdef TIMER_COLLECTION
TimerSet& t = patch->timerSet;
doNonbonded = (step >= numberOfSteps) ||

doFullElectrostatics = (dofull &&
    ((step >= numberOfSteps) ||

doEnergy = energyFrequency.init(step, newComputeEnergies);

int doGlobal = doTcl || doColvars;

langevinPistonFrequency.init(step,

#if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
int beginStep = simParams->beginEventStep;
for ( ++step; step <= numberOfSteps; ++step ) {
  int dcdSelectionChecks = 0;

  for (int dcdindex = 0; dcdindex < 16; ++dcdindex)

    if (dcdSelectionFrequency && step % dcdSelectionFrequency == 0)
      dcdSelectionChecks++;

  const int isCollection = restartFrequency.check(step) +
      dcdFrequency.check(step) + velDcdFrequency.check(step) +
      forceDcdFrequency.check(step) + imdFrequency.check(step) +

  const int isMigration = stepsPerCycle.check(step);
  doEnergy = energyFrequency.check(step);
  DebugM(3, "doGlobal now " << doGlobal << "\n" << endi);

#if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
  eon = epid && (beginStep < step && step <= endStep);

  if (controlProfiling && step == beginStep) {

  if (controlProfiling && step == endStep) {
if ( langevinPistonFrequency.check(step) ) {

doNonbonded = nonbondedFrequency.check(step);

doFullElectrostatics = (dofull && fullElectFrequency.check(step));
fprintf(stderr, "Patch %d has %d atoms\n", pid, n);
fprintf(stderr, "%3s %8s %12s %12s %12s\n",
    "", "id", "fnormal_x", "fnbond_x", "fslow_x");
for (int i=0; i < n; i++) {
  int index = p.id[i];
  fprintf(stderr, "%3d %8d %12.8f %12.8f %12.8f\n",

for (int i=0; i < n; i++) {

TestArray_write<double>(
    "f_normal_good.bin", "f_normal good", (double*)f_normal, 3*n);
TestArray_write<double>(
    "f_nbond_good.bin", "f_nbond good", (double*)f_nbond, 3*n);
TestArray_write<double>(
    "f_slow_good.bin", "f_slow good", (double*)f_slow, 3*n);
patch->copy_updates_to_AOS();

printf("Timer collection reporting in microseconds for "

const double scaling,

const double * __restrict recipMass,
const double * __restrict f_normal_x,
const double * __restrict f_normal_y,
const double * __restrict f_normal_z,
const double * __restrict f_nbond_x,
const double * __restrict f_nbond_y,
const double * __restrict f_nbond_z,
const double * __restrict f_slow_x,
const double * __restrict f_slow_y,
const double * __restrict f_slow_z,
double * __restrict vel_x,
double * __restrict vel_y,
double * __restrict vel_z,

NamdProfileEvent::ADD_FORCE_TO_MOMENTUM_SOA);
#ifdef SOA_SIMPLIFY_PARAMS
const double * __restrict recipMass = patch->patchDataSOA.recipMass;

const double * __restrict f_normal_x = patch->patchDataSOA.f_normal_x;
const double * __restrict f_normal_y = patch->patchDataSOA.f_normal_y;
const double * __restrict f_normal_z = patch->patchDataSOA.f_normal_z;

const double * __restrict f_nbond_x = patch->patchDataSOA.f_nbond_x;
const double * __restrict f_nbond_y = patch->patchDataSOA.f_nbond_y;
const double * __restrict f_nbond_z = patch->patchDataSOA.f_nbond_z;

const double * __restrict f_slow_x = patch->patchDataSOA.f_slow_x;
const double * __restrict f_slow_y = patch->patchDataSOA.f_slow_y;
const double * __restrict f_slow_z = patch->patchDataSOA.f_slow_z;

double * __restrict vel_x = patch->patchDataSOA.vel_x;
double * __restrict vel_y = patch->patchDataSOA.vel_y;
double * __restrict vel_z = patch->patchDataSOA.vel_z;
if (this->patch->getPatchID() == 538) {

  fprintf(stderr, "Old Velocities %lf %lf %lf\n", vel_x[0], vel_y[0], vel_z[0]);
  fprintf(stderr, "Adding forces %lf %lf %lf %lf %lf %lf %lf %lf %lf\n",
      f_slow_x[43], f_slow_y[43], f_slow_z[43],
      f_nbond_x[43], f_nbond_y[43], f_nbond_z[43],
      f_normal_x[43], f_normal_y[43], f_normal_z[43]);
switch (maxForceNumber) {
  // (case labels elided in this excerpt; the cases fall through so that the
  //  slow kick is followed by the nonbonded and normal kicks)
    for (int i=0; i < numAtoms; i++) {
      vel_x[i] += f_slow_x[i] * recipMass[i] * dt_slow;
      vel_y[i] += f_slow_y[i] * recipMass[i] * dt_slow;
      vel_z[i] += f_slow_z[i] * recipMass[i] * dt_slow;
    }

    dt_nbond *= scaling;
    for (int i=0; i < numAtoms; i++) {
      vel_x[i] += f_nbond_x[i] * recipMass[i] * dt_nbond;
      vel_y[i] += f_nbond_y[i] * recipMass[i] * dt_nbond;
      vel_z[i] += f_nbond_z[i] * recipMass[i] * dt_nbond;
    }

    dt_normal *= scaling;
    for (int i=0; i < numAtoms; i++) {
      vel_x[i] += f_normal_x[i] * recipMass[i] * dt_normal;
      vel_y[i] += f_normal_y[i] * recipMass[i] * dt_normal;
      vel_z[i] += f_normal_z[i] * recipMass[i] * dt_normal;
    }
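// Multiple-time-stepping velocity kick: each force class contributes
// v += f * recipMass * (dt_class * scaling), each class with its own
// effective timestep (dt_slow, dt_nbond, dt_normal).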
const double * __restrict vel_x,
const double * __restrict vel_y,
const double * __restrict vel_z,
double * __restrict pos_x,
double * __restrict pos_y,
double * __restrict pos_z,

NamdProfileEvent::ADD_VELOCITY_TO_POSITION_SOA);
#ifdef SOA_SIMPLIFY_PARAMS
const double * __restrict vel_x = patch->patchDataSOA.vel_x;
const double * __restrict vel_y = patch->patchDataSOA.vel_y;
const double * __restrict vel_z = patch->patchDataSOA.vel_z;
double * __restrict pos_x = patch->patchDataSOA.pos_x;
double * __restrict pos_y = patch->patchDataSOA.pos_y;
double * __restrict pos_z = patch->patchDataSOA.pos_z;

for (int i=0; i < numAtoms; i++) {
  pos_x[i] += vel_x[i] * dt;
  pos_y[i] += vel_y[i] * dt;
  pos_z[i] += vel_z[i] * dt;
}

if (this->patch->getPatchID() == 538) {
  fprintf(stderr, "New Positions %lf %lf %lf\n", pos_x[43], pos_y[43], pos_z[43]);
  fprintf(stderr, "New Velocities %lf %lf %lf\n", vel_x[43], vel_y[43], vel_z[43]);
const int * __restrict hydrogenGroupSize,
const float * __restrict mass,
const double * __restrict vel_x,
const double * __restrict vel_y,
const double * __restrict vel_z,

NamdProfileEvent::SUBMIT_HALFSTEP_SOA);
#ifdef SOA_SIMPLIFY_PARAMS

const float * __restrict mass = patch->patchDataSOA.mass;
const double * __restrict vel_x = patch->patchDataSOA.vel_x;
const double * __restrict vel_y = patch->patchDataSOA.vel_y;
const double * __restrict vel_z = patch->patchDataSOA.vel_z;

for (int i=0; i < numAtoms; i++) {

  kineticEnergy += mass[i] *
      (vel_x[i]*vel_x[i] + vel_y[i]*vel_y[i] + vel_z[i]*vel_z[i]);

  virial.xx += mass[i] * vel_x[i] * vel_x[i];
  virial.xy += mass[i] * vel_x[i] * vel_y[i];
  virial.xz += mass[i] * vel_x[i] * vel_z[i];
  virial.yx += mass[i] * vel_y[i] * vel_x[i];
  virial.yy += mass[i] * vel_y[i] * vel_y[i];
  virial.yz += mass[i] * vel_y[i] * vel_z[i];
  virial.zx += mass[i] * vel_z[i] * vel_x[i];
  virial.zy += mass[i] * vel_z[i] * vel_y[i];
  virial.zz += mass[i] * vel_z[i] * vel_z[i];

kineticEnergy *= 0.5 * 0.5;
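// One factor of 0.5 is the usual (1/2) m v^2; the second halves the
// contribution because this quantity is submitted twice per step, once for
// each half-kick.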
for (int i=0; i < numAtoms; i += hgs) {

  hgs = hydrogenGroupSize[i];

  for (int j = i; j < (i+hgs); j++) {

    v_cm_x += mass[j] * vel_x[j];
    v_cm_y += mass[j] * vel_y[j];
    v_cm_z += mass[j] * vel_z[j];
  }
  BigReal recip_m_cm = 1.0 / m_cm;
  v_cm_x *= recip_m_cm;
  v_cm_y *= recip_m_cm;
  v_cm_z *= recip_m_cm;

  for (int j = i; j < (i+hgs); j++) {
    BigReal dv_x = vel_x[j] - v_cm_x;
    BigReal dv_y = vel_y[j] - v_cm_y;
    BigReal dv_z = vel_z[j] - v_cm_z;

    intKineticEnergy += mass[j] *
        (vel_x[j] * dv_x + vel_y[j] * dv_y + vel_z[j] * dv_z);

    intVirialNormal.xx += mass[j] * vel_x[j] * dv_x;
    intVirialNormal.xy += mass[j] * vel_x[j] * dv_y;
    intVirialNormal.xz += mass[j] * vel_x[j] * dv_z;
    intVirialNormal.yx += mass[j] * vel_y[j] * dv_x;
    intVirialNormal.yy += mass[j] * vel_y[j] * dv_y;
    intVirialNormal.yz += mass[j] * vel_y[j] * dv_z;
    intVirialNormal.zx += mass[j] * vel_z[j] * dv_x;
    intVirialNormal.zy += mass[j] * vel_z[j] * dv_y;
    intVirialNormal.zz += mass[j] * vel_z[j] * dv_z;

intKineticEnergy *= 0.5 * 0.5;
intVirialNormal *= 0.5;

  += intKineticEnergy;
const int * __restrict hydrogenGroupSize,
const float * __restrict mass,
const double * __restrict pos_x,
const double * __restrict pos_y,
const double * __restrict pos_z,
const double * __restrict vel_x,
const double * __restrict vel_y,
const double * __restrict vel_z,
const double * __restrict f_normal_x,
const double * __restrict f_normal_y,
const double * __restrict f_normal_z,
const double * __restrict f_nbond_x,
const double * __restrict f_nbond_y,
const double * __restrict f_nbond_z,
const double * __restrict f_slow_x,
const double * __restrict f_slow_y,
const double * __restrict f_slow_z,

NamdProfileEvent::SUBMIT_REDUCTIONS_SOA);
#ifdef SOA_SIMPLIFY_PARAMS

const float * __restrict mass = patch->patchDataSOA.mass;
const double * __restrict pos_x = patch->patchDataSOA.pos_x;
const double * __restrict pos_y = patch->patchDataSOA.pos_y;
const double * __restrict pos_z = patch->patchDataSOA.pos_z;
const double * __restrict vel_x = patch->patchDataSOA.vel_x;
const double * __restrict vel_y = patch->patchDataSOA.vel_y;
const double * __restrict vel_z = patch->patchDataSOA.vel_z;
const double * __restrict f_normal_x = patch->patchDataSOA.f_normal_x;
const double * __restrict f_normal_y = patch->patchDataSOA.f_normal_y;
const double * __restrict f_normal_z = patch->patchDataSOA.f_normal_z;
const double * __restrict f_nbond_x = patch->patchDataSOA.f_nbond_x;
const double * __restrict f_nbond_y = patch->patchDataSOA.f_nbond_y;
const double * __restrict f_nbond_z = patch->patchDataSOA.f_nbond_z;
const double * __restrict f_slow_x = patch->patchDataSOA.f_slow_x;
const double * __restrict f_slow_y = patch->patchDataSOA.f_slow_y;
const double * __restrict f_slow_z = patch->patchDataSOA.f_slow_z;
BigReal angularMomentum_x = 0;
BigReal angularMomentum_y = 0;
BigReal angularMomentum_z = 0;

for (int i=0; i < numAtoms; i++) {

  kineticEnergy += mass[i] *
      (vel_x[i]*vel_x[i] + vel_y[i]*vel_y[i] + vel_z[i]*vel_z[i]);

  momentum_x += mass[i] * vel_x[i];
  momentum_y += mass[i] * vel_y[i];
  momentum_z += mass[i] * vel_z[i];

  BigReal dpos_x = pos_x[i] - origin_x;
  BigReal dpos_y = pos_y[i] - origin_y;
  BigReal dpos_z = pos_z[i] - origin_z;

  angularMomentum_x += mass[i] * (dpos_y*vel_z[i] - dpos_z*vel_y[i]);
  angularMomentum_y += mass[i] * (dpos_z*vel_x[i] - dpos_x*vel_z[i]);
  angularMomentum_z += mass[i] * (dpos_x*vel_y[i] - dpos_y*vel_x[i]);

kineticEnergy *= 0.5;
Vector momentum(momentum_x, momentum_y, momentum_z);
Vector angularMomentum(angularMomentum_x, angularMomentum_y,
for (int i=0; i < numAtoms; i += hgs) {
  hgs = hydrogenGroupSize[i];

  for ( j = i; j < (i+hgs); ++j ) {

    r_cm_x += mass[j] * pos_x[j];
    r_cm_y += mass[j] * pos_y[j];
    r_cm_z += mass[j] * pos_z[j];
    v_cm_x += mass[j] * vel_x[j];
    v_cm_y += mass[j] * vel_y[j];
    v_cm_z += mass[j] * vel_z[j];

  for ( j = i; j < (i+hgs); ++j ) {

    intKineticEnergy += mass[j] *
        (v_x * dv_x + v_y * dv_y + v_z * dv_z);

    BigReal dr_x = pos_x[j] - r_cm_x;
    BigReal dr_y = pos_y[j] - r_cm_y;
    BigReal dr_z = pos_z[j] - r_cm_z;

    intVirialNormal.xx += f_normal_x[j] * dr_x;
    intVirialNormal.xy += f_normal_x[j] * dr_y;
    intVirialNormal.xz += f_normal_x[j] * dr_z;
    intVirialNormal.yx += f_normal_y[j] * dr_x;
    intVirialNormal.yy += f_normal_y[j] * dr_y;
    intVirialNormal.yz += f_normal_y[j] * dr_z;
    intVirialNormal.zx += f_normal_z[j] * dr_x;
    intVirialNormal.zy += f_normal_z[j] * dr_y;
    intVirialNormal.zz += f_normal_z[j] * dr_z;

    intVirialNbond.xx += f_nbond_x[j] * dr_x;
    intVirialNbond.xy += f_nbond_x[j] * dr_y;
    intVirialNbond.xz += f_nbond_x[j] * dr_z;
    intVirialNbond.yx += f_nbond_y[j] * dr_x;
    intVirialNbond.yy += f_nbond_y[j] * dr_y;
    intVirialNbond.yz += f_nbond_y[j] * dr_z;
    intVirialNbond.zx += f_nbond_z[j] * dr_x;
    intVirialNbond.zy += f_nbond_z[j] * dr_y;
    intVirialNbond.zz += f_nbond_z[j] * dr_z;

    intVirialSlow.xx += f_slow_x[j] * dr_x;
    intVirialSlow.xy += f_slow_x[j] * dr_y;
    intVirialSlow.xz += f_slow_x[j] * dr_z;
    intVirialSlow.yx += f_slow_y[j] * dr_x;
    intVirialSlow.yy += f_slow_y[j] * dr_y;
    intVirialSlow.yz += f_slow_y[j] * dr_z;
    intVirialSlow.zx += f_slow_z[j] * dr_x;
    intVirialSlow.zy += f_slow_z[j] * dr_y;
    intVirialSlow.zz += f_slow_z[j] * dr_z;

intKineticEnergy *= 0.5;
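// Internal virial per force class: W += f_j (outer product) (r_j - r_cm),
// accumulated component-wise for the normal, nonbonded, and slow forces of
// each hydrogen group.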
NamdProfileEvent::SUBMIT_COLLECTIONS_SOA);

if ( is_pos_needed || is_vel_needed ) {
  patch->copy_updates_to_AOS();

patch->copy_forces_to_AOS();

if ( is_pos_needed ) {

if ( is_vel_needed ) {

if ( is_f_needed ) {
const double maxvel2

const double * __restrict vel_x,
const double * __restrict vel_y,
const double * __restrict vel_z,

#ifdef SOA_SIMPLIFY_PARAMS
const double * __restrict vel_x = patch->patchDataSOA.vel_x;
const double * __restrict vel_y = patch->patchDataSOA.vel_y;
const double * __restrict vel_z = patch->patchDataSOA.vel_z;

for (int i=0; i < numAtoms; i++) {

  vel_x[i] * vel_x[i] + vel_y[i] * vel_y[i] + vel_z[i] * vel_z[i];
  killme = killme + ( vel2 > maxvel2 );

for (int i=0; i < numAtoms; i++) {

  vel_x[i] * vel_x[i] + vel_y[i] * vel_y[i] + vel_z[i] * vel_z[i];
  if (vel2 > maxvel2) {

    const Vector vel(vel_x[i], vel_y[i], vel_z[i]);
    const BigReal maxvel = sqrt(maxvel2);

    iout << iERROR << "Atom " << (a[i].id + 1) << " velocity is "
        << i << " of " << numAtoms << " on patch "

    "Atoms moving too fast; simulation has become unstable ("
        << " pe " << CkMyPe() << ").\n" << endi;
const float * __restrict langevinParam,
double * __restrict vel_x,
double * __restrict vel_y,
double * __restrict vel_z,

NamdProfileEvent::LANGEVIN_VELOCITIES_BBK1_SOA);
#ifdef SOA_SIMPLIFY_PARAMS

double * __restrict vel_x = patch->patchDataSOA.vel_x;
double * __restrict vel_y = patch->patchDataSOA.vel_y;
double * __restrict vel_z = patch->patchDataSOA.vel_z;

for (int i=0; i < numAtoms; i++) {
  BigReal dt_gamma = dt * langevinParam[i];

  BigReal scaling = 1. - 0.5 * dt_gamma;
  vel_x[i] *= scaling;
  vel_y[i] *= scaling;
  vel_z[i] *= scaling;
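// First half of the BBK Langevin update: velocities are damped by
// (1 - gamma*dt/2), where gamma is the per-atom langevinParam.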
const float * __restrict langevinParam,
const float * __restrict langScalVelBBK2,
const float * __restrict langScalRandBBK2,
float * __restrict gaussrand_x,
float * __restrict gaussrand_y,
float * __restrict gaussrand_z,
double * __restrict vel_x,
double * __restrict vel_y,
double * __restrict vel_z,

NamdProfileEvent::LANGEVIN_VELOCITIES_BBK2_SOA);
#ifdef SOA_SIMPLIFY_PARAMS

double * __restrict vel_x = patch->patchDataSOA.vel_x;
double * __restrict vel_y = patch->patchDataSOA.vel_y;
double * __restrict vel_z = patch->patchDataSOA.vel_z;

for (int i=0; i < numAtoms; i++) {

  gaussrand_x[i] = float(rg.x);
  gaussrand_y[i] = float(rg.y);
  gaussrand_z[i] = float(rg.z);

for (int i=0; i < numAtoms; i++) {
  vel_x[i] += gaussrand_x[i] * langScalRandBBK2[i];
  vel_y[i] += gaussrand_y[i] * langScalRandBBK2[i];
  vel_z[i] += gaussrand_z[i] * langScalRandBBK2[i];
  vel_x[i] *= langScalVelBBK2[i];
  vel_y[i] *= langScalVelBBK2[i];
  vel_z[i] *= langScalVelBBK2[i];
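// Second half of BBK: add a Gaussian random kick scaled by langScalRandBBK2
// (which carries the sqrt(2*gamma*kT*dt/m) magnitude, cf. the AOS variant
// later in this file), then scale by langScalVelBBK2 = 1/(1 + gamma*dt/2).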
const int * __restrict hydrogenGroupSize,
const float * __restrict mass,
double * __restrict pos_x,
double * __restrict pos_y,
double * __restrict pos_z,

#ifdef SOA_SIMPLIFY_PARAMS

const float * __restrict mass = patch->patchDataSOA.mass;
double * __restrict pos_x = patch->patchDataSOA.pos_x;
double * __restrict pos_y = patch->patchDataSOA.pos_y;
double * __restrict pos_z = patch->patchDataSOA.pos_z;

for (int i = 0; i < numAtoms; i += hgs) {

  hgs = hydrogenGroupSize[i];

  for ( j = i; j < (i+hgs); ++j ) {

    r_cm_x += mass[j] * pos_x[j];
    r_cm_y += mass[j] * pos_y[j];
    r_cm_z += mass[j] * pos_z[j];

  double tx = r_cm_x - origin.x;
  double ty = r_cm_y - origin.y;
  double tz = r_cm_z - origin.z;

  double new_r_cm_x = factor.xx*tx + factor.xy*ty + factor.xz*tz;
  double new_r_cm_y = factor.yx*tx + factor.yy*ty + factor.yz*tz;
  double new_r_cm_z = factor.zx*tx + factor.zy*ty + factor.zz*tz;

  new_r_cm_x += origin.x;
  new_r_cm_y += origin.y;
  new_r_cm_z += origin.z;

  double delta_r_cm_x = new_r_cm_x - r_cm_x;
  double delta_r_cm_y = new_r_cm_y - r_cm_y;
  double delta_r_cm_z = new_r_cm_z - r_cm_z;

  for (j = i; j < (i+hgs); ++j) {
    pos_x[j] += delta_r_cm_x;
    pos_y[j] += delta_r_cm_y;
    pos_z[j] += delta_r_cm_z;
  }
for (int i = 0; i < numAtoms; ++i) {

  double tx = pos_x[i] - origin.x;
  double ty = pos_y[i] - origin.y;
  double tz = pos_z[i] - origin.z;

  double ftx = factor.xx*tx + factor.xy*ty + factor.xz*tz;
  double fty = factor.yx*tx + factor.yy*ty + factor.yz*tz;
  double ftz = factor.zx*tx + factor.zy*ty + factor.zz*tz;

  pos_x[i] = ftx + origin.x;
  pos_y[i] = fty + origin.y;
  pos_z[i] = ftz + origin.z;
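// Barostat position scaling about the cell origin: r' = origin +
// factor*(r - origin), applied either per hydrogen group (translating the
// whole group rigidly by its center-of-mass displacement, above) or
// independently per atom (this loop).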
const int * __restrict hydrogenGroupSize,
const float * __restrict mass,
double * __restrict pos_x,
double * __restrict pos_y,
double * __restrict pos_z,
double * __restrict vel_x,
double * __restrict vel_y,
double * __restrict vel_z,

#ifdef SOA_SIMPLIFY_PARAMS

const float * __restrict mass = patch->patchDataSOA.mass;
double * __restrict pos_x = patch->patchDataSOA.pos_x;
double * __restrict pos_y = patch->patchDataSOA.pos_y;
double * __restrict pos_z = patch->patchDataSOA.pos_z;
double * __restrict vel_x = patch->patchDataSOA.vel_x;
double * __restrict vel_y = patch->patchDataSOA.vel_y;
double * __restrict vel_z = patch->patchDataSOA.vel_z;

for (int i=0; i < numAtoms; i += hgs) {

  hgs = hydrogenGroupSize[i];

  for ( j = i; j < (i+hgs); ++j ) {

    r_cm_x += mass[j] * pos_x[j];
    r_cm_y += mass[j] * pos_y[j];
    r_cm_z += mass[j] * pos_z[j];
    v_cm_x += mass[j] * vel_x[j];
    v_cm_y += mass[j] * vel_y[j];
    v_cm_z += mass[j] * vel_z[j];

  double tx = r_cm_x - origin.x;
  double ty = r_cm_y - origin.y;
  double tz = r_cm_z - origin.z;
  double new_r_cm_x = factor.xx*tx + factor.xy*ty + factor.xz*tz;
  double new_r_cm_y = factor.yx*tx + factor.yy*ty + factor.yz*tz;
  double new_r_cm_z = factor.zx*tx + factor.zy*ty + factor.zz*tz;
  new_r_cm_x += origin.x;
  new_r_cm_y += origin.y;
  new_r_cm_z += origin.z;

  double delta_r_cm_x = new_r_cm_x - r_cm_x;
  double delta_r_cm_y = new_r_cm_y - r_cm_y;
  double delta_r_cm_z = new_r_cm_z - r_cm_z;

  double delta_v_cm_x = ( velFactor_x - 1 ) * v_cm_x;
  double delta_v_cm_y = ( velFactor_y - 1 ) * v_cm_y;
  double delta_v_cm_z = ( velFactor_z - 1 ) * v_cm_z;
  for (j = i; j < (i+hgs); j++) {
    pos_x[j] += delta_r_cm_x;
    pos_y[j] += delta_r_cm_y;
    pos_z[j] += delta_r_cm_z;
    vel_x[j] += delta_v_cm_x;
    vel_y[j] += delta_v_cm_y;
    vel_z[j] += delta_v_cm_z;
  }
for (int i=0; i < numAtoms; i++) {
  double tx = pos_x[i] - origin.x;
  double ty = pos_y[i] - origin.y;
  double tz = pos_z[i] - origin.z;
  double ftx = factor.xx*tx + factor.xy*ty + factor.xz*tz;
  double fty = factor.yx*tx + factor.yy*ty + factor.yz*tz;
  double ftz = factor.zx*tx + factor.zy*ty + factor.zz*tz;
  pos_x[i] = ftx + origin.x;
  pos_y[i] = fty + origin.y;
  pos_z[i] = ftz + origin.z;
  vel_x[i] *= velFactor_x;
  vel_y[i] *= velFactor_y;
  vel_z[i] *= velFactor_z;
Tensor *vp = ( pressure ? &virial : 0 );

"Constraint failure; simulation has become unstable.\n" << endi;
#if (defined(NAMD_CUDA) || defined(NAMD_HIP)) || defined(NAMD_MIC)

#if defined(NTESTPID)

double *xyzq = new double[4*numAtoms];

for (int i=0; i < numAtoms; i++) {

char fname[128], remark[128];
sprintf(fname, "xyzq_soa_pid%d_step%d.bin", NTESTPID, step);
sprintf(remark, "SOA xyzq, patch %d, step %d", NTESTPID, step);
TestArray_write<double>(fname, remark, xyzq, 4*numAtoms);

patch->zero_global_forces_SOA();
int basePriority = ( (seq & 0xffff) << 15 )

#ifdef NODEGROUP_FORCE_REGISTER
patch->copy_forces_to_SOA();

patch->copy_forces_to_SOA();

#if defined(NTESTPID)

double *fxyz = new double[3*numAtoms];

for (int i=0; i < numAtoms; i++) {
  fxyz[3*i+1] = fy[i];
  fxyz[3*i+2] = fz[i];
}
sprintf(fname, "fxyz_normal_soa_pid%d_step%d.bin", NTESTPID, step);
sprintf(remark, "SOA fxyz normal, patch %d, step %d", NTESTPID, step);
TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);

for (int i=0; i < numAtoms; i++) {
  fxyz[3*i+1] = fy[i];
  fxyz[3*i+2] = fz[i];
}
sprintf(fname, "fxyz_nbond_soa_pid%d_step%d.bin", NTESTPID, step);
sprintf(remark, "SOA fxyz nonbonded, patch %d, step %d", NTESTPID, step);
TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);

for (int i=0; i < numAtoms; i++) {
  fxyz[3*i+1] = fy[i];
  fxyz[3*i+2] = fz[i];
}
sprintf(fname, "fxyz_slow_soa_pid%d_step%d.bin", NTESTPID, step);
sprintf(remark, "SOA fxyz slow, patch %d, step %d", NTESTPID, step);
TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
double *fxyz = new double[3*numAtoms];
double *fx, *fy, *fz;
char fname[64], remark[128];

for (int i=0; i < numAtoms; i++) {
  fxyz[3*i+1] = fy[i];
  fxyz[3*i+2] = fz[i];
}
sprintf(fname, "fslow_soa_%d.bin", step);
sprintf(remark, "SOA slow forces, step %d\n", step);
TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);

for (int i=0; i < numAtoms; i++) {
  fxyz[3*i+1] = fy[i];
  fxyz[3*i+2] = fz[i];
}
sprintf(fname, "fnbond_soa_%d.bin", step);
sprintf(remark, "SOA nonbonded forces, step %d\n", step);
TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);

for (int i=0; i < numAtoms; i++) {
  fxyz[3*i+1] = fy[i];
  fxyz[3*i+2] = fz[i];
}
sprintf(fname, "fnormal_soa_%d.bin", step);
sprintf(remark, "SOA normal forces, step %d\n", step);
TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
fprintf(stderr, "CPU force arrays for alanin\n");

fprintf(stderr, "f[%i] = %lf %lf %lf | %lf %lf %lf | %lf %lf %lf\n", i,
double * __restrict vel_x = patch->patchDataSOA.vel_x;
double * __restrict vel_y = patch->patchDataSOA.vel_y;
double * __restrict vel_z = patch->patchDataSOA.vel_z;

DebugM(4, "stochastically rescaling velocities at step " << step
    << " by " << velrescaling << "\n");
for (int i = 0; i < numAtoms; ++i) {
  vel_x[i] *= velrescaling;
  vel_y[i] *= velrescaling;
  vel_z[i] *= velrescaling;
#endif // SEQUENCER_SOA

char tracePrefix[20];

#ifdef TIMER_COLLECTION
TimerSet& t = patch->timerSet;
const BigReal nbondstep = timestep * (staleForces ? 1 : nonbondedFrequency);

doNonbonded = (step >= numberOfSteps) || !(step%nonbondedFrequency);

if ( dofull ) slowFreq = fullElectFrequency;
const BigReal slowstep = timestep * (staleForces ? 1 : fullElectFrequency);

doFullElectrostatics = (dofull && ((step >= numberOfSteps) || !(step%fullElectFrequency)));

if ( accelMDOn && (accelMDdihe || accelMDdual)) maxForceUsed = Results::amdf;

int doGlobal = doTcl || doColvars;

#if defined(NAMD_CUDA) || defined(NAMD_HIP)

if ( !commOnly && ( reassignFreq>0 ) && ! (step%reassignFreq) ) {

doEnergy = ! ( step % energyFrequency );

if ( accelMDOn && !accelMDdihe ) doEnergy=1;

if ( adaptTempOn ) doEnergy=1;
D_MSG("runComputeObjects()");

if ( staleForces || doGlobal ) {

D_MSG("newtonianVelocities()");

D_MSG("submitHalfstep()");

D_MSG("newtonianVelocities()");

D_MSG("submitHalfstep()");

if ( zeroMomentum && doFullElectrostatics ) submitMomentum(step);

D_MSG("newtonianVelocities()");

D_MSG("submitReductions()");

sprintf(traceNote, "%s%d", tracePrefix, step);
traceUserSuppliedNote(traceNote);
bool doMultigratorRattle = false;

#if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)

int beginStep = simParams->beginEventStep;

for ( ++step; step <= numberOfSteps; ++step )
{
#if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
  eon = epid && (beginStep < step && step <= endStep);

  if (controlProfiling && step == beginStep) {

  if (controlProfiling && step == endStep) {

  DebugM(3, "for step " << step << " dGlobal " << doGlobal << "\n" << endi);

  newtonianVelocities(0.5,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);

  if (doMultigratorRattle) rattle1(timestep, 1);
#endif // UPPER_BOUND
doNonbonded = !(step%nonbondedFrequency);
doFullElectrostatics = (dofull && !(step%fullElectFrequency));

if ( zeroMomentum && doFullElectrostatics ) {

if ( accelMDOn && (accelMDdihe || accelMDdual)) maxForceUsed = Results::amdf;

doEnergy = ! ( step % energyFrequency );
if ( accelMDOn && !accelMDdihe ) doEnergy=1;
if ( adaptTempOn ) doEnergy=1;

if ( staleForces || doGlobal ) {

if ( !commOnly && ( reassignFreq>0 ) && ! (step%reassignFreq) ) {

newtonianVelocities(-0.5,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);

newtonianVelocities(1.0,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);

if ( zeroMomentum && doFullElectrostatics ) submitMomentum(step);

newtonianVelocities(-0.5,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
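// The -0.5 / 1.0 / -0.5 pattern of newtonianVelocities kicks brackets
// on-step velocities for output and reassignment: a half-kick is undone to
// land on the integer step, then the full and closing half-kicks restore the
// half-step (leapfrog) state.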
if (step == bstep || step == estep) {

#ifdef MEASURE_NAMD_WITH_PAPI

int bstep = simParams->papiMeasureStartStep;
int estep = bstep + simParams->numPapiMeasureSteps;
if (step == bstep || step == estep) {
  papiMeasureBarrier(step);

sprintf(traceNote, "%s%d", tracePrefix, step);
traceUserSuppliedNote(traceNote);

#endif // UPPER_BOUND

cycleBarrier(dofull && !((step+1)%fullElectFrequency), step);

if (step == START_HPM_STEP)
  (CProxy_Node(CkpvAccess(BOCclass_group).node)).startHPM();

if (step == STOP_HPM_STEP)
  (CProxy_Node(CkpvAccess(BOCclass_group).node)).stopHPM();

#ifdef TIMER_COLLECTION
printf("Timer collection reporting in microseconds for "
#endif // TIMER_COLLECTION

Vector movDragVel, dragIncrement;
"Timer collection reporting in microseconds for " 4387 #endif // TIMER_COLLECTION 4401 Vector movDragVel, dragIncrement;
4402 for (
int i = 0; i < numAtoms; ++i )
4408 dragIncrement = movDragGlobVel * movDragVel * dt;
4422 Vector rotDragAxis, rotDragPivot, dragIncrement;
4423 for (
int i = 0; i < numAtoms; ++i )
4429 dAngle = rotDragGlobVel * rotDragVel * dt;
4430 rotDragAxis /= rotDragAxis.
length();
4431 atomRadius = atom[i].
position - rotDragPivot;
4432 dragIncrement = cross(rotDragAxis, atomRadius) * dAngle;
4446 #if 0 && defined(NODEGROUP_FORCE_REGISTER) 4449 const int stepsPerCycle_save = stepsPerCycle;
4465 doFullElectrostatics = dofull;
4487 int doGlobal = doTcl || doColvars;
4506 #ifdef DEBUG_MINIMIZE 4507 printf(
"doTcl = %d doColvars = %d\n", doTcl, doColvars);
4513 #ifdef DEBUG_MINIMIZE 4514 else { printf(
"No computeGlobal\n"); }
4524 for ( ++step; step <= numberOfSteps; ++step ) {
4567 patch->copy_atoms_to_SOA();
4571 #if 0 && defined(NODEGROUP_FORCE_REGISTER) 4589 for (
int i = 0; i < numAtoms; ++i ) {
4595 for (
int j=1; j<hgs; ++j ) {
4613 for (
int i = 0; i < numAtoms; ++i ) {
4616 if ( drudeHardWallOn && i && (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) {
4619 if ( fixedAtomsOn && a[i].atomFixed ) a[i].
velocity = 0;
4621 if ( v2 > maxv2 ) maxv2 = v2;
for (int i = 0; i < numAtoms; ++i) {
  if ( drudeHardWallOn && i && (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) {

  if ( fixedAtomsOn && a[i].atomFixed ) a[i].velocity = 0;

  if ( v2 > maxv2 ) maxv2 = v2;

for (int i = 0; i < numAtoms; i += hgs) {

  if ( minChildVel < fmax2 ) continue;
  int adjustChildren = 1;
  for (int j = i+1; j < (i+hgs); ++j) {
    if ( a[j].velocity.length2() > minChildVel ) adjustChildren = 0;
  }
  if ( adjustChildren ) {

    for (int j = i+1; j < (i+hgs); ++j) {
      if (a[i].mass < 0.01) continue;
for (int i = 0; i < numAtoms; ++i) {

for (int i = 1; i < numAtoms; ++i) {
  if ( (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) {

for (int i = 1; i < numAtoms; ++i) {
  if ( (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) {

for (int i = 0; i < numAtoms; ++i) {

for (int i = 0; i < numAtoms; ++i) {

for (int i = 0; i < numAtoms; ++i) {

NAMD_die("Cannot zero momentum when fixed atoms are present.");
for (int i = 0; i < numAtoms; ++i) {
  a[i].velocity += dv * a[i].recipMass;
  a[i].position += dx * a[i].recipMass;
}

for (int i = 0; i < numAtoms; ++i) {
  a[i].velocity += dv;
  a[i].position += dx;
}

NAMD_bug("Sequencer::scalePositionsVelocities, fixed atoms not implemented");
for (int i = 0; i < numAtoms; i += hgs) {

  for (int j=0; j < hgs; ++j) {
    m_cm += a[i+j].mass;

  for (int j=0; j < hgs; ++j) {

for (int i = 0; i < numAtoms; i++) {

Tensor posScaleTensor = scaleTensor;
for (int i = 0; i < numAtoms; ++i) {

  virialNormal.outerAdd(a[i].mass, a[i].velocity, a[i].velocity);

for (int i = 0; i < numAtoms; ++i) {
  if (a[i].mass < 0.01) continue;

  virialNormal.outerAdd(a[i].mass, a[i].velocity, a[i].velocity);

kineticEnergy *= 0.5;

Vector fixForceNormal = 0;
Vector fixForceNbond = 0;

calcFixVirial(fixVirialNormal, fixVirialNbond, fixVirialSlow, fixForceNormal, fixForceNbond, fixForceSlow);
for (int i = 0; i < numAtoms; i += hgs) {

  for ( j = i; j < (i+hgs); ++j ) {

  NAMD_bug("Sequencer::multigratorPressure, this part needs to be implemented correctly");
  for ( j = i; j < (i+hgs); ++j ) {

    intVirialNormal2.outerAdd(mass, v, dv);

  for ( j = i; j < (i+hgs); ++j ) {

    intVirialNormal2.outerAdd(mass, v, dv);
for (int i = 0; i < numAtoms; i++) {

for (int i = 0; i < numAtoms; ++i) {

for (int i = 0; i < numAtoms; ++i) {

kineticEnergy *= 0.5;
return kineticEnergy;

for (int i = 0; i < numAtoms; i += hgs) {

  for ( j = i; j < (i+hgs); ++j ) {

  momentumSqrSum.outerAdd(1.0/m_cm, v_cm, v_cm);

for (int i = 0; i < numAtoms; i++) {
  momentumSqrSum.outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
const int staleForces,
const int doNonbonded,
const int doFullElectrostatics)

NamdProfileEvent::NEWTONIAN_VELOCITIES);

if (staleForces || (doNonbonded && doFullElectrostatics)) {

if (staleForces || doNonbonded)

if (staleForces || doFullElectrostatics)
for (int i = 0; i < numAtoms; ++i)
{
  if ( ! dt_gamma ) continue;

  BigReal f1 = exp( -dt_gamma );
  BigReal f2 = sqrt( ( 1. - f1*f1 ) * kbT *
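// Exact Ornstein-Uhlenbeck velocity update for Langevin dynamics:
// v <- f1*v + f2*xi, with f1 = exp(-gamma*dt) and f2 = sqrt((1 - f1^2)*kT/m)
// (the truncated sqrt above is presumably completed by the per-atom mass
// factor); xi is a unit Gaussian triple.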
NamdProfileEvent::LANGEVIN_VELOCITIES_BBK1);

for (i = 0; i < numAtoms; i++) {

  if (i < numAtoms-1 &&
      a[i+1].mass < 1.0 && a[i+1].mass > 0.05) {

  if (dt_gamma != 0.0) {
    v_com *= ( 1. - 0.5 * dt_gamma );

  if (dt_gamma != 0.0) {
    v_bnd *= ( 1. - 0.5 * dt_gamma );

  if ( ! dt_gamma ) continue;

  a[i].velocity *= ( 1. - 0.5 * dt_gamma );
for ( i = 0; i < numAtoms; ++i )
{
  if ( ! dt_gamma ) continue;

  a[i].velocity *= ( 1. - 0.5 * dt_gamma );
5165 for (i = 0; i < numAtoms; i++) {
5167 if (i < numAtoms-1 &&
5168 a[i+1].mass < 1.0 && a[i+1].mass > 0.05) {
5180 if (dt_gamma != 0.0) {
5183 sqrt( 2 * dt_gamma * kbT *
5184 ( a[i].
partition ? tempFactor : 1.0 ) / mass );
5185 v_com /= ( 1. + 0.5 * dt_gamma );
5190 if (dt_gamma != 0.0) {
5193 sqrt( 2 * dt_gamma * kbT_bnd *
5194 ( a[i+1].
partition ? tempFactor : 1.0 ) / mass );
5195 v_bnd /= ( 1. + 0.5 * dt_gamma );
5206 if ( ! dt_gamma )
continue;
5209 sqrt( 2 * dt_gamma * kbT *
5210 ( a[i].
partition ? tempFactor : 1.0 ) * a[i].recipMass );
5211 a[i].
velocity /= ( 1. + 0.5 * dt_gamma );
5225 for ( i = 0; i < numAtoms; ++i )
5228 if ( ! dt_gamma )
continue;
5231 sqrt( 2 * dt_gamma * kbT *
5232 ( a[i].
partition ? tempFactor : 1.0 ) * a[i].recipMass );
5233 a[i].
velocity /= ( 1. + 0.5 * dt_gamma );
for (int i = 0; i < numAtoms; i += hgs) {

  for ( j = i; j < (i+hgs); ++j ) {
      a[j].fixedPosition, a[j].transform);

  for ( j = i; j < (i+hgs); ++j ) {

  Position delta_x_cm = new_x_cm - x_cm;
  for ( j = i; j < (i+hgs); ++j ) {
      a[j].fixedPosition, a[j].transform);

for (int i = 0; i < numAtoms; ++i)
{
    a[i].fixedPosition, a[i].transform);
for (int i = 0; i < numAtoms; i += hgs) {

  for ( j = i; j < (i+hgs); ++j ) {
      a[j].fixedPosition, a[j].transform);

  for ( j = i; j < (i+hgs); ++j ) {

  Position delta_x_cm = new_x_cm - x_cm;

  delta_v_cm.x = ( velFactor.x - 1 ) * v_cm.x;
  delta_v_cm.y = ( velFactor.y - 1 ) * v_cm.y;
  delta_v_cm.z = ( velFactor.z - 1 ) * v_cm.z;
  for ( j = i; j < (i+hgs); ++j ) {
      a[j].fixedPosition, a[j].transform);

for (int i = 0; i < numAtoms; ++i)
{
    a[i].fixedPosition, a[i].transform);
if ( rescaleFreq > 0 ) {

for (int i = 0; i < numAtoms; ++i)
const BigReal factor_dihe = accelMDfactor[0];
const BigReal factor_tot = accelMDfactor[1];

NAMD_die("accelMD broadcasting error!\n");

NAMD_die("accelMD broadcasting error!\n");

for (int i = 0; i < numAtoms; ++i)

for (int i = 0; i < numAtoms; ++i)

for (int i = 0; i < numAtoms; ++i)

if (doFullElectrostatics) {
  for (int i = 0; i < numAtoms; ++i)

for (int i = 0; i < numAtoms; ++i)
if ( (step < simParams->adaptTempFirstStep ) ||

if ( ( reassignFreq > 0 ) && ! ( step % reassignFreq ) ) {

if ( newTemp < simParams->reassignHold )

for (int i = 0; i < numAtoms; ++i)
{
  sqrt(kbT * (a[i].partition ? tempFactor : 1.0) * a[i].recipMass) *

NAMD_bug("Sequencer::reassignVelocities called improperly!");
for (int i = 0; i < numAtoms; ++i)
{
  a[i].mass <= 0.) ? Vector(0,0,0) :
  sqrt(kbT * (a[i].partition ? tempFactor : 1.0) * a[i].recipMass) *

for (int i = 0; i < numAtoms; ++i)

for (int i = 0; i < numAtoms; ++i)

BigReal sqrt_factor = sqrt(factor);

for (int i = 0; i < numAtoms; ++i) {

for (int i = 0; i < numAtoms; ++i)
{
  BigReal f1 = exp( coefficient * a[i].langevinParam );

DebugM(4, "stochastically rescaling velocities at step " << step
    << " by " << velrescaling << "\n");
for (int i = 0; i < numAtoms; ++i) {
5608 BigReal timestep,
const int ftag,
const int useSaved
5611 NamdProfileEvent::ADD_FORCE_TO_MOMENTUM);
5613 CmiNetworkProgressAfter (0);
void Sequencer::addForceToMomentum3(
    const BigReal timestep1, const int ftag1, const int useSaved1,
    const BigReal timestep2, const int ftag2, const int useSaved2,
    const BigReal timestep3, const int ftag3, const int useSaved3
    )
{
  NAMD_EVENT_RANGE_2(patch->flags.event_on,
      NamdProfileEvent::ADD_FORCE_TO_MOMENTUM);
  // ...
  CmiNetworkProgressAfter (0);
// Sequencer::addVelocityToPosition() -- drift step: x += dt * v.
  NAMD_EVENT_RANGE_2(patch->flags.event_on,
      NamdProfileEvent::ADD_VELOCITY_TO_POSITION);
  // ...
  CmiNetworkProgressAfter (0);
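// --- Illustrative sketch (not from the original source) ---------------------
// The kick/drift pair above in its simplest form: addForceToMomentum applies
// v += dt * f/m (NAMD stores recipMass = 1/m), and addVelocityToPosition
// applies x += dt * v. addForceToMomentum3 is just three kicks with the
// normal, nonbonded, and slow timesteps. KDAtom is a local assumption.
struct KDAtom { double x, v, f, recipMass; };

inline void kick (KDAtom &a, double dt) { a.v += dt * a.f * a.recipMass; }
inline void drift(KDAtom &a, double dt) { a.x += dt * a.v; }
// ---------------------------------------------------------------------------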
// Sequencer::hardWallDrude() -- enforce the Drude hard-wall constraint; the
// virial pointer is passed only when pressure is required.
  Tensor *vp = ( pressure ? &virial : 0 );
  if ( patch->hardWallDrude(dt, vp, pressureProfileReduction) ) {
    iout << iERROR << "Constraint failure in HardWallDrude(); "
      << "simulation may become unstable.\n" << endi;
    // ...
  }
// ...
// Sequencer::rattle1() -- RATTLE constraints on bonds involving hydrogen.
  Tensor *vp = ( pressure ? &virial : 0 );
  if ( patch->rattle1(dt, vp, pressureProfileReduction) ) {
    iout << iERROR <<
      "Constraint failure; simulation has become unstable.\n" << endi;
    // ...
  }
  printf("virial = %g %g %g %g %g %g %g %g %g\n",
      virial.xx, virial.xy, virial.xz,
      virial.yx, virial.yy, virial.yz,
      virial.zx, virial.zy, virial.zz);
  // ...
  printf("pos[%d] = %g %g %g\n", n, /* ... */);
  // ...
  printf("vel[%d] = %g %g %g\n", n, /* ... */);
  // ...
  printf("force[%d] = %g %g %g\n", n, /* ... */);
// Sequencer::maximumMove() -- if a maximum per-step move is configured, clamp
// velocities to it; otherwise any atom moving faster than the limit is fatal.
  const BigReal maxvel2 = maxvel * maxvel;
  for ( int i=0; i<numAtoms; ++i ) {
    if ( a[i].velocity.length2() > maxvel2 ) {
      // ... clamp this atom's speed to maxvel
    }
  }
  // ...
  const BigReal maxvel2 = maxvel * maxvel;
  // ...
  for ( int i=0; i<numAtoms; ++i ) {
    // ... flag any atom over the limit
  }
  // ...
  for ( int i=0; i<numAtoms; ++i ) {
    if ( a[i].velocity.length2() > maxvel2 ) {
      iout << iERROR << "Atom " << (a[i].id + 1) << " velocity is "
        // ... (velocity and limit elided)
        << i << " of " << numAtoms << " on patch "
        // ...
    }
  }
  // ...
  iout << iERROR <<
    "Atoms moving too fast; simulation has become unstable ("
    // ...
    << " pe " << CkMyPe() << ").\n" << endi;
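// --- Illustrative sketch (not from the original source) ---------------------
// The stability check above compares squared speeds against (maxDist/dt)^2 so
// no square root is needed in the hot loop. MMAtom and all names below are
// local assumptions of this sketch.
struct MMAtom { double vx, vy, vz; };

// Returns true if every atom moves less than maxDist in one step of length dt.
bool allWithinMaxMove(const MMAtom *a, int n, double maxDist, double dt) {
  const double maxvel2 = (maxDist / dt) * (maxDist / dt);
  for (int i = 0; i < n; ++i) {
    double v2 = a[i].vx * a[i].vx + a[i].vy * a[i].vy + a[i].vz * a[i].vz;
    if (v2 > maxvel2) return false;
  }
  return true;
}
// ---------------------------------------------------------------------------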
// Sequencer::minimizationQuenchVelocity() -- zero all velocities while the
// minimizer is active.
  for ( int i=0; i<numAtoms; ++i ) {
    a[i].velocity = 0;
  }
// Sequencer::submitHalfstep() -- half-step kinetic energy and kinetic virial.
  CmiNetworkProgressAfter(0);
  // ...
  for ( int i = 0; i < numAtoms; ++i ) {
    kineticEnergy += a[i].mass * a[i].velocity.length2();
    virial.outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
  }
  // ...
  // variant that skips effectively massless particles:
  for ( int i = 0; i < numAtoms; ++i ) {
    if (a[i].mass < 0.01) continue;
    kineticEnergy += a[i].mass * a[i].velocity.length2();
    virial.outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
  }
  // ...
  momentumSqrSum = virial;
  // ...
  kineticEnergy *= 0.5 * 0.5;  // 1/2 for m*v^2/2, 1/2 to average the two half-steps
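// --- Illustrative sketch (not from the original source) ---------------------
// The kinetic-virial accumulation above: outerAdd adds scale * v1 v2^T to a
// 3x3 tensor, so after accumulating outerAdd(mass, v, v) per atom the trace
// equals sum(m |v|^2) and the kinetic energy is half the trace. T3x3 is a
// minimal local stand-in for NAMD's Tensor.
struct T3x3 {
  double m[3][3] = {};
  void outerAdd(double s, const double v1[3], const double v2[3]) {
    for (int r = 0; r < 3; ++r)
      for (int c = 0; c < 3; ++c)
        m[r][c] += s * v1[r] * v2[c];
  }
  double trace() const { return m[0][0] + m[1][1] + m[2][2]; }
};
// Usage: accumulate outerAdd(mass, v, v) per atom; KE = 0.5 * trace().
// ---------------------------------------------------------------------------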
  // Pressure-profile accumulation: assign each atom (or hydrogen group) to a
  // slab along z, with periodic wrap-around.
  for ( int i=0; i<numAtoms; i += hgs) {
    // ...
    for (j=i; j< i+hgs; ++j) {
      // ...
    }
    for (j=i; j < i+hgs; ++j) {
      if (! (useGroupPressure && j != i)) {
        // ...
        int slab = (int)floor((z-zmin)*idz);
        if (slab < 0) slab += nslabs;
        else if (slab >= nslabs) slab -= nslabs;
        // ...
      }
      if (useGroupPressure) {
        // ...
      }
    }
  }
  // Group-pressure path: per hydrogen group, accumulate the center-of-mass
  // momentum and the internal (relative-to-COM) kinetic energy and virial.
  for ( int i = 0; i < numAtoms; i += hgs ) {
    // ...
    CmiNetworkProgress ();
    // ...
    for ( j = i; j < (i+hgs); ++j ) {
      // ... accumulate group mass m_cm and momentum v_cm
    }
    // v_cm holds the group momentum here, so p p^T / m = m v_com v_com^T:
    momentumSqrSum.outerAdd(1.0/m_cm, v_cm, v_cm);
    // ...
    for ( j = i; j < (i+hgs); ++j ) {
      // ...
      intKineticEnergy += mass * (v * dv);
      intVirialNormal.outerAdd (mass, v, dv);
    }
    // ...
    for ( j = i; j < (i+hgs); ++j ) {
      // ...
      intKineticEnergy += mass * (v * dv);
      intVirialNormal.outerAdd(mass, v, dv);
    }
  }
  intKineticEnergy *= 0.5 * 0.5;
  // ...
  intVirialNormal *= 0.5;
  // ...
  momentumSqrSum *= 0.5;
  // ...
  for ( int j = 0; j < numAtoms; j++ ) {
    // ...
  }
// Sequencer::submitReductions() -- momentum, angular momentum, kinetic
// energy, and Drude bookkeeping, submitted to the reduction system.
  NAMD_EVENT_RANGE_2(patch->flags.event_on,
      NamdProfileEvent::SUBMIT_REDUCTIONS);
  // ...
  CmiNetworkProgressAfter(0);
  // ...
  Vector angularMomentum = 0;
  // ...
  for (i = 0; i < numAtoms; ++i ) {
    // ...
    angularMomentum += cross(a[i].mass,a[i].position-o,a[i].velocity);
  }
  // ...
  for (i = 0; i < numAtoms; ++i ) {
    // ...
    angularMomentum += cross(a[i].mass,a[i].position-o,a[i].velocity);
  }
  // Drude pairs: split kinetic energy into center-of-mass and bond parts.
  for (i = 0; i < numAtoms; i++) {
    if (i < numAtoms-1 &&
        a[i+1].mass < 1.0 && a[i+1].mass > 0.05) {
      // ...
      drudeComKE += m_com * v_com.length2();
      drudeBondKE += m_bond * v_bond.length2();
      // ...
    }
  }
  // ...
  kineticEnergy *= 0.5;
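// --- Illustrative sketch (not from the original source) ---------------------
// The angular momentum accumulated above is L = sum_i m_i (r_i - o) x v_i,
// using the mass-weighted three-argument cross() form seen in the loop.
// V3 is a minimal local vector type assumed for this sketch.
struct V3 { double x, y, z; };

inline V3 cross(double m, const V3 &r, const V3 &v) {
  // m * (r x v)
  return { m * (r.y * v.z - r.z * v.y),
           m * (r.z * v.x - r.x * v.z),
           m * (r.x * v.y - r.y * v.x) };
}
// ---------------------------------------------------------------------------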
  // ...
  for ( int i = 0; i < numAtoms; ++i ) {
    // ...
  }
  // ...
  for ( int i = 0; i < numAtoms; ++i ) {
    // ...
  }
  // ...
  for ( int i = 0; i < numAtoms; ++i ) {
    // ...
  }
  // Internal kinetic energy per hydrogen group, honoring pair-interaction
  // partitions and fixed atoms.
  for ( int i = 0; i < numAtoms; i += hgs ) {
    // ...
    CmiNetworkProgress();
    // ...
    for ( j = i; j < (i+hgs); ++j ) {
      // ... group center-of-mass pass
    }
    for ( j = i; j < (i+hgs); ++j ) {
      if ( /* ... */ ( pairInteractionSelf || a[j].partition != 2 ) )
        continue;
      if ( fixedAtomsOn && a[j].atomFixed ) continue;
      // ...
      intKineticEnergy += mass * (v * dv);
      // ...
    }
    // ...
    for ( j = i; j < (i+hgs); ++j ) {
      if ( fixedAtomsOn && a[j].atomFixed ) continue;
      // ...
      intKineticEnergy += mass * (v * dv);
      // ...
    }
  }
  intKineticEnergy *= 0.5;
  // Pressure-profile virial: bin the diagonal force-times-displacement
  // contributions by slab and atom partition.
  for ( int i=0; i<numAtoms; i += hgs) {
    // ...
    for (j=i; j< i+hgs; ++j) {
      // ...
    }
    // ...
    int slab = (int)floor((z-zmin)*idz);
    if (slab < 0) slab += nslabs;
    else if (slab >= nslabs) slab -= nslabs;
    // ...
    int ppoffset = 3*(slab + nslabs*partition);
    for (j=i; j < i+hgs; ++j) {
      // ...
      BigReal wxx = (fnormal.x + fnbond.x + fslow.x) * dx.x;
      BigReal wyy = (fnormal.y + fnbond.y + fslow.y) * dx.y;
      BigReal wzz = (fnormal.z + fnbond.z + fslow.z) * dx.z;
      // ...
    }
  }
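// --- Illustrative sketch (not from the original source) ---------------------
// The per-slab virial binning above: for each atom the diagonal contributions
// f_x*dx_x, f_y*dx_y, f_z*dx_z go into a flat array holding 3 values per
// (slab, partition) bin, at ppoffset = 3*(slab + nslabs*partition). The
// function and parameter names below are local assumptions.
void addSlabVirial(double *pp, int slab, int nslabs, int part,
                   const double f[3], const double dx[3]) {
  int ppoffset = 3 * (slab + nslabs * part);  // first of this bin's 3 entries
  pp[ppoffset + 0] += f[0] * dx[0];           // wxx
  pp[ppoffset + 1] += f[1] * dx[1];           // wyy
  pp[ppoffset + 2] += f[2] * dx[2];           // wzz
}
// ---------------------------------------------------------------------------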
  // Fixed-atom virial correction and debug dump.
  Vector fixForceNormal = 0;
  Vector fixForceNbond = 0;
  Vector fixForceSlow = 0;
  calcFixVirial(fixVirialNormal, fixVirialNbond, fixVirialSlow,
      fixForceNormal, fixForceNbond, fixForceSlow);
  // ...
  auto printTensor = [](const Tensor& t, const std::string& name){
    CkPrintf("%s", name.c_str());
    CkPrintf("\n%12.5lf %12.5lf %12.5lf\n"
             "%12.5lf %12.5lf %12.5lf\n"
             "%12.5lf %12.5lf %12.5lf\n",
             t.xx, t.xy, t.xz, t.yx, t.yy, t.yz, t.zx, t.zy, t.zz);
  };
  printTensor(fixVirialNormal, "fixVirialNormal = ");
  printTensor(fixVirialNbond, "fixVirialNbond = ");
  printTensor(fixVirialSlow, "fixVirialSlow = ");
#endif // UPPER_BOUND
// ...
  // Drude hard-wall parameters for minimization: squared bond length and the
  // maximum distance a Drude particle may move per step.
  const double drudeBondLen2 = drudeBondLen * drudeBondLen;
  // ...
  const double drudeMove = 0.01;
  const double drudeStep2 = drudeStep * drudeStep;
  const double drudeMove2 = drudeMove * drudeMove;
  for ( int i = 0; i < numAtoms; ++i ) {
    // ...
    printf("f1[%2d]= %f %f %f\n", i, f1[i].x, f1[i].y, f1[i].z);
    printf("f2[%2d]= %f %f %f\n", i, f2[i].x, f2[i].y, f2[i].z);
    // ...
    f1[i] += f2[i] + f3[i];  // combine all force classes
    if ( drudeHardWallOn && i && (a[i].mass > 0.05) && (a[i].mass < 1.0) ) {
      // Drude particle: relax it along the net force, capping the step.
      if ( ! fixedAtomsOn || ! a[i].atomFixed ) {
        if ( drudeStep2 * f1[i].length2() > drudeMove2 ) {
          // cap the displacement at drudeMove in the force direction
          // ...
        } else {
          a[i].position += drudeStep * f1[i];
        }
        if ( (a[i].position - a[i-1].position).length2() > drudeBondLen2 ) {
          // pull the Drude particle back inside the hard-wall radius
          // ...
        }
      }
      // transfer the Drude force to its parent atom:
      Vector netf = f1[i-1] + f1[i];
      if ( fixedAtomsOn && a[i-1].atomFixed ) netf = 0;
      // ...
    }
    if ( fixedAtomsOn && a[i].atomFixed ) f1[i] = 0;
  }
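// --- Illustrative sketch (not from the original source) ---------------------
// The capped relaxation step above: move along the force by step*f, but never
// farther than maxMove; comparing squared norms avoids a sqrt in the common
// (uncapped) case. 1-D version; all names are local assumptions.
double cappedStep(double f, double step, double maxMove) {
  if (step * step * f * f > maxMove * maxMove)
    return (f > 0 ? maxMove : -maxMove);  // capped: maxMove along the force
  return step * f;                        // uncapped: proportional to force
}
// ---------------------------------------------------------------------------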
  for ( int i = 0; i < numAtoms; ++i ) {
    // ...
    if ( v2 > maxv2 ) maxv2 = v2;
    // ...
    if ( v2 > maxv2 ) maxv2 = v2;
  }
  for ( int i = 0; i < numAtoms; ++i ) {
    // ...
    if ( drudeHardWallOn && (a[i].mass > 0.05) && (a[i].mass < 1.0) )
      continue;  // Drude particles were handled above
    // ...
    // clamp overly large forces to keep the line search stable:
    BigReal fmult = 1.01 * sqrt(fmax2/ff);
    f *= fmult;  ff = f * f;
    // ...
  }
  // ...
  printf("fdotf = %f\n", fdotf);
  printf("fdotv = %f\n", fdotv);
  printf("vdotv = %f\n", vdotv);
  for ( int i = 0; i < numAtoms; i += hgs ) {
    // ...
    for ( j = i; j < (i+hgs); ++j ) {
      // ...
    }
    // ...
    for ( j = i; j < (i+hgs); ++j ) {
      // ...
    }
  }
  // ...
  Vector fixForceNormal = 0;
  Vector fixForceNbond = 0;
  Vector fixForceSlow = 0;
  calcFixVirial(fixVirialNormal, fixVirialNbond, fixVirialSlow,
      fixForceNormal, fixForceNbond, fixForceSlow);
// Sequencer::submitCollections() -- hand positions/velocities/forces to the
// collection system on output steps.
  NAMD_EVENT_RANGE_2(patch->flags.event_on,
      NamdProfileEvent::SUBMIT_COLLECTIONS);
  // ...
  int dcdSelectionIndex;
  // ...
#if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC)
  // ...
#endif
// ...
  // the (wrapped) sequence number occupies the high bits, so messages from
  // earlier steps drain first:
  int basePriority = ( (seq & 0xffff) << 15 )
      // ... + per-message priority bits
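// --- Illustrative sketch (not from the original source) ---------------------
// The priority packing above: the low 16 bits of the sequence number are
// shifted into the high bits of the priority word, so sorting by priority
// sorts by (wrapped) step first, leaving the low 15 bits for per-message
// priorities. The helper name and msgPriority are assumptions of this sketch.
int makeBasePriority(int seq, int msgPriority /* must be < (1 << 15) */) {
  return ((seq & 0xffff) << 15) + msgPriority;
}
// e.g. makeBasePriority(3, 7) == (3 << 15) + 7 == 98311
// ---------------------------------------------------------------------------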
#ifdef NAMD_CUDA_XXX
  // debug dump of per-atom data
  for ( int i=0; i<numAtoms; ++i ) {
    CkPrintf("%d %g %g %g\n", a[i].id, /* ... */);
  }
  // ...
  CkPrintf("%d %g %g %g\n", a[i].id, /* ... */);
  // ...
  CkPrintf("%d %g %g %g\n", a[i].id, /* ... */);
  // ...
  CkPrintf("%d %g %g %g\n", a[i].id, /* ... */);
  // ...
#endif
// ...
  // per-atom force dump (normally compiled out):
  for ( int i=0; i<numAtoms; ++i ) {
    // ...
    float fx = fxNo+fxNb+fxSl;  // normal + nonbonded + slow components
    float fy = fyNo+fyNb+fySl;
    float fz = fzNo+fzNb+fzSl;
    // ...
    float f = sqrt(fx*fx+fy*fy+fz*fz);
    // ...
    float x = patch->p[i].position.x;
    float y = patch->p[i].position.y;
    float z = patch->p[i].position.z;
    // ...
    CkPrintf("FORCE(%04i)[%04i] = % .9e % .9e % .9e\n", seq, id,
        /* ... fx, fy, fz ... */);
  }
#ifdef MEASURE_NAMD_WITH_PAPI
void Sequencer::papiMeasureBarrier(int step)
{
  // synchronize with the Controller's broadcast before toggling HPM counters
  broadcast->papiMeasureBarrier.get(step);
}
#endif