30 #define PRINT_FORCES 0 62 #define MIN_DEBUG_LEVEL 3 72 #define START_HPM_STEP 1000 73 #define STOP_HPM_STEP 1500 77 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 79 #define __thread __declspec(thread) 88 #define SPECIAL_PATCH_ID 91 97 int ilo=0,
int ihip1=1
99 printf(
"AOS Velocities:\n");
100 for (
int i=ilo; i < ihip1; i++) {
101 printf(
"%d %g %g %g\n", i,
111 int ilo=0,
int ihip1=1
113 printf(
"SOA Velocities:\n");
114 for (
int i=ilo; i < ihip1; i++) {
115 printf(
"%d %g %g %g\n", i, vel_x[i], vel_y[i], vel_z[i]);
121 printf(
"%g %g %g %g %g %g %g %g %g\n",
159 inline int init(
int initstep,
int initperiod,
int delta=0) {
176 pairlistsAreValid(0),
217 #if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(SEQUENCER_SOA) && defined(NODEGROUP_FORCE_REGISTER) 228 CUDASequencer->patchData->reduction->zero();
241 #if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(SEQUENCER_SOA) && defined(NODEGROUP_FORCE_REGISTER) 243 delete CUDASequencer;
250 void Sequencer::threadRun(
Sequencer* arg)
260 DebugM(4,
"::run() - this = " <<
this <<
"\n" );
261 thread = CthCreate((CthVoidFn)&(threadRun),(
void*)(
this),
SEQ_STK_SZ);
262 CthSetStrategyDefault(thread);
283 switch ( scriptTask ) {
328 NAMD_die(
"Minimization is currently not supported on the GPU integrator\n");
343 #ifdef NODEGROUP_FORCE_REGISTER 349 #ifdef NODEGROUP_FORCE_REGISTER 358 NAMD_bug(
"Unknown task in Sequencer::algorithm");
373 #if defined(NODEGROUP_FORCE_REGISTER) 377 CUDASequencer->numPatchesCheckedIn += 1;
378 if (CUDASequencer->numPatchesCheckedIn < patchMap->
numPatchesOnNode(CkMyPe())) {
380 CUDASequencer->waitingThreads.push_back(CthSelf());
387 int lastStep = CUDASequencer->patchData->flags.step;
389 if (CUDASequencer->breakSuspends)
break;
395 CUDASequencer->numPatchesCheckedIn += 1;
396 CUDASequencer->waitingThreads.push_back(CthSelf());
397 if(CUDASequencer->numPatchesCheckedIn == patchMap->
numPatchesOnNode(CkMyPe()) - 1 &&
398 CUDASequencer->masterThreadSleeping){
399 CUDASequencer->masterThreadSleeping =
false;
400 CthAwaken(CUDASequencer->masterThread);
407 CUDASequencer->numPatchesCheckedIn = 0;
408 for (CthThread t : CUDASequencer->waitingThreads) {
411 CUDASequencer->waitingThreads.clear();
427 CUDASequencer->sync();
440 ComputeBondedCUDA* cudaBond = cudaMgr->getComputeBondedCUDA();
447 if (isMaster && cudaGlobal) cudaGlobal->setStep(static_cast<int64_t>(
patch->
flags.
step));
468 cudaBond->atomUpdate();
476 cudaGlobal->updateAtomMaps();
485 CUDASequencer->launch_set_compute_positions();
486 CUDASequencer->sync();
494 for(
int i = 0 ; i < CkNumPes(); i++){
497 ComputeBondedCUDA* b = CUDASequencer->patchData->cudaBondedList[i];
499 if (b == NULL)
continue;
500 auto list = std::find(std::begin(b->getBondedPes()), std::end(b->getBondedPes()), CkMyPe());
501 if( list != std::end(b->getBondedPes()) ){
502 b->openBoxesOnPe(startup);
517 cudaBond->loadTuplesOnPe(startup);
523 cudaBond->copyTupleDataGPU(startup);
525 cudaBond->copyTupleDataSN();
535 cudaBond->launchWork();
538 cudaPme->
compute(*(CUDASequencer->patchData->lat), reducePme, this->patch->flags.step);
543 cudaGlobal->communicateToClients(&(this->
patch->
lattice));
544 cudaGlobal->communicateToMD();
560 for(
int i = 0; i < numhp; ++i) {
562 hp->zero_global_forces_SOA();
577 CUDASequencer->copyGlobalForcesToDevice();
589 cudaBond->finishPatches();
597 cudaBond->finishReductions();
599 if (cudaGlobal) cudaGlobal->finishReductions(
601 CUDASequencer->patchData->reduction);
613 cudaPme->
compute(*(CUDASequencer->patchData->lat), reducePme, this->patch->flags.step);
615 if (doNbond) cudaNbond->
doWork();
619 cudaGlobal->communicateToClients(&(this->
patch->
lattice));
620 cudaGlobal->communicateToMD();
632 for(
int i = 0; i < numhp; ++i) {
634 hp->zero_global_forces_SOA();
649 CUDASequencer->copyGlobalForcesToDevice();
659 cudaBond->finishPatches();
665 if (cudaGlobal) cudaGlobal->finishReductions(
667 CUDASequencer->patchData->reduction);
686 const int doMigration,
689 const int maxForceNumber,
694 Controller *c_out = CUDASequencer->patchData->c_out;
695 bool mGpuOn = CUDASequencer->mGpuOn;
703 CUDASequencer->patchData->nodeReductionSave->setVal(
reduction);
707 c_out->mcPressure_prepare(step);
709 factor = c_out->getPositionRescaleFactor(step);
714 CUDASequencer->monteCarloPressure_part1(factor, origin, oldLattice);
719 CUDASequencer->patchData->lat = &(this->
patch->
lattice);
720 CUDASequencer->patchData->factor = &(factor);
722 CUDASequencer->patchData->flags.copyIntFlags(this->
patch->
flags);
733 CUDASequencer->update_patch_flags();
744 CUDASequencer->monteCarloPressure_part2(
reduction, step, maxForceNumber,
745 doEnergy, doGlobal, doVirial);
749 c_out->mcPressure_accept(step);
750 accepted = c_out->getMCAcceptance(step);
756 CUDASequencer->monteCarloPressure_accept(
reduction, doMigration);
761 CUDASequencer->patchData->lat = &(this->
patch->
lattice);
763 CUDASequencer->patchData->flags.copyIntFlags(this->
patch->
flags);
767 CUDASequencer->monteCarloPressure_reject(this->
patch->
lattice);
769 reduction->
setVal(CUDASequencer->patchData->nodeReductionSave);
775 if(isMasterPe && !accepted){
777 CUDASequencer->update_patch_flags();
782 const int updatePatchMap) {
785 const bool updatePatchData = startup || doGlobal || updatePatchMap;
788 bool realloc =
false;
797 if(CUDASequencer->patchData->atomReallocationFlagPerDevice[i] != 0) {
804 CUDASequencer->reallocateMigrationDestination();
805 CUDASequencer->registerSOAPointersToHost();
809 CUDASequencer->copySOAHostRegisterToDevice();
818 CUDASequencer->migrationLocalInit();
824 CUDASequencer->migrationPerform();
830 CUDASequencer->migrationUpdateAtomCounts();
836 CUDASequencer->migrationUpdateAtomOffsets();
842 CUDASequencer->copyPatchDataToHost();
850 realloc = CUDASequencer->copyPatchData(
true,
false);
857 if(CUDASequencer->patchData->atomReallocationFlagPerDevice[i] != 0) {
864 CUDASequencer->registerSOAPointersToHost();
868 CUDASequencer->copySOAHostRegisterToDevice();
874 CUDASequencer->migrationLocalPost(0);
875 CUDASequencer->migrationSortAtomsNonbonded();
880 if (!updatePatchData) {
883 if(CUDASequencer->numPatchesCheckedIn < patchMap->
numPatchesOnNode(CkMyPe()) -1 ) {
884 CUDASequencer->masterThreadSleeping =
true;
885 CUDASequencer->masterThread = CthSelf();
891 CUDASequencer->sync();
895 CUDASequencer->migrationUpdateDestination();
900 CUDASequencer->migrationUpdateProxyDestination();
905 CUDASequencer->migrationUpdateRemoteOffsets();
910 CUDASequencer->copyDataToPeers(
true);
914 if (updatePatchData) {
918 for(
int i = 0; i < numhp; ++i) {
927 CUDASequencer->copyAoSDataToHost();
932 if(CUDASequencer->numPatchesCheckedIn < patchMap->
numPatchesOnNode(CkMyPe()) -1 ) {
933 CUDASequencer->masterThreadSleeping =
true;
934 CUDASequencer->masterThread = CthSelf();
941 CUDASequencer->updateHostPatchDataSOA();
949 CUDASequencer->migrationUpdateAdvancedFeatures(
false);
957 #ifdef TIMER_COLLECTION 958 TimerSet& t =
patch->timerSet;
975 Controller *c_out = CUDASequencer->patchData->c_out;
1000 doNonbonded = (step >= numberOfSteps) ||
1017 doFullElectrostatics = (dofull &&
1018 ((step >= numberOfSteps) ||
1030 doEnergy = energyFrequency.
init(step, newComputeEnergies);
1042 int doGlobal = (doTcl || doColvars);
1045 bool globalMasterStep=
false;
1046 int doGlobalObjects=0;
1047 int doGlobalStaleForces = 0;
1052 globalMasterStep = globalMasterFrequency.
check(step);
1053 doGlobalObjects = globalMasterStep? 1:0;
1057 doGlobalStaleForces=1;
1065 doGlobalStaleForces=doGlobalObjects;
1069 doGlobalStaleForces=doGlobalObjects;
1074 doGlobalStaleForces = 0;
1075 doGlobalObjects = 0;
1091 langevinPistonFrequency.
init(step,
1123 patch->copy_atoms_to_SOA();
1139 CUDASequencer->breakSuspends =
false;
1144 if (CUDASequencer->patchData->ptrCollectionMaster == NULL) {
1147 CUDASequencer->patchData->ptrCollectionMaster = pcm;
1150 if (CUDASequencer->patchData->ptrOutput == NULL) {
1153 CUDASequencer->patchData->ptrOutput = pout;
1156 if (CUDASequencer->patchData->pdb == NULL) {
1159 CUDASequencer->patchData->pdb = pdb;
1162 if (CUDASequencer->patchData->imd == NULL) {
1165 CUDASequencer->patchData->imd = imd;
1175 CUDASequencer->patchData->cudaBondedList[CkMyPe()] = NULL;
1176 CUDASequencer->patchData->cudaNonbondedList[CkMyPe()] = NULL;
1185 CUDASequencer->patchData->reduction->zero();
1207 if(patchData->updateCounter.load()>0)
1209 CUDASequencer->updateDeviceKernels();
1213 CUDASequencer->startRun1(maxForceUsed, this->
patch->
lattice);
1216 CUDASequencer->patchData->lat = &(this->
patch->
lattice);
1217 CUDASequencer->patchData->flags.copyIntFlags(this->
patch->
flags);
1223 const bool addCudaGlobalForces =
1225 (cudaGlobalMasterObject !=
nullptr) ?
1226 cudaGlobalMasterObject->willAddGlobalForces() :
1229 if (addCudaGlobalForces) {
1230 CUDASequencer->allocateGPUSavedForces();
1238 if(CUDASequencer->numPatchesCheckedIn < patchMap->
numPatchesOnNode(CkMyPe()) -1 ) {
1239 CUDASequencer->masterThreadSleeping =
true;
1240 CUDASequencer->masterThread = CthSelf();
1257 CUDASequencer->finish_patch_flags(
true);
1262 const bool addCudaGlobalForces =
1264 (cudaGlobalMasterObject !=
nullptr) ?
1265 cudaGlobalMasterObject->willAddGlobalForces() :
1268 CUDASequencer->startRun2(timestep,
1270 CUDASequencer->patchData->reduction, doGlobal || addCudaGlobalForces, maxForceUsed);
1274 const bool requestTotalForces = computeGlobal ? computeGlobal->
getForceSendActive() :
false;
1279 const bool requestGPUTotalForces =
1281 (cudaGlobalMasterObject !=
nullptr) ?
1282 cudaGlobalMasterObject->requestedTotalForces() :
1285 CUDASequencer->startRun3(timestep,
1287 CUDASequencer->patchData->reduction,
1288 requestTotalForces, doGlobalStaleForces,
1290 requestGPUTotalForces,
1301 for(
int i = 0; i < numhp; ++i) {
1313 CUDASequencer->patchData->flags.copyIntFlags(this->
patch->
flags);
1315 c_out->printStep(step, CUDASequencer->patchData->reduction);
1316 CUDASequencer->patchData->reduction->zero();
1322 double velrescaling = 1;
1324 for( ++step; step <= numberOfSteps; ++step ){
1325 const int isForcesOutputStep = forceDcdFrequency.
check(step);
1326 int dcdSelectionChecks=0;
1328 for(
int dcdindex=0; dcdindex<16;++dcdindex)
1331 if(dcdSelectionFrequency && step % dcdSelectionFrequency)
1332 dcdSelectionChecks++;
1334 const int isCollection = restartFrequency.
check(step) +
1335 dcdFrequency.
check(step) + velDcdFrequency.
check(step) +
1336 isForcesOutputStep + imdFrequency.
check(step) + dcdSelectionChecks;
1337 int isMigration =
false;
1338 const int doVelocityRescale = stochRescaleFrequency.
check(step);
1339 const int doMCPressure = monteCarloPressureFrequency.
check(step);
1341 doEnergy = energyFrequency.
check(step) || doVelocityRescale || doMCPressure;
1342 int langevinPistonStep = langevinPistonFrequency.
check(step);
1344 int reassignVelocityStep = reassignVelocityFrequency.
check(step);
1347 int berendsenPressureStep = 0;
1352 berendsenPressureStep = 1;
1355 if(patchData->updateCounter.load()>0)
1357 CUDASequencer->updateDeviceKernels();
1362 globalMasterStep = globalMasterFrequency.
check(step);
1363 doGlobalObjects = globalMasterStep? 1:0;
1367 doGlobalStaleForces=1;
1375 doGlobalStaleForces=doGlobalObjects;
1379 doGlobalStaleForces=doGlobalObjects;
1384 doGlobalStaleForces = 0;
1385 doGlobalObjects = 0;
1386 globalMasterStep =
false;
1390 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED) 1405 c_out->piston1(step);
1409 c_out->berendsenPressureController(step);
1412 if (langevinPistonStep || berendsenPressureStep) {
1414 factor = c_out->getPositionRescaleFactor(step);
1416 CUDASequencer->patchData->lat = &(this->
patch->
lattice);
1417 CUDASequencer->patchData->factor = &(factor);
1423 int previousMaxForceUsed;
1426 previousMaxForceUsed = maxForceUsed;
1430 doNonbonded = nonbondedFrequency.
check(step);
1432 doFullElectrostatics = (dofull && fullElectFrequency.
check(step));
1448 CUDASequencer->launch_part1(
1450 timestep, nbondstep, slowstep, velrescaling, maxvel2,
1451 CUDASequencer->patchData->reduction,
1452 *(CUDASequencer->patchData->factor),
1455 (langevinPistonStep || berendsenPressureStep) ? *(CUDASequencer->patchData->lat) : this->patch->lattice,
1456 reassignVelocityStep,
1458 berendsenPressureStep,
1459 previousMaxForceUsed,
1461 this->patch->flags.savePairlists,
1462 this->patch->flags.usePairlists,
1467 if (reassignVelocityStep)
1477 CUDASequencer->launch_part11(
1478 timestep, nbondstep, slowstep, velrescaling, maxvel2,
1479 CUDASequencer->patchData->reduction,
1480 *(CUDASequencer->patchData->factor),
1483 (langevinPistonStep || berendsenPressureStep) ? *(CUDASequencer->patchData->lat) : this->patch->lattice,
1485 previousMaxForceUsed,
1487 this->patch->flags.savePairlists,
1488 this->patch->flags.usePairlists,
1499 if(CUDASequencer->patchData->migrationFlagPerDevice[i] != 0) {
1508 CUDASequencer->launch_set_compute_positions();
1516 CUDASequencer->updatePairlistFlags(isMigration);
1518 CUDASequencer->copyPositionsAndVelocitiesToHost(isMigration, doGlobalObjects);
1528 if(CUDASequencer->numPatchesCheckedIn < patchMap->
numPatchesOnNode(CkMyPe()) -1 ) {
1529 CUDASequencer->masterThreadSleeping =
true;
1530 CUDASequencer->masterThread = CthSelf();
1547 CUDASequencer->finish_patch_flags(isMigration);
1558 const bool addCudaGlobalForces =
1560 (cudaGlobalMasterObject !=
nullptr) ?
1561 cudaGlobalMasterObject->willAddGlobalForces() :
1564 CUDASequencer->launch_part2(doMCPressure,
1565 timestep, nbondstep, slowstep,
1566 CUDASequencer->patchData->reduction,
1573 doGlobalStaleForces || addCudaGlobalForces,
1585 const bool requestTotalForces = (computeGlobal ? computeGlobal->
getForceSendActive() :
false) && doGlobalObjects;
1592 const bool requestGPUTotalForces =
1594 (CudaGlobalMasterObject !=
nullptr) ?
1595 CudaGlobalMasterObject->requestedTotalForces() :
1598 CUDASequencer->launch_part3(doMCPressure,
1599 timestep, nbondstep, slowstep,
1600 CUDASequencer->patchData->reduction,
1605 doGlobalStaleForces,
1606 requestGPUTotalForces,
1610 isForcesOutputStep);
1617 if (requestTotalForces) {
1622 for(
int i = 0; i < numhp; ++i) {
1634 c_out->printStep(step, CUDASequencer->patchData->reduction);
1638 if (doVelocityRescale) {
1654 CUDASequencer->copyAoSDataToHost();
1661 for (
auto i= hplist->
begin(); i != hplist->
end(); i++) {
1673 CUDASequencer->copyAoSDataToHost();
1677 CUDASequencer->updateHostPatchDataSOA();
1678 CUDASequencer->saveForceCUDASOA_direct(
false,
true, maxForceUsed);
1680 if(isMasterPe) CUDASequencer->copyPositionsAndVelocitiesToHost(
true,doGlobal);
1683 for (
auto i= hplist->
begin(); i != hplist->
end(); i++) {
1685 hp->copy_updates_to_AOS();
1686 hp->copy_forces_to_AOS();
1692 CUDASequencer->breakSuspends =
true;
1706 CUDASequencer->copyPatchData(
true, startup);
1708 CUDASequencer->reallocateMigrationDestination();
1709 CUDASequencer->copyAtomDataToDeviceAoS();
1711 CUDASequencer->copyAtomDataToDevice(startup, maxForceUsed);
1713 CUDASequencer->migrationLocalPost(startup);
1714 CUDASequencer->migrationUpdateAdvancedFeatures(startup);
1716 CUDASequencer->registerSOAPointersToHost();
1720 CUDASequencer->copySOAHostRegisterToDevice();
1726 CUDASequencer->updateHostPatchDataSOA();
1740 ComputeBondedCUDA* cudaBond = cudaMgr->getComputeBondedCUDA();
1743 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1744 patchData = cpdata.ckLocalBranch();
1749 if (patchData->devicePatchMapFlag[CkMyPe()])
return;
1750 patchData->devicePatchMapFlag[CkMyPe()] = 1;
1761 std::vector<NBPatchRecord>& nonbondedPatches = cudaNbond->
getPatches();
1762 std::vector<HomePatch*>& homePatches = patchData->devData[deviceIndex].patches;
1763 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1769 homePatches.begin(),
1777 for (
int i = 0; i < nonbondedPatches.size(); i++) {
1779 record.
patchID = nonbondedPatches[i].patchID;
1782 const int targetPatchID = record.
patchID;
1783 auto result = std::find_if(
1784 homePatches.begin(),
1787 return (p->getPatchID() == targetPatchID);
1790 record.
isProxy = (result == homePatches.end());
1791 localPatches.push_back(record);
1798 localPatches.begin(),
1801 return (a.
isProxy < b.isProxy);
1806 cudaBond->updatePatchOrder(localPatches);
1808 patchData->devData[deviceIndex].numPatchesHome = homePatches.size();
1809 patchData->devData[deviceIndex].numPatchesHomeAndProxy = localPatches.size();
// NOTE(review): this excerpt is discontinuous — the leading integers (1821,
// 1824, ...) embedded in each line are original-file line numbers left by a
// garbled extraction, not code, and gaps between them mean statements are
// missing (e.g. the loop over devIdx that encloses the `continue` below).
// Fragment purpose (hedged, from visible code only): for every patch local to
// this device, scan the OTHER devices' local-patch tables for records with the
// same patchID but the opposite isProxy flag, collect them as peer records,
// and append them to this device's flat h_peerPatches array, recording each
// patch's slice via numPeerRecord / peerRecordStartIndex.
1821 std::vector<CudaPeerRecord>& myPeerPatches = patchData->devData[deviceIndex].h_peerPatches;
1822 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1824 for (
int i = 0; i < localPatches.size(); i++) {
1825 std::vector<CudaPeerRecord> tempPeers;
1826 const int targetPatchID = localPatches[i].patchID;
1827 const int targetIsProxy = localPatches[i].isProxy;
// Skip ourselves — a device is not its own peer.  (The enclosing loop over
// devIdx is elided from this excerpt.)
1830 if (devIdx == deviceIndex)
continue;
1831 std::vector<CudaLocalRecord>& peerPatches = patchData->devData[devIdx].h_localPatches;
1835 for (
int j = 0; j < patchData->devData[devIdx].numPatchesHomeAndProxy; j++) {
// Match: same patch, opposite home/proxy role on the peer device.
// (`peer` and `peerRecord` are set up on elided lines 1836/1838-1840.)
1837 if (peer.
patchID == targetPatchID && peer.
isProxy != targetIsProxy) {
1841 tempPeers.push_back(peerRecord);
// Publish this patch's peers as a contiguous slice of myPeerPatches;
// peerRecordStartIndex is only meaningful when numPeerRecord > 0.
1849 localPatches[i].numPeerRecord = tempPeers.size();
1850 if (!tempPeers.empty()) {
1851 localPatches[i].peerRecordStartIndex = myPeerPatches.size();
1852 myPeerPatches.insert(myPeerPatches.end(), tempPeers.begin(), tempPeers.end());
1860 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1861 patchData = cpdata.ckLocalBranch();
1867 const int numPatchesHome = patchData->devData[deviceIndex].numPatchesHome;
1868 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1871 CkPrintf(
"PE: %d\n", CkMyPe());
1873 CkPrintf(
"[%d] Home patches %d Local patches %d\n", CkMyPe(), numPatchesHome, localPatches.size());
1875 CkPrintf(
"Home Patches: ");
1876 for (
int i = 0; i < numPatchesHome; i++) {
1877 CkPrintf(
"%d ", localPatches[i].patchID);
1881 CkPrintf(
"Proxy Patches: ");
1882 for (
int i = numPatchesHome; i < localPatches.size(); i++) {
1883 CkPrintf(
"%d ", localPatches[i].patchID);
1893 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1894 patchData = cpdata.ckLocalBranch();
1899 if (!patchData->devicePatchMapFlag[CkMyPe()])
return;
1900 patchData->devicePatchMapFlag[CkMyPe()] = 0;
1907 std::vector<HomePatch*>& homePatches = patchData->devData[deviceIndex].patches;
1908 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1909 std::vector<CudaPeerRecord>& peerPatches = patchData->devData[deviceIndex].h_peerPatches;
1911 homePatches.clear();
1912 localPatches.clear();
1913 peerPatches.clear();
1926 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1927 patchData = cpdata.ckLocalBranch();
1933 const int numPatchesHome = patchData->devData[deviceIndex].numPatchesHome;
1934 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
// NOTE(review): discontinuous excerpt — embedded integers (1939, 1943, ...)
// are original-file line numbers from extraction, not code; the lookup that
// assigns `patch` (original lines 1944-1947) is elided, so the break/NAMD_die
// pair below is read as: stop searching once the patch is found, and abort
// if no home patch matched this localPatches entry.
1939 int max_atom_count = 0;
1940 int total_atom_count = 0;
// Scan this device's home patches, tracking the largest single-patch atom
// count and the total — presumably used later to size device buffers;
// TODO confirm against the elided remainder of this function.
1943 for (
int i = 0; i < numPatchesHome; i++) {
1948 if (
patch != NULL)
break;
1950 if (
patch == NULL)
NAMD_die(
"Sequencer: Failed to find patch in updateDevicePatchMap");
1955 if (localPatches[i].numAtoms > max_atom_count) max_atom_count = localPatches[i].numAtoms;
1956 total_atom_count += localPatches[i].numAtoms;
1964 const int numPatchesHome = patchData->devData[deviceIndex].numPatchesHome;
1965 const int numPatchesHomeAndProxy = patchData->devData[deviceIndex].numPatchesHomeAndProxy;
1966 std::vector<CudaPeerRecord>& peerPatches = patchData->devData[deviceIndex].h_peerPatches;
1967 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1969 for (
int i = numPatchesHome; i < numPatchesHomeAndProxy; i++) {
1970 const int index = localPatches[i].peerRecordStartIndex;
1971 const int devIdx = peerPatches[index].deviceIndex;
1972 const int peerIdx = peerPatches[index].patchIndex;
1973 const CudaLocalRecord peer = patchData->devData[devIdx].h_localPatches[peerIdx];
1975 localPatches[i].numAtoms = peer.
numAtoms;
// NOTE(review): discontinuous excerpt — the leading integers (1984, 1987, ...)
// are original-file line numbers left by a garbled extraction, not code.
// Fragment purpose: assign each patch (home + proxy) its starting offset into
// the device-wide flat atom buffers by prefix-summing per-patch atom counts.
// Two parallel offsets are kept: the exact count (numAtoms) and a padded
// count (numAtomsNBPad) — presumably padding for the nonbonded kernel's
// alignment requirements; TODO confirm against CudaLocalRecord's definition.
1984 const int numPatchesHomeAndProxy = patchData->devData[deviceIndex].numPatchesHomeAndProxy;
1985 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1987 int runningOffset = 0;
1988 int runningOffsetNBPad = 0;
// Prefix sum: patch i's buffer slice starts where patches 0..i-1 end.
1990 for (
int i = 0; i < numPatchesHomeAndProxy; i++) {
1991 localPatches[i].bufferOffset = runningOffset;
1992 localPatches[i].bufferOffsetNBPad = runningOffsetNBPad;
1993 runningOffset += localPatches[i].numAtoms;
1994 runningOffsetNBPad += localPatches[i].numAtomsNBPad;
2002 const int numPatchesHomeAndProxy = patchData->devData[deviceIndex].numPatchesHomeAndProxy;
2003 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
2004 std::vector<CudaPeerRecord>& peerPatches = patchData->devData[deviceIndex].h_peerPatches;
2007 for (
int i = 0; i < peerPatches.size(); i++) {
2008 const int devIdx = peerPatches[i].deviceIndex;
2009 const int peerIdx = peerPatches[i].patchIndex;
2010 const CudaLocalRecord peer = patchData->devData[devIdx].h_localPatches[peerIdx];
2017 for (
int i = 0; i < numPatchesHomeAndProxy; i++) {
2018 const int numPeerRecord = localPatches[i].numPeerRecord;
2019 const int peerOffset = localPatches[i].peerRecordStartIndex;
2022 localPatches[i].inline_peers[j] = peerPatches[peerOffset+j];
2039 #ifdef TIMER_COLLECTION 2040 TimerSet& t =
patch->timerSet;
2079 doNonbonded = (step >= numberOfSteps) ||
2096 doFullElectrostatics = (dofull &&
2097 ((step >= numberOfSteps) ||
2109 doEnergy = energyFrequency.
init(step, newComputeEnergies);
2114 int doGlobal = doTcl || doColvars;
2132 langevinPistonFrequency.
init(step,
2299 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED) 2303 int beginStep =
simParams->beginEventStep;
2308 for ( ++step; step <= numberOfSteps; ++step ) {
2309 int dcdSelectionChecks=0;
2311 for(
int dcdindex=0; dcdindex<16;++dcdindex)
2314 if(dcdSelectionFrequency && step % dcdSelectionFrequency)
2315 dcdSelectionChecks++;
2317 const int isCollection = restartFrequency.
check(step) +
2318 dcdFrequency.
check(step) + velDcdFrequency.
check(step) +
2319 forceDcdFrequency.
check(step) + imdFrequency.
check(step) +
2321 const int isMigration = stepsPerCycle.
check(step);
2322 doEnergy = energyFrequency.
check(step);
2323 DebugM(3,
"doGlobal now "<< doGlobal<<
"\n"<<
endi);
2325 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED) 2326 eon = epid && (beginStep < step && step <= endStep);
2328 if (controlProfiling && step == beginStep) {
2331 if (controlProfiling && step == endStep) {
2398 if ( langevinPistonFrequency.
check(step) ) {
2485 doNonbonded = nonbondedFrequency.
check(step);
2487 doFullElectrostatics = (dofull && fullElectFrequency.
check(step));
2643 fprintf(stderr,
"Patch %d has %d atoms\n", pid, n);
2644 fprintf(stderr,
"%3s %8s %12s %12s %12s\n",
2645 "",
"id",
"fnormal_x",
"fnbond_x",
"fslow_x");
2646 for (
int i=0; i < n; i++) {
2647 int index = p.
id[i];
2648 fprintf(stderr,
"%3d %8d %12.8f %12.8f %12.8f\n",
2655 for (
int i=0; i < n; i++) {
2666 TestArray_write<double>(
2667 "f_normal_good.bin",
"f_normal good", (
double*)f_normal, 3*n);
2668 TestArray_write<double>(
2669 "f_nbond_good.bin",
"f_nbond good", (
double*)f_nbond, 3*n);
2670 TestArray_write<double>(
2671 "f_slow_good.bin",
"f_slow good", (
double*)f_slow, 3*n);
2693 patch->copy_updates_to_AOS();
2697 printf(
"Timer collection reporting in microseconds for " 2708 const double scaling,
2713 const double * __restrict recipMass,
2714 const double * __restrict f_normal_x,
2715 const double * __restrict f_normal_y,
2716 const double * __restrict f_normal_z,
2717 const double * __restrict f_nbond_x,
2718 const double * __restrict f_nbond_y,
2719 const double * __restrict f_nbond_z,
2720 const double * __restrict f_slow_x,
2721 const double * __restrict f_slow_y,
2722 const double * __restrict f_slow_z,
2723 double * __restrict vel_x,
2724 double * __restrict vel_y,
2725 double * __restrict vel_z,
2731 NamdProfileEvent::ADD_FORCE_TO_MOMENTUM_SOA);
2733 #ifdef SOA_SIMPLIFY_PARAMS 2734 const double * __restrict recipMass =
patch->patchDataSOA.
recipMass;
2736 const double * __restrict f_normal_x =
patch->patchDataSOA.
f_normal_x;
2737 const double * __restrict f_normal_y =
patch->patchDataSOA.
f_normal_y;
2738 const double * __restrict f_normal_z =
patch->patchDataSOA.
f_normal_z;
2740 const double * __restrict f_nbond_x =
patch->patchDataSOA.
f_nbond_x;
2741 const double * __restrict f_nbond_y =
patch->patchDataSOA.
f_nbond_y;
2742 const double * __restrict f_nbond_z =
patch->patchDataSOA.
f_nbond_z;
2744 const double * __restrict f_slow_x =
patch->patchDataSOA.
f_slow_x;
2745 const double * __restrict f_slow_y =
patch->patchDataSOA.
f_slow_y;
2746 const double * __restrict f_slow_z =
patch->patchDataSOA.
f_slow_z;
2747 double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
2748 double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
2749 double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
2764 if(this->patch->getPatchID() == 538){
2772 fprintf(stderr,
"Old Velocities %lf %lf %lf\n", vel_x[0], vel_y[0], vel_z[ 0]);
2773 fprintf(stderr,
"Adding forces %lf %lf %lf %lf %lf %lf %lf %lf %lf\n",
2774 f_slow_x[43], f_slow_y[43], f_slow_z[43],
2775 f_nbond_x[43], f_nbond_y[43], f_nbond_z[43],
2776 f_normal_x[43], f_normal_y[43], f_normal_z[43]);
// NOTE(review): discontinuous excerpt — embedded integers (2779, 2782, ...)
// are original-file line numbers from extraction, not code.  The `case`
// labels (original lines 2780-2781, 2786-2788, 2794-2796) are elided; from
// the visible bodies this looks like the classic NAMD fall-through kick:
// case 2 applies slow (full-electrostatics) forces, falls through to
// case 1 (nonbonded), falls through to case 0 (normal/bonded) — TODO confirm
// fall-through against the full source.  Each loop is the velocity-Verlet
// half-kick v += (f/m) * dt with a per-force-class timestep (dt_slow /
// dt_nbond / dt_normal); dt_nbond and dt_normal are pre-scaled by `scaling`
// on the visible lines (dt_slow's scaling, if any, is elided).
2779 switch (maxForceNumber) {
2782 for (
int i=0; i < numAtoms; i++) {
2783 vel_x[i] += f_slow_x[i] * recipMass[i] * dt_slow;
2784 vel_y[i] += f_slow_y[i] * recipMass[i] * dt_slow;
2785 vel_z[i] += f_slow_z[i] * recipMass[i] * dt_slow;
2789 dt_nbond *= scaling;
2790 for (
int i=0; i < numAtoms; i++) {
2791 vel_x[i] += f_nbond_x[i] * recipMass[i] * dt_nbond;
2792 vel_y[i] += f_nbond_y[i] * recipMass[i] * dt_nbond;
2793 vel_z[i] += f_nbond_z[i] * recipMass[i] * dt_nbond;
2797 dt_normal *= scaling;
2798 for (
int i=0; i < numAtoms; i++) {
2799 vel_x[i] += f_normal_x[i] * recipMass[i] * dt_normal;
2800 vel_y[i] += f_normal_y[i] * recipMass[i] * dt_normal;
2801 vel_z[i] += f_normal_z[i] * recipMass[i] * dt_normal;
2814 const double * __restrict vel_x,
2815 const double * __restrict vel_y,
2816 const double * __restrict vel_z,
2817 double * __restrict pos_x,
2818 double * __restrict pos_y,
2819 double * __restrict pos_z,
2824 NamdProfileEvent::ADD_VELOCITY_TO_POSITION_SOA);
2825 #ifdef SOA_SIMPLIFY_PARAMS 2826 const double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
2827 const double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
2828 const double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
2829 double * __restrict pos_x =
patch->patchDataSOA.
pos_x;
2830 double * __restrict pos_y =
patch->patchDataSOA.
pos_y;
2831 double * __restrict pos_z =
patch->patchDataSOA.
pos_z;
// NOTE(review): discontinuous excerpt — embedded integers (2834, 2840, ...)
// are original-file line numbers from extraction, not code.
// Velocity-Verlet drift: advance SOA positions by v*dt for all atoms in the
// patch.  Plain serial loop over the patch's atoms (host-side SOA path).
2834 for (
int i=0; i < numAtoms; i++) {
2835 pos_x[i] += vel_x[i] * dt;
2836 pos_y[i] += vel_y[i] * dt;
2837 pos_z[i] += vel_z[i] * dt;
// NOTE(review): leftover hard-coded debug probe — patch 538, atom index 43.
// Prints unconditionally whenever this specific patch is integrated; the
// enclosing #if/debug guard, if any, is elided from this excerpt.
2840 if(this->patch->getPatchID() == 538){
2841 fprintf(stderr,
"New Positions %lf %lf %lf\n", pos_x[43], pos_y[43], pos_z[43]);
2842 fprintf(stderr,
"New Velocities %lf %lf %lf\n", vel_x[43], vel_y[43], vel_z[43]);
2851 const int * __restrict hydrogenGroupSize,
2852 const float * __restrict mass,
2853 const double * __restrict vel_x,
2854 const double * __restrict vel_y,
2855 const double * __restrict vel_z,
2860 NamdProfileEvent::SUBMIT_HALFSTEP_SOA);
2861 #ifdef SOA_SIMPLIFY_PARAMS 2863 const float * __restrict mass =
patch->patchDataSOA.
mass;
2864 const double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
2865 const double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
2866 const double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
// NOTE(review): discontinuous excerpt — embedded integers (2872, 2874, ...)
// are original-file line numbers from extraction, not code.
// Half-step kinetic accumulation over the patch's atoms: sums m*v.v into
// kineticEnergy and the mass-weighted velocity outer product m*v_a*v_b into
// the 3x3 `virial` tensor (kinetic contribution to the pressure virial).
// The tensor is accumulated fully (all nine components) even though it is
// symmetric by construction here.
2872 for (
int i=0; i < numAtoms; i++) {
2874 kineticEnergy += mass[i] *
2875 (vel_x[i]*vel_x[i] + vel_y[i]*vel_y[i] + vel_z[i]*vel_z[i]);
2877 virial.
xx += mass[i] * vel_x[i] * vel_x[i];
2878 virial.
xy += mass[i] * vel_x[i] * vel_y[i];
2879 virial.
xz += mass[i] * vel_x[i] * vel_z[i];
2880 virial.
yx += mass[i] * vel_y[i] * vel_x[i];
2881 virial.
yy += mass[i] * vel_y[i] * vel_y[i];
2882 virial.
yz += mass[i] * vel_y[i] * vel_z[i];
2883 virial.
zx += mass[i] * vel_z[i] * vel_x[i];
2884 virial.
zy += mass[i] * vel_z[i] * vel_y[i];
2885 virial.
zz += mass[i] * vel_z[i] * vel_z[i];
// 0.5 * 0.5: one factor is the 1/2 of (1/2)mv^2; the second is presumably
// the half-step averaging convention of this submit path (the matching
// `virial *= 0.5` appears on an elided line) — TODO confirm in full source.
2887 kineticEnergy *= 0.5 * 0.5;
2890 #ifdef NODEGROUP_FORCE_REGISTER 2893 ADD_TENSOR_OBJECT(CUDASequencer->patchData->reduction,REDUCTION_VIRIAL_NORMAL,virial);
2899 #ifdef NODEGROUP_FORCE_REGISTER 2908 for (
int i=0; i < numAtoms; i += hgs) {
2911 hgs = hydrogenGroupSize[i];
2916 for (
int j = i; j < (i+hgs); j++) {
2918 v_cm_x += mass[j] * vel_x[j];
2919 v_cm_y += mass[j] * vel_y[j];
2920 v_cm_z += mass[j] * vel_z[j];
2922 BigReal recip_m_cm = 1.0 / m_cm;
2923 v_cm_x *= recip_m_cm;
2924 v_cm_y *= recip_m_cm;
2925 v_cm_z *= recip_m_cm;
2927 for (
int j = i; j < (i+hgs); j++) {
2928 BigReal dv_x = vel_x[j] - v_cm_x;
2929 BigReal dv_y = vel_y[j] - v_cm_y;
2930 BigReal dv_z = vel_z[j] - v_cm_z;
2932 intKineticEnergy += mass[j] *
2933 (vel_x[j] * dv_x + vel_y[j] * dv_y + vel_z[j] * dv_z);
2935 intVirialNormal.
xx += mass[j] * vel_x[j] * dv_x;
2936 intVirialNormal.
xy += mass[j] * vel_x[j] * dv_y;
2937 intVirialNormal.
xz += mass[j] * vel_x[j] * dv_z;
2938 intVirialNormal.
yx += mass[j] * vel_y[j] * dv_x;
2939 intVirialNormal.
yy += mass[j] * vel_y[j] * dv_y;
2940 intVirialNormal.
yz += mass[j] * vel_y[j] * dv_z;
2941 intVirialNormal.
zx += mass[j] * vel_z[j] * dv_x;
2942 intVirialNormal.
zy += mass[j] * vel_z[j] * dv_y;
2943 intVirialNormal.
zz += mass[j] * vel_z[j] * dv_z;
2946 intKineticEnergy *= 0.5 * 0.5;
2947 intVirialNormal *= 0.5;
2948 #ifdef NODEGROUP_FORCE_REGISTER 2951 ADD_TENSOR_OBJECT(CUDASequencer->patchData->reduction,REDUCTION_INT_VIRIAL_NORMAL, intVirialNormal);
2956 += intKineticEnergy;
2959 #ifdef NODEGROUP_FORCE_REGISTER 2971 const int * __restrict hydrogenGroupSize,
2972 const float * __restrict mass,
2973 const double * __restrict pos_x,
2974 const double * __restrict pos_y,
2975 const double * __restrict pos_z,
2976 const double * __restrict vel_x,
2977 const double * __restrict vel_y,
2978 const double * __restrict vel_z,
2979 const double * __restrict f_normal_x,
2980 const double * __restrict f_normal_y,
2981 const double * __restrict f_normal_z,
2982 const double * __restrict f_nbond_x,
2983 const double * __restrict f_nbond_y,
2984 const double * __restrict f_nbond_z,
2985 const double * __restrict f_slow_x,
2986 const double * __restrict f_slow_y,
2987 const double * __restrict f_slow_z,
2992 NamdProfileEvent::SUBMIT_REDUCTIONS_SOA);
2993 #ifdef SOA_SIMPLIFY_PARAMS 2995 const float * __restrict mass =
patch->patchDataSOA.
mass;
2996 const double * __restrict pos_x =
patch->patchDataSOA.
pos_x;
2997 const double * __restrict pos_y =
patch->patchDataSOA.
pos_y;
2998 const double * __restrict pos_z =
patch->patchDataSOA.
pos_z;
2999 const double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
3000 const double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
3001 const double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
3002 const double * __restrict f_normal_x =
patch->patchDataSOA.
f_normal_x;
3003 const double * __restrict f_normal_y =
patch->patchDataSOA.
f_normal_y;
3004 const double * __restrict f_normal_z =
patch->patchDataSOA.
f_normal_z;
3005 const double * __restrict f_nbond_x =
patch->patchDataSOA.
f_nbond_x;
3006 const double * __restrict f_nbond_y =
patch->patchDataSOA.
f_nbond_y;
3007 const double * __restrict f_nbond_z =
patch->patchDataSOA.
f_nbond_z;
3008 const double * __restrict f_slow_x =
patch->patchDataSOA.
f_slow_x;
3009 const double * __restrict f_slow_y =
patch->patchDataSOA.
f_slow_y;
3010 const double * __restrict f_slow_z =
patch->patchDataSOA.
f_slow_z;
3014 #ifdef NODEGROUP_FORCE_REGISTER 3022 #ifdef NODEGROUP_FORCE_REGISTER 3031 BigReal angularMomentum_x = 0;
3032 BigReal angularMomentum_y = 0;
3033 BigReal angularMomentum_z = 0;
3040 for (
int i=0; i < numAtoms; i++) {
3043 kineticEnergy += mass[i] *
3044 (vel_x[i]*vel_x[i] + vel_y[i]*vel_y[i] + vel_z[i]*vel_z[i]);
3047 momentum_x += mass[i] * vel_x[i];
3048 momentum_y += mass[i] * vel_y[i];
3049 momentum_z += mass[i] * vel_z[i];
3052 BigReal dpos_x = pos_x[i] - origin_x;
3053 BigReal dpos_y = pos_y[i] - origin_y;
3054 BigReal dpos_z = pos_z[i] - origin_z;
3057 angularMomentum_x += mass[i] * (dpos_y*vel_z[i] - dpos_z*vel_y[i]);
3058 angularMomentum_y += mass[i] * (dpos_z*vel_x[i] - dpos_x*vel_z[i]);
3059 angularMomentum_z += mass[i] * (dpos_x*vel_y[i] - dpos_y*vel_x[i]);
3064 kineticEnergy *= 0.5;
3065 Vector momentum(momentum_x, momentum_y, momentum_z);
3066 Vector angularMomentum(angularMomentum_x, angularMomentum_y,
3069 #ifdef NODEGROUP_FORCE_REGISTER 3072 ADD_VECTOR_OBJECT(CUDASequencer->patchData->reduction,REDUCTION_MOMENTUM,momentum);
3073 ADD_VECTOR_OBJECT(CUDASequencer->patchData->reduction,REDUCTION_ANGULAR_MOMENTUM,angularMomentum);
3079 #ifdef NODEGROUP_FORCE_REGISTER 3091 for (
int i=0; i < numAtoms; i += hgs) {
3092 hgs = hydrogenGroupSize[i];
3101 for ( j = i; j < (i+hgs); ++j ) {
3103 r_cm_x += mass[j] * pos_x[j];
3104 r_cm_y += mass[j] * pos_y[j];
3105 r_cm_z += mass[j] * pos_z[j];
3106 v_cm_x += mass[j] * vel_x[j];
3107 v_cm_y += mass[j] * vel_y[j];
3108 v_cm_z += mass[j] * vel_z[j];
3119 for ( j = i; j < (i+hgs); ++j ) {
3133 intKineticEnergy += mass[j] *
3134 (v_x * dv_x + v_y * dv_y + v_z * dv_z);
3137 BigReal dr_x = pos_x[j] - r_cm_x;
3138 BigReal dr_y = pos_y[j] - r_cm_y;
3139 BigReal dr_z = pos_z[j] - r_cm_z;
3142 intVirialNormal.
xx += f_normal_x[j] * dr_x;
3143 intVirialNormal.
xy += f_normal_x[j] * dr_y;
3144 intVirialNormal.
xz += f_normal_x[j] * dr_z;
3145 intVirialNormal.
yx += f_normal_y[j] * dr_x;
3146 intVirialNormal.
yy += f_normal_y[j] * dr_y;
3147 intVirialNormal.
yz += f_normal_y[j] * dr_z;
3148 intVirialNormal.
zx += f_normal_z[j] * dr_x;
3149 intVirialNormal.
zy += f_normal_z[j] * dr_y;
3150 intVirialNormal.
zz += f_normal_z[j] * dr_z;
3153 intVirialNbond.
xx += f_nbond_x[j] * dr_x;
3154 intVirialNbond.
xy += f_nbond_x[j] * dr_y;
3155 intVirialNbond.
xz += f_nbond_x[j] * dr_z;
3156 intVirialNbond.
yx += f_nbond_y[j] * dr_x;
3157 intVirialNbond.
yy += f_nbond_y[j] * dr_y;
3158 intVirialNbond.
yz += f_nbond_y[j] * dr_z;
3159 intVirialNbond.
zx += f_nbond_z[j] * dr_x;
3160 intVirialNbond.
zy += f_nbond_z[j] * dr_y;
3161 intVirialNbond.
zz += f_nbond_z[j] * dr_z;
3164 intVirialSlow.
xx += f_slow_x[j] * dr_x;
3165 intVirialSlow.
xy += f_slow_x[j] * dr_y;
3166 intVirialSlow.
xz += f_slow_x[j] * dr_z;
3167 intVirialSlow.
yx += f_slow_y[j] * dr_x;
3168 intVirialSlow.
yy += f_slow_y[j] * dr_y;
3169 intVirialSlow.
yz += f_slow_y[j] * dr_z;
3170 intVirialSlow.
zx += f_slow_z[j] * dr_x;
3171 intVirialSlow.
zy += f_slow_z[j] * dr_y;
3172 intVirialSlow.
zz += f_slow_z[j] * dr_z;
3176 intKineticEnergy *= 0.5;
3178 #ifdef NODEGROUP_FORCE_REGISTER 3184 ADD_TENSOR_OBJECT(CUDASequencer->patchData->reduction,REDUCTION_INT_VIRIAL_NORMAL,intVirialNormal);
3185 ADD_TENSOR_OBJECT(CUDASequencer->patchData->reduction,REDUCTION_INT_VIRIAL_NBOND,intVirialNbond);
3186 ADD_TENSOR_OBJECT(CUDASequencer->patchData->reduction,REDUCTION_INT_VIRIAL_SLOW,intVirialSlow);
3193 #ifdef NODEGROUP_FORCE_REGISTER 3215 NamdProfileEvent::SUBMIT_COLLECTIONS_SOA);
3230 if ( is_pos_needed || is_vel_needed ) {
3231 patch->copy_updates_to_AOS();
3234 patch->copy_forces_to_AOS();
3237 if ( is_pos_needed ) {
3238 #ifndef MEM_OPT_VERSION 3244 if ( is_vel_needed ) {
3247 if ( is_f_needed ) {
3257 const double maxvel2
3260 const double * __restrict vel_x,
3261 const double * __restrict vel_y,
3262 const double * __restrict vel_z,
3267 #ifdef SOA_SIMPLIFY_PARAMS 3268 const double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
3269 const double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
3270 const double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
3278 for (
int i=0; i < numAtoms; i++) {
3280 vel_x[i] * vel_x[i] + vel_y[i] * vel_y[i] + vel_z[i] * vel_z[i];
3281 killme = killme + ( vel2 > maxvel2 );
3288 for (
int i=0; i < numAtoms; i++) {
3290 vel_x[i] * vel_x[i] + vel_y[i] * vel_y[i] + vel_z[i] * vel_z[i];
3291 if (vel2 > maxvel2) {
3293 const Vector vel(vel_x[i], vel_y[i], vel_z[i]);
3294 const BigReal maxvel = sqrt(maxvel2);
3296 iout <<
iERROR <<
"Atom " << (a[i].
id + 1) <<
" velocity is " 3299 << i <<
" of " << numAtoms <<
" on patch " 3304 "Atoms moving too fast; simulation has become unstable (" 3306 <<
" pe " << CkMyPe() <<
").\n" <<
endi;
3317 const float * __restrict langevinParam,
3318 double * __restrict vel_x,
3319 double * __restrict vel_y,
3320 double * __restrict vel_z,
3325 NamdProfileEvent::LANGEVIN_VELOCITIES_BBK1_SOA);
3326 #ifdef SOA_SIMPLIFY_PARAMS 3328 double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
3329 double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
3330 double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
3346 for (
int i=0; i < numAtoms; i++) {
3347 BigReal dt_gamma = dt * langevinParam[i];
3350 BigReal scaling = 1. - 0.5 * dt_gamma;
3351 vel_x[i] *= scaling;
3352 vel_y[i] *= scaling;
3353 vel_z[i] *= scaling;
3363 const float * __restrict langevinParam,
3364 const float * __restrict langScalVelBBK2,
3365 const float * __restrict langScalRandBBK2,
3366 float * __restrict gaussrand_x,
3367 float * __restrict gaussrand_y,
3368 float * __restrict gaussrand_z,
3369 double * __restrict vel_x,
3370 double * __restrict vel_y,
3371 double * __restrict vel_z,
3377 NamdProfileEvent::LANGEVIN_VELOCITIES_BBK2_SOA);
3378 #ifdef SOA_SIMPLIFY_PARAMS 3385 double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
3386 double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
3387 double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
3415 for (
int i=0; i < numAtoms; i++) {
3418 gaussrand_x[i] = float(rg.
x);
3419 gaussrand_y[i] = float(rg.
y);
3420 gaussrand_z[i] = float(rg.
z);
3431 for (
int i=0; i < numAtoms; i++) {
3432 vel_x[i] += gaussrand_x[i] * langScalRandBBK2[i];
3433 vel_y[i] += gaussrand_y[i] * langScalRandBBK2[i];
3434 vel_z[i] += gaussrand_z[i] * langScalRandBBK2[i];
3435 vel_x[i] *= langScalVelBBK2[i];
3436 vel_y[i] *= langScalVelBBK2[i];
3437 vel_z[i] *= langScalVelBBK2[i];
3444 const int * __restrict hydrogenGroupSize,
3445 const float * __restrict mass,
3446 double * __restrict pos_x,
3447 double * __restrict pos_y,
3448 double * __restrict pos_z,
3453 #ifdef SOA_SIMPLIFY_PARAMS 3455 const float * __restrict mass =
patch->patchDataSOA.
mass;
3456 double * __restrict pos_x =
patch->patchDataSOA.
pos_x;
3457 double * __restrict pos_y =
patch->patchDataSOA.
pos_y;
3458 double * __restrict pos_z =
patch->patchDataSOA.
pos_z;
3477 for (
int i = 0; i < numAtoms; i += hgs) {
3479 hgs = hydrogenGroupSize[i];
3486 for ( j = i; j < (i+hgs); ++j ) {
3488 r_cm_x += mass[j] * pos_x[j];
3489 r_cm_y += mass[j] * pos_y[j];
3490 r_cm_z += mass[j] * pos_z[j];
3498 double tx = r_cm_x - origin.
x;
3499 double ty = r_cm_y - origin.
y;
3500 double tz = r_cm_z - origin.
z;
3502 double new_r_cm_x = factor.
xx*tx + factor.
xy*ty + factor.
xz*tz;
3503 double new_r_cm_y = factor.
yx*tx + factor.
yy*ty + factor.
yz*tz;
3504 double new_r_cm_z = factor.
zx*tx + factor.
zy*ty + factor.
zz*tz;
3506 new_r_cm_x += origin.
x;
3507 new_r_cm_y += origin.
y;
3508 new_r_cm_z += origin.
z;
3510 double delta_r_cm_x = new_r_cm_x - r_cm_x;
3511 double delta_r_cm_y = new_r_cm_y - r_cm_y;
3512 double delta_r_cm_z = new_r_cm_z - r_cm_z;
3514 for (j = i; j < (i+hgs); ++j) {
3515 pos_x[j] += delta_r_cm_x;
3516 pos_y[j] += delta_r_cm_y;
3517 pos_z[j] += delta_r_cm_z;
3521 for (
int i = 0; i < numAtoms; ++i) {
3525 double tx = pos_x[i] - origin.
x;
3526 double ty = pos_y[i] - origin.
y;
3527 double tz = pos_z[i] - origin.
z;
3529 double ftx = factor.
xx*tx + factor.
xy*ty + factor.
xz*tz;
3530 double fty = factor.
yx*tx + factor.
yy*ty + factor.
yz*tz;
3531 double ftz = factor.
zx*tx + factor.
zy*ty + factor.
zz*tz;
3533 pos_x[i] = ftx + origin.
x;
3534 pos_y[i] = fty + origin.
y;
3535 pos_z[i] = ftz + origin.
z;
3543 const int * __restrict hydrogenGroupSize,
3544 const float * __restrict mass,
3545 double * __restrict pos_x,
3546 double * __restrict pos_y,
3547 double * __restrict pos_z,
3548 double * __restrict vel_x,
3549 double * __restrict vel_y,
3550 double * __restrict vel_z,
3556 #ifdef SOA_SIMPLIFY_PARAMS 3558 const float * __restrict mass =
patch->patchDataSOA.
mass;
3559 double * __restrict pos_x =
patch->patchDataSOA.
pos_x;
3560 double * __restrict pos_y =
patch->patchDataSOA.
pos_y;
3561 double * __restrict pos_z =
patch->patchDataSOA.
pos_z;
3562 double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
3563 double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
3564 double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
3586 for (
int i=0; i < numAtoms; i += hgs) {
3588 hgs = hydrogenGroupSize[i];
3597 for ( j = i; j < (i+hgs); ++j ) {
3599 r_cm_x += mass[j] * pos_x[j];
3600 r_cm_y += mass[j] * pos_y[j];
3601 r_cm_z += mass[j] * pos_z[j];
3602 v_cm_x += mass[j] * vel_x[j];
3603 v_cm_y += mass[j] * vel_y[j];
3604 v_cm_z += mass[j] * vel_z[j];
3611 double tx = r_cm_x - origin.
x;
3612 double ty = r_cm_y - origin.
y;
3613 double tz = r_cm_z - origin.
z;
3614 double new_r_cm_x = factor.
xx*tx + factor.
xy*ty + factor.
xz*tz;
3615 double new_r_cm_y = factor.
yx*tx + factor.
yy*ty + factor.
yz*tz;
3616 double new_r_cm_z = factor.
zx*tx + factor.
zy*ty + factor.
zz*tz;
3617 new_r_cm_x += origin.
x;
3618 new_r_cm_y += origin.
y;
3619 new_r_cm_z += origin.
z;
3621 double delta_r_cm_x = new_r_cm_x - r_cm_x;
3622 double delta_r_cm_y = new_r_cm_y - r_cm_y;
3623 double delta_r_cm_z = new_r_cm_z - r_cm_z;
3627 double delta_v_cm_x = ( velFactor_x - 1 ) * v_cm_x;
3628 double delta_v_cm_y = ( velFactor_y - 1 ) * v_cm_y;
3629 double delta_v_cm_z = ( velFactor_z - 1 ) * v_cm_z;
3630 for (j = i; j < (i+hgs); j++) {
3631 pos_x[j] += delta_r_cm_x;
3632 pos_y[j] += delta_r_cm_y;
3633 pos_z[j] += delta_r_cm_z;
3634 vel_x[j] += delta_v_cm_x;
3635 vel_y[j] += delta_v_cm_y;
3636 vel_z[j] += delta_v_cm_z;
3645 for (
int i=0; i < numAtoms; i++) {
3646 double tx = pos_x[i] - origin.
x;
3647 double ty = pos_y[i] - origin.
y;
3648 double tz = pos_z[i] - origin.
z;
3649 double ftx = factor.
xx*tx + factor.
xy*ty + factor.
xz*tz;
3650 double fty = factor.
yx*tx + factor.
yy*ty + factor.
yz*tz;
3651 double ftz = factor.
zx*tx + factor.
zy*ty + factor.
zz*tz;
3652 pos_x[i] = ftx + origin.
x;
3653 pos_y[i] = fty + origin.
y;
3654 pos_z[i] = ftz + origin.
z;
3655 vel_x[i] *= velFactor_x;
3656 vel_y[i] *= velFactor_y;
3657 vel_z[i] *= velFactor_z;
3675 Tensor *vp = ( pressure ? &virial : 0 );
3679 "Constraint failure; simulation has become unstable.\n" <<
endi;
3683 #ifdef NODEGROUP_FORCE_REGISTER 3685 ADD_TENSOR_OBJECT(CUDASequencer->patchData->reduction,REDUCTION_VIRIAL_NORMAL,virial);
3690 #ifdef NODEGROUP_FORCE_REGISTER 3699 #if (defined(NAMD_CUDA) || defined(NAMD_HIP)) || defined(NAMD_MIC) 3714 #if defined(NTESTPID) 3718 double *xyzq =
new double[4*numAtoms];
3723 for (
int i=0; i < numAtoms; i++) {
3729 char fname[128], remark[128];
3730 sprintf(fname,
"xyzq_soa_pid%d_step%d.bin", NTESTPID, step);
3731 sprintf(remark,
"SOA xyzq, patch %d, step %d", NTESTPID, step);
3732 TestArray_write<double>(fname, remark, xyzq, 4*numAtoms);
3737 patch->zero_global_forces_SOA();
3741 int basePriority = ( (seq & 0xffff) << 15 )
3752 #ifdef NODEGROUP_FORCE_REGISTER 3754 patch->copy_forces_to_SOA();
3757 patch->copy_forces_to_SOA();
3760 #if defined(NTESTPID) 3766 double *fxyz =
new double[3*numAtoms];
3770 for (
int i=0; i < numAtoms; i++) {
3772 fxyz[3*i+1] = fy[i];
3773 fxyz[3*i+2] = fz[i];
3775 sprintf(fname,
"fxyz_normal_soa_pid%d_step%d.bin", NTESTPID, step);
3776 sprintf(remark,
"SOA fxyz normal, patch %d, step %d", NTESTPID, step);
3777 TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3781 for (
int i=0; i < numAtoms; i++) {
3783 fxyz[3*i+1] = fy[i];
3784 fxyz[3*i+2] = fz[i];
3786 sprintf(fname,
"fxyz_nbond_soa_pid%d_step%d.bin", NTESTPID, step);
3787 sprintf(remark,
"SOA fxyz nonbonded, patch %d, step %d", NTESTPID, step);
3788 TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3792 for (
int i=0; i < numAtoms; i++) {
3794 fxyz[3*i+1] = fy[i];
3795 fxyz[3*i+2] = fz[i];
3797 sprintf(fname,
"fxyz_slow_soa_pid%d_step%d.bin", NTESTPID, step);
3798 sprintf(remark,
"SOA fxyz slow, patch %d, step %d", NTESTPID, step);
3799 TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3807 double *fxyz =
new double[3*numAtoms];
3808 double *fx, *fy, *fz;
3809 char fname[64], remark[128];
3815 for (
int i=0; i < numAtoms; i++) {
3817 fxyz[3*i+1] = fy[i];
3818 fxyz[3*i+2] = fz[i];
3820 sprintf(fname,
"fslow_soa_%d.bin", step);
3821 sprintf(remark,
"SOA slow forces, step %d\n", step);
3822 TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3827 for (
int i=0; i < numAtoms; i++) {
3829 fxyz[3*i+1] = fy[i];
3830 fxyz[3*i+2] = fz[i];
3832 sprintf(fname,
"fnbond_soa_%d.bin", step);
3833 sprintf(remark,
"SOA nonbonded forces, step %d\n", step);
3834 TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3839 for (
int i=0; i < numAtoms; i++) {
3841 fxyz[3*i+1] = fy[i];
3842 fxyz[3*i+2] = fz[i];
3844 sprintf(fname,
"fnormal_soa_%d.bin", step);
3845 sprintf(remark,
"SOA normal forces, step %d\n", step);
3846 TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3855 fprintf(stderr,
"CPU force arrays for alanin\n" );
3857 fprintf(stderr,
"f[%i] = %lf %lf %lf | %lf %lf %lf | %lf %lf %lf\n", i,
3886 double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
3887 double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
3888 double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
3892 DebugM(4,
"stochastically rescaling velocities at step " << step <<
" by " << velrescaling <<
"\n");
3893 for (
int i = 0; i < numAtoms; ++i ) {
3894 vel_x[i] *= velrescaling;
3895 vel_y[i] *= velrescaling;
3896 vel_z[i] *= velrescaling;
3907 #endif // SEQUENCER_SOA 3914 char tracePrefix[20];
3924 #ifdef TIMER_COLLECTION 3925 TimerSet& t =
patch->timerSet;
3961 const BigReal nbondstep = timestep * (staleForces?1:nonbondedFrequency);
3963 doNonbonded = (step >= numberOfSteps) || !(step%nonbondedFrequency);
3970 if ( dofull )
slowFreq = fullElectFrequency;
3971 const BigReal slowstep = timestep * (staleForces?1:fullElectFrequency);
3973 doFullElectrostatics = (dofull && ((step >= numberOfSteps) || !(step%fullElectFrequency)));
3982 if ( accelMDOn && (accelMDdihe || accelMDdual)) maxForceUsed =
Results::amdf;
4012 int doGlobal = doTcl || doColvars;
4018 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 4046 if ( !commOnly && ( reassignFreq>0 ) && ! (step%reassignFreq) ) {
4051 doEnergy = ! ( step % energyFrequency );
4053 if ( accelMDOn && !accelMDdihe ) doEnergy=1;
4055 if ( adaptTempOn ) doEnergy=1;
4058 D_MSG(
"runComputeObjects()");
4066 if ( staleForces || doGlobal ) {
4072 D_MSG(
"newtonianVelocities()");
4086 D_MSG(
"submitHalfstep()");
4092 D_MSG(
"newtonianVelocities()");
4105 D_MSG(
"submitHalfstep()");
4109 if ( zeroMomentum && doFullElectrostatics )
submitMomentum(step);
4111 D_MSG(
"newtonianVelocities()");
4118 D_MSG(
"submitReductions()");
4126 sprintf(traceNote,
"%s%d",tracePrefix,step);
4127 traceUserSuppliedNote(traceNote);
4135 bool doMultigratorRattle =
false;
4149 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED) 4153 int beginStep =
simParams->beginEventStep;
4158 for ( ++step; step <= numberOfSteps; ++step )
4160 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED) 4161 eon = epid && (beginStep < step && step <= endStep);
4163 if (controlProfiling && step == beginStep) {
4166 if (controlProfiling && step == endStep) {
4173 DebugM(3,
"for step "<<step<<
" dGlobal " << doGlobal<<
"\n"<<
endi);
4184 newtonianVelocities(0.5,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4189 if (doMultigratorRattle)
rattle1(timestep, 1);
4242 #endif // UPPER_BOUND 4244 doNonbonded = !(step%nonbondedFrequency);
4245 doFullElectrostatics = (dofull && !(step%fullElectFrequency));
4248 if ( zeroMomentum && doFullElectrostatics ) {
4267 if ( accelMDOn && (accelMDdihe || accelMDdual)) maxForceUsed =
Results::amdf;
4270 doEnergy = ! ( step % energyFrequency );
4271 if ( accelMDOn && !accelMDdihe ) doEnergy=1;
4272 if ( adaptTempOn ) doEnergy=1;
4296 if ( staleForces || doGlobal ) {
4302 if ( !commOnly && ( reassignFreq>0 ) && ! (step%reassignFreq) ) {
4304 newtonianVelocities(-0.5,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4313 newtonianVelocities(1.0,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4333 if ( zeroMomentum && doFullElectrostatics )
submitMomentum(step);
4337 newtonianVelocities(-0.5,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4376 if(step == bstep || step == estep){
4381 #ifdef MEASURE_NAMD_WITH_PAPI 4383 int bstep =
simParams->papiMeasureStartStep;
4384 int estep = bstep +
simParams->numPapiMeasureSteps;
4385 if(step == bstep || step==estep) {
4386 papiMeasureBarrier(step);
4393 sprintf(traceNote,
"%s%d",tracePrefix,step);
4394 traceUserSuppliedNote(traceNote);
4396 #endif // UPPER_BOUND 4401 cycleBarrier(dofull && !((step+1)%fullElectFrequency),step);
4405 if(step == START_HPM_STEP)
4406 (CProxy_Node(CkpvAccess(BOCclass_group).node)).startHPM();
4408 if(step == STOP_HPM_STEP)
4409 (CProxy_Node(CkpvAccess(BOCclass_group).node)).stopHPM();
4415 #ifdef TIMER_COLLECTION 4417 printf(
"Timer collection reporting in microseconds for " 4421 #endif // TIMER_COLLECTION 4435 Vector movDragVel, dragIncrement;
4436 for (
int i = 0; i < numAtoms; ++i )
4442 dragIncrement = movDragGlobVel * movDragVel * dt;
4456 Vector rotDragAxis, rotDragPivot, dragIncrement;
4457 for (
int i = 0; i < numAtoms; ++i )
4463 dAngle = rotDragGlobVel * rotDragVel * dt;
4464 rotDragAxis /= rotDragAxis.
length();
4465 atomRadius = atom[i].
position - rotDragPivot;
4466 dragIncrement = cross(rotDragAxis, atomRadius) * dAngle;
4480 #if 0 && defined(NODEGROUP_FORCE_REGISTER) 4483 const int stepsPerCycle_save = stepsPerCycle;
4499 doFullElectrostatics = dofull;
4521 int doGlobal = doTcl || doColvars;
4540 #ifdef DEBUG_MINIMIZE 4541 printf(
"doTcl = %d doColvars = %d\n", doTcl, doColvars);
4547 #ifdef DEBUG_MINIMIZE 4548 else { printf(
"No computeGlobal\n"); }
4558 for ( ++step; step <= numberOfSteps; ++step ) {
4601 patch->copy_atoms_to_SOA();
4605 #if 0 && defined(NODEGROUP_FORCE_REGISTER) 4623 for (
int i = 0; i < numAtoms; ++i ) {
4629 for (
int j=1; j<hgs; ++j ) {
4647 for (
int i = 0; i < numAtoms; ++i ) {
4650 if ( drudeHardWallOn && i && (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) {
4653 if ( fixedAtomsOn && a[i].atomFixed ) a[i].
velocity = 0;
4655 if ( v2 > maxv2 ) maxv2 = v2;
4661 for (
int i = 0; i < numAtoms; ++i ) {
4662 if ( drudeHardWallOn && i && (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) {
4665 if ( fixedAtomsOn && a[i].atomFixed ) a[i].
velocity = 0;
4667 if ( v2 > maxv2 ) maxv2 = v2;
4677 for (
int i = 0; i < numAtoms; i += hgs ) {
4680 if ( minChildVel < fmax2 )
continue;
4681 int adjustChildren = 1;
4682 for (
int j = i+1; j < (i+hgs); ++j ) {
4683 if ( a[j].velocity.length2() > minChildVel ) adjustChildren = 0;
4685 if ( adjustChildren ) {
4687 for (
int j = i+1; j < (i+hgs); ++j ) {
4688 if (a[i].mass < 0.01)
continue;
4702 for (
int i = 0; i < numAtoms; ++i ) {
4707 for (
int i = 1; i < numAtoms; ++i ) {
4708 if ( (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) {
4717 for (
int i = 1; i < numAtoms; ++i ) {
4718 if ( (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) {
4730 for (
int i = 0; i < numAtoms; ++i ) {
4743 for (
int i = 0; i < numAtoms; ++i ) {
4748 for (
int i = 0; i < numAtoms; ++i ) {
4764 NAMD_die(
"Cannot zero momentum when fixed atoms are present.");
4775 for (
int i = 0; i < numAtoms; ++i ) {
4776 a[i].velocity += dv * a[i].recipMass;
4777 a[i].position += dx * a[i].recipMass;
4780 for (
int i = 0; i < numAtoms; ++i ) {
4781 a[i].velocity += dv;
4782 a[i].position += dx;
4794 NAMD_bug(
"Sequencer::scalePositionsVelocities, fixed atoms not implemented");
4798 for (
int i = 0; i < numAtoms; i += hgs ) {
4803 for (
int j=0;j < hgs;++j) {
4804 m_cm += a[i+j].
mass;
4813 for (
int j=0;j < hgs;++j) {
4819 for (
int i = 0; i < numAtoms; i++) {
4836 Tensor posScaleTensor = scaleTensor;
4865 for (
int i = 0; i < numAtoms; ++i ) {
4868 virialNormal.
outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
4872 for (
int i = 0; i < numAtoms; ++i ) {
4873 if (a[i].mass < 0.01)
continue;
4875 virialNormal.
outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
4879 kineticEnergy *= 0.5;
4887 Vector fixForceNormal = 0;
4888 Vector fixForceNbond = 0;
4891 calcFixVirial(fixVirialNormal, fixVirialNbond, fixVirialSlow, fixForceNormal, fixForceNbond, fixForceSlow);
4907 for (
int i = 0; i < numAtoms; i += hgs ) {
4913 for ( j = i; j < (i+hgs); ++j ) {
4924 NAMD_bug(
"Sequencer::multigratorPressure, this part needs to be implemented correctly");
4925 for ( j = i; j < (i+hgs); ++j ) {
4930 intVirialNormal2.
outerAdd (mass, v, dv);
4938 for ( j = i; j < (i+hgs); ++j ) {
4942 intVirialNormal2.
outerAdd(mass, v, dv);
4964 for (
int i = 0; i < numAtoms; i++) {
4975 for (
int i = 0; i < numAtoms; ++i ) {
4981 for (
int i = 0; i < numAtoms; ++i ) {
4985 kineticEnergy *= 0.5;
4986 return kineticEnergy;
5004 for (
int i = 0; i < numAtoms; i += hgs ) {
5010 for ( j = i; j < (i+hgs); ++j ) {
5015 momentumSqrSum.
outerAdd(1.0/m_cm, v_cm, v_cm);
5018 for (
int i = 0; i < numAtoms; i++) {
5019 momentumSqrSum.
outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
5038 const int staleForces,
5039 const int doNonbonded,
5040 const int doFullElectrostatics)
5043 NamdProfileEvent::NEWTONIAN_VELOCITIES);
5046 if (staleForces || (doNonbonded && doFullElectrostatics)) {
5052 if (staleForces || doNonbonded)
5054 if (staleForces || doFullElectrostatics)
5081 for (
int i = 0; i < numAtoms; ++i )
5084 if ( ! dt_gamma )
continue;
5086 BigReal f1 = exp( -dt_gamma );
5087 BigReal f2 = sqrt( ( 1. - f1*f1 ) * kbT *
5099 NamdProfileEvent::LANGEVIN_VELOCITIES_BBK1);
5109 for (i = 0; i < numAtoms; i++) {
5111 if (i < numAtoms-1 &&
5112 a[i+1].mass < 1.0 && a[i+1].mass > 0.05) {
5124 if (dt_gamma != 0.0) {
5125 v_com *= ( 1. - 0.5 * dt_gamma );
5130 if (dt_gamma != 0.0) {
5131 v_bnd *= ( 1. - 0.5 * dt_gamma );
5142 if ( ! dt_gamma )
continue;
5144 a[i].
velocity *= ( 1. - 0.5 * dt_gamma );
5155 for ( i = 0; i < numAtoms; ++i )
5158 if ( ! dt_gamma )
continue;
5160 a[i].
velocity *= ( 1. - 0.5 * dt_gamma );
5172 NamdProfileEvent::LANGEVIN_VELOCITIES_BBK2);
5199 for (i = 0; i < numAtoms; i++) {
5201 if (i < numAtoms-1 &&
5202 a[i+1].mass < 1.0 && a[i+1].mass > 0.05) {
5214 if (dt_gamma != 0.0) {
5217 sqrt( 2 * dt_gamma * kbT *
5218 ( a[i].
partition ? tempFactor : 1.0 ) / mass );
5219 v_com /= ( 1. + 0.5 * dt_gamma );
5224 if (dt_gamma != 0.0) {
5227 sqrt( 2 * dt_gamma * kbT_bnd *
5228 ( a[i+1].
partition ? tempFactor : 1.0 ) / mass );
5229 v_bnd /= ( 1. + 0.5 * dt_gamma );
5240 if ( ! dt_gamma )
continue;
5243 sqrt( 2 * dt_gamma * kbT *
5244 ( a[i].
partition ? tempFactor : 1.0 ) * a[i].recipMass );
5245 a[i].
velocity /= ( 1. + 0.5 * dt_gamma );
5259 for ( i = 0; i < numAtoms; ++i )
5262 if ( ! dt_gamma )
continue;
5265 sqrt( 2 * dt_gamma * kbT *
5266 ( a[i].
partition ? tempFactor : 1.0 ) * a[i].recipMass );
5267 a[i].
velocity /= ( 1. + 0.5 * dt_gamma );
5291 for (
int i = 0; i < numAtoms; i += hgs ) {
5295 for ( j = i; j < (i+hgs); ++j ) {
5297 a[j].fixedPosition,a[j].transform);
5303 for ( j = i; j < (i+hgs); ++j ) {
5311 Position delta_x_cm = new_x_cm - x_cm;
5312 for ( j = i; j < (i+hgs); ++j ) {
5315 a[j].fixedPosition,a[j].transform);
5324 for (
int i = 0; i < numAtoms; ++i )
5328 a[i].fixedPosition,a[i].transform);
5360 for (
int i = 0; i < numAtoms; i += hgs ) {
5364 for ( j = i; j < (i+hgs); ++j ) {
5366 a[j].fixedPosition,a[j].transform);
5373 for ( j = i; j < (i+hgs); ++j ) {
5382 Position delta_x_cm = new_x_cm - x_cm;
5385 delta_v_cm.
x = ( velFactor.
x - 1 ) * v_cm.
x;
5386 delta_v_cm.
y = ( velFactor.
y - 1 ) * v_cm.
y;
5387 delta_v_cm.
z = ( velFactor.
z - 1 ) * v_cm.
z;
5388 for ( j = i; j < (i+hgs); ++j ) {
5391 a[j].fixedPosition,a[j].transform);
5402 for (
int i = 0; i < numAtoms; ++i )
5406 a[i].fixedPosition,a[i].transform);
5423 if ( rescaleFreq > 0 ) {
5430 for (
int i = 0; i < numAtoms; ++i )
5446 const BigReal factor_dihe = accelMDfactor[0];
5447 const BigReal factor_tot = accelMDfactor[1];
5451 NAMD_die(
"accelMD broadcasting error!\n");
5453 NAMD_die(
"accelMD broadcasting error!\n");
5456 for (
int i = 0; i < numAtoms; ++i)
5462 for (
int i = 0; i < numAtoms; ++i)
5465 for (
int i = 0; i < numAtoms; ++i)
5468 if (doFullElectrostatics) {
5469 for (
int i = 0; i < numAtoms; ++i)
5475 for (
int i = 0; i < numAtoms; ++i)
5486 if ( (step < simParams->adaptTempFirstStep ) ||
5501 if ( ( reassignFreq > 0 ) && ! ( step % reassignFreq ) ) {
5510 if ( newTemp < simParams->reassignHold )
5518 for (
int i = 0; i < numAtoms; ++i )
5522 sqrt(kbT * (a[i].
partition ? tempFactor : 1.0) * a[i].recipMass) *
5526 NAMD_bug(
"Sequencer::reassignVelocities called improperly!");
5540 for (
int i = 0; i < numAtoms; ++i )
5543 a[i].mass <= 0.) ?
Vector(0,0,0) :
5544 sqrt(kbT * (a[i].
partition ? tempFactor : 1.0) * a[i].recipMass) *
5557 for (
int i = 0; i < numAtoms; ++i )
5568 for (
int i = 0; i < numAtoms; ++i )
5580 BigReal sqrt_factor = sqrt(factor);
5583 for (
int i = 0; i < numAtoms; ++i ) {
5602 for (
int i = 0; i < numAtoms; ++i )
5604 BigReal f1 = exp( coefficient * a[i].langevinParam );
5622 DebugM(4,
"stochastically rescaling velocities at step " << step <<
" by " << velrescaling <<
"\n");
5623 for (
int i = 0; i < numAtoms; ++i ) {
5642 BigReal timestep,
const int ftag,
const int useSaved
5645 NamdProfileEvent::ADD_FORCE_TO_MOMENTUM);
5647 CmiNetworkProgressAfter (0);
5657 const BigReal timestep1,
const int ftag1,
const int useSaved1,
5658 const BigReal timestep2,
const int ftag2,
const int useSaved2,
5659 const BigReal timestep3,
const int ftag3,
const int useSaved3
5662 NamdProfileEvent::ADD_FORCE_TO_MOMENTUM);
5664 CmiNetworkProgressAfter (0);
5683 NamdProfileEvent::ADD_VELOCITY_TO_POSITION);
5685 CmiNetworkProgressAfter (0);
5696 Tensor *vp = ( pressure ? &virial : 0 );
5698 iout <<
iERROR <<
"Constraint failure in HardWallDrude(); " 5699 <<
"simulation may become unstable.\n" <<
endi;
5712 Tensor *vp = ( pressure ? &virial : 0 );
5715 "Constraint failure; simulation has become unstable.\n" <<
endi;
5720 printf(
"virial = %g %g %g %g %g %g %g %g %g\n",
5721 virial.
xx, virial.
xy, virial.
xz,
5722 virial.
yx, virial.
yy, virial.
yz,
5723 virial.
zx, virial.
zy, virial.
zz);
5730 printf(
"pos[%d] = %g %g %g\n", n,
5734 printf(
"vel[%d] = %g %g %g\n", n,
5739 printf(
"force[%d] = %g %g %g\n", n,
5773 const BigReal maxvel2 = maxvel * maxvel;
5774 for (
int i=0; i<numAtoms; ++i ) {
5775 if ( a[i].velocity.length2() > maxvel2 ) {
5782 const BigReal maxvel2 = maxvel * maxvel;
5784 for (
int i=0; i<numAtoms; ++i ) {
5789 for (
int i=0; i<numAtoms; ++i ) {
5790 if ( a[i].velocity.length2() > maxvel2 ) {
5792 iout <<
iERROR <<
"Atom " << (a[i].
id + 1) <<
" velocity is " 5795 << i <<
" of " << numAtoms <<
" on patch " 5800 "Atoms moving too fast; simulation has become unstable (" 5802 <<
" pe " << CkMyPe() <<
").\n" <<
endi;
5814 for (
int i=0; i<numAtoms; ++i ) {
5830 CmiNetworkProgressAfter (0);
5841 for (
int i = 0; i < numAtoms; ++i ) {
5844 virial.
outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
5848 for (
int i = 0; i < numAtoms; ++i ) {
5849 if (a[i].mass < 0.01)
continue;
5851 virial.
outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
5856 momentumSqrSum = virial;
5858 kineticEnergy *= 0.5 * 0.5;
5882 for (
int i=0; i<numAtoms; i += hgs) {
5889 for (j=i; j< i+hgs; ++j) {
5894 for (j=i; j < i+hgs; ++j) {
5896 if (! (useGroupPressure && j != i)) {
5898 int slab = (int)floor((z-zmin)*idz);
5899 if (slab < 0) slab += nslabs;
5900 else if (slab >= nslabs) slab -= nslabs;
5904 if (useGroupPressure) {
5927 for (
int i = 0; i < numAtoms; i += hgs ) {
5930 CmiNetworkProgress ();
5937 for ( j = i; j < (i+hgs); ++j ) {
5942 momentumSqrSum.
outerAdd(1.0/m_cm, v_cm, v_cm);
5947 for ( j = i; j < (i+hgs); ++j ) {
5952 intKineticEnergy += mass * (v * dv);
5953 intVirialNormal.
outerAdd (mass, v, dv);
5957 for ( j = i; j < (i+hgs); ++j ) {
5961 intKineticEnergy += mass * (v * dv);
5962 intVirialNormal.
outerAdd(mass, v, dv);
5966 intKineticEnergy *= 0.5 * 0.5;
5968 intVirialNormal *= 0.5;
5971 momentumSqrSum *= 0.5;
5984 for (
int j = 0; j < numAtoms; j++ ) {
6002 NamdProfileEvent::SUBMIT_REDUCTIONS);
6008 CmiNetworkProgressAfter(0);
6020 Vector angularMomentum = 0;
6025 for (i = 0; i < numAtoms; ++i ) {
6029 angularMomentum += cross(a[i].mass,a[i].position-o,a[i].velocity);
6033 for (i = 0; i < numAtoms; ++i ) {
6036 angularMomentum += cross(a[i].mass,a[i].position-o,a[i].velocity);
6042 for (i = 0; i < numAtoms; i++) {
6043 if (i < numAtoms-1 &&
6044 a[i+1].mass < 1.0 && a[i+1].mass > 0.05) {
6054 drudeComKE += m_com * v_com.
length2();
6055 drudeBondKE += m_bond * v_bond.length2();
6074 kineticEnergy *= 0.5;
6084 for (
int i = 0; i < numAtoms; ++i ) {
6091 for (
int i = 0; i < numAtoms; ++i ) {
6098 for (
int i = 0; i < numAtoms; ++i ) {
6114 for (
int i = 0; i < numAtoms; i += hgs ) {
6116 CmiNetworkProgress();
6123 for ( j = i; j < (i+hgs); ++j ) {
6133 for ( j = i; j < (i+hgs); ++j ) {
6135 ( pairInteractionSelf || a[j].
partition != 2 ) )
continue;
6137 if ( fixedAtomsOn && a[j].atomFixed )
continue;
6141 intKineticEnergy += mass * (v * dv);
6148 for ( j = i; j < (i+hgs); ++j ) {
6150 if ( fixedAtomsOn && a[j].atomFixed )
continue;
6154 intKineticEnergy += mass * (v * dv);
6163 intKineticEnergy *= 0.5;
6179 for (
int i=0; i<numAtoms; i += hgs) {
6184 for (j=i; j< i+hgs; ++j) {
6191 int slab = (int)floor((z-zmin)*idz);
6192 if (slab < 0) slab += nslabs;
6193 else if (slab >= nslabs) slab -= nslabs;
6195 int ppoffset = 3*(slab + nslabs*
partition);
6196 for (j=i; j < i+hgs; ++j) {
6202 BigReal wxx = (fnormal.
x + fnbond.
x + fslow.
x) * dx.
x;
6203 BigReal wyy = (fnormal.
y + fnbond.
y + fslow.
y) * dx.
y;
6204 BigReal wzz = (fnormal.
z + fnbond.
z + fslow.
z) * dx.
z;
6219 Vector fixForceNormal = 0;
6220 Vector fixForceNbond = 0;
6223 calcFixVirial(fixVirialNormal, fixVirialNbond, fixVirialSlow, fixForceNormal, fixForceNbond, fixForceSlow);
6226 auto printTensor = [](
const Tensor& t,
const std::string& name){
6227 CkPrintf(
"%s", name.c_str());
6228 CkPrintf(
"\n%12.5lf %12.5lf %12.5lf\n" 6229 "%12.5lf %12.5lf %12.5lf\n" 6230 "%12.5lf %12.5lf %12.5lf\n",
6235 printTensor(fixVirialNormal,
"fixVirialNormal = ");
6236 printTensor(fixVirialNbond,
"fixVirialNbond = ");
6237 printTensor(fixVirialSlow,
"fixVirialSlow = ");
6248 #endif // UPPER_BOUND 6265 const double drudeBondLen2 = drudeBondLen * drudeBondLen;
6267 const double drudeMove = 0.01;
6268 const double drudeStep2 = drudeStep * drudeStep;
6269 const double drudeMove2 = drudeMove * drudeMove;
6274 for (
int i = 0; i < numAtoms; ++i ) {
6277 printf(
"f1[%2d]= %f %f %f\n", i, f1[i].x, f1[i].y, f1[i].z);
6278 printf(
"f2[%2d]= %f %f %f\n", i, f2[i].x, f2[i].y, f2[i].z);
6281 f1[i] += f2[i] + f3[i];
6282 if ( drudeHardWallOn && i && (a[i].mass > 0.05) && ((a[i].mass < 1.0)) ) {
6283 if ( ! fixedAtomsOn || ! a[i].atomFixed ) {
6284 if ( drudeStep2 * f1[i].length2() > drudeMove2 ) {
6287 a[i].
position += drudeStep * f1[i];
6289 if ( (a[i].position - a[i-1].position).length2() > drudeBondLen2 ) {
6293 Vector netf = f1[i-1] + f1[i];
6294 if ( fixedAtomsOn && a[i-1].atomFixed ) netf = 0;
6298 if ( fixedAtomsOn && a[i].atomFixed ) f1[i] = 0;
6305 for (
int i = 0; i < numAtoms; ++i ) {
6308 if ( v2 > maxv2 ) maxv2 = v2;
6311 if ( v2 > maxv2 ) maxv2 = v2;
6322 for (
int i = 0; i < numAtoms; ++i ) {
6324 if ( drudeHardWallOn && (a[i].mass > 0.05) && ((a[i].mass < 1.0)) )
continue;
6333 BigReal fmult = 1.01 * sqrt(fmax2/ff);
6334 f *= fmult; ff = f * f;
6343 printf(
"fdotf = %f\n", fdotf);
6344 printf(
"fdotv = %f\n", fdotv);
6345 printf(
"vdotv = %f\n", vdotv);
6358 for (
int i = 0; i < numAtoms; i += hgs ) {
6363 for ( j = i; j < (i+hgs); ++j ) {
6368 for ( j = i; j < (i+hgs); ++j ) {
6388 Vector fixForceNormal = 0;
6389 Vector fixForceNbond = 0;
6392 calcFixVirial(fixVirialNormal, fixVirialNbond, fixVirialSlow, fixForceNormal, fixForceNbond, fixForceSlow);
6415 NamdProfileEvent::SUBMIT_COLLECTIONS);
6417 int dcdSelectionIndex;
6420 #ifndef MEM_OPT_VERSION 6439 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC) 6471 int basePriority = ( (seq & 0xffff) << 15 )
6584 #ifdef NAMD_CUDA_XXX 6587 for (
int i=0; i<numAtoms; ++i ) {
6588 CkPrintf(
"%d %g %g %g\n", a[i].
id,
6598 CkPrintf(
"%d %g %g %g\n", a[i].
id,
6602 CkPrintf(
"%d %g %g %g\n", a[i].
id,
6606 CkPrintf(
"%d %g %g %g\n", a[i].
id,
6618 for (
int i=0; i<numAtoms; ++i ) {
6628 float fx = fxNo+fxNb+fxSl;
6629 float fy = fyNo+fyNb+fySl;
6630 float fz = fzNo+fzNb+fzSl;
6632 float f = sqrt(fx*fx+fy*fy+fz*fz);
6635 float x =
patch->
p[i].position.x;
6636 float y =
patch->
p[i].position.y;
6637 float z =
patch->
p[i].position.z;
6639 CkPrintf(
"FORCE(%04i)[%04i] = % .9e % .9e % .9e\n", seq,
id,
6674 #ifdef MEASURE_NAMD_WITH_PAPI 6675 void Sequencer::papiMeasureBarrier(
int step){
6677 broadcast->papiMeasureBarrier.get(step);
Real atomcharge(int anum) const
SubmitReduction * multigratorReduction
Vector gaussian_vector(void)
void rescaleVelocities(int)
void finishReduction(bool doEnergyVirial)
void minimizationQuenchVelocity(void)
int period
period for some step dependent event (e.g. stepsPerCycle)
NAMD_HOST_DEVICE void rescale(Tensor factor)
void max(int i, BigReal v)
int init(int initstep, int initperiod, int delta=0)
DCDParams dcdSelectionParams[16]
void tcoupleVelocities(BigReal, int)
void addMovDragToPosition(BigReal)
BigReal soluteScalingFactorCharge
virtual void algorithm(void)
void get_rotdrag_params(BigReal &v, Vector &a, Vector &p, int atomnum) const
void langevinVelocitiesBBK2_SOA(BigReal timestep)
#define NAMD_EVENT_STOP(eon, id)
Bool is_atom_movdragged(int atomnum) const
SubmitReduction * pressureProfileReduction
void minimize_rattle2(const BigReal, Tensor *virial, bool forces=false)
friend class SequencerCUDA
static int velocityNeeded(int)
void scaleVelocities(const BigReal velScale)
void positionsReady_SOA(int doMigration=0)
#define GB1_COMPUTE_HOME_PRIORITY
void addVelocityToPosition(BigReal)
SubmitReduction * reduction
NAMD_HOST_DEVICE Vector c() const
SubmitReduction * min_reduction
std::shared_ptr< CudaGlobalMasterServer > getCudaGlobalMaster()
Bool is_atom_exPressure(int atomnum) const
SimpleBroadcastObject< int > traceBarrier
void maximumMove(BigReal)
Bool monteCarloPressureOn
const GlobalMasterIMD * getIMD()
void cycleBarrier(int, int)
void submitCollections_SOA(int step, int zeroVel=0)
Bool globalMasterScaleByFrequency
static void partition(int *order, const FullAtom *atoms, int begin, int end)
SimpleBroadcastObject< Vector > momentumCorrection
void addRotDragToPosition(BigReal)
static PatchMap * Object()
void saveForce(const int ftag=Results::normal)
void registerIDsFullAtom(const FullAtom *begin, const FullAtom *end)
void langevinVelocitiesBBK2(BigReal)
void monteCarloPressureControl(const int step, const int doMigration, const int doEnergy, const int doVirial, const int maxForceNumber, const int doGlobal)
#define ADD_TENSOR_OBJECT(R, RL, D)
SimParameters * simParameters
void addForceToMomentum(FullAtom *__restrict atom_arr, const Force *__restrict force_arr, const BigReal dt, int num_atoms) __attribute__((__noinline__))
void newMinimizeDirection(BigReal)
void newMinimizePosition(BigReal)
double stochRescaleCoefficient()
Bool CUDASOAintegrateMode
int rattle1(const BigReal, Tensor *virial, SubmitReduction *)
int nextstep
next step value
void gbisComputeAfterP2()
void startWork(const LDObjHandle &handle)
HomePatchList * homePatchList()
void langevinVelocitiesBBK1(BigReal)
std::ostream & endi(std::ostream &s)
char const *const NamdProfileEventStr[]
void updateDevicePatchMap(int startup)
int berendsenPressureFreq
SubmitReduction * willSubmit(int setID, int size=-1)
void rattle1(BigReal, int)
void saveTotalForces(HomePatch *)
void submitVelocities(int seq, int zero, FullAtomList &a)
SimpleBroadcastObject< BigReal > adaptTemperature
unsigned char get_ss_type(int anum) const
void rebalanceLoad(int timestep)
Bool globalMasterStaleForces
static ReductionMgr * Object(void)
void addForceToMomentum_SOA(const double scaling, double dt_normal, double dt_nbond, double dt_slow, int maxForceNumber)
void minimizeMoveDownhill(BigReal fmax2)
Patch * patch(PatchID pid)
void addForceToMomentum(BigReal, const int ftag=Results::normal, const int useSaved=0)
void submitReductions_SOA()
std::vector< PatchRecord > & getPatches()
static PatchMap * ObjectOnPe(int pe)
float * langScalVelBBK2
derived from langevinParam
void pauseWork(const LDObjHandle &handle)
SimpleBroadcastObject< BigReal > tcoupleCoefficient
static int forceNeeded(int)
int NAMD_gcd(int a, int b)
void exchangeCheckpoint(int scriptTask, int &bpc)
Molecule stores the structural information for the system.
void split(int iStream, int numStreams)
static NAMD_HOST_DEVICE Tensor identity(BigReal v1=1.0)
void addForceToMomentum3(const BigReal timestep1, const int ftag1, const int useSaved1, const BigReal timestep2, const int ftag2, const int useSaved2, const BigReal timestep3, const int ftag3, const int useSaved3)
void positionsReady(int doMigration=0)
void submitHalfstep_SOA()
SimpleBroadcastObject< BigReal > stochRescaleCoefficient
void submitCollections(int step, int zeroVel=0)
void stochRescaleVelocities_SOA(int step)
static void print_vel_SOA(const double *vel_x, const double *vel_y, const double *vel_z, int ilo=0, int ihip1=1)
void runComputeObjects_SOA(int migration, int pairlists, int step)
BigReal calcKineticEnergy()
void adaptTempUpdate(int)
void positionsReady_GPU(int doMigration=0, int startup=0)
int32 * hydrogenGroupSize
#define TIMER_START(T, TYPE)
int rattle1_SOA(const BigReal, Tensor *virial, SubmitReduction *)
void calcFixVirial(Tensor &fixVirialNormal, Tensor &fixVirialNbond, Tensor &fixVirialSlow, Vector &fixForceNormal, Vector &fixForceNbond, Vector &fixForceSlow)
#define NAMD_PROFILE_START()
float * gaussrand_x
fill with Gaussian distributed random numbers
static __device__ __host__ __forceinline__ int computeAtomPad(const int numAtoms, const int tilesize=WARPSIZE)
int numPatches(void) const
#define NAMD_EVENT_START(eon, id)
void stochRescaleVelocities(int)
void rattle1_SOA(BigReal, int)
#define COMPUTE_HOME_PRIORITY
void constructDevicePatchMap()
static void print_tensor(const Tensor &t)
NAMD_HOST_DEVICE BigReal length(void) const
NAMD_HOST_DEVICE Position apply_transform(Position data, const Transform &t) const
void NAMD_bug(const char *err_msg)
void multigratorPressure(int step, int callNumber)
static ComputeCUDAMgr * getComputeCUDAMgr()
void berendsenPressure(int)
void submitMomentum(int step)
int rescaleVelocities_numTemps
double * vel_x
Jim recommends double precision velocity.
void submitLoadStats(int timestep)
void mollyMollify(Tensor *virial)
void runComputeObjects(int migration=1, int pairlists=0, int pressureStep=0)
void rebalance(Sequencer *seq, PatchID id)
void rescaleaccelMD(int, int, int)
SimpleBroadcastObject< Tensor > velocityRescaleTensor2
void clearDevicePatchMap()
NAMD_HOST_DEVICE BigReal length2(void) const
#define NAMD_EVENT_RANGE_2(eon, id)
SimpleBroadcastObject< int > scriptBarrier
bool getIsGlobalDevice() const
const_iterator const_begin(void) const
PatchID getPatchID() const
void scalePositionsVelocities(const Tensor &posScale, const Tensor &velScale)
int monteCarloPressureFreq
int getPesSharingDevice(const int i)
SimpleBroadcastObject< BigReal > velocityRescaleFactor2
Bool is_atom_rotdragged(int atomnum) const
void doMigrationGPU(const int startup, const int doGlobal, const int updatePatchMap)
void langevinPiston_SOA(int step)
void gbisComputeAfterP1()
void NAMD_die(const char *err_msg)
static LdbCoordinator * Object()
void gaussian_array_f(float *a, int n)
#define TIMER_INIT_WIDTH(T, TYPE, WIDTH)
int getForceSendActive() const
int berendsenPressure_count
SimpleBroadcastObject< BigReal > velocityRescaleFactor
SimpleBroadcastObject< BigReal > minimizeCoefficient
void reassignVelocities(BigReal, int)
void langevinVelocitiesBBK1_SOA(BigReal timestep)
SimpleBroadcastObject< Vector > accelMDRescaleFactor
int hardWallDrude(const BigReal, Tensor *virial, SubmitReduction *)
ComputeGlobal * computeGlobalObject
void saveForce(const int ftag=Results::normal)
void runComputeObjectsCUDA(int doMigration, int doGlobal, int pairlists, int nstep, int startup)
SimpleBroadcastObject< Tensor > positionRescaleFactor
void buildRattleList_SOA()
void langevinVelocities(BigReal)
static std::pair< int, int > coordinateNeeded(int)
void submitForces(int seq, FullAtomList &a, int maxForceUsed, ForceList *f)
static CollectionMaster * Object()
void hardWallDrude(BigReal, int)
#define TIMER_STOP(T, TYPE)
void multigratorTemperature(int step, int callNumber)
static constexpr int num_inline_peer
double * recipMass
derived from mass
void reinitVelocities(void)
int pressureProfileAtomTypes
int checkpoint_berendsenPressure_count
ControllerBroadcasts * broadcast
#define NAMD_EVENT_START_EX(eon, id, str)
void maximumMove_SOA(const double dt, const double maxvel2)
CollectionMgr *const collection
void updateDeviceData(const int startup, const int maxForceUsed, const int doGlobal)
#define GB2_COMPUTE_HOME_PRIORITY
#define NAMD_PROFILE_STOP()
Bool langevinGammasDiffer
int getNumStepsToRun(void)
void resetMovingAverage()
void newtonianVelocities(BigReal, const BigReal, const BigReal, const BigReal, const int, const int, const int)
static void print_vel_AOS(const FullAtom *a, int ilo=0, int ihip1=1)
void rescaleSoluteCharges(BigReal)
void addVelocityToPosition_SOA(const double dt)
void setVal(const NodeReduction *other)
#define SOA_SIMPLIFY_PARAMS
void submitMinimizeReductions(int, BigReal fmax2)
#define ADD_VECTOR_OBJECT(R, RL, D)
CudaComputeNonbonded * getCudaComputeNonbonded()
int multigratorPressureFreq
int numPatchesOnNode(int node)
void submitPositions(int seq, FullAtomList &a, Lattice l, int prec, int dcdSelectionIndex)
void correctMomentum(int step, BigReal drifttime)
NAMD_HOST_DEVICE void outerAdd(BigReal scale, const Vector &v1, const Vector &v2)
std::ostream & iERROR(std::ostream &s)
void addForceToMomentum3(FullAtom *__restrict atom_arr, const Force *__restrict force_arr1, const Force *__restrict force_arr2, const Force *__restrict force_arr3, const BigReal dt1, const BigReal dt2, const BigReal dt3, int num_atoms) __attribute__((__noinline__))
void addVelocityToPosition(FullAtom *__restrict atom_arr, const BigReal dt, int num_atoms) __attribute__((__noinline__))
ForceList f[Results::maxNumForces]
float * langScalRandBBK2
from langevinParam and recipMass
void get_movdrag_params(Vector &v, int atomnum) const
void enableEarlyExit(void)
void submitReductions(int)
#define namd_reciprocal(x)
SimpleBroadcastObject< Tensor > positionRescaleFactor2
void integrate_CUDA_SOA(int scriptTask)
void loweAndersenFinish()
int getNumPesSharingDevice()
SimParameters *const simParams
SimpleBroadcastObject< Tensor > velocityRescaleTensor
NAMD_HOST_DEVICE Vector unit(void) const
__thread DeviceCUDA * deviceCUDA
NAMD_HOST_DEVICE Vector origin() const
virtual void atomUpdate()
void rescaleVelocitiesByFactor(BigReal)
int multigratorTemperatureFreq
int globalMasterFrequency
CudaPmeOneDevice * createCudaPmeOneDevice()
#define PATCH_PRIORITY(PID)
CudaPmeOneDevice * getCudaPmeOneDevice()
void updatePatchOrder(const std::vector< CudaLocalRecord > &data)
void berendsenPressure_SOA(int step)
int32 numAtoms
number of atoms
void printDevicePatchMap()
void compute(const Lattice &lattice, int doEnergyVirial, int step)
void exchangeAtoms(int scriptTask)