30 #define PRINT_FORCES 0 62 #define MIN_DEBUG_LEVEL 3 72 #define START_HPM_STEP 1000 73 #define STOP_HPM_STEP 1500 77 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 79 #define __thread __declspec(thread) 88 #define SPECIAL_PATCH_ID 91 97 int ilo=0,
int ihip1=1
99 printf(
"AOS Velocities:\n");
100 for (
int i=ilo; i < ihip1; i++) {
101 printf(
"%d %g %g %g\n", i,
111 int ilo=0,
int ihip1=1
113 printf(
"SOA Velocities:\n");
114 for (
int i=ilo; i < ihip1; i++) {
115 printf(
"%d %g %g %g\n", i, vel_x[i], vel_y[i], vel_z[i]);
121 printf(
"%g %g %g %g %g %g %g %g %g\n",
159 inline int init(
int initstep,
int initperiod,
int delta=0) {
176 pairlistsAreValid(0),
217 #if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(SEQUENCER_SOA) && defined(NODEGROUP_FORCE_REGISTER) 228 CUDASequencer->patchData->reduction->zero();
241 #if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(SEQUENCER_SOA) && defined(NODEGROUP_FORCE_REGISTER) 243 delete CUDASequencer;
250 void Sequencer::threadRun(
Sequencer* arg)
260 DebugM(4,
"::run() - this = " <<
this <<
"\n" );
261 thread = CthCreate((CthVoidFn)&(threadRun),(
void*)(
this),
SEQ_STK_SZ);
262 CthSetStrategyDefault(thread);
283 switch ( scriptTask ) {
328 NAMD_die(
"Minimization is currently not supported on the GPU integrator\n");
343 #ifdef NODEGROUP_FORCE_REGISTER 349 #ifdef NODEGROUP_FORCE_REGISTER 358 NAMD_bug(
"Unknown task in Sequencer::algorithm");
373 #if defined(NODEGROUP_FORCE_REGISTER) 377 CUDASequencer->numPatchesCheckedIn += 1;
378 if (CUDASequencer->numPatchesCheckedIn < patchMap->
numPatchesOnNode(CkMyPe())) {
380 CUDASequencer->waitingThreads.push_back(CthSelf());
387 int lastStep = CUDASequencer->patchData->flags.step;
389 if (CUDASequencer->breakSuspends)
break;
395 CUDASequencer->numPatchesCheckedIn += 1;
396 CUDASequencer->waitingThreads.push_back(CthSelf());
397 if(CUDASequencer->numPatchesCheckedIn == patchMap->
numPatchesOnNode(CkMyPe()) - 1 &&
398 CUDASequencer->masterThreadSleeping){
399 CUDASequencer->masterThreadSleeping =
false;
400 CthAwaken(CUDASequencer->masterThread);
407 CUDASequencer->numPatchesCheckedIn = 0;
408 for (CthThread t : CUDASequencer->waitingThreads) {
411 CUDASequencer->waitingThreads.clear();
427 CUDASequencer->sync();
440 ComputeBondedCUDA* cudaBond = cudaMgr->getComputeBondedCUDA();
447 if (isMaster && cudaGlobal) cudaGlobal->setStep(static_cast<int64_t>(
patch->
flags.
step));
468 cudaBond->atomUpdate();
476 cudaGlobal->updateAtomMaps();
485 CUDASequencer->launch_set_compute_positions();
486 CUDASequencer->sync();
494 for(
int i = 0 ; i < CkNumPes(); i++){
497 ComputeBondedCUDA* b = CUDASequencer->patchData->cudaBondedList[i];
499 if (b == NULL)
continue;
500 auto list = std::find(std::begin(b->getBondedPes()), std::end(b->getBondedPes()), CkMyPe());
501 if( list != std::end(b->getBondedPes()) ){
502 b->openBoxesOnPe(startup);
517 cudaBond->loadTuplesOnPe(startup);
523 cudaBond->copyTupleDataGPU(startup);
525 cudaBond->copyTupleDataSN();
535 cudaBond->launchWork();
538 cudaPme->
compute(*(CUDASequencer->patchData->lat), reducePme, this->patch->flags.step);
543 cudaGlobal->communicateToClients(&(this->
patch->
lattice));
544 cudaGlobal->communicateToMD();
560 for(
int i = 0; i < numhp; ++i) {
562 hp->zero_global_forces_SOA();
577 CUDASequencer->copyGlobalForcesToDevice();
589 cudaBond->finishPatches();
597 cudaBond->finishReductions();
599 if (cudaGlobal) cudaGlobal->finishReductions(
601 CUDASequencer->patchData->reduction);
613 cudaPme->
compute(*(CUDASequencer->patchData->lat), reducePme, this->patch->flags.step);
615 if (doNbond) cudaNbond->
doWork();
619 cudaGlobal->communicateToClients(&(this->
patch->
lattice));
620 cudaGlobal->communicateToMD();
632 for(
int i = 0; i < numhp; ++i) {
634 hp->zero_global_forces_SOA();
649 CUDASequencer->copyGlobalForcesToDevice();
659 cudaBond->finishPatches();
665 if (cudaGlobal) cudaGlobal->finishReductions(
667 CUDASequencer->patchData->reduction);
686 const int doMigration,
689 const int maxForceNumber,
694 Controller *c_out = CUDASequencer->patchData->c_out;
695 bool mGpuOn = CUDASequencer->mGpuOn;
703 CUDASequencer->patchData->nodeReductionSave->setVal(
reduction);
707 c_out->mcPressure_prepare(step);
709 factor = c_out->getPositionRescaleFactor(step);
714 CUDASequencer->monteCarloPressure_part1(factor, origin, oldLattice);
719 CUDASequencer->patchData->lat = &(this->
patch->
lattice);
720 CUDASequencer->patchData->factor = &(factor);
722 CUDASequencer->patchData->flags.copyIntFlags(this->
patch->
flags);
733 CUDASequencer->update_patch_flags();
744 CUDASequencer->monteCarloPressure_part2(
reduction, step, maxForceNumber,
745 doEnergy, doGlobal, doVirial);
749 c_out->mcPressure_accept(step);
750 accepted = c_out->getMCAcceptance(step);
756 CUDASequencer->monteCarloPressure_accept(
reduction, doMigration);
761 CUDASequencer->patchData->lat = &(this->
patch->
lattice);
763 CUDASequencer->patchData->flags.copyIntFlags(this->
patch->
flags);
767 CUDASequencer->monteCarloPressure_reject(this->
patch->
lattice);
769 reduction->
setVal(CUDASequencer->patchData->nodeReductionSave);
775 if(isMasterPe && !accepted){
777 CUDASequencer->update_patch_flags();
782 const int updatePatchMap) {
785 const bool updatePatchData = startup || doGlobal || updatePatchMap;
788 bool realloc =
false;
797 if(CUDASequencer->patchData->atomReallocationFlagPerDevice[i] != 0) {
804 CUDASequencer->reallocateMigrationDestination();
805 CUDASequencer->registerSOAPointersToHost();
809 CUDASequencer->copySOAHostRegisterToDevice();
818 CUDASequencer->migrationLocalInit();
824 CUDASequencer->migrationPerform();
830 CUDASequencer->migrationUpdateAtomCounts();
836 CUDASequencer->migrationUpdateAtomOffsets();
842 CUDASequencer->copyPatchDataToHost();
850 realloc = CUDASequencer->copyPatchData(
true,
false);
857 if(CUDASequencer->patchData->atomReallocationFlagPerDevice[i] != 0) {
864 CUDASequencer->registerSOAPointersToHost();
868 CUDASequencer->copySOAHostRegisterToDevice();
874 CUDASequencer->migrationLocalPost(0);
875 CUDASequencer->migrationSortAtomsNonbonded();
880 if (!updatePatchData) {
883 if(CUDASequencer->numPatchesCheckedIn < patchMap->
numPatchesOnNode(CkMyPe()) -1 ) {
884 CUDASequencer->masterThreadSleeping =
true;
885 CUDASequencer->masterThread = CthSelf();
891 CUDASequencer->sync();
895 CUDASequencer->migrationUpdateDestination();
900 CUDASequencer->migrationUpdateProxyDestination();
905 CUDASequencer->migrationUpdateRemoteOffsets();
910 CUDASequencer->copyDataToPeers(
true);
914 if (updatePatchData) {
918 for(
int i = 0; i < numhp; ++i) {
927 CUDASequencer->copyAoSDataToHost();
932 if(CUDASequencer->numPatchesCheckedIn < patchMap->
numPatchesOnNode(CkMyPe()) -1 ) {
933 CUDASequencer->masterThreadSleeping =
true;
934 CUDASequencer->masterThread = CthSelf();
941 CUDASequencer->updateHostPatchDataSOA();
949 CUDASequencer->migrationUpdateAdvancedFeatures(
false);
957 #ifdef TIMER_COLLECTION 958 TimerSet& t =
patch->timerSet;
975 Controller *c_out = CUDASequencer->patchData->c_out;
1000 doNonbonded = (step >= numberOfSteps) ||
1017 doFullElectrostatics = (dofull &&
1018 ((step >= numberOfSteps) ||
1030 doEnergy = energyFrequency.
init(step, newComputeEnergies);
1042 int doGlobal = (doTcl || doColvars);
1045 bool globalMasterStep=
false;
1046 int doGlobalObjects=0;
1047 int doGlobalStaleForces = 0;
1052 globalMasterStep = globalMasterFrequency.
check(step);
1053 doGlobalObjects = globalMasterStep? 1:0;
1057 doGlobalStaleForces=1;
1065 doGlobalStaleForces=doGlobalObjects;
1069 doGlobalStaleForces=doGlobalObjects;
1074 doGlobalStaleForces = 0;
1075 doGlobalObjects = 0;
1091 langevinPistonFrequency.
init(step,
1123 patch->copy_atoms_to_SOA();
1139 CUDASequencer->breakSuspends =
false;
1144 if (CUDASequencer->patchData->ptrCollectionMaster == NULL) {
1147 CUDASequencer->patchData->ptrCollectionMaster = pcm;
1150 if (CUDASequencer->patchData->ptrOutput == NULL) {
1153 CUDASequencer->patchData->ptrOutput = pout;
1156 if (CUDASequencer->patchData->pdb == NULL) {
1159 CUDASequencer->patchData->pdb = pdb;
1162 if (CUDASequencer->patchData->imd == NULL) {
1165 CUDASequencer->patchData->imd = imd;
1175 CUDASequencer->patchData->cudaBondedList[CkMyPe()] = NULL;
1176 CUDASequencer->patchData->cudaNonbondedList[CkMyPe()] = NULL;
1185 CUDASequencer->patchData->reduction->zero();
1207 if(patchData->updateCounter.load()>0)
1209 CUDASequencer->updateDeviceKernels();
1213 CUDASequencer->startRun1(maxForceUsed, this->
patch->
lattice);
1216 CUDASequencer->patchData->lat = &(this->
patch->
lattice);
1217 CUDASequencer->patchData->flags.copyIntFlags(this->
patch->
flags);
1223 const bool addCudaGlobalForces =
1225 (cudaGlobalMasterObject !=
nullptr) ?
1226 cudaGlobalMasterObject->willAddGlobalForces() :
1229 if (addCudaGlobalForces) {
1230 CUDASequencer->allocateGPUSavedForces();
1238 if(CUDASequencer->numPatchesCheckedIn < patchMap->
numPatchesOnNode(CkMyPe()) -1 ) {
1239 CUDASequencer->masterThreadSleeping =
true;
1240 CUDASequencer->masterThread = CthSelf();
1257 CUDASequencer->finish_patch_flags(
true);
1262 const bool addCudaGlobalForces =
1264 (cudaGlobalMasterObject !=
nullptr) ?
1265 cudaGlobalMasterObject->willAddGlobalForces() :
1268 CUDASequencer->startRun2(timestep,
1270 CUDASequencer->patchData->reduction, doGlobal || addCudaGlobalForces, maxForceUsed);
1274 const bool requestTotalForces = computeGlobal ? computeGlobal->
getForceSendActive() :
false;
1279 const bool requestGPUTotalForces =
1281 (cudaGlobalMasterObject !=
nullptr) ?
1282 cudaGlobalMasterObject->requestedTotalForces() :
1285 CUDASequencer->startRun3(timestep,
1287 CUDASequencer->patchData->reduction,
1288 requestTotalForces, doGlobalStaleForces,
1290 requestGPUTotalForces,
1301 for(
int i = 0; i < numhp; ++i) {
1313 CUDASequencer->patchData->flags.copyIntFlags(this->
patch->
flags);
1315 c_out->printStep(step, CUDASequencer->patchData->reduction);
1316 CUDASequencer->patchData->reduction->zero();
1322 double velrescaling = 1;
1324 for( ++step; step <= numberOfSteps; ++step ){
1325 const int isForcesOutputStep = forceDcdFrequency.
check(step);
1326 int dcdSelectionChecks=0;
1328 for(
int dcdindex=0; dcdindex<16;++dcdindex)
1331 if(dcdSelectionFrequency && step % dcdSelectionFrequency)
1332 dcdSelectionChecks++;
1334 const int isCollection = restartFrequency.
check(step) +
1335 dcdFrequency.
check(step) + velDcdFrequency.
check(step) +
1336 isForcesOutputStep + imdFrequency.
check(step) + dcdSelectionChecks;
1337 int isMigration =
false;
1338 const int doVelocityRescale = stochRescaleFrequency.
check(step);
1339 const int doMCPressure = monteCarloPressureFrequency.
check(step);
1341 doEnergy = energyFrequency.
check(step) || doVelocityRescale || doMCPressure;
1342 int langevinPistonStep = langevinPistonFrequency.
check(step);
1344 int reassignVelocityStep = reassignVelocityFrequency.
check(step);
1347 int berendsenPressureStep = 0;
1352 berendsenPressureStep = 1;
1355 if(patchData->updateCounter.load()>0)
1357 CUDASequencer->updateDeviceKernels();
1362 globalMasterStep = globalMasterFrequency.
check(step);
1363 doGlobalObjects = globalMasterStep? 1:0;
1367 doGlobalStaleForces=1;
1375 doGlobalStaleForces=doGlobalObjects;
1379 doGlobalStaleForces=doGlobalObjects;
1384 doGlobalStaleForces = 0;
1385 doGlobalObjects = 0;
1386 globalMasterStep =
false;
1390 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED) 1405 c_out->piston1(step);
1409 c_out->berendsenPressureController(step);
1412 if (langevinPistonStep || berendsenPressureStep) {
1414 factor = c_out->getPositionRescaleFactor(step);
1416 CUDASequencer->patchData->lat = &(this->
patch->
lattice);
1417 CUDASequencer->patchData->factor = &(factor);
1423 int previousMaxForceUsed;
1426 previousMaxForceUsed = maxForceUsed;
1430 doNonbonded = nonbondedFrequency.
check(step);
1432 doFullElectrostatics = (dofull && fullElectFrequency.
check(step));
1448 CUDASequencer->launch_part1(
1450 timestep, nbondstep, slowstep, velrescaling, maxvel2,
1451 CUDASequencer->patchData->reduction,
1452 *(CUDASequencer->patchData->factor),
1455 (langevinPistonStep || berendsenPressureStep) ? *(CUDASequencer->patchData->lat) : this->patch->lattice,
1456 reassignVelocityStep,
1458 berendsenPressureStep,
1459 previousMaxForceUsed,
1461 this->patch->flags.savePairlists,
1462 this->patch->flags.usePairlists,
1467 if (reassignVelocityStep)
1477 CUDASequencer->launch_part11(
1478 timestep, nbondstep, slowstep, velrescaling, maxvel2,
1479 CUDASequencer->patchData->reduction,
1480 *(CUDASequencer->patchData->factor),
1483 (langevinPistonStep || berendsenPressureStep) ? *(CUDASequencer->patchData->lat) : this->patch->lattice,
1485 previousMaxForceUsed,
1487 this->patch->flags.savePairlists,
1488 this->patch->flags.usePairlists,
1499 if(CUDASequencer->patchData->migrationFlagPerDevice[i] != 0) {
1508 CUDASequencer->launch_set_compute_positions();
1516 CUDASequencer->updatePairlistFlags(isMigration);
1518 CUDASequencer->copyPositionsAndVelocitiesToHost(isMigration, doGlobalObjects);
1528 if(CUDASequencer->numPatchesCheckedIn < patchMap->
numPatchesOnNode(CkMyPe()) -1 ) {
1529 CUDASequencer->masterThreadSleeping =
true;
1530 CUDASequencer->masterThread = CthSelf();
1547 CUDASequencer->finish_patch_flags(isMigration);
1558 const bool addCudaGlobalForces =
1560 (cudaGlobalMasterObject !=
nullptr) ?
1561 cudaGlobalMasterObject->willAddGlobalForces() :
1564 CUDASequencer->launch_part2(doMCPressure,
1565 timestep, nbondstep, slowstep,
1566 CUDASequencer->patchData->reduction,
1573 doGlobalStaleForces || addCudaGlobalForces,
1585 const bool requestTotalForces = (computeGlobal ? computeGlobal->
getForceSendActive() :
false) && doGlobalObjects;
1592 const bool requestGPUTotalForces =
1594 (CudaGlobalMasterObject !=
nullptr) ?
1595 CudaGlobalMasterObject->requestedTotalForces() :
1598 CUDASequencer->launch_part3(doMCPressure,
1599 timestep, nbondstep, slowstep,
1600 CUDASequencer->patchData->reduction,
1605 doGlobalStaleForces,
1606 requestGPUTotalForces,
1610 isForcesOutputStep);
1617 if (requestTotalForces) {
1622 for(
int i = 0; i < numhp; ++i) {
1634 c_out->printStep(step, CUDASequencer->patchData->reduction);
1638 if (doVelocityRescale) {
1654 CUDASequencer->copyAoSDataToHost();
1661 for (
auto i= hplist->
begin(); i != hplist->
end(); i++) {
1673 CUDASequencer->copyAoSDataToHost();
1677 CUDASequencer->updateHostPatchDataSOA();
1678 CUDASequencer->saveForceCUDASOA_direct(
false,
true, maxForceUsed);
1680 if(isMasterPe) CUDASequencer->copyPositionsAndVelocitiesToHost(
true,doGlobal);
1683 for (
auto i= hplist->
begin(); i != hplist->
end(); i++) {
1685 hp->copy_updates_to_AOS();
1686 hp->copy_forces_to_AOS();
1692 CUDASequencer->breakSuspends =
true;
1706 CUDASequencer->copyPatchData(
true, startup);
1708 CUDASequencer->reallocateMigrationDestination();
1709 CUDASequencer->copyAtomDataToDeviceAoS();
1711 CUDASequencer->copyAtomDataToDevice(startup, maxForceUsed);
1713 CUDASequencer->migrationLocalPost(startup);
1714 CUDASequencer->migrationUpdateAdvancedFeatures(startup);
1716 CUDASequencer->registerSOAPointersToHost();
1720 CUDASequencer->copySOAHostRegisterToDevice();
1726 CUDASequencer->updateHostPatchDataSOA();
1740 ComputeBondedCUDA* cudaBond = cudaMgr->getComputeBondedCUDA();
1743 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1744 patchData = cpdata.ckLocalBranch();
1749 if (patchData->devicePatchMapFlag[CkMyPe()])
return;
1750 patchData->devicePatchMapFlag[CkMyPe()] = 1;
1761 std::vector<NBPatchRecord>& nonbondedPatches = cudaNbond->
getPatches();
1762 std::vector<HomePatch*>& homePatches = patchData->devData[deviceIndex].patches;
1763 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1769 homePatches.begin(),
// Build the device-local patch list from the nonbonded compute's patch set.
// NOTE(review): this chunk is elided (original lines 1778, 1780-1781, 1785-1786,
// 1788-1789, 1792-1797, 1799-1800, 1802-1805, 1807 missing) — the record type,
// lambda headers, and sort call are only partially visible; confirm against the
// full source before editing.
1777 for (
int i = 0; i < nonbondedPatches.size(); i++) {
1779 record.
patchID = nonbondedPatches[i].patchID;
// A patch is a proxy iff no home patch on this device has the same ID.
1782 const int targetPatchID = record.
patchID;
1783 auto result = std::find_if(
1784 homePatches.begin(),
1787 return (p->getPatchID() == targetPatchID);
1790 record.
isProxy = (result == homePatches.end());
1791 localPatches.push_back(record);
// Partition so home patches (isProxy == 0) precede proxy patches; the
// comparator below is the visible fragment of that sort.
1798 localPatches.begin(),
1801 return (a.
isProxy < b.isProxy);
// Publish the reordered list to the bonded compute and record the counts
// used elsewhere as numPatchesHome / numPatchesHomeAndProxy.
1806 cudaBond->updatePatchOrder(localPatches);
1808 patchData->devData[deviceIndex].numPatchesHome = homePatches.size();
1809 patchData->devData[deviceIndex].numPatchesHomeAndProxy = localPatches.size();
1821 std::vector<CudaPeerRecord>& myPeerPatches = patchData->devData[deviceIndex].h_peerPatches;
1822 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
// For every local patch, find its counterpart records on the OTHER devices:
// a peer is a record on a different device with the same patchID but the
// opposite isProxy flag (home on one device, proxy on the other).
// NOTE(review): interior lines are elided (e.g. the loop over devIdx at 1828-1829,
// the peer/peerRecord declarations at 1836 and 1838-1840) — the variables
// devIdx, peer, and peerRecord are declared in lines not shown here.
1824 for (
int i = 0; i < localPatches.size(); i++) {
1825 std::vector<CudaPeerRecord> tempPeers;
1826 const int targetPatchID = localPatches[i].patchID;
1827 const int targetIsProxy = localPatches[i].isProxy;
// Skip self — a device is not its own peer.
1830 if (devIdx == deviceIndex)
continue;
1831 std::vector<CudaLocalRecord>& peerPatches = patchData->devData[devIdx].h_localPatches;
1835 for (
int j = 0; j < patchData->devData[devIdx].numPatchesHomeAndProxy; j++) {
1837 if (peer.
patchID == targetPatchID && peer.
isProxy != targetIsProxy) {
1841 tempPeers.push_back(peerRecord);
// Append this patch's peers to the flat per-device peer array and record
// the (start, count) slice on the local record.
1849 localPatches[i].numPeerRecord = tempPeers.size();
1850 if (!tempPeers.empty()) {
1851 localPatches[i].peerRecordStartIndex = myPeerPatches.size();
1852 myPeerPatches.insert(myPeerPatches.end(), tempPeers.begin(), tempPeers.end());
1860 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1861 patchData = cpdata.ckLocalBranch();
1867 const int numPatchesHome = patchData->devData[deviceIndex].numPatchesHome;
1868 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1871 CkPrintf(
"PE: %d\n", CkMyPe());
1873 CkPrintf(
"[%d] Home patches %d Local patches %d\n", CkMyPe(), numPatchesHome, localPatches.size());
1875 CkPrintf(
"Home Patches: ");
1876 for (
int i = 0; i < numPatchesHome; i++) {
1877 CkPrintf(
"%d ", localPatches[i].patchID);
1881 CkPrintf(
"Proxy Patches: ");
1882 for (
int i = numPatchesHome; i < localPatches.size(); i++) {
1883 CkPrintf(
"%d ", localPatches[i].patchID);
1893 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1894 patchData = cpdata.ckLocalBranch();
1899 if (!patchData->devicePatchMapFlag[CkMyPe()])
return;
1900 patchData->devicePatchMapFlag[CkMyPe()] = 0;
1907 std::vector<HomePatch*>& homePatches = patchData->devData[deviceIndex].patches;
1908 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1909 std::vector<CudaPeerRecord>& peerPatches = patchData->devData[deviceIndex].h_peerPatches;
1911 homePatches.clear();
1912 localPatches.clear();
1913 peerPatches.clear();
1926 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1927 patchData = cpdata.ckLocalBranch();
1933 const int numPatchesHome = patchData->devData[deviceIndex].numPatchesHome;
1934 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1939 int max_atom_count = 0;
1940 int total_atom_count = 0;
1943 for (
int i = 0; i < numPatchesHome; i++) {
1948 if (
patch != NULL)
break;
1950 if (
patch == NULL)
NAMD_die(
"Sequencer: Failed to find patch in updateDevicePatchMap");
1955 if (localPatches[i].numAtoms > max_atom_count) max_atom_count = localPatches[i].numAtoms;
1956 total_atom_count += localPatches[i].numAtoms;
1964 const int numPatchesHome = patchData->devData[deviceIndex].numPatchesHome;
1965 const int numPatchesHomeAndProxy = patchData->devData[deviceIndex].numPatchesHomeAndProxy;
1966 std::vector<CudaPeerRecord>& peerPatches = patchData->devData[deviceIndex].h_peerPatches;
1967 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
// Proxy patches (indices [numPatchesHome, numPatchesHomeAndProxy)) do not own
// atoms; copy each proxy's atom count from its first peer record, which points
// at the owning device's home record for the same patch.
// NOTE(review): line 1974 is elided; confirm nothing else is copied there.
1969 for (
int i = numPatchesHome; i < numPatchesHomeAndProxy; i++) {
1970 const int index = localPatches[i].peerRecordStartIndex;
1971 const int devIdx = peerPatches[index].deviceIndex;
1972 const int peerIdx = peerPatches[index].patchIndex;
1973 const CudaLocalRecord peer = patchData->devData[devIdx].h_localPatches[peerIdx];
1975 localPatches[i].numAtoms = peer.
numAtoms;
1984 const int numPatchesHomeAndProxy = patchData->devData[deviceIndex].numPatchesHomeAndProxy;
1985 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
// Exclusive prefix sum over per-patch atom counts: assigns each patch its
// starting offset into the packed device atom buffer, both for the exact
// count (bufferOffset) and the nonbonded-padded count (bufferOffsetNBPad).
1987 int runningOffset = 0;
1988 int runningOffsetNBPad = 0;
1990 for (
int i = 0; i < numPatchesHomeAndProxy; i++) {
1991 localPatches[i].bufferOffset = runningOffset;
1992 localPatches[i].bufferOffsetNBPad = runningOffsetNBPad;
1993 runningOffset += localPatches[i].numAtoms;
1994 runningOffsetNBPad += localPatches[i].numAtomsNBPad;
2002 const int numPatchesHomeAndProxy = patchData->devData[deviceIndex].numPatchesHomeAndProxy;
2003 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
2004 std::vector<CudaPeerRecord>& peerPatches = patchData->devData[deviceIndex].h_peerPatches;
2007 for (
int i = 0; i < peerPatches.size(); i++) {
2008 const int devIdx = peerPatches[i].deviceIndex;
2009 const int peerIdx = peerPatches[i].patchIndex;
2010 const CudaLocalRecord peer = patchData->devData[devIdx].h_localPatches[peerIdx];
2017 for (
int i = 0; i < numPatchesHomeAndProxy; i++) {
2018 const int numPeerRecord = localPatches[i].numPeerRecord;
2019 const int peerOffset = localPatches[i].peerRecordStartIndex;
2022 localPatches[i].inline_peers[j] = peerPatches[peerOffset+j];
2039 #ifdef TIMER_COLLECTION 2040 TimerSet& t =
patch->timerSet;
2079 doNonbonded = (step >= numberOfSteps) ||
2096 doFullElectrostatics = (dofull &&
2097 ((step >= numberOfSteps) ||
2109 doEnergy = energyFrequency.
init(step, newComputeEnergies);
2114 int doGlobal = doTcl || doColvars;
2132 langevinPistonFrequency.
init(step,
2299 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED) 2303 int beginStep =
simParams->beginEventStep;
2308 for ( ++step; step <= numberOfSteps; ++step ) {
2309 int dcdSelectionChecks=0;
2311 for(
int dcdindex=0; dcdindex<16;++dcdindex)
2314 if(dcdSelectionFrequency && step % dcdSelectionFrequency)
2315 dcdSelectionChecks++;
2317 const int isCollection = restartFrequency.
check(step) +
2318 dcdFrequency.
check(step) + velDcdFrequency.
check(step) +
2319 forceDcdFrequency.
check(step) + imdFrequency.
check(step) +
2321 const int isMigration = stepsPerCycle.
check(step);
2322 doEnergy = energyFrequency.
check(step);
2323 DebugM(3,
"doGlobal now "<< doGlobal<<
"\n"<<
endi);
2325 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED) 2326 eon = epid && (beginStep < step && step <= endStep);
2328 if (controlProfiling && step == beginStep) {
2331 if (controlProfiling && step == endStep) {
2398 if ( langevinPistonFrequency.
check(step) ) {
2485 doNonbonded = nonbondedFrequency.
check(step);
2487 doFullElectrostatics = (dofull && fullElectFrequency.
check(step));
2643 fprintf(stderr,
"Patch %d has %d atoms\n", pid, n);
2644 fprintf(stderr,
"%3s %8s %12s %12s %12s\n",
2645 "",
"id",
"fnormal_x",
"fnbond_x",
"fslow_x");
2646 for (
int i=0; i < n; i++) {
2647 int index = p.
id[i];
2648 fprintf(stderr,
"%3d %8d %12.8f %12.8f %12.8f\n",
2655 for (
int i=0; i < n; i++) {
2666 TestArray_write<double>(
2667 "f_normal_good.bin",
"f_normal good", (
double*)f_normal, 3*n);
2668 TestArray_write<double>(
2669 "f_nbond_good.bin",
"f_nbond good", (
double*)f_nbond, 3*n);
2670 TestArray_write<double>(
2671 "f_slow_good.bin",
"f_slow good", (
double*)f_slow, 3*n);
2693 patch->copy_updates_to_AOS();
2697 printf(
"Timer collection reporting in microseconds for " 2708 const double scaling,
2713 const double * __restrict recipMass,
2714 const double * __restrict f_normal_x,
2715 const double * __restrict f_normal_y,
2716 const double * __restrict f_normal_z,
2717 const double * __restrict f_nbond_x,
2718 const double * __restrict f_nbond_y,
2719 const double * __restrict f_nbond_z,
2720 const double * __restrict f_slow_x,
2721 const double * __restrict f_slow_y,
2722 const double * __restrict f_slow_z,
2723 double * __restrict vel_x,
2724 double * __restrict vel_y,
2725 double * __restrict vel_z,
2731 NamdProfileEvent::ADD_FORCE_TO_MOMENTUM_SOA);
2733 #ifdef SOA_SIMPLIFY_PARAMS 2734 const double * __restrict recipMass =
patch->patchDataSOA.
recipMass;
2736 const double * __restrict f_normal_x =
patch->patchDataSOA.
f_normal_x;
2737 const double * __restrict f_normal_y =
patch->patchDataSOA.
f_normal_y;
2738 const double * __restrict f_normal_z =
patch->patchDataSOA.
f_normal_z;
2740 const double * __restrict f_nbond_x =
patch->patchDataSOA.
f_nbond_x;
2741 const double * __restrict f_nbond_y =
patch->patchDataSOA.
f_nbond_y;
2742 const double * __restrict f_nbond_z =
patch->patchDataSOA.
f_nbond_z;
2744 const double * __restrict f_slow_x =
patch->patchDataSOA.
f_slow_x;
2745 const double * __restrict f_slow_y =
patch->patchDataSOA.
f_slow_y;
2746 const double * __restrict f_slow_z =
patch->patchDataSOA.
f_slow_z;
2747 double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
2748 double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
2749 double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
2764 if(this->patch->getPatchID() == 538){
2772 fprintf(stderr,
"Old Velocities %lf %lf %lf\n", vel_x[0], vel_y[0], vel_z[ 0]);
2773 fprintf(stderr,
"Adding forces %lf %lf %lf %lf %lf %lf %lf %lf %lf\n",
2774 f_slow_x[43], f_slow_y[43], f_slow_z[43],
2775 f_nbond_x[43], f_nbond_y[43], f_nbond_z[43],
2776 f_normal_x[43], f_normal_y[43], f_normal_z[43]);
// Kick step (SOA form): v += f * (dt / m) for each force class, selected by
// maxForceNumber. NOTE(review): the case labels and closing braces of this
// switch are in elided lines (2780-2781, 2786-2788, 2794-2796) — presumably
// case 2 (slow) falls through to case 1 (nbond) and then case 0 (normal), as
// in the upstream NAMD source; confirm the fall-through before editing.
2779 switch (maxForceNumber) {
// Slow force contribution, integrated with the (longer) dt_slow timestep.
// NOTE(review): dt_slow appears to be pre-scaled elsewhere — not visible here.
2782 for (
int i=0; i < numAtoms; i++) {
2783 vel_x[i] += f_slow_x[i] * recipMass[i] * dt_slow;
2784 vel_y[i] += f_slow_y[i] * recipMass[i] * dt_slow;
2785 vel_z[i] += f_slow_z[i] * recipMass[i] * dt_slow;
// Nonbonded force contribution; dt_nbond is scaled in place by `scaling`.
2789 dt_nbond *= scaling;
2790 for (
int i=0; i < numAtoms; i++) {
2791 vel_x[i] += f_nbond_x[i] * recipMass[i] * dt_nbond;
2792 vel_y[i] += f_nbond_y[i] * recipMass[i] * dt_nbond;
2793 vel_z[i] += f_nbond_z[i] * recipMass[i] * dt_nbond;
// "Normal" (every-step) force contribution; dt_normal scaled the same way.
2797 dt_normal *= scaling;
2798 for (
int i=0; i < numAtoms; i++) {
2799 vel_x[i] += f_normal_x[i] * recipMass[i] * dt_normal;
2800 vel_y[i] += f_normal_y[i] * recipMass[i] * dt_normal;
2801 vel_z[i] += f_normal_z[i] * recipMass[i] * dt_normal;
2814 const double * __restrict vel_x,
2815 const double * __restrict vel_y,
2816 const double * __restrict vel_z,
2817 double * __restrict pos_x,
2818 double * __restrict pos_y,
2819 double * __restrict pos_z,
2824 NamdProfileEvent::ADD_VELOCITY_TO_POSITION_SOA);
2825 #ifdef SOA_SIMPLIFY_PARAMS 2826 const double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
2827 const double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
2828 const double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
2829 double * __restrict pos_x =
patch->patchDataSOA.
pos_x;
2830 double * __restrict pos_y =
patch->patchDataSOA.
pos_y;
2831 double * __restrict pos_z =
patch->patchDataSOA.
pos_z;
// Drift step (SOA form): x += v * dt for every atom in the patch.
2834 for (
int i=0; i < numAtoms; i++) {
2835 pos_x[i] += vel_x[i] * dt;
2836 pos_y[i] += vel_y[i] * dt;
2837 pos_z[i] += vel_z[i] * dt;
// Debug dump hard-coded to patch ID 538 and atom index 43 — clearly a
// debugging leftover; consider guarding it with a compile-time debug flag
// (or removing it) in the full source. Also note it reads index 43 without
// any visible numAtoms > 43 check.
2840 if(this->patch->getPatchID() == 538){
2841 fprintf(stderr,
"New Positions %lf %lf %lf\n", pos_x[43], pos_y[43], pos_z[43]);
2842 fprintf(stderr,
"New Velocities %lf %lf %lf\n", vel_x[43], vel_y[43], vel_z[43]);
2851 const int * __restrict hydrogenGroupSize,
2852 const float * __restrict mass,
2853 const double * __restrict vel_x,
2854 const double * __restrict vel_y,
2855 const double * __restrict vel_z,
2860 NamdProfileEvent::SUBMIT_HALFSTEP_SOA);
2861 #ifdef SOA_SIMPLIFY_PARAMS 2863 const float * __restrict mass =
patch->patchDataSOA.
mass;
2864 const double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
2865 const double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
2866 const double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
// Accumulate (2x) kinetic energy sum(m*v.v) and the full mass-weighted
// velocity outer-product tensor sum(m * v (x) v) over all atoms.
2872 for (
int i=0; i < numAtoms; i++) {
2874 kineticEnergy += mass[i] *
2875 (vel_x[i]*vel_x[i] + vel_y[i]*vel_y[i] + vel_z[i]*vel_z[i]);
2877 virial.
xx += mass[i] * vel_x[i] * vel_x[i];
2878 virial.
xy += mass[i] * vel_x[i] * vel_y[i];
2879 virial.
xz += mass[i] * vel_x[i] * vel_z[i];
2880 virial.
yx += mass[i] * vel_y[i] * vel_x[i];
2881 virial.
yy += mass[i] * vel_y[i] * vel_y[i];
2882 virial.
yz += mass[i] * vel_y[i] * vel_z[i];
2883 virial.
zx += mass[i] * vel_z[i] * vel_x[i];
2884 virial.
zy += mass[i] * vel_z[i] * vel_y[i];
2885 virial.
zz += mass[i] * vel_z[i] * vel_z[i];
// 0.5 for the 1/2 m v^2 definition; the second 0.5 presumably averages the
// two half-step evaluations of this quantity — TODO confirm against caller.
2887 kineticEnergy *= 0.5 * 0.5;
2890 #ifdef NODEGROUP_FORCE_REGISTER 2893 ADD_TENSOR_OBJECT(CUDASequencer->patchData->reduction,REDUCTION_VIRIAL_NORMAL,virial);
// Per-hydrogen-group pass: strides by group size (hgs), computes each group's
// center-of-mass velocity, then accumulates the internal (relative-to-COM)
// kinetic energy and internal virial tensor.
// NOTE(review): many interior lines are elided (e.g. m_cm accumulation around
// 2917, v_cm_* zeroing, and closing braces) — m_cm and the v_cm_* initial
// values come from lines not visible here.
2899 #ifdef NODEGROUP_FORCE_REGISTER 2908 for (
int i=0; i < numAtoms; i += hgs) {
2911 hgs = hydrogenGroupSize[i];
// First inner loop: mass-weighted velocity sum for the group.
2916 for (
int j = i; j < (i+hgs); j++) {
2918 v_cm_x += mass[j] * vel_x[j];
2919 v_cm_y += mass[j] * vel_y[j];
2920 v_cm_z += mass[j] * vel_z[j];
// Normalize by total group mass to get the COM velocity.
2922 BigReal recip_m_cm = 1.0 / m_cm;
2923 v_cm_x *= recip_m_cm;
2924 v_cm_y *= recip_m_cm;
2925 v_cm_z *= recip_m_cm;
// Second inner loop: accumulate internal KE and virial from the velocity
// deviations dv = v - v_cm.
2927 for (
int j = i; j < (i+hgs); j++) {
2928 BigReal dv_x = vel_x[j] - v_cm_x;
2929 BigReal dv_y = vel_y[j] - v_cm_y;
2930 BigReal dv_z = vel_z[j] - v_cm_z;
2932 intKineticEnergy += mass[j] *
2933 (vel_x[j] * dv_x + vel_y[j] * dv_y + vel_z[j] * dv_z);
2935 intVirialNormal.
xx += mass[j] * vel_x[j] * dv_x;
2936 intVirialNormal.
xy += mass[j] * vel_x[j] * dv_y;
2937 intVirialNormal.
xz += mass[j] * vel_x[j] * dv_z;
2938 intVirialNormal.
yx += mass[j] * vel_y[j] * dv_x;
2939 intVirialNormal.
yy += mass[j] * vel_y[j] * dv_y;
2940 intVirialNormal.
yz += mass[j] * vel_y[j] * dv_z;
2941 intVirialNormal.
zx += mass[j] * vel_z[j] * dv_x;
2942 intVirialNormal.
zy += mass[j] * vel_z[j] * dv_y;
2943 intVirialNormal.
zz += mass[j] * vel_z[j] * dv_z;
// Same 0.5 * 0.5 convention as the per-atom kinetic energy above; the
// virial gets a single 0.5 (half-step average — TODO confirm).
2946 intKineticEnergy *= 0.5 * 0.5;
2947 intVirialNormal *= 0.5;
2948 #ifdef NODEGROUP_FORCE_REGISTER 2951 ADD_TENSOR_OBJECT(CUDASequencer->patchData->reduction,REDUCTION_INT_VIRIAL_NORMAL, intVirialNormal);
2956 += intKineticEnergy;
2959 #ifdef NODEGROUP_FORCE_REGISTER 2971 const int * __restrict hydrogenGroupSize,
2972 const float * __restrict mass,
2973 const double * __restrict pos_x,
2974 const double * __restrict pos_y,
2975 const double * __restrict pos_z,
2976 const double * __restrict vel_x,
2977 const double * __restrict vel_y,
2978 const double * __restrict vel_z,
2979 const double * __restrict f_normal_x,
2980 const double * __restrict f_normal_y,
2981 const double * __restrict f_normal_z,
2982 const double * __restrict f_nbond_x,
2983 const double * __restrict f_nbond_y,
2984 const double * __restrict f_nbond_z,
2985 const double * __restrict f_slow_x,
2986 const double * __restrict f_slow_y,
2987 const double * __restrict f_slow_z,
2992 NamdProfileEvent::SUBMIT_REDUCTIONS_SOA);
2993 #ifdef SOA_SIMPLIFY_PARAMS 2995 const float * __restrict mass =
patch->patchDataSOA.
mass;
2996 const double * __restrict pos_x =
patch->patchDataSOA.
pos_x;
2997 const double * __restrict pos_y =
patch->patchDataSOA.
pos_y;
2998 const double * __restrict pos_z =
patch->patchDataSOA.
pos_z;
2999 const double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
3000 const double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
3001 const double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
3002 const double * __restrict f_normal_x =
patch->patchDataSOA.
f_normal_x;
3003 const double * __restrict f_normal_y =
patch->patchDataSOA.
f_normal_y;
3004 const double * __restrict f_normal_z =
patch->patchDataSOA.
f_normal_z;
3005 const double * __restrict f_nbond_x =
patch->patchDataSOA.
f_nbond_x;
3006 const double * __restrict f_nbond_y =
patch->patchDataSOA.
f_nbond_y;
3007 const double * __restrict f_nbond_z =
patch->patchDataSOA.
f_nbond_z;
3008 const double * __restrict f_slow_x =
patch->patchDataSOA.
f_slow_x;
3009 const double * __restrict f_slow_y =
patch->patchDataSOA.
f_slow_y;
3010 const double * __restrict f_slow_z =
patch->patchDataSOA.
f_slow_z;
3014 #ifdef NODEGROUP_FORCE_REGISTER 3022 #ifdef NODEGROUP_FORCE_REGISTER 3031 BigReal angularMomentum_x = 0;
3032 BigReal angularMomentum_y = 0;
3033 BigReal angularMomentum_z = 0;
// Single pass over all atoms accumulating:
//   - 2x kinetic energy sum(m * v.v)  (halved at line 3064),
//   - total linear momentum sum(m * v),
//   - total angular momentum sum(m * (r - origin) x v) about `origin`.
3040 for (
int i=0; i < numAtoms; i++) {
3043 kineticEnergy += mass[i] *
3044 (vel_x[i]*vel_x[i] + vel_y[i]*vel_y[i] + vel_z[i]*vel_z[i]);
3047 momentum_x += mass[i] * vel_x[i];
3048 momentum_y += mass[i] * vel_y[i];
3049 momentum_z += mass[i] * vel_z[i];
// Position relative to the (presumably lattice) origin for the torque arm.
3052 BigReal dpos_x = pos_x[i] - origin_x;
3053 BigReal dpos_y = pos_y[i] - origin_y;
3054 BigReal dpos_z = pos_z[i] - origin_z;
// Cross product components of m * (dpos x v).
3057 angularMomentum_x += mass[i] * (dpos_y*vel_z[i] - dpos_z*vel_y[i]);
3058 angularMomentum_y += mass[i] * (dpos_z*vel_x[i] - dpos_x*vel_z[i]);
3059 angularMomentum_z += mass[i] * (dpos_x*vel_y[i] - dpos_y*vel_x[i]);
3064 kineticEnergy *= 0.5;
3065 Vector momentum(momentum_x, momentum_y, momentum_z);
3066 Vector angularMomentum(angularMomentum_x, angularMomentum_y,
3069 #ifdef NODEGROUP_FORCE_REGISTER 3072 ADD_VECTOR_OBJECT(CUDASequencer->patchData->reduction,REDUCTION_MOMENTUM,momentum);
3073 ADD_VECTOR_OBJECT(CUDASequencer->patchData->reduction,REDUCTION_ANGULAR_MOMENTUM,angularMomentum);
3079 #ifdef NODEGROUP_FORCE_REGISTER 3091 for (
int i=0; i < numAtoms; i += hgs) {
3092 hgs = hydrogenGroupSize[i];
3101 for ( j = i; j < (i+hgs); ++j ) {
3103 r_cm_x += mass[j] * pos_x[j];
3104 r_cm_y += mass[j] * pos_y[j];
3105 r_cm_z += mass[j] * pos_z[j];
3106 v_cm_x += mass[j] * vel_x[j];
3107 v_cm_y += mass[j] * vel_y[j];
3108 v_cm_z += mass[j] * vel_z[j];
3119 for ( j = i; j < (i+hgs); ++j ) {
3133 intKineticEnergy += mass[j] *
3134 (v_x * dv_x + v_y * dv_y + v_z * dv_z);
3137 BigReal dr_x = pos_x[j] - r_cm_x;
3138 BigReal dr_y = pos_y[j] - r_cm_y;
3139 BigReal dr_z = pos_z[j] - r_cm_z;
3142 intVirialNormal.
xx += f_normal_x[j] * dr_x;
3143 intVirialNormal.
xy += f_normal_x[j] * dr_y;
3144 intVirialNormal.
xz += f_normal_x[j] * dr_z;
3145 intVirialNormal.
yx += f_normal_y[j] * dr_x;
3146 intVirialNormal.
yy += f_normal_y[j] * dr_y;
3147 intVirialNormal.
yz += f_normal_y[j] * dr_z;
3148 intVirialNormal.
zx += f_normal_z[j] * dr_x;
3149 intVirialNormal.
zy += f_normal_z[j] * dr_y;
3150 intVirialNormal.
zz += f_normal_z[j] * dr_z;
3153 intVirialNbond.
xx += f_nbond_x[j] * dr_x;
3154 intVirialNbond.
xy += f_nbond_x[j] * dr_y;
3155 intVirialNbond.
xz += f_nbond_x[j] * dr_z;
3156 intVirialNbond.
yx += f_nbond_y[j] * dr_x;
3157 intVirialNbond.
yy += f_nbond_y[j] * dr_y;
3158 intVirialNbond.
yz += f_nbond_y[j] * dr_z;
3159 intVirialNbond.
zx += f_nbond_z[j] * dr_x;
3160 intVirialNbond.
zy += f_nbond_z[j] * dr_y;
3161 intVirialNbond.
zz += f_nbond_z[j] * dr_z;
3164 intVirialSlow.
xx += f_slow_x[j] * dr_x;
3165 intVirialSlow.
xy += f_slow_x[j] * dr_y;
3166 intVirialSlow.
xz += f_slow_x[j] * dr_z;
3167 intVirialSlow.
yx += f_slow_y[j] * dr_x;
3168 intVirialSlow.
yy += f_slow_y[j] * dr_y;
3169 intVirialSlow.
yz += f_slow_y[j] * dr_z;
3170 intVirialSlow.
zx += f_slow_z[j] * dr_x;
3171 intVirialSlow.
zy += f_slow_z[j] * dr_y;
3172 intVirialSlow.
zz += f_slow_z[j] * dr_z;
3176 intKineticEnergy *= 0.5;
3178 #ifdef NODEGROUP_FORCE_REGISTER 3184 ADD_TENSOR_OBJECT(CUDASequencer->patchData->reduction,REDUCTION_INT_VIRIAL_NORMAL,intVirialNormal);
3185 ADD_TENSOR_OBJECT(CUDASequencer->patchData->reduction,REDUCTION_INT_VIRIAL_NBOND,intVirialNbond);
3186 ADD_TENSOR_OBJECT(CUDASequencer->patchData->reduction,REDUCTION_INT_VIRIAL_SLOW,intVirialSlow);
3193 #ifdef NODEGROUP_FORCE_REGISTER 3215 NamdProfileEvent::SUBMIT_COLLECTIONS_SOA);
3230 if ( is_pos_needed || is_vel_needed ) {
3231 patch->copy_updates_to_AOS();
3234 patch->copy_forces_to_AOS();
3237 if ( is_pos_needed ) {
3238 #ifndef MEM_OPT_VERSION 3244 if ( is_vel_needed ) {
3247 if ( is_f_needed ) {
3257 const double maxvel2
3260 const double * __restrict vel_x,
3261 const double * __restrict vel_y,
3262 const double * __restrict vel_z,
3267 #ifdef SOA_SIMPLIFY_PARAMS 3268 const double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
3269 const double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
3270 const double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
3278 for (
int i=0; i < numAtoms; i++) {
3280 vel_x[i] * vel_x[i] + vel_y[i] * vel_y[i] + vel_z[i] * vel_z[i];
3281 killme = killme + ( vel2 > maxvel2 );
3288 for (
int i=0; i < numAtoms; i++) {
3290 vel_x[i] * vel_x[i] + vel_y[i] * vel_y[i] + vel_z[i] * vel_z[i];
3291 if (vel2 > maxvel2) {
3293 const Vector vel(vel_x[i], vel_y[i], vel_z[i]);
3294 const BigReal maxvel = sqrt(maxvel2);
3296 iout <<
iERROR <<
"Atom " << (a[i].
id + 1) <<
" velocity is " 3299 << i <<
" of " << numAtoms <<
" on patch " 3304 "Atoms moving too fast; simulation has become unstable (" 3306 <<
" pe " << CkMyPe() <<
").\n" <<
endi;
3317 const float * __restrict langevinParam,
3318 double * __restrict vel_x,
3319 double * __restrict vel_y,
3320 double * __restrict vel_z,
3325 NamdProfileEvent::LANGEVIN_VELOCITIES_BBK1_SOA);
3326 #ifdef SOA_SIMPLIFY_PARAMS 3328 double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
3329 double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
3330 double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
// Fragment of Sequencer::langevinVelocitiesBBK1_SOA (see the
// LANGEVIN_VELOCITIES_BBK1_SOA profile tag earlier in this function).
// NOTE(review): garbled extraction — original line numbers are fused into
// the text; the loop's closing brace and the computation of `dt` are on
// lines missing from this view.
// First BBK half-step: damp each velocity component by (1 - dt*gamma/2),
// where gamma is the per-atom Langevin friction coefficient langevinParam[i].
3346 for (
int i=0; i < numAtoms; i++) {
3347 BigReal dt_gamma = dt * langevinParam[i];
// (1 - dt*gamma/2) is the BBK half-step damping factor.
3350 BigReal scaling = 1. - 0.5 * dt_gamma;
3351 vel_x[i] *= scaling;
3352 vel_y[i] *= scaling;
3353 vel_z[i] *= scaling;
3363 const float * __restrict langevinParam,
3364 const float * __restrict langScalVelBBK2,
3365 const float * __restrict langScalRandBBK2,
3366 float * __restrict gaussrand_x,
3367 float * __restrict gaussrand_y,
3368 float * __restrict gaussrand_z,
3369 double * __restrict vel_x,
3370 double * __restrict vel_y,
3371 double * __restrict vel_z,
3377 NamdProfileEvent::LANGEVIN_VELOCITIES_BBK2_SOA);
3378 #ifdef SOA_SIMPLIFY_PARAMS 3385 double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
3386 double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
3387 double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
// Fragment of Sequencer::langevinVelocitiesBBK2_SOA.
// NOTE(review): garbled extraction — statements are split mid-token
// ("float(rg." / "x);") and interior lines, including the per-iteration
// draw of the Gaussian vector `rg`, are missing from this view.
// First loop: cache one Gaussian 3-vector per atom into the gaussrand_*
// SOA arrays (presumably drawn just above on the missing lines — TODO
// confirm against the full source).
3415 for (
int i=0; i < numAtoms; i++) {
3418 gaussrand_x[i] = float(rg.
x);
3419 gaussrand_y[i] = float(rg.
y);
3420 gaussrand_z[i] = float(rg.
z);
// Second loop: apply the stochastic kick scaled by the per-atom factor
// langScalRandBBK2[i], then rescale the velocity by langScalVelBBK2[i]
// (the BBK2 1/(1 + dt*gamma/2)-style damping, precomputed per atom).
3431 for (
int i=0; i < numAtoms; i++) {
3432 vel_x[i] += gaussrand_x[i] * langScalRandBBK2[i];
3433 vel_y[i] += gaussrand_y[i] * langScalRandBBK2[i];
3434 vel_z[i] += gaussrand_z[i] * langScalRandBBK2[i];
3435 vel_x[i] *= langScalVelBBK2[i];
3436 vel_y[i] *= langScalVelBBK2[i];
3437 vel_z[i] *= langScalVelBBK2[i];
3444 const int * __restrict hydrogenGroupSize,
3445 const float * __restrict mass,
3446 double * __restrict pos_x,
3447 double * __restrict pos_y,
3448 double * __restrict pos_z,
3453 #ifdef SOA_SIMPLIFY_PARAMS 3455 const float * __restrict mass =
patch->patchDataSOA.
mass;
3456 double * __restrict pos_x =
patch->patchDataSOA.
pos_x;
3457 double * __restrict pos_y =
patch->patchDataSOA.
pos_y;
3458 double * __restrict pos_z =
patch->patchDataSOA.
pos_z;
3477 for (
int i = 0; i < numAtoms; i += hgs) {
3479 hgs = hydrogenGroupSize[i];
3486 for ( j = i; j < (i+hgs); ++j ) {
3488 r_cm_x += mass[j] * pos_x[j];
3489 r_cm_y += mass[j] * pos_y[j];
3490 r_cm_z += mass[j] * pos_z[j];
3498 double tx = r_cm_x - origin.
x;
3499 double ty = r_cm_y - origin.
y;
3500 double tz = r_cm_z - origin.
z;
3502 double new_r_cm_x = factor.
xx*tx + factor.
xy*ty + factor.
xz*tz;
3503 double new_r_cm_y = factor.
yx*tx + factor.
yy*ty + factor.
yz*tz;
3504 double new_r_cm_z = factor.
zx*tx + factor.
zy*ty + factor.
zz*tz;
3506 new_r_cm_x += origin.
x;
3507 new_r_cm_y += origin.
y;
3508 new_r_cm_z += origin.
z;
// Fragment of the SOA Berendsen pressure-rescaling routine (hydrogen-group
// branch). NOTE(review): garbled extraction — original line numbers are
// fused into the text and the loop's closing brace is missing from view.
// delta_r_cm is the displacement of the group's center of mass produced by
// the cell-scaling `factor` (new_r_cm computed on earlier lines); the loop
// applies that same rigid shift to every atom j in the hydrogen group
// [i, i+hgs), preserving intra-group geometry.
3510 double delta_r_cm_x = new_r_cm_x - r_cm_x;
3511 double delta_r_cm_y = new_r_cm_y - r_cm_y;
3512 double delta_r_cm_z = new_r_cm_z - r_cm_z;
3514 for (j = i; j < (i+hgs); ++j) {
3515 pos_x[j] += delta_r_cm_x;
3516 pos_y[j] += delta_r_cm_y;
3517 pos_z[j] += delta_r_cm_z;
3521 for (
int i = 0; i < numAtoms; ++i) {
3525 double tx = pos_x[i] - origin.
x;
3526 double ty = pos_y[i] - origin.
y;
3527 double tz = pos_z[i] - origin.
z;
3529 double ftx = factor.
xx*tx + factor.
xy*ty + factor.
xz*tz;
3530 double fty = factor.
yx*tx + factor.
yy*ty + factor.
yz*tz;
3531 double ftz = factor.
zx*tx + factor.
zy*ty + factor.
zz*tz;
3533 pos_x[i] = ftx + origin.
x;
3534 pos_y[i] = fty + origin.
y;
3535 pos_z[i] = ftz + origin.
z;
3543 const int * __restrict hydrogenGroupSize,
3544 const float * __restrict mass,
3545 double * __restrict pos_x,
3546 double * __restrict pos_y,
3547 double * __restrict pos_z,
3548 double * __restrict vel_x,
3549 double * __restrict vel_y,
3550 double * __restrict vel_z,
3556 #ifdef SOA_SIMPLIFY_PARAMS 3558 const float * __restrict mass =
patch->patchDataSOA.
mass;
3559 double * __restrict pos_x =
patch->patchDataSOA.
pos_x;
3560 double * __restrict pos_y =
patch->patchDataSOA.
pos_y;
3561 double * __restrict pos_z =
patch->patchDataSOA.
pos_z;
3562 double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
3563 double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
3564 double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
3586 for (
int i=0; i < numAtoms; i += hgs) {
3588 hgs = hydrogenGroupSize[i];
3597 for ( j = i; j < (i+hgs); ++j ) {
3599 r_cm_x += mass[j] * pos_x[j];
3600 r_cm_y += mass[j] * pos_y[j];
3601 r_cm_z += mass[j] * pos_z[j];
3602 v_cm_x += mass[j] * vel_x[j];
3603 v_cm_y += mass[j] * vel_y[j];
3604 v_cm_z += mass[j] * vel_z[j];
3611 double tx = r_cm_x - origin.
x;
3612 double ty = r_cm_y - origin.
y;
3613 double tz = r_cm_z - origin.
z;
3614 double new_r_cm_x = factor.
xx*tx + factor.
xy*ty + factor.
xz*tz;
3615 double new_r_cm_y = factor.
yx*tx + factor.
yy*ty + factor.
yz*tz;
3616 double new_r_cm_z = factor.
zx*tx + factor.
zy*ty + factor.
zz*tz;
3617 new_r_cm_x += origin.
x;
3618 new_r_cm_y += origin.
y;
3619 new_r_cm_z += origin.
z;
3621 double delta_r_cm_x = new_r_cm_x - r_cm_x;
3622 double delta_r_cm_y = new_r_cm_y - r_cm_y;
3623 double delta_r_cm_z = new_r_cm_z - r_cm_z;
// Fragment of the SOA Langevin-piston cell-rescaling routine
// (hydrogen-group branch). NOTE(review): garbled extraction — original line
// numbers are fused into the text; the loop's closing brace is missing.
// delta_v_cm adjusts the group center-of-mass velocity so that its COM
// velocity ends up multiplied by velFactor_{x,y,z}: adding
// (velFactor - 1) * v_cm to each member leaves intra-group relative
// velocities unchanged. delta_r_cm (computed just above) rigidly shifts the
// group to its rescaled COM position.
3627 double delta_v_cm_x = ( velFactor_x - 1 ) * v_cm_x;
3628 double delta_v_cm_y = ( velFactor_y - 1 ) * v_cm_y;
3629 double delta_v_cm_z = ( velFactor_z - 1 ) * v_cm_z;
3630 for (j = i; j < (i+hgs); j++) {
3631 pos_x[j] += delta_r_cm_x;
3632 pos_y[j] += delta_r_cm_y;
3633 pos_z[j] += delta_r_cm_z;
3634 vel_x[j] += delta_v_cm_x;
3635 vel_y[j] += delta_v_cm_y;
3636 vel_z[j] += delta_v_cm_z;
3645 for (
int i=0; i < numAtoms; i++) {
3646 double tx = pos_x[i] - origin.
x;
3647 double ty = pos_y[i] - origin.
y;
3648 double tz = pos_z[i] - origin.
z;
3649 double ftx = factor.
xx*tx + factor.
xy*ty + factor.
xz*tz;
3650 double fty = factor.
yx*tx + factor.
yy*ty + factor.
yz*tz;
3651 double ftz = factor.
zx*tx + factor.
zy*ty + factor.
zz*tz;
3652 pos_x[i] = ftx + origin.
x;
3653 pos_y[i] = fty + origin.
y;
3654 pos_z[i] = ftz + origin.
z;
3655 vel_x[i] *= velFactor_x;
3656 vel_y[i] *= velFactor_y;
3657 vel_z[i] *= velFactor_z;
3675 Tensor *vp = ( pressure ? &virial : 0 );
3679 "Constraint failure; simulation has become unstable.\n" <<
endi;
3683 #ifdef NODEGROUP_FORCE_REGISTER 3685 ADD_TENSOR_OBJECT(CUDASequencer->patchData->reduction,REDUCTION_VIRIAL_NORMAL,virial);
3690 #ifdef NODEGROUP_FORCE_REGISTER 3699 #if (defined(NAMD_CUDA) || defined(NAMD_HIP)) || defined(NAMD_MIC) 3714 #if defined(NTESTPID) 3718 double *xyzq =
new double[4*numAtoms];
3723 for (
int i=0; i < numAtoms; i++) {
3729 char fname[128], remark[128];
3730 sprintf(fname,
"xyzq_soa_pid%d_step%d.bin", NTESTPID, step);
3731 sprintf(remark,
"SOA xyzq, patch %d, step %d", NTESTPID, step);
3732 TestArray_write<double>(fname, remark, xyzq, 4*numAtoms);
3737 patch->zero_global_forces_SOA();
3741 int basePriority = ( (seq & 0xffff) << 15 )
3752 #ifdef NODEGROUP_FORCE_REGISTER 3754 patch->copy_forces_to_SOA();
3757 patch->copy_forces_to_SOA();
3760 #if defined(NTESTPID) 3766 double *fxyz =
new double[3*numAtoms];
3770 for (
int i=0; i < numAtoms; i++) {
3772 fxyz[3*i+1] = fy[i];
3773 fxyz[3*i+2] = fz[i];
3775 sprintf(fname,
"fxyz_normal_soa_pid%d_step%d.bin", NTESTPID, step);
3776 sprintf(remark,
"SOA fxyz normal, patch %d, step %d", NTESTPID, step);
3777 TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3781 for (
int i=0; i < numAtoms; i++) {
3783 fxyz[3*i+1] = fy[i];
3784 fxyz[3*i+2] = fz[i];
3786 sprintf(fname,
"fxyz_nbond_soa_pid%d_step%d.bin", NTESTPID, step);
3787 sprintf(remark,
"SOA fxyz nonbonded, patch %d, step %d", NTESTPID, step);
3788 TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3792 for (
int i=0; i < numAtoms; i++) {
3794 fxyz[3*i+1] = fy[i];
3795 fxyz[3*i+2] = fz[i];
3797 sprintf(fname,
"fxyz_slow_soa_pid%d_step%d.bin", NTESTPID, step);
3798 sprintf(remark,
"SOA fxyz slow, patch %d, step %d", NTESTPID, step);
3799 TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3807 double *fxyz =
new double[3*numAtoms];
3808 double *fx, *fy, *fz;
3809 char fname[64], remark[128];
3815 for (
int i=0; i < numAtoms; i++) {
3817 fxyz[3*i+1] = fy[i];
3818 fxyz[3*i+2] = fz[i];
3820 sprintf(fname,
"fslow_soa_%d.bin", step);
3821 sprintf(remark,
"SOA slow forces, step %d\n", step);
3822 TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3827 for (
int i=0; i < numAtoms; i++) {
3829 fxyz[3*i+1] = fy[i];
3830 fxyz[3*i+2] = fz[i];
3832 sprintf(fname,
"fnbond_soa_%d.bin", step);
3833 sprintf(remark,
"SOA nonbonded forces, step %d\n", step);
3834 TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3839 for (
int i=0; i < numAtoms; i++) {
3841 fxyz[3*i+1] = fy[i];
3842 fxyz[3*i+2] = fz[i];
3844 sprintf(fname,
"fnormal_soa_%d.bin", step);
3845 sprintf(remark,
"SOA normal forces, step %d\n", step);
3846 TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3855 fprintf(stderr,
"CPU force arrays for alanin\n" );
3857 fprintf(stderr,
"f[%i] = %lf %lf %lf | %lf %lf %lf | %lf %lf %lf\n", i,
3886 double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
3887 double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
3888 double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
3892 DebugM(4,
"stochastically rescaling velocities at step " << step <<
" by " << velrescaling <<
"\n");
3893 for (
int i = 0; i < numAtoms; ++i ) {
3894 vel_x[i] *= velrescaling;
3895 vel_y[i] *= velrescaling;
3896 vel_z[i] *= velrescaling;
3907 #endif // SEQUENCER_SOA 3914 char tracePrefix[20];
3924 #ifdef TIMER_COLLECTION 3925 TimerSet& t =
patch->timerSet;
3961 const BigReal nbondstep = timestep * (staleForces?1:nonbondedFrequency);
3963 doNonbonded = (step >= numberOfSteps) || !(step%nonbondedFrequency);
3970 if ( dofull )
slowFreq = fullElectFrequency;
3971 const BigReal slowstep = timestep * (staleForces?1:fullElectFrequency);
3973 doFullElectrostatics = (dofull && ((step >= numberOfSteps) || !(step%fullElectFrequency)));
3982 if ( accelMDOn && (accelMDdihe || accelMDdual)) maxForceUsed =
Results::amdf;
4012 int doGlobal = doTcl || doColvars;
4018 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 4046 if ( !commOnly && ( reassignFreq>0 ) && ! (step%reassignFreq) ) {
4051 doEnergy = ! ( step % energyFrequency );
4053 if ( accelMDOn && !accelMDdihe ) doEnergy=1;
4055 if ( adaptTempOn ) doEnergy=1;
4058 D_MSG(
"runComputeObjects()");
4066 if ( staleForces || doGlobal ) {
4072 D_MSG(
"newtonianVelocities()");
4086 D_MSG(
"submitHalfstep()");
4092 D_MSG(
"newtonianVelocities()");
4105 D_MSG(
"submitHalfstep()");
4109 if ( zeroMomentum && doFullElectrostatics )
submitMomentum(step);
4111 D_MSG(
"newtonianVelocities()");
4118 D_MSG(
"submitReductions()");
4126 sprintf(traceNote,
"%s%d",tracePrefix,step);
4127 traceUserSuppliedNote(traceNote);
4135 bool doMultigratorRattle =
false;
4149 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED) 4153 int beginStep =
simParams->beginEventStep;
4158 for ( ++step; step <= numberOfSteps; ++step )
4160 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED) 4161 eon = epid && (beginStep < step && step <= endStep);
4163 if (controlProfiling && step == beginStep) {
4166 if (controlProfiling && step == endStep) {
4173 DebugM(3,
"for step "<<step<<
" dGlobal " << doGlobal<<
"\n"<<
endi);
4184 newtonianVelocities(0.5,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4189 if (doMultigratorRattle)
rattle1(timestep, 1);
4242 #endif // UPPER_BOUND 4244 doNonbonded = !(step%nonbondedFrequency);
4245 doFullElectrostatics = (dofull && !(step%fullElectFrequency));
4248 if ( zeroMomentum && doFullElectrostatics ) {
4267 if ( accelMDOn && (accelMDdihe || accelMDdual)) maxForceUsed =
Results::amdf;
4270 doEnergy = ! ( step % energyFrequency );
4271 if ( accelMDOn && !accelMDdihe ) doEnergy=1;
4272 if ( adaptTempOn ) doEnergy=1;
4296 if ( staleForces || doGlobal ) {
4302 if ( !commOnly && ( reassignFreq>0 ) && ! (step%reassignFreq) ) {
4304 newtonianVelocities(-0.5,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4313 newtonianVelocities(1.0,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4333 if ( zeroMomentum && doFullElectrostatics )
submitMomentum(step);
4337 newtonianVelocities(-0.5,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4376 if(step == bstep || step == estep){
4381 #ifdef MEASURE_NAMD_WITH_PAPI 4383 int bstep =
simParams->papiMeasureStartStep;
4384 int estep = bstep +
simParams->numPapiMeasureSteps;
4385 if(step == bstep || step==estep) {
4386 papiMeasureBarrier(step);
4393 sprintf(traceNote,
"%s%d",tracePrefix,step);
4394 traceUserSuppliedNote(traceNote);
4396 #endif // UPPER_BOUND 4401 cycleBarrier(dofull && !((step+1)%fullElectFrequency),step);
4405 if(step == START_HPM_STEP)
4406 (CProxy_Node(CkpvAccess(BOCclass_group).node)).startHPM();
4408 if(step == STOP_HPM_STEP)
4409 (CProxy_Node(CkpvAccess(BOCclass_group).node)).stopHPM();
4415 #ifdef TIMER_COLLECTION 4417 printf(
"Timer collection reporting in microseconds for " 4421 #endif // TIMER_COLLECTION 4435 Vector movDragVel, dragIncrement;
4436 for (
int i = 0; i < numAtoms; ++i )
4442 dragIncrement = movDragGlobVel * movDragVel * dt;
4456 Vector rotDragAxis, rotDragPivot, dragIncrement;
4457 for (
int i = 0; i < numAtoms; ++i )
4463 dAngle = rotDragGlobVel * rotDragVel * dt;
4464 rotDragAxis /= rotDragAxis.
length();
4465 atomRadius = atom[i].
position - rotDragPivot;
4466 dragIncrement = cross(rotDragAxis, atomRadius) * dAngle;
4480 #if 0 && defined(NODEGROUP_FORCE_REGISTER) 4483 const int stepsPerCycle_save = stepsPerCycle;
4499 doFullElectrostatics = dofull;
4521 int doGlobal = doTcl || doColvars;
4540 #ifdef DEBUG_MINIMIZE 4541 printf(
"doTcl = %d doColvars = %d\n", doTcl, doColvars);
4547 #ifdef DEBUG_MINIMIZE 4548 else { printf(
"No computeGlobal\n"); }
4558 for ( ++step; step <= numberOfSteps; ++step ) {
4601 patch->copy_atoms_to_SOA();
4605 #if 0 && defined(NODEGROUP_FORCE_REGISTER) 4623 for (
int i = 0; i < numAtoms; ++i ) {
4629 for (
int j=1; j<hgs; ++j ) {
4647 for (
int i = 0; i < numAtoms; ++i ) {
4650 if ( drudeHardWallOn && i && (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) {
4653 if ( fixedAtomsOn && a[i].atomFixed ) a[i].
velocity = 0;
4655 if ( v2 > maxv2 ) maxv2 = v2;
4661 for (
int i = 0; i < numAtoms; ++i ) {
4662 if ( drudeHardWallOn && i && (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) {
4665 if ( fixedAtomsOn && a[i].atomFixed ) a[i].
velocity = 0;
4667 if ( v2 > maxv2 ) maxv2 = v2;
4677 for (
int i = 0; i < numAtoms; i += hgs ) {
4680 if ( minChildVel < fmax2 )
continue;
4681 int adjustChildren = 1;
4682 for (
int j = i+1; j < (i+hgs); ++j ) {
4683 if ( a[j].velocity.length2() > minChildVel ) adjustChildren = 0;
4685 if ( adjustChildren ) {
4687 for (
int j = i+1; j < (i+hgs); ++j ) {
4688 if (a[i].mass < 0.01)
continue;
4702 for (
int i = 0; i < numAtoms; ++i ) {
4707 for (
int i = 1; i < numAtoms; ++i ) {
4708 if ( (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) {
4717 for (
int i = 1; i < numAtoms; ++i ) {
4718 if ( (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) {
4730 for (
int i = 0; i < numAtoms; ++i ) {
4743 for (
int i = 0; i < numAtoms; ++i ) {
4748 for (
int i = 0; i < numAtoms; ++i ) {
4764 NAMD_die(
"Cannot zero momentum when fixed atoms are present.");
4775 for (
int i = 0; i < numAtoms; ++i ) {
4776 a[i].velocity += dv * a[i].recipMass;
4777 a[i].position += dx * a[i].recipMass;
4780 for (
int i = 0; i < numAtoms; ++i ) {
4781 a[i].velocity += dv;
4782 a[i].position += dx;
4794 NAMD_bug(
"Sequencer::scalePositionsVelocities, fixed atoms not implemented");
4798 for (
int i = 0; i < numAtoms; i += hgs ) {
4803 for (
int j=0;j < hgs;++j) {
4804 m_cm += a[i+j].
mass;
4813 for (
int j=0;j < hgs;++j) {
4819 for (
int i = 0; i < numAtoms; i++) {
4836 Tensor posScaleTensor = scaleTensor;
4865 for (
int i = 0; i < numAtoms; ++i ) {
4868 virialNormal.
outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
4872 for (
int i = 0; i < numAtoms; ++i ) {
4873 if (a[i].mass < 0.01)
continue;
4875 virialNormal.
outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
4879 kineticEnergy *= 0.5;
4887 Vector fixForceNormal = 0;
4888 Vector fixForceNbond = 0;
4891 calcFixVirial(fixVirialNormal, fixVirialNbond, fixVirialSlow, fixForceNormal, fixForceNbond, fixForceSlow);
4907 for (
int i = 0; i < numAtoms; i += hgs ) {
4913 for ( j = i; j < (i+hgs); ++j ) {
4924 NAMD_bug(
"Sequencer::multigratorPressure, this part needs to be implemented correctly");
4925 for ( j = i; j < (i+hgs); ++j ) {
4930 intVirialNormal2.
outerAdd (mass, v, dv);
4938 for ( j = i; j < (i+hgs); ++j ) {
4942 intVirialNormal2.
outerAdd(mass, v, dv);
4964 for (
int i = 0; i < numAtoms; i++) {
4975 for (
int i = 0; i < numAtoms; ++i ) {
4981 for (
int i = 0; i < numAtoms; ++i ) {
4985 kineticEnergy *= 0.5;
4986 return kineticEnergy;
5004 for (
int i = 0; i < numAtoms; i += hgs ) {
5010 for ( j = i; j < (i+hgs); ++j ) {
5015 momentumSqrSum.
outerAdd(1.0/m_cm, v_cm, v_cm);
5018 for (
int i = 0; i < numAtoms; i++) {
5019 momentumSqrSum.
outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
5038 const int staleForces,
5039 const int doNonbonded,
5040 const int doFullElectrostatics)
5043 NamdProfileEvent::NEWTONIAN_VELOCITIES);
5046 if (staleForces || (doNonbonded && doFullElectrostatics)) {
5052 if (staleForces || doNonbonded)
5054 if (staleForces || doFullElectrostatics)
5081 for (
int i = 0; i < numAtoms; ++i )
5084 if ( ! dt_gamma )
continue;
5086 BigReal f1 = exp( -dt_gamma );
5087 BigReal f2 = sqrt( ( 1. - f1*f1 ) * kbT *
5099 NamdProfileEvent::LANGEVIN_VELOCITIES_BBK1);
5109 for (i = 0; i < numAtoms; i++) {
5111 if (i < numAtoms-1 &&
5112 a[i+1].mass < 1.0 && a[i+1].mass > 0.05) {
5124 if (dt_gamma != 0.0) {
5125 v_com *= ( 1. - 0.5 * dt_gamma );
5130 if (dt_gamma != 0.0) {
5131 v_bnd *= ( 1. - 0.5 * dt_gamma );
5142 if ( ! dt_gamma )
continue;
5144 a[i].
velocity *= ( 1. - 0.5 * dt_gamma );
5155 for ( i = 0; i < numAtoms; ++i )
5158 if ( ! dt_gamma )
continue;
5160 a[i].
velocity *= ( 1. - 0.5 * dt_gamma );
5172 NamdProfileEvent::LANGEVIN_VELOCITIES_BBK2);
5199 for (i = 0; i < numAtoms; i++) {
5201 if (i < numAtoms-1 &&
5202 a[i+1].mass < 1.0 && a[i+1].mass > 0.05) {
5214 if (dt_gamma != 0.0) {
5217 sqrt( 2 * dt_gamma * kbT *
5218 ( a[i].
partition ? tempFactor : 1.0 ) / mass );
5219 v_com /= ( 1. + 0.5 * dt_gamma );
5224 if (dt_gamma != 0.0) {
5227 sqrt( 2 * dt_gamma * kbT_bnd *
5228 ( a[i+1].
partition ? tempFactor : 1.0 ) / mass );
5229 v_bnd /= ( 1. + 0.5 * dt_gamma );
5240 if ( ! dt_gamma )
continue;
5243 sqrt( 2 * dt_gamma * kbT *
5244 ( a[i].
partition ? tempFactor : 1.0 ) * a[i].recipMass );
5245 a[i].
velocity /= ( 1. + 0.5 * dt_gamma );
5259 for ( i = 0; i < numAtoms; ++i )
5262 if ( ! dt_gamma )
continue;
5265 sqrt( 2 * dt_gamma * kbT *
5266 ( a[i].
partition ? tempFactor : 1.0 ) * a[i].recipMass );
5267 a[i].
velocity /= ( 1. + 0.5 * dt_gamma );
5291 for (
int i = 0; i < numAtoms; i += hgs ) {
5295 for ( j = i; j < (i+hgs); ++j ) {
5297 a[j].fixedPosition,a[j].transform);
5303 for ( j = i; j < (i+hgs); ++j ) {
5311 Position delta_x_cm = new_x_cm - x_cm;
5312 for ( j = i; j < (i+hgs); ++j ) {
5315 a[j].fixedPosition,a[j].transform);
5324 for (
int i = 0; i < numAtoms; ++i )
5328 a[i].fixedPosition,a[i].transform);
5360 for (
int i = 0; i < numAtoms; i += hgs ) {
5364 for ( j = i; j < (i+hgs); ++j ) {
5366 a[j].fixedPosition,a[j].transform);
5373 for ( j = i; j < (i+hgs); ++j ) {
5382 Position delta_x_cm = new_x_cm - x_cm;
5385 delta_v_cm.
x = ( velFactor.
x - 1 ) * v_cm.
x;
5386 delta_v_cm.
y = ( velFactor.
y - 1 ) * v_cm.
y;
5387 delta_v_cm.
z = ( velFactor.
z - 1 ) * v_cm.
z;
5388 for ( j = i; j < (i+hgs); ++j ) {
5391 a[j].fixedPosition,a[j].transform);
5402 for (
int i = 0; i < numAtoms; ++i )
5406 a[i].fixedPosition,a[i].transform);