#if !defined(WIN32) || defined(__CYGWIN__)
#include "Node.decl.h"
#define MIN_DEBUG_LEVEL 3
#include "main.decl.h"
#include "ComputeMgr.decl.h"
#include "ComputePmeMgr.decl.h"
#include "ComputeCUDAMgr.decl.h"
#include "ComputePmeCUDAMgr.decl.h"
#include "ComputeGridForceMgr.decl.h"
#include "CollectionMgr.decl.h"
#include "ParallelIOMgr.decl.h"

#if (CMK_CCS_AVAILABLE && CMK_WEB_MODE)
extern "C" void CApplicationInit();
#endif

#if defined(CMK_BALANCED_INJECTION_API) && CMK_BALANCED_INJECTION_API != 0
#include "ckBIconfig.h"
#endif

#include "CollectionMgr.decl.h"
#include "CollectionMaster.decl.h"

extern "C" void HPM_Init(int);
extern "C" void HPM_Start(char *label, int);
extern "C" void HPM_Stop(char *label, int);
extern "C" void HPM_Print(int, int);
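// These HPM_* hooks (IBM's Hardware Performance Monitor interface) are used
// later in this file to bracket a 500-step measurement window.  The block
// below is an illustrative sketch of that calling pattern only, not NAMD code:
#if 0  // example only
void measureWindow(int localRankOnNode) {
  HPM_Start("500 steps", localRankOnNode);   // begin counting under this label
  // ... run the timesteps being measured ...
  HPM_Stop("500 steps", localRankOnNode);    // stop counting under this label
  HPM_Print(CkMyPe(), localRankOnNode);      // dump the per-PE counter report
}
#endif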
#if defined(NAMD_MIC)
extern void mic_dumpHostDeviceComputeMap();
extern void mic_initHostDeviceLDB();
#endif

#ifdef MEASURE_NAMD_WITH_PAPI
#define NUM_PAPI_EVENTS 6
#define MEASURE_PAPI_SPP 1
#define MEASURE_PAPI_CACHE 0
#define MEASURE_PAPI_FLOPS 0

static void namdInitPapiCounters() {
  int retval = PAPI_library_init(PAPI_VER_CURRENT);
  if (retval != PAPI_VER_CURRENT) {
    NAMD_die("PAPI library is not compatible!");
  }
  if (PAPI_thread_init(pthread_self) != PAPI_OK) {
    NAMD_die("Multi-thread mode in PAPI could not be initialized!");
  }
  CkpvInitialize(int *, papiEvents);
  CkpvAccess(papiEvents) = new int[NUM_PAPI_EVENTS+1];

#if MEASURE_PAPI_CACHE
  if (PAPI_query_event(PAPI_L1_DCM) == PAPI_OK) {
    CkpvAccess(papiEvents)[0] = PAPI_L1_DCM;
  } else {
    CkPrintf("WARNING: PAPI_L1_DCM doesn't exist on this platform!\n");
    CkpvAccess(papiEvents)[0] = PAPI_TOT_INS;
  }
  if (PAPI_query_event(PAPI_L2_DCM) == PAPI_OK) {
    CkpvAccess(papiEvents)[1] = PAPI_L2_DCM;
  } else {
    CkpvAccess(papiEvents)[1] = PAPI_TOT_CYC;
  }
#elif MEASURE_PAPI_FLOPS
  if (PAPI_query_event(PAPI_FP_INS) == PAPI_OK) {
    CkpvAccess(papiEvents)[0] = PAPI_FP_INS;
  } else {
    CkPrintf("WARNING: PAPI_FP_INS doesn't exist on this platform!\n");
    CkpvAccess(papiEvents)[0] = PAPI_TOT_INS;
  }
  if (PAPI_query_event(PAPI_FMA_INS) == PAPI_OK) {
    CkpvAccess(papiEvents)[1] = PAPI_FMA_INS;
  } else {
    CkpvAccess(papiEvents)[1] = PAPI_TOT_CYC;
  }
#elif MEASURE_PAPI_SPP
  int papiEventSet = PAPI_NULL;
  if (PAPI_create_eventset(&papiEventSet) != PAPI_OK) {
    CmiAbort("PAPI failed to create event set!\n");
  }
  if (PAPI_query_event(PAPI_FP_OPS) == PAPI_OK) {
    CkpvAccess(papiEvents)[0] = PAPI_FP_OPS;
  } else {
    CkAbort("WARNING: PAPI_FP_OPS doesn't exist on this platform!");
  }
  if (PAPI_query_event(PAPI_TOT_INS) == PAPI_OK) {
    CkpvAccess(papiEvents)[1] = PAPI_TOT_INS;
  } else {
    CkAbort("WARNING: PAPI_TOT_INS doesn't exist on this platform!");
  }
  ret = PAPI_event_name_to_code("perf::PERF_COUNT_HW_CACHE_LL:MISS", &EventCode);
  if (ret == PAPI_OK && PAPI_query_event(EventCode) == PAPI_OK) {
    CkpvAccess(papiEvents)[2] = EventCode;
  } else {
    CkAbort("WARNING: perf::PERF_COUNT_HW_CACHE_LL:MISS doesn't exist on this platform!");
  }
  ret = PAPI_event_name_to_code("DATA_PREFETCHER:ALL", &EventCode);
  if (ret == PAPI_OK && PAPI_query_event(EventCode) == PAPI_OK) {
    CkpvAccess(papiEvents)[3] = EventCode;
  } else {
    CkAbort("WARNING: DATA_PREFETCHER:ALL doesn't exist on this platform!");
  }
  if (PAPI_query_event(PAPI_L1_DCA) == PAPI_OK) {
    CkpvAccess(papiEvents)[4] = PAPI_L1_DCA;
  } else {
    CkAbort("WARNING: PAPI_L1_DCA doesn't exist on this platform!");
  }
  if (PAPI_query_event(PAPI_TOT_CYC) == PAPI_OK) {
    CkpvAccess(papiEvents)[5] = PAPI_TOT_CYC;
  } else {
    CkAbort("WARNING: PAPI_TOT_CYC doesn't exist on this platform!");
  }
  for (int i = 0; i < NUM_PAPI_EVENTS; i++) {
    int papiRetValue = PAPI_add_events(papiEventSet, &CkpvAccess(papiEvents)[i], 1);
    if (papiRetValue != PAPI_OK) {
      CkPrintf("failure for event %d\n", i);
      if (papiRetValue == PAPI_ECNFLCT) {
        CmiAbort("PAPI events conflict! Please re-assign event types!\n");
      } else {
        CmiAbort("PAPI failed to add designated events!\n");
      }
    }
  }
  // ...
}
#endif // MEASURE_NAMD_WITH_PAPI
#ifdef OPENATOM_VERSION
static void startOA(char inDriverFile[1024], char inPhysicsFile[1024], CkCallback doneCB) {
  CProxy_oaSetup moaInstance = CProxy_oaSetup::ckNew(inDriverFile, inPhysicsFile, doneCB);
}
#endif // OPENATOM_VERSION

// Node::Node() constructor (excerpt)
  DebugM(4, "Creating Node\n");
#if (CMK_CCS_AVAILABLE && CMK_WEB_MODE)
  // ...
#endif
  if (CkpvAccess(Node_instance) == 0) {
    CkpvAccess(Node_instance) = this;
  } else {
    NAMD_bug("Node::Node() - another instance of Node exists!");
  }
  CkpvAccess(BOCclass_group) = msg->group;
  CkpvAccess(BOCclass_group).node = thisgroup;
  // ...
  TopoManager *tmgr = new TopoManager();
  tmgr->rankToCoordinates(CkMyPe(), x, y, z, localRankOnNode);
  // ...
  DebugM(4, "Creating PatchMap, AtomMap, ComputeMap\n");
  // ...
#if defined(CMK_BALANCED_INJECTION_API) && CMK_BALANCED_INJECTION_API != 0
  balancedInjectionLevel = ck_get_GNI_BIConfig();
  ck_set_GNI_BIConfig(20);
#endif
// Node::~Node() destructor (excerpt)
  delete CkpvAccess(comm);
  // ...
#ifdef MEASURE_NAMD_WITH_PAPI
  delete [] CkpvAccess(papiEvents);  // allocated with new[] in namdInitPapiCounters()
#endif
void Node::bindBocVars() {
  DebugM(4, "Binding to BOC's\n");
  CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
  patchMgr = pm.ckLocalBranch();
  CProxy_ProxyMgr prm(CkpvAccess(BOCclass_group).proxyMgr);
  proxyMgr = prm.ckLocalBranch();
  CProxy_WorkDistrib wd(CkpvAccess(BOCclass_group).workDistrib);
  workDistrib = wd.ckLocalBranch();
  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
  computeMgr = cm.ckLocalBranch();
  CProxy_LdbCoordinator lc(CkpvAccess(BOCclass_group).ldbCoordinator);
  ldbCoordinator = lc.ckLocalBranch();
#ifdef MEM_OPT_VERSION
  CProxy_ParallelIOMgr io(CkpvAccess(BOCclass_group).ioMgr);
  ioMgr = io.ckLocalBranch();
#endif
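// bindBocVars() runs on every PE: it rebuilds a proxy from each group ID stored
// in BOCclass_group and caches the result of ckLocalBranch(), so later code can
// reach the PE-local PatchMgr, ProxyMgr, WorkDistrib, ComputeMgr, and
// LdbCoordinator instances without going back through proxies.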
  char *foo = (char *) malloc(size*MB);
  // ...
  sprintf(buf, "Malloc fails on Pe %d at %d MB.\n", CkMyPe(), step*size);
  // ...
  memset(foo, 0, size*MB*sizeof(char));
  // ...
  CkPrintf("All PEs successfully allocated %d MB.\n", 100*mallocTest_size);
  // ...
  CkPrintf("Starting malloc test on all PEs.\n");
  // ...
  CkStartQD(CkIndex_Node::mallocTestQd(), &thishandle);
  // ...
  (CProxy_Node(CkpvAccess(BOCclass_group).node)).startup();
// Node::startup() (excerpt)
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
  // ...
#endif
  newTime = CmiWallTimer();
  iout << iINFO << "Startup phase " << startupPhase-1 << " took " /* ... */;
  // ...
  switch (startupPhase) {

#if !CMK_SMP || ! USE_CKLOOP
  // ...
#endif
  CkStartQD(CkIndex_Node::mallocTestQd(), &thishandle);
  // ...
#ifdef MEASURE_NAMD_WITH_PAPI
  // ...
#endif
#ifdef MEM_OPT_VERSION
  ioMgr->initialize(this);
#endif
  // ...
#ifdef MEM_OPT_VERSION
  ioMgr->readPerAtomInfo();
  // ...
  ioMgr->sendDcdParams();
#endif
  // ...
#ifdef MEM_OPT_VERSION
  ioMgr->updateMolInfo();
  // ...
  ioMgr->migrateAtomsMGrp();
#endif
  // ...
  HPM_Init(localRankOnNode);
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
  // ...
#endif
  CkpvAccess(BOCclass_group).computePmeMgr = CProxy_ComputePmeMgr::ckNew();
#ifdef OPENATOM_VERSION
  CkpvAccess(BOCclass_group).computeMoaMgr = CProxy_ComputeMoaMgr::ckNew();
#endif // OPENATOM_VERSION
  // ...
#ifdef OPENATOM_VERSION
  CkCallback doneMoaStart(CkIndex_main::doneMoaSetup(), thishandle);
#endif // OPENATOM_VERSION
  // ...
#ifdef MEM_OPT_VERSION
  ioMgr->integrateMigratedAtoms();
  // ...
  ioMgr->integrateClusterSize();
  // ...
  ioMgr->calcAtomsInEachPatch();
#endif
  // ...
#ifdef PROCTRACE_DEBUG
  DebugFileTrace::Instance("procTrace");
#endif
#ifndef MEM_OPT_VERSION
  // ...
#endif
#if defined(NAMD_MIC)
  mic_initHostDeviceLDB();
#endif
  // ...
  iout << iINFO << "Simulating initial mapping is done, now NAMD exits\n" << endi;
#if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
  CProxy_NodeProxyMgr npm(CkpvAccess(BOCclass_group).nodeProxyMgr);
  // ...
#endif
  // ...
#if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
  CProxy_NodeProxyMgr npm(CkpvAccess(BOCclass_group).nodeProxyMgr);
  // ...
  npm[CkMyNode()].ckLocalBranch()->registerLocalProxyMgr(CkpvAccess(BOCclass_group).proxyMgr);
  // ...
  npm[CkMyNode()].ckLocalBranch()->registerLocalPatchMap(CkMyRank(), PatchMap::Object());
#endif
  // ...
  CProxy_ComputeMsmMsaMgr msm(CkpvAccess(BOCclass_group).computeMsmMsaMgr);
  msm[CkMyPe()].initialize(new CkQdMsg);
  // ...
  CProxy_ComputeMsmMgr msm(CkpvAccess(BOCclass_group).computeMsmMgr);
  // ...
  if (lattice.a_p() && lattice.b_p() && lattice.c_p()) {
    msm[CkMyPe()].initialize(msg);
  }
  else if ( ! CkMyPe() ) {
    // ...
  }
  // ...
#ifdef OPENATOM_VERSION
  CProxy_ComputeMoaMgr moa(CkpvAccess(BOCclass_group).computeMoaMgr);
  moa[CkMyPe()].initialize(new CkQdMsg);
#endif // OPENATOM_VERSION
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
  CProxy_ComputePmeCUDAMgr pme(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
  pme.ckLocalBranch()->initialize(new CkQdMsg);
  // ...
#endif
  // ...
  CProxy_ComputePmeMgr pme(CkpvAccess(BOCclass_group).computePmeMgr);
  pme[CkMyPe()].initialize(new CkQdMsg);
  // ...
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
  if ( CkMyRank()==0 ) {
    CProxy_ComputeCUDAMgr nb(CkpvAccess(BOCclass_group).computeCUDAMgr);
    nb.ckLocalBranch()->initialize(new CkQdMsg);
  }
#endif
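// Only rank 0 of each SMP node drives the ComputeCUDAMgr initialization above;
// the other PEs on the node reach the shared per-node instance through
// ckLocalBranch() when they need it.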
#ifdef MEM_OPT_VERSION
  ioMgr->sendAtomsToHomePatchProcs();
#endif
  // ...
  CProxy_ComputeMsmMgr msm(CkpvAccess(BOCclass_group).computeMsmMgr);
  msm[CkMyPe()].initialize_create();
  // ...
#ifdef OPENATOM_VERSION
  CProxy_ComputeMoaMgr moa(CkpvAccess(BOCclass_group).computeMoaMgr);
  moa[CkMyPe()].initWorkers(new CkQdMsg);
#endif // OPENATOM_VERSION
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
  CProxy_ComputePmeCUDAMgr pme(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
  pme[CkMyNode()].initialize_pencils(new CkQdMsg);
  // ...
#endif
  CProxy_ComputePmeMgr pme(CkpvAccess(BOCclass_group).computePmeMgr);
  pme[CkMyPe()].initialize_pencils(new CkQdMsg);
  // ...
  CProxy_ComputeMsmMsaMgr msm(CkpvAccess(BOCclass_group).computeMsmMsaMgr);
  msm[CkMyPe()].initWorkers(new CkQdMsg);
  // ...
  CProxy_ComputeMsmMgr msm(CkpvAccess(BOCclass_group).computeMsmMgr);
  msm[CkMyPe()].update(new CkQdMsg);
  // ...
#ifdef MEM_OPT_VERSION
  ioMgr->createHomePatches();
#endif
  // ...
#ifdef OPENATOM_VERSION
  CProxy_ComputeMoaMgr moa(CkpvAccess(BOCclass_group).computeMoaMgr);
  moa[CkMyPe()].startWorkers(new CkQdMsg);
#endif // OPENATOM_VERSION
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
  CProxy_ComputePmeCUDAMgr pme(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
  pme[CkMyNode()].activate_pencils(new CkQdMsg);
  // ...
#endif
  CProxy_ComputePmeMgr pme(CkpvAccess(BOCclass_group).computePmeMgr);
  pme[CkMyPe()].activate_pencils(new CkQdMsg);
  // ...
  CProxy_ComputeMsmMsaMgr msm(CkpvAccess(BOCclass_group).computeMsmMsaMgr);
  msm[CkMyPe()].startWorkers(new CkQdMsg);
  if (CkMyPe() && CkMyNodeSize() > 1 && ! CkMyRank()) Tcl_DeleteInterp(Tcl_CreateInterp());
  // ...
#ifdef USE_NODEPATCHMGR
  CProxy_NodeProxyMgr npm(CkpvAccess(BOCclass_group).nodeProxyMgr);
  npm[CkMyNode()].ckLocalBranch()->createSTForHomePatches(PatchMap::Object());
#endif
  // ...
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
  // ...
#endif
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
  gpuResInitThreadPriority = 0;
  CthSetStrategyDefault(gpuResInitThread);
  CthAwakenPrio(gpuResInitThread, CK_QUEUEING_IFIFO, PRIORITY_SIZE, &gpuResInitThreadPriority);
#endif
  // ...
#if defined(NAMD_MIC)
  mic_dumpHostDeviceComputeMap();
#endif
  iout << /* ... */ " COMPUTE OBJECTS\n" << endi;
  // ...
  DebugM(4, "Creating Computes\n");
  // ...
  DebugM(4, "Building Sequencers\n");
  // ...
  DebugM(4, "Initializing LDB\n");
  // ...
#if defined(CMK_BALANCED_INJECTION_API) && CMK_BALANCED_INJECTION_API != 0
  ck_set_GNI_BIConfig(balancedInjectionLevel);
#endif
  // ...
#ifdef MEM_OPT_VERSION
  ioMgr->readInfoForParOutDcdSelection();
  // ...
  ioMgr->freeMolSpace();
#endif
  // ...
  NAMD_bug("Startup Phase has a bug - check case statement");
  // ...
  CkStartQD(CkCallback(CkIndex_Node::startup(), thisgroup));
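// Node::startup() advances through its phases with Charm++ quiescence
// detection: each phase ends with a CkStartQD() call whose callback re-enters
// startup() once no messages remain in flight.  Minimal sketch of that pattern
// (illustrative only, not NAMD code; MyGroup and nextPhase are hypothetical):
#if 0  // example only
void MyGroup::beginPhase() {
  // ... send out all messages for this phase ...
  // When the system is quiescent, invoke nextPhase() on this group.
  CkStartQD(CkCallback(CkIndex_MyGroup::nextPhase(), thisgroup));
}
#endif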
#ifdef OPENATOM_VERSION
void Node::doneMoaStart()
{
#ifdef OPENATOM_VERSION_DEBUG
  CkPrintf("doneMoaStart executed on processor %d.\n", CkMyPe() );
#endif // OPENATOM_VERSION_DEBUG
}
#endif // OPENATOM_VERSION

void Node::namdOneCommInit()
{
  if (CkpvAccess(comm) == NULL) {
    // ...
  }
}

void Node::namdOneRecv() {
  if ( CmiMyRank() ) return;
  // ...
  DebugM(4, "Getting SimParameters\n");
  conv_msg = CkpvAccess(comm)->newInputStream(0, SIMPARAMSTAG);
  // ...
  DebugM(4, "Getting Parameters\n");
  // ...
  DebugM(4, "Getting Molecule\n");
  conv_msg = CkpvAccess(comm)->newInputStream(0, MOLECULETAG);
  // ...
  iout << iINFO << "Compute Nodes receiving GoMolecule Information" << "\n" << endi;
  conv_msg = CkpvAccess(comm)->newInputStream(0, MOLECULETAG);
  // ...
  DebugM(4, "Done Receiving\n");
}
void Node::namdOneSend() {
  // ...
  DebugM(4, "Sending SimParameters\n");
  // ...
  DebugM(4, "Sending Parameters\n");
  // ...
  DebugM(4, "Sending Molecule\n");
  // ...
  iout << iINFO << "Master Node sending GoMolecule Information" << "\n" << endi;
  // ...
}

  // ...
  CProxy_Node nodeProxy(thisgroup);
  nodeProxy.resendMolecule();
  if ( CmiMyRank() ) {
    // ...
  }
  if ( CmiMyPe() == 0 ) {
    // ...
  }
  // ...
  conv_msg = CkpvAccess(comm)->newInputStream(0, MOLECULETAG);
  // ...
  CProxy_Node nodeProxy(thisgroup);
  for ( int i=0; i<CmiMyNodeSize(); ++i ) {
    nodeProxy[CmiMyPe()+i].resendMolecule2();
  }
void Node::threadInit() {
  if (CthImplemented()) {
    CthSetStrategyDefault(CthSelf());
  } else {
    NAMD_bug("Node::startup() Oh no, tiny elvis, threads not implemented");
  }
}
void Node::buildSequencers() {
  // ...
#ifdef NODEGROUP_FORCE_REGISTER
  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
  PatchData *pdata = cpdata.ckLocalBranch();
  pdata->c_out = controller;
#endif
  // ...
  for (ai=ai.begin(); ai != ai.end(); ai++) {
    // ...
  }
  // ...
  (CProxy_Node(CkpvAccess(BOCclass_group).node)).run();
  DebugM(4, "Starting Sequencers\n");
  // ...
  for (ai=ai.begin(); ai != ai.end(); ai++) {
    // ...
  }
  // ...
  double newTime = CmiWallTimer();
  iout << iINFO << "Startup phase " << startupPhase-1 << " took " /* ... */;
  // ...
  iout << iINFO << "Finished startup at " << newTime << " s, " /* ... */;
  // ...
  CkStartQD(CkIndex_Node::scriptBarrier(), &thishandle);
// Node::reloadCharges(const char *filename) (excerpt)
  FILE *file = fopen(filename, "r");
  if ( ! file ) NAMD_die("Node::reloadCharges():Error opening charge file.");
  // ...
  float *charge = new float[n];
  for ( int i = 0; i < n; ++i ) {
    if ( ! fscanf(file, "%f", &charge[i]) )
      NAMD_die("Node::reloadCharges():Not enough numbers in charge file.");
  }
  // ...
  CProxy_Node(thisgroup).reloadCharges(charge, n);
#ifdef NODEGROUP_FORCE_REGISTER
  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
  cpdata.setDeviceKernelUpdateCounter();
#endif
  DebugM(4, "reloadGridforceGrid(const char*) called on node " << CkMyPe() << "\n" << endi);
  // ...
  if (gridnum < 0 || mgridParams == NULL) {
    NAMD_die("Node::reloadGridforceGrid(const char*):Could not find grid.");
  }
  // ...
  NAMD_bug("Node::reloadGridforceGrid(const char*):grid not found");
  // ...
  CProxy_Node(thisgroup).reloadGridforceGrid(gridnum);
#ifdef NODEGROUP_FORCE_REGISTER
  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
  cpdata.setDeviceKernelUpdateCounter();
#endif
  DebugM(4, "reloadGridforceGrid(const char*) finished\n" << endi);
  DebugM(4, "updateGridScale(char*, Vector) called on node " << CkMyPe() << "\n" << endi);
  // ...
  if (gridnum < 0 || mgridParams == NULL) {
    NAMD_die("Node::updateGridScale(char*, Vector): Could not find grid.");
  }
  // ...
  NAMD_bug("Node::updateGridScale(char*, Vector): grid not found");
  // ...
  CProxy_Node(thisgroup).updateGridScale(gridnum, scale.x, scale.y, scale.z);
  // ...
  DebugM(4, "updateGridScale(char*, Vector) finished\n" << endi);
  if (CmiMyRank()) return;
  DebugM(4, "updateGridScale(char*, int, float, float, float) called on node " << CkMyPe() << "\n" << endi);
  // ...
  NAMD_bug("Node::updateGridScale(char*, int, float, float, float):grid not found");
  // ...
#ifdef NODEGROUP_FORCE_REGISTER
  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
  cpdata.setDeviceKernelUpdateCounter();
#endif
  DebugM(4, "updateGridScale(char*, int, float, float, float) finished\n" << endi);
  if (CmiMyRank()) return;
  DebugM(4, "reloadGridforceGrid(int) called on node " << CkMyPe() << "\n" << endi);
  // ...
  NAMD_bug("Node::reloadGridforceGrid(int):grid not found");
  // ...
  DebugM(4, "Receiving grid\n");
  // ...
  DebugM(4, "Sending grid\n");
  // ...
#ifdef NODEGROUP_FORCE_REGISTER
  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
  cpdata.setDeviceKernelUpdateCounter();
#endif
  DebugM(4, "reloadGridforceGrid(int) finished\n" << endi);
// Node::sendCheckpointReq() (excerpt)
  msg->replica = CmiMyPartition();
  // ...
  strcpy(msg->key, key);
  envelope *env = UsrToEnv(CheckpointMsg::pack(msg));
  CmiSetHandler(env, recvCheckpointCReq_index);
#if CMK_HAS_PARTITION
  CmiInterSyncSendAndFree(CkMyPe(), remote, env->getTotalsize(), (char*)env);
#else
  CmiSyncSendAndFree(CkMyPe(), env->getTotalsize(), (char*)env);
#endif

// Node::sendCheckpointAck() (excerpt)
  msg->replica = CmiMyPartition();
  envelope *env = UsrToEnv(CheckpointMsg::pack(msg));
  CmiSetHandler(env, recvCheckpointCAck_index);
#if CMK_HAS_PARTITION
  CmiInterSyncSendAndFree(CkMyPe(), remote, env->getTotalsize(), (char*)env);
#else
  CmiSyncSendAndFree(CkMyPe(), env->getTotalsize(), (char*)env);
#endif
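// The checkpoint messages above bypass normal Charm++ entry methods: the
// envelope is tagged with a registered Converse handler index and pushed with
// CmiSyncSendAndFree (or its inter-partition variant).  Illustrative sketch of
// that Converse-level pattern only (not NAMD code; exampleHandler and
// exampleHandlerIdx are hypothetical names):
#if 0  // example only
static int exampleHandlerIdx;
static void exampleHandler(void *env) {
  // handler owns the message once delivered
  CmiFree(env);
}
static void registerExampleHandler() {
  exampleHandlerIdx = CmiRegisterHandler((CmiHandler) exampleHandler);
}
static void sendExample(int pe, int size, char *msg) {
  // msg must come from CmiAlloc so the receiver can CmiFree it
  CmiSetHandler(msg, exampleHandlerIdx);
  CmiSyncSendAndFree(pe, size, msg);
}
#endif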
// Node::sendEnableExitScheduler() / exit scheduling (excerpt)
  CProxy_Node nodeProxy(thisgroup);
  nodeProxy[0].recvEnableExitScheduler();
  // ...
  CkStartQD(CkIndex_Node::exitScheduler(), &thishandle);

// Node::sendEnableEarlyExit() / early exit (excerpt)
  CProxy_Node nodeProxy(thisgroup);
  nodeProxy[0].recvEnableEarlyExit();
  // ...
  CkStartQD(CkIndex_Node::earlyExit(), &thishandle);
  // ...
  NAMD_die("Exiting prematurely; see error messages above.");
// Node::saveMolDataPointers() (excerpt)
  this->pdb = state->pdb;
  this->state = state;

// HPM instrumentation around a 500-step window (excerpt)
  HPM_Start("500 steps", localRankOnNode);
  // ...
  HPM_Stop("500 steps", localRankOnNode);
  HPM_Print(CkMyPe(), localRankOnNode);
// Node::traceBarrier() (excerpt)
  if (turnOnTrace) traceBegin();
  // ...
  if (turnOnTrace) CmiTurnOnStats();
  else CmiTurnOffStats();
  // ...
  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
  CkCallback cb(CkIndex_Node::resumeAfterTraceBarrier(NULL), nd[0]);
  contribute(0, NULL, CkReduction::sum_int, cb);

// Node::resumeAfterTraceBarrier() (excerpt)
  CmiAssert(CmiMyPe()==0);
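// traceBarrier() is a barrier-by-reduction: every group member contributes an
// empty value to a sum reduction whose callback targets a single entry method
// on PE 0, which then resumes the waiting work.  Minimal sketch of the pattern
// (illustrative only; MyGroup and doneBarrier are hypothetical names):
#if 0  // example only
void MyGroup::barrier() {
  CkCallback cb(CkIndex_MyGroup::doneBarrier(NULL), thisProxy[0]);
  contribute(0, NULL, CkReduction::sum_int, cb);  // empty contribution
}
void MyGroup::doneBarrier(CkReductionMsg *msg) {
  delete msg;   // all members have arrived; continue on PE 0
}
#endif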
// Node::papiMeasureBarrier() (excerpt)
#ifdef MEASURE_NAMD_WITH_PAPI
  double results[NUM_PAPI_EVENTS+1];
  // ...
  CkpvAccess(papiEvents)[NUM_PAPI_EVENTS] = CmiWallTimer();
  // ...
  long long counters[NUM_PAPI_EVENTS+1];
  int ret = PAPI_start_counters(CkpvAccess(papiEvents), NUM_PAPI_EVENTS);
  // ...
  CkPrintf("error PAPI_start_counters (%d) at step %d called on proc %d\n", ret, step, CkMyPe());
  // ...
  if (PAPI_read_counters(counters, NUM_PAPI_EVENTS) != PAPI_OK)
    CkPrintf("error PAPI_read_counters %d\n", PAPI_read_counters(counters, NUM_PAPI_EVENTS));
  // ...
  long long counters[NUM_PAPI_EVENTS+1];
  for (int i=0; i<NUM_PAPI_EVENTS; i++) counters[i] = 0LL;
  if (PAPI_read_counters(counters, NUM_PAPI_EVENTS) == PAPI_OK)
  // ...
#if !MEASURE_PAPI_SPP
  results[0] = (double)counters[0]/1e6;
  results[1] = (double)counters[1]/1e6;
#else
  for (int i=0; i<NUM_PAPI_EVENTS; i++) results[i] = counters[i]/1e6;
#endif
  // ...
  PAPI_stop_counters(counters, NUM_PAPI_EVENTS);
  // ...
  results[NUM_PAPI_EVENTS] = CkpvAccess(papiEvents)[NUM_PAPI_EVENTS];
  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
  CkCallback cb(CkIndex_Node::resumeAfterPapiMeasureBarrier(NULL), nd[0]);
  contribute(sizeof(double)*(NUM_PAPI_EVENTS+1), &results, CkReduction::sum_double, cb);
#endif
// Node::resumeAfterPapiMeasureBarrier() (excerpt)
#ifdef MEASURE_NAMD_WITH_PAPI
  double *results = (double *)msg->getData();
  double endtime = CmiWallTimer();
  // ...
#if MEASURE_PAPI_SPP
  CkPrintf("SPP INFO: PAPI_FP_OPS timestep %d to %d is %lf(1e6)\n", bstep, estep, results[0]);
  CkPrintf("SPP INFO: PAPI_TOT_INS timestep %d to %d is %lf(1e6)\n", bstep, estep, results[1]);
  CkPrintf("SPP INFO: perf::PERF_COUNT_HW_CACHE_LL:MISS timestep %d to %d is %lf(1e6)\n", bstep, estep, results[2]);
  CkPrintf("SPP INFO: DATA_PREFETCHER:ALL timestep %d to %d is %lf(1e6)\n", bstep, estep, results[3]);
  CkPrintf("SPP INFO: PAPI_L1_DCA timestep %d to %d is %lf(1e6)\n", bstep, estep, results[4]);
  CkPrintf("SPP INFO: PAPI_TOT_CYC timestep %d to %d is %lf(1e6)\n", bstep, estep, results[5]);
  // ...
  CkPrintf("SPP INFO: WALLtime timestep %d to %d is %lf\n", bstep, estep, endtime-results[NUM_PAPI_EVENTS]);
  CkPrintf("SPP INFO: endtime %lf avgtime %lf tottime %lf\n", endtime, results[NUM_PAPI_EVENTS]/CkNumPes(), results[NUM_PAPI_EVENTS]);
#else
  if (CkpvAccess(papiEvents)[0] == PAPI_FP_INS) {
    double totalFPIns = results[0];
    if (CkpvAccess(papiEvents)[1] == PAPI_FMA_INS) totalFPIns += (results[1]*2);
    CkPrintf("FLOPS INFO: from timestep %d to %d, the total FP instruction of NAMD is %lf(x1e6) per processor\n",
             bstep, estep, totalFPIns/CkNumPes());
  } else {
    char nameBuf[PAPI_MAX_STR_LEN];
    CkPrintf("PAPI COUNTERS INFO: from timestep %d to %d, ", bstep, estep);
    for (int i=0; i<NUM_PAPI_EVENTS; i++) {
      PAPI_event_code_to_name(CkpvAccess(papiEvents)[i], nameBuf);
      CkPrintf("%s is %lf(x1e6), ", nameBuf, results[i]/CkNumPes());
    }
    CkPrintf("per processor\n");
  }
#endif
#endif
// Node::outputPatchComputeMaps() (excerpt)
  int numpes = CkNumPes();
  int nodesize = CkMyNodeSize();
  // ...
  sprintf(fname, "mapdump_%s.%d_%d_%d_%s", filename, numpes, nodesize, tag, gNAMDBinaryName);
  FILE *fp = fopen(fname, "w");
  if ( ! fp )
    NAMD_die("Error in outputting PatchMap and ComputeMap info!\n");
  // ...
  fprintf(fp, "%d %d %d %d %d %d %d\n", numpes, nodesize, numPatches, numComputes, /* ... */);
  // ...
  for (int i=0; i<numPatches; i++) {
#ifdef MEM_OPT_VERSION
    fprintf(fp, "%d %d\n", pMap->numAtoms(i), pMap->node(i));
#endif
    // ...
  }
  for (int i=0; i<numComputes; i++) {
    fprintf(fp, "%d %d %d %d\n", cMap->node(i), cMap->type(i), cMap->pid(i,0), cMap->pid(i,1));
  }
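// The dump file written above begins with a single header line
// (numpes nodesize numPatches numComputes ...), followed by one line per patch
// (atom count and home node in the MEM_OPT build) and one line per compute
// (node, type, and its first two patch IDs), presumably so the mapping can be
// compared or reused across runs.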
#ifndef NODEGROUP_FORCE_REGISTER
  // ...
#else
  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
  PatchData* patchData = cpdata.ckLocalBranch();
  return patchData->script;
#endif

#include "Node.def.h"