#if !defined(WIN32) || defined(__CYGWIN__)
#include "Node.decl.h"
#define MIN_DEBUG_LEVEL 3
#include "main.decl.h"
#include "ComputeMgr.decl.h"
#include "ComputePmeMgr.decl.h"
#include "ComputeCUDAMgr.decl.h"
#include "ComputePmeCUDAMgr.decl.h"
#include "ComputeGridForceMgr.decl.h"
#include "CollectionMgr.decl.h"
#include "ParallelIOMgr.decl.h"

#if(CMK_CCS_AVAILABLE && CMK_WEB_MODE)
extern "C" void CApplicationInit();
#endif

#if defined(CMK_BALANCED_INJECTION_API) && CMK_BALANCED_INJECTION_API != 0
#include "ckBIconfig.h"
#endif

#include "CollectionMgr.decl.h"
#include "CollectionMaster.decl.h"

extern "C" void HPM_Init(int);
extern "C" void HPM_Start(char *label, int);
extern "C" void HPM_Stop(char *label, int);
extern "C" void HPM_Print(int, int);

#if defined(NAMD_MIC)
extern void mic_dumpHostDeviceComputeMap();
extern void mic_initHostDeviceLDB();
#endif
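// Optional PAPI performance instrumentation.
// When built with MEASURE_NAMD_WITH_PAPI, Node keeps a per-PE (Ckpv) list of
// NUM_PAPI_EVENTS hardware counters.  Exactly one of the MEASURE_PAPI_*
// switches below selects the counter set: cache misses, floating-point
// instruction counts, or the SPP event set used for detailed profiling.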
#ifdef MEASURE_NAMD_WITH_PAPI
#define NUM_PAPI_EVENTS 6
#define MEASURE_PAPI_SPP 1
#define MEASURE_PAPI_CACHE 0
#define MEASURE_PAPI_FLOPS 0

static void namdInitPapiCounters(){
  int retval = PAPI_library_init(PAPI_VER_CURRENT);
  if(retval != PAPI_VER_CURRENT) {
    NAMD_die("PAPI library is not compatible!");
  }
  if(PAPI_thread_init(pthread_self)!=PAPI_OK) {
    NAMD_die("Multi-thread mode in PAPI could not be initialized!");
  }
  CkpvInitialize(int *, papiEvents);
  CkpvAccess(papiEvents) = new int[NUM_PAPI_EVENTS+1];
#if MEASURE_PAPI_CACHE
  // Prefer data-cache miss counters; fall back to totals if unavailable.
  if(PAPI_query_event(PAPI_L1_DCM)==PAPI_OK) {
    CkpvAccess(papiEvents)[0] = PAPI_L1_DCM;
  } else {
    CkPrintf("WARNING: PAPI_L1_DCM doesn't exist on this platform!\n");
    CkpvAccess(papiEvents)[0] = PAPI_TOT_INS;
  }
  if(PAPI_query_event(PAPI_L2_DCM)==PAPI_OK) {
    CkpvAccess(papiEvents)[1] = PAPI_L2_DCM;
  } else {
    CkpvAccess(papiEvents)[1] = PAPI_TOT_CYC;
  }
#elif MEASURE_PAPI_FLOPS
  // Prefer floating-point instruction counters; fall back to totals if unavailable.
  if(PAPI_query_event(PAPI_FP_INS)==PAPI_OK) {
    CkpvAccess(papiEvents)[0] = PAPI_FP_INS;
  } else {
    CkPrintf("WARNING: PAPI_FP_INS doesn't exist on this platform!\n");
    CkpvAccess(papiEvents)[0] = PAPI_TOT_INS;
  }
  if(PAPI_query_event(PAPI_FMA_INS)==PAPI_OK) {
    CkpvAccess(papiEvents)[1] = PAPI_FMA_INS;
  } else {
    CkpvAccess(papiEvents)[1] = PAPI_TOT_CYC;
  }
#elif MEASURE_PAPI_SPP
  // SPP measurement: build an explicit event set of six counters.
  int papiEventSet = PAPI_NULL;
  if (PAPI_create_eventset(&papiEventSet) != PAPI_OK) {
    CmiAbort("PAPI failed to create event set!\n");
  }
  if(PAPI_query_event(PAPI_FP_OPS)==PAPI_OK) {
    CkpvAccess(papiEvents)[0] = PAPI_FP_OPS;
  } else {
    CkAbort("WARNING: PAPI_FP_OPS doesn't exist on this platform!");
  }
  if(PAPI_query_event(PAPI_TOT_INS)==PAPI_OK) {
    CkpvAccess(papiEvents)[1] = PAPI_TOT_INS;
  } else {
    CkAbort("WARNING: PAPI_TOT_INS doesn't exist on this platform!");
  }
  int ret, EventCode;
  ret = PAPI_event_name_to_code("perf::PERF_COUNT_HW_CACHE_LL:MISS",&EventCode);
  if(ret==PAPI_OK && PAPI_query_event(EventCode)==PAPI_OK) {
    CkpvAccess(papiEvents)[2] = EventCode;
  } else {
    CkAbort("WARNING: perf::PERF_COUNT_HW_CACHE_LL:MISS doesn't exist on this platform!");
  }
  ret = PAPI_event_name_to_code("DATA_PREFETCHER:ALL",&EventCode);
  if(ret==PAPI_OK && PAPI_query_event(EventCode)==PAPI_OK) {
    CkpvAccess(papiEvents)[3] = EventCode;
  } else {
    CkAbort("WARNING: DATA_PREFETCHER:ALL doesn't exist on this platform!");
  }
  if(PAPI_query_event(PAPI_L1_DCA)==PAPI_OK) {
    CkpvAccess(papiEvents)[4] = PAPI_L1_DCA;
  } else {
    CkAbort("WARNING: PAPI_L1_DCA doesn't exist on this platform!");
  }
  if(PAPI_query_event(PAPI_TOT_CYC)==PAPI_OK) {
    CkpvAccess(papiEvents)[5] = PAPI_TOT_CYC;
  } else {
    CkAbort("WARNING: PAPI_TOT_CYC doesn't exist on this platform!");
  }
  // Add each requested event to the event set, reporting the first failure.
  for(int i=0;i<NUM_PAPI_EVENTS;i++) {
    int papiRetValue = PAPI_add_events(papiEventSet, &CkpvAccess(papiEvents)[i], 1);
    if (papiRetValue != PAPI_OK) {
      CkPrintf("failure for event %d\n",i);
      if (papiRetValue == PAPI_ECNFLCT) {
        CmiAbort("PAPI events conflict! Please re-assign event types!\n");
      } else {
        CmiAbort("PAPI failed to add designated events!\n");
      }
    }
  }
}
#endif // MEASURE_NAMD_WITH_PAPI
#ifdef OPENATOM_VERSION
static void startOA(char inDriverFile[1024], char inPhysicsFile[1024], CkCallback doneCB)
{
  CProxy_oaSetup moaInstance = CProxy_oaSetup::ckNew(inDriverFile, inPhysicsFile, doneCB);
}
#endif //OPENATOM_VERSION

Node::Node(GroupInitMsg *msg)
{
  DebugM(4,"Creating Node\n");
#if(CMK_CCS_AVAILABLE && CMK_WEB_MODE)
  CApplicationInit();
#endif

  // Exactly one Node instance may exist per PE.
  if (CkpvAccess(Node_instance) == 0) {
    CkpvAccess(Node_instance) = this;
  } else {
    NAMD_bug("Node::Node() - another instance of Node exists!");
  }

  CkpvAccess(BOCclass_group) = msg->group;
  CkpvAccess(BOCclass_group).node = thisgroup;

  TopoManager *tmgr = new TopoManager();
  int x, y, z;
  tmgr->rankToCoordinates(CkMyPe(), x, y, z, localRankOnNode);

  DebugM(4,"Creating PatchMap, AtomMap, ComputeMap\n");

#if defined(CMK_BALANCED_INJECTION_API) && CMK_BALANCED_INJECTION_API != 0
  // Save the network's balanced-injection level and lower it for startup.
  balancedInjectionLevel = ck_get_GNI_BIConfig();
  ck_set_GNI_BIConfig(20);
#endif
  delete CkpvAccess(comm);

#ifdef MEASURE_NAMD_WITH_PAPI
  delete [] CkpvAccess(papiEvents);  // allocated with new[] in namdInitPapiCounters()
#endif
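// bindBocVars() caches local-branch pointers for the Charm++ group (BOC)
// objects Node talks to frequently -- PatchMgr, ProxyMgr, WorkDistrib,
// ComputeMgr, LdbCoordinator, and (in memory-optimized builds) the
// ParallelIOMgr -- so later code can call them directly instead of going
// through proxies.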
void Node::bindBocVars(){
  DebugM(4,"Binding to BOC's\n");
  CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
  patchMgr = pm.ckLocalBranch();
  CProxy_ProxyMgr prm(CkpvAccess(BOCclass_group).proxyMgr);
  proxyMgr = prm.ckLocalBranch();
  CProxy_WorkDistrib wd(CkpvAccess(BOCclass_group).workDistrib);
  workDistrib = wd.ckLocalBranch();
  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
  computeMgr = cm.ckLocalBranch();
  CProxy_LdbCoordinator lc(CkpvAccess(BOCclass_group).ldbCoordinator);
  ldbCoordinator = lc.ckLocalBranch();
#ifdef MEM_OPT_VERSION
  CProxy_ParallelIOMgr io(CkpvAccess(BOCclass_group).ioMgr);
  ioMgr = io.ckLocalBranch();
#endif
}
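// mallocTest/mallocTestQd implement a simple startup memory stress test:
// each pass allocates and touches another block of memory on every PE and
// re-arms a quiescence-detection callback, so a PE that cannot satisfy the
// allocation reports at which step (and therefore how many MB) it failed.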
  char* foo = (char*) malloc(size*MB);
  sprintf(buf, "Malloc fails on Pe %d at %d MB.\n", CkMyPe(), step*size);
  memset(foo, 0, size*MB*sizeof(char));

  CkPrintf("All PEs successfully allocated %d MB.\n", 100*mallocTest_size);
  CkPrintf("Starting malloc test on all PEs.\n");
  CkStartQD(CkIndex_Node::mallocTestQd(), &thishandle);

  (CProxy_Node(CkpvAccess(BOCclass_group).node)).startup();
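// Node::startup() runs as a sequence of phases separated by Charm++
// quiescence detection: each case of the switch below creates or initializes
// one set of managers (I/O, PME, MSM, CUDA/HIP, OpenAtom, ...), then
// CkStartQD() re-enters startup() once all messages from the current phase
// have drained.  The elapsed time and memory use of the previous phase are
// reported at the top of each pass.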
#if defined(NAMD_CUDA) || defined(NAMD_HIP)

  newTime = CmiWallTimer();
  iout << iINFO << "Startup phase " << startupPhase-1
       << " took " << newTime - startupTime << " s, "
       << memusage_MB() << " MB of memory in use\n" << endi;

  switch (startupPhase) {
#if !CMK_SMP || ! USE_CKLOOP

  CkStartQD(CkIndex_Node::mallocTestQd(), &thishandle);

#ifdef MEASURE_NAMD_WITH_PAPI

#ifdef MEM_OPT_VERSION
  ioMgr->initialize(this);

#ifdef MEM_OPT_VERSION
  ioMgr->readPerAtomInfo();
  ioMgr->sendDcdParams();

#ifdef MEM_OPT_VERSION
  ioMgr->updateMolInfo();
  ioMgr->migrateAtomsMGrp();

  HPM_Init(localRankOnNode);
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
  CkpvAccess(BOCclass_group).computePmeMgr = CProxy_ComputePmeMgr::ckNew();

#ifdef OPENATOM_VERSION
  CkpvAccess(BOCclass_group).computeMoaMgr = CProxy_ComputeMoaMgr::ckNew();
#endif // OPENATOM_VERSION

#ifdef OPENATOM_VERSION
  CkCallback doneMoaStart(CkIndex_main::doneMoaSetup(), thishandle);
#endif // OPENATOM_VERSION

#ifdef MEM_OPT_VERSION
  ioMgr->integrateMigratedAtoms();
  ioMgr->integrateClusterSize();
  ioMgr->calcAtomsInEachPatch();

#ifdef PROCTRACE_DEBUG
  DebugFileTrace::Instance("procTrace");
#ifndef MEM_OPT_VERSION

#if defined(NAMD_MIC)
  mic_initHostDeviceLDB();
#endif

  iout << iINFO << "Simulating initial mapping is done; NAMD now exits\n" << endi;
#if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
  CProxy_NodeProxyMgr npm(CkpvAccess(BOCclass_group).nodeProxyMgr);

#if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
  CProxy_NodeProxyMgr npm(CkpvAccess(BOCclass_group).nodeProxyMgr);
  npm[CkMyNode()].ckLocalBranch()->registerLocalProxyMgr(CkpvAccess(BOCclass_group).proxyMgr);
  npm[CkMyNode()].ckLocalBranch()->registerLocalPatchMap(CkMyRank(), PatchMap::Object());
  CProxy_ComputeMsmMsaMgr msm(CkpvAccess(BOCclass_group).computeMsmMsaMgr);
  msm[CkMyPe()].initialize(new CkQdMsg);

  CProxy_ComputeMsmMgr msm(CkpvAccess(BOCclass_group).computeMsmMgr);
  if (lattice.a_p() && lattice.b_p() && lattice.c_p()) {
    msm[CkMyPe()].initialize(msg);
  }
  else if ( ! CkMyPe() ) {
#ifdef OPENATOM_VERSION
  CProxy_ComputeMoaMgr moa(CkpvAccess(BOCclass_group).computeMoaMgr);
  moa[CkMyPe()].initialize(new CkQdMsg);
#endif // OPENATOM_VERSION

#if defined(NAMD_CUDA) || defined(NAMD_HIP)
  CProxy_ComputePmeCUDAMgr pme(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
  pme.ckLocalBranch()->initialize(new CkQdMsg);

  CProxy_ComputePmeMgr pme(CkpvAccess(BOCclass_group).computePmeMgr);
  pme[CkMyPe()].initialize(new CkQdMsg);

#if defined(NAMD_CUDA) || defined(NAMD_HIP)
  if ( CkMyRank()==0 ) {
    CProxy_ComputeCUDAMgr nb(CkpvAccess(BOCclass_group).computeCUDAMgr);
    nb.ckLocalBranch()->initialize(new CkQdMsg);
  }
#ifdef MEM_OPT_VERSION
  ioMgr->sendAtomsToHomePatchProcs();

  CProxy_ComputeMsmMgr msm(CkpvAccess(BOCclass_group).computeMsmMgr);
  msm[CkMyPe()].initialize_create();

#ifdef OPENATOM_VERSION
  CProxy_ComputeMoaMgr moa(CkpvAccess(BOCclass_group).computeMoaMgr);
  moa[CkMyPe()].initWorkers(new CkQdMsg);
#endif // OPENATOM_VERSION

#if defined(NAMD_CUDA) || defined(NAMD_HIP)
  CProxy_ComputePmeCUDAMgr pme(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
  pme[CkMyNode()].initialize_pencils(new CkQdMsg);

  CProxy_ComputePmeMgr pme(CkpvAccess(BOCclass_group).computePmeMgr);
  pme[CkMyPe()].initialize_pencils(new CkQdMsg);

  CProxy_ComputeMsmMsaMgr msm(CkpvAccess(BOCclass_group).computeMsmMsaMgr);
  msm[CkMyPe()].initWorkers(new CkQdMsg);

  CProxy_ComputeMsmMgr msm(CkpvAccess(BOCclass_group).computeMsmMgr);
  msm[CkMyPe()].update(new CkQdMsg);
#ifdef MEM_OPT_VERSION
  ioMgr->createHomePatches();

#ifdef OPENATOM_VERSION
  CProxy_ComputeMoaMgr moa(CkpvAccess(BOCclass_group).computeMoaMgr);
  moa[CkMyPe()].startWorkers(new CkQdMsg);
#endif // OPENATOM_VERSION

#if defined(NAMD_CUDA) || defined(NAMD_HIP)
  CProxy_ComputePmeCUDAMgr pme(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
  pme[CkMyNode()].activate_pencils(new CkQdMsg);

  CProxy_ComputePmeMgr pme(CkpvAccess(BOCclass_group).computePmeMgr);
  pme[CkMyPe()].activate_pencils(new CkQdMsg);

  CProxy_ComputeMsmMsaMgr msm(CkpvAccess(BOCclass_group).computeMsmMsaMgr);
  msm[CkMyPe()].startWorkers(new CkQdMsg);

  if (CkMyPe() && CkMyNodeSize() > 1 && ! CkMyRank()) Tcl_DeleteInterp(Tcl_CreateInterp());
#ifdef USE_NODEPATCHMGR
  CProxy_NodeProxyMgr npm(CkpvAccess(BOCclass_group).nodeProxyMgr);
  npm[CkMyNode()].ckLocalBranch()->createSTForHomePatches(PatchMap::Object());

#if defined(NAMD_CUDA) || defined(NAMD_HIP)

#if defined(NAMD_CUDA) || defined(NAMD_HIP)
  gpuResInitThreadPriority = 0;
  CthSetStrategyDefault(gpuResInitThread);
  CthAwakenPrio(gpuResInitThread, CK_QUEUEING_IFIFO, PRIORITY_SIZE, &gpuResInitThreadPriority);

#if defined(NAMD_MIC)
  mic_dumpHostDeviceComputeMap();
       << " COMPUTE OBJECTS\n" << endi;

  DebugM(4,"Creating Computes\n");
  DebugM(4,"Building Sequencers\n");
  DebugM(4,"Initializing LDB\n");

#if defined(CMK_BALANCED_INJECTION_API) && CMK_BALANCED_INJECTION_API != 0
  // Restore the balanced-injection level saved in the constructor.
  ck_set_GNI_BIConfig(balancedInjectionLevel);
#endif

#ifdef MEM_OPT_VERSION
  ioMgr->readInfoForParOutDcdSelection();
  ioMgr->freeMolSpace();
#endif

  default:
    NAMD_bug("Startup Phase has a bug - check case statement");
  }

  CkStartQD(CkCallback(CkIndex_Node::startup(), thisgroup));
#ifdef OPENATOM_VERSION
void Node::doneMoaStart()
{
#ifdef OPENATOM_VERSION_DEBUG
  CkPrintf("doneMoaStart executed on processor %d.\n", CkMyPe());
#endif //OPENATOM_VERSION_DEBUG
}
#endif //OPENATOM_VERSION

void Node::namdOneCommInit()
{
  if (CkpvAccess(comm) == NULL) {
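// namdOneSend() on the master and namdOneRecv() on the other nodes exchange
// the SimParameters, Parameters, and Molecule data over the MIStream/MOStream
// connections created by namdOneCommInit(); the tags (SIMPARAMSTAG,
// MOLECULETAG) identify which object a stream carries.  Only the rank-0 PE of
// each node receives; resendMolecule()/resendMolecule2() later propagate the
// data to the remaining ranks within the node.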
void Node::namdOneRecv() {
  if ( CmiMyRank() ) return;

  DebugM(4,"Getting SimParameters\n");
  conv_msg = CkpvAccess(comm)->newInputStream(0, SIMPARAMSTAG);

  DebugM(4,"Getting Parameters\n");

  DebugM(4,"Getting Molecule\n");
  conv_msg = CkpvAccess(comm)->newInputStream(0, MOLECULETAG);

  iout << iINFO << "Compute nodes receiving GoMolecule information\n" << endi;
  conv_msg = CkpvAccess(comm)->newInputStream(0, MOLECULETAG);

  DebugM(4,"Done Receiving\n");
void Node::namdOneSend() {
  DebugM(4,"Sending SimParameters\n");
  DebugM(4,"Sending Parameters\n");
  DebugM(4,"Sending Molecule\n");

  iout << iINFO << "Master node sending GoMolecule information\n" << endi;
  CProxy_Node nodeProxy(thisgroup);
  nodeProxy.resendMolecule();

  if ( CmiMyRank() ) {

  if ( CmiMyPe() == 0 ) {

  conv_msg = CkpvAccess(comm)->newInputStream(0, MOLECULETAG);

  CProxy_Node nodeProxy(thisgroup);
  for ( int i=0; i<CmiMyNodeSize(); ++i ) {
    nodeProxy[CmiMyPe()+i].resendMolecule2();
  }
void Node::threadInit() {
  if (CthImplemented()) {
    CthSetStrategyDefault(CthSelf());
  } else {
    NAMD_bug("Node::threadInit() - threads not implemented");
  }
}
void Node::buildSequencers() {

#ifdef NODEGROUP_FORCE_REGISTER
  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
  PatchData *pdata = cpdata.ckLocalBranch();
  pdata->c_out = controller;

  for (ai=ai.begin(); ai != ai.end(); ai++) {

  (CProxy_Node(CkpvAccess(BOCclass_group).node)).run();
  DebugM(4,"Starting Sequencers\n");
  for (ai=ai.begin(); ai != ai.end(); ai++) {

  double newTime = CmiWallTimer();
  iout << iINFO << "Startup phase " << startupPhase-1
       << " took " << newTime - startupTime << " s, "
       << memusage_MB() << " MB of memory in use\n" << endi;
  iout << iINFO << "Finished startup at " << newTime << " s, "
       << memusage_MB() << " MB of memory in use\n" << endi;

  CkStartQD(CkIndex_Node::scriptBarrier(), &thishandle);
  FILE *file = fopen(filename,"r");
  if ( ! file ) NAMD_die("Node::reloadCharges(): Error opening charge file.");

  float *charge = new float[n];
  for ( int i = 0; i < n; ++i ) {
    // fscanf returns the number of items converted; anything but 1 is a failure.
    if ( fscanf(file,"%f",&charge[i]) != 1 )
      NAMD_die("Node::reloadCharges(): Not enough numbers in charge file.");
  }

  CProxy_Node(thisgroup).reloadCharges(charge,n);

#ifdef NODEGROUP_FORCE_REGISTER
  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
  cpdata.setDeviceKernelUpdateCounter();
#endif
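// The reloadGridforceGrid()/updateGridScale() family below re-reads or
// rescales a gridforce potential grid while the simulation is running.  The
// key-based variants resolve the grid index first and then broadcast to all
// nodes; the int variants do the per-node work, with rank 0 of each node
// receiving and forwarding the grid data.  Under NODEGROUP_FORCE_REGISTER the
// device kernel update counter is bumped, presumably so GPU-resident kernels
// pick up the change.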
  DebugM(4,"reloadGridforceGrid(const char*) called on node " << CkMyPe() << "\n" << endi);

  if (gridnum < 0 || mgridParams == NULL) {
    NAMD_die("Node::reloadGridforceGrid(const char*): Could not find grid.");
  }

  NAMD_bug("Node::reloadGridforceGrid(const char*): grid not found");

  CProxy_Node(thisgroup).reloadGridforceGrid(gridnum);

#ifdef NODEGROUP_FORCE_REGISTER
  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
  cpdata.setDeviceKernelUpdateCounter();
#endif

  DebugM(4,"reloadGridforceGrid(const char*) finished\n" << endi);
  DebugM(4,"updateGridScale(char*, Vector) called on node " << CkMyPe() << "\n" << endi);

  if (gridnum < 0 || mgridParams == NULL) {
    NAMD_die("Node::updateGridScale(char*, Vector): Could not find grid.");
  }

  NAMD_bug("Node::updateGridScale(char*, Vector): grid not found");

  CProxy_Node(thisgroup).updateGridScale(gridnum, scale.x, scale.y, scale.z);

  DebugM(4,"updateGridScale(char*, Vector) finished\n" << endi);
  if (CmiMyRank()) return;
  DebugM(4,"updateGridScale(char*, int, float, float, float) called on node " << CkMyPe() << "\n" << endi);

  NAMD_bug("Node::updateGridScale(char*, int, float, float, float): grid not found");

#ifdef NODEGROUP_FORCE_REGISTER
  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
  cpdata.setDeviceKernelUpdateCounter();
#endif

  DebugM(4,"updateGridScale(char*, int, float, float, float) finished\n" << endi);
  if (CmiMyRank()) return;
  DebugM(4,"reloadGridforceGrid(int) called on node " << CkMyPe() << "\n" << endi);

  NAMD_bug("Node::reloadGridforceGrid(int): grid not found");

  DebugM(4,"Receiving grid\n");

  DebugM(4,"Sending grid\n");

#ifdef NODEGROUP_FORCE_REGISTER
  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
  cpdata.setDeviceKernelUpdateCounter();
#endif

  DebugM(4,"reloadGridforceGrid(int) finished\n" << endi);
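// Checkpointing to another replica: the CheckpointMsg is stamped with the
// local partition number, packed into a Converse envelope, addressed to a
// registered handler (recvCheckpointCReq / recvCheckpointCAck), and sent
// either across partitions (CmiInterSyncSendAndFree, when CMK_HAS_PARTITION)
// or locally (CmiSyncSendAndFree).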
  msg->replica = CmiMyPartition();
  strcpy(msg->key,key);
  envelope *env = UsrToEnv(CheckpointMsg::pack(msg));
  CmiSetHandler(env,recvCheckpointCReq_index);
#if CMK_HAS_PARTITION
  CmiInterSyncSendAndFree(CkMyPe(),remote,env->getTotalsize(),(char*)env);
#else
  CmiSyncSendAndFree(CkMyPe(),env->getTotalsize(),(char*)env);
#endif

  msg->replica = CmiMyPartition();
  envelope *env = UsrToEnv(CheckpointMsg::pack(msg));
  CmiSetHandler(env,recvCheckpointCAck_index);
#if CMK_HAS_PARTITION
  CmiInterSyncSendAndFree(CkMyPe(),remote,env->getTotalsize(),(char*)env);
#else
  CmiSyncSendAndFree(CkMyPe(),env->getTotalsize(),(char*)env);
#endif
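// Orderly shutdown: any PE may ask node 0 to enable the exit scheduler, which
// waits for quiescence before exiting; the early-exit path does the same but
// ends with NAMD_die() because it is reached only after an error.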
  CProxy_Node nodeProxy(thisgroup);
  nodeProxy[0].recvEnableExitScheduler();

  CkStartQD(CkIndex_Node::exitScheduler(), &thishandle);

  CProxy_Node nodeProxy(thisgroup);
  nodeProxy[0].recvEnableEarlyExit();

  CkStartQD(CkIndex_Node::earlyExit(),&thishandle);

  NAMD_die("Exiting prematurely; see error messages above.");
  this->pdb = state->pdb;
  this->state = state;

  HPM_Start("500 steps", localRankOnNode);

  HPM_Stop("500 steps", localRankOnNode);
  HPM_Print(CkMyPe(), localRankOnNode);
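// traceBarrier()/papiMeasureBarrier() toggle Projections tracing or PAPI
// counting on every PE and then synchronize through a reduction to node 0,
// so measurement windows start and stop at the same step everywhere.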
  if(turnOnTrace) traceBegin();

  if(turnOnTrace) CmiTurnOnStats();
  else CmiTurnOffStats();

  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
  CkCallback cb(CkIndex_Node::resumeAfterTraceBarrier(NULL), nd[0]);
  contribute(0, NULL, CkReduction::sum_int, cb);

  CmiAssert(CmiMyPe()==0);
#ifdef MEASURE_NAMD_WITH_PAPI
  double results[NUM_PAPI_EVENTS+1];

  CkpvAccess(papiEvents)[NUM_PAPI_EVENTS] = CmiWallTimer();
  long long counters[NUM_PAPI_EVENTS+1];
  int ret = PAPI_start_counters(CkpvAccess(papiEvents), NUM_PAPI_EVENTS);
  if(ret != PAPI_OK) {
    CkPrintf("error PAPI_start_counters (%d) at step %d called on proc %d\n", ret, step, CkMyPe());
  }
  if(PAPI_read_counters(counters, NUM_PAPI_EVENTS)!=PAPI_OK) {
    CkPrintf("error PAPI_read_counters %d\n", PAPI_read_counters(counters, NUM_PAPI_EVENTS));
  }

  long long counters[NUM_PAPI_EVENTS+1];
  for(int i=0;i<NUM_PAPI_EVENTS;i++) counters[i]=0LL;
  if(PAPI_read_counters(counters, NUM_PAPI_EVENTS)==PAPI_OK) {
#if !MEASURE_PAPI_SPP
    results[0] = (double)counters[0]/1e6;
    results[1] = (double)counters[1]/1e6;
#else
    for(int i=0;i<NUM_PAPI_EVENTS;i++) results[i] = counters[i]/1e6;
#endif
  }
  PAPI_stop_counters(counters, NUM_PAPI_EVENTS);

  results[NUM_PAPI_EVENTS] = CkpvAccess(papiEvents)[NUM_PAPI_EVENTS];
  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
  CkCallback cb(CkIndex_Node::resumeAfterPapiMeasureBarrier(NULL), nd[0]);
  contribute(sizeof(double)*(NUM_PAPI_EVENTS+1), &results, CkReduction::sum_double, cb);
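// resumeAfterPapiMeasureBarrier() receives the summed counter values from the
// reduction above; dividing by CkNumPes() gives the per-PE averages used in
// the printouts below.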
#ifdef MEASURE_NAMD_WITH_PAPI
  double *results = (double *)msg->getData();
  double endtime = CmiWallTimer();

#if MEASURE_PAPI_SPP
  CkPrintf("SPP INFO: PAPI_FP_OPS timestep %d to %d is %lf(1e6)\n", bstep, estep, results[0]);
  CkPrintf("SPP INFO: PAPI_TOT_INS timestep %d to %d is %lf(1e6)\n", bstep, estep, results[1]);
  CkPrintf("SPP INFO: perf::PERF_COUNT_HW_CACHE_LL:MISS timestep %d to %d is %lf(1e6)\n", bstep, estep, results[2]);
  CkPrintf("SPP INFO: DATA_PREFETCHER:ALL timestep %d to %d is %lf(1e6)\n", bstep, estep, results[3]);
  CkPrintf("SPP INFO: PAPI_L1_DCA timestep %d to %d is %lf(1e6)\n", bstep, estep, results[4]);
  CkPrintf("SPP INFO: PAPI_TOT_CYC timestep %d to %d is %lf(1e6)\n", bstep, estep, results[5]);

  CkPrintf("SPP INFO: WALLtime timestep %d to %d is %lf\n", bstep, estep, endtime-results[NUM_PAPI_EVENTS]);
  CkPrintf("SPP INFO: endtime %lf avgtime %lf tottime %lf\n", endtime, results[NUM_PAPI_EVENTS]/CkNumPes(), results[NUM_PAPI_EVENTS]);

  if(CkpvAccess(papiEvents)[0] == PAPI_FP_INS){
    double totalFPIns = results[0];
    if(CkpvAccess(papiEvents)[1] == PAPI_FMA_INS) totalFPIns += (results[1]*2);
    CkPrintf("FLOPS INFO: from timestep %d to %d, the total FP instruction count of NAMD is %lf(x1e6) per processor\n",
             bstep, estep, totalFPIns/CkNumPes());
  }

  char nameBuf[PAPI_MAX_STR_LEN];
  CkPrintf("PAPI COUNTERS INFO: from timestep %d to %d, ", bstep, estep);
  for(int i=0; i<NUM_PAPI_EVENTS; i++) {
    PAPI_event_code_to_name(CkpvAccess(papiEvents)[i], nameBuf);
    CkPrintf("%s is %lf(x1e6), ", nameBuf, results[i]/CkNumPes());
  }
  CkPrintf("per processor\n");
  int numpes = CkNumPes();
  int nodesize = CkMyNodeSize();

  sprintf(fname, "mapdump_%s.%d_%d_%d_%s", filename, numpes, nodesize, tag, gNAMDBinaryName);

  FILE *fp = fopen(fname,"w");
  if ( ! fp ) {
    NAMD_die("Error in outputting PatchMap and ComputeMap info!\n");
  }

  fprintf(fp, "%d %d %d %d %d %d %d\n", numpes, nodesize, numPatches, numComputes,

  for(int i=0; i<numPatches; i++) {
#ifdef MEM_OPT_VERSION
    fprintf(fp, "%d %d\n", pMap->numAtoms(i), pMap->node(i));

  for(int i=0; i<numComputes; i++) {
    fprintf(fp, "%d %d %d %d\n", cMap->node(i), cMap->type(i), cMap->pid(i,0), cMap->pid(i,1));
  }
#ifndef NODEGROUP_FORCE_REGISTER

  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
  PatchData* patchData = cpdata.ckLocalBranch();
  return patchData->script;

#include "Node.def.h"