12 #if !defined(WIN32) || defined(__CYGWIN__) 16 #include "Node.decl.h" 24 #define MIN_DEBUG_LEVEL 3 35 #include "main.decl.h" 58 #include "ComputeMgr.decl.h" 59 #include "ComputePmeMgr.decl.h" 61 #include "ComputeCUDAMgr.decl.h" 63 #include "ComputePmeCUDAMgr.decl.h" 66 #include "ComputeGridForceMgr.decl.h" 71 #include "CollectionMgr.decl.h" 72 #include "ParallelIOMgr.decl.h" 82 #if(CMK_CCS_AVAILABLE && CMK_WEB_MODE) 83 extern "C" void CApplicationInit();
105 #if defined(CMK_BALANCED_INJECTION_API) && CMK_BALANCED_INJECTION_API != 0 106 #include "ckBIconfig.h" 111 #include "CollectionMgr.decl.h" 112 #include "CollectionMaster.decl.h" 115 extern "C" void HPM_Init(
int);
116 extern "C" void HPM_Start(
char *label,
int);
117 extern "C" void HPM_Stop(
char *label,
int);
118 extern "C" void HPM_Print(
int,
int);
121 #if defined(NAMD_MIC) 122 extern void mic_dumpHostDeviceComputeMap();
123 extern void mic_initHostDeviceLDB();
126 #ifdef MEASURE_NAMD_WITH_PAPI 131 #define NUM_PAPI_EVENTS 6 134 #define MEASURE_PAPI_SPP 1 135 #define MEASURE_PAPI_CACHE 0 136 #define MEASURE_PAPI_FLOPS 0 138 static void namdInitPapiCounters(){
141 int retval = PAPI_library_init(PAPI_VER_CURRENT);
142 if(retval != PAPI_VER_CURRENT) {
144 NAMD_die(
"PAPI library is not compatitible!");
149 if(PAPI_thread_init(pthread_self)!=PAPI_OK) {
151 NAMD_die(
"Multi-thread mode in PAPI could not be initialized!");
156 CkpvInitialize(
int *, papiEvents);
157 CkpvAccess(papiEvents) =
new int[NUM_PAPI_EVENTS+1];
159 #if MEASURE_PAPI_CACHE 160 if(PAPI_query_event(PAPI_L1_DCM)==PAPI_OK) {
161 CkpvAccess(papiEvents)[0] = PAPI_L1_DCM;
164 CkPrintf(
"WARNING: PAPI_L1_DCM doesn't exsit on this platform!\n");
167 CkpvAccess(papiEvents)[0] = PAPI_TOT_INS;
170 if(PAPI_query_event(PAPI_L2_DCM)==PAPI_OK) {
171 CkpvAccess(papiEvents)[1] = PAPI_L2_DCM;
174 CkpvAccess(papiEvents)[1] = PAPI_TOT_CYC;
176 #elif MEASURE_PAPI_FLOPS 177 if(PAPI_query_event(PAPI_FP_INS)==PAPI_OK) {
178 CkpvAccess(papiEvents)[0] = PAPI_FP_INS;
181 CkPrintf(
"WARNING: PAPI_FP_INS doesn't exsit on this platform!\n");
184 CkpvAccess(papiEvents)[0] = PAPI_TOT_INS;
187 if(PAPI_query_event(PAPI_FMA_INS)==PAPI_OK) {
188 CkpvAccess(papiEvents)[1] = PAPI_FMA_INS;
191 CkpvAccess(papiEvents)[1] = PAPI_TOT_CYC;
193 #elif MEASURE_PAPI_SPP 204 int papiEventSet = PAPI_NULL;
205 if (PAPI_create_eventset(&papiEventSet) != PAPI_OK) {
206 CmiAbort(
"PAPI failed to create event set!\n");
209 if(PAPI_query_event(PAPI_FP_OPS)==PAPI_OK) {
210 CkpvAccess(papiEvents)[0] = PAPI_FP_OPS;
213 CkAbort(
"WARNING: PAPI_FP_OPS doesn't exist on this platform!");
216 if(PAPI_query_event(PAPI_TOT_INS)==PAPI_OK) {
217 CkpvAccess(papiEvents)[1] = PAPI_TOT_INS;
220 CkAbort(
"WARNING: PAPI_TOT_INS doesn't exist on this platform!");
225 ret=PAPI_event_name_to_code(
"perf::PERF_COUNT_HW_CACHE_LL:MISS",&EventCode);
226 if(ret==PAPI_OK && PAPI_query_event(EventCode)==PAPI_OK) {
227 CkpvAccess(papiEvents)[2] = EventCode;
230 CkAbort(
"WARNING: perf::PERF_COUNT_HW_CACHE_LL:MISS doesn't exist on this platform!");
233 ret=PAPI_event_name_to_code(
"DATA_PREFETCHER:ALL",&EventCode);
234 if(ret==PAPI_OK && PAPI_query_event(EventCode)==PAPI_OK) {
235 CkpvAccess(papiEvents)[3] = EventCode;
238 CkAbort(
"WARNING: DATA_PREFETCHER:ALL doesn't exist on this platform!");
241 if(PAPI_query_event(PAPI_L1_DCA)==PAPI_OK) {
242 CkpvAccess(papiEvents)[4] = PAPI_L1_DCA;
245 CkAbort(
"WARNING: PAPI_L1_DCA doesn't exist on this platform!");
257 if(PAPI_query_event(PAPI_TOT_CYC)==PAPI_OK) {
258 CkpvAccess(papiEvents)[5] = PAPI_TOT_CYC;
261 CkAbort(
"WARNING: PAPI_TOT_CYC doesn't exist on this platform!");
264 for(
int i=0;i<NUM_PAPI_EVENTS;i++)
266 int papiRetValue=PAPI_add_events(papiEventSet, &CkpvAccess(papiEvents)[i],1);
267 if (papiRetValue != PAPI_OK) {
268 CkPrintf(
"failure for event %d\n",i);
269 if (papiRetValue == PAPI_ECNFLCT) {
270 CmiAbort(
"PAPI events conflict! Please re-assign event types!\n");
272 CmiAbort(
"PAPI failed to add designated events!\n");
281 #ifdef OPENATOM_VERSION 282 static void startOA(){(
char inDriverFile[1024],
char inPhysicsFile[1024], CkCallback doneCB)
284 CProxy_oaSetup moaInstance = CProxy_oaSetup::ckNew(inDriverFile, inPhysicsFile, doneCB);
286 #endif //OPENATOM_VERSION 300 DebugM(4,
"Creating Node\n");
301 #if(CMK_CCS_AVAILABLE && CMK_WEB_MODE) 304 if (CkpvAccess(Node_instance) == 0) {
305 CkpvAccess(Node_instance) =
this;
308 NAMD_bug(
"Node::Node() - another instance of Node exists!");
311 CkpvAccess(BOCclass_group) = msg->
group;
314 CkpvAccess(BOCclass_group).node = thisgroup;
333 TopoManager *tmgr =
new TopoManager();
335 tmgr->rankToCoordinates(CkMyPe(), x, y, z, localRankOnNode);
341 DebugM(4,
"Creating PatchMap, AtomMap, ComputeMap\n");
349 #if defined(CMK_BALANCED_INJECTION_API) && CMK_BALANCED_INJECTION_API != 0 351 balancedInjectionLevel=ck_get_GNI_BIConfig();
353 ck_set_GNI_BIConfig(20);
370 delete CkpvAccess(comm);
374 #ifdef MEASURE_NAMD_WITH_PAPI 375 delete CkpvAccess(papiEvents);
379 void Node::bindBocVars(){
380 DebugM(4,
"Binding to BOC's\n");
381 CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
382 patchMgr = pm.ckLocalBranch();
383 CProxy_ProxyMgr prm(CkpvAccess(BOCclass_group).proxyMgr);
384 proxyMgr = prm.ckLocalBranch();
385 CProxy_WorkDistrib wd(CkpvAccess(BOCclass_group).
workDistrib);
387 CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).
computeMgr);
389 CProxy_LdbCoordinator lc(CkpvAccess(BOCclass_group).
ldbCoordinator);
391 #ifdef MEM_OPT_VERSION 392 CProxy_ParallelIOMgr io(CkpvAccess(BOCclass_group).ioMgr);
393 ioMgr = io.ckLocalBranch();
403 char* foo = (
char*) malloc(size*MB);
406 sprintf(buf,
"Malloc fails on Pe %d at %d MB.\n",CkMyPe(),step*size);
409 memset(foo,0,size*MB*
sizeof(
char));
414 CkPrintf(
"All PEs successfully allocated %d MB.\n", 100*
mallocTest_size);
416 CkPrintf(
"Starting malloc test on all PEs.\n");
420 CkStartQD(CkIndex_Node::mallocTestQd(), &thishandle);
428 (CProxy_Node(CkpvAccess(BOCclass_group).node)).
startup();
449 newTime = CmiWallTimer();
450 iout <<
iINFO <<
"Startup phase " << startupPhase-1 <<
" took " 457 switch (startupPhase) {
484 #if !CMK_SMP || ! USE_CKLOOP 495 CkStartQD(CkIndex_Node::mallocTestQd(), &thishandle);
501 #ifdef MEASURE_NAMD_WITH_PAPI 505 #ifdef MEM_OPT_VERSION 512 ioMgr->initialize(
this);
519 #ifdef MEM_OPT_VERSION 521 ioMgr->readPerAtomInfo();
528 #ifdef MEM_OPT_VERSION 530 ioMgr->updateMolInfo();
533 ioMgr->migrateAtomsMGrp();
544 HPM_Init(localRankOnNode);
554 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 561 CkpvAccess(BOCclass_group).computePmeMgr = CProxy_ComputePmeMgr::ckNew();
563 #ifdef OPENATOM_VERSION 565 CkpvAccess(BOCclass_group).computeMoaMgr = CProxy_ComputeMoaMgr::ckNew();
567 #endif // OPENATOM_VERSION 571 #ifdef OPENATOM_VERSION 574 CkCallback doneMoaStart(CkIndexmain::doneMoaSetup(), thishandle);
578 #endif // OPENATOM_VERSION 588 #ifdef MEM_OPT_VERSION 595 ioMgr->integrateMigratedAtoms();
598 ioMgr->integrateClusterSize();
604 ioMgr->calcAtomsInEachPatch();
620 #ifdef PROCTRACE_DEBUG 621 DebugFileTrace::Instance(
"procTrace");
627 #ifndef MEM_OPT_VERSION 639 #if defined(NAMD_MIC) 640 mic_initHostDeviceLDB();
647 iout <<
iINFO <<
"Simulating initial mapping is done, now NAMD exits\n" <<
endi;
657 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) 658 CProxy_NodeProxyMgr npm(CkpvAccess(BOCclass_group).nodeProxyMgr);
664 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) 665 CProxy_NodeProxyMgr npm(CkpvAccess(BOCclass_group).nodeProxyMgr);
668 npm[CkMyNode()].ckLocalBranch()->registerLocalProxyMgr(CkpvAccess(BOCclass_group).proxyMgr);
670 npm[CkMyNode()].ckLocalBranch()->registerLocalPatchMap(CkMyRank(),
PatchMap::Object());
678 CProxy_ComputeMsmMsaMgr
msm(CkpvAccess(BOCclass_group).computeMsmMsaMgr);
679 msm[CkMyPe()].initialize(
new CkQdMsg);
683 CProxy_ComputeMsmMgr
msm(CkpvAccess(BOCclass_group).computeMsmMgr);
687 if (lattice.
a_p() && lattice.
b_p() && lattice.
c_p()) {
690 msm[CkMyPe()].initialize(msg);
692 else if ( ! CkMyPe() ) {
712 #ifdef OPENATOM_VERSION 714 CProxy_ComputeMoaMgr moa(CkpvAccess(BOCclass_group).computeMoaMgr);
715 moa[CkMyPe()].initialize(
new CkQdMsg);
717 #endif // OPENATOM_VERSION 718 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 721 CProxy_ComputePmeCUDAMgr pme(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
722 pme.ckLocalBranch()->initialize(
new CkQdMsg);
727 CProxy_ComputePmeMgr pme(CkpvAccess(BOCclass_group).computePmeMgr);
728 pme[CkMyPe()].initialize(
new CkQdMsg);
734 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 735 if ( CkMyRank()==0 ) {
736 CProxy_ComputeCUDAMgr nb(CkpvAccess(BOCclass_group).computeCUDAMgr);
737 nb.ckLocalBranch()->initialize(
new CkQdMsg);
747 #ifdef MEM_OPT_VERSION 749 ioMgr->sendAtomsToHomePatchProcs();
756 CProxy_ComputeMsmMgr
msm(CkpvAccess(BOCclass_group).computeMsmMgr);
757 msm[CkMyPe()].initialize_create();
761 #ifdef OPENATOM_VERSION 763 CProxy_ComputeMoaMgr moa(CkpvAccess(BOCclass_group).computeMoaMgr);
764 moa[CkMyPe()].initWorkers(
new CkQdMsg);
766 #endif // OPENATOM_VERSION 767 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 770 CProxy_ComputePmeCUDAMgr pme(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
771 pme[CkMyNode()].initialize_pencils(
new CkQdMsg);
776 CProxy_ComputePmeMgr pme(CkpvAccess(BOCclass_group).computePmeMgr);
777 pme[CkMyPe()].initialize_pencils(
new CkQdMsg);
782 CProxy_ComputeMsmMsaMgr
msm(CkpvAccess(BOCclass_group).computeMsmMsaMgr);
783 msm[CkMyPe()].initWorkers(
new CkQdMsg);
787 CProxy_ComputeMsmMgr
msm(CkpvAccess(BOCclass_group).computeMsmMgr);
788 msm[CkMyPe()].update(
new CkQdMsg);
792 #ifdef MEM_OPT_VERSION 795 ioMgr->createHomePatches();
805 #ifdef OPENATOM_VERSION 807 CProxy_ComputeMoaMgr moa(CkpvAccess(BOCclass_group).computeMoaMgr);
808 moa[CkMyPe()].startWorkers(
new CkQdMsg);
810 #endif // OPENATOM_VERSION 811 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 814 CProxy_ComputePmeCUDAMgr pme(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
815 pme[CkMyNode()].activate_pencils(
new CkQdMsg);
820 CProxy_ComputePmeMgr pme(CkpvAccess(BOCclass_group).computePmeMgr);
821 pme[CkMyPe()].activate_pencils(
new CkQdMsg);
826 CProxy_ComputeMsmMsaMgr
msm(CkpvAccess(BOCclass_group).computeMsmMsaMgr);
827 msm[CkMyPe()].startWorkers(
new CkQdMsg);
843 if (CkMyPe() && CkMyNodeSize() > 1 && ! CkMyRank()) Tcl_DeleteInterp(Tcl_CreateInterp());
846 #ifdef USE_NODEPATCHMGR 851 CProxy_NodeProxyMgr npm(CkpvAccess(BOCclass_group).nodeProxyMgr);
852 npm[CkMyNode()].ckLocalBranch()->createSTForHomePatches(
PatchMap::Object());
863 #if defined(NAMD_MIC) 864 mic_dumpHostDeviceComputeMap();
869 <<
" COMPUTE OBJECTS\n" <<
endi;
871 DebugM(4,
"Creating Computes\n");
873 DebugM(4,
"Building Sequencers\n");
875 DebugM(4,
"Initializing LDB\n");
883 #if defined(CMK_BALANCED_INJECTION_API) && CMK_BALANCED_INJECTION_API != 0 886 ck_set_GNI_BIConfig(balancedInjectionLevel);
902 #ifdef MEM_OPT_VERSION 904 ioMgr->freeMolSpace();
910 NAMD_bug(
"Startup Phase has a bug - check case statement");
918 CkStartQD(CkCallback(CkIndex_Node::startup(), thisgroup));
927 #ifdef OPENATOM_VERSION 928 void Node::doneMoaStart()
930 #ifdef OPENATOM_VERSION_DEBUG 931 CkPrintf(
"doneMoaStart executed on processor %d.\n", CkMyPe() );
932 #endif //OPENATOM_VERSION_DEBUG 934 #endif //OPENATOM_VERSION 936 void Node::namdOneCommInit()
938 if (CkpvAccess(comm) == NULL) {
948 void Node::namdOneRecv() {
949 if ( CmiMyRank() )
return;
960 DebugM(4,
"Getting SimParameters\n");
961 conv_msg = CkpvAccess(comm)->newInputStream(0,
SIMPARAMSTAG);
964 DebugM(4,
"Getting Parameters\n");
968 DebugM(4,
"Getting Molecule\n");
969 conv_msg = CkpvAccess(comm)->newInputStream(0,
MOLECULETAG);
973 iout <<
iINFO <<
"Compute Nodes receiving GoMolecule Information" <<
"\n" <<
endi;
974 conv_msg = CkpvAccess(comm)->newInputStream(0,
MOLECULETAG);
978 DebugM(4,
"Done Receiving\n");
981 void Node::namdOneSend() {
988 DebugM(4,
"Sending SimParameters\n");
992 DebugM(4,
"Sending Parameters\n");
996 DebugM(4,
"Sending Molecule\n");
1004 iout <<
iINFO <<
"Master Node sending GoMolecule Information" <<
"\n" <<
endi;
1019 CProxy_Node nodeProxy(thisgroup);
1020 nodeProxy.resendMolecule();
1025 if ( CmiMyRank() ) {
1028 if ( CmiMyPe() == 0 ) {
1044 conv_msg = CkpvAccess(comm)->newInputStream(0,
MOLECULETAG);
1051 CProxy_Node nodeProxy(thisgroup);
1052 for (
int i=0; i<CmiMyNodeSize(); ++i ) {
1053 nodeProxy[CmiMyPe()+i].resendMolecule2();
1066 void Node::threadInit() {
1068 if (CthImplemented()) {
1069 CthSetStrategyDefault(CthSelf());
1071 NAMD_bug(
"Node::startup() Oh no, tiny elvis, threads not implemented");
1076 void Node::buildSequencers() {
1084 #ifdef NODEGROUP_FORCE_REGISTER 1085 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1086 PatchData *pdata = cpdata.ckLocalBranch();
1088 pdata->
c_out = controller;
1095 for (ai=ai.begin(); ai != ai.end(); ai++) {
1108 (CProxy_Node(CkpvAccess(BOCclass_group).node)).
run();
1126 DebugM(4,
"Starting Sequencers\n");
1130 for (ai=ai.
begin(); ai != ai.
end(); ai++) {
1137 double newTime = CmiWallTimer();
1138 iout <<
iINFO <<
"Startup phase " << startupPhase-1 <<
" took " 1141 iout <<
iINFO <<
"Finished startup at " << newTime <<
" s, " 1155 CkStartQD(CkIndex_Node::scriptBarrier(), &thishandle);
1168 FILE *file = fopen(filename,
"r");
1169 if ( ! file )
NAMD_die(
"node::reloadCharges():Error opening charge file.");
1172 float *charge =
new float[n];
1174 for (
int i = 0; i < n; ++i ) {
1175 if ( ! fscanf(file,
"%f",&charge[i]) )
1176 NAMD_die(
"Node::reloadCharges():Not enough numbers in charge file.");
1180 CProxy_Node(thisgroup).reloadCharges(charge,n);
1181 #ifdef NODEGROUP_FORCE_REGISTER 1184 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1185 cpdata.setDeviceKernelUpdateCounter();
1198 DebugM(4,
"reloadGridforceGrid(const char*) called on node " << CkMyPe() <<
"\n" <<
endi);
1210 if (gridnum < 0 || mgridParams == NULL) {
1211 NAMD_die(
"Node::reloadGridforceGrid(const char*):Could not find grid.");
1216 NAMD_bug(
"Node::reloadGridforceGrid(const char*):grid not found");
1220 CProxy_Node(thisgroup).reloadGridforceGrid(gridnum);
1221 #ifdef NODEGROUP_FORCE_REGISTER 1224 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1225 cpdata.setDeviceKernelUpdateCounter();
1228 DebugM(4,
"reloadGridforceGrid(const char*) finished\n" <<
endi);
1232 DebugM(4,
"updateGridScale(char*, Vector) called on node " << CkMyPe() <<
"\n" <<
endi);
1244 if (gridnum < 0 || mgridParams == NULL) {
1245 NAMD_die(
"Node::updateGridScale(char*, Vector): Could not find grid.");
1250 NAMD_bug(
"Node::updateGridScale(char*, Vector): grid not found");
1252 CProxy_Node(thisgroup).updateGridScale(gridnum, scale.
x, scale.
y, scale.
z);
1254 DebugM(4,
"updateGridScale(char*, Vector) finished\n" <<
endi);
1257 if (CmiMyRank())
return;
1258 DebugM(4,
"updateGridScale(char*, int, float, float, float) called on node " << CkMyPe() <<
"\n" <<
endi);
1262 NAMD_bug(
"Node::updateGridScale(char*, int, float, float, float):grid not found");
1268 #ifdef NODEGROUP_FORCE_REGISTER 1271 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1272 cpdata.setDeviceKernelUpdateCounter();
1275 DebugM(4,
"updateGridScale(char*, int, float, float, float) finished\n" <<
endi);
1279 if (CmiMyRank())
return;
1280 DebugM(4,
"reloadGridforceGrid(int) called on node " << CkMyPe() <<
"\n" <<
endi);
1284 NAMD_bug(
"Node::reloadGridforceGrid(int):grid not found");
1289 DebugM(4,
"Receiving grid\n");
1299 DebugM(4,
"Sending grid\n");
1306 #ifdef NODEGROUP_FORCE_REGISTER 1309 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1310 cpdata.setDeviceKernelUpdateCounter();
1313 DebugM(4,
"reloadGridforceGrid(int) finished\n" <<
endi);
1321 msg->
replica = CmiMyPartition();
1325 strcpy(msg->
key,key);
1326 envelope *env = UsrToEnv(CheckpointMsg::pack(msg));
1327 CmiSetHandler(env,recvCheckpointCReq_index);
1328 #if CMK_HAS_PARTITION 1329 CmiInterSyncSendAndFree(CkMyPe(),remote,env->getTotalsize(),(
char*)env);
1331 CmiSyncSendAndFree(CkMyPe(),env->getTotalsize(),(
char*)env);
1347 msg->
replica = CmiMyPartition();
1348 envelope *env = UsrToEnv(CheckpointMsg::pack(msg));
1349 CmiSetHandler(env,recvCheckpointCAck_index);
1350 #if CMK_HAS_PARTITION 1351 CmiInterSyncSendAndFree(CkMyPe(),remote,env->getTotalsize(),(
char*)env);
1353 CmiSyncSendAndFree(CkMyPe(),env->getTotalsize(),(
char*)env);
1373 CProxy_Node nodeProxy(thisgroup);
1374 nodeProxy[0].recvEnableExitScheduler();
1386 CkStartQD(CkIndex_Node::exitScheduler(), &thishandle);
1396 CProxy_Node nodeProxy(thisgroup);
1397 nodeProxy[0].recvEnableEarlyExit();
1408 CkStartQD(CkIndex_Node::earlyExit(),&thishandle);
1413 NAMD_die(
"Exiting prematurely; see error messages above.");
1426 this->
pdb = state->pdb;
1427 this->state =
state;
1433 HPM_Start(
"500 steps", localRankOnNode);
1439 HPM_Stop(
"500 steps", localRankOnNode);
1440 HPM_Print(CkMyPe(), localRankOnNode);
1446 if(turnOnTrace) traceBegin();
1449 if(turnOnTrace) CmiTurnOnStats();
1450 else CmiTurnOffStats();
1453 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1454 CkCallback cb(CkIndex_Node::resumeAfterTraceBarrier(NULL), nd[0]);
1455 contribute(0, NULL, CkReduction::sum_int, cb);
1460 CmiAssert(CmiMyPe()==0);
1466 #ifdef MEASURE_NAMD_WITH_PAPI 1468 double results[NUM_PAPI_EVENTS+1];
1471 CkpvAccess(papiEvents)[NUM_PAPI_EVENTS]=CmiWallTimer();
1473 long long counters[NUM_PAPI_EVENTS+1];
1474 int ret=PAPI_start_counters(CkpvAccess(papiEvents), NUM_PAPI_EVENTS);
1481 CkPrintf(
"error PAPI_start_counters (%d) at step %d called on proc %d\n",ret , step, CkMyPe());
1483 if(PAPI_read_counters(counters, NUM_PAPI_EVENTS)!=PAPI_OK)
1485 CkPrintf(
"error PAPI_read_counters %d\n",PAPI_read_counters(counters, NUM_PAPI_EVENTS));
1488 long long counters[NUM_PAPI_EVENTS+1];
1489 for(
int i=0;i<NUM_PAPI_EVENTS;i++) counters[i]=0LL;
1490 if(PAPI_read_counters(counters, NUM_PAPI_EVENTS)==PAPI_OK)
1492 #if !MEASURE_PAPI_SPP 1493 results[0] = (double)counters[0]/1e6;
1494 results[1] = (double)counters[1]/1e6;
1496 for(
int i=0;i<NUM_PAPI_EVENTS;i++) results[i] = counters[i]/1e6;
1506 PAPI_stop_counters(counters, NUM_PAPI_EVENTS);
1510 results[NUM_PAPI_EVENTS]=CkpvAccess(papiEvents)[NUM_PAPI_EVENTS];
1511 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1512 CkCallback cb(CkIndex_Node::resumeAfterPapiMeasureBarrier(NULL), nd[0]);
1513 contribute(
sizeof(
double)*(NUM_PAPI_EVENTS+1), &results, CkReduction::sum_double, cb);
1518 #ifdef MEASURE_NAMD_WITH_PAPI 1521 double *results = (
double *)msg->getData();
1522 double endtime=CmiWallTimer();
1525 #if MEASURE_PAPI_SPP 1526 CkPrintf(
"SPP INFO: PAPI_FP_OPS timestep %d to %d is %lf(1e6)\n", bstep,estep,results[0]);
1527 CkPrintf(
"SPP INFO: PAPI_TOT_INS timestep %d to %d is %lf(1e6)\n", bstep,estep,results[1]);
1528 CkPrintf(
"SPP INFO: perf::PERF_COUNT_HW_CACHE_LL:MISS timestep %d to %d is %lf(1e6)\n", bstep,estep,results[2]);
1529 CkPrintf(
"SPP INFO: DATA_PREFETCHER:ALL timestep %d to %d is %lf(1e6)\n", bstep,estep,results[3]);
1530 CkPrintf(
"SPP INFO: PAPI_L1_DCA timestep %d to %d is %lf(1e6)\n", bstep,estep,results[4]);
1531 CkPrintf(
"SPP INFO: PAPI_TOT_CYC timestep %d to % is %lf(1e6)\n", bstep,estep,results[5]);
1534 CkPrintf(
"SPP INFO: WALLtime timestep %d to %d is %lf\n", bstep,estep,endtime-results[NUM_PAPI_EVENTS]);
1535 CkPrintf(
"SPP INFO: endtime %lf avgtime %lf tottime %lf\n", endtime,results[NUM_PAPI_EVENTS]/CkNumPes(),results[NUM_PAPI_EVENTS] );
1537 if(CkpvAccess(papiEvents)[0] == PAPI_FP_INS){
1538 double totalFPIns = results[0];
1539 if(CkpvAccess(papiEvents)[1] == PAPI_FMA_INS) totalFPIns += (results[1]*2);
1540 CkPrintf(
"FLOPS INFO: from timestep %d to %d, the total FP instruction of NAMD is %lf(x1e6) per processor\n",
1541 bstep, estep, totalFPIns/CkNumPes());
1543 char nameBuf[PAPI_MAX_STR_LEN];
1544 CkPrintf(
"PAPI COUNTERS INFO: from timestep %d to %d, ",
1546 for(
int i=0; i<NUM_PAPI_EVENTS; i++) {
1547 PAPI_event_code_to_name(CkpvAccess(papiEvents)[i], nameBuf);
1548 CkPrintf(
"%s is %lf(x1e6), ", nameBuf, results[i]/CkNumPes());
1550 CkPrintf(
"per processor\n");
1563 int numpes = CkNumPes();
1564 int nodesize = CkMyNodeSize();
1571 sprintf(fname,
"mapdump_%s.%d_%d_%d_%s", filename, numpes, nodesize, tag,
gNAMDBinaryName);
1573 FILE *fp = fopen(fname,
"w");
1575 NAMD_die(
"Error in outputing PatchMap and ComputeMap info!\n");
1582 fprintf(fp,
"%d %d %d %d %d %d %d\n", numpes, nodesize, numPatches, numComputes,
1585 for(
int i=0; i<numPatches; i++) {
1586 #ifdef MEM_OPT_VERSION 1587 fprintf(fp,
"%d %d\n", pMap->numAtoms(i), pMap->
node(i));
1594 for(
int i=0; i<numComputes; i++) {
1595 fprintf(fp,
"%d %d %d %d\n", cMap->
node(i), cMap->
type(i), cMap->
pid(i,0), cMap->
pid(i,1));
1600 #ifndef NODEGROUP_FORCE_REGISTER 1604 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1605 PatchData* patchData = cpdata.ckLocalBranch();
1606 return patchData->
script;
1617 #include "Node.def.h"
void allocateMap(int nAtomIDs)
#define NAMD_EVENT_STOP(eon, id)
std::ostream & iINFO(std::ostream &s)
Bool simulateInitialMapping
void recvCheckpointReq(const char *key, int task, checkpoint &cp)
void recvCheckpointCReq_handler(envelope *)
void setPatchMapArrived(bool s)
GridforceGrid * get_gridfrc_grid(int gridnum) const
void receive_SimParameters(MIStream *)
void send_GoMolecule(MOStream *)
static ProxyMgr * Object()
NAMD_HOST_DEVICE int c_p() const
void saveMolDataPointers(NamdState *)
void receive_GoMolecule(MIStream *)
LdbCoordinator * ldbCoordinator
static PatchMap * Object()
void sendEnableEarlyExit(void)
void send_Molecule(MOStream *)
static void exit(int status=0)
static AtomMap * Instance()
SimParameters * simParameters
int proxyTreeBranchFactor
static void pack_grid(GridforceGrid *grid, MOStream *msg)
void createLoadBalancer()
HomePatchList * homePatchList()
std::ostream & endi(std::ostream &s)
void enableScriptBarrier()
void scriptSet(const char *, const char *)
char value[MAX_SCRIPT_PARAM_SIZE]
void sendEnableExitScheduler(void)
void recvCheckpointReq(CheckpointMsg *)
static void messageStartUp()
ResizeArrayIter< T > begin(void) const
void reloadCharges(float charge[], int n)
Patch * patch(PatchID pid)
void outputPatchComputeMaps(const char *filename, int tag)
int loadStructure(const char *, const char *, int)
void createComputes(ComputeMap *map)
Molecule stores the structural information for the system.
NAMD_HOST_DEVICE int b_p() const
void split(int iStream, int numStreams)
void recvCheckpointAck(CheckpointMsg *)
int gridsize_c(void) const
virtual void reinitialize(SimParameters *simParams, MGridforceParams *mgridParams)=0
void sendComputeMap(void)
int gridsize_a(void) const
void resumeAfterPapiMeasureBarrier(CkReductionMsg *msg)
void initialize(PatchMap *pmap, ComputeMap *cmap, int reinit=0)
void scriptParam(ScriptParamMsg *)
int index_for_key(const char *key)
int numPatches(void) const
#define NAMD_EVENT_START(eon, id)
void enableExitScheduler(void)
void buildProxySpanningTree()
void createHomePatches(void)
void NAMD_bug(const char *err_msg)
ComputeType type(ComputeID cid)
Controller::checkpoint checkpoint
#define MGRIDFORCEPARAMS_DEFAULTKEY
MGridforceParams * find_key(const char *key)
void traceBarrier(int turnOnTrace, int step)
void updateGridScale(const char *key, Vector scale)
void recvCheckpointAck(checkpoint &cp)
void reloadStructure(const char *, const char *)
void recvEnableExitScheduler(void)
void get_extremes(ScaledPosition &xmin, ScaledPosition &xmax) const
NAMD_HOST_DEVICE int a_p() const
void NAMD_die(const char *err_msg)
static LdbCoordinator * Object()
static AtomMap * Object()
MGridforceParamsList mgridforcelist
static void nonbonded_select()
void recvEnableEarlyExit(void)
void send_Parameters(MOStream *)
int isRecvSpanningTreeOn()
static ComputeMap * Instance()
void resumeAfterTraceBarrier(CkReductionMsg *msg)
CkpvDeclare(AtomMap *, AtomMap_instance)
WorkDistrib * workDistrib
Parameters * node_parameters
static GridforceGrid * unpack_grid(int gridnum, MIStream *msg)
SimParameters * node_simParameters
void recvCheckpointCAck_handler(envelope *)
void registerUserEventsForAllComputeObjs(void)
static ComputeMap * Object()
void useController(Controller *controllerPtr)
void papiMeasureBarrier(int turnOnMeasure, int step)
void distributeHomePatches(void)
void setProxyTreeBranchFactor(int dim)
int gridsize_b(void) const
int set_gridfrc_grid(int gridnum, GridforceGrid *grid)
__thread DeviceCUDA * deviceCUDA
int pid(ComputeID cid, int i)
int isSendSpanningTreeOn()
void resumeAfterTraceBarrier(int)
void sendCheckpointReq(int remote, const char *key, int task, Lattice &lat, ControllerState &cs)
void enableEarlyExit(void)
void receive_Molecule(MIStream *)
MGridforceParams * at_index(int idx)
ResizeArrayIter< T > end(void) const
static PatchMap * Instance()
void useSequencer(Sequencer *sequencerPtr)
char param[MAX_SCRIPT_PARAM_SIZE]
void receive_Parameters(MIStream *)
virtual void set_scale(Vector s)=0
void reloadCharges(const char *filename)
void send_SimParameters(MOStream *)
void reloadGridforceGrid(const char *key)
void assignNodeToPatch(void)