12 #if !defined(WIN32) || defined(__CYGWIN__)
16 #include "Node.decl.h"
24 #define MIN_DEBUG_LEVEL 3
35 #include "main.decl.h"
58 #include "ComputeMgr.decl.h"
59 #include "ComputePmeMgr.decl.h"
61 #include "ComputeCUDAMgr.decl.h"
63 #include "ComputePmeCUDAMgr.decl.h"
66 #include "ComputeGridForceMgr.decl.h"
71 #include "CollectionMgr.decl.h"
72 #include "ParallelIOMgr.decl.h"
79 #if(CMK_CCS_AVAILABLE && CMK_WEB_MODE)
80 extern "C" void CApplicationInit();
98 #if defined(CMK_BALANCED_INJECTION_API) && CMK_BALANCED_INJECTION_API != 0
99 #include "ckBIconfig.h"
104 #include "CollectionMgr.decl.h"
105 #include "CollectionMaster.decl.h"
108 extern "C" void HPM_Init(
int);
109 extern "C" void HPM_Start(
char *label,
int);
110 extern "C" void HPM_Stop(
char *label,
int);
111 extern "C" void HPM_Print(
int,
int);
114 #if defined(NAMD_MIC)
115 extern void mic_dumpHostDeviceComputeMap();
116 extern void mic_initHostDeviceLDB();
119 #ifdef MEASURE_NAMD_WITH_PAPI
124 #define NUM_PAPI_EVENTS 6
127 #define MEASURE_PAPI_SPP 1
128 #define MEASURE_PAPI_CACHE 0
129 #define MEASURE_PAPI_FLOPS 0
131 static void namdInitPapiCounters(){
134 int retval = PAPI_library_init(PAPI_VER_CURRENT);
135 if(retval != PAPI_VER_CURRENT) {
137 NAMD_die(
"PAPI library is not compatitible!");
142 if(PAPI_thread_init(pthread_self)!=PAPI_OK) {
144 NAMD_die(
"Multi-thread mode in PAPI could not be initialized!");
149 CkpvInitialize(
int *, papiEvents);
150 CkpvAccess(papiEvents) =
new int[NUM_PAPI_EVENTS+1];
152 #if MEASURE_PAPI_CACHE
153 if(PAPI_query_event(PAPI_L1_DCM)==PAPI_OK) {
154 CkpvAccess(papiEvents)[0] = PAPI_L1_DCM;
157 CkPrintf(
"WARNING: PAPI_L1_DCM doesn't exsit on this platform!\n");
160 CkpvAccess(papiEvents)[0] = PAPI_TOT_INS;
163 if(PAPI_query_event(PAPI_L2_DCM)==PAPI_OK) {
164 CkpvAccess(papiEvents)[1] = PAPI_L2_DCM;
167 CkpvAccess(papiEvents)[1] = PAPI_TOT_CYC;
169 #elif MEASURE_PAPI_FLOPS
170 if(PAPI_query_event(PAPI_FP_INS)==PAPI_OK) {
171 CkpvAccess(papiEvents)[0] = PAPI_FP_INS;
174 CkPrintf(
"WARNING: PAPI_FP_INS doesn't exsit on this platform!\n");
177 CkpvAccess(papiEvents)[0] = PAPI_TOT_INS;
180 if(PAPI_query_event(PAPI_FMA_INS)==PAPI_OK) {
181 CkpvAccess(papiEvents)[1] = PAPI_FMA_INS;
184 CkpvAccess(papiEvents)[1] = PAPI_TOT_CYC;
186 #elif MEASURE_PAPI_SPP
197 int papiEventSet = PAPI_NULL;
198 if (PAPI_create_eventset(&papiEventSet) != PAPI_OK) {
199 CmiAbort(
"PAPI failed to create event set!\n");
202 if(PAPI_query_event(PAPI_FP_OPS)==PAPI_OK) {
203 CkpvAccess(papiEvents)[0] = PAPI_FP_OPS;
206 CkAbort(
"WARNING: PAPI_FP_OPS doesn't exist on this platform!");
209 if(PAPI_query_event(PAPI_TOT_INS)==PAPI_OK) {
210 CkpvAccess(papiEvents)[1] = PAPI_TOT_INS;
213 CkAbort(
"WARNING: PAPI_TOT_INS doesn't exist on this platform!");
218 ret=PAPI_event_name_to_code(
"perf::PERF_COUNT_HW_CACHE_LL:MISS",&EventCode);
219 if(ret==PAPI_OK && PAPI_query_event(EventCode)==PAPI_OK) {
220 CkpvAccess(papiEvents)[2] = EventCode;
223 CkAbort(
"WARNING: perf::PERF_COUNT_HW_CACHE_LL:MISS doesn't exist on this platform!");
226 ret=PAPI_event_name_to_code(
"DATA_PREFETCHER:ALL",&EventCode);
227 if(ret==PAPI_OK && PAPI_query_event(EventCode)==PAPI_OK) {
228 CkpvAccess(papiEvents)[3] = EventCode;
231 CkAbort(
"WARNING: DATA_PREFETCHER:ALL doesn't exist on this platform!");
234 if(PAPI_query_event(PAPI_L1_DCA)==PAPI_OK) {
235 CkpvAccess(papiEvents)[4] = PAPI_L1_DCA;
238 CkAbort(
"WARNING: PAPI_L1_DCA doesn't exist on this platform!");
250 if(PAPI_query_event(PAPI_TOT_CYC)==PAPI_OK) {
251 CkpvAccess(papiEvents)[5] = PAPI_TOT_CYC;
254 CkAbort(
"WARNING: PAPI_TOT_CYC doesn't exist on this platform!");
257 for(
int i=0;i<NUM_PAPI_EVENTS;i++)
259 int papiRetValue=PAPI_add_events(papiEventSet, &CkpvAccess(papiEvents)[i],1);
260 if (papiRetValue != PAPI_OK) {
261 CkPrintf(
"failure for event %d\n",i);
262 if (papiRetValue == PAPI_ECNFLCT) {
263 CmiAbort(
"PAPI events conflict! Please re-assign event types!\n");
265 CmiAbort(
"PAPI failed to add designated events!\n");
274 #ifdef OPENATOM_VERSION
275 static void startOA(){(
char inDriverFile[1024],
char inPhysicsFile[1024], CkCallback doneCB)
277 CProxy_oaSetup moaInstance = CProxy_oaSetup::ckNew(inDriverFile, inPhysicsFile, doneCB);
279 #endif //OPENATOM_VERSION
293 DebugM(4,
"Creating Node\n");
294 #if(CMK_CCS_AVAILABLE && CMK_WEB_MODE)
297 if (CkpvAccess(Node_instance) == 0) {
298 CkpvAccess(Node_instance) =
this;
299 eventEndOfTimeStep = traceRegisterUserEvent(
"EndOfTimeStep", 135);
301 NAMD_bug(
"Node::Node() - another instance of Node exists!");
304 CkpvAccess(BOCclass_group) = msg->
group;
307 CkpvAccess(BOCclass_group).node = thisgroup;
326 TopoManager *tmgr =
new TopoManager();
328 tmgr->rankToCoordinates(CkMyPe(), x, y, z, localRankOnNode);
334 DebugM(4,
"Creating PatchMap, AtomMap, ComputeMap\n");
342 #if defined(CMK_BALANCED_INJECTION_API) && CMK_BALANCED_INJECTION_API != 0
344 balancedInjectionLevel=ck_get_GNI_BIConfig();
346 ck_set_GNI_BIConfig(20);
362 delete CkpvAccess(comm);
366 #ifdef MEASURE_NAMD_WITH_PAPI
367 delete CkpvAccess(papiEvents);
371 void Node::bindBocVars(){
372 DebugM(4,
"Binding to BOC's\n");
373 CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
374 patchMgr = pm.ckLocalBranch();
375 CProxy_ProxyMgr prm(CkpvAccess(BOCclass_group).proxyMgr);
376 proxyMgr = prm.ckLocalBranch();
377 CProxy_WorkDistrib wd(CkpvAccess(BOCclass_group).
workDistrib);
379 CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).
computeMgr);
381 CProxy_LdbCoordinator lc(CkpvAccess(BOCclass_group).
ldbCoordinator);
383 #ifdef MEM_OPT_VERSION
384 CProxy_ParallelIOMgr io(CkpvAccess(BOCclass_group).ioMgr);
385 ioMgr = io.ckLocalBranch();
395 char* foo = (
char*) malloc(size*MB);
398 sprintf(buf,
"Malloc fails on Pe %d at %d MB.\n",CkMyPe(),step*size);
401 memset(foo,0,size*MB*
sizeof(
char));
406 CkPrintf(
"All PEs successfully allocated %d MB.\n", 100*
mallocTest_size);
408 CkPrintf(
"Starting malloc test on all PEs.\n");
412 CkStartQD(CkIndex_Node::mallocTestQd(), &thishandle);
420 (CProxy_Node(CkpvAccess(BOCclass_group).node)).
startup();
434 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
441 startupTime = CmiWallTimer();
442 iout << iINFO <<
"Entering startup at " << startupTime <<
" s, ";
444 newTime = CmiWallTimer();
445 iout <<
iINFO <<
"Startup phase " << startupPhase-1 <<
" took "
446 << newTime - startupTime <<
" s, ";
447 startupTime = newTime;
452 switch (startupPhase) {
479 #if !CMK_SMP || ! USE_CKLOOP
490 CkStartQD(CkIndex_Node::mallocTestQd(), &thishandle);
496 #ifdef MEASURE_NAMD_WITH_PAPI
500 #ifdef MEM_OPT_VERSION
507 ioMgr->initialize(
this);
514 #ifdef MEM_OPT_VERSION
516 ioMgr->readPerAtomInfo();
523 #ifdef MEM_OPT_VERSION
525 ioMgr->updateMolInfo();
528 ioMgr->migrateAtomsMGrp();
539 HPM_Init(localRankOnNode);
549 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
556 CkpvAccess(BOCclass_group).computePmeMgr = CProxy_ComputePmeMgr::ckNew();
558 #ifdef OPENATOM_VERSION
560 CkpvAccess(BOCclass_group).computeMoaMgr = CProxy_ComputeMoaMgr::ckNew();
562 #endif // OPENATOM_VERSION
566 #ifdef OPENATOM_VERSION
569 CkCallback doneMoaStart(CkIndexmain::doneMoaSetup(), thishandle);
573 #endif // OPENATOM_VERSION
583 #ifdef MEM_OPT_VERSION
590 ioMgr->integrateMigratedAtoms();
593 ioMgr->integrateClusterSize();
599 ioMgr->calcAtomsInEachPatch();
615 #ifdef PROCTRACE_DEBUG
616 DebugFileTrace::Instance(
"procTrace");
622 #ifndef MEM_OPT_VERSION
634 #if defined(NAMD_MIC)
635 mic_initHostDeviceLDB();
642 iout <<
iINFO <<
"Simulating initial mapping is done, now NAMD exits\n" <<
endi;
652 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
653 CProxy_NodeProxyMgr npm(CkpvAccess(BOCclass_group).nodeProxyMgr);
659 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
660 CProxy_NodeProxyMgr npm(CkpvAccess(BOCclass_group).nodeProxyMgr);
663 npm[CkMyNode()].ckLocalBranch()->registerLocalProxyMgr(CkpvAccess(BOCclass_group).proxyMgr);
665 npm[CkMyNode()].ckLocalBranch()->registerLocalPatchMap(CkMyRank(),
PatchMap::Object());
673 CProxy_ComputeMsmMsaMgr msm(CkpvAccess(BOCclass_group).computeMsmMsaMgr);
674 msm[CkMyPe()].initialize(
new CkQdMsg);
678 CProxy_ComputeMsmMgr msm(CkpvAccess(BOCclass_group).computeMsmMgr);
682 if (lattice.
a_p() && lattice.
b_p() && lattice.
c_p()) {
685 msm[CkMyPe()].initialize(msg);
687 else if ( ! CkMyPe() ) {
707 #ifdef OPENATOM_VERSION
709 CProxy_ComputeMoaMgr moa(CkpvAccess(BOCclass_group).computeMoaMgr);
710 moa[CkMyPe()].initialize(
new CkQdMsg);
712 #endif // OPENATOM_VERSION
713 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
716 CProxy_ComputePmeCUDAMgr pme(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
717 pme.ckLocalBranch()->initialize(
new CkQdMsg);
722 CProxy_ComputePmeMgr pme(CkpvAccess(BOCclass_group).computePmeMgr);
723 pme[CkMyPe()].initialize(
new CkQdMsg);
729 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
731 CProxy_ComputeCUDAMgr nb(CkpvAccess(BOCclass_group).computeCUDAMgr);
732 nb.ckLocalBranch()->initialize(
new CkQdMsg);
742 #ifdef MEM_OPT_VERSION
744 ioMgr->sendAtomsToHomePatchProcs();
751 CProxy_ComputeMsmMgr msm(CkpvAccess(BOCclass_group).computeMsmMgr);
752 msm[CkMyPe()].initialize_create();
756 #ifdef OPENATOM_VERSION
758 CProxy_ComputeMoaMgr moa(CkpvAccess(BOCclass_group).computeMoaMgr);
759 moa[CkMyPe()].initWorkers(
new CkQdMsg);
761 #endif // OPENATOM_VERSION
762 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
765 CProxy_ComputePmeCUDAMgr pme(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
766 pme[CkMyNode()].initialize_pencils(
new CkQdMsg);
771 CProxy_ComputePmeMgr pme(CkpvAccess(BOCclass_group).computePmeMgr);
772 pme[CkMyPe()].initialize_pencils(
new CkQdMsg);
777 CProxy_ComputeMsmMsaMgr msm(CkpvAccess(BOCclass_group).computeMsmMsaMgr);
778 msm[CkMyPe()].initWorkers(
new CkQdMsg);
782 CProxy_ComputeMsmMgr msm(CkpvAccess(BOCclass_group).computeMsmMgr);
783 msm[CkMyPe()].update(
new CkQdMsg);
787 #ifdef MEM_OPT_VERSION
790 ioMgr->createHomePatches();
800 #ifdef OPENATOM_VERSION
802 CProxy_ComputeMoaMgr moa(CkpvAccess(BOCclass_group).computeMoaMgr);
803 moa[CkMyPe()].startWorkers(
new CkQdMsg);
805 #endif // OPENATOM_VERSION
806 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
809 CProxy_ComputePmeCUDAMgr pme(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
810 pme[CkMyNode()].activate_pencils(
new CkQdMsg);
815 CProxy_ComputePmeMgr pme(CkpvAccess(BOCclass_group).computePmeMgr);
816 pme[CkMyPe()].activate_pencils(
new CkQdMsg);
821 CProxy_ComputeMsmMsaMgr msm(CkpvAccess(BOCclass_group).computeMsmMsaMgr);
822 msm[CkMyPe()].startWorkers(
new CkQdMsg);
838 if (CkMyPe() && CkMyNodeSize() > 1 && ! CkMyRank()) Tcl_DeleteInterp(Tcl_CreateInterp());
841 #ifdef USE_NODEPATCHMGR
846 CProxy_NodeProxyMgr npm(CkpvAccess(BOCclass_group).nodeProxyMgr);
847 npm[CkMyNode()].ckLocalBranch()->createSTForHomePatches(
PatchMap::Object());
858 #if defined(NAMD_MIC)
859 mic_dumpHostDeviceComputeMap();
864 <<
" COMPUTE OBJECTS\n" <<
endi;
866 DebugM(4,
"Creating Computes\n");
868 DebugM(4,
"Building Sequencers\n");
870 DebugM(4,
"Initializing LDB\n");
878 #if defined(CMK_BALANCED_INJECTION_API) && CMK_BALANCED_INJECTION_API != 0
881 ck_set_GNI_BIConfig(balancedInjectionLevel);
897 #ifdef MEM_OPT_VERSION
899 ioMgr->freeMolSpace();
905 NAMD_bug(
"Startup Phase has a bug - check case statement");
912 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
916 CkStartQD(CkCallback(CkIndex_Node::startup(), thisgroup));
923 #ifdef OPENATOM_VERSION
924 void Node::doneMoaStart()
926 #ifdef OPENATOM_VERSION_DEBUG
927 CkPrintf(
"doneMoaStart executed on processor %d.\n", CkMyPe() );
928 #endif //OPENATOM_VERSION_DEBUG
930 #endif //OPENATOM_VERSION
932 void Node::namdOneCommInit()
934 if (CkpvAccess(comm) == NULL) {
944 void Node::namdOneRecv() {
945 if ( CmiMyRank() )
return;
956 DebugM(4,
"Getting SimParameters\n");
957 conv_msg = CkpvAccess(comm)->newInputStream(0,
SIMPARAMSTAG);
960 DebugM(4,
"Getting Parameters\n");
964 DebugM(4,
"Getting Molecule\n");
965 conv_msg = CkpvAccess(comm)->newInputStream(0,
MOLECULETAG);
969 iout <<
iINFO <<
"Compute Nodes receiving GoMolecule Information" <<
"\n" <<
endi;
970 conv_msg = CkpvAccess(comm)->newInputStream(0,
MOLECULETAG);
974 DebugM(4,
"Done Receiving\n");
977 void Node::namdOneSend() {
984 DebugM(4,
"Sending SimParameters\n");
988 DebugM(4,
"Sending Parameters\n");
992 DebugM(4,
"Sending Molecule\n");
1000 iout <<
iINFO <<
"Master Node sending GoMolecule Information" <<
"\n" <<
endi;
1015 CProxy_Node nodeProxy(thisgroup);
1016 nodeProxy.resendMolecule();
1021 if ( CmiMyRank() ) {
1024 if ( CmiMyPe() == 0 ) {
1040 conv_msg = CkpvAccess(comm)->newInputStream(0,
MOLECULETAG);
1048 CProxy_Node nodeProxy(thisgroup);
1049 for (
int i=0; i<CmiMyNodeSize(); ++i ) {
1050 nodeProxy[CmiMyPe()+i].resendMolecule2();
1063 void Node::threadInit() {
1065 if (CthImplemented()) {
1066 CthSetStrategyDefault(CthSelf());
1068 NAMD_bug(
"Node::startup() Oh no, tiny elvis, threads not implemented");
1073 void Node::buildSequencers() {
1084 for (ai=ai.begin(); ai != ai.end(); ai++) {
1097 (CProxy_Node(CkpvAccess(BOCclass_group).node)).
run();
1113 DebugM(4,
"Starting Sequencers\n");
1117 for (ai=ai.
begin(); ai != ai.
end(); ai++) {
1124 double newTime = CmiWallTimer();
1125 iout <<
iINFO <<
"Startup phase " << startupPhase-1 <<
" took "
1126 << newTime - startupTime <<
" s, "
1128 iout <<
iINFO <<
"Finished startup at " << newTime <<
" s, "
1141 CkStartQD(CkIndex_Node::scriptBarrier(), &thishandle);
1154 FILE *file = fopen(filename,
"r");
1155 if ( ! file )
NAMD_die(
"node::reloadCharges():Error opening charge file.");
1158 float *
charge =
new float[n];
1160 for (
int i = 0; i < n; ++i ) {
1161 if ( ! fscanf(file,
"%f",&charge[i]) )
1162 NAMD_die(
"Node::reloadCharges():Not enough numbers in charge file.");
1166 CProxy_Node(thisgroup).reloadCharges(charge,n);
1177 DebugM(4,
"reloadGridforceGrid(const char*) called on node " << CkMyPe() <<
"\n" << endi);
1189 if (gridnum < 0 || mgridParams == NULL) {
1190 NAMD_die(
"Node::reloadGridforceGrid(const char*):Could not find grid.");
1195 NAMD_bug(
"Node::reloadGridforceGrid(const char*):grid not found");
1199 CProxy_Node(thisgroup).reloadGridforceGrid(gridnum);
1201 DebugM(4,
"reloadGridforceGrid(const char*) finished\n" << endi);
1205 DebugM(4,
"updateGridScale(char*, Vector) called on node " << CkMyPe() <<
"\n" << endi);
1217 if (gridnum < 0 || mgridParams == NULL) {
1218 NAMD_die(
"Node::updateGridScale(char*, Vector): Could not find grid.");
1223 NAMD_bug(
"Node::updateGridScale(char*, Vector): grid not found");
1225 CProxy_Node(thisgroup).updateGridScale(gridnum, scale.
x, scale.
y, scale.
z);
1227 DebugM(4,
"updateGridScale(char*, Vector) finished\n" << endi);
1230 if (CmiMyRank())
return;
1231 DebugM(4,
"updateGridScale(char*, int, float, float, float) called on node " << CkMyPe() <<
"\n" << endi);
1235 NAMD_bug(
"Node::updateGridScale(char*, int, float, float, float):grid not found");
1242 DebugM(4,
"updateGridScale(char*, int, float, float, float) finished\n" << endi);
1246 if (CmiMyRank())
return;
1247 DebugM(4,
"reloadGridforceGrid(int) called on node " << CkMyPe() <<
"\n" << endi);
1251 NAMD_bug(
"Node::reloadGridforceGrid(int):grid not found");
1256 DebugM(4,
"Receiving grid\n");
1266 DebugM(4,
"Sending grid\n");
1274 DebugM(4,
"reloadGridforceGrid(int) finished\n" << endi);
1282 msg->
replica = CmiMyPartition();
1286 strcpy(msg->
key,key);
1287 envelope *env = UsrToEnv(CheckpointMsg::pack(msg));
1288 CmiSetHandler(env,recvCheckpointCReq_index);
1289 #if CMK_HAS_PARTITION
1290 CmiInterSyncSendAndFree(CkMyPe(),remote,env->getTotalsize(),(
char*)env);
1292 CmiSyncSendAndFree(CkMyPe(),env->getTotalsize(),(
char*)env);
1308 msg->
replica = CmiMyPartition();
1309 envelope *env = UsrToEnv(CheckpointMsg::pack(msg));
1310 CmiSetHandler(env,recvCheckpointCAck_index);
1311 #if CMK_HAS_PARTITION
1312 CmiInterSyncSendAndFree(CkMyPe(),remote,env->getTotalsize(),(
char*)env);
1314 CmiSyncSendAndFree(CkMyPe(),env->getTotalsize(),(
char*)env);
1334 CProxy_Node nodeProxy(thisgroup);
1335 nodeProxy[0].recvEnableExitScheduler();
1347 CkStartQD(CkIndex_Node::exitScheduler(), &thishandle);
1357 CProxy_Node nodeProxy(thisgroup);
1358 nodeProxy[0].recvEnableEarlyExit();
1369 CkStartQD(CkIndex_Node::earlyExit(),&thishandle);
1374 NAMD_die(
"Exiting prematurely; see error messages above.");
1387 this->
pdb = state->pdb;
1388 this->state =
state;
1394 HPM_Start(
"500 steps", localRankOnNode);
1400 HPM_Stop(
"500 steps", localRankOnNode);
1401 HPM_Print(CkMyPe(), localRankOnNode);
1407 if(turnOnTrace) traceBegin();
1410 if(turnOnTrace) CmiTurnOnStats();
1411 else CmiTurnOffStats();
1414 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1415 CkCallback cb(CkIndex_Node::resumeAfterTraceBarrier(NULL), nd[0]);
1416 contribute(0, NULL, CkReduction::sum_int, cb);
1421 CmiAssert(CmiMyPe()==0);
1427 #ifdef MEASURE_NAMD_WITH_PAPI
1429 double results[NUM_PAPI_EVENTS+1];
1432 CkpvAccess(papiEvents)[NUM_PAPI_EVENTS]=CmiWallTimer();
1434 long long counters[NUM_PAPI_EVENTS+1];
1435 int ret=PAPI_start_counters(CkpvAccess(papiEvents), NUM_PAPI_EVENTS);
1442 CkPrintf(
"error PAPI_start_counters (%d) at step %d called on proc %d\n",ret , step, CkMyPe());
1444 if(PAPI_read_counters(counters, NUM_PAPI_EVENTS)!=PAPI_OK)
1446 CkPrintf(
"error PAPI_read_counters %d\n",PAPI_read_counters(counters, NUM_PAPI_EVENTS));
1449 long long counters[NUM_PAPI_EVENTS+1];
1450 for(
int i=0;i<NUM_PAPI_EVENTS;i++) counters[i]=0LL;
1451 if(PAPI_read_counters(counters, NUM_PAPI_EVENTS)==PAPI_OK)
1453 #if !MEASURE_PAPI_SPP
1454 results[0] = (double)counters[0]/1e6;
1455 results[1] = (double)counters[1]/1e6;
1457 for(
int i=0;i<NUM_PAPI_EVENTS;i++) results[i] = counters[i]/1e6;
1467 PAPI_stop_counters(counters, NUM_PAPI_EVENTS);
1471 results[NUM_PAPI_EVENTS]=CkpvAccess(papiEvents)[NUM_PAPI_EVENTS];
1472 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1473 CkCallback cb(CkIndex_Node::resumeAfterPapiMeasureBarrier(NULL), nd[0]);
1474 contribute(
sizeof(
double)*(NUM_PAPI_EVENTS+1), &results, CkReduction::sum_double, cb);
1479 #ifdef MEASURE_NAMD_WITH_PAPI
1482 double *results = (
double *)msg->getData();
1483 double endtime=CmiWallTimer();
1486 #if MEASURE_PAPI_SPP
1487 CkPrintf(
"SPP INFO: PAPI_FP_OPS timestep %d to %d is %lf(1e6)\n", bstep,estep,results[0]);
1488 CkPrintf(
"SPP INFO: PAPI_TOT_INS timestep %d to %d is %lf(1e6)\n", bstep,estep,results[1]);
1489 CkPrintf(
"SPP INFO: perf::PERF_COUNT_HW_CACHE_LL:MISS timestep %d to %d is %lf(1e6)\n", bstep,estep,results[2]);
1490 CkPrintf(
"SPP INFO: DATA_PREFETCHER:ALL timestep %d to %d is %lf(1e6)\n", bstep,estep,results[3]);
1491 CkPrintf(
"SPP INFO: PAPI_L1_DCA timestep %d to %d is %lf(1e6)\n", bstep,estep,results[4]);
1492 CkPrintf(
"SPP INFO: PAPI_TOT_CYC timestep %d to % is %lf(1e6)\n", bstep,estep,results[5]);
1495 CkPrintf(
"SPP INFO: WALLtime timestep %d to %d is %lf\n", bstep,estep,endtime-results[NUM_PAPI_EVENTS]);
1496 CkPrintf(
"SPP INFO: endtime %lf avgtime %lf tottime %lf\n", endtime,results[NUM_PAPI_EVENTS]/CkNumPes(),results[NUM_PAPI_EVENTS] );
1498 if(CkpvAccess(papiEvents)[0] == PAPI_FP_INS){
1499 double totalFPIns = results[0];
1500 if(CkpvAccess(papiEvents)[1] == PAPI_FMA_INS) totalFPIns += (results[1]*2);
1501 CkPrintf(
"FLOPS INFO: from timestep %d to %d, the total FP instruction of NAMD is %lf(x1e6) per processor\n",
1502 bstep, estep, totalFPIns/CkNumPes());
1504 char nameBuf[PAPI_MAX_STR_LEN];
1505 CkPrintf(
"PAPI COUNTERS INFO: from timestep %d to %d, ",
1507 for(
int i=0; i<NUM_PAPI_EVENTS; i++) {
1508 PAPI_event_code_to_name(CkpvAccess(papiEvents)[i], nameBuf);
1509 CkPrintf(
"%s is %lf(x1e6), ", nameBuf, results[i]/CkNumPes());
1511 CkPrintf(
"per processor\n");
1524 int numpes = CkNumPes();
1525 int nodesize = CkMyNodeSize();
1532 sprintf(fname,
"mapdump_%s.%d_%d_%d_%s", filename, numpes, nodesize, tag, gNAMDBinaryName);
1534 FILE *fp = fopen(fname,
"w");
1536 NAMD_die(
"Error in outputing PatchMap and ComputeMap info!\n");
1543 fprintf(fp,
"%d %d %d %d %d %d %d\n", numpes, nodesize, numPatches, numComputes,
1547 #ifdef MEM_OPT_VERSION
1548 fprintf(fp,
"%d %d\n", pMap->numAtoms(i), pMap->
node(i));
1555 for(
int i=0; i<numComputes; i++) {
1556 fprintf(fp,
"%d %d %d %d\n", cMap->
node(i), cMap->
type(i), cMap->
pid(i,0), cMap->
pid(i,1));
1564 #include "Node.def.h"
void allocateMap(int nAtomIDs)
#define NAMD_EVENT_STOP(eon, id)
std::ostream & iINFO(std::ostream &s)
Bool simulateInitialMapping
void recvCheckpointReq(const char *key, int task, checkpoint &cp)
void recvCheckpointCReq_handler(envelope *)
void setPatchMapArrived(bool s)
void receive_SimParameters(MIStream *)
void send_GoMolecule(MOStream *)
static ProxyMgr * Object()
void saveMolDataPointers(NamdState *)
void receive_GoMolecule(MIStream *)
LdbCoordinator * ldbCoordinator
int gridsize_c(void) const
static PatchMap * Object()
void sendEnableEarlyExit(void)
void send_Molecule(MOStream *)
static void exit(int status=0)
static AtomMap * Instance()
SimParameters * simParameters
int proxyTreeBranchFactor
static void pack_grid(GridforceGrid *grid, MOStream *msg)
void createLoadBalancer()
HomePatchList * homePatchList()
std::ostream & endi(std::ostream &s)
void enableScriptBarrier()
char const *const NamdProfileEventStr[]
void scriptSet(const char *, const char *)
char value[MAX_SCRIPT_PARAM_SIZE]
void sendEnableExitScheduler(void)
void recvCheckpointReq(CheckpointMsg *)
static void messageStartUp()
void reloadCharges(float charge[], int n)
Patch * patch(PatchID pid)
void outputPatchComputeMaps(const char *filename, int tag)
int loadStructure(const char *, const char *, int)
void createComputes(ComputeMap *map)
void split(int iStream, int numStreams)
void recvCheckpointAck(CheckpointMsg *)
virtual void reinitialize(SimParameters *simParams, MGridforceParams *mgridParams)=0
void sendComputeMap(void)
ResizeArrayIter< T > end(void) const
void registerUserEventsForAllComputeObjs()
void sendBuildCudaExclusions()
void resumeAfterPapiMeasureBarrier(CkReductionMsg *msg)
void initialize(PatchMap *pmap, ComputeMap *cmap, int reinit=0)
void scriptParam(ScriptParamMsg *)
int index_for_key(const char *key)
int gridsize_a(void) const
void enableExitScheduler(void)
void buildProxySpanningTree()
void createHomePatches(void)
void NAMD_bug(const char *err_msg)
ComputeType type(ComputeID cid)
Controller::checkpoint checkpoint
#define MGRIDFORCEPARAMS_DEFAULTKEY
MGridforceParams * find_key(const char *key)
void traceBarrier(int turnOnTrace, int step)
void updateGridScale(const char *key, Vector scale)
void recvCheckpointAck(checkpoint &cp)
void reloadStructure(const char *, const char *)
void recvEnableExitScheduler(void)
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ const int numPatches
GridforceGrid * get_gridfrc_grid(int gridnum) const
void NAMD_die(const char *err_msg)
static LdbCoordinator * Object()
static AtomMap * Object()
MGridforceParamsList mgridforcelist
static void nonbonded_select()
void recvEnableEarlyExit(void)
void send_Parameters(MOStream *)
int isRecvSpanningTreeOn()
static ComputeMap * Instance()
void resumeAfterTraceBarrier(CkReductionMsg *msg)
CkpvDeclare(AtomMap *, AtomMap_instance)
WorkDistrib * workDistrib
Parameters * node_parameters
int numPatches(void) const
static GridforceGrid * unpack_grid(int gridnum, MIStream *msg)
#define NAMD_EVENT_START_EX(eon, id, str)
SimParameters * node_simParameters
void recvCheckpointCAck_handler(envelope *)
static ComputeMap * Object()
void useController(Controller *controllerPtr)
void papiMeasureBarrier(int turnOnMeasure, int step)
void distributeHomePatches(void)
void setProxyTreeBranchFactor(int dim)
k< npairi;++k){TABENERGY(const int numtypes=simParams->tableNumTypes;const float table_spacing=simParams->tableSpacing;const int npertype=(int)(namdnearbyint(simParams->tableMaxDist/simParams->tableSpacing)+1);) int table_i=(r2iilist[2 *k] >> 14)+r2_delta_expc;const int j=pairlisti[k];#define p_j BigReal diffa=r2list[k]-r2_table[table_i];#define table_four_i TABENERGY(register const int tabtype=-1-(lj_pars->A< 0?lj_pars->A:0);) BigReal kqq=kq_i *p_j-> charge
int set_gridfrc_grid(int gridnum, GridforceGrid *grid)
int pid(ComputeID cid, int i)
int isSendSpanningTreeOn()
void resumeAfterTraceBarrier(int)
void sendCheckpointReq(int remote, const char *key, int task, Lattice &lat, ControllerState &cs)
void enableEarlyExit(void)
void receive_Molecule(MIStream *)
MGridforceParams * at_index(int idx)
static PatchMap * Instance()
int gridsize_b(void) const
void useSequencer(Sequencer *sequencerPtr)
void get_extremes(ScaledPosition &xmin, ScaledPosition &xmax) const
ResizeArrayIter< T > begin(void) const
char param[MAX_SCRIPT_PARAM_SIZE]
void receive_Parameters(MIStream *)
virtual void set_scale(Vector s)=0
void reloadCharges(const char *filename)
void send_SimParameters(MOStream *)
void reloadGridforceGrid(const char *key)
void assignNodeToPatch(void)