NAMD
Node.C
Go to the documentation of this file.
1 
7 /*
8  Toplevel routines for initializing a Node for a simulation
9  one Node per Pe (processor element).
10 */
11 
12 #if !defined(WIN32) || defined(__CYGWIN__)
13 #include <unistd.h>
14 #endif
15 #include "InfoStream.h"
16 #include "Node.decl.h"
17 #include "Node.h"
18 #ifdef DPMTA
19 #include <pvm3.h>
20 #endif
21 
22 #include "ProcessorPrivate.h"
23 
24 #define MIN_DEBUG_LEVEL 3
25 //#define DEBUGM
26 #include "Debug.h"
27 
28 #include <stdio.h>
29 #include <converse.h>
30 #include "memusage.h"
31 #include "IMDOutput.h"
32 #include "Lattice.h"
33 #include "ComputeMsmMsa.h" // needed for MsmMsaData definition
34 #include "ComputeMsm.h" // needed for MsmInitMsg definition
35 #include "main.decl.h"
36 #include "main.h"
37 #include "WorkDistrib.h"
38 #include "PatchMgr.h"
39 #include "Patch.h"
40 #include "Compute.h"
41 #include "ComputeMap.h"
42 #include "ComputeMgr.h"
43 #include "Molecule.h"
44 #include "HomePatchList.h"
45 #include "AtomMap.h"
46 #include "Sequencer.h"
47 #include "Controller.h"
48 #include "NamdState.h"
49 #include "Output.h"
50 #include "ProxyMgr.h"
51 #include "PatchMap.h"
52 #include "PatchMap.inl"
53 #include "Parameters.h"
54 #include "SimParameters.h"
55 #include "Communicate.h"
56 #include "LdbCoordinator.h"
57 #include "ScriptTcl.h"
58 #include "ComputeMgr.decl.h"
59 #include "ComputePmeMgr.decl.h"
60 // #ifdef NAMD_CUDA
61 #include "ComputeCUDAMgr.decl.h"
62 #include "ComputeCUDAMgr.h"
63 #include "ComputePmeCUDAMgr.decl.h"
64 #include "ComputePmeCUDAMgr.h"
65 // #endif
66 #include "ComputeGridForceMgr.decl.h"
67 #include "Sync.h"
68 #include "BackEnd.h"
69 #include "PDB.h"
70 #include "packmsg.h"
71 #include "CollectionMgr.decl.h"
72 #include "ParallelIOMgr.decl.h"
73 #include "Vector.h"
74 // BEGIN LA
75 #include "Random.h"
76 // END LA
77 #include "NamdEventsProfiling.h"
78 
79 #if(CMK_CCS_AVAILABLE && CMK_WEB_MODE)
80 extern "C" void CApplicationInit();
81 #endif
82 
83 #include "DumpBench.h"
84 
85 class CheckpointMsg : public CMessage_CheckpointMsg {
86 public:
87  int task;
88  int replica;
90  char *key;
91 };
92 
93 extern "C" {
94  void recvCheckpointCReq_handler(envelope*);
95  void recvCheckpointCAck_handler(envelope*);
96 }
97 
98 #if defined(CMK_BALANCED_INJECTION_API) && CMK_BALANCED_INJECTION_API != 0
99 #include "ckBIconfig.h"
100 #endif
101 
102 #include "CollectionMgr.h"
103 #include "CollectionMaster.h"
104 #include "CollectionMgr.decl.h"
105 #include "CollectionMaster.decl.h"
106 
107 #if USE_HPM
108 extern "C" void HPM_Init(int);
109 extern "C" void HPM_Start(char *label, int);
110 extern "C" void HPM_Stop(char *label, int);
111 extern "C" void HPM_Print(int, int);
112 #endif
113 
114 #if defined(NAMD_MIC)
115  extern void mic_dumpHostDeviceComputeMap();
116  extern void mic_initHostDeviceLDB();
117 #endif
118 
119 #ifdef MEASURE_NAMD_WITH_PAPI
120 #include "papi.h"
121 #if CMK_SMP
122 #include <pthread.h>
123 #endif
124 #define NUM_PAPI_EVENTS 6
125 CkpvDeclare(int *, papiEvents);
126 
127 #define MEASURE_PAPI_SPP 1
128 #define MEASURE_PAPI_CACHE 0
129 #define MEASURE_PAPI_FLOPS 0
130 
131 static void namdInitPapiCounters(){
132  if(CkMyRank()==0){
133  //only initialize per OS process (i.e. a charm node)
134  int retval = PAPI_library_init(PAPI_VER_CURRENT);
135  if(retval != PAPI_VER_CURRENT) {
136  if(CkMyPe()==0){
137  NAMD_die("PAPI library is not compatitible!");
138  }
139  }
140  #if CMK_SMP
141  //now only consider systems that are compatible with POSIX
142  if(PAPI_thread_init(pthread_self)!=PAPI_OK) {
143  if(CkMyPe()==0){
144  NAMD_die("Multi-thread mode in PAPI could not be initialized!");
145  }
146  }
147  #endif
148  }
149  CkpvInitialize(int *, papiEvents);
150  CkpvAccess(papiEvents) = new int[NUM_PAPI_EVENTS+1];
151 
152 #if MEASURE_PAPI_CACHE
153  if(PAPI_query_event(PAPI_L1_DCM)==PAPI_OK) {
154  CkpvAccess(papiEvents)[0] = PAPI_L1_DCM;
155  }else{
156  if(CkMyPe()==0){
157  CkPrintf("WARNING: PAPI_L1_DCM doesn't exsit on this platform!\n");
158  }
159  //if not default to PAPI_TOT_INS
160  CkpvAccess(papiEvents)[0] = PAPI_TOT_INS;
161  }
162 
163  if(PAPI_query_event(PAPI_L2_DCM)==PAPI_OK) {
164  CkpvAccess(papiEvents)[1] = PAPI_L2_DCM;
165  }else{
166  //if not default to PAPI_TOT_CYC
167  CkpvAccess(papiEvents)[1] = PAPI_TOT_CYC;
168  }
169 #elif MEASURE_PAPI_FLOPS
170  if(PAPI_query_event(PAPI_FP_INS)==PAPI_OK) {
171  CkpvAccess(papiEvents)[0] = PAPI_FP_INS;
172  }else{
173  if(CkMyPe()==0){
174  CkPrintf("WARNING: PAPI_FP_INS doesn't exsit on this platform!\n");
175  }
176  //if not default to PAPI_TOT_INS
177  CkpvAccess(papiEvents)[0] = PAPI_TOT_INS;
178  }
179 
180  if(PAPI_query_event(PAPI_FMA_INS)==PAPI_OK) {
181  CkpvAccess(papiEvents)[1] = PAPI_FMA_INS;
182  }else{
183  //if not default to PAPI_TOT_CYC
184  CkpvAccess(papiEvents)[1] = PAPI_TOT_CYC;
185  }
186 #elif MEASURE_PAPI_SPP
187 /* for SPP we record these
188 1) PAPI_FP_OPS
189 2) PAPI_TOT_INS
190 3) perf::PERF_COUNT_HW_CACHE_LL:MISS
191 4) DATA_PREFETCHER:ALL
192 5) PAPI_L1_DCA
193 6) INSTRUCTION_FETCH_STALL
194 7) PAPI_TOT_CYC, and
195 8) real (wall) time
196 */
197  int papiEventSet = PAPI_NULL;
198  if (PAPI_create_eventset(&papiEventSet) != PAPI_OK) {
199  CmiAbort("PAPI failed to create event set!\n");
200  }
201 
202  if(PAPI_query_event(PAPI_FP_OPS)==PAPI_OK) {
203  CkpvAccess(papiEvents)[0] = PAPI_FP_OPS;
204  }else{
205  if(CkMyPe()==0){
206  CkAbort("WARNING: PAPI_FP_OPS doesn't exist on this platform!");
207  }
208  }
209  if(PAPI_query_event(PAPI_TOT_INS)==PAPI_OK) {
210  CkpvAccess(papiEvents)[1] = PAPI_TOT_INS;
211  }else{
212  if(CkMyPe()==0){
213  CkAbort("WARNING: PAPI_TOT_INS doesn't exist on this platform!");
214  }
215  }
216  int EventCode;
217  int ret;
218  ret=PAPI_event_name_to_code("perf::PERF_COUNT_HW_CACHE_LL:MISS",&EventCode);
219  if(ret==PAPI_OK && PAPI_query_event(EventCode)==PAPI_OK) {
220  CkpvAccess(papiEvents)[2] = EventCode;
221  }else{
222  if(CkMyPe()==0){
223  CkAbort("WARNING: perf::PERF_COUNT_HW_CACHE_LL:MISS doesn't exist on this platform!");
224  }
225  }
226  ret=PAPI_event_name_to_code("DATA_PREFETCHER:ALL",&EventCode);
227  if(ret==PAPI_OK && PAPI_query_event(EventCode)==PAPI_OK) {
228  CkpvAccess(papiEvents)[3] = EventCode;
229  }else{
230  if(CkMyPe()==0){
231  CkAbort("WARNING: DATA_PREFETCHER:ALL doesn't exist on this platform!");
232  }
233  }
234  if(PAPI_query_event(PAPI_L1_DCA)==PAPI_OK) {
235  CkpvAccess(papiEvents)[4] = PAPI_L1_DCA;
236  }else{
237  if(CkMyPe()==0){
238  CkAbort("WARNING: PAPI_L1_DCA doesn't exist on this platform!");
239  }
240  }
241  /* ret=PAPI_event_name_to_code("INSTRUCTION_FETCH_STALL",&EventCode);
242  if(ret==PAPI_OK && PAPI_query_event(EventCode)==PAPI_OK) {
243  CkpvAccess(papiEvents)[5] = EventCode;
244  }else{
245  if(CkMyPe()==0){
246  CkAbort("WARNING: INSTRUCTION_FETCH_STALL doesn't exist on this platform!");
247  }
248  }
249  */
250  if(PAPI_query_event(PAPI_TOT_CYC)==PAPI_OK) {
251  CkpvAccess(papiEvents)[5] = PAPI_TOT_CYC;
252  }else{
253  if(CkMyPe()==0){
254  CkAbort("WARNING: PAPI_TOT_CYC doesn't exist on this platform!");
255  }
256  }
257  for(int i=0;i<NUM_PAPI_EVENTS;i++)
258  {
259  int papiRetValue=PAPI_add_events(papiEventSet, &CkpvAccess(papiEvents)[i],1);
260  if (papiRetValue != PAPI_OK) {
261  CkPrintf("failure for event %d\n",i);
262  if (papiRetValue == PAPI_ECNFLCT) {
263  CmiAbort("PAPI events conflict! Please re-assign event types!\n");
264  } else {
265  CmiAbort("PAPI failed to add designated events!\n");
266  }
267  }
268 
269  }
270 #endif
271 }
272 #endif
273 
274 #ifdef OPENATOM_VERSION
275 static void startOA(){(char inDriverFile[1024], char inPhysicsFile[1024], CkCallback doneCB)
276 {
277  CProxy_oaSetup moaInstance = CProxy_oaSetup::ckNew(inDriverFile, inPhysicsFile, doneCB);
278 }
279 #endif //OPENATOM_VERSION
280 
281 //======================================================================
282 // Public Functions
283 
284 //----------------------------------------------------------------------
285 
287 double startupTime;
288 
289 //----------------------------------------------------------------------
290 // BOC constructor
292 {
293  DebugM(4,"Creating Node\n");
294 #if(CMK_CCS_AVAILABLE && CMK_WEB_MODE)
295  CApplicationInit();
296 #endif
297  if (CkpvAccess(Node_instance) == 0) {
298  CkpvAccess(Node_instance) = this;
299  eventEndOfTimeStep = traceRegisterUserEvent("EndOfTimeStep", 135);
300  } else {
301  NAMD_bug("Node::Node() - another instance of Node exists!");
302  }
303 
304  CkpvAccess(BOCclass_group) = msg->group;
305  delete msg;
306 
307  CkpvAccess(BOCclass_group).node = thisgroup;
308 
309  recvCheckpointCReq_index = CmiRegisterHandler((CmiHandler)recvCheckpointCReq_handler);
310  recvCheckpointCAck_index = CmiRegisterHandler((CmiHandler)recvCheckpointCAck_handler);
311 
312  startupPhase = 0;
313 
314  molecule = NULL;
315  parameters = NULL;
316  simParameters = NULL;
317  configList = NULL;
318  pdb = NULL;
319  state = NULL;
320  output = NULL;
321  imd = new IMDOutput;
322  colvars = 0;
323 
324 #if USE_HPM
325  // assumes that this will be done only on BG/P
326  TopoManager *tmgr = new TopoManager();
327  int x, y, z;
328  tmgr->rankToCoordinates(CkMyPe(), x, y, z, localRankOnNode);
329  delete tmgr;
330 #endif
331 
332  specialTracing = traceAvailable() && (traceIsOn()==0);
333 
334  DebugM(4,"Creating PatchMap, AtomMap, ComputeMap\n");
337  if ( CkMyRank() == 0 ) ComputeMap::Instance();
338 
339  //Note: Binding BOC vars such as workDistrib has been moved
340  //to the 1st phase of startup because the in-order message delivery
341  //is not always guaranteed --Chao Mei
342 #if defined(CMK_BALANCED_INJECTION_API) && CMK_BALANCED_INJECTION_API != 0
343  if(CkMyRank() == 0){
344  balancedInjectionLevel=ck_get_GNI_BIConfig();
345  // CkPrintf("[%d] get retrieved BI=%d\n",CkMyPe(),balancedInjectionLevel);
346  ck_set_GNI_BIConfig(20);
347  // CkPrintf("[%d] set retrieved BI=%d\n",CkMyPe(),ck_get_GNI_BIConfig());
348  }
349 #endif
350 
351 }
352 
353 //----------------------------------------------------------------------
354 // ~Node(void) needs to clean up everything.
355 
357 {
358  delete output;
359  delete computeMap;
360  delete atomMap;
361  delete patchMap;
362  delete CkpvAccess(comm);
363  // BEGIN LA
364  delete rand;
365  // END LA
366 #ifdef MEASURE_NAMD_WITH_PAPI
367  delete CkpvAccess(papiEvents);
368 #endif
369 }
370 
371 void Node::bindBocVars(){
372  DebugM(4,"Binding to BOC's\n");
373  CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
374  patchMgr = pm.ckLocalBranch();
375  CProxy_ProxyMgr prm(CkpvAccess(BOCclass_group).proxyMgr);
376  proxyMgr = prm.ckLocalBranch();
377  CProxy_WorkDistrib wd(CkpvAccess(BOCclass_group).workDistrib);
378  workDistrib = wd.ckLocalBranch();
379  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
380  computeMgr = cm.ckLocalBranch();
381  CProxy_LdbCoordinator lc(CkpvAccess(BOCclass_group).ldbCoordinator);
382  ldbCoordinator = lc.ckLocalBranch();
383  #ifdef MEM_OPT_VERSION
384  CProxy_ParallelIOMgr io(CkpvAccess(BOCclass_group).ioMgr);
385  ioMgr = io.ckLocalBranch();
386  #endif
387 
388 }
389 
390 //----------------------------------------------------------------------
391 // Malloc Test Sequence
392 void Node::mallocTest(int step) {
393  int MB = 1024*1024;
394  int size = 100;
395  char* foo = (char*) malloc(size*MB);
396  if ( ! foo ) {
397  char buf[256];
398  sprintf(buf,"Malloc fails on Pe %d at %d MB.\n",CkMyPe(),step*size);
399  NAMD_die(buf);
400  }
401  memset(foo,0,size*MB*sizeof(char));
402 }
403 
405  if ( mallocTest_size ) {
406  CkPrintf("All PEs successfully allocated %d MB.\n", 100*mallocTest_size);
407  } else {
408  CkPrintf("Starting malloc test on all PEs.\n");
409  }
410  fflush(stdout);
411  ++mallocTest_size;
412  CkStartQD(CkIndex_Node::mallocTestQd(), &thishandle);
413  (CProxy_Node(CkpvAccess(BOCclass_group).node)).mallocTest(mallocTest_size);
414 }
415 
416 //----------------------------------------------------------------------
417 // Startup Sequence
418 
420  (CProxy_Node(CkpvAccess(BOCclass_group).node)).startup();
421 }
422 
426 
427 extern void registerUserEventsForAllComputeObjs(void);
428 
430  int gotoRun = false;
431  double newTime;
432 
433  if (!CkMyPe()) {
434 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
435  char buf[32];
436  sprintf(buf, "%s: %d", NamdProfileEventStr[NamdProfileEvent::NAMD_STARTUP], startupPhase);
437  NAMD_EVENT_START_EX(1, NamdProfileEvent::NAMD_STARTUP, buf);
438 #endif
439  if (!startupPhase) {
440  iout << iINFO << "\n";
441  startupTime = CmiWallTimer();
442  iout << iINFO << "Entering startup at " << startupTime << " s, ";
443  } else {
444  newTime = CmiWallTimer();
445  iout << iINFO << "Startup phase " << startupPhase-1 << " took "
446  << newTime - startupTime << " s, ";
447  startupTime = newTime;
448  }
449  iout << memusage_MB() << " MB of memory in use\n" << endi;
450  fflush(stdout);
451  }
452  switch (startupPhase) {
453 
454  case 0:
456  namdOneCommInit(); // Namd1.X style
457  break;
458 
459  case 1:
460  bindBocVars();
461 
462  // send & receive molecule, simparameters... (Namd1.X style)
463  if (CkMyPe()) {
464  namdOneRecv();
465  } else {
466  namdOneSend();
467  }
468  break;
469 
470  case 2:
471  // fix up one-per-node objects (for SMP version)
475 
478 
479  #if !CMK_SMP || ! USE_CKLOOP
480  //the CkLoop library should be only used in SMP mode
482  #else
483  if ( CkNumPes() < 2 * CkNumNodes() ) simParameters->useCkLoop = 0;
484  #endif
485 
486 
487  if ( simParameters->mallocTest ) {
488  if (!CkMyPe()) {
489  mallocTest_size = 0;
490  CkStartQD(CkIndex_Node::mallocTestQd(), &thishandle);
491  }
492  return;
493  }
494 
495 
496  #ifdef MEASURE_NAMD_WITH_PAPI
497  if(simParameters->papiMeasure) namdInitPapiCounters();
498  #endif
499 
500  #ifdef MEM_OPT_VERSION
501  //At this point, each Node object has received the simParameters,
502  //parameters and the atom signatures info from the master Node
503  //(proc 0). It's time to initialize the parallel IO manager and
504  //read the binary per-atom file --Chao Mei
505 
506  //Step 1: initialize the parallel IO manager per Node
507  ioMgr->initialize(this);
508  #endif
509 
510  break;
511 
512  case 3:
513 
514  #ifdef MEM_OPT_VERSION
515  //Step 2: read the binary per-atom files (signature index, coordinates etc.)
516  ioMgr->readPerAtomInfo();
517  #endif
518 
519  break;
520 
521  case 4:
522 
523  #ifdef MEM_OPT_VERSION
524  //Step 3: update counters of tuples and exclusions inside Molecule object
525  ioMgr->updateMolInfo();
526 
527  //Step 4: prepare distributing the atoms to neighboring procs if necessary
528  ioMgr->migrateAtomsMGrp();
529 
530  //step 5: initialize patchMap and send it to every other processors
531  //to decide atoms to patch distribution on every input processor
532  if(!CkMyPe()) {
533  workDistrib->patchMapInit(); // create space division
535  }
536  #endif
537 
538  #if USE_HPM
539  HPM_Init(localRankOnNode);
540  #endif
541 
542  // take care of initial thread setting
543  threadInit();
544 
545  // create blank AtomMap
547 
548  if (!CkMyPe()) {
549 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
550  if (simParameters->usePMECUDA) {
551  // computePmeCUDAMgr was created in BackEnd.C
552  // This empty branch is to avoid initializing ComputePmeMgr
553  } else
554 #endif
555  if (simParameters->PMEOn) {
556  CkpvAccess(BOCclass_group).computePmeMgr = CProxy_ComputePmeMgr::ckNew();
557  }
558  #ifdef OPENATOM_VERSION
559  if ( simParameters->openatomOn ) {
560  CkpvAccess(BOCclass_group).computeMoaMgr = CProxy_ComputeMoaMgr::ckNew();
561  }
562  #endif // OPENATOM_VERSION
563 
564  }
565 
566  #ifdef OPENATOM_VERSION
567  if ( simParameters->openatomOn ) {
568  // if ( ! CkMyPe() ) {
569  CkCallback doneMoaStart(CkIndexmain::doneMoaSetup(), thishandle);
570  startOA(simParameters->moaDriverFile, simParameters->moaPhysicsFile, doneMoaStart);
571  // }
572  }
573  #endif // OPENATOM_VERSION
574 
575  // BEGIN LA
577  rand->split(CkMyPe(), CkNumPes());
578  // END LA
579 
580  break;
581 
582  case 5:
583  #ifdef MEM_OPT_VERSION
584  //Now, every input proc has received all the atoms necessary
585  //to decide the patches those atoms belong to
586 
587  //step 1: integrate the migrated atoms into the atom list that
588  //contains the initially distributed atoms, and sort the atoms
589  //based on hydrogenList value
590  ioMgr->integrateMigratedAtoms();
591 
592  //step 2: integrate the cluster size of each atom on each output proc
593  ioMgr->integrateClusterSize();
594 
595  //step 3: calculate the number of atoms in each patch on every
596  //input procs (atoms belonging to a patch may lie on different
597  //procs), and reduce such info on proc 0. Such info is required
598  //for determining which node a particular patch is assigned to.
599  ioMgr->calcAtomsInEachPatch();
600 
601  //set to false to re-send PatchMap later
603  #endif
604  break;
605  case 6:
608  }
611  }
614  }
615  #ifdef PROCTRACE_DEBUG
616  DebugFileTrace::Instance("procTrace");
617  #endif
618 
619  if (!CkMyPe()) {
620  output = new Output; // create output object just on PE(0)
621 
622  #ifndef MEM_OPT_VERSION
623  workDistrib->patchMapInit(); // create space division
624  workDistrib->createHomePatches(); // load atoms into HomePatch(es)
625  #endif
626 
629  //ComputeMap::Object()->printComputeMap();
630 
631  // For MIC runs, take the additional step after the compute map has been created to
632  // assign the various computes to either the host or the device. This info will
633  // be distributed across the PEs.
634  #if defined(NAMD_MIC)
635  mic_initHostDeviceLDB();
636  #endif
637 
639  iout << iINFO << "Simulating initial mapping with " << simParameters->simulatedPEs
640  << " PEs with " << simParameters->simulatedNodeSize << " PEs per node\n" << endi;
641  outputPatchComputeMaps("init_mapping", 0);
642  iout << iINFO << "Simulating initial mapping is done, now NAMD exits\n" << endi;
643  BackEnd::exit();
644  }
645 
647 
648  //in MEM_OPT_VERSION, patchMap is resent
649  //because they have been updated since creation including
650  //#atoms per patch, the proc a patch should stay etc. --Chao Mei
652  #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
653  CProxy_NodeProxyMgr npm(CkpvAccess(BOCclass_group).nodeProxyMgr);
654  //a node broadcast
655  npm.createProxyInfo(PatchMap::Object()->numPatches());
656  #endif
657  }
658  {
659  #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
660  CProxy_NodeProxyMgr npm(CkpvAccess(BOCclass_group).nodeProxyMgr);
661  if(CkMyRank()==0) {
662  //just need to register once
663  npm[CkMyNode()].ckLocalBranch()->registerLocalProxyMgr(CkpvAccess(BOCclass_group).proxyMgr);
664  }
665  npm[CkMyNode()].ckLocalBranch()->registerLocalPatchMap(CkMyRank(), PatchMap::Object());
666  #endif
667  }
668  break;
669 
670  case 7:
671 #ifdef CHARM_HAS_MSA
673  CProxy_ComputeMsmMsaMgr msm(CkpvAccess(BOCclass_group).computeMsmMsaMgr);
674  msm[CkMyPe()].initialize(new CkQdMsg);
675  }
676 #else
678  CProxy_ComputeMsmMgr msm(CkpvAccess(BOCclass_group).computeMsmMgr);
679  MsmInitMsg *msg = new MsmInitMsg;
680  Lattice lattice = simParameters->lattice; // system lattice vectors
681  ScaledPosition smin=0, smax=0;
682  if (lattice.a_p() && lattice.b_p() && lattice.c_p()) {
683  msg->smin = smin;
684  msg->smax = smax;
685  msm[CkMyPe()].initialize(msg); // call from my own PE
686  }
687  else if ( ! CkMyPe() ) {
688  pdb->get_extremes(smin, smax); // only available on PE 0
689  msg->smin = smin;
690  msg->smax = smax;
691  msm.initialize(msg); // broadcast to chare group
692  }
693 
694  /*
695  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
696  Node *node = nd.ckLocalBranch();
697  ScaledPosition smin, smax;
698  node->pdb->get_extremes(smin, smax);
699  msg->smin = smin; // extreme positions in system
700  msg->smax = smax;
701  msm[CkMyPe()].initialize(msg);
702  */
703  }
704 #endif
705 
706  if ( simParameters->PMEOn ) {
707  #ifdef OPENATOM_VERSION
708  if ( simParameters->openatomOn ) {
709  CProxy_ComputeMoaMgr moa(CkpvAccess(BOCclass_group).computeMoaMgr);
710  moa[CkMyPe()].initialize(new CkQdMsg);
711  }
712  #endif // OPENATOM_VERSION
713 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
714  if ( simParameters->usePMECUDA ) {
715  if(CkMyRank()==0) {
716  CProxy_ComputePmeCUDAMgr pme(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
717  pme.ckLocalBranch()->initialize(new CkQdMsg); // must run on pe 0 to call ckNew
718  }
719  } else
720 #endif
721  {
722  CProxy_ComputePmeMgr pme(CkpvAccess(BOCclass_group).computePmeMgr);
723  pme[CkMyPe()].initialize(new CkQdMsg);
724  }
725  }
726  break;
727 
728  case 8:
729 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
730  if ( (simParameters->useCUDA2 || simParameters->bondedCUDA) && CkMyRank()==0 ) {
731  CProxy_ComputeCUDAMgr nb(CkpvAccess(BOCclass_group).computeCUDAMgr);
732  nb.ckLocalBranch()->initialize(new CkQdMsg);
733  }
734 #endif
735  break;
736 
737  case 9:
739  break;
740 
741  case 10:
742  #ifdef MEM_OPT_VERSION
743  //migrate atoms to HomePatch processors
744  ioMgr->sendAtomsToHomePatchProcs();
745  #endif
746  break;
747 
748  case 11:
749  // part 2 of MSM init
751  CProxy_ComputeMsmMgr msm(CkpvAccess(BOCclass_group).computeMsmMgr);
752  msm[CkMyPe()].initialize_create(); // call from my own PE
753  }
754 
755  if ( simParameters->PMEOn ) {
756  #ifdef OPENATOM_VERSION
757  if ( simParameters->openatomOn ) {
758  CProxy_ComputeMoaMgr moa(CkpvAccess(BOCclass_group).computeMoaMgr);
759  moa[CkMyPe()].initWorkers(new CkQdMsg);
760  }
761  #endif // OPENATOM_VERSION
762 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
763  if ( simParameters->usePMECUDA ) {
764  if(CkMyRank()==0) {
765  CProxy_ComputePmeCUDAMgr pme(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
766  pme[CkMyNode()].initialize_pencils(new CkQdMsg);
767  }
768  } else
769 #endif
770  {
771  CProxy_ComputePmeMgr pme(CkpvAccess(BOCclass_group).computePmeMgr);
772  pme[CkMyPe()].initialize_pencils(new CkQdMsg);
773  }
774  }
775 #ifdef CHARM_HAS_MSA
776  else if ( simParameters->MSMOn && ! simParameters->MsmSerialOn ) {
777  CProxy_ComputeMsmMsaMgr msm(CkpvAccess(BOCclass_group).computeMsmMsaMgr);
778  msm[CkMyPe()].initWorkers(new CkQdMsg);
779  }
780 #else
781  else if ( simParameters->MSMOn && ! simParameters->MsmSerialOn ) {
782  CProxy_ComputeMsmMgr msm(CkpvAccess(BOCclass_group).computeMsmMgr);
783  msm[CkMyPe()].update(new CkQdMsg);
784  }
785 #endif
786 
787  #ifdef MEM_OPT_VERSION
788  //Now every processor has all the atoms it needs to create the HomePatches.
789  //The HomePatches are created in parallel on every home patch procs.
790  ioMgr->createHomePatches();
791  #else
792  if (!CkMyPe()) {
794  }
795  #endif
796  break;
797 
798  case 12:
799  if ( simParameters->PMEOn ) {
800  #ifdef OPENATOM_VERSION
801  if ( simParameters->openatomOn ) {
802  CProxy_ComputeMoaMgr moa(CkpvAccess(BOCclass_group).computeMoaMgr);
803  moa[CkMyPe()].startWorkers(new CkQdMsg);
804  }
805  #endif // OPENATOM_VERSION
806 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
807  if ( simParameters->usePMECUDA ) {
808  if(CkMyRank()==0) {
809  CProxy_ComputePmeCUDAMgr pme(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
810  pme[CkMyNode()].activate_pencils(new CkQdMsg);
811  }
812  } else
813 #endif
814  {
815  CProxy_ComputePmeMgr pme(CkpvAccess(BOCclass_group).computePmeMgr);
816  pme[CkMyPe()].activate_pencils(new CkQdMsg);
817  }
818  }
819 #ifdef CHARM_HAS_MSA
820  else if ( simParameters->MSMOn && ! simParameters->MsmSerialOn ) {
821  CProxy_ComputeMsmMsaMgr msm(CkpvAccess(BOCclass_group).computeMsmMsaMgr);
822  msm[CkMyPe()].startWorkers(new CkQdMsg);
823  }
824 #else
825  /*
826  else if ( simParameters->MSMOn && ! simParameters->MsmSerialOn ) {
827  CProxy_ComputeMsmMgr msm(CkpvAccess(BOCclass_group).computeMsmMgr);
828  //msm[CkMyPe()].startWorkers(new CkQdMsg);
829  }
830  */
831 #endif
832 
833  proxyMgr->createProxies(); // need Home patches before this
834  if (!CkMyPe()) LdbCoordinator::Object()->createLoadBalancer();
835 
836 #ifdef NAMD_TCL
837  // TclInitSubsystems() has a race condition so we create one interp per node here
838  if (CkMyPe() && CkMyNodeSize() > 1 && ! CkMyRank()) Tcl_DeleteInterp(Tcl_CreateInterp());
839 #endif
840 
841 #ifdef USE_NODEPATCHMGR
842  //at this point, PatchMap info has been recved on PEs. It is time to create
843  //the home patch spanning tree for receiving proxy list info
844  if(proxyMgr->getSendSpanning() || proxyMgr->getRecvSpanning()) {
845  if(CkMyRank()==0) {
846  CProxy_NodeProxyMgr npm(CkpvAccess(BOCclass_group).nodeProxyMgr);
847  npm[CkMyNode()].ckLocalBranch()->createSTForHomePatches(PatchMap::Object());
848  }
849  }
850 #endif
851 
852  break;
853 
854  case 13:
855 
856  // DMK - DEBUG - If, in MIC runs, the debug option to dump all the compute maps to files
857  // for debugging/verification purposes has been enabled, have each PE do so now.
858  #if defined(NAMD_MIC)
859  mic_dumpHostDeviceComputeMap();
860  #endif
861 
862  if (!CkMyPe()) {
863  iout << iINFO << "CREATING " << ComputeMap::Object()->numComputes()
864  << " COMPUTE OBJECTS\n" << endi;
865  }
866  DebugM(4,"Creating Computes\n");
868  DebugM(4,"Building Sequencers\n");
869  buildSequencers();
870  DebugM(4,"Initializing LDB\n");
872  break;
873 
874  case 14:
875  // computes may create proxies on the fly so put these in separate phase
876  Sync::Object()->openSync(); // decide if to open local Sync
878 #if defined(CMK_BALANCED_INJECTION_API) && CMK_BALANCED_INJECTION_API != 0
879  if(CkMyRank() == 0){
880  // CkPrintf("[%d] get retrieved BI=%d\n",CkMyPe(),balancedInjectionLevel);
881  ck_set_GNI_BIConfig(balancedInjectionLevel);
882  // CkPrintf("[%d] set retrieved BI=%d\n",CkMyPe(),ck_get_GNI_BIConfig());
883  }
884 #endif
885 
886  break;
887 
888  case 15:
889  {
890  //For debugging
891  /*if(!CkMyPe()){
892  FILE *dumpFile = fopen("/tmp/NAMD_Bench.dump", "w");
893  dumpbench(dumpFile);
894  NAMD_die("Normal execution\n");
895  }*/
896  }
897  #ifdef MEM_OPT_VERSION
898  //free space in the Molecule object that are not used anymore
899  ioMgr->freeMolSpace();
900  #endif
901  gotoRun = true;
902  break;
903 
904  default:
905  NAMD_bug("Startup Phase has a bug - check case statement");
906  break;
907 
908  }
909 
910  startupPhase++;
911  if (!CkMyPe()) {
912 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
913  NAMD_EVENT_STOP(1, NamdProfileEvent::NAMD_STARTUP);
914 #endif
915  if (!gotoRun) {
916  CkStartQD(CkCallback(CkIndex_Node::startup(), thisgroup));
917  } else {
919  }
920  }
921 }
922 
923 #ifdef OPENATOM_VERSION
924 void Node::doneMoaStart()
925 {
926 #ifdef OPENATOM_VERSION_DEBUG
927  CkPrintf("doneMoaStart executed on processor %d.\n", CkMyPe() );
928 #endif //OPENATOM_VERSION_DEBUG
929 }
930 #endif //OPENATOM_VERSION
931 
932 void Node::namdOneCommInit()
933 {
934  if (CkpvAccess(comm) == NULL) {
935  CkpvAccess(comm) = new Communicate();
936 #ifdef DPMTA
937  pvmc_init();
938 #endif
939  }
940 }
941 
942 // Namd 1.X style Send/Recv of simulation information
943 
944 void Node::namdOneRecv() {
945  if ( CmiMyRank() ) return;
946 
947  MIStream *conv_msg;
948 
949  // Receive molecule and simulation parameter information
950  simParameters = node_simParameters = new SimParameters;
951  //****** BEGIN CHARMM/XPLOR type changes
952  parameters = node_parameters = new Parameters();
953  //****** END CHARMM/XPLOR type changes
954  molecule = node_molecule = new Molecule(simParameters,parameters);
955 
956  DebugM(4, "Getting SimParameters\n");
957  conv_msg = CkpvAccess(comm)->newInputStream(0, SIMPARAMSTAG);
959 
960  DebugM(4, "Getting Parameters\n");
961  conv_msg = CkpvAccess(comm)->newInputStream(0, STATICPARAMSTAG);
962  parameters->receive_Parameters(conv_msg);
963 
964  DebugM(4, "Getting Molecule\n");
965  conv_msg = CkpvAccess(comm)->newInputStream(0, MOLECULETAG);
966  // Modified by JLai -- 10.21.11
967  molecule->receive_Molecule(conv_msg);
969  iout << iINFO << "Compute Nodes receiving GoMolecule Information" << "\n" << endi;
970  conv_msg = CkpvAccess(comm)->newInputStream(0, MOLECULETAG);
971  molecule->receive_GoMolecule(conv_msg);
972  }
973  // End of modification
974  DebugM(4, "Done Receiving\n");
975 }
976 
977 void Node::namdOneSend() {
978  node_simParameters = simParameters;
979  node_parameters = parameters;
980  node_molecule = molecule;
981 
982  MOStream *conv_msg;
983  // I'm Pe(0) so I send what I know
984  DebugM(4, "Sending SimParameters\n");
985  conv_msg = CkpvAccess(comm)->newOutputStream(ALLBUTME, SIMPARAMSTAG, BUFSIZE);
987 
988  DebugM(4, "Sending Parameters\n");
989  conv_msg = CkpvAccess(comm)->newOutputStream(ALLBUTME, STATICPARAMSTAG, BUFSIZE);
990  parameters->send_Parameters(conv_msg);
991 
992  DebugM(4, "Sending Molecule\n");
993  int bufSize = BUFSIZE;
994  if(molecule->numAtoms>=1000000) bufSize = 16*BUFSIZE;
995  conv_msg = CkpvAccess(comm)->newOutputStream(ALLBUTME, MOLECULETAG, bufSize);
996  // Modified by JLai -- 10.21.11
997  molecule->send_Molecule(conv_msg);
998 
1000  iout << iINFO << "Master Node sending GoMolecule Information" << "\n" << endi;
1001  conv_msg = CkpvAccess(comm)->newOutputStream(ALLBUTME, MOLECULETAG, bufSize);
1002  molecule->send_GoMolecule(conv_msg);
1003  } // End of modification
1004 }
1005 
1006 
1007 void Node::reloadStructure(const char *fname, const char *pdbname) {
1008  delete molecule;
1009  molecule = state->molecule = 0;
1010  delete pdb;
1011  pdb = state->pdb = 0;
1012  state->loadStructure(fname,pdbname,1);
1013  this->molecule = state->molecule;
1014  this->pdb = state->pdb;
1015  CProxy_Node nodeProxy(thisgroup);
1016  nodeProxy.resendMolecule();
1017 }
1018 
1019 
1021  if ( CmiMyRank() ) {
1022  return;
1023  }
1024  if ( CmiMyPe() == 0 ) {
1025  int bufSize = BUFSIZE;
1026  MOStream *conv_msg;
1027  conv_msg = CkpvAccess(comm)->newOutputStream(ALLBUTME, STATICPARAMSTAG, bufSize);
1028  parameters->send_Parameters(conv_msg);
1029  if(molecule->numAtoms>=1000000) bufSize = 16*BUFSIZE;
1030  conv_msg = CkpvAccess(comm)->newOutputStream(ALLBUTME, MOLECULETAG, bufSize);
1031  molecule->send_Molecule(conv_msg);
1032  } else {
1033  MIStream *conv_msg;
1034  delete parameters;
1035  parameters = new Parameters;
1036  conv_msg = CkpvAccess(comm)->newInputStream(0, STATICPARAMSTAG);
1037  parameters->receive_Parameters(conv_msg);
1038  delete molecule;
1040  conv_msg = CkpvAccess(comm)->newInputStream(0, MOLECULETAG);
1041  molecule->receive_Molecule(conv_msg);
1042  }
1043  node_parameters = parameters;
1044  node_molecule = molecule;
1048  CProxy_Node nodeProxy(thisgroup);
1049  for ( int i=0; i<CmiMyNodeSize(); ++i ) {
1050  nodeProxy[CmiMyPe()+i].resendMolecule2();
1051  }
1052 }
1053 
1058 }
1059 
1060 
1061 // Initial thread setup
1062 
1063 void Node::threadInit() {
1064  // Thread initialization
1065  if (CthImplemented()) {
1066  CthSetStrategyDefault(CthSelf());
1067  } else {
1068  NAMD_bug("Node::startup() Oh no, tiny elvis, threads not implemented");
1069  }
1070 }
1071 
1072 //
1073 void Node::buildSequencers() {
1076 
1077  // Controller object is only on Pe(0)
1078  if ( ! CkMyPe() ) {
1079  Controller *controller = new Controller(state);
1080  state->useController(controller);
1081  }
1082 
1083  // Assign Sequencer to all HomePatch(es)
1084  for (ai=ai.begin(); ai != ai.end(); ai++) {
1085  HomePatch *patch = (*ai).patch;
1086  Sequencer *sequencer = new Sequencer(patch);
1087  patch->useSequencer(sequencer);
1088  }
1089 }
1090 
1091 
1092 
1093 //-----------------------------------------------------------------------
1094 // Node run() - broadcast to all nodes
1095 //-----------------------------------------------------------------------
1097  (CProxy_Node(CkpvAccess(BOCclass_group).node)).run();
1098 }
1099 
1100 
1101 //-----------------------------------------------------------------------
1102 // run(void) runs the specified simulation for the specified number of
1103 // steps, overriding the contents of the configuration file
1104 //-----------------------------------------------------------------------
1106 {
1107  // Start Controller (aka scalar Sequencer) on Pe(0)
1108 // printf("\n\n I am in Node.C in run method about to call state->runController\n\n");
1109  if ( ! CkMyPe() ) {
1110  state->runController();
1111  }
1112 
1113  DebugM(4, "Starting Sequencers\n");
1114  // Run Sequencer on each HomePatch - i.e. start simulation
1117  for (ai=ai.begin(); ai != ai.end(); ai++) {
1118  HomePatch *patch = (*ai).patch;
1119 //CkPrintf("Proc#%d in Node calling Sequencer ",CkMyPe());
1120  patch->runSequencer();
1121  }
1122 
1123  if (!CkMyPe()) {
1124  double newTime = CmiWallTimer();
1125  iout << iINFO << "Startup phase " << startupPhase-1 << " took "
1126  << newTime - startupTime << " s, "
1127  << memusage_MB() << " MB of memory in use\n";
1128  iout << iINFO << "Finished startup at " << newTime << " s, "
1129  << memusage_MB() << " MB of memory in use\n\n" << endi;
1130  fflush(stdout);
1131  }
1132 
1133 }
1134 
1135 
1136 //-----------------------------------------------------------------------
1137 // Node scriptBarrier() - twiddle parameters with simulation halted
1138 //-----------------------------------------------------------------------
1139 
1141  CkStartQD(CkIndex_Node::scriptBarrier(), &thishandle);
1142 }
1143 
1145  //script->awaken();
1146 }
1147 
1149  simParameters->scriptSet(msg->param,msg->value);
1150  delete msg;
1151 }
1152 
1153 void Node::reloadCharges(const char *filename) {
1154  FILE *file = fopen(filename,"r");
1155  if ( ! file ) NAMD_die("node::reloadCharges():Error opening charge file.");
1156 
1157  int n = molecule->numAtoms;
1158  float *charge = new float[n];
1159 
1160  for ( int i = 0; i < n; ++i ) {
1161  if ( ! fscanf(file,"%f",&charge[i]) )
1162  NAMD_die("Node::reloadCharges():Not enough numbers in charge file.");
1163  }
1164 
1165  fclose(file);
1166  CProxy_Node(thisgroup).reloadCharges(charge,n);
1167  delete [] charge;
1168 }
1169 
1170 void Node::reloadCharges(float charge[], int n) {
1171  molecule->reloadCharges(charge,n);
1172 }
1173 
1174 
1175 // BEGIN gf
1176 void Node::reloadGridforceGrid(const char * key) {
1177  DebugM(4, "reloadGridforceGrid(const char*) called on node " << CkMyPe() << "\n" << endi);
1178 
1179  int gridnum;
1180  MGridforceParams *mgridParams;
1181  if (key == NULL) {
1184  } else {
1185  gridnum = simParameters->mgridforcelist.index_for_key(key);
1186  mgridParams = simParameters->mgridforcelist.find_key(key);
1187  }
1188 
1189  if (gridnum < 0 || mgridParams == NULL) {
1190  NAMD_die("Node::reloadGridforceGrid(const char*):Could not find grid.");
1191  }
1192 
1193  GridforceGrid *grid = molecule->get_gridfrc_grid(gridnum);
1194  if (grid == NULL) {
1195  NAMD_bug("Node::reloadGridforceGrid(const char*):grid not found");
1196  }
1197  grid->reinitialize(simParameters, mgridParams);
1198 
1199  CProxy_Node(thisgroup).reloadGridforceGrid(gridnum);
1200 
1201  DebugM(4, "reloadGridforceGrid(const char*) finished\n" << endi);
1202 }
1203 
1204 void Node::updateGridScale(const char* key, Vector scale) {
1205  DebugM(4, "updateGridScale(char*, Vector) called on node " << CkMyPe() << "\n" << endi);
1206 
1207  int gridnum;
1208  MGridforceParams* mgridParams;
1209  if (key == NULL) {
1212  } else {
1213  gridnum = simParameters->mgridforcelist.index_for_key(key);
1214  mgridParams = simParameters->mgridforcelist.find_key(key);
1215  }
1216 
1217  if (gridnum < 0 || mgridParams == NULL) {
1218  NAMD_die("Node::updateGridScale(char*, Vector): Could not find grid.");
1219  }
1220 
1221  GridforceGrid* grid = molecule->get_gridfrc_grid(gridnum);
1222  if (grid == NULL) {
1223  NAMD_bug("Node::updateGridScale(char*, Vector): grid not found");
1224  }
1225  CProxy_Node(thisgroup).updateGridScale(gridnum, scale.x, scale.y, scale.z);
1226 
1227  DebugM(4, "updateGridScale(char*, Vector) finished\n" << endi);
1228 }
1229 void Node::updateGridScale(int gridnum, float sx, float sy, float sz) {
1230  if (CmiMyRank()) return;
1231  DebugM(4, "updateGridScale(char*, int, float, float, float) called on node " << CkMyPe() << "\n" << endi);
1232 
1233  GridforceGrid *grid = molecule->get_gridfrc_grid(gridnum);
1234  if (grid == NULL) {
1235  NAMD_bug("Node::updateGridScale(char*, int, float, float, float):grid not found");
1236  }
1237 
1238  Vector scale(sx,sy,sz);
1240  grid->set_scale( scale );
1241 
1242  DebugM(4, "updateGridScale(char*, int, float, float, float) finished\n" << endi);
1243 }
1244 
1245 void Node::reloadGridforceGrid(int gridnum) {
1246  if (CmiMyRank()) return;
1247  DebugM(4, "reloadGridforceGrid(int) called on node " << CkMyPe() << "\n" << endi);
1248 
1249  GridforceGrid *grid = molecule->get_gridfrc_grid(gridnum);
1250  if (grid == NULL) {
1251  NAMD_bug("Node::reloadGridforceGrid(int):grid not found");
1252  }
1253 
1254  if (CkMyPe()) {
1255  // not node 0 -> receive grid
1256  DebugM(4, "Receiving grid\n");
1257 
1258  delete grid;
1259 
1260  MIStream *msg = CkpvAccess(comm)->newInputStream(0, GRIDFORCEGRIDTAG);
1261  grid = GridforceGrid::unpack_grid(gridnum, msg);
1262  molecule->set_gridfrc_grid(gridnum, grid);
1263  delete msg;
1264  } else {
1265  // node 0 -> send grid
1266  DebugM(4, "Sending grid\n");
1267 
1268  MOStream *msg = CkpvAccess(comm)->newOutputStream(ALLBUTME, GRIDFORCEGRIDTAG, BUFSIZE);
1269  GridforceGrid::pack_grid(grid, msg);
1270  msg->end();
1271  delete msg;
1272  }
1273 
1274  DebugM(4, "reloadGridforceGrid(int) finished\n" << endi);
1275 }
1276 // END gf
1277 
1278 
1279 // initiating replica
1280 void Node::sendCheckpointReq(int remote, const char *key, int task, Lattice &lat, ControllerState &cs) {
1281  CheckpointMsg *msg = new (1+strlen(key),0) CheckpointMsg;
1282  msg->replica = CmiMyPartition();
1283  msg->task = task;
1284  msg->checkpoint.lattice = lat;
1285  msg->checkpoint.state = cs;
1286  strcpy(msg->key,key);
1287  envelope *env = UsrToEnv(CheckpointMsg::pack(msg));
1288  CmiSetHandler(env,recvCheckpointCReq_index);
1289 #if CMK_HAS_PARTITION
1290  CmiInterSyncSendAndFree(CkMyPe(),remote,env->getTotalsize(),(char*)env);
1291 #else
1292  CmiSyncSendAndFree(CkMyPe(),env->getTotalsize(),(char*)env);
1293 #endif
1294 }
1295 
1296 // responding replica
1297 extern "C" {
1298  void recvCheckpointCReq_handler(envelope *env) {
1299  Node::Object()->recvCheckpointReq(CheckpointMsg::unpack(EnvToUsr(env)));
1300  }
1301 }
1302 
1303 // responding replica
1305  state->controller->recvCheckpointReq(msg->key,msg->task,msg->checkpoint);
1306 
1307  int remote = msg->replica;
1308  msg->replica = CmiMyPartition();
1309  envelope *env = UsrToEnv(CheckpointMsg::pack(msg));
1310  CmiSetHandler(env,recvCheckpointCAck_index);
1311 #if CMK_HAS_PARTITION
1312  CmiInterSyncSendAndFree(CkMyPe(),remote,env->getTotalsize(),(char*)env);
1313 #else
1314  CmiSyncSendAndFree(CkMyPe(),env->getTotalsize(),(char*)env);
1315 #endif
1316 }
1317 
1318 // initiating replica
1319 extern "C" {
1320  void recvCheckpointCAck_handler(envelope *env) {
1321  Node::Object()->recvCheckpointAck(CheckpointMsg::unpack(EnvToUsr(env)));
1322  }
1323 }
1324 
1325 // initiating replica
1327  state->controller->recvCheckpointAck(msg->checkpoint);
1328  delete msg;
1329 }
1330 
1331 
1333  //CmiPrintf("sendEnableExitScheduler\n");
1334  CProxy_Node nodeProxy(thisgroup);
1335  nodeProxy[0].recvEnableExitScheduler();
1336 }
1337 
1339  //CmiPrintf("recvEnableExitScheduler\n");
1341 }
1342 
1344  if ( CkMyPe() ) {
1346  } else {
1347  CkStartQD(CkIndex_Node::exitScheduler(), &thishandle);
1348  }
1349 }
1350 
1352  //CmiPrintf("exitScheduler %d\n",CkMyPe());
1353  CsdExitScheduler();
1354 }
1355 
1357  CProxy_Node nodeProxy(thisgroup);
1358  nodeProxy[0].recvEnableEarlyExit();
1359 }
1360 
1362  enableEarlyExit();
1363 }
1364 
1366  if ( CkMyPe() ) {
1368  } else {
1369  CkStartQD(CkIndex_Node::earlyExit(),&thishandle);
1370  }
1371 }
1372 
1373 void Node::earlyExit(void) {
1374  NAMD_die("Exiting prematurely; see error messages above.");
1375 }
1376 
1377 
1378 //------------------------------------------------------------------------
1379 // Some odd utilities
1380 //------------------------------------------------------------------------
1382 {
1383  this->molecule = state->molecule;
1384  this->parameters = state->parameters;
1385  this->simParameters = state->simParameters;
1386  this->configList = state->configList;
1387  this->pdb = state->pdb;
1388  this->state = state;
1389 }
1390 
1391 // entry methods for BG/P HPM (performance counters) library
1393 #if USE_HPM
1394  HPM_Start("500 steps", localRankOnNode);
1395 #endif
1396 }
1397 
1399 #if USE_HPM
1400  HPM_Stop("500 steps", localRankOnNode);
1401  HPM_Print(CkMyPe(), localRankOnNode);
1402 #endif
1403 }
1404 
1405 void Node::traceBarrier(int turnOnTrace, int step){
1406  curTimeStep = step;
1407  if(turnOnTrace) traceBegin();
1408  else traceEnd();
1409 
1410  if(turnOnTrace) CmiTurnOnStats();
1411  else CmiTurnOffStats();
1412 
1413  //CkPrintf("traceBarrier (%d) at step %d called on proc %d\n", turnOnTrace, step, CkMyPe());
1414  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1415  CkCallback cb(CkIndex_Node::resumeAfterTraceBarrier(NULL), nd[0]);
1416  contribute(0, NULL, CkReduction::sum_int, cb);
1417 
1418 }
1419 
1420 void Node::resumeAfterTraceBarrier(CkReductionMsg *msg){
1421  CmiAssert(CmiMyPe()==0);
1422  delete msg;
1424 }
1425 
1426 void Node::papiMeasureBarrier(int turnOnMeasure, int step){
1427 #ifdef MEASURE_NAMD_WITH_PAPI
1428  curMFlopStep = step;
1429  double results[NUM_PAPI_EVENTS+1];
1430 
1431  if(turnOnMeasure){
1432  CkpvAccess(papiEvents)[NUM_PAPI_EVENTS]=CmiWallTimer();
1433 
1434  long long counters[NUM_PAPI_EVENTS+1];
1435  int ret=PAPI_start_counters(CkpvAccess(papiEvents), NUM_PAPI_EVENTS);
1436  if(ret==PAPI_OK)
1437  {
1438  // CkPrintf("traceBarrier start counters (%d) at step %d called on proc %d\n", turnOnMeasure, step, CkMyPe());
1439  }
1440  else
1441  {
1442  CkPrintf("error PAPI_start_counters (%d) at step %d called on proc %d\n",ret , step, CkMyPe());
1443  }
1444  if(PAPI_read_counters(counters, NUM_PAPI_EVENTS)!=PAPI_OK)
1445  {
1446  CkPrintf("error PAPI_read_counters %d\n",PAPI_read_counters(counters, NUM_PAPI_EVENTS));
1447  };
1448  }else{
1449  long long counters[NUM_PAPI_EVENTS+1];
1450  for(int i=0;i<NUM_PAPI_EVENTS;i++) counters[i]=0LL;
1451  if(PAPI_read_counters(counters, NUM_PAPI_EVENTS)==PAPI_OK)
1452  {
1453 #if !MEASURE_PAPI_SPP
1454  results[0] = (double)counters[0]/1e6;
1455  results[1] = (double)counters[1]/1e6;
1456 #else
1457  for(int i=0;i<NUM_PAPI_EVENTS;i++) results[i] = counters[i]/1e6;
1458 #endif
1459  // for(int i=0;i<NUM_PAPI_EVENTS;i++) CkPrintf("[%d] counter %d is %ld\n",CkMyPe(),i,counters[i]);
1460  }
1461  else
1462  {
1463  // CkPrintf("error PAPI_read_counters %d\n",PAPI_read_counters(counters, NUM_PAPI_EVENTS));
1464  }
1465  // CkPrintf("traceBarrier stop counters (%d) at step %d called on proc %d\n", turnOnMeasure, step, CkMyPe());
1466 
1467  PAPI_stop_counters(counters, NUM_PAPI_EVENTS);
1468  }
1469  if(CkMyPe()==0)
1470  // CkPrintf("traceBarrier (%d) at step %d called on proc %d\n", turnOnMeasure, step, CkMyPe());
1471  results[NUM_PAPI_EVENTS]=CkpvAccess(papiEvents)[NUM_PAPI_EVENTS]; //starttime
1472  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1473  CkCallback cb(CkIndex_Node::resumeAfterPapiMeasureBarrier(NULL), nd[0]);
1474  contribute(sizeof(double)*(NUM_PAPI_EVENTS+1), &results, CkReduction::sum_double, cb);
1475 #endif
1476 }
1477 
1478 void Node::resumeAfterPapiMeasureBarrier(CkReductionMsg *msg){
1479 #ifdef MEASURE_NAMD_WITH_PAPI
1480 
1481  if(simParameters->papiMeasureStartStep != curMFlopStep) {
1482  double *results = (double *)msg->getData();
1483  double endtime=CmiWallTimer();
1484  int bstep = simParameters->papiMeasureStartStep;
1485  int estep = bstep + simParameters->numPapiMeasureSteps;
1486 #if MEASURE_PAPI_SPP
1487  CkPrintf("SPP INFO: PAPI_FP_OPS timestep %d to %d is %lf(1e6)\n", bstep,estep,results[0]);
1488  CkPrintf("SPP INFO: PAPI_TOT_INS timestep %d to %d is %lf(1e6)\n", bstep,estep,results[1]);
1489  CkPrintf("SPP INFO: perf::PERF_COUNT_HW_CACHE_LL:MISS timestep %d to %d is %lf(1e6)\n", bstep,estep,results[2]);
1490  CkPrintf("SPP INFO: DATA_PREFETCHER:ALL timestep %d to %d is %lf(1e6)\n", bstep,estep,results[3]);
1491  CkPrintf("SPP INFO: PAPI_L1_DCA timestep %d to %d is %lf(1e6)\n", bstep,estep,results[4]);
1492  CkPrintf("SPP INFO: PAPI_TOT_CYC timestep %d to % is %lf(1e6)\n", bstep,estep,results[5]);
1493  // CkPrintf("SPP INFO: INSTRUCTION_FETCH_STALL timestep %d to %d is %lf(1e6)\n", bstep,estep,results[6]);
1494  // CkPrintf("SPP INFO: WALLtime timestep %d to %d is %lf\n", bstep,estep,endtime-results[NUM_PAPI_EVENTS]/CkNumPes());
1495  CkPrintf("SPP INFO: WALLtime timestep %d to %d is %lf\n", bstep,estep,endtime-results[NUM_PAPI_EVENTS]);
1496  CkPrintf("SPP INFO: endtime %lf avgtime %lf tottime %lf\n", endtime,results[NUM_PAPI_EVENTS]/CkNumPes(),results[NUM_PAPI_EVENTS] );
1497 #else
1498  if(CkpvAccess(papiEvents)[0] == PAPI_FP_INS){
1499  double totalFPIns = results[0];
1500  if(CkpvAccess(papiEvents)[1] == PAPI_FMA_INS) totalFPIns += (results[1]*2);
1501  CkPrintf("FLOPS INFO: from timestep %d to %d, the total FP instruction of NAMD is %lf(x1e6) per processor\n",
1502  bstep, estep, totalFPIns/CkNumPes());
1503  }else{
1504  char nameBuf[PAPI_MAX_STR_LEN];
1505  CkPrintf("PAPI COUNTERS INFO: from timestep %d to %d, ",
1506  bstep, estep);
1507  for(int i=0; i<NUM_PAPI_EVENTS; i++) {
1508  PAPI_event_code_to_name(CkpvAccess(papiEvents)[i], nameBuf);
1509  CkPrintf("%s is %lf(x1e6), ", nameBuf, results[i]/CkNumPes());
1510  }
1511  CkPrintf("per processor\n");
1512  }
1513 #endif
1514  }
1515  delete msg;
1516  state->controller->resumeAfterPapiMeasureBarrier(curMFlopStep);
1517 #endif
1518 }
1519 
1520 extern char *gNAMDBinaryName;
1521 void Node::outputPatchComputeMaps(const char *filename, int tag){
1523 
1524  int numpes = CkNumPes();
1525  int nodesize = CkMyNodeSize();
1527  numpes = simParameters->simulatedPEs;
1528  nodesize = simParameters->simulatedNodeSize;
1529  }
1530 
1531  char fname[128];
1532  sprintf(fname, "mapdump_%s.%d_%d_%d_%s", filename, numpes, nodesize, tag, gNAMDBinaryName);
1533 
1534  FILE *fp = fopen(fname, "w");
1535  if(fp == NULL) {
1536  NAMD_die("Error in outputing PatchMap and ComputeMap info!\n");
1537  return;
1538  }
1539  PatchMap *pMap = PatchMap::Object();
1540  ComputeMap *cMap = ComputeMap::Object();
1541  int numPatches = pMap->numPatches();
1542  int numComputes = cMap->numComputes();
1543  fprintf(fp, "%d %d %d %d %d %d %d\n", numpes, nodesize, numPatches, numComputes,
1544  pMap->gridsize_a(), pMap->gridsize_b(), pMap->gridsize_c());
1545  //output PatchMap info
1546  for(int i=0; i<numPatches; i++) {
1547  #ifdef MEM_OPT_VERSION
1548  fprintf(fp, "%d %d\n", pMap->numAtoms(i), pMap->node(i));
1549  #else
1550  fprintf(fp, "%d %d\n", pMap->patch(i)->getNumAtoms(), pMap->node(i));
1551  #endif
1552  }
1553 
1554  //output ComputeMap info
1555  for(int i=0; i<numComputes; i++) {
1556  fprintf(fp, "%d %d %d %d\n", cMap->node(i), cMap->type(i), cMap->pid(i,0), cMap->pid(i,1));
1557  }
1558 }
1559 
1560 
1561 //======================================================================
1562 // Private functions
1563 
1564 #include "Node.def.h"
1565 
static Node * Object()
Definition: Node.h:86
#define GRIDFORCEGRIDTAG
Definition: common.h:158
void allocateMap(int nAtomIDs)
Definition: AtomMap.C:161
#define NAMD_EVENT_STOP(eon, id)
std::ostream & iINFO(std::ostream &s)
Definition: InfoStream.C:81
bool specialTracing
Definition: Node.h:163
Bool simulateInitialMapping
void recvCheckpointReq(const char *key, int task, checkpoint &cp)
Definition: Controller.C:4087
void recvCheckpointCReq_handler(envelope *)
Definition: Node.C:1298
void setPatchMapArrived(bool s)
Definition: WorkDistrib.h:107
void runSequencer(void)
Definition: HomePatch.C:269
void createProxies(void)
Definition: ProxyMgr.C:417
void end(void)
Definition: MStream.C:176
ControllerState state
Definition: Controller.h:274
void setRecvSpanning()
Definition: ProxyMgr.C:371
void receive_SimParameters(MIStream *)
int proxyRecvSpanning
Definition: ProxyMgr.C:46
BOCgroup group
Definition: Node.h:68
int numComputes(void)
Definition: ComputeMap.h:101
int getRecvSpanning()
Definition: ProxyMgr.C:376
int curTimeStep
Definition: Node.h:151
int eventEndOfTimeStep
Definition: Node.C:286
void send_GoMolecule(MOStream *)
Definition: GoMolecule.C:1635
void mallocTest(int)
Definition: Node.C:392
void startHPM()
Definition: Node.C:1392
static ProxyMgr * Object()
Definition: ProxyMgr.h:394
void exitScheduler(void)
Definition: Node.C:1351
IMDOutput * imd
Definition: Node.h:183
void saveMolDataPointers(NamdState *)
Definition: Node.C:1381
void receive_GoMolecule(MIStream *)
Definition: GoMolecule.C:1744
LdbCoordinator * ldbCoordinator
Definition: Node.h:203
int gridsize_c(void) const
Definition: PatchMap.h:66
static PatchMap * Object()
Definition: PatchMap.h:27
void sendEnableEarlyExit(void)
Definition: Node.C:1356
void send_Molecule(MOStream *)
Definition: Molecule.C:5448
static void exit(int status=0)
Definition: BackEnd.C:276
Definition: Vector.h:64
static AtomMap * Instance()
Definition: AtomMap.C:125
Output * output
Definition: Node.h:182
SimParameters * simParameters
Definition: Node.h:178
int task
Definition: Node.C:87
void setSendSpanning()
Definition: ProxyMgr.C:362
static void pack_grid(GridforceGrid *grid, MOStream *msg)
Definition: GridForceGrid.C:50
#define DebugM(x, y)
Definition: Debug.h:59
void createLoadBalancer()
double startupTime
Definition: Node.C:287
HomePatchList * homePatchList()
Definition: PatchMap.C:438
std::ostream & endi(std::ostream &s)
Definition: InfoStream.C:54
#define ALLBUTME
Definition: Communicate.h:14
BigReal z
Definition: Vector.h:66
void enableScriptBarrier()
Definition: Node.C:1140
int getSendSpanning()
Definition: ProxyMgr.C:367
char const *const NamdProfileEventStr[]
void scriptSet(const char *, const char *)
char value[MAX_SCRIPT_PARAM_SIZE]
Definition: Node.h:75
void sendEnableExitScheduler(void)
Definition: Node.C:1332
void recvCheckpointReq(CheckpointMsg *)
Definition: Node.C:1304
static void messageStartUp()
Definition: Node.C:419
void stopHPM()
Definition: Node.C:1398
char * gNAMDBinaryName
Definition: BackEnd.C:237
void reloadCharges(float charge[], int n)
#define iout
Definition: InfoStream.h:51
Patch * patch(PatchID pid)
Definition: PatchMap.h:235
int curMFlopStep
Definition: Node.h:156
#define STATICPARAMSTAG
Definition: common.h:153
Molecule * node_molecule
Definition: Node.C:425
void outputPatchComputeMaps(const char *filename, int tag)
Definition: Node.C:1521
int loadStructure(const char *, const char *, int)
Definition: NamdState.C:153
void createComputes(ComputeMap *map)
Definition: ComputeMgr.C:1009
int replica
Definition: Node.C:88
void split(int iStream, int numStreams)
Definition: Random.h:77
void recvCheckpointAck(CheckpointMsg *)
Definition: Node.C:1326
void patchMapInit(void)
Definition: WorkDistrib.C:1109
void openSync()
Definition: Sync.C:63
ComputeMap * computeMap
Definition: Node.h:202
void runController(void)
Definition: NamdState.C:80
double memusage_MB()
Definition: memusage.h:13
virtual void reinitialize(SimParameters *simParams, MGridforceParams *mgridParams)=0
void sendComputeMap(void)
Definition: WorkDistrib.C:1082
ResizeArrayIter< T > end(void) const
Definition: Output.h:43
void registerUserEventsForAllComputeObjs()
Definition: ComputeMgr.C:830
void sendBuildCudaExclusions()
Definition: ComputeMgr.C:1448
void resumeAfterPapiMeasureBarrier(CkReductionMsg *msg)
Definition: Node.C:1478
void initialize(PatchMap *pmap, ComputeMap *cmap, int reinit=0)
void scriptParam(ScriptParamMsg *)
Definition: Node.C:1148
Definition: Random.h:37
int index_for_key(const char *key)
void run()
Definition: Node.C:1105
int gridsize_a(void) const
Definition: PatchMap.h:64
void enableExitScheduler(void)
Definition: Node.C:1343
void buildProxySpanningTree()
Definition: ProxyMgr.C:559
void createHomePatches(void)
Definition: WorkDistrib.C:893
void NAMD_bug(const char *err_msg)
Definition: common.C:129
ComputeType type(ComputeID cid)
Definition: ComputeMap.C:120
ScaledPosition smax
Definition: ComputeMsm.h:21
Controller::checkpoint checkpoint
Definition: Node.C:89
NamdState * state
Definition: Node.h:181
#define MGRIDFORCEPARAMS_DEFAULTKEY
MGridforceParams * find_key(const char *key)
void traceBarrier(int turnOnTrace, int step)
Definition: Node.C:1405
void updateGridScale(const char *key, Vector scale)
Definition: Node.C:1204
gridSize z
static void pme_select()
static void messageRun()
Definition: Node.C:1096
void scriptBarrier(void)
Definition: Node.C:1144
static Sync * Object()
Definition: Sync.h:50
void recvCheckpointAck(checkpoint &cp)
Definition: Controller.C:4117
void reloadStructure(const char *, const char *)
Definition: Node.C:1007
BigReal x
Definition: Vector.h:66
AtomMap * atomMap
Definition: Node.h:200
void recvEnableExitScheduler(void)
Definition: Node.C:1338
int numAtoms
Definition: Molecule.h:557
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ const int numPatches
GridforceGrid * get_gridfrc_grid(int gridnum) const
Definition: Molecule.h:1280
void NAMD_die(const char *err_msg)
Definition: common.C:85
PDB * pdb
Definition: Node.h:180
static LdbCoordinator * Object()
ConfigList * configList
Definition: Node.h:179
static AtomMap * Object()
Definition: AtomMap.h:36
#define BUFSIZE
Definition: Communicate.h:15
MGridforceParamsList mgridforcelist
static void nonbonded_select()
void recvEnableEarlyExit(void)
Definition: Node.C:1361
void sendPatchMap(void)
Definition: WorkDistrib.C:982
void send_Parameters(MOStream *)
Definition: Parameters.C:5048
int isRecvSpanningTreeOn()
Parameters * parameters
Definition: Node.h:177
static ComputeMap * Instance()
Definition: ComputeMap.C:26
void resumeAfterTraceBarrier(CkReductionMsg *msg)
Definition: Node.C:1420
unsigned int randomSeed
CkpvDeclare(AtomMap *, AtomMap_instance)
WorkDistrib * workDistrib
Definition: Node.h:166
~Node(void)
Definition: Node.C:356
Parameters * node_parameters
Definition: Node.C:424
int numPatches(void) const
Definition: PatchMap.h:59
int node(int pid) const
Definition: PatchMap.h:114
static GridforceGrid * unpack_grid(int gridnum, MIStream *msg)
Definition: GridForceGrid.C:60
#define NAMD_EVENT_START_EX(eon, id, str)
#define SIMPARAMSTAG
Definition: common.h:152
SimParameters * node_simParameters
Definition: Node.C:423
Random * rand
Definition: Node.h:172
void mallocTestQd(void)
Definition: Node.C:404
void mapComputes(void)
Definition: WorkDistrib.C:2274
void recvCheckpointCAck_handler(envelope *)
Definition: Node.C:1320
static ComputeMap * Object()
Definition: ComputeMap.h:89
PatchMap * patchMap
Definition: Node.h:201
ScaledPosition smin
Definition: ComputeMsm.h:21
void useController(Controller *controllerPtr)
Definition: NamdState.C:75
void papiMeasureBarrier(int turnOnMeasure, int step)
Definition: Node.C:1426
BigReal y
Definition: Vector.h:66
void resendMolecule2()
Definition: Node.C:1054
int getNumAtoms()
Definition: Patch.h:105
void resendMolecule()
Definition: Node.C:1020
void distributeHomePatches(void)
Definition: WorkDistrib.C:934
void setProxyTreeBranchFactor(int dim)
Definition: ProxyMgr.C:380
colvarmodule * colvars
Definition: Node.h:184
void startup()
Definition: Node.C:429
int node(ComputeID cid)
Definition: ComputeMap.h:106
k< npairi;++k){TABENERGY(const int numtypes=simParams->tableNumTypes;const float table_spacing=simParams->tableSpacing;const int npertype=(int)(namdnearbyint(simParams->tableMaxDist/simParams->tableSpacing)+1);) int table_i=(r2iilist[2 *k] >> 14)+r2_delta_expc;const int j=pairlisti[k];#define p_j BigReal diffa=r2list[k]-r2_table[table_i];#define table_four_i TABENERGY(register const int tabtype=-1-(lj_pars->A< 0?lj_pars->A:0);) BigReal kqq=kq_i *p_j-> charge
gridSize y
int set_gridfrc_grid(int gridnum, GridforceGrid *grid)
Definition: Molecule.h:1289
int pid(ComputeID cid, int i)
Definition: ComputeMap.C:109
#define MOLECULETAG
Definition: common.h:154
int isSendSpanningTreeOn()
Node(GroupInitMsg *msg)
Definition: Node.C:291
void resumeAfterTraceBarrier(int)
Definition: Controller.C:4157
void sendCheckpointReq(int remote, const char *key, int task, Lattice &lat, ControllerState &cs)
Definition: Node.C:1280
ComputeMgr * computeMgr
Definition: Node.h:169
int b_p() const
Definition: Lattice.h:274
int mallocTest_size
Definition: Node.h:129
gridSize x
void enableEarlyExit(void)
Definition: Node.C:1365
void receive_Molecule(MIStream *)
Definition: Molecule.C:5806
void earlyExit(void)
Definition: Node.C:1373
MGridforceParams * at_index(int idx)
int a_p() const
Definition: Lattice.h:273
static PatchMap * Instance()
Definition: PatchMap.C:32
Molecule * molecule
Definition: Node.h:176
int gridsize_b(void) const
Definition: PatchMap.h:65
void useSequencer(Sequencer *sequencerPtr)
Definition: HomePatch.C:265
void get_extremes(ScaledPosition &xmin, ScaledPosition &xmax) const
Definition: PDB.h:104
ResizeArrayIter< T > begin(void) const
char param[MAX_SCRIPT_PARAM_SIZE]
Definition: Node.h:74
void receive_Parameters(MIStream *)
Definition: Parameters.C:5424
virtual void set_scale(Vector s)=0
void reloadCharges(const char *filename)
Definition: Node.C:1153
void send_SimParameters(MOStream *)
int c_p() const
Definition: Lattice.h:275
void reloadGridforceGrid(const char *key)
Definition: Node.C:1176
void assignNodeToPatch(void)
Definition: WorkDistrib.C:1323
char * key
Definition: Node.C:90
int proxySendSpanning
Definition: ProxyMgr.C:45