Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members

BackEnd.C File Reference

#include "BackEnd.h"
#include "ProcessorPrivate.h"
#include "common.h"
#include "Node.h"
#include "memusage.h"
#include <new>
#include "Lattice.h"
#include "ComputeMoa.h"
#include "ComputeMsmMsa.h"
#include "main.decl.h"
#include "main.h"
#include "BOCgroup.h"
#include "WorkDistrib.decl.h"
#include "ProxyMgr.decl.h"
#include "PatchMgr.decl.h"
#include "ReductionMgr.decl.h"
#include "CollectionMgr.decl.h"
#include "CollectionMaster.decl.h"
#include "CollectionMgr.h"
#include "CollectionMaster.h"
#include "BroadcastMgr.decl.h"
#include "LdbCoordinator.decl.h"
#include "Sync.decl.h"

Go to the source code of this file.

Functions

void _initCharm (int, char **)
 CkpvStaticDeclare (int, exitSchedHndlr)
void exit_sched (void *msg)
void register_exit_sched (void)
void NAMD_new_handler ()
void cuda_getargs (char **)
void cuda_initialize ()
void all_init (int argc, char **argv)
void after_backend_init (int argc, char **argv)
void master_init (int argc, char **argv)
void slave_init (int argc, char **argv)

Variables

float cpuTime_start
float wallTime_start
char * gNAMDBinaryName = NULL


Function Documentation

void _initCharm (int, char **)
 

Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved.

Referenced by all_init().

void after_backend_init (int argc, char **argv)
 

Definition at line 48 of file mainfunc.C.

References CHDIR, CWDSIZE, ScriptTcl::eval(), BackEnd::exit(), GETCWD, iINFO(), iout, ScriptTcl::load(), NAMD_die(), NAMD_err(), Node::Object(), PATHSEP, PATHSEPSTR, ScriptTcl::run(), and Node::setScript().

// after_backend_init: create the ScriptTcl object, register it with Node, and
// feed it the configuration file(s) named on the command line, then call
// BackEnd::exit().
//   argc - recomputed below by scanning argv for its NULL terminator
//   argv - NULL-terminated argument vector; a config-file argument containing
//          a directory part is modified in place (its last path separator is
//          overwritten with 0)
// With NAMD_TCL every argument is processed in a loop: "--name value" pairs
// are passed to script->eval() as "name value", any other argument is treated
// as a config file and passed to script->load(); script->run() is called once
// after the loop. Without NAMD_TCL only the last argument is used and it is
// passed to script->run(confFile).
// For each config file the working directory is first reset to the original
// cwd, then changed to the file's directory (if it has one), and the file is
// checked with stat() before use.
// NOTE(review): the sprintf() calls below copy command-line text into fixed
// 1024-byte buffers with no length check — very long arguments can overflow.
00048                                               {
00049 #define CWDSIZE 1024
00050   char origcwd_buf[CWDSIZE];
00051   char currentdir_buf[CWDSIZE];
00052 
00053   ScriptTcl *script = new ScriptTcl;
00054   Node::Object()->setScript(script);
00055 
00056   for(argc = 0; argv[argc]; ++argc);
00057   if ( argc < 2 ) {
00058     NAMD_die("No simulation config file specified on command line.");
00059   }
00060   char *origcwd = GETCWD(origcwd_buf,CWDSIZE);
00061   if ( ! origcwd ) NAMD_err("getcwd");
00062 #ifdef NAMD_TCL
// The brace opened by this for loop is closed at listing line 00153.
00063   for(int i = 1; i < argc; ++i) {
00064   if ( strstr(argv[i],"--") == argv[i] ) {
00065     char buf[1024];
00066     if ( i + 1 == argc ) {
00067       sprintf(buf, "missing argument for command line option %s", argv[i]);
00068       NAMD_die(buf);
00069     }
00070     sprintf(buf, "%s %s", argv[i]+2, argv[i+1]);
00071     iout << iINFO << "Command-line argument is --" << buf << "\n" << endi;
00072     script->eval(buf);
00073     ++i;
00074     continue;
00075   }
00076   char *confFile = argv[i];
00077 #else
00078   char *confFile = argv[argc-1];
00079 #endif
00080   iout << iINFO << "Configuration file is " << confFile << "\n" << endi;
00081 
00082   char *currentdir=confFile;
00083   char *tmp;
00084   for(tmp=confFile;*tmp;++tmp); // find final null
00085   for( ; tmp != confFile && *tmp != PATHSEP; --tmp); // find last '/'
00086 #if defined(WIN32) && !defined(__CYGWIN__)
00087   if (tmp == confFile) {
00088     // in case this is under cygwin, search for '/' as well
00089     for(tmp=confFile;*tmp;++tmp); // find final null
00090     for( ; tmp != confFile && *tmp != '/'; --tmp); // find last '/'
00091   }
00092 #endif
// Always start from the original cwd so relative paths of later config files
// are resolved against it rather than against a previous file's directory.
00093   if ( CHDIR(origcwd) ) NAMD_err(origcwd);
00094   if ( tmp != confFile )
00095   {
// Split "dir<sep>file" in place: currentdir keeps the directory part,
// confFile now points at the bare file name.
00096     *tmp = 0; confFile = tmp + 1;
00097     if ( CHDIR(currentdir) ) NAMD_err(currentdir);
00098     struct stat statBuf;
00099     if (stat(confFile, &statBuf)) {
00100       char buf[1024];
00101       sprintf(buf,"Unable to access config file %s%c%s",currentdir,PATHSEP,confFile);
00102       NAMD_die(buf);
00103     }
00104     iout << iINFO << "Changed directory to " << currentdir << "\n" << endi;
00105     currentdir = GETCWD(currentdir_buf,CWDSIZE);
00106     if ( ! currentdir ) NAMD_err("getcwd after chdir");
00107   }
00108   else{
00109       if ( *tmp == PATHSEP ){ // config file in / is odd, but it might happen
00110           if ( CHDIR(PATHSEPSTR) ) NAMD_err(PATHSEPSTR);
00111           struct stat statBuf;
00112           if (stat(confFile, &statBuf)) {
00113             char buf[1024];
00114             sprintf(buf,"Unable to access config file %s",confFile);
00115             NAMD_die(buf);
00116           }
00117       }else{ // just a config file name, so the path is the current working path
00118           struct stat statBuf;
00119           if (stat(confFile, &statBuf)) {
00120             char buf[1024];
00121             if ( confFile[0] == '-' || confFile[0] == '+' ) {
00122               sprintf(buf,"Unknown command-line option %s",confFile);
00123             } else {
00124               sprintf(buf,"Unable to access config file %s",confFile);
00125             }
00126             NAMD_die(buf);
00127           }
00128           // build "." + PATHSEP in the function-scope currentdir_buf: currentdir is read
// again in the MEM_OPT_VERSION section below, after this inner scope has
// closed, so it must not point at a block-local array.
00129           currentdir_buf[0] = '.';
00130           currentdir_buf[1] = PATHSEP;
00131           currentdir_buf[2] = 0;
00132           currentdir = currentdir_buf;
00133           iout << iINFO << "Working in the current directory " << origcwd << "\n" << endi;
00134       }
00135   }
00136 
00137 #ifdef MEM_OPT_VERSION
// Keep a heap copy of the directory string in gWorkDir.
00138     int dirlen = strlen(currentdir);
00139     gWorkDir = new char[dirlen+1];
00140     gWorkDir[dirlen]=0;
00141     memcpy(gWorkDir, currentdir, dirlen);
00142 #endif
00143 
00144   currentdir = NULL;
00145 
00146 #ifdef NAMD_TCL
00147   script->load(confFile);
00148 #else
00149   script->run(confFile);
00150 #endif
00151 
00152 #ifdef NAMD_TCL
// Closes the for loop opened at listing line 00063.
00153 }
00154   script->run();
00155 #endif
00156 
00157   BackEnd::exit();
00158 }

void all_init (int argc, char **argv)
 

Definition at line 86 of file BackEnd.C.

References _initCharm(), cuda_getargs(), cuda_initialize(), NAMD_new_handler(), ProcessorPrivateInit(), and register_exit_sched().

Referenced by master_init(), and slave_init().

// all_init: start-up steps shared by master_init() and slave_init().
//   argc, argv - command-line arguments; forwarded to _initCharm().
// Installs NAMD_new_handler as the allocation-failure handler, calls
// ProcessorPrivateInit() and register_exit_sched(), then _initCharm().
// In NAMD_CUDA builds the CUDA options are parsed before _initCharm() and
// argc is recomputed from argv; cuda_initialize() is called afterwards.
00087 {
00088 #if defined(WIN32) && !defined(__CYGWIN__) && !defined(__MINGW_H)
00089   _set_new_handler(NAMD_new_handler);
00090 #else
00091   std::set_new_handler(NAMD_new_handler);
00092 #endif
00093   ProcessorPrivateInit();
00094   register_exit_sched();
00095 #ifdef NAMD_CUDA
00096   CmiGetArgFlag(argv, "+idlepoll");  // remove +idlepoll if it's still there
00097   cuda_getargs(argv);
// Re-count the arguments after the option parsing above.
00098   argc = CmiGetArgc(argv);
00099 #endif
00100   
00101   _initCharm(argc, argv);  // message main Chare
00102 
00103 #ifdef NAMD_CUDA
// Device setup is performed only when this PE's index is below CkNumPes().
00104   if ( CkMyPe() < CkNumPes() ) cuda_initialize();
00105 #endif
00106 }

CkpvStaticDeclare (int, exitSchedHndlr)
 

void cuda_getargs (char **)
 

Definition at line 76 of file ComputeNonbondedCUDA.C.

References devicelist, ignoresharing, mergegrids, and usedevicelist.

Referenced by all_init().

// cuda_getargs: read the CUDA-related command-line options from argv.
//   +devices <list>  - list string is stored in devicelist; the call's result
//                      is stored in usedevicelist
//   +ignoresharing   - result stored in ignoresharing
//   +mergegrids      - result stored in mergegrids
// devicelist is reset to 0 first, so it stays 0 when +devices is absent
// (presumably CmiGetArgStringDesc leaves it untouched then — TODO confirm).
00076                                {
00077   devicelist = 0;
00078   usedevicelist = CmiGetArgStringDesc(argv, "+devices", &devicelist,
00079         "comma-delimited list of CUDA device numbers such as 0,2,1,2");
00080   ignoresharing = CmiGetArgFlag(argv, "+ignoresharing");
00081   mergegrids = CmiGetArgFlag(argv, "+mergegrids");
00082 }

void cuda_initialize ()
 

Definition at line 150 of file ComputeNonbondedCUDA.C.

References CmiPhysicalNodeID, cuda_die(), cuda_errcheck(), cuda_register_user_events(), devicelist, devicePe, first_pe_sharing_gpu, gpu_is_mine, j, NAMD_bug(), NAMD_die(), next_pe_sharing_gpu, numPesSharingDevice, pesSharingDevice, shared_gpu, and sortop_bitreverse().

Referenced by all_init().

// cuda_initialize: choose and bind the CUDA device used by the calling PE.
// Steps:
//   1. Fetch the list of PEs on this physical node and sanity-check it; on a
//      bad result fall back to treating this PE as alone on its node.
//   2. Build the candidate device list, either from the +devices string or by
//      probing every device reported by cudaGetDeviceCount().
//   3. Map this PE's rank within the node onto a device, collect the PEs that
//      map to the same device (pesSharingDevice) and pick devicePe among them.
//   4. PEs other than devicePe print a message and return without binding.
//   5. devicePe binds with cudaSetDevice(), sets cudaDeviceMapHost and
//      re-checks the bound device's compute mode / capability / host mapping.
//   6. Check that patch_pair, force_list, atom and atom_param have sizes that
//      are multiples of 16 bytes.
// Fatal conditions are reported through cuda_die(), NAMD_die() or NAMD_bug().
00150                        {
00151 
00152   if ( 0 == CkMyPe() ) cuda_register_user_events();
00153 
00154   char host[128];
00155 #ifdef NOHOSTNAME
00156   sprintf(host,"physical node %d", CmiPhysicalNodeID(CkMyPe()));
00157 #else
00158   gethostname(host, 128);  host[127] = 0;
00159 #endif
00160 
00161   int myPhysicalNodeID = CmiPhysicalNodeID(CkMyPe());
00162   int myRankInPhysicalNode;
00163   int numPesOnPhysicalNode;
00164   int *pesOnPhysicalNode;
00165   CmiGetPesOnPhysicalNode(myPhysicalNodeID,
00166                            &pesOnPhysicalNode,&numPesOnPhysicalNode);
00167 
00168   {
// Validate the PE list: it must be strictly increasing and must contain this
// PE at the index reported by CmiPhysicalRank().
00169     int i;
00170     for ( i=0; i < numPesOnPhysicalNode; ++i ) {
00171       if ( i && (pesOnPhysicalNode[i] <= pesOnPhysicalNode[i-1]) ) {
00172         i = numPesOnPhysicalNode;
00173         break;
00174       }
00175       if ( pesOnPhysicalNode[i] == CkMyPe() ) break;
00176     }
00177     if ( i == numPesOnPhysicalNode || i != CmiPhysicalRank(CkMyPe()) ) {
00178       CkPrintf("Bad result from CmiGetPesOnPhysicalNode!\n");
00179       for ( i=0; i < numPesOnPhysicalNode; ++i ) {
00180         CkPrintf("pe %d physnode rank %d of %d is %d\n", CkMyPe(),
00181           i, numPesOnPhysicalNode, pesOnPhysicalNode[i]);
00182       }
// Fall back to a one-element list containing only this PE.
00183       myRankInPhysicalNode = 0;
00184       numPesOnPhysicalNode = 1;
00185       pesOnPhysicalNode = new int[1];
00186       pesOnPhysicalNode[0] = CkMyPe();
00187     } else {
00188       myRankInPhysicalNode = i;
00189     }
00190   }
00191   // CkPrintf("Pe %d ranks %d in physical node\n",CkMyPe(),myRankInPhysicalNode);
00192 
00193   int deviceCount = 0;
00194   cudaGetDeviceCount(&deviceCount);
00195   cuda_errcheck("in cudaGetDeviceCount");
00196   if ( deviceCount <= 0 ) {
00197     cuda_die("No CUDA devices found.");
00198   }
00199 
00200   int *devices;
00201   int ndevices = 0;
00202   int nexclusive = 0;
00203   if ( usedevicelist ) {
// Each loop pass consumes at least one character of devicelist, so at most
// strlen(devicelist) numbers can be stored.
00204     devices = new int[strlen(devicelist)];
00205     int i = 0;
00206     while ( devicelist[i] ) {
// Count only successful conversions: sscanf returns EOF (-1) on input that
// ends before any number, which must not be added to ndevices.
00207       if ( sscanf(devicelist+i,"%d",devices+ndevices) == 1 ) ++ndevices;
00208       while ( devicelist[i] && isdigit(devicelist[i]) ) ++i;
00209       while ( devicelist[i] && ! isdigit(devicelist[i]) ) ++i;
00210     }
00211   } else {
00212     if ( ! CkMyPe() ) {
00213       CkPrintf("Did not find +devices i,j,k,... argument, using all\n");
00214     }
00215     devices = new int[deviceCount];
00216     for ( int i=0; i<deviceCount; ++i ) {
00217       int dev = i % deviceCount;
00218 #if CUDA_VERSION >= 2020
00219       cudaDeviceProp deviceProp;
00220       cudaGetDeviceProperties(&deviceProp, dev);
00221       cuda_errcheck("in cudaGetDeviceProperties");
00222       if ( deviceProp.computeMode != cudaComputeModeProhibited
00223            && (deviceProp.major > 1 || deviceProp.minor >= 1)
00224            && deviceProp.canMapHostMemory
00225            && deviceProp.multiProcessorCount > 2 ) {  // exclude weak cards
00226         devices[ndevices++] = dev;
00227       }
00228       if ( deviceProp.computeMode == cudaComputeModeExclusive ) {
00229         ++nexclusive;
00230       }
00231 #else
00232       devices[ndevices++] = dev;
00233 #endif
00234     }
00235   }
00236 
00237   if ( ! ndevices ) {
00238     cuda_die("All CUDA devices are in prohibited mode, of compute capability 1.0, or otherwise unusable.");
00239   }
00240 
00241   shared_gpu = 0;
00242   gpu_is_mine = 1;
00243   first_pe_sharing_gpu = CkMyPe();
00244   next_pe_sharing_gpu = CkMyPe();
00245 
00246  /* if ( (ndevices >= numPesOnPhysicalNode) || (nexclusive == 0) ) */ {
00247 
00248   int dev;
00249   if ( numPesOnPhysicalNode > 1 ) {
// Spread the node's PEs evenly over the device list by rank.
00250     int myDeviceRank = myRankInPhysicalNode * ndevices / numPesOnPhysicalNode;
00251     dev = devices[myDeviceRank];
00252     devicePe = CkMyPe();
00253     if ( ignoresharing ) {
00254       pesSharingDevice = new int[1];
00255       pesSharingDevice[0] = CkMyPe();
00256       numPesSharingDevice = 1;
00257     } else {
00258       pesSharingDevice = new int[numPesOnPhysicalNode];
00259       devicePe = -1;
00260       numPesSharingDevice = 0;
// Collect every PE that maps to the same device-list slot and select devicePe
// among them using sortop_bitreverse().
00261       for ( int i = 0; i < numPesOnPhysicalNode; ++i ) {
00262         if ( i * ndevices / numPesOnPhysicalNode == myDeviceRank ) {
00263           int thisPe = pesOnPhysicalNode[i];
00264           pesSharingDevice[numPesSharingDevice++] = thisPe;
00265           if ( devicePe < 1 ) devicePe = thisPe;
00266           if ( sortop_bitreverse(thisPe,devicePe) ) devicePe = thisPe;
00267         }
00268       }
// The same physical device listed in another slot means it is shared.
00269       for ( int j = 0; j < ndevices; ++j ) {
00270         if ( devices[j] == dev && j != myDeviceRank ) shared_gpu = 1;
00271       }
00272     }
00273     if ( shared_gpu && devicePe == CkMyPe() ) {
00274       CkPrintf("Pe %d sharing CUDA device %d\n", CkMyPe(), dev);
00275     }
00276   } else {  // in case phys node code is lying
00277     dev = devices[CkMyPe() % ndevices];
00278     devicePe = CkMyPe();
00279     pesSharingDevice = new int[1];
00280     pesSharingDevice[0] = CkMyPe();
00281     numPesSharingDevice = 1;
00282   }
00283 
00284   if ( devicePe != CkMyPe() ) {
00285     CkPrintf("Pe %d physical rank %d will use CUDA device of pe %d\n",
00286              CkMyPe(), myRankInPhysicalNode, devicePe);
00287     return;
00288   }
00289 
00290   // disable token-passing but don't submit local until remote finished
00291   // if shared_gpu is true, otherwise submit all work immediately
00292   first_pe_sharing_gpu = CkMyPe();
00293   next_pe_sharing_gpu = CkMyPe();
00294 
00295   gpu_is_mine = ( first_pe_sharing_gpu == CkMyPe() ); 
00296 
00297   if ( dev >= deviceCount ) {
00298     char buf[256];
00299     sprintf(buf,"Pe %d unable to bind to CUDA device %d on %s because only %d devices are present",
00300                 CkMyPe(), dev, host, deviceCount);
00301     NAMD_die(buf);
00302   }
00303 
00304   cudaError_t err;
00305   cudaDeviceProp deviceProp;
00306   err = cudaGetDeviceProperties(&deviceProp, dev);
00307   if (err == cudaSuccess) {
// totalGlobalMem is a size_t; cast the megabyte count to int so the argument
// matches the %d conversion in the format string.
00308     CkPrintf("Pe %d physical rank %d binding to CUDA device %d on %s: '%s'  Mem: %dMB  Rev: %d.%d\n",
00309              CkMyPe(), myRankInPhysicalNode, dev, host,
00310              deviceProp.name, (int)(deviceProp.totalGlobalMem / (1024*1024)),
00311              deviceProp.major, deviceProp.minor);
00312 
00313     err = cudaSetDevice(dev);
00314   }
00315   if ( err != cudaSuccess) {
00316     char errmsg[1024];
00317     sprintf(errmsg,"CUDA error binding to device %d on pe %d: %s",
00318                         dev, CkMyPe(), cudaGetErrorString(err));
00319     NAMD_die(errmsg);
00320   }
00321 
00322  }  // just let CUDA pick a device for us
00323 
00324   cudaSetDeviceFlags(cudaDeviceMapHost);
00325   cuda_errcheck("in cudaSetDeviceFlags");
00326 
// Re-verify the device that is actually current for this thread.
00327   int dev;
00328   if ( cudaGetDevice(&dev) == cudaSuccess ) {
00329     cudaDeviceProp deviceProp;
00330     cudaGetDeviceProperties(&deviceProp, dev);
00331     cuda_errcheck("in cudaGetDeviceProperties");
00332     if ( deviceProp.computeMode == cudaComputeModeProhibited )
00333       cuda_die("device in prohibited mode");
00334     if ( deviceProp.major < 2 && deviceProp.minor < 1 )
00335       cuda_die("device not of compute capability 1.1 or higher");
00336     if ( ! deviceProp.canMapHostMemory )
00337       cuda_die("device cannot map host memory");
00338   }
00339 
// "& 15" is non-zero exactly when the size is not a multiple of 16.
00340   if ( sizeof(patch_pair) & 15 ) NAMD_bug("sizeof(patch_pair) % 16 != 0");
00341   if ( sizeof(force_list) & 15 ) NAMD_bug("sizeof(force_list) % 16 != 0");
00342   if ( sizeof(atom) & 15 ) NAMD_bug("sizeof(atom) % 16 != 0");
00343   if ( sizeof(atom_param) & 15 ) NAMD_bug("sizeof(atom_param) % 16 != 0");
00344 
00345 }

void exit_sched (void *msg)
 

Definition at line 50 of file BackEnd.C.

Referenced by register_exit_sched().

// exit_sched: handler registered by register_exit_sched(); calls
// CsdExitScheduler(). The msg argument is not used.
00051 {
00052   //  CmiPrintf("Exiting scheduler on %d\n",CmiMyPe());
00053   CsdExitScheduler();
00054 }

void master_init (int argc, char **argv)
 

Definition at line 132 of file BackEnd.C.

References all_init(), BOCgroup::broadcastMgr, BOCgroup::collectionMgr, BOCgroup::computeExtMgr, BOCgroup::computeGBISserMgr, BOCgroup::computeMgr, BOCgroup::computeMsmMgr, BOCgroup::computeMsmMsaMgr, BOCgroup::computeMsmSerialMgr, cpuTime_start, GroupInitMsg::group, BOCgroup::ioMgr, BOCgroup::ldbCoordinator, SlaveInitMsg::master, BOCgroup::nodePmeMgr, BOCgroup::patchMgr, BOCgroup::proxyMgr, BOCgroup::reductionMgr, BOCgroup::sync, wallTime_start, and BOCgroup::workDistrib.

Referenced by BackEnd::init(), and slave_init().

// master_init: start-up path that creates the program's groups and the Node.
//   argc, argv - command-line arguments, forwarded to all_init().
// Records the CPU and wall-clock start times. On any PE other than 0 it runs
// all_init(), enters the scheduler and then calls ConverseExit(). On PE 0 it
// runs all_init(), creates each manager with ckNew() and stores the result in
// a BOCgroup, then creates the Node with a GroupInitMsg carrying that group.
00132                                        {
00133   cpuTime_start = CmiCpuTimer();
00134   wallTime_start = CmiWallTimer();
00135   if ( CmiMyPe() ) {
00136     all_init(argc, argv);
00137     CsdScheduler(-1);
00138     ConverseExit();  // should never return
00139   }
00140 
00141   all_init(argc, argv);
00142 
00143   // Create branch-office chares
00144   BOCgroup group;
00145   group.workDistrib = CProxy_WorkDistrib::ckNew();
00146   group.proxyMgr = CProxy_ProxyMgr::ckNew();
00147   group.patchMgr = CProxy_PatchMgr::ckNew();
00148   group.computeMgr = CProxy_ComputeMgr::ckNew();
00149   group.reductionMgr = CProxy_ReductionMgr::ckNew();
00150   // group.computePmeMgr set in constructor during startup
00151   group.nodePmeMgr = CProxy_NodePmeMgr::ckNew();
00152 #ifdef OPENATOM_VERSION
00153   group.computeMoaMgr = CProxy_ComputeMoaMgr::ckNew();
00154 #endif // OPENATOM_VERSION
00155   group.computeExtMgr = CProxy_ComputeExtMgr::ckNew();
00156   group.computeGBISserMgr = CProxy_ComputeGBISserMgr::ckNew();
00157   group.computeMsmSerialMgr = CProxy_ComputeMsmSerialMgr::ckNew();
00158 #ifdef CHARM_HAS_MSA
00159   group.computeMsmMsaMgr = CProxy_ComputeMsmMsaMgr::ckNew();
00160 #endif
00161   group.computeMsmMgr = CProxy_ComputeMsmMgr::ckNew();
00162 #ifdef MEM_OPT_VERSION
00163   group.ioMgr=CProxy_ParallelIOMgr::ckNew();
00164 #endif
00165 
00166   group.sync = CProxy_Sync::ckNew();
00167 
00168   #ifdef USE_NODEPATCHMGR
00169   group.nodeProxyMgr = CProxy_NodeProxyMgr::ckNew();
00170   #endif
00171   
00172 #if     USE_NODEHELPER
00173   group.nodeHelper = NodeHelper_Init();
00174 #endif
00175 
// The CollectionMaster chare is created first so that its id can be handed to
// the CollectionMgr through a SlaveInitMsg.
00176   CkChareID collectionMaster = CProxy_CollectionMaster::ckNew(0);  
00177   SlaveInitMsg *initmsg7 = new SlaveInitMsg;
00178   initmsg7->master = collectionMaster;
00179   group.collectionMgr = CProxy_CollectionMgr::ckNew(initmsg7);
00180 
00181   group.broadcastMgr = CProxy_BroadcastMgr::ckNew();
00182   group.ldbCoordinator = CProxy_LdbCoordinator::ckNew();
// Hand the completed group record to the Node via its creation message.
00183   GroupInitMsg *msg = new GroupInitMsg;
00184   msg->group = group;
00185   CProxy_Node::ckNew(msg);
00186  
00187 }

void NAMD_new_handler ()
 

Definition at line 72 of file BackEnd.C.

References NAMD_die().

Referenced by all_init().

// NAMD_new_handler: allocation-failure handler installed by all_init().
// Formats a message containing the value of CmiMyPe() and passes it to
// NAMD_die().
// NOTE(review): the #endif on listing line 00073 closes a conditional that
// opens above the listed range; presumably it selects between two function
// signatures, since the WIN32 branch below returns a value — confirm against
// the full source.
00072                         {
00073 #endif
00074   char tmp[100];
00075   sprintf(tmp,"Memory allocation failed on processor %d.",CmiMyPe());
00076   NAMD_die(tmp);
00077 #if defined(WIN32) && !defined(__CYGWIN__) && !defined(__MINGW_H)
00078   return 0;
00079 #endif
00080 }

void register_exit_sched (void) [static]
 

Definition at line 56 of file BackEnd.C.

References exit_sched().

Referenced by all_init().

// register_exit_sched: initialise the exitSchedHndlr Ckpv variable and store
// in it the value returned by registering exit_sched() with
// CmiRegisterHandler().
00057 {
00058   CkpvInitialize(int,exitSchedHndlr);
00059   CkpvAccess(exitSchedHndlr) = CmiRegisterHandler((CmiHandler)exit_sched);
00060 }

void slave_init (int argc, char **argv)
 

Definition at line 112 of file BackEnd.C.

References after_backend_init(), all_init(), and master_init().

Referenced by BackEnd::init().

// slave_init: start-up path for non-master threads.
//   argc, argv - command-line arguments, forwarded to the init routines.
// In CMK_SMP builds the thread with rank 0 performs master_init() instead
// (and after_backend_init() as well when it is PE 0) and then returns.
// Otherwise the thread runs all_init() and enters the scheduler, unless its
// rank is not below CkMyNodeSize().
00113 {
00114 #if CMK_SMP
00115   //the original main thread could now be a comm thread
00116   //and a slave thread could now be the main thread,
00117   //so we have to do the master initialization here
00118   if(CmiMyRank()==0){
00119     master_init(argc, argv);
00120     if(CmiMyPe()==0)
00121       after_backend_init(argc, argv);
00122     return;
00123   }
00124 #endif
00125 
00126   all_init(argc, argv);
00127 
00128   if (CkMyRank() < CkMyNodeSize())      // skip the communication thread
00129     CsdScheduler(-1);
00130 }


Variable Documentation

float cpuTime_start
 

Definition at line 45 of file BackEnd.C.

Referenced by master_init().

char* gNAMDBinaryName = NULL
 

Definition at line 189 of file BackEnd.C.

Referenced by BackEnd::init(), and Node::outputPatchComputeMaps().

float wallTime_start
 

Definition at line 46 of file BackEnd.C.

Referenced by master_init().


Generated on Fri May 25 04:07:17 2012 for NAMD by  doxygen 1.3.9.1