ComputeMgr.C

00001 
00007 #include "InfoStream.h"
00008 #include "ProcessorPrivate.h"
00009 
00010 //#define DEBUGM
00011 #define MIN_DEBUG_LEVEL 1
00012 #include "Debug.h"
00013 
00014 #include "BOCgroup.h"
00015 #include "ComputeMgr.decl.h"
00016 #include "ComputeMgr.h"
00017 #include "ProxyMgr.decl.h"
00018 #include "ProxyMgr.h"
00019 
00020 #include "Node.h"
00021 #include "ComputeMap.h"
00022 #include "PatchMap.h"
00023 #include "PatchMap.inl"
00024 
00025 #include "Compute.h"
00026 #include "ComputeNonbondedUtil.h"
00027 #include "ComputeNonbondedSelf.h"
00028 #include "ComputeNonbondedPair.h"
00029 #include "ComputeNonbondedCUDA.h"
00030 #include "ComputeNonbondedMIC.h"
00031 #include "ComputeAngles.h"
00032 #include "ComputeDihedrals.h"
00033 #include "ComputeImpropers.h"
00034 #include "ComputeThole.h"
00035 #include "ComputeAniso.h"
00036 #include "ComputeCrossterms.h"
00037 // JLai
00038 #include "ComputeGromacsPair.h"
00039 #include "ComputeBonds.h"
00040 #include "ComputeNonbondedCUDAExcl.h"
00041 #include "ComputeFullDirect.h"
00042 #include "ComputeGlobal.h"
00043 #include "ComputeGlobalMsgs.h"
00044 #include "ComputeExt.h"
00045 #include "ComputeQM.h"
00046 #include "ComputeGBISser.h"
00047 #include "ComputeLCPO.h"
00048 #include "ComputeFmmSerial.h"
00049 #include "ComputeMsmSerial.h"
00050 #include "ComputeMsmMsa.h"
00051 #include "ComputeMsm.h"
00052 #include "ComputeDPMTA.h"
00053 #include "ComputeDPME.h"
00054 #include "ComputeDPMEMsgs.h"
00055 #include "ComputePme.h"
00056 // #ifdef NAMD_CUDA
00057 #include "ComputePmeCUDA.h"
00058 #include "ComputeCUDAMgr.h"
00059 #include "CudaComputeNonbonded.h"
00060 #include "ComputePmeCUDAMgr.h"
00061 // #endif
00062 #include "OptPme.h"
00063 #include "ComputeEwald.h"
00064 #include "ComputeEField.h"
00065 /* BEGIN gf */
00066 #include "ComputeGridForce.h"
00067 /* END gf */
00068 #include "ComputeStir.h"
00069 #include "ComputeSphericalBC.h"
00070 #include "ComputeCylindricalBC.h"
00071 #include "ComputeTclBC.h"
00072 #include "ComputeRestraints.h"
00073 #include "ComputeConsForce.h"
00074 #include "ComputeConsForceMsgs.h"
00075 #include "WorkDistrib.h"
00076 
00077 #include "LdbCoordinator.h"
00078 
00079 /* include all of the specific masters we need here */
00080 #include "FreeEnergyEnums.h"
00081 #include "FreeEnergyAssert.h"
00082 #include "FreeEnergyGroup.h"
00083 #include "FreeEnergyVector.h"
00084 #include "FreeEnergyRestrain.h"
00085 #include "FreeEnergyRMgr.h"
00086 #include "FreeEnergyLambda.h"
00087 #include "FreeEnergyLambdMgr.h"
00088 
00089 #include "GlobalMasterTest.h"
00090 #include "GlobalMasterIMD.h"
00091 #include "GlobalMasterTcl.h"
00092 #include "GlobalMasterSMD.h"
00093 #include "GlobalMasterTMD.h"
00094 #include "GlobalMasterSymmetry.h"
00095 #include "GlobalMasterEasy.h"
00096 #include "GlobalMasterMisc.h"
00097 #include "GlobalMasterFreeEnergy.h"
00098 #include "GlobalMasterColvars.h"
00099 
00100 #include "ComputeNonbondedMICKernel.h"
00101 
00102 #include "DeviceCUDA.h"
00103 #ifdef NAMD_CUDA
00104 #ifdef WIN32
00105 #define __thread __declspec(thread)
00106 #endif
00107 extern __thread DeviceCUDA *deviceCUDA;
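// deviceCUDA is a per-PE (thread-local) pointer to this rank's CUDA device
// manager; the #define above maps GCC-style __thread onto __declspec(thread)
// when building on Windows.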
00108 #endif
00109 
00110 ComputeMgr::ComputeMgr()
00111 {
00112     CkpvAccess(BOCclass_group).computeMgr = thisgroup;
00113     computeGlobalObject = 0;
00114     computeGlobalResultsMsgSeq = -1;
00115     computeGlobalResultsMsgMasterSeq = -1;
00116     computeDPMEObject = 0;
00117     computeEwaldObject = 0;
00118     computeNonbondedCUDAObject = 0;
00119     computeNonbondedMICObject = 0;
00120     computeNonbondedWorkArrays = new ComputeNonbondedWorkArrays;
00121     skipSplitting = 0;
00122 
00123     #if defined(NAMD_MIC)
00124       // Create the micPEData flag array (1 bit per PE) and initially mark each PE as "not driving
00125       //   a MIC card" (unset).  PEs that are driving a MIC card will identify themselves during startup.
00126       int numPEs = CkNumPes();
00127       int numInts = ((numPEs + (sizeof(int)*8-1)) & (~(sizeof(int)*8-1))) / (sizeof(int)*8);  // Round numPEs up to a multiple of the bits in an int, then divide by bits per int
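      // Example (assuming a 4-byte int, i.e. 32 bits): 33 PEs -> (33+31) & ~31 = 64,
      // and 64 / 32 = 2 ints.  A minimal sketch of how a 1-bit-per-PE flag in this
      // layout can be set and tested (illustration only, not a call into other code):
      //   micPEData[pe / (sizeof(int)*8)] |= (1 << (pe % (sizeof(int)*8)));  // mark PE
      //   micPEData[pe / (sizeof(int)*8)] &  (1 << (pe % (sizeof(int)*8)));  // test PE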
00128       micPEData = new int[numInts];
00129       if (micPEData == NULL) { NAMD_die("Unable to allocate memory for micPEData"); }
00130       memset(micPEData, 0, sizeof(int) * numInts);
00131     #else
00132       micPEData = NULL;
00133     #endif
00134 }
00135 
00136 ComputeMgr::~ComputeMgr(void)
00137 {
00138     delete computeNonbondedWorkArrays;
00139 }
00140 
00141 void ComputeMgr::updateComputes(int ep, CkGroupID chareID)
00142 {
00143     updateComputesReturnEP = ep;
00144     updateComputesReturnChareID = chareID;
00145     updateComputesCount = CkNumPes();
00146 
00147     if (CkMyPe())
00148     {
00149         NAMD_bug("updateComputes signaled on wrong Pe!");
00150     }
00151 
00152     CkStartQD(CkIndex_ComputeMgr::updateComputes2((CkQdMsg*)0),&thishandle);
00153 }
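// The rest of the update runs as a chain of phases separated by quiescence
// detection: updateComputes2 -> updateComputes3 -> (splitComputes ->
// splitComputes2 ->) updateLocalComputes -> updateLocalComputes2/3/4/5 ->
// doneUpdateLocalComputes, which finally signals the entry point saved above.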
00154 
00155 void ComputeMgr::updateComputes2(CkQdMsg *msg)
00156 {
00157     delete msg;
00158 
00159     CProxy_WorkDistrib wd(CkpvAccess(BOCclass_group).workDistrib);
00160     WorkDistrib  *workDistrib = wd.ckLocalBranch();
00161     workDistrib->saveComputeMapChanges(CkIndex_ComputeMgr::updateComputes3(),thisgroup);
00162 }
00163 
00164 void ComputeMgr::updateComputes3()
00165 {
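    // On the first pass, split computes whose ComputeMap entries request extra
    // partitions; once skipSplitting is set, later updates jump straight to
    // updateLocalComputes.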
00166     if ( skipSplitting ) {
00167       CProxy_ComputeMgr(thisgroup).updateLocalComputes();
00168     } else {
00169       CProxy_ComputeMgr(thisgroup).splitComputes();
00170       skipSplitting = 1;
00171     }
00172 }
00173 
00174 void ComputeMgr::splitComputes()
00175 {
00176   if ( ! CkMyRank() ) {
00177     ComputeMap *computeMap = ComputeMap::Object();
00178     const int nc = computeMap->numComputes();
00179 
00180     for (int i=0; i<nc; i++) {
00181       int nnp = computeMap->newNumPartitions(i);
00182       if ( nnp > 0 ) {
00183         if ( computeMap->numPartitions(i) != 1 ) {
00184           CkPrintf("Warning: unable to partition compute %d\n", i);
00185           computeMap->setNewNumPartitions(i,0);
00186           continue;
00187         }
00188         //CkPrintf("splitting compute %d by %d\n",i,nnp);
00189         computeMap->setNumPartitions(i,nnp);
00190         if (computeMap->newNode(i) == -1) {
00191           computeMap->setNewNode(i,computeMap->node(i));
00192         }
00193         for ( int j=1; j<nnp; ++j ) {
00194           int newcid = computeMap->cloneCompute(i,j);
00195           //CkPrintf("compute %d partition %d is %d\n",i,j,newcid);
00196         }
00197       }
00198     }
00199     computeMap->extendPtrs();
00200   }
00201 
00202   if (!CkMyPe())
00203   {
00204     CkStartQD(CkIndex_ComputeMgr::splitComputes2((CkQdMsg*)0), &thishandle);
00205   }
00206 }
00207 
00208 void ComputeMgr::splitComputes2(CkQdMsg *msg)
00209 {
00210     delete msg;
00211     CProxy_ComputeMgr(thisgroup).updateLocalComputes();
00212 }
00213 
00214 void ComputeMgr::updateLocalComputes()
00215 {
00216     ComputeMap *computeMap = ComputeMap::Object();
00217     CProxy_ProxyMgr pm(CkpvAccess(BOCclass_group).proxyMgr);
00218     ProxyMgr *proxyMgr = pm.ckLocalBranch();
00219     LdbCoordinator *ldbCoordinator = LdbCoordinator::Object();
00220 
00221     computeFlag.resize(0);
00222 
00223     const int nc = computeMap->numComputes();
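    // For each compute, handle three cases: (1) it remains on this PE but is being
    // split into partitions: its load-balancer object is migrated, the old Compute
    // is deleted, and it is flagged for re-creation (in updateLocalComputes3) if its
    // new home is still this PE; (2) it is moving onto this PE: flag it for creation
    // and create proxies for its patches; (3) it is moving off this PE: delete the
    // local Compute.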
00224     for (int i=0; i<nc; i++) {
00225 
00226         if ( computeMap->node(i) == CkMyPe() &&
00227              computeMap->newNumPartitions(i) > 1 ) {
00228            Compute *c = computeMap->compute(i);
00229            ldbCoordinator->Migrate(c->ldObjHandle,CkMyPe());
00230            delete c;
00231            computeMap->registerCompute(i,NULL);
00232            if ( computeMap->newNode(i) == CkMyPe() ) computeFlag.add(i); 
00233         } else
00234         if (computeMap->newNode(i) == CkMyPe() && computeMap->node(i) != CkMyPe())
00235         {
00236             computeFlag.add(i);
00237             for (int n=0; n < computeMap->numPids(i); n++)
00238             {
00239                 proxyMgr->createProxy(computeMap->pid(i,n));
00240             }
00241         }
00242         else if (computeMap->node(i) == CkMyPe() &&
00243                  (computeMap->newNode(i) != -1 && computeMap->newNode(i) != CkMyPe() ))
00244         {
00245             // CkPrintf("delete compute %d on pe %d\n",i,CkMyPe());
00246             delete computeMap->compute(i);
00247             computeMap->registerCompute(i,NULL);
00248         }
00249     }
00250 
00251     if (!CkMyPe())
00252     {
00253         CkStartQD(CkIndex_ComputeMgr::updateLocalComputes2((CkQdMsg*)0), &thishandle);
00254     }
00255 }
00256 
00257 void
00258 ComputeMgr::updateLocalComputes2(CkQdMsg *msg)
00259 {
00260     delete msg;
00261     CProxy_ComputeMgr(thisgroup).updateLocalComputes3();
00262 }
00263 
00264 void
00265 ComputeMgr::updateLocalComputes3()
00266 {
00267     ComputeMap *computeMap = ComputeMap::Object();
00268     CProxy_ProxyMgr pm(CkpvAccess(BOCclass_group).proxyMgr);
00269     ProxyMgr *proxyMgr = pm.ckLocalBranch();
00270 
00271     ProxyMgr::nodecount = 0;
00272 
00273     const int nc = computeMap->numComputes();
00274 
00275     if ( ! CkMyRank() ) {
00276       for (int i=0; i<nc; i++) {
00277         computeMap->setNewNumPartitions(i,0);
00278         if (computeMap->newNode(i) != -1) {
00279           computeMap->setNode(i,computeMap->newNode(i));
00280           computeMap->setNewNode(i,-1);
00281         }
00282       }
00283     }
00284  
00285     for(int i=0; i<computeFlag.size(); i++) createCompute(computeFlag[i], computeMap);
00286     computeFlag.clear();
00287 
00288     proxyMgr->removeUnusedProxies();
00289 
00290     if (!CkMyPe())
00291     {
00292         CkStartQD(CkIndex_ComputeMgr::updateLocalComputes4((CkQdMsg*)0), &thishandle);
00293     }
00294 }
00295 
00296 void
00297 ComputeMgr::updateLocalComputes4(CkQdMsg *msg)
00298 {
00299     delete msg;
00300     CProxy_ComputeMgr(thisgroup).updateLocalComputes5();
00301 
00302     // store the latest compute map
00303     SimParameters *simParams = Node::Object()->simParameters;
00304     if (simParams->storeComputeMap) {
00305       ComputeMap *computeMap = ComputeMap::Object();
00306       computeMap->saveComputeMap(simParams->computeMapFilename);
00307     }
00308 }
00309 
00310 #if 0
00311 int firstphase = 1;
00312 #endif
00313 
00314 void
00315 ComputeMgr::updateLocalComputes5()
00316 {
00317     if ( ! CkMyRank() ) {
00318       ComputeMap::Object()->checkMap();
00319       PatchMap::Object()->checkMap();
00320     }
00321 
00322     // We always use centralized building of the spanning tree here;
00323     // distributed building of the ST is invoked only from Node.C.
00324     if (proxySendSpanning || proxyRecvSpanning)
00325         ProxyMgr::Object()->buildProxySpanningTree2();
00326 
00327     // this code needs to be turned on if we want to
00328     // shift the creation of ST to the load balancer
00329 
00330 #if 0
00331     if (proxySendSpanning || proxyRecvSpanning)
00332     {
00333         if (firstphase)
00334             ProxyMgr::Object()->buildProxySpanningTree2();
00335         else
00336             if (CkMyPe() == 0)
00337                 ProxyMgr::Object()->sendSpanningTrees();
00338 
00339         firstphase = 0;
00340     }
00341 #endif
00342 
00343     if (!CkMyPe())
00344         CkStartQD(CkIndex_ComputeMgr::doneUpdateLocalComputes(), &thishandle);
00345 }
00346 
00347 void ComputeMgr::doneUpdateLocalComputes()
00348 {
00349 
00350 //  if (!--updateComputesCount) {
00351     DebugM(4, "doneUpdateLocalComputes on Pe("<<CkMyPe()<<")\n");
00352     void *msg = CkAllocMsg(0,0,0);
00353     CkSendMsgBranch(updateComputesReturnEP,msg,0,updateComputesReturnChareID);
00354 //  }
00355 }
00356 
00357 #ifdef NAMD_CUDA
00358 // Helper functions for creating and getting pointers to CUDA computes
00359 CudaComputeNonbonded* getCudaComputeNonbonded() {
00360   return ComputeCUDAMgr::getComputeCUDAMgr()->getCudaComputeNonbonded();
00361 }
00362 
00363 CudaComputeNonbonded* createCudaComputeNonbonded(ComputeID c) {
00364   return ComputeCUDAMgr::getComputeCUDAMgr()->createCudaComputeNonbonded(c);
00365 }
00366 
00367 #ifdef BONDED_CUDA
00368 ComputeBondedCUDA* getComputeBondedCUDA() {
00369   return ComputeCUDAMgr::getComputeCUDAMgr()->getComputeBondedCUDA();
00370 }
00371 
00372 ComputeBondedCUDA* createComputeBondedCUDA(ComputeID c, ComputeMgr* computeMgr) {
00373   return ComputeCUDAMgr::getComputeCUDAMgr()->createComputeBondedCUDA(c, computeMgr);
00374 }
00375 #endif
00376 #endif
00377 
00378 //
00379 void
00380 ComputeMgr::createCompute(ComputeID i, ComputeMap *map)
00381 {
00382     Compute *c;
00383     PatchID pid2[2];
00384     PatchIDList pids;
00385     int trans2[2];
00386     SimParameters *simParams = Node::Object()->simParameters;
00387 
00388     PatchID pid8[8];
00389     int trans8[8];
00390 
00391     switch ( map->type(i) )
00392     {
00393     case computeNonbondedSelfType:
00394 #ifdef NAMD_CUDA
00395         if (simParams->useCUDA2) {
00396           getCudaComputeNonbonded()->registerComputeSelf(i, map->computeData[i].pids[0].pid);
00397         } else {
00398           register_cuda_compute_self(i,map->computeData[i].pids[0].pid);
00399         }
00400 #elif defined(NAMD_MIC)
00401         if (map->directToDevice(i) == 0) {
00402           c = new ComputeNonbondedSelf(i,map->computeData[i].pids[0].pid,
00403                                        computeNonbondedWorkArrays,
00404                                        map->partition(i),map->partition(i)+1,
00405                                        map->numPartitions(i)); // unknown delete
00406           map->registerCompute(i,c);
00407           c->initialize();
00408         } else {
00409           register_mic_compute_self(i,map->computeData[i].pids[0].pid,map->partition(i),map->numPartitions(i));
00410         }
00411 #else
00412         c = new ComputeNonbondedSelf(i,map->computeData[i].pids[0].pid,
00413                                      computeNonbondedWorkArrays,
00414                                      map->partition(i),map->partition(i)+1,
00415                                      map->numPartitions(i)); // unknown delete
00416         map->registerCompute(i,c);
00417         c->initialize();
00418 #endif
00419         break;
00420     case computeLCPOType:
00421         for (int j = 0; j < 8; j++) {
00422           pid8[j] = map->computeData[i].pids[j].pid;
00423           trans8[j] = map->computeData[i].pids[j].trans;
00424         }
00425         c = new ComputeLCPO(i,pid8,trans8,
00426              computeNonbondedWorkArrays,
00427              map->partition(i),map->partition(i)+1,
00428              map->numPartitions(i), 8);
00429         map->registerCompute(i,c);
00430         c->initialize();
00431       
00432         break;
00433     case computeNonbondedPairType:
00434         pid2[0] = map->computeData[i].pids[0].pid;
00435         trans2[0] = map->computeData[i].pids[0].trans;
00436         pid2[1] = map->computeData[i].pids[1].pid;
00437         trans2[1] = map->computeData[i].pids[1].trans;
00438 #ifdef NAMD_CUDA
00439         if (simParams->useCUDA2) {
00440           getCudaComputeNonbonded()->registerComputePair(i, pid2, trans2);
00441         } else {
00442           register_cuda_compute_pair(i,pid2,trans2);
00443         }
00444 #elif defined(NAMD_MIC)
00445         if (map->directToDevice(i) == 0) {
00446           c = new ComputeNonbondedPair(i,pid2,trans2,
00447                                        computeNonbondedWorkArrays,
00448                                        map->partition(i),map->partition(i)+1,
00449                                        map->numPartitions(i)); // unknown delete
00450           map->registerCompute(i,c);
00451           c->initialize();
00452         } else {
00453           register_mic_compute_pair(i,pid2,trans2,map->partition(i),map->numPartitions(i));
00454         }
00455 #else
00456         c = new ComputeNonbondedPair(i,pid2,trans2,
00457                                      computeNonbondedWorkArrays,
00458                                      map->partition(i),map->partition(i)+1,
00459                                      map->numPartitions(i)); // unknown delete
00460         map->registerCompute(i,c);
00461         c->initialize();
00462 #endif
00463         break;
00464 #ifdef NAMD_CUDA
00465     case computeNonbondedCUDAType:
00466       c = computeNonbondedCUDAObject = new ComputeNonbondedCUDA(i,this); // unknown delete
00467       map->registerCompute(i,c);
00468       c->initialize();
00469       break;
00470     case computeNonbondedCUDA2Type:
00471       c = createCudaComputeNonbonded(i);
00472       map->registerCompute(i,c);
00473       // NOTE: initialize() is called at the end of createComputes(),
00474       //       after all computes have been created
00475       //c->initialize();
00476       break;
00477 #endif
00478 #ifdef NAMD_MIC
00479     case computeNonbondedMICType:
00480       c = computeNonbondedMICObject = new ComputeNonbondedMIC(i,this); // unknown delete
00481       map->registerCompute(i,c);
00482       c->initialize();
00483       break;
00484 #endif
00485 #ifdef NAMD_CUDA
00486 #ifdef BONDED_CUDA
00487     case computeBondedCUDAType:
00488       c = createComputeBondedCUDA(i, this);
00489       map->registerCompute(i,c);
00490       break;
00491 #endif
00492 #endif
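    // For the bonded terms below, simParams->bondedCUDA is a bitmask selecting which
    // term types are offloaded to the GPU bonded kernels: 1 = bonds, 2 = angles,
    // 4 = dihedrals, 8 = impropers, 16 = exclusions, 32 = crossterms.  Terms whose
    // bit is clear fall back to the host-side Compute objects created in the else
    // branches.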
00493     case computeExclsType:
00494 #if defined(BONDED_CUDA) && defined(NAMD_CUDA)
00495         if (simParams->bondedCUDA & 16)
00496         {
00497           PatchMap::Object()->basePatchIDList(map->computeData[i].node, pids);
00498           getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
00499         } else
00500 #endif
00501         {
00502           PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
00503           c = new ComputeExcls(i,pids); // unknown delete
00504           map->registerCompute(i,c);
00505           c->initialize();
00506         }
00507         break;
00508     case computeBondsType:
00509 #if defined(BONDED_CUDA) && defined(NAMD_CUDA)
00510         if (simParams->bondedCUDA & 1)
00511         {
00512           PatchMap::Object()->basePatchIDList(map->computeData[i].node, pids);
00513           getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
00514         } else
00515 #endif
00516         {
00517           PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
00518           c = new ComputeBonds(i,pids); // unknown delete
00519           map->registerCompute(i,c);
00520           c->initialize();
00521         }
00522         break;
00523     case computeAnglesType:
00524 #if defined(BONDED_CUDA) && defined(NAMD_CUDA)
00525         if (simParams->bondedCUDA & 2)
00526         {
00527           PatchMap::Object()->basePatchIDList(map->computeData[i].node, pids);
00528           getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
00529         } else
00530 #endif
00531         {
00532           PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
00533           c = new ComputeAngles(i,pids); // unknown delete
00534           map->registerCompute(i,c);
00535           c->initialize();
00536         }
00537         break;
00538     case computeDihedralsType:
00539 #if defined(BONDED_CUDA) && defined(NAMD_CUDA)
00540         if (simParams->bondedCUDA & 4)
00541         {
00542           PatchMap::Object()->basePatchIDList(map->computeData[i].node, pids);
00543           getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
00544         } else
00545 #endif
00546         {
00547           PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
00548           c = new ComputeDihedrals(i,pids); // unknown delete
00549           map->registerCompute(i,c);
00550           c->initialize();
00551         }
00552         break;
00553     case computeImpropersType:
00554 #if defined(BONDED_CUDA) && defined(NAMD_CUDA)
00555         if (simParams->bondedCUDA & 8)
00556         {
00557           PatchMap::Object()->basePatchIDList(map->computeData[i].node, pids);
00558           getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
00559         } else
00560 #endif
00561         {
00562           PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
00563           c = new ComputeImpropers(i,pids); // unknown delete
00564           map->registerCompute(i,c);
00565           c->initialize();
00566         }
00567         break;
00568     case computeTholeType:
00569         PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
00570         c = new ComputeThole(i,pids); // unknown delete
00571         map->registerCompute(i,c);
00572         c->initialize();
00573         break;
00574     case computeAnisoType:
00575         PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
00576         c = new ComputeAniso(i,pids); // unknown delete
00577         map->registerCompute(i,c);
00578         c->initialize();
00579         break;
00580     case computeCrosstermsType:
00581 #if defined(BONDED_CUDA) && defined(NAMD_CUDA)
00582         if (simParams->bondedCUDA & 32)
00583         {
00584           PatchMap::Object()->basePatchIDList(map->computeData[i].node, pids);
00585           getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
00586         } else
00587 #endif
00588         {
00589           PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
00590           c = new ComputeCrossterms(i,pids); // unknown delete
00591           map->registerCompute(i,c);
00592           c->initialize();
00593         }
00594         break;
00595         // JLai
00596     case computeGromacsPairType:
00597         PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
00598         c = new ComputeGromacsPair(i,pids); // unknown delete
00599         map->registerCompute(i,c);
00600         c->initialize();
00601         break;
00602     case computeSelfGromacsPairType:
00603         c = new ComputeSelfGromacsPair(i,map->computeData[i].pids[0].pid); // unknown delete
00604         map->registerCompute(i,c);
00605         c->initialize();
00606         break;
00607         // End of JLai
00608     case computeSelfExclsType:
00609 #if defined(BONDED_CUDA) && defined(NAMD_CUDA)
00610         if (simParams->bondedCUDA & 16)
00611         {
00612           getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
00613         } else
00614 #endif
00615         {
00616           c = new ComputeSelfExcls(i,map->computeData[i].pids[0].pid);
00617           map->registerCompute(i,c);
00618           c->initialize();
00619         }
00620         break;
00621     case computeSelfBondsType:
00622 #if defined(BONDED_CUDA) && defined(NAMD_CUDA)
00623         if (simParams->bondedCUDA & 1)
00624         {
00625           getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
00626         } else
00627 #endif
00628         {
00629           c = new ComputeSelfBonds(i,map->computeData[i].pids[0].pid);
00630           map->registerCompute(i,c);
00631           c->initialize();
00632         }
00633         break;
00634     case computeSelfAnglesType:
00635 #if defined(BONDED_CUDA) && defined(NAMD_CUDA)
00636         if (simParams->bondedCUDA & 2)
00637         {
00638           getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
00639         } else
00640 #endif
00641         {
00642           c = new ComputeSelfAngles(i,map->computeData[i].pids[0].pid);
00643           map->registerCompute(i,c);
00644           c->initialize();
00645         }
00646         break;
00647     case computeSelfDihedralsType:
00648 #if defined(BONDED_CUDA) && defined(NAMD_CUDA)
00649         if (simParams->bondedCUDA & 4)
00650         {
00651           getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
00652         } else
00653 #endif
00654         {
00655           c = new ComputeSelfDihedrals(i,map->computeData[i].pids[0].pid);
00656           map->registerCompute(i,c);
00657           c->initialize();
00658         }
00659         break;
00660     case computeSelfImpropersType:
00661 #if defined(BONDED_CUDA) && defined(NAMD_CUDA)
00662         if (simParams->bondedCUDA & 8)
00663         {
00664           getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
00665         } else
00666 #endif
00667         {
00668           c = new ComputeSelfImpropers(i,map->computeData[i].pids[0].pid);
00669           map->registerCompute(i,c);
00670           c->initialize();
00671         }
00672         break;
00673     case computeSelfTholeType:
00674         c = new ComputeSelfThole(i,map->computeData[i].pids[0].pid);
00675         map->registerCompute(i,c);
00676         c->initialize();
00677         break;
00678     case computeSelfAnisoType:
00679         c = new ComputeSelfAniso(i,map->computeData[i].pids[0].pid);
00680         map->registerCompute(i,c);
00681         c->initialize();
00682         break;
00683     case computeSelfCrosstermsType:
00684 #if defined(BONDED_CUDA) && defined(NAMD_CUDA)
00685         if (simParams->bondedCUDA & 32)
00686         {
00687           getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
00688         } else
00689 #endif
00690         {
00691           c = new ComputeSelfCrossterms(i,map->computeData[i].pids[0].pid);
00692           map->registerCompute(i,c);
00693           c->initialize();
00694         }
00695         break;
00696 #ifdef DPMTA
00697     case computeDPMTAType:
00698         c = new ComputeDPMTA(i); // unknown delete
00699         map->registerCompute(i,c);
00700         c->initialize();
00701         break;
00702 #endif
00703 #ifdef DPME
00704     case computeDPMEType:
00705         c = computeDPMEObject = new ComputeDPME(i,this); // unknown delete
00706         map->registerCompute(i,c);
00707         c->initialize();
00708         break;
00709 #endif
00710     case optPmeType:
00711         c = new OptPmeCompute(i); // unknown delete
00712         map->registerCompute(i,c);
00713         c->initialize();
00714         break;
00715     case computePmeType:
00716         c = new ComputePme(i,map->computeData[i].pids[0].pid); // unknown delete
00717         map->registerCompute(i,c);
00718         c->initialize();
00719         break;
00720 #ifdef NAMD_CUDA
00721     case computePmeCUDAType:
00722         // PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
00723         // c = new ComputePmeCUDA(i, pids);
00724         c = new ComputePmeCUDA(i, map->computeData[i].pids[0].pid);
00725         map->registerCompute(i,c);
00726         c->initialize();
00727         break;
00728 #endif
00729     case computeEwaldType:
00730         c = computeEwaldObject = new ComputeEwald(i,this); // unknown delete
00731         map->registerCompute(i,c);
00732         c->initialize();
00733         break;
00734     case computeFullDirectType:
00735         c = new ComputeFullDirect(i); // unknown delete
00736         map->registerCompute(i,c);
00737         c->initialize();
00738         break;
00739     case computeGlobalType:
00740         c = computeGlobalObject = new ComputeGlobal(i,this); // unknown delete
00741         map->registerCompute(i,c);
00742         c->initialize();
00743         break;
00744     case computeStirType:
00745         c = new ComputeStir(i,map->computeData[i].pids[0].pid); // unknown delete
00746         map->registerCompute(i,c);
00747         c->initialize();
00748         break;
00749     case computeExtType:
00750         c = new ComputeExt(i); // unknown delete
00751         map->registerCompute(i,c);
00752         c->initialize();
00753         break;
00754     case computeQMType:
00755         c = new ComputeQM(i);
00756         map->registerCompute(i,c);
00757         c->initialize();
00758         break;
00759     case computeGBISserType: //gbis serial
00760         c = new ComputeGBISser(i);
00761         map->registerCompute(i,c);
00762         c->initialize();
00763         break;
00764     case computeFmmType: // FMM serial
00765         c = new ComputeFmmSerial(i);
00766         map->registerCompute(i,c);
00767         c->initialize();
00768         break;
00769     case computeMsmSerialType: // MSM serial
00770         c = new ComputeMsmSerial(i);
00771         map->registerCompute(i,c);
00772         c->initialize();
00773         break;
00774 #ifdef CHARM_HAS_MSA
00775     case computeMsmMsaType: // MSM parallel long-range part using MSA
00776         c = new ComputeMsmMsa(i);
00777         map->registerCompute(i,c);
00778         c->initialize();
00779         break;
00780 #endif
00781     case computeMsmType: // MSM parallel
00782         c = new ComputeMsm(i);
00783         map->registerCompute(i,c);
00784         c->initialize();
00785         break;
00786     case computeEFieldType:
00787         c = new ComputeEField(i,map->computeData[i].pids[0].pid); // unknown delete
00788         map->registerCompute(i,c);
00789         c->initialize();
00790         break;
00791         /* BEGIN gf */
00792     case computeGridForceType:
00793         c = new ComputeGridForce(i,map->computeData[i].pids[0].pid);
00794         map->registerCompute(i,c);
00795         c->initialize();
00796         break;
00797         /* END gf */
00798     case computeSphericalBCType:
00799         c = new ComputeSphericalBC(i,map->computeData[i].pids[0].pid); // unknown delete
00800         map->registerCompute(i,c);
00801         c->initialize();
00802         break;
00803     case computeCylindricalBCType:
00804         c = new ComputeCylindricalBC(i,map->computeData[i].pids[0].pid); // unknown delete
00805         map->registerCompute(i,c);
00806         c->initialize();
00807         break;
00808     case computeTclBCType:
00809         c = new ComputeTclBC(i); // unknown delete
00810         map->registerCompute(i,c);
00811         c->initialize();
00812         break;
00813     case computeRestraintsType:
00814         c = new ComputeRestraints(i,map->computeData[i].pids[0].pid); // unknown delete
00815         map->registerCompute(i,c);
00816         c->initialize();
00817         break;
00818     case computeConsForceType:
00819         c = new ComputeConsForce(i,map->computeData[i].pids[0].pid);
00820         map->registerCompute(i,c);
00821         c->initialize();
00822         break;
00823     case computeConsTorqueType:
00824         c = new ComputeConsTorque(i,map->computeData[i].pids[0].pid);
00825         map->registerCompute(i,c);
00826         c->initialize();
00827         break;
00828     default:
00829         NAMD_bug("Unknown compute type in ComputeMgr::createCompute().");
00830         break;
00831     }
00832 }
00833 
00834 void registerUserEventsForAllComputeObjs()
00835 {
00836 #ifdef TRACE_COMPUTE_OBJECTS
00837     ComputeMap *map = ComputeMap::Object();
00838     PatchMap *pmap = PatchMap::Object();     
00839     char user_des[50];
00840     int p1, p2;
00841     int adim, bdim, cdim;
00842     int t1, t2;
00843     int x1, y1, z1, x2, y2, z2;
00844     int dx, dy, dz;
00845     for (int i=0; i<map->numComputes(); i++)
00846     {
00847         memset(user_des, 0, 50);
00848         switch ( map->type(i) )
00849         {
00850         case computeNonbondedSelfType:
00851             sprintf(user_des, "computeNonBondedSelfType_%d_pid_%d", i, map->pid(i,0));
00852             break;
00853         case computeLCPOType:
00854             sprintf(user_des, "computeLCPOType_%d_pid_%d", i, map->pid(i,0));
00855             break;
00856         case computeNonbondedPairType:
00857             adim = pmap->gridsize_a();
00858             bdim = pmap->gridsize_b();
00859             cdim = pmap->gridsize_c();
00860             p1 = map->pid(i, 0);
00861             t1 = map->trans(i, 0);
00862             x1 = pmap->index_a(p1) + adim * Lattice::offset_a(t1);
00863             y1 = pmap->index_b(p1) + bdim * Lattice::offset_b(t1);
00864             z1 = pmap->index_c(p1) + cdim * Lattice::offset_c(t1);
00865             p2 = map->pid(i, 1);
00866             t2 = map->trans(i, 1);
00867             x2 = pmap->index_a(p2) + adim * Lattice::offset_a(t2);
00868             y2 = pmap->index_b(p2) + bdim * Lattice::offset_b(t2);
00869             z2 = pmap->index_c(p2) + cdim * Lattice::offset_c(t2);
00870             dx = abs(x1-x2);
00871             dy = abs(y1-y2);
00872             dz = abs(z1-z2);
00873             sprintf(user_des, "computeNonBondedPairType_%d(%d,%d,%d)", i, dx,dy,dz);
00874             break;
00875 #ifdef NAMD_CUDA
00876 #ifdef BONDED_CUDA
00877         case computeBondedCUDAType:
00878             sprintf(user_des, "computeBondedCUDAType_%d", i);
00879             break;
00880 #endif
00881 #endif
00882         case computeExclsType:
00883             sprintf(user_des, "computeExclsType_%d", i);
00884             break;
00885         case computeBondsType:
00886             sprintf(user_des, "computeBondsType_%d", i);
00887             break;
00888         case computeAnglesType:
00889             sprintf(user_des, "computeAnglesType_%d", i);
00890             break;
00891         case computeDihedralsType:
00892             sprintf(user_des, "computeDihedralsType_%d", i);
00893             break;
00894         case computeImpropersType:
00895             sprintf(user_des, "computeImpropersType_%d", i);
00896             break;
00897         case computeTholeType:
00898             sprintf(user_des, "computeTholeType_%d", i);
00899             break;
00900         case computeAnisoType:
00901             sprintf(user_des, "computeAnisoType_%d", i);
00902             break;
00903         case computeCrosstermsType:
00904             sprintf(user_des, "computeCrosstermsType_%d", i);
00905             break;
00906         case computeSelfExclsType:
00907             sprintf(user_des, "computeSelfExclsType_%d", i);
00908             break;
00909         case computeSelfBondsType:
00910             sprintf(user_des, "computeSelfBondsType_%d", i);
00911             break;
00912         case computeSelfAnglesType:
00913             sprintf(user_des, "computeSelfAnglesType_%d", i);
00914             break;
00915         case computeSelfDihedralsType:
00916             sprintf(user_des, "computeSelfDihedralsType_%d", i);
00917             break;
00918         case computeSelfImpropersType:
00919             sprintf(user_des, "computeSelfImpropersType_%d", i);
00920             break;
00921         case computeSelfTholeType:
00922             sprintf(user_des, "computeSelfTholeType_%d", i);
00923             break;
00924         case computeSelfAnisoType:
00925             sprintf(user_des, "computeSelfAnisoType_%d", i);
00926             break;
00927         case computeSelfCrosstermsType:
00928             sprintf(user_des, "computeSelfCrosstermsType_%d", i);
00929             break;
00930 #ifdef DPMTA
00931         case computeDPMTAType:
00932             sprintf(user_des, "computeDPMTAType_%d", i);
00933             break;
00934 #endif
00935 #ifdef DPME
00936         case computeDPMEType:
00937             sprintf(user_des, "computeDPMEType_%d", i);
00938             break;
00939 #endif
00940         case computePmeType:
00941             sprintf(user_des, "computePMEType_%d", i);
00942             break;
00943 #ifdef NAMD_CUDA
00944         case computePmeCUDAType:
00945             sprintf(user_des, "computePMECUDAType_%d", i);
00946             break;
00947 #endif
00948         case computeEwaldType:
00949             sprintf(user_des, "computeEwaldType_%d", i);
00950             break;
00951         case computeFullDirectType:
00952             sprintf(user_des, "computeFullDirectType_%d", i);
00953             break;
00954         case computeGlobalType:
00955             sprintf(user_des, "computeGlobalType_%d", i);
00956             break;
00957         case computeStirType:
00958             sprintf(user_des, "computeStirType_%d", i);
00959             break;
00960         case computeExtType:
00961             sprintf(user_des, "computeExtType_%d", i);
00962             break;
00963         case computeQMType:
00964             sprintf(user_des, "computeQMType_%d", i);
00965             break;
00966         case computeEFieldType:
00967             sprintf(user_des, "computeEFieldType_%d", i);
00968             break;
00969             /* BEGIN gf */
00970         case computeGridForceType:
00971             sprintf(user_des, "computeGridForceType_%d", i);
00972             break;
00973             /* END gf */
00974         case computeSphericalBCType:
00975             sprintf(user_des, "computeSphericalBCType_%d", i);
00976             break;
00977         case computeCylindricalBCType:
00978             sprintf(user_des, "computeCylindricalBCType_%d", i);
00979             break;
00980         case computeTclBCType:
00981             sprintf(user_des, "computeTclBCType_%d", i);
00982             break;
00983         case computeRestraintsType:
00984             sprintf(user_des, "computeRestraintsType_%d", i);
00985             break;
00986         case computeConsForceType:
00987             sprintf(user_des, "computeConsForceType_%d", i);
00988             break;
00989         case computeConsTorqueType:
00990             sprintf(user_des, "computeConsTorqueType_%d", i);
00991             break;
00992         default:
00993             NAMD_bug("Unknown compute type in ComputeMgr::registerUserEventForAllComputeObjs().");
00994             break;
00995         }
00996         int user_des_len = strlen(user_des);
00997         char *user_des_cst = new char[user_des_len+1];
00998         memcpy(user_des_cst, user_des, user_des_len);
00999         user_des_cst[user_des_len] = 0;
01000         // Since the argument to traceRegisterUserEvent is expected to be
01001         // a const string that is not copied inside the function when a
01002         // new user event is created, user_des_cst has to be allocated
01003         // on the heap.
01004         int reEvenId = traceRegisterUserEvent(user_des_cst, TRACE_COMPOBJ_IDOFFSET+i);
01005         //printf("Register user event (%s) with id (%d)\n", user_des, reEvenId);
01006     }
01007 #else
01008     return;
01009 #endif
01010 }
01011 
01012 void
01013 ComputeMgr::createComputes(ComputeMap *map)
01014 {
01015 // #ifdef NAMD_CUDA
01016 //     int ComputePmeCUDACounter = 0;
01017 // #endif
01018     Node *node = Node::Object();
01019     SimParameters *simParams = node->simParameters;
01020     int myNode = node->myid();
01021 
01022     if ( simParams->globalForcesOn && !myNode )
01023     {
01024         DebugM(4,"Mgr running on Node "<<CkMyPe()<<"\n");
01025         /* create a master server to allow multiple masters */
01026         masterServerObject = new GlobalMasterServer(this,
01027                 PatchMap::Object()->numNodesWithPatches());
01028 
01029         /* create the individual global masters */
01030         // masterServerObject->addClient(new GlobalMasterTest());
01031         if (simParams->tclForcesOn)
01032             masterServerObject->addClient(new GlobalMasterTcl());
01033         if (simParams->IMDon && ! (simParams->IMDignore || simParams->IMDignoreForces) )
01034             masterServerObject->addClient(new GlobalMasterIMD());
01035 
01036         if (simParams->SMDOn)
01037             masterServerObject->addClient(
01038                 new GlobalMasterSMD(simParams->SMDk, simParams->SMDk2,
01039                                     simParams->SMDVel,
01040                                     simParams->SMDDir, simParams->SMDOutputFreq,
01041                                     simParams->firstTimestep, simParams->SMDFile,
01042                                     node->molecule->numAtoms)
01043             );
01044             
01045         if (simParams->symmetryOn && 
01046           (simParams->firstTimestep < simParams->symmetryLastStep || 
01047           simParams->symmetryLastStep == -1))
01048             masterServerObject->addClient(new GlobalMasterSymmetry());    
01049         if (simParams->TMDOn)
01050             masterServerObject->addClient(new GlobalMasterTMD());
01051         if (simParams->miscForcesOn)
01052             masterServerObject->addClient(new GlobalMasterMisc());
01053         if ( simParams->freeEnergyOn )
01054             masterServerObject->addClient(new GlobalMasterFreeEnergy());
01055         if ( simParams->colvarsOn )
01056             masterServerObject->addClient(new GlobalMasterColvars());
01057 
01058     }
01059 
01060     if ( !myNode && simParams->IMDon && (simParams->IMDignore || simParams->IMDignoreForces) ) {
01061       // GlobalMasterIMD constructor saves pointer to node->IMDOutput object
01062       new GlobalMasterIMD();
01063     }
01064 
01065 #ifdef NAMD_CUDA
01066     bool deviceIsMine = ( deviceCUDA->getMasterPe() == CkMyPe() );
01067 #ifdef BONDED_CUDA
01068     // Place bonded forces on Pe different from non-bonded forces
01069     int bondedMasterPe = deviceCUDA->getMasterPe();
01070     // for (int i=0;i < deviceCUDA->getNumPesSharingDevice();i++) {
01071     //   int pe = deviceCUDA->getPesSharingDevice(i);
01072     //   if (pe != deviceCUDA->getMasterPe()) {
01073     //     bondedMasterPe = pe;
01074     //   }
01075     // }
01076     bool deviceIsMineBonded = (CkMyPe() == bondedMasterPe);
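    // Note: with the PE-selection loop above commented out, bondedMasterPe is the
    // same PE as the nonbonded master, so both bonded and nonbonded GPU work are
    // currently driven from deviceCUDA->getMasterPe().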
01077 #endif
01078 #endif
01079 
01080     #ifdef NAMD_MIC
01081       bool deviceIsMine = ( mic_device_pe() == CkMyPe() );
01082     #endif
01083 
01084     for (int i=0; i < map->nComputes; i++)
01085     {
01086         if ( ! ( i % 100 ) )
01087         {
01088         }
01089 
01090 #if defined(NAMD_CUDA) || defined(NAMD_MIC)
01091         switch ( map->type(i) )
01092         {
01093 #ifdef NAMD_CUDA
01094           // case computePmeCUDAType:
01095           //   // Only create single ComputePmeCUDA object per Pe
01096           //  if ( map->computeData[i].node != myNode ) continue;
01097           //  if (ComputePmeCUDACounter > 0) continue;
01098           //  ComputePmeCUDACounter++;
01099           //  break;
01100           case computeNonbondedSelfType:
01101             if ( ! deviceIsMine ) continue;
01102             if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
01103           break;
01104 
01105           case computeNonbondedPairType:
01106             if ( ! deviceIsMine ) continue;
01107             if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
01108           break;
01109 
01110 #ifdef BONDED_CUDA
01111           case computeSelfBondsType:
01112           case computeBondsType:
01113             if (simParams->bondedCUDA & 1) {
01114               if ( ! deviceIsMineBonded ) continue;
01115               if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
01116             } else {
01117               if ( map->computeData[i].node != myNode ) continue;
01118             }
01119           break;
01120 
01121           case computeSelfAnglesType:
01122           case computeAnglesType:
01123             if (simParams->bondedCUDA & 2) {
01124               if ( ! deviceIsMineBonded ) continue;
01125               if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
01126             } else {
01127               if ( map->computeData[i].node != myNode ) continue;
01128             }
01129           break;
01130 
01131           case computeSelfDihedralsType:
01132           case computeDihedralsType:
01133             if (simParams->bondedCUDA & 4) {
01134               if ( ! deviceIsMineBonded ) continue;
01135               if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
01136             } else {
01137               if ( map->computeData[i].node != myNode ) continue;
01138             }
01139           break;
01140 
01141           case computeSelfImpropersType:
01142           case computeImpropersType:
01143             if (simParams->bondedCUDA & 8) {
01144               if ( ! deviceIsMineBonded ) continue;
01145               if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
01146             } else {
01147               if ( map->computeData[i].node != myNode ) continue;
01148             }
01149           break;
01150 
01151           case computeSelfExclsType:
01152           case computeExclsType:
01153             if (simParams->bondedCUDA & 16) {
01154               if ( ! deviceIsMineBonded ) continue;
01155               if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
01156             } else {
01157               if ( map->computeData[i].node != myNode ) continue;
01158             }
01159           break;
01160 
01161           case computeSelfCrosstermsType:
01162           case computeCrosstermsType:
01163             if (simParams->bondedCUDA & 32) {
01164               if ( ! deviceIsMineBonded ) continue;
01165               if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
01166             } else {
01167               if ( map->computeData[i].node != myNode ) continue;
01168             }
01169           break;
01170 
01171           case computeBondedCUDAType:
01172             if ( ! deviceIsMineBonded ) continue;
01173             if ( map->computeData[i].node != myNode ) continue;
01174           break;
01175 #endif
01176 
01177 #endif
01178 #ifdef NAMD_MIC
01179 
01180           case computeNonbondedSelfType:
01181             if (map->directToDevice(i) != 0) { // If should be directed to the device...
01182               if ( ! deviceIsMine ) continue;
01183               if ( ! mic_device_shared_with_pe(map->computeData[i].node) ) continue;
01184             } else { // ... otherwise, direct to host...
01185               if (map->computeData[i].node != myNode) { continue; }
01186             }
01187             break;
01188 
01189           case computeNonbondedPairType:
01190             if (map->directToDevice(i)) { // If should be directed to the device...
01191               if ( ! deviceIsMine ) continue;
01192               if ( ! mic_device_shared_with_pe(map->computeData[i].node) ) continue;
01193             } else { // ... otherwise, direct to host...
01194               if (map->computeData[i].node != myNode) { continue; }
01195             }
01196             break;
01197 
01198 #endif
01199           case computeNonbondedCUDAType:
01200 #ifdef NAMD_CUDA
01201           case computeNonbondedCUDA2Type:
01202 // #ifdef BONDED_CUDA
01203 //           case computeBondedCUDAType:
01204 // #endif
01205 #endif
01206           case computeNonbondedMICType:
01207             if ( ! deviceIsMine ) continue;
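            // intentional fall-through: device-hosted computes are still subject
            // to the default node check below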
01208           default:
01209             if ( map->computeData[i].node != myNode ) continue;
01210         }
01211 #else // defined(NAMD_CUDA) || defined(NAMD_MIC)
01212         if ( map->computeData[i].node != myNode ) continue;
01213 #endif
01214         DebugM(1,"Compute " << i << '\n');
01215         DebugM(1,"  node = " << map->computeData[i].node << '\n');
01216         DebugM(1,"  type = " << map->computeData[i].type << '\n');
01217         DebugM(1,"  numPids = " << map->computeData[i].numPids << '\n');
01218 //         DebugM(1,"  numPidsAllocated = " << map->computeData[i].numPidsAllocated << '\n');
01219         for (int j=0; j < map->computeData[i].numPids; j++)
01220         {
01221             DebugM(1,"  pid " << map->computeData[i].pids[j].pid << '\n');
01222             if (!((j+1) % 6))
01223                 DebugM(1,'\n');
01224         }
01225         DebugM(1,"\n---------------------------------------");
01226         DebugM(1,"---------------------------------------\n");
01227 
01228         createCompute(i, map);
01229 
01230     }
01231 
01232 #ifdef NAMD_CUDA
01233     if (simParams->useCUDA2) {
01234       if (deviceIsMine) {
01235         getCudaComputeNonbonded()->assignPatches(this);
01236         getCudaComputeNonbonded()->initialize();
01237       }
01238     } else {
01239       if ( computeNonbondedCUDAObject ) {
01240         computeNonbondedCUDAObject->assignPatches();
01241       }      
01242     }
01243 #ifdef BONDED_CUDA
01244     if (simParams->bondedCUDA) {
01245       if (deviceIsMineBonded) {
01246         getComputeBondedCUDA()->initialize();
01247       }
01248     }
01249 #endif
01250 #endif
01251 #ifdef NAMD_MIC
01252     if ( computeNonbondedMICObject ) {
01253       computeNonbondedMICObject->assignPatches();
01254     }
01255 #endif
01256 
01257 }
01258 
01259 #if 0
01260 void ComputeMgr:: sendComputeGlobalConfig(ComputeGlobalConfigMsg *msg)
01261 {
01262     (CProxy_ComputeMgr(CkpvAccess(BOCclass_group).computeMgr)).recvComputeGlobalConfig(msg);
01263 }
01264 
01265 void ComputeMgr:: recvComputeGlobalConfig(ComputeGlobalConfigMsg *msg)
01266 {
01267     if ( computeGlobalObject )
01268     {
01269         computeGlobalObject->recvConfig(msg);
01270     }
01271     else if ( ! (PatchMap::Object())->numHomePatches() ) delete msg;
01272     else NAMD_die("ComputeMgr::computeGlobalObject is NULL!");
01273 }
01274 #endif
01275 
01276 void ComputeMgr:: sendComputeGlobalData(ComputeGlobalDataMsg *msg)
01277 {
01278     CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
01279     cm[0].recvComputeGlobalData(msg);
01280 }
01281 
01282 void ComputeMgr:: recvComputeGlobalData(ComputeGlobalDataMsg *msg)
01283 {
01284     if (masterServerObject)  // make sure it has been initialized
01285     {
01286         masterServerObject->recvData(msg);
01287     }
01288     else NAMD_die("ComputeMgr::masterServerObject is NULL!");
01289 }
01290 
01291 void ComputeMgr:: sendComputeGlobalResults(ComputeGlobalResultsMsg *msg)
01292 {
01293     msg->seq = ++computeGlobalResultsMsgMasterSeq;
01294     thisProxy.recvComputeGlobalResults(msg);
01295 }
01296 
01297 void ComputeMgr:: enableComputeGlobalResults()
01298 {
01299     ++computeGlobalResultsMsgSeq;
01300     for ( int i=0; i<computeGlobalResultsMsgs.size(); ++i ) {
01301       if ( computeGlobalResultsMsgs[i]->seq == computeGlobalResultsMsgSeq ) {
01302         ComputeGlobalResultsMsg *msg = computeGlobalResultsMsgs[i];
01303         computeGlobalResultsMsgs.del(i);
01304         recvComputeGlobalResults(msg);
01305         break;
01306       }
01307     }
01308 }
01309 
01310 void ComputeMgr:: recvComputeGlobalResults(ComputeGlobalResultsMsg *msg)
01311 {
01312     if ( computeGlobalObject )
01313     {
01314       if ( msg->seq == computeGlobalResultsMsgSeq ) {
01315         CmiEnableUrgentSend(1);
01316         computeGlobalObject->recvResults(msg);
01317         CmiEnableUrgentSend(0);
01318       } else {
01319         computeGlobalResultsMsgs.add(msg);
01320       }
01321     }
01322     else if ( ! (PatchMap::Object())->numHomePatches() ) delete msg;
01323     else NAMD_die("ComputeMgr::computeGlobalObject is NULL!");
01324 }
01325 
01326 /*
01327  * Begin Ewald messages
01328  */
01329 void ComputeMgr:: sendComputeEwaldData(ComputeEwaldMsg *msg)
01330 {
01331     if (computeEwaldObject)
01332     {
01333         int node = computeEwaldObject->getMasterNode();
01334         CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
01335         cm[node].recvComputeEwaldData(msg);
01336     }
01337     else if (!PatchMap::Object()->numHomePatches())
01338     {
01339         CkPrintf("skipping message on Pe(%d)\n", CkMyPe());
01340         delete msg;
01341     }
01342     else NAMD_die("ComputeMgr::computeEwaldObject is NULL!");
01343 }
01344 
01345 void ComputeMgr:: recvComputeEwaldData(ComputeEwaldMsg *msg)
01346 {
01347     if (computeEwaldObject)
01348         computeEwaldObject->recvData(msg);
01349     else NAMD_die("ComputeMgr::computeEwaldObject in recvData is NULL!");
01350 }
01351 
01352 void ComputeMgr:: sendComputeEwaldResults(ComputeEwaldMsg *msg)
01353 {
01354     (CProxy_ComputeMgr(CkpvAccess(BOCclass_group).computeMgr)).recvComputeEwaldResults(msg);
01355 }
01356 
01357 void ComputeMgr::recvComputeEwaldResults(ComputeEwaldMsg *msg)
01358 {
01359     if (computeEwaldObject) {
01360         CmiEnableUrgentSend(1);
01361         computeEwaldObject->recvResults(msg);
01362         CmiEnableUrgentSend(0);
01363     }
01364     else if ( ! (PatchMap::Object())->numHomePatches() ) delete msg;
01365     else NAMD_die("ComputeMgr::computeEwaldObject in recvResults is NULL!");
01366 }
01367 
01368 void ComputeMgr:: sendComputeDPMEData(ComputeDPMEDataMsg *msg)
01369 {
01370     if ( computeDPMEObject )
01371     {
01372 #ifdef DPME
01373         int node = computeDPMEObject->getMasterNode();
01374         CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
01375         cm.recvComputeDPMEData(msg,node);
01376 #endif
01377     }
01378     else if ( ! (PatchMap::Object())->numHomePatches() ) delete msg;
01379     else NAMD_die("ComputeMgr::computeDPMEObject is NULL!");
01380 }
01381 
01382 void ComputeMgr:: recvComputeDPMEData(ComputeDPMEDataMsg *msg)
01383 {
01384     if ( computeDPMEObject )
01385     {
01386 #ifdef DPME
01387         computeDPMEObject->recvData(msg);
01388 #endif
01389     }
01390     else if ( ! (PatchMap::Object())->numHomePatches() ) delete msg;
01391     else NAMD_die("ComputeMgr::computeDPMEObject is NULL!");
01392 }
01393 
01394 void ComputeMgr:: sendComputeDPMEResults(ComputeDPMEResultsMsg *msg, int node)
01395 {
01396     CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
01397     cm[node].recvComputeDPMEResults(msg);
01398 }
01399 
01400 void ComputeMgr:: recvComputeDPMEResults(ComputeDPMEResultsMsg *msg)
01401 {
01402     if ( computeDPMEObject )
01403     {
01404 #ifdef DPME
01405         computeDPMEObject->recvResults(msg);
01406 #endif
01407     }
01408     else if ( ! (PatchMap::Object())->numHomePatches() ) delete msg;
01409     else NAMD_die("ComputeMgr::computeDPMEObject is NULL!");
01410 }
01411 
01412 void ComputeMgr::recvComputeConsForceMsg(ComputeConsForceMsg *msg)
01413 {
01414     Molecule *m = Node::Object()->molecule;
01415     delete [] m->consForceIndexes;
01416     delete [] m->consForce;
01417     int n = msg->aid.size();
01418     if (n > 0)
01419     {
01420         m->consForceIndexes = new int32[m->numAtoms];
01421         m->consForce = new Vector[n];
01422         int i;
01423         for (i=0; i<m->numAtoms; i++) m->consForceIndexes[i] = -1;
01424         for (i=0; i<msg->aid.size(); i++)
01425         {
01426             m->consForceIndexes[msg->aid[i]] = i;
01427             m->consForce[i] = msg->f[i];
01428         }
01429     }
01430     else
01431     {
01432         m->consForceIndexes = NULL;
01433         m->consForce = NULL;
01434     }
01435     delete msg;
01436 }
01437 
01438 void ComputeMgr::sendYieldDevice(int pe) {
01439     CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
01440     cm[pe].recvYieldDevice(CkMyPe());
01441 }
01442 
01443 void ComputeMgr::recvYieldDevice(int pe) {
01444 #ifdef NAMD_CUDA
01445     computeNonbondedCUDAObject->recvYieldDevice(pe);
01446 #endif
01447 #ifdef NAMD_MIC
01448     computeNonbondedMICObject->recvYieldDevice(pe);
01449 #endif
01450 }
01451 
01452 void ComputeMgr::sendBuildCudaExclusions() {
01453     CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
01454     int pe = CkNodeFirst(CkMyNode());
01455     int end = pe + CkNodeSize(CkMyNode());
01456     for( ; pe != end; ++pe ) {
01457       cm[pe].recvBuildCudaExclusions();
01458     }
01459 }
01460 
01461 #ifdef NAMD_CUDA
01462   void build_cuda_exclusions();
01463 #endif
01464 
01465 void ComputeMgr::recvBuildCudaExclusions() {
01466 #ifdef NAMD_CUDA
01467     build_cuda_exclusions();
01468 #endif
01469 }
01470 
01471 void ComputeMgr::sendBuildCudaForceTable() {
01472     CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
01473     int pe = CkNodeFirst(CkMyNode());
01474     int end = pe + CkNodeSize(CkMyNode());
01475     for( ; pe != end; ++pe ) {
01476       cm[pe].recvBuildCudaForceTable();
01477     }
01478 }
01479 
01480 #ifdef NAMD_CUDA
01481   void build_cuda_force_table();
01482 #endif
01483 
01484 void ComputeMgr::recvBuildCudaForceTable() {
01485 #ifdef NAMD_CUDA
01486     build_cuda_force_table();
01487 #endif
01488 }
01489 
01490 void ComputeMgr::sendBuildMICForceTable() {
01491   CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
01492   int pe = CkNodeFirst(CkMyNode());
01493   int end = pe + CkNodeSize(CkMyNode());
01494   for( ; pe != end; ++pe ) {
01495     cm[pe].recvBuildMICForceTable();
01496   }
01497 }
01498 
01499 #ifdef NAMD_MIC
01500   void build_mic_force_table();
01501 #endif
01502 
01503 void ComputeMgr::recvBuildMICForceTable() {
01504   #ifdef NAMD_MIC
01505     build_mic_force_table();
01506   #endif
01507 }
01508 
01509 class NonbondedCUDASlaveMsg : public CMessage_NonbondedCUDASlaveMsg {
01510 public:
01511   int index;
01512   ComputeNonbondedCUDA *master;
01513 };
01514 
01515 void ComputeMgr::sendCreateNonbondedCUDASlave(int pe, int index) {
01516   NonbondedCUDASlaveMsg *msg = new NonbondedCUDASlaveMsg;
01517   msg->master = computeNonbondedCUDAObject;
01518   msg->index = index;
01519   CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
01520   cm[pe].recvCreateNonbondedCUDASlave(msg);
01521 }
01522 
01523 void ComputeMgr::recvCreateNonbondedCUDASlave(NonbondedCUDASlaveMsg *msg) {
01524 #ifdef NAMD_CUDA
01525   new ComputeNonbondedCUDA(msg->master->cid,this,msg->master,msg->index);
01526 #endif
01527 }
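// The slave-creation message carries a raw pointer to the master compute, which is
// only meaningful when the receiving PE shares the master's address space
// (typically PEs driving the same CUDA device).  The slave constructed in
// recvCreateNonbondedCUDASlave() is not stored here; its constructor is expected to
// link it back to the master it was given.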
01528 
01529 void ComputeMgr::sendNonbondedCUDASlaveReady(int pe, int np, int ac, int seq) {
01530   CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
01531   cm[pe].recvNonbondedCUDASlaveReady(np,ac,seq);
01532 }
01533 
01534 void ComputeMgr::recvNonbondedCUDASlaveReady(int np, int ac, int seq) {
01535   for ( int i=0; i<np; ++i ) {
01536     computeNonbondedCUDAObject->patchReady(-1,ac,seq);
01537   }
01538 }
01539 
01540 class NonbondedCUDASkipMsg : public CMessage_NonbondedCUDASkipMsg {
01541 public:
01542   ComputeNonbondedCUDA *compute;
01543 };
01544 
01545 void ComputeMgr::sendNonbondedCUDASlaveSkip(ComputeNonbondedCUDA *c, int pe) {
01546   NonbondedCUDASkipMsg *msg = new NonbondedCUDASkipMsg;
01547   msg->compute = c;
01548   thisProxy[pe].recvNonbondedCUDASlaveSkip(msg);
01549 }
01550 
01551 void ComputeMgr::recvNonbondedCUDASlaveSkip(NonbondedCUDASkipMsg *msg) {
01552 #ifdef NAMD_CUDA
01553   msg->compute->skip();
01554 #endif
01555   delete msg;
01556 }
01557 
01558 void ComputeMgr::sendNonbondedCUDASlaveEnqueue(ComputeNonbondedCUDA *c, int pe, int seq, int prio, int ws) {
01559   if ( ws == 2 && c->localHostedPatches.size() == 0 ) return;
01560   LocalWorkMsg *msg = ( ws == 1 ? c->localWorkMsg : c->localWorkMsg2 );
01561   msg->compute = c;
01562   int type = c->type();
01563   int cid = c->cid;
01564   SET_PRIORITY(msg,seq,prio);
01565   CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
01566   wdProxy[pe].enqueueCUDA(msg);
01567 }
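// SET_PRIORITY() stamps the reused LocalWorkMsg with the (sequence, priority) pair
// so Charm++ schedules it in step order, and WorkDistrib::enqueueCUDA() on the
// target PE hands the message to the compute.  The 'type' and 'cid' locals above
// are unused and look like debugging leftovers.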
01568 
01569 void ComputeMgr::sendNonbondedCUDASlaveEnqueuePatch(ComputeNonbondedCUDA *c, int pe, int seq, int prio, int data, FinishWorkMsg *msg) {
01570   msg->compute = c;
01571   msg->data = data;
01572   SET_PRIORITY(msg,seq,prio);
01573   CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
01574   wdProxy[pe].finishCUDAPatch(msg);
01575 }
01576 
01577 class NonbondedMICSlaveMsg : public CMessage_NonbondedMICSlaveMsg {
01578 public:
01579   int index;
01580   ComputeNonbondedMIC *master;
01581 };
01582 
01583 #ifdef NAMD_CUDA
01584 class CudaComputeNonbondedMsg : public CMessage_CudaComputeNonbondedMsg {
01585 public:
01586   CudaComputeNonbonded* c;
01587   int i;
01588 };
01589 
01590 void ComputeMgr::sendAssignPatchesOnPe(std::vector<int>& pes, CudaComputeNonbonded* c) {
01591   for (int i=0;i < pes.size();i++) {
01592     CudaComputeNonbondedMsg *msg = new CudaComputeNonbondedMsg;
01593     msg->c = c;
01594     thisProxy[pes[i]].recvAssignPatchesOnPe(msg);
01595   }
01596 }
01597 
01598 void ComputeMgr::recvAssignPatchesOnPe(CudaComputeNonbondedMsg *msg) {
01599   msg->c->assignPatchesOnPe();
01600   delete msg;
01601 }
01602 
01603 void ComputeMgr::sendSkipPatchesOnPe(std::vector<int>& pes, CudaComputeNonbonded* c) {
01604   for (int i=0;i < pes.size();i++) {
01605     CudaComputeNonbondedMsg *msg = new CudaComputeNonbondedMsg;
01606     msg->c = c;
01607     thisProxy[pes[i]].recvSkipPatchesOnPe(msg);
01608   }
01609 }
01610 
01611 void ComputeMgr::recvSkipPatchesOnPe(CudaComputeNonbondedMsg *msg) {
01612   msg->c->skipPatchesOnPe();
01613   delete msg;
01614 }
01615 
01616 void ComputeMgr::sendFinishPatchesOnPe(std::vector<int>& pes, CudaComputeNonbonded* c) {
01617   for (int i=0;i < pes.size();i++) {
01618     CudaComputeNonbondedMsg *msg = new CudaComputeNonbondedMsg;
01619     msg->c = c;
01620     thisProxy[pes[i]].recvFinishPatchesOnPe(msg);
01621   }
01622 }
01623 
01624 void ComputeMgr::recvFinishPatchesOnPe(CudaComputeNonbondedMsg *msg) {
01625   msg->c->finishPatchesOnPe();
01626   delete msg;
01627 }
01628 
01629 void ComputeMgr::sendFinishPatchOnPe(int pe, CudaComputeNonbonded* c, int i) {
01630   CudaComputeNonbondedMsg *msg = new CudaComputeNonbondedMsg;
01631   msg->c = c;
01632   msg->i = i;
01633   thisProxy[pe].recvFinishPatchOnPe(msg);
01634 }
01635 
01636 void ComputeMgr::recvFinishPatchOnPe(CudaComputeNonbondedMsg *msg) {
01637   msg->c->finishPatchOnPe(msg->i);
01638   delete msg;
01639 }
01640 
01641 void ComputeMgr::sendOpenBoxesOnPe(std::vector<int>& pes, CudaComputeNonbonded* c) {
01642   for (int i=0;i < pes.size();i++) {
01643     CudaComputeNonbondedMsg *msg = new CudaComputeNonbondedMsg;
01644     msg->c = c;
01645     thisProxy[pes[i]].recvOpenBoxesOnPe(msg);
01646   }
01647 }
01648 
01649 void ComputeMgr::recvOpenBoxesOnPe(CudaComputeNonbondedMsg *msg) {
01650   msg->c->openBoxesOnPe();
01651   delete msg;
01652 }
01653 
01654 void ComputeMgr::sendFinishReductions(int pe, CudaComputeNonbonded* c) {
01655   CudaComputeNonbondedMsg *msg = new CudaComputeNonbondedMsg;
01656   msg->c = c;
01657   thisProxy[pe].recvFinishReductions(msg);
01658 }
01659 
01660 void ComputeMgr::recvFinishReductions(CudaComputeNonbondedMsg *msg) {
01661   msg->c->finishReductions();
01662   delete msg;
01663 }
01664 
01665 void ComputeMgr::sendMessageEnqueueWork(int pe, CudaComputeNonbonded* c) {
01666   CudaComputeNonbondedMsg *msg = new CudaComputeNonbondedMsg;
01667   msg->c = c;
01668   thisProxy[pe].recvMessageEnqueueWork(msg);
01669 }
01670 
01671 void ComputeMgr::recvMessageEnqueueWork(CudaComputeNonbondedMsg *msg) {
01672   msg->c->messageEnqueueWork();
01673   delete msg;
01674 }
01675 
01676 void ComputeMgr::sendLaunchWork(int pe, CudaComputeNonbonded* c) {
01677   CudaComputeNonbondedMsg *msg = new CudaComputeNonbondedMsg;
01678   msg->c = c;
01679   thisProxy[pe].recvLaunchWork(msg);
01680 }
01681 
01682 void ComputeMgr::recvLaunchWork(CudaComputeNonbondedMsg *msg) {
01683   msg->c->launchWork();
01684   delete msg;
01685 }
01686 
01687 void ComputeMgr::sendUnregisterBoxesOnPe(std::vector<int>& pes, CudaComputeNonbonded* c) {
01688   for (int i=0;i < pes.size();i++) {
01689     CudaComputeNonbondedMsg *msg = new CudaComputeNonbondedMsg;
01690     msg->c = c;
01691     thisProxy[pes[i]].recvUnregisterBoxesOnPe(msg);
01692   }
01693 }
01694 
01695 void ComputeMgr::recvUnregisterBoxesOnPe(CudaComputeNonbondedMsg *msg) {
01696   msg->c->unregisterBoxesOnPe();
01697   delete msg;
01698 }
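// Each send*/recv* pair in this NAMD_CUDA block repeats the same three steps:
// allocate a CudaComputeNonbondedMsg, point it at the compute, and deliver it to
// one PE or to every PE in 'pes'.  The disabled sketch below is an editorial
// illustration (not part of the original source) of how the per-PE loop could be
// factored out; 'broadcastToPes' and its 'send' callback are hypothetical names.
#if 0
template <typename Msg, typename SendFn>
static void broadcastToPes(const std::vector<int>& pes, CudaComputeNonbonded* c,
                           SendFn send) {
  for (size_t i = 0; i < pes.size(); i++) {
    Msg* msg = new Msg;  // Charm++ message; ownership passes to the receiving PE
    msg->c = c;
    send(pes[i], msg);   // e.g. [&](int pe, Msg* m) { thisProxy[pe].recvOpenBoxesOnPe(m); }
  }
}
#endif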
01699 
01700 #ifdef BONDED_CUDA
01701 
01702 class ComputeBondedCUDAMsg : public CMessage_ComputeBondedCUDAMsg {
01703 public:
01704   ComputeBondedCUDA* c;
01705   int i;
01706 };
01707 
01708 void ComputeMgr::sendAssignPatchesOnPe(std::vector<int>& pes, ComputeBondedCUDA* c) {
01709   for (int i=0;i < pes.size();i++) {
01710     ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
01711     msg->c = c;
01712     thisProxy[pes[i]].recvAssignPatchesOnPe(msg);
01713   }
01714 }
01715 
01716 void ComputeMgr::recvAssignPatchesOnPe(ComputeBondedCUDAMsg *msg) {
01717   msg->c->assignPatchesOnPe();
01718   delete msg;
01719 }
01720 
01721 void ComputeMgr::sendMessageEnqueueWork(int pe, ComputeBondedCUDA* c) {
01722   ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
01723   msg->c = c;
01724   thisProxy[pe].recvMessageEnqueueWork(msg);
01725 }
01726 
01727 void ComputeMgr::recvMessageEnqueueWork(ComputeBondedCUDAMsg *msg) {
01728   msg->c->messageEnqueueWork();
01729   delete msg;
01730 }
01731 
01732 void ComputeMgr::sendOpenBoxesOnPe(std::vector<int>& pes, ComputeBondedCUDA* c) {
01733   for (int i=0;i < pes.size();i++) {
01734     ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
01735     msg->c = c;
01736     thisProxy[pes[i]].recvOpenBoxesOnPe(msg);
01737   }
01738 }
01739 
01740 void ComputeMgr::recvOpenBoxesOnPe(ComputeBondedCUDAMsg *msg) {
01741   msg->c->openBoxesOnPe();
01742   delete msg;
01743 }
01744 
01745 void ComputeMgr::sendLoadTuplesOnPe(std::vector<int>& pes, ComputeBondedCUDA* c) {
01746   for (int i=0;i < pes.size();i++) {
01747     ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
01748     msg->c = c;
01749     thisProxy[pes[i]].recvLoadTuplesOnPe(msg);
01750   }
01751 }
01752 
01753 void ComputeMgr::recvLoadTuplesOnPe(ComputeBondedCUDAMsg *msg) {
01754   msg->c->loadTuplesOnPe();
01755   delete msg;
01756 }
01757 
01758 void ComputeMgr::sendLaunchWork(int pe, ComputeBondedCUDA* c) {
01759   ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
01760   msg->c = c;
01761   thisProxy[pe].recvLaunchWork(msg);
01762 }
01763 
01764 void ComputeMgr::recvLaunchWork(ComputeBondedCUDAMsg *msg) {
01765   msg->c->launchWork();
01766   delete msg;
01767 }
01768 
01769 void ComputeMgr::sendFinishPatchesOnPe(std::vector<int>& pes, ComputeBondedCUDA* c) {
01770   for (int i=0;i < pes.size();i++) {
01771     ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
01772     msg->c = c;
01773     thisProxy[pes[i]].recvFinishPatchesOnPe(msg);
01774   }
01775 }
01776 
01777 void ComputeMgr::recvFinishPatchesOnPe(ComputeBondedCUDAMsg *msg) {
01778   msg->c->finishPatchesOnPe();
01779   delete msg;
01780 }
01781 
01782 void ComputeMgr::sendFinishReductions(int pe, ComputeBondedCUDA* c) {
01783   ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
01784   msg->c = c;
01785   thisProxy[pe].recvFinishReductions(msg);
01786 }
01787 
01788 void ComputeMgr::recvFinishReductions(ComputeBondedCUDAMsg *msg) {
01789   msg->c->finishReductions();
01790   delete msg;
01791 }
01792 
01793 void ComputeMgr::sendUnregisterBoxesOnPe(std::vector<int>& pes, ComputeBondedCUDA* c) {
01794   for (int i=0;i < pes.size();i++) {
01795     ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
01796     msg->c = c;
01797     thisProxy[pes[i]].recvUnregisterBoxesOnPe(msg);
01798   }
01799 }
01800 
01801 void ComputeMgr::recvUnregisterBoxesOnPe(ComputeBondedCUDAMsg *msg) {
01802   msg->c->unregisterBoxesOnPe();
01803   delete msg;
01804 }
01805 
01806 #endif // BONDED_CUDA
01807 
01808 #endif // NAMD_CUDA
01809 
01810 void ComputeMgr::sendCreateNonbondedMICSlave(int pe, int index) {
01811   NonbondedMICSlaveMsg *msg = new NonbondedMICSlaveMsg;
01812   msg->master = computeNonbondedMICObject;
01813   msg->index = index;
01814   CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
01815   cm[pe].recvCreateNonbondedMICSlave(msg);
01816 }
01817 
01818 void ComputeMgr::recvCreateNonbondedMICSlave(NonbondedMICSlaveMsg *msg) {
01819 #ifdef NAMD_MIC
01820   new ComputeNonbondedMIC(msg->master->cid,this,msg->master,msg->index);  // constructed for its side effects, like the CUDA slave above
01821 #endif
01822 }
01823 
01824 void ComputeMgr::sendNonbondedMICSlaveReady(int pe, int np, int ac, int seq) {
01825   CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
01826   cm[pe].recvNonbondedMICSlaveReady(np,ac,seq);
01827 }
01828 
01829 void ComputeMgr::recvNonbondedMICSlaveReady(int np, int ac, int seq) {
01830   for ( int i=0; i<np; ++i ) {
01831     computeNonbondedMICObject->patchReady(-1,ac,seq);
01832   }
01833 }
01834 
01835 class NonbondedMICSkipMsg : public CMessage_NonbondedMICSkipMsg {
01836 public:
01837   ComputeNonbondedMIC *compute;
01838 };
01839 
01840 void ComputeMgr::sendNonbondedMICSlaveSkip(ComputeNonbondedMIC *c, int pe) {
01841   NonbondedMICSkipMsg *msg = new NonbondedMICSkipMsg;
01842   msg->compute = c;
01843   thisProxy[pe].recvNonbondedMICSlaveSkip(msg);
01844 }
01845 
01846 void ComputeMgr::recvNonbondedMICSlaveSkip(NonbondedMICSkipMsg *msg) {
01847 #ifdef NAMD_MIC
01848   msg->compute->skip();
01849 #endif
01850   delete msg;
01851 }
01852 
01853 void ComputeMgr::sendNonbondedMICSlaveEnqueue(ComputeNonbondedMIC *c, int pe, int seq, int prio, int ws) {
01854   if ( ws == 2 && c->localHostedPatches.size() == 0 ) return;
01855   LocalWorkMsg *msg = ( ws == 1 ? c->localWorkMsg : c->localWorkMsg2 );
01856   msg->compute = c;
01857   int type = c->type();
01858   int cid = c->cid;
01859   SET_PRIORITY(msg,seq,prio);
01860   CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
01861   wdProxy[pe].enqueueMIC(msg);
01862 }
01863 
01864 void ComputeMgr::sendMICPEData(int pe, int data) {
01865   CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
01866   cm.recvMICPEData(pe, data);
01867 }
01868 
01869 void ComputeMgr::recvMICPEData(int pe, int data) {
01870   if (pe < 0 || pe >= CkNumPes() || micPEData == NULL) { return; }
01871   int majorIndex = pe / (sizeof(int)*8);
01872   int minorIndex = pe % (sizeof(int)*8);
01873   if (data != 0) {
01874     micPEData[majorIndex] |= (0x01 << minorIndex);
01875   } else {
01876     micPEData[majorIndex] &= ~(0x01 << minorIndex);  // clear only this PE's bit
01877   }
01878 }
01879 
01880 int isMICProcessor(int pe) {
01881   return CProxy_ComputeMgr::ckLocalBranch(CkpvAccess(BOCclass_group).computeMgr)->isMICProcessor(pe);
01882 }
01883 
01884 int ComputeMgr::isMICProcessor(int pe) {
01885   if (pe < 0 || pe >= CkNumPes() || micPEData == NULL) { return 0; }
01886   int majorIndex = pe / (sizeof(int)*8);
01887   int minorIndex = pe % (sizeof(int)*8);
01888   return ((micPEData[majorIndex] >> minorIndex) & 0x01);
01889 }
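// micPEData is a packed bit array with one bit per PE.  With 32-bit ints, PE 70
// lands in word 70/32 = 2 at bit 70%32 = 6; recvMICPEData() sets or clears exactly
// that bit, and isMICProcessor() reads it back with one shift and one mask.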
01890 
01891 #include "ComputeMgr.def.h"
01892 
