ComputePme.C File Reference

#include <sfftw.h>
#include <srfftw.h>
#include <vector>
#include <algorithm>
#include <deque>
#include "InfoStream.h"
#include "Node.h"
#include "PatchMap.h"
#include "PatchMap.inl"
#include "AtomMap.h"
#include "ComputePme.h"
#include "ComputePmeMgr.decl.h"
#include "PmeBase.inl"
#include "PmeRealSpace.h"
#include "PmeKSpace.h"
#include "ComputeNonbondedUtil.h"
#include "PatchMgr.h"
#include "Molecule.h"
#include "ReductionMgr.h"
#include "ComputeMgr.h"
#include "ComputeMgr.decl.h"
#include "Debug.h"
#include "SimParameters.h"
#include "WorkDistrib.h"
#include "varsizemsg.h"
#include "Random.h"
#include "ckhashtable.h"
#include "Priorities.h"
#include "ComputeMoa.h"
#include "ComputeMoaMgr.decl.h"
#include "DeviceCUDA.h"
#include <cuda_runtime.h>
#include <cuda.h>
#include "ComputePmeCUDAKernel.h"
#include "ComputePmeMgr.def.h"

Go to the source code of this file.

Classes

class  PmeAckMsg
class  PmeGridMsg
class  PmeTransMsg
class  PmeSharedTransMsg
class  PmeUntransMsg
class  PmeSharedUntransMsg
class  PmeEvirMsg
class  PmePencilMap
struct  PmePencilInitMsgData
class  PmePencilInitMsg
struct  LocalPmeInfo
struct  NodePmeInfo
struct  sortop_bit_reversed
struct  ijpair
struct  ijpair_sortop_bit_reversed
class  ComputePmeMgr
struct  ComputePmeMgr::cuda_submit_charges_args
class  NodePmeMgr
class  PmePencil< T >
class  PmeZPencil
class  PmeYPencil
class  PmeXPencil

Defines

#define fftwf_malloc   fftw_malloc
#define fftwf_free   fftw_free
#define MIN_DEBUG_LEVEL   3
#define NUM_STREAMS   1
#define CUDA_STREAM_CREATE(X)   cudaStreamCreate(X)
#define CUDA_EVENT_ID_PME_CHARGES   80
#define CUDA_EVENT_ID_PME_FORCES   81
#define CUDA_EVENT_ID_PME_TICK   82
#define CUDA_EVENT_ID_PME_COPY   83
#define CUDA_EVENT_ID_PME_KERNEL   84
#define count_limit   1000000
#define CUDA_POLL(FN, ARG)   CcdCallFnAfter(FN,ARG,0.1)
#define EVENT_STRIDE   10
#define XCOPY(X)   masterPmeMgr->X = X;
#define XCOPY(X)   X = masterPmeMgr->X;
#define DEBUG_NODE_PAR_RECV   0

Functions

void cuda_errcheck (const char *msg)
static int findRecipEvirPe ()
void generatePmePeList2 (int *gridPeMap, int numGridPes, int *transPeMap, int numTransPes)
int compare_bit_reversed (int a, int b)
bool less_than_bit_reversed (int a, int b)
ResizeArray< ComputePme * > & getComputes (ComputePmeMgr *mgr)
int isPmeProcessor (int p)
void Pme_init ()
static void PmeSlabSendTrans (int first, int last, void *result, int paraNum, void *param)
static void PmeSlabSendUntrans (int first, int last, void *result, int paraNum, void *param)
static void PmeSlabSendUngrid (int first, int last, void *result, int paraNum, void *param)
void CcdCallBacksReset (void *ignored, double curWallTime)
void cuda_check_pme_forces (void *arg, double walltime)
void cuda_check_pme_charges (void *arg, double walltime)
static void PmeXZPencilFFT (int first, int last, void *result, int paraNum, void *param)
static void PmeZPencilSendTrans (int first, int last, void *result, int paraNum, void *param)
static void PmeYPencilForwardFFT (int first, int last, void *result, int paraNum, void *param)
static void PmeYPencilSendTrans (int first, int last, void *result, int paraNum, void *param)
static void PmeXPencilSendUntrans (int first, int last, void *result, int paraNum, void *param)
static void PmeYPencilBackwardFFT (int first, int last, void *result, int paraNum, void *param)
static void PmeYPencilSendUntrans (int first, int last, void *result, int paraNum, void *param)
static void PmeZPencilSendUngrid (int first, int last, void *result, int paraNum, void *param)

Variables

__thread DeviceCUDAdeviceCUDA
char * pencilPMEProcessors


Define Documentation

#define count_limit   1000000

Definition at line 2467 of file ComputePme.C.

#define CUDA_EVENT_ID_PME_CHARGES   80

Referenced by ComputePmeMgr::ComputePmeMgr(), and cuda_check_pme_charges().

#define CUDA_EVENT_ID_PME_COPY   83

Referenced by ComputePmeMgr::chargeGridSubmitted(), ComputePmeMgr::ComputePmeMgr(), ComputePmeMgr::cuda_submit_charges(), and ComputePmeMgr::ungridCalc().

#define CUDA_EVENT_ID_PME_FORCES   81

Referenced by ComputePmeMgr::ComputePmeMgr(), and cuda_check_pme_forces().

#define CUDA_EVENT_ID_PME_KERNEL   84

Referenced by ComputePmeMgr::ComputePmeMgr(), ComputePmeMgr::cuda_submit_charges(), and ComputePmeMgr::ungridCalc().

#define CUDA_EVENT_ID_PME_TICK   82

Referenced by ComputePmeMgr::ComputePmeMgr(), and ComputePmeMgr::ungridCalc().

#define CUDA_POLL ( FN,
ARG   )     CcdCallFnAfter(FN,ARG,0.1)

Definition at line 2468 of file ComputePme.C.

#define CUDA_STREAM_CREATE (  )     cudaStreamCreate(X)

Referenced by ComputePmeMgr::ComputePmeMgr().

#define DEBUG_NODE_PAR_RECV   0

Definition at line 4977 of file ComputePme.C.

#define EVENT_STRIDE   10

Definition at line 2469 of file ComputePme.C.

Referenced by cuda_check_pme_forces(), and ComputePmeMgr::ungridCalc().

#define fftwf_free   fftw_free

Definition at line 14 of file ComputePme.C.

Referenced by PmePencil< CBase_PmeZPencil >::~PmePencil().

#define fftwf_malloc   fftw_malloc

Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved.

Definition at line 13 of file ComputePme.C.

Referenced by PmeXPencil::fft_init(), PmeYPencil::fft_init(), and PmeZPencil::fft_init().

#define MIN_DEBUG_LEVEL   3

Definition at line 47 of file ComputePme.C.

#define NUM_STREAMS   1

Definition at line 513 of file ComputePme.C.

Referenced by ComputePmeMgr::ComputePmeMgr().

#define XCOPY (  )     X = masterPmeMgr->X;

#define XCOPY (  )     masterPmeMgr->X = X;

Referenced by ComputePmeMgr::initialize_computes().


Function Documentation

void CcdCallBacksReset ( void *  ignored,
double  curWallTime 
)

int compare_bit_reversed ( int  a,
int  b 
)

Definition at line 318 of file ComputePme.C.

Referenced by pe_sortop_bit_reversed::operator()().

00318                                        {
00319   int d = a ^ b;
00320   int c = 1;
00321   if ( d ) while ( ! (d & c) ) {
00322     c = c << 1;
00323   }
00324   return (a & c) - (b & c);
00325 }

void cuda_check_pme_charges ( void *  arg,
double  walltime 
)

Definition at line 3431 of file ComputePme.C.

References CcdCallBacksReset(), ComputePmeMgr::charges_time, ComputePmeMgr::check_charges_count, count_limit, cuda_errcheck(), CUDA_EVENT_ID_PME_CHARGES, CUDA_POLL, ComputePmeMgr::end_charges, NAMD_bug(), NAMD_die(), ComputePmeMgr::saved_sequence, and ComputePmeMgr::sendChargeGridReady().

Referenced by ComputePmeMgr::pollChargeGridReady().

03431                                                         {
03432   ComputePmeMgr *argp = (ComputePmeMgr *) arg;
03433 
03434   cudaError_t err = cudaEventQuery(argp->end_charges);
03435   if ( err == cudaSuccess ) {
03436     traceUserBracketEvent(CUDA_EVENT_ID_PME_CHARGES,argp->charges_time,walltime);
03437     argp->charges_time = walltime - argp->charges_time;
03438     argp->sendChargeGridReady();
03439     argp->check_charges_count = 0;
03440   } else if ( err != cudaErrorNotReady ) {
03441     cuda_errcheck("in cuda_check_pme_charges");
03442     NAMD_bug("cuda_errcheck missed error in cuda_check_pme_charges");
03443   } else if ( ++(argp->check_charges_count) >= count_limit ) {
03444     char errmsg[256];
03445     sprintf(errmsg,"cuda_check_pme_charges polled %d times over %f s on seq %d",
03446             argp->check_charges_count, walltime - argp->charges_time,
03447             argp->saved_sequence);
03448     cuda_errcheck(errmsg);
03449     NAMD_die(errmsg);
03450   } else {
03451     CcdCallBacksReset(0,walltime);  // fix Charm++
03452     CUDA_POLL(cuda_check_pme_charges, arg);
03453   }
03454 }

void cuda_check_pme_forces ( void *  arg,
double  walltime 
)

Definition at line 2473 of file ComputePme.C.

References ComputePmeMgr::check_forces_count, count_limit, cuda_errcheck(), CUDA_EVENT_ID_PME_FORCES, ComputePmeMgr::end_forces, EVENT_STRIDE, ComputePmeMgr::forces_count, ComputePmeMgr::forces_done_count, ComputePmeMgr::forces_time, if(), WorkDistrib::messageEnqueueWork(), NAMD_bug(), NAMD_die(), ComputePmeMgr::pmeComputes, and ComputePmeMgr::saved_sequence.

Referenced by ComputePmeMgr::pollForcesReady().

02473                                                        {
02474   ComputePmeMgr *argp = (ComputePmeMgr *) arg;
02475 
02476  while ( 1 ) { // process multiple events per call
02477   cudaError_t err = cudaEventQuery(argp->end_forces[argp->forces_done_count/EVENT_STRIDE]);
02478   if ( err == cudaSuccess ) {
02479     argp->check_forces_count = 0;
02480     for ( int i=0; i<EVENT_STRIDE; ++i ) {
02481       WorkDistrib::messageEnqueueWork(argp->pmeComputes[argp->forces_done_count]);
02482       if ( ++(argp->forces_done_count) == argp->forces_count ) break;
02483     }
02484     if ( argp->forces_done_count == argp->forces_count ) { // last event
02485       traceUserBracketEvent(CUDA_EVENT_ID_PME_FORCES,argp->forces_time,walltime);
02486       argp->forces_time = walltime - argp->forces_time;
02487       //CkPrintf("cuda_check_pme_forces forces_time == %f\n", argp->forces_time);
02488       return;
02489     } else { // more events
02490       continue; // check next event
02491     }
02492   } else if ( err != cudaErrorNotReady ) {
02493     cuda_errcheck("in cuda_check_pme_forces");
02494     NAMD_bug("cuda_errcheck missed error in cuda_check_pme_forces");
02495   } else if ( ++(argp->check_forces_count) >= count_limit ) {
02496     char errmsg[256];
02497     sprintf(errmsg,"cuda_check_pme_forces polled %d times over %f s on seq %d",
02498             argp->check_forces_count, walltime - argp->forces_time,
02499             argp->saved_sequence);
02500     cuda_errcheck(errmsg);
02501     NAMD_die(errmsg);
02502   } else {
02503     break; // call again
02504   }
02505  } // while ( 1 )
02506  CcdCallBacksReset(0,walltime);  // fix Charm++
02507  CUDA_POLL(cuda_check_pme_forces, arg);
02508 }

void cuda_errcheck ( const char *  msg  ) 

Definition at line 41 of file ComputeNonbondedCUDA.C.

References NAMD_die().

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputePmeMgr::ComputePmeMgr(), cuda_bind_atom_params(), cuda_bind_atoms(), cuda_bind_exclusions(), cuda_bind_force_table(), cuda_bind_forces(), cuda_bind_GBIS_bornRad(), cuda_bind_GBIS_dEdaSum(), cuda_bind_GBIS_dHdrPrefix(), cuda_bind_GBIS_energy(), cuda_bind_GBIS_intRad(), cuda_bind_GBIS_psiSum(), cuda_bind_lj_table(), cuda_bind_patch_pairs(), cuda_bind_vdw_types(), cuda_bind_virials(), cuda_check_local_progress(), cuda_check_pme_charges(), cuda_check_pme_forces(), cuda_check_progress(), cuda_check_remote_progress(), cuda_GBIS_P1(), cuda_GBIS_P2(), cuda_GBIS_P3(), cuda_init(), cuda_nonbonded_forces(), ComputeNonbondedCUDA::doWork(), ComputePmeMgr::initialize(), ComputePmeMgr::initialize_computes(), and ComputePmeMgr::ungridCalc().

00041                                     {
00042   cudaError_t err;
00043   if ((err = cudaGetLastError()) != cudaSuccess) {
00044     char host[128];
00045 #ifdef NOHOSTNAME
00046     sprintf(host,"physical node %d", CmiPhysicalNodeID(CkMyPe()));
00047 #else
00048     gethostname(host, 128);  host[127] = 0;
00049 #endif
00050     char devstr[128] = "";
00051     int devnum;
00052     if ( cudaGetDevice(&devnum) == cudaSuccess ) {
00053       sprintf(devstr, " device %d", devnum);
00054     }
00055     cudaDeviceProp deviceProp;
00056     if ( cudaGetDeviceProperties(&deviceProp, devnum) == cudaSuccess ) {
00057       sprintf(devstr, " device %d pci %x:%x:%x", devnum,
00058         deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID);
00059     }
00060     char errmsg[1024];
00061     sprintf(errmsg,"CUDA error %s on Pe %d (%s%s): %s", msg, CkMyPe(), host, devstr, cudaGetErrorString(err));
00062     NAMD_die(errmsg);
00063   }
00064 }

static int findRecipEvirPe (  )  [static]

Definition at line 241 of file ComputePme.C.

References NAMD_bug(), PatchMap::numPatchesOnNode(), and PatchMap::Object().

Referenced by PmeXPencil::evir_init(), and ComputePmeMgr::initialize().

00241                              {
00242     PatchMap *patchMap = PatchMap::Object();
00243     {
00244       int mype = CkMyPe();
00245       if ( patchMap->numPatchesOnNode(mype) ) {
00246         return mype; 
00247       }
00248     }
00249     {
00250       int node = CmiMyNode();
00251       int firstpe = CmiNodeFirst(node);
00252       int nodeSize = CmiNodeSize(node);
00253       int myrank = CkMyRank();
00254       for ( int i=0; i<nodeSize; ++i ) {
00255         int pe = firstpe + (myrank+i)%nodeSize;
00256         if ( patchMap->numPatchesOnNode(pe) ) {
00257           return pe;
00258         }
00259       }
00260     }
00261     {
00262       int *pelist;
00263       int nodeSize;
00264       CmiGetPesOnPhysicalNode(CmiPhysicalNodeID(CkMyPe()), &pelist, &nodeSize);
00265       int myrank;
00266       for ( int i=0; i<nodeSize; ++i ) {
00267         if ( pelist[i] == CkMyPe() ) myrank = i;
00268       }
00269       for ( int i=0; i<nodeSize; ++i ) {
00270         int pe = pelist[(myrank+i)%nodeSize];
00271         if ( patchMap->numPatchesOnNode(pe) ) {
00272           return pe;
00273         }
00274       }
00275     }
00276     {
00277       int mype = CkMyPe();
00278       int npes = CkNumPes();
00279       for ( int i=0; i<npes; ++i ) {
00280         int pe = (mype+i)%npes;
00281         if ( patchMap->numPatchesOnNode(pe) ) {
00282           return pe;
00283         }
00284       }
00285     }
00286     NAMD_bug("findRecipEvirPe() failed!");
00287     return -999;  // should never happen
00288 }

void generatePmePeList2 ( int *  gridPeMap,
int  numGridPes,
int *  transPeMap,
int  numTransPes 
)

Definition at line 292 of file ComputePme.C.

References WorkDistrib::peDiffuseOrdering.

Referenced by ComputePmeMgr::initialize().

00292                                                                                          {
00293   int ncpus = CkNumPes();
00294   
00295   for ( int i=0; i<numGridPes; ++i ) {
00296     gridPeMap[i] = WorkDistrib::peDiffuseOrdering[ncpus - numGridPes + i];
00297   }
00298   std::sort(gridPeMap,gridPeMap+numGridPes);
00299   int firstTransPe = ncpus - numGridPes - numTransPes;
00300   if ( firstTransPe < 0 ) {
00301     firstTransPe = 0;
00302     // 0 should be first in list, skip if possible
00303     if ( ncpus > numTransPes ) firstTransPe = 1;
00304   }
00305   for ( int i=0; i<numTransPes; ++i ) {
00306     transPeMap[i] = WorkDistrib::peDiffuseOrdering[firstTransPe + i];
00307   }
00308   std::sort(transPeMap,transPeMap+numTransPes);
00309 }

ResizeArray<ComputePme*>& getComputes ( ComputePmeMgr mgr  ) 

Definition at line 586 of file ComputePme.C.

References ComputePmeMgr::pmeComputes.

00586                                                           {
00587     return mgr->pmeComputes ;
00588 }

int isPmeProcessor ( int  p  ) 

Definition at line 597 of file ComputePme.C.

References Node::Object(), pencilPMEProcessors, Node::simParameters, and simParams.

00597                          { 
00598   SimParameters *simParams = Node::Object()->simParameters;
00599   if (simParams->usePMECUDA) {
00600     return 0;
00601   } else {
00602     return pencilPMEProcessors[p];
00603   }
00604 }

bool less_than_bit_reversed ( int  a,
int  b 
) [inline]

Definition at line 327 of file ComputePme.C.

Referenced by ijpair_sortop_bit_reversed::operator()(), and sortop_bit_reversed::operator()().

00327                                                  {
00328   int d = a ^ b;
00329   int c = 1;
00330   if ( d ) while ( ! (d & c) ) {
00331     c = c << 1;
00332   }
00333   return d && (b & c);
00334 }

void Pme_init (  ) 

Definition at line 853 of file ComputePme.C.

00854 {
00855 #if USE_TOPO_SFC
00856   if (CkMyRank() == 0) 
00857     tmgr_lock = CmiCreateLock();
00858 #endif
00859 }

static void PmeSlabSendTrans ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 1932 of file ComputePme.C.

References ComputePmeMgr::sendTransSubset().

Referenced by ComputePmeMgr::sendTrans().

01932                                                                                                  {
01933   ComputePmeMgr *mgr = (ComputePmeMgr *)param;
01934   mgr->sendTransSubset(first, last);
01935 }

static void PmeSlabSendUngrid ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 2362 of file ComputePme.C.

References ComputePmeMgr::sendUngridSubset().

Referenced by ComputePmeMgr::sendUngrid().

02362                                                                                                   {
02363   ComputePmeMgr *mgr = (ComputePmeMgr *)param;
02364   mgr->sendUngridSubset(first, last);
02365 }

static void PmeSlabSendUntrans ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 2176 of file ComputePme.C.

References ComputePmeMgr::sendUntransSubset().

Referenced by ComputePmeMgr::sendUntrans().

02176                                                                                                    {
02177   ComputePmeMgr *mgr = (ComputePmeMgr *)param;
02178   mgr->sendUntransSubset(first, last);
02179 }

static void PmeXPencilSendUntrans ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 5738 of file ComputePme.C.

References PmeXPencil::send_subset_untrans().

Referenced by PmeXPencil::send_untrans().

05738                                                                                                      {
05739         int evirIdx = paraNum;
05740         PmeXPencil *xpencil = (PmeXPencil *)param;
05741         xpencil->send_subset_untrans(first, last, evirIdx);
05742 }

static void PmeXZPencilFFT ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 5175 of file ComputePme.C.

Referenced by PmeZPencil::backward_fft(), PmeXPencil::backward_fft(), PmeXPencil::forward_fft(), and PmeZPencil::forward_fft().

05175                                                                                               {
05176 #ifdef NAMD_FFTW
05177 #ifdef NAMD_FFTW_3    
05178     fftwf_plan *plans = (fftwf_plan *)param;
05179     for(int i=first; i<=last; i++) fftwf_execute(plans[i]);
05180 #endif
05181 #endif        
05182 }

static void PmeYPencilBackwardFFT ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 5975 of file ComputePme.C.

References PmeYPencil::backward_subset_fft().

Referenced by PmeYPencil::backward_fft().

05975                                                                                                      {
05976         PmeYPencil *ypencil = (PmeYPencil *)param;
05977         ypencil->backward_subset_fft(first, last);
05978 }

static void PmeYPencilForwardFFT ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 5404 of file ComputePme.C.

References PmeYPencil::forward_subset_fft().

Referenced by PmeYPencil::forward_fft().

05404                                                                                                     {
05405         PmeYPencil *ypencil = (PmeYPencil *)param;
05406         ypencil->forward_subset_fft(first, last);
05407 }

static void PmeYPencilSendTrans ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 5456 of file ComputePme.C.

References PmeYPencil::send_subset_trans().

Referenced by PmeYPencil::send_trans().

05456                                                                                                    {
05457         PmeYPencil *ypencil = (PmeYPencil *)param;
05458         ypencil->send_subset_trans(first, last);
05459 }

static void PmeYPencilSendUntrans ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 6034 of file ComputePme.C.

References PmeYPencil::send_subset_untrans().

Referenced by PmeYPencil::send_untrans().

06034                                                                                                      {
06035         int evirIdx = paraNum;
06036         PmeYPencil *ypencil = (PmeYPencil *)param;
06037         ypencil->send_subset_untrans(first, last, evirIdx);
06038 }

static void PmeZPencilSendTrans ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 5240 of file ComputePme.C.

References PmeZPencil::send_subset_trans().

Referenced by PmeZPencil::send_trans().

05240                                                                                                    {
05241         PmeZPencil *zpencil = (PmeZPencil *)param;
05242         zpencil->send_subset_trans(first, last);        
05243 }

static void PmeZPencilSendUngrid ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 6335 of file ComputePme.C.

References PmeZPencil::send_subset_ungrid().

Referenced by PmeZPencil::send_all_ungrid().

06335                                                                                                     {
06336         //to take advantage of the interface which allows 3 user params at most.
06337         //under such situtation, no new parameter list needs to be created!! -Chao Mei
06338         int specialIdx = paraNum;
06339         PmeZPencil *zpencil = (PmeZPencil *)param;
06340         zpencil->send_subset_ungrid(first, last, specialIdx);
06341 }


Variable Documentation

__thread DeviceCUDA* deviceCUDA

Definition at line 18 of file DeviceCUDA.C.

char* pencilPMEProcessors

Definition at line 107 of file ComputePme.C.

Referenced by OptPmeMgr::initialize(), OptPmePencilMapZ::initialize(), OptPmePencilMapY::initialize(), OptPmePencilMapX::initialize(), ComputePmeMgr::initialize(), and isPmeProcessor().


Generated on Fri May 25 01:17:15 2018 for NAMD by  doxygen 1.4.7