ComputePme.C File Reference

#include <sfftw.h>
#include <srfftw.h>
#include <vector>
#include <algorithm>
#include <deque>
#include "InfoStream.h"
#include "Node.h"
#include "PatchMap.h"
#include "PatchMap.inl"
#include "AtomMap.h"
#include "ComputePme.h"
#include "ComputePmeMgr.decl.h"
#include "PmeBase.inl"
#include "PmeRealSpace.h"
#include "PmeKSpace.h"
#include "ComputeNonbondedUtil.h"
#include "PatchMgr.h"
#include "Molecule.h"
#include "ReductionMgr.h"
#include "ComputeMgr.h"
#include "ComputeMgr.decl.h"
#include "Debug.h"
#include "SimParameters.h"
#include "WorkDistrib.h"
#include "varsizemsg.h"
#include "Random.h"
#include "ckhashtable.h"
#include "Priorities.h"
#include "ComputeMoa.h"
#include "ComputeMoaMgr.decl.h"
#include "DeviceCUDA.h"
#include <cuda_runtime.h>
#include <cuda.h>
#include "ComputePmeCUDAKernel.h"
#include "ComputePmeMgr.def.h"

Go to the source code of this file.

Classes

class  PmeAckMsg
class  PmeGridMsg
class  PmeTransMsg
class  PmeSharedTransMsg
class  PmeUntransMsg
class  PmeSharedUntransMsg
class  PmeEvirMsg
class  PmePencilMap
struct  PmePencilInitMsgData
class  PmePencilInitMsg
struct  LocalPmeInfo
struct  NodePmeInfo
struct  sortop_bit_reversed
struct  ijpair
struct  ijpair_sortop_bit_reversed
class  ComputePmeMgr
struct  ComputePmeMgr::cuda_submit_charges_args
class  NodePmeMgr
class  PmePencil< T >
class  PmeZPencil
class  PmeYPencil
class  PmeXPencil

Defines

#define fftwf_malloc   fftw_malloc
#define fftwf_free   fftw_free
#define MIN_DEBUG_LEVEL   3
#define NUM_STREAMS   1
#define CUDA_STREAM_CREATE(X)   cudaStreamCreate(X)
#define CUDA_EVENT_ID_PME_CHARGES   80
#define CUDA_EVENT_ID_PME_FORCES   81
#define CUDA_EVENT_ID_PME_TICK   82
#define CUDA_EVENT_ID_PME_COPY   83
#define CUDA_EVENT_ID_PME_KERNEL   84
#define count_limit   1000000
#define CUDA_POLL(FN, ARG)   CcdCallFnAfter(FN,ARG,0.1)
#define EVENT_STRIDE   10
#define XCOPY(X)   masterPmeMgr->X = X;
#define XCOPY(X)   X = masterPmeMgr->X;
#define DEBUG_NODE_PAR_RECV   0

Functions

void cuda_errcheck (const char *msg)
static int findRecipEvirPe ()
void generatePmePeList2 (int *gridPeMap, int numGridPes, int *transPeMap, int numTransPes)
int compare_bit_reversed (int a, int b)
bool less_than_bit_reversed (int a, int b)
ResizeArray< ComputePme * > & getComputes (ComputePmeMgr *mgr)
int isPmeProcessor (int p)
void Pme_init ()
static void PmeSlabSendTrans (int first, int last, void *result, int paraNum, void *param)
static void PmeSlabSendUntrans (int first, int last, void *result, int paraNum, void *param)
static void PmeSlabSendUngrid (int first, int last, void *result, int paraNum, void *param)
void CcdCallBacksReset (void *ignored, double curWallTime)
void cuda_check_pme_forces (void *arg, double walltime)
void cuda_check_pme_charges (void *arg, double walltime)
static void PmeXZPencilFFT (int first, int last, void *result, int paraNum, void *param)
static void PmeZPencilSendTrans (int first, int last, void *result, int paraNum, void *param)
static void PmeYPencilForwardFFT (int first, int last, void *result, int paraNum, void *param)
static void PmeYPencilSendTrans (int first, int last, void *result, int paraNum, void *param)
static void PmeXPencilSendUntrans (int first, int last, void *result, int paraNum, void *param)
static void PmeYPencilBackwardFFT (int first, int last, void *result, int paraNum, void *param)
static void PmeYPencilSendUntrans (int first, int last, void *result, int paraNum, void *param)
static void PmeZPencilSendUngrid (int first, int last, void *result, int paraNum, void *param)

Variables

__thread DeviceCUDAdeviceCUDA
char * pencilPMEProcessors


Define Documentation

#define count_limit   1000000

Definition at line 2482 of file ComputePme.C.

#define CUDA_EVENT_ID_PME_CHARGES   80

Referenced by ComputePmeMgr::ComputePmeMgr(), and cuda_check_pme_charges().

#define CUDA_EVENT_ID_PME_COPY   83

Referenced by ComputePmeMgr::chargeGridSubmitted(), ComputePmeMgr::ComputePmeMgr(), ComputePmeMgr::cuda_submit_charges(), and ComputePmeMgr::ungridCalc().

#define CUDA_EVENT_ID_PME_FORCES   81

Referenced by ComputePmeMgr::ComputePmeMgr(), and cuda_check_pme_forces().

#define CUDA_EVENT_ID_PME_KERNEL   84

Referenced by ComputePmeMgr::ComputePmeMgr(), ComputePmeMgr::cuda_submit_charges(), and ComputePmeMgr::ungridCalc().

#define CUDA_EVENT_ID_PME_TICK   82

Referenced by ComputePmeMgr::ComputePmeMgr(), and ComputePmeMgr::ungridCalc().

#define CUDA_POLL ( FN,
ARG   )     CcdCallFnAfter(FN,ARG,0.1)

Definition at line 2483 of file ComputePme.C.

#define CUDA_STREAM_CREATE (  )     cudaStreamCreate(X)

Referenced by ComputePmeMgr::ComputePmeMgr().

#define DEBUG_NODE_PAR_RECV   0

Definition at line 5008 of file ComputePme.C.

#define EVENT_STRIDE   10

Definition at line 2484 of file ComputePme.C.

Referenced by cuda_check_pme_forces(), and ComputePmeMgr::ungridCalc().

#define fftwf_free   fftw_free

Definition at line 14 of file ComputePme.C.

Referenced by PmePencil< CBase_PmeZPencil >::~PmePencil().

#define fftwf_malloc   fftw_malloc

Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved.

Definition at line 13 of file ComputePme.C.

Referenced by PmeXPencil::fft_init(), PmeYPencil::fft_init(), and PmeZPencil::fft_init().

#define MIN_DEBUG_LEVEL   3

Definition at line 47 of file ComputePme.C.

#define NUM_STREAMS   1

Definition at line 505 of file ComputePme.C.

Referenced by ComputePmeMgr::ComputePmeMgr().

#define XCOPY (  )     X = masterPmeMgr->X;

#define XCOPY (  )     masterPmeMgr->X = X;

Referenced by ComputePmeMgr::initialize_computes().


Function Documentation

void CcdCallBacksReset ( void *  ignored,
double  curWallTime 
)

int compare_bit_reversed ( int  a,
int  b 
)

Definition at line 307 of file ComputePme.C.

Referenced by pe_sortop_bit_reversed::operator()().

00307                                        {
00308   int d = a ^ b;
00309   int c = 1;
00310   if ( d ) while ( ! (d & c) ) {
00311     c = c << 1;
00312   }
00313   return (a & c) - (b & c);
00314 }

void cuda_check_pme_charges ( void *  arg,
double  walltime 
)

Definition at line 3463 of file ComputePme.C.

References CcdCallBacksReset(), ComputePmeMgr::charges_time, ComputePmeMgr::check_charges_count, count_limit, cuda_errcheck(), CUDA_EVENT_ID_PME_CHARGES, CUDA_POLL, ComputePmeMgr::end_charges, NAMD_bug(), NAMD_die(), ComputePmeMgr::saved_sequence, and ComputePmeMgr::sendChargeGridReady().

Referenced by ComputePmeMgr::pollChargeGridReady().

03463                                                         {
03464   ComputePmeMgr *argp = (ComputePmeMgr *) arg;
03465 
03466   cudaError_t err = cudaEventQuery(argp->end_charges);
03467   if ( err == cudaSuccess ) {
03468     traceUserBracketEvent(CUDA_EVENT_ID_PME_CHARGES,argp->charges_time,walltime);
03469     argp->charges_time = walltime - argp->charges_time;
03470     argp->sendChargeGridReady();
03471     argp->check_charges_count = 0;
03472   } else if ( err != cudaErrorNotReady ) {
03473     cuda_errcheck("in cuda_check_pme_charges");
03474     NAMD_bug("cuda_errcheck missed error in cuda_check_pme_charges");
03475   } else if ( ++(argp->check_charges_count) >= count_limit ) {
03476     char errmsg[256];
03477     sprintf(errmsg,"cuda_check_pme_charges polled %d times over %f s on seq %d",
03478             argp->check_charges_count, walltime - argp->charges_time,
03479             argp->saved_sequence);
03480     cuda_errcheck(errmsg);
03481     NAMD_die(errmsg);
03482   } else {
03483     CcdCallBacksReset(0,walltime);  // fix Charm++
03484     CUDA_POLL(cuda_check_pme_charges, arg);
03485   }
03486 }

void cuda_check_pme_forces ( void *  arg,
double  walltime 
)

Definition at line 2488 of file ComputePme.C.

References ComputePmeMgr::check_forces_count, count_limit, cuda_errcheck(), CUDA_EVENT_ID_PME_FORCES, ComputePmeMgr::end_forces, EVENT_STRIDE, ComputePmeMgr::forces_count, ComputePmeMgr::forces_done_count, ComputePmeMgr::forces_time, if(), WorkDistrib::messageEnqueueWork(), NAMD_bug(), NAMD_die(), ComputePmeMgr::pmeComputes, and ComputePmeMgr::saved_sequence.

Referenced by ComputePmeMgr::pollForcesReady().

02488                                                        {
02489   ComputePmeMgr *argp = (ComputePmeMgr *) arg;
02490 
02491  while ( 1 ) { // process multiple events per call
02492   cudaError_t err = cudaEventQuery(argp->end_forces[argp->forces_done_count/EVENT_STRIDE]);
02493   if ( err == cudaSuccess ) {
02494     argp->check_forces_count = 0;
02495     for ( int i=0; i<EVENT_STRIDE; ++i ) {
02496       WorkDistrib::messageEnqueueWork(argp->pmeComputes[argp->forces_done_count]);
02497       if ( ++(argp->forces_done_count) == argp->forces_count ) break;
02498     }
02499     if ( argp->forces_done_count == argp->forces_count ) { // last event
02500       traceUserBracketEvent(CUDA_EVENT_ID_PME_FORCES,argp->forces_time,walltime);
02501       argp->forces_time = walltime - argp->forces_time;
02502       //CkPrintf("cuda_check_pme_forces forces_time == %f\n", argp->forces_time);
02503       return;
02504     } else { // more events
02505       continue; // check next event
02506     }
02507   } else if ( err != cudaErrorNotReady ) {
02508     cuda_errcheck("in cuda_check_pme_forces");
02509     NAMD_bug("cuda_errcheck missed error in cuda_check_pme_forces");
02510   } else if ( ++(argp->check_forces_count) >= count_limit ) {
02511     char errmsg[256];
02512     sprintf(errmsg,"cuda_check_pme_forces polled %d times over %f s on seq %d",
02513             argp->check_forces_count, walltime - argp->forces_time,
02514             argp->saved_sequence);
02515     cuda_errcheck(errmsg);
02516     NAMD_die(errmsg);
02517   } else {
02518     break; // call again
02519   }
02520  } // while ( 1 )
02521  CcdCallBacksReset(0,walltime);  // fix Charm++
02522  CUDA_POLL(cuda_check_pme_forces, arg);
02523 }

void cuda_errcheck ( const char *  msg  ) 

Definition at line 41 of file ComputeNonbondedCUDA.C.

References NAMD_die().

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputePmeMgr::ComputePmeMgr(), cuda_bind_atom_params(), cuda_bind_atoms(), cuda_bind_exclusions(), cuda_bind_force_table(), cuda_bind_forces(), cuda_bind_GBIS_bornRad(), cuda_bind_GBIS_dEdaSum(), cuda_bind_GBIS_dHdrPrefix(), cuda_bind_GBIS_energy(), cuda_bind_GBIS_intRad(), cuda_bind_GBIS_psiSum(), cuda_bind_lj_table(), cuda_bind_patch_pairs(), cuda_bind_vdw_types(), cuda_bind_virials(), cuda_check_local_progress(), cuda_check_pme_charges(), cuda_check_pme_forces(), cuda_check_progress(), cuda_check_remote_progress(), cuda_GBIS_P1(), cuda_GBIS_P2(), cuda_GBIS_P3(), cuda_init(), cuda_nonbonded_forces(), ComputeNonbondedCUDA::doWork(), ComputePmeMgr::initialize(), ComputePmeMgr::initialize_computes(), and ComputePmeMgr::ungridCalc().

00041                                     {
00042   cudaError_t err;
00043   if ((err = cudaGetLastError()) != cudaSuccess) {
00044     char host[128];
00045 #ifdef NOHOSTNAME
00046     sprintf(host,"physical node %d", CmiPhysicalNodeID(CkMyPe()));
00047 #else
00048     gethostname(host, 128);  host[127] = 0;
00049 #endif
00050     char devstr[128] = "";
00051     int devnum;
00052     if ( cudaGetDevice(&devnum) == cudaSuccess ) {
00053       sprintf(devstr, " device %d", devnum);
00054     }
00055     char errmsg[1024];
00056     sprintf(errmsg,"CUDA error %s on Pe %d (%s%s): %s", msg, CkMyPe(), host, devstr, cudaGetErrorString(err));
00057     NAMD_die(errmsg);
00058   }
00059 }

static int findRecipEvirPe (  )  [static]

Definition at line 230 of file ComputePme.C.

References NAMD_bug(), PatchMap::numPatchesOnNode(), and PatchMap::Object().

Referenced by PmeXPencil::evir_init(), and ComputePmeMgr::initialize().

00230                              {
00231     PatchMap *patchMap = PatchMap::Object();
00232     {
00233       int mype = CkMyPe();
00234       if ( patchMap->numPatchesOnNode(mype) ) {
00235         return mype; 
00236       }
00237     }
00238     {
00239       int node = CmiMyNode();
00240       int firstpe = CmiNodeFirst(node);
00241       int nodeSize = CmiNodeSize(node);
00242       int myrank = CkMyRank();
00243       for ( int i=0; i<nodeSize; ++i ) {
00244         int pe = firstpe + (myrank+i)%nodeSize;
00245         if ( patchMap->numPatchesOnNode(pe) ) {
00246           return pe;
00247         }
00248       }
00249     }
00250     {
00251       int *pelist;
00252       int nodeSize;
00253       CmiGetPesOnPhysicalNode(CmiPhysicalNodeID(CkMyPe()), &pelist, &nodeSize);
00254       int myrank;
00255       for ( int i=0; i<nodeSize; ++i ) {
00256         if ( pelist[i] == CkMyPe() ) myrank = i;
00257       }
00258       for ( int i=0; i<nodeSize; ++i ) {
00259         int pe = pelist[(myrank+i)%nodeSize];
00260         if ( patchMap->numPatchesOnNode(pe) ) {
00261           return pe;
00262         }
00263       }
00264     }
00265     {
00266       int mype = CkMyPe();
00267       int npes = CkNumPes();
00268       for ( int i=0; i<npes; ++i ) {
00269         int pe = (mype+i)%npes;
00270         if ( patchMap->numPatchesOnNode(pe) ) {
00271           return pe;
00272         }
00273       }
00274     }
00275     NAMD_bug("findRecipEvirPe() failed!");
00276     return -999;  // should never happen
00277 }

void generatePmePeList2 ( int *  gridPeMap,
int  numGridPes,
int *  transPeMap,
int  numTransPes 
)

Definition at line 281 of file ComputePme.C.

References WorkDistrib::peDiffuseOrdering.

Referenced by ComputePmeMgr::initialize().

00281                                                                                          {
00282   int ncpus = CkNumPes();
00283   
00284   for ( int i=0; i<numGridPes; ++i ) {
00285     gridPeMap[i] = WorkDistrib::peDiffuseOrdering[ncpus - numGridPes + i];
00286   }
00287   std::sort(gridPeMap,gridPeMap+numGridPes);
00288   int firstTransPe = ncpus - numGridPes - numTransPes;
00289   if ( firstTransPe < 0 ) {
00290     firstTransPe = 0;
00291     // 0 should be first in list, skip if possible
00292     if ( ncpus > numTransPes ) firstTransPe = 1;
00293   }
00294   for ( int i=0; i<numTransPes; ++i ) {
00295     transPeMap[i] = WorkDistrib::peDiffuseOrdering[firstTransPe + i];
00296   }
00297   std::sort(transPeMap,transPeMap+numTransPes);
00298 }

ResizeArray<ComputePme*>& getComputes ( ComputePmeMgr mgr  ) 

Definition at line 578 of file ComputePme.C.

References ComputePmeMgr::pmeComputes.

00578                                                           {
00579     return mgr->pmeComputes ;
00580 }

int isPmeProcessor ( int  p  ) 

Definition at line 589 of file ComputePme.C.

References Node::Object(), pencilPMEProcessors, Node::simParameters, and simParams.

00589                          { 
00590   SimParameters *simParams = Node::Object()->simParameters;
00591   if (simParams->usePMECUDA) {
00592     return 0;
00593   } else {
00594     return pencilPMEProcessors[p];
00595   }
00596 }

bool less_than_bit_reversed ( int  a,
int  b 
) [inline]

Definition at line 316 of file ComputePme.C.

Referenced by ijpair_sortop_bit_reversed::operator()(), and sortop_bit_reversed::operator()().

00316                                                  {
00317   int d = a ^ b;
00318   int c = 1;
00319   if ( d ) while ( ! (d & c) ) {
00320     c = c << 1;
00321   }
00322   return d && (b & c);
00323 }

void Pme_init (  ) 

Definition at line 845 of file ComputePme.C.

00846 {
00847 #if USE_TOPO_SFC
00848   if (CkMyRank() == 0) 
00849     tmgr_lock = CmiCreateLock();
00850 #endif
00851 }

static void PmeSlabSendTrans ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 1946 of file ComputePme.C.

References ComputePmeMgr::sendTransSubset().

Referenced by ComputePmeMgr::sendTrans().

01946                                                                                                  {
01947   ComputePmeMgr *mgr = (ComputePmeMgr *)param;
01948   mgr->sendTransSubset(first, last);
01949 }

static void PmeSlabSendUngrid ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 2377 of file ComputePme.C.

References ComputePmeMgr::sendUngridSubset().

Referenced by ComputePmeMgr::sendUngrid().

02377                                                                                                   {
02378   ComputePmeMgr *mgr = (ComputePmeMgr *)param;
02379   mgr->sendUngridSubset(first, last);
02380 }

static void PmeSlabSendUntrans ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 2190 of file ComputePme.C.

References ComputePmeMgr::sendUntransSubset().

Referenced by ComputePmeMgr::sendUntrans().

02190                                                                                                    {
02191   ComputePmeMgr *mgr = (ComputePmeMgr *)param;
02192   mgr->sendUntransSubset(first, last);
02193 }

static void PmeXPencilSendUntrans ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 5769 of file ComputePme.C.

References PmeXPencil::send_subset_untrans().

Referenced by PmeXPencil::send_untrans().

05769                                                                                                      {
05770         int evirIdx = paraNum;
05771         PmeXPencil *xpencil = (PmeXPencil *)param;
05772         xpencil->send_subset_untrans(first, last, evirIdx);
05773 }

static void PmeXZPencilFFT ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 5206 of file ComputePme.C.

Referenced by PmeZPencil::backward_fft(), PmeXPencil::backward_fft(), PmeXPencil::forward_fft(), and PmeZPencil::forward_fft().

05206                                                                                               {
05207 #ifdef NAMD_FFTW
05208 #ifdef NAMD_FFTW_3    
05209     fftwf_plan *plans = (fftwf_plan *)param;
05210     for(int i=first; i<=last; i++) fftwf_execute(plans[i]);
05211 #endif
05212 #endif        
05213 }

static void PmeYPencilBackwardFFT ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 6006 of file ComputePme.C.

References PmeYPencil::backward_subset_fft().

Referenced by PmeYPencil::backward_fft().

06006                                                                                                      {
06007         PmeYPencil *ypencil = (PmeYPencil *)param;
06008         ypencil->backward_subset_fft(first, last);
06009 }

static void PmeYPencilForwardFFT ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 5435 of file ComputePme.C.

References PmeYPencil::forward_subset_fft().

Referenced by PmeYPencil::forward_fft().

05435                                                                                                     {
05436         PmeYPencil *ypencil = (PmeYPencil *)param;
05437         ypencil->forward_subset_fft(first, last);
05438 }

static void PmeYPencilSendTrans ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 5487 of file ComputePme.C.

References PmeYPencil::send_subset_trans().

Referenced by PmeYPencil::send_trans().

05487                                                                                                    {
05488         PmeYPencil *ypencil = (PmeYPencil *)param;
05489         ypencil->send_subset_trans(first, last);
05490 }

static void PmeYPencilSendUntrans ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 6065 of file ComputePme.C.

References PmeYPencil::send_subset_untrans().

Referenced by PmeYPencil::send_untrans().

06065                                                                                                      {
06066         int evirIdx = paraNum;
06067         PmeYPencil *ypencil = (PmeYPencil *)param;
06068         ypencil->send_subset_untrans(first, last, evirIdx);
06069 }

static void PmeZPencilSendTrans ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 5271 of file ComputePme.C.

References PmeZPencil::send_subset_trans().

Referenced by PmeZPencil::send_trans().

05271                                                                                                    {
05272         PmeZPencil *zpencil = (PmeZPencil *)param;
05273         zpencil->send_subset_trans(first, last);        
05274 }

static void PmeZPencilSendUngrid ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 6366 of file ComputePme.C.

References PmeZPencil::send_subset_ungrid().

Referenced by PmeZPencil::send_all_ungrid().

06366                                                                                                     {
06367         //to take advantage of the interface which allows 3 user params at most.
06368         //under such situtation, no new parameter list needs to be created!! -Chao Mei
06369         int specialIdx = paraNum;
06370         PmeZPencil *zpencil = (PmeZPencil *)param;
06371         zpencil->send_subset_ungrid(first, last, specialIdx);
06372 }


Variable Documentation

__thread DeviceCUDA* deviceCUDA

Definition at line 18 of file DeviceCUDA.C.

char* pencilPMEProcessors

Definition at line 96 of file ComputePme.C.

Referenced by OptPmeMgr::initialize(), OptPmePencilMapZ::initialize(), OptPmePencilMapY::initialize(), OptPmePencilMapX::initialize(), ComputePmeMgr::initialize(), and isPmeProcessor().


Generated on Thu Sep 21 01:17:15 2017 for NAMD by  doxygen 1.4.7