ComputePme.C File Reference

#include <sfftw.h>
#include <srfftw.h>
#include <vector>
#include <algorithm>
#include <deque>
#include "InfoStream.h"
#include "Node.h"
#include "PatchMap.h"
#include "PatchMap.inl"
#include "AtomMap.h"
#include "ComputePme.h"
#include "ComputePmeMgr.decl.h"
#include "PmeBase.inl"
#include "PmeRealSpace.h"
#include "PmeKSpace.h"
#include "ComputeNonbondedUtil.h"
#include "PatchMgr.h"
#include "Molecule.h"
#include "ReductionMgr.h"
#include "ComputeMgr.h"
#include "ComputeMgr.decl.h"
#include "Debug.h"
#include "SimParameters.h"
#include "WorkDistrib.h"
#include "varsizemsg.h"
#include "Random.h"
#include "ckhashtable.h"
#include "Priorities.h"
#include "ComputeMoa.h"
#include "ComputeMoaMgr.decl.h"
#include "DeviceCUDA.h"
#include <cuda_runtime.h>
#include <cuda.h>
#include "ComputePmeCUDAKernel.h"
#include "ComputePmeMgr.def.h"

Go to the source code of this file.

Classes

class  PmeAckMsg
class  PmeGridMsg
class  PmeTransMsg
class  PmeSharedTransMsg
class  PmeUntransMsg
class  PmeSharedUntransMsg
class  PmeEvirMsg
class  PmePencilMap
struct  PmePencilInitMsgData
class  PmePencilInitMsg
struct  LocalPmeInfo
struct  NodePmeInfo
struct  sortop_bit_reversed
struct  ijpair
struct  ijpair_sortop_bit_reversed
class  ComputePmeMgr
struct  ComputePmeMgr::cuda_submit_charges_args
class  NodePmeMgr
class  PmePencil< T >
class  PmeZPencil
class  PmeYPencil
class  PmeXPencil

Defines

#define fftwf_malloc   fftw_malloc
#define fftwf_free   fftw_free
#define MIN_DEBUG_LEVEL   3
#define NUM_STREAMS   1
#define CUDA_STREAM_CREATE(X)   cudaStreamCreate(X)
#define CUDA_EVENT_ID_PME_CHARGES   80
#define CUDA_EVENT_ID_PME_FORCES   81
#define CUDA_EVENT_ID_PME_TICK   82
#define CUDA_EVENT_ID_PME_COPY   83
#define CUDA_EVENT_ID_PME_KERNEL   84
#define count_limit   1000000
#define CUDA_POLL(FN, ARG)   CcdCallFnAfter(FN,ARG,0.1)
#define EVENT_STRIDE   10
#define XCOPY(X)   masterPmeMgr->X = X;
#define XCOPY(X)   X = masterPmeMgr->X;
#define DEBUG_NODE_PAR_RECV   0

Functions

void cuda_errcheck (const char *msg)
static int findRecipEvirPe ()
void generatePmePeList2 (int *gridPeMap, int numGridPes, int *transPeMap, int numTransPes)
int compare_bit_reversed (int a, int b)
bool less_than_bit_reversed (int a, int b)
ResizeArray< ComputePme * > & getComputes (ComputePmeMgr *mgr)
int isPmeProcessor (int p)
void Pme_init ()
static void PmeSlabSendTrans (int first, int last, void *result, int paraNum, void *param)
static void PmeSlabSendUntrans (int first, int last, void *result, int paraNum, void *param)
static void PmeSlabSendUngrid (int first, int last, void *result, int paraNum, void *param)
void CcdCallBacksReset (void *ignored, double curWallTime)
void cuda_check_pme_forces (void *arg, double walltime)
void cuda_check_pme_charges (void *arg, double walltime)
static void PmeXZPencilFFT (int first, int last, void *result, int paraNum, void *param)
static void PmeZPencilSendTrans (int first, int last, void *result, int paraNum, void *param)
static void PmeYPencilForwardFFT (int first, int last, void *result, int paraNum, void *param)
static void PmeYPencilSendTrans (int first, int last, void *result, int paraNum, void *param)
static void PmeXPencilSendUntrans (int first, int last, void *result, int paraNum, void *param)
static void PmeYPencilBackwardFFT (int first, int last, void *result, int paraNum, void *param)
static void PmeYPencilSendUntrans (int first, int last, void *result, int paraNum, void *param)
static void PmeZPencilSendUngrid (int first, int last, void *result, int paraNum, void *param)

Variables

__thread DeviceCUDAdeviceCUDA
char * pencilPMEProcessors


Define Documentation

#define count_limit   1000000

Definition at line 2483 of file ComputePme.C.

#define CUDA_EVENT_ID_PME_CHARGES   80

Referenced by ComputePmeMgr::ComputePmeMgr(), and cuda_check_pme_charges().

#define CUDA_EVENT_ID_PME_COPY   83

Referenced by ComputePmeMgr::chargeGridSubmitted(), ComputePmeMgr::ComputePmeMgr(), ComputePmeMgr::cuda_submit_charges(), and ComputePmeMgr::ungridCalc().

#define CUDA_EVENT_ID_PME_FORCES   81

Referenced by ComputePmeMgr::ComputePmeMgr(), and cuda_check_pme_forces().

#define CUDA_EVENT_ID_PME_KERNEL   84

Referenced by ComputePmeMgr::ComputePmeMgr(), ComputePmeMgr::cuda_submit_charges(), and ComputePmeMgr::ungridCalc().

#define CUDA_EVENT_ID_PME_TICK   82

Referenced by ComputePmeMgr::ComputePmeMgr(), and ComputePmeMgr::ungridCalc().

#define CUDA_POLL ( FN,
ARG   )     CcdCallFnAfter(FN,ARG,0.1)

Definition at line 2484 of file ComputePme.C.

#define CUDA_STREAM_CREATE (  )     cudaStreamCreate(X)

Referenced by ComputePmeMgr::ComputePmeMgr().

#define DEBUG_NODE_PAR_RECV   0

Definition at line 5015 of file ComputePme.C.

#define EVENT_STRIDE   10

Definition at line 2485 of file ComputePme.C.

Referenced by cuda_check_pme_forces(), and ComputePmeMgr::ungridCalc().

#define fftwf_free   fftw_free

Definition at line 14 of file ComputePme.C.

Referenced by PmePencil< CBase_PmeZPencil >::~PmePencil().

#define fftwf_malloc   fftw_malloc

Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved.

Definition at line 13 of file ComputePme.C.

Referenced by PmeXPencil::fft_init(), PmeYPencil::fft_init(), and PmeZPencil::fft_init().

#define MIN_DEBUG_LEVEL   3

Definition at line 47 of file ComputePme.C.

#define NUM_STREAMS   1

Definition at line 505 of file ComputePme.C.

Referenced by ComputePmeMgr::ComputePmeMgr().

#define XCOPY (  )     X = masterPmeMgr->X;

#define XCOPY (  )     masterPmeMgr->X = X;

Referenced by ComputePmeMgr::initialize_computes().


Function Documentation

void CcdCallBacksReset ( void *  ignored,
double  curWallTime 
)

int compare_bit_reversed ( int  a,
int  b 
)

Definition at line 307 of file ComputePme.C.

Referenced by pe_sortop_bit_reversed::operator()().

00307                                        {
00308   int d = a ^ b;
00309   int c = 1;
00310   if ( d ) while ( ! (d & c) ) {
00311     c = c << 1;
00312   }
00313   return (a & c) - (b & c);
00314 }

void cuda_check_pme_charges ( void *  arg,
double  walltime 
)

Definition at line 3470 of file ComputePme.C.

References CcdCallBacksReset(), ComputePmeMgr::charges_time, ComputePmeMgr::check_charges_count, count_limit, cuda_errcheck(), CUDA_EVENT_ID_PME_CHARGES, CUDA_POLL, ComputePmeMgr::end_charges, NAMD_bug(), NAMD_die(), ComputePmeMgr::saved_sequence, and ComputePmeMgr::sendChargeGridReady().

Referenced by ComputePmeMgr::pollChargeGridReady().

03470                                                         {
03471   ComputePmeMgr *argp = (ComputePmeMgr *) arg;
03472 
03473   cudaError_t err = cudaEventQuery(argp->end_charges);
03474   if ( err == cudaSuccess ) {
03475     traceUserBracketEvent(CUDA_EVENT_ID_PME_CHARGES,argp->charges_time,walltime);
03476     argp->charges_time = walltime - argp->charges_time;
03477     argp->sendChargeGridReady();
03478     argp->check_charges_count = 0;
03479   } else if ( err != cudaErrorNotReady ) {
03480     cuda_errcheck("in cuda_check_pme_charges");
03481     NAMD_bug("cuda_errcheck missed error in cuda_check_pme_charges");
03482   } else if ( ++(argp->check_charges_count) >= count_limit ) {
03483     char errmsg[256];
03484     sprintf(errmsg,"cuda_check_pme_charges polled %d times over %f s on seq %d",
03485             argp->check_charges_count, walltime - argp->charges_time,
03486             argp->saved_sequence);
03487     cuda_errcheck(errmsg);
03488     NAMD_die(errmsg);
03489   } else {
03490     CcdCallBacksReset(0,walltime);  // fix Charm++
03491     CUDA_POLL(cuda_check_pme_charges, arg);
03492   }
03493 }

void cuda_check_pme_forces ( void *  arg,
double  walltime 
)

Definition at line 2489 of file ComputePme.C.

References ComputePmeMgr::check_forces_count, count_limit, cuda_errcheck(), CUDA_EVENT_ID_PME_FORCES, ComputePmeMgr::end_forces, EVENT_STRIDE, ComputePmeMgr::forces_count, ComputePmeMgr::forces_done_count, ComputePmeMgr::forces_time, if(), WorkDistrib::messageEnqueueWork(), NAMD_bug(), NAMD_die(), ComputePmeMgr::pmeComputes, and ComputePmeMgr::saved_sequence.

Referenced by ComputePmeMgr::pollForcesReady().

02489                                                        {
02490   ComputePmeMgr *argp = (ComputePmeMgr *) arg;
02491 
02492  while ( 1 ) { // process multiple events per call
02493   cudaError_t err = cudaEventQuery(argp->end_forces[argp->forces_done_count/EVENT_STRIDE]);
02494   if ( err == cudaSuccess ) {
02495     argp->check_forces_count = 0;
02496     for ( int i=0; i<EVENT_STRIDE; ++i ) {
02497       WorkDistrib::messageEnqueueWork(argp->pmeComputes[argp->forces_done_count]);
02498       if ( ++(argp->forces_done_count) == argp->forces_count ) break;
02499     }
02500     if ( argp->forces_done_count == argp->forces_count ) { // last event
02501       traceUserBracketEvent(CUDA_EVENT_ID_PME_FORCES,argp->forces_time,walltime);
02502       argp->forces_time = walltime - argp->forces_time;
02503       //CkPrintf("cuda_check_pme_forces forces_time == %f\n", argp->forces_time);
02504       return;
02505     } else { // more events
02506       continue; // check next event
02507     }
02508   } else if ( err != cudaErrorNotReady ) {
02509     cuda_errcheck("in cuda_check_pme_forces");
02510     NAMD_bug("cuda_errcheck missed error in cuda_check_pme_forces");
02511   } else if ( ++(argp->check_forces_count) >= count_limit ) {
02512     char errmsg[256];
02513     sprintf(errmsg,"cuda_check_pme_forces polled %d times over %f s on seq %d",
02514             argp->check_forces_count, walltime - argp->forces_time,
02515             argp->saved_sequence);
02516     cuda_errcheck(errmsg);
02517     NAMD_die(errmsg);
02518   } else {
02519     break; // call again
02520   }
02521  } // while ( 1 )
02522  CcdCallBacksReset(0,walltime);  // fix Charm++
02523  CUDA_POLL(cuda_check_pme_forces, arg);
02524 }

void cuda_errcheck ( const char *  msg  ) 

Definition at line 41 of file ComputeNonbondedCUDA.C.

References NAMD_die().

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputePmeMgr::ComputePmeMgr(), cuda_bind_atom_params(), cuda_bind_atoms(), cuda_bind_exclusions(), cuda_bind_force_table(), cuda_bind_forces(), cuda_bind_GBIS_bornRad(), cuda_bind_GBIS_dEdaSum(), cuda_bind_GBIS_dHdrPrefix(), cuda_bind_GBIS_energy(), cuda_bind_GBIS_intRad(), cuda_bind_GBIS_psiSum(), cuda_bind_lj_table(), cuda_bind_patch_pairs(), cuda_bind_vdw_types(), cuda_bind_virials(), cuda_check_local_progress(), cuda_check_pme_charges(), cuda_check_pme_forces(), cuda_check_progress(), cuda_check_remote_progress(), cuda_GBIS_P1(), cuda_GBIS_P2(), cuda_GBIS_P3(), cuda_init(), cuda_nonbonded_forces(), ComputeNonbondedCUDA::doWork(), ComputePmeMgr::initialize(), ComputePmeMgr::initialize_computes(), and ComputePmeMgr::ungridCalc().

00041                                     {
00042   cudaError_t err;
00043   if ((err = cudaGetLastError()) != cudaSuccess) {
00044     char host[128];
00045 #ifdef NOHOSTNAME
00046     sprintf(host,"physical node %d", CmiPhysicalNodeID(CkMyPe()));
00047 #else
00048     gethostname(host, 128);  host[127] = 0;
00049 #endif
00050     char devstr[128] = "";
00051     int devnum;
00052     if ( cudaGetDevice(&devnum) == cudaSuccess ) {
00053       sprintf(devstr, " device %d", devnum);
00054     }
00055     char errmsg[1024];
00056     sprintf(errmsg,"CUDA error %s on Pe %d (%s%s): %s", msg, CkMyPe(), host, devstr, cudaGetErrorString(err));
00057     NAMD_die(errmsg);
00058   }
00059 }

static int findRecipEvirPe (  )  [static]

Definition at line 230 of file ComputePme.C.

References NAMD_bug(), PatchMap::numPatchesOnNode(), and PatchMap::Object().

Referenced by PmeXPencil::evir_init(), and ComputePmeMgr::initialize().

00230                              {
00231     PatchMap *patchMap = PatchMap::Object();
00232     {
00233       int mype = CkMyPe();
00234       if ( patchMap->numPatchesOnNode(mype) ) {
00235         return mype; 
00236       }
00237     }
00238     {
00239       int node = CmiMyNode();
00240       int firstpe = CmiNodeFirst(node);
00241       int nodeSize = CmiNodeSize(node);
00242       int myrank = CkMyRank();
00243       for ( int i=0; i<nodeSize; ++i ) {
00244         int pe = firstpe + (myrank+i)%nodeSize;
00245         if ( patchMap->numPatchesOnNode(pe) ) {
00246           return pe;
00247         }
00248       }
00249     }
00250     {
00251       int *pelist;
00252       int nodeSize;
00253       CmiGetPesOnPhysicalNode(CmiPhysicalNodeID(CkMyPe()), &pelist, &nodeSize);
00254       int myrank;
00255       for ( int i=0; i<nodeSize; ++i ) {
00256         if ( pelist[i] == CkMyPe() ) myrank = i;
00257       }
00258       for ( int i=0; i<nodeSize; ++i ) {
00259         int pe = pelist[(myrank+i)%nodeSize];
00260         if ( patchMap->numPatchesOnNode(pe) ) {
00261           return pe;
00262         }
00263       }
00264     }
00265     {
00266       int mype = CkMyPe();
00267       int npes = CkNumPes();
00268       for ( int i=0; i<npes; ++i ) {
00269         int pe = (mype+i)%npes;
00270         if ( patchMap->numPatchesOnNode(pe) ) {
00271           return pe;
00272         }
00273       }
00274     }
00275     NAMD_bug("findRecipEvirPe() failed!");
00276     return -999;  // should never happen
00277 }

void generatePmePeList2 ( int *  gridPeMap,
int  numGridPes,
int *  transPeMap,
int  numTransPes 
)

Definition at line 281 of file ComputePme.C.

References WorkDistrib::peDiffuseOrdering.

Referenced by ComputePmeMgr::initialize().

00281                                                                                          {
00282   int ncpus = CkNumPes();
00283   
00284   for ( int i=0; i<numGridPes; ++i ) {
00285     gridPeMap[i] = WorkDistrib::peDiffuseOrdering[ncpus - numGridPes + i];
00286   }
00287   std::sort(gridPeMap,gridPeMap+numGridPes);
00288   int firstTransPe = ncpus - numGridPes - numTransPes;
00289   if ( firstTransPe < 0 ) {
00290     firstTransPe = 0;
00291     // 0 should be first in list, skip if possible
00292     if ( ncpus > numTransPes ) firstTransPe = 1;
00293   }
00294   for ( int i=0; i<numTransPes; ++i ) {
00295     transPeMap[i] = WorkDistrib::peDiffuseOrdering[firstTransPe + i];
00296   }
00297   std::sort(transPeMap,transPeMap+numTransPes);
00298 }

ResizeArray<ComputePme*>& getComputes ( ComputePmeMgr mgr  ) 

Definition at line 578 of file ComputePme.C.

References ComputePmeMgr::pmeComputes.

00578                                                           {
00579     return mgr->pmeComputes ;
00580 }

int isPmeProcessor ( int  p  ) 

Definition at line 589 of file ComputePme.C.

References Node::Object(), pencilPMEProcessors, Node::simParameters, and simParams.

00589                          { 
00590   SimParameters *simParams = Node::Object()->simParameters;
00591   if (simParams->usePMECUDA) {
00592     return 0;
00593   } else {
00594     return pencilPMEProcessors[p];
00595   }
00596 }

bool less_than_bit_reversed ( int  a,
int  b 
) [inline]

Definition at line 316 of file ComputePme.C.

Referenced by ijpair_sortop_bit_reversed::operator()(), and sortop_bit_reversed::operator()().

00316                                                  {
00317   int d = a ^ b;
00318   int c = 1;
00319   if ( d ) while ( ! (d & c) ) {
00320     c = c << 1;
00321   }
00322   return d && (b & c);
00323 }

void Pme_init (  ) 

Definition at line 845 of file ComputePme.C.

00846 {
00847 #if USE_TOPO_SFC
00848   if (CkMyRank() == 0) 
00849     tmgr_lock = CmiCreateLock();
00850 #endif
00851 }

static void PmeSlabSendTrans ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 1947 of file ComputePme.C.

References ComputePmeMgr::sendTransSubset().

Referenced by ComputePmeMgr::sendTrans().

01947                                                                                                  {
01948   ComputePmeMgr *mgr = (ComputePmeMgr *)param;
01949   mgr->sendTransSubset(first, last);
01950 }

static void PmeSlabSendUngrid ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 2378 of file ComputePme.C.

References ComputePmeMgr::sendUngridSubset().

Referenced by ComputePmeMgr::sendUngrid().

02378                                                                                                   {
02379   ComputePmeMgr *mgr = (ComputePmeMgr *)param;
02380   mgr->sendUngridSubset(first, last);
02381 }

static void PmeSlabSendUntrans ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 2191 of file ComputePme.C.

References ComputePmeMgr::sendUntransSubset().

Referenced by ComputePmeMgr::sendUntrans().

02191                                                                                                    {
02192   ComputePmeMgr *mgr = (ComputePmeMgr *)param;
02193   mgr->sendUntransSubset(first, last);
02194 }

static void PmeXPencilSendUntrans ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 5776 of file ComputePme.C.

References PmeXPencil::send_subset_untrans().

Referenced by PmeXPencil::send_untrans().

05776                                                                                                      {
05777         int evirIdx = paraNum;
05778         PmeXPencil *xpencil = (PmeXPencil *)param;
05779         xpencil->send_subset_untrans(first, last, evirIdx);
05780 }

static void PmeXZPencilFFT ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 5213 of file ComputePme.C.

Referenced by PmeZPencil::backward_fft(), PmeXPencil::backward_fft(), PmeXPencil::forward_fft(), and PmeZPencil::forward_fft().

05213                                                                                               {
05214 #ifdef NAMD_FFTW
05215 #ifdef NAMD_FFTW_3    
05216     fftwf_plan *plans = (fftwf_plan *)param;
05217     for(int i=first; i<=last; i++) fftwf_execute(plans[i]);
05218 #endif
05219 #endif        
05220 }

static void PmeYPencilBackwardFFT ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 6013 of file ComputePme.C.

References PmeYPencil::backward_subset_fft().

Referenced by PmeYPencil::backward_fft().

06013                                                                                                      {
06014         PmeYPencil *ypencil = (PmeYPencil *)param;
06015         ypencil->backward_subset_fft(first, last);
06016 }

static void PmeYPencilForwardFFT ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 5442 of file ComputePme.C.

References PmeYPencil::forward_subset_fft().

Referenced by PmeYPencil::forward_fft().

05442                                                                                                     {
05443         PmeYPencil *ypencil = (PmeYPencil *)param;
05444         ypencil->forward_subset_fft(first, last);
05445 }

static void PmeYPencilSendTrans ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 5494 of file ComputePme.C.

References PmeYPencil::send_subset_trans().

Referenced by PmeYPencil::send_trans().

05494                                                                                                    {
05495         PmeYPencil *ypencil = (PmeYPencil *)param;
05496         ypencil->send_subset_trans(first, last);
05497 }

static void PmeYPencilSendUntrans ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 6072 of file ComputePme.C.

References PmeYPencil::send_subset_untrans().

Referenced by PmeYPencil::send_untrans().

06072                                                                                                      {
06073         int evirIdx = paraNum;
06074         PmeYPencil *ypencil = (PmeYPencil *)param;
06075         ypencil->send_subset_untrans(first, last, evirIdx);
06076 }

static void PmeZPencilSendTrans ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 5278 of file ComputePme.C.

References PmeZPencil::send_subset_trans().

Referenced by PmeZPencil::send_trans().

05278                                                                                                    {
05279         PmeZPencil *zpencil = (PmeZPencil *)param;
05280         zpencil->send_subset_trans(first, last);        
05281 }

static void PmeZPencilSendUngrid ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
) [inline, static]

Definition at line 6373 of file ComputePme.C.

References PmeZPencil::send_subset_ungrid().

Referenced by PmeZPencil::send_all_ungrid().

06373                                                                                                     {
06374         //to take advantage of the interface which allows 3 user params at most.
06375         //under such situtation, no new parameter list needs to be created!! -Chao Mei
06376         int specialIdx = paraNum;
06377         PmeZPencil *zpencil = (PmeZPencil *)param;
06378         zpencil->send_subset_ungrid(first, last, specialIdx);
06379 }


Variable Documentation

__thread DeviceCUDA* deviceCUDA

Definition at line 18 of file DeviceCUDA.C.

char* pencilPMEProcessors

Definition at line 96 of file ComputePme.C.

Referenced by OptPmeMgr::initialize(), OptPmePencilMapZ::initialize(), OptPmePencilMapY::initialize(), OptPmePencilMapX::initialize(), ComputePmeMgr::initialize(), and isPmeProcessor().


Generated on Sun Nov 19 01:17:16 2017 for NAMD by  doxygen 1.4.7