ComputeNonbondedCUDA.C File Reference

#include "common.h"
#include "charm++.h"
#include <cuda_runtime.h>
#include <cuda.h>
#include "WorkDistrib.h"
#include "ComputeMgr.h"
#include "ProxyMgr.h"
#include "ComputeNonbondedCUDAKernel.h"
#include "ComputeNonbondedCUDA.h"
#include "LJTable.h"
#include "ObjectArena.h"
#include "SortAtoms.h"
#include "Priorities.h"
#include <algorithm>
#include "NamdTypes.h"
#include "DeviceCUDA.h"
#include "CudaUtils.h"

Go to the source code of this file.

Classes

struct  exlist_sortop
struct  pid_sortop_reverse_priority
struct  cr_sortop_distance
struct  cr_sortop_reverse_priority

Defines

#define SET_EXCL(EXCL, BASE, DIFF)   (EXCL)[((BASE)+(DIFF))>>5] |= (1<<(((BASE)+(DIFF))&31))
#define CUDA_POLL(FN, ARG)   CcdCallFnAfter(FN,ARG,0.1)
#define GBISP()
#define count_limit   1000000

Functions

void cuda_errcheck (const char *msg)
static bool sortop_bitreverse (int a, int b)
void send_build_cuda_force_table ()
void build_cuda_force_table ()
void build_cuda_exclusions ()
void register_cuda_compute_self (ComputeID c, PatchID pid)
void register_cuda_compute_pair (ComputeID c, PatchID pid[], int t[])
void unregister_cuda_compute (ComputeID c)
void init_arrays ()
void CcdCallBacksReset (void *ignored, double curWallTime)
void cuda_check_progress (void *arg, double walltime)
void cuda_check_remote_progress (void *arg, double walltime)
void cuda_check_local_progress (void *arg, double walltime)
void cuda_check_remote_calc (void *arg, double walltime)
void cuda_check_local_calc (void *arg, double walltime)

Variables

__thread int max_grid_size
__thread cudaStream_t stream
__thread cudaStream_t stream2
__thread DeviceCUDA * deviceCUDA
static __thread ComputeNonbondedCUDA * cudaCompute = 0
static __thread ComputeMgr * computeMgr = 0
static __thread int2 * exclusionsByAtom
static __thread cudaEvent_t start_calc
static __thread cudaEvent_t end_remote_download
static __thread cudaEvent_t end_local_download
static __thread ResizeArray< patch_pair > * patch_pairs_ptr
static __thread ResizeArray< int > * patch_pair_num_ptr
static __thread int atom_params_size
static __thread atom_param * atom_params
static __thread int vdw_types_size
static __thread int * vdw_types
static __thread int dummy_size
static __thread float * dummy_dev
static __thread int force_ready_queue_size
static __thread int * force_ready_queue
static __thread int force_ready_queue_len
static __thread int force_ready_queue_next
static __thread int block_order_size
static __thread int * block_order
static __thread int num_atoms
static __thread int num_local_atoms
static __thread int num_remote_atoms
static __thread int virials_size
static __thread float * virials
static __thread int num_virials
static __thread float * slow_virials
static __thread int energy_gbis_size
static __thread float * energy_gbis
static __thread int intRad0H_size
static __thread float * intRad0H
static __thread int intRadSH_size
static __thread float * intRadSH
static __thread int bornRadH_size
static __thread float * bornRadH
static __thread int dHdrPrefixH_size
static __thread float * dHdrPrefixH
static __thread int cuda_timer_count
static __thread double cuda_timer_total
static __thread double kernel_time
static __thread double remote_submit_time
static __thread double local_submit_time
static __thread int check_count
static __thread int check_remote_count
static __thread int check_local_count
static __thread int kernel_launch_state = 0


Define Documentation

#define count_limit   1000000

Definition at line 885 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_local_progress(), cuda_check_pme_charges(), cuda_check_pme_forces(), cuda_check_progress(), and cuda_check_remote_progress().

#define CUDA_POLL(FN, ARG)   CcdCallFnAfter(FN,ARG,0.1)

Definition at line 875 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_local_calc(), cuda_check_local_progress(), cuda_check_pme_charges(), cuda_check_progress(), cuda_check_remote_calc(), cuda_check_remote_progress(), ComputePmeMgr::pollChargeGridReady(), ComputePmeMgr::pollForcesReady(), and ComputeNonbondedCUDA::recvYieldDevice().

 
#define GBISP()

Definition at line 882 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishWork(), ComputeNonbondedCUDA::noWork(), and ComputeNonbondedCUDA::recvYieldDevice().

#define SET_EXCL(EXCL, BASE, DIFF)   (EXCL)[((BASE)+(DIFF))>>5] |= (1<<(((BASE)+(DIFF))&31))

Referenced by ComputeNonbondedCUDA::build_exclusions().


Function Documentation

void build_cuda_exclusions (  ) 

Definition at line 252 of file ComputeNonbondedCUDA.C.

References ComputeNonbondedCUDA::build_exclusions(), deviceCUDA, and DeviceCUDA::getMasterPe().

Referenced by ComputeMgr::recvBuildCudaExclusions().

00252                              {
00253   if ( deviceCUDA->getMasterPe() != CkMyPe() ) return;
00254   ComputeNonbondedCUDA::build_exclusions();
00255 }

void build_cuda_force_table (  ) 

Definition at line 81 of file ComputeNonbondedCUDA.C.

References ComputeNonbondedCUDA::build_force_table(), ComputeNonbondedCUDA::build_lj_table(), deviceCUDA, and DeviceCUDA::getMasterPe().

Referenced by ComputeMgr::recvBuildCudaForceTable().

00081                               {
00082   if ( deviceCUDA->getMasterPe() != CkMyPe() ) return;
00083   ComputeNonbondedCUDA::build_lj_table();
00084   ComputeNonbondedCUDA::build_force_table();
00085 }

void CcdCallBacksReset (void *ignored, double curWallTime)

Referenced by cuda_check_local_calc(), cuda_check_local_progress(), cuda_check_pme_charges(), cuda_check_progress(), cuda_check_remote_calc(), cuda_check_remote_progress(), CudaPmeKSpaceCompute::energyAndVirialSetCallback(), CudaPmeRealSpaceCompute::gatherForceSetCallback(), ComputePmeMgr::pollChargeGridReady(), ComputePmeMgr::pollForcesReady(), and ComputeNonbondedCUDA::recvYieldDevice().

void cuda_check_local_calc (void *arg, double walltime)

Definition at line 1619 of file ComputeNonbondedCUDA.C.

References CcdCallBacksReset(), computeMgr, CUDA_POLL, deviceCUDA, end_local_download, DeviceCUDA::getNextPeSharingGpu(), and ComputeMgr::sendYieldDevice().

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

01619                                                        {
01620   // in theory we only need end_local_calc, but overlap isn't reliable
01621   // if ( cudaEventQuery(end_local_calc) == cudaSuccess ) {
01622   if ( cudaEventQuery(end_local_download) == cudaSuccess ) {
01623 // CkPrintf("Pe %d yielding to %d after local calc\n", CkMyPe(), next_pe_sharing_gpu);
01624     computeMgr->sendYieldDevice(deviceCUDA->getNextPeSharingGpu());
01625 // CkPrintf("Pe %d yielded to %d after local calc\n", CkMyPe(), next_pe_sharing_gpu);
01626   } else {
01627     CcdCallBacksReset(0,walltime);  // fix Charm++
01628     CUDA_POLL(cuda_check_local_calc, arg);
01629   }
01630 }

void cuda_check_local_progress (void *arg, double walltime)

Definition at line 998 of file ComputeNonbondedCUDA.C.

References CcdCallBacksReset(), check_local_count, check_remote_count, count_limit, cuda_errcheck(), CUDA_POLL, CUDA_TRACE_LOCAL, CUDA_TRACE_POLL_LOCAL, end_local_download, kernel_time, local_submit_time, WorkDistrib::messageFinishCUDA(), NAMD_bug(), and NAMD_die().

Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice().

00998                                                            {
00999 
01000   CUDA_TRACE_POLL_LOCAL;
01001   cudaError_t err = cudaEventQuery(end_local_download);
01002   if ( err == cudaSuccess ) {
01003     CUDA_TRACE_LOCAL(local_submit_time,walltime);
01004     kernel_time = walltime - kernel_time;
01005     check_local_count = 0;
01006     cuda_errcheck("at cuda local stream completed");
01007     WorkDistrib::messageFinishCUDA((ComputeNonbondedCUDA *) arg);
01008   } else if ( err != cudaErrorNotReady ) {
01009     cuda_errcheck("in cuda_check_local_progress");
01010     NAMD_bug("cuda_errcheck missed error in cuda_check_local_progress");
01011   } else if ( ++check_local_count >= count_limit ) {
01012     char errmsg[256];
01013     sprintf(errmsg,"cuda_check_local_progress polled %d times over %f s on step %d",
01014             check_local_count, walltime - local_submit_time,
01015             ((ComputeNonbondedCUDA *) arg)->step);
01016     cuda_errcheck(errmsg);
01017     NAMD_die(errmsg);
01018   } else if ( check_remote_count ) {
01019     NAMD_bug("nonzero check_remote_count in cuda_check_local_progress");
01020   } else {
01021     CcdCallBacksReset(0,walltime);  // fix Charm++
01022     CUDA_POLL(cuda_check_local_progress, arg);
01023   }
01024 }

void cuda_check_progress (void *arg, double walltime)

Definition at line 931 of file ComputeNonbondedCUDA.C.

References CcdCallBacksReset(), check_count, count_limit, cuda_errcheck(), CUDA_POLL, CUDA_TRACE_LOCAL, CUDA_TRACE_POLL_REMOTE, force_ready_queue, force_ready_queue_len, force_ready_queue_next, kernel_time, NAMD_die(), and remote_submit_time.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

00931                                                      {
00932   CUDA_TRACE_POLL_REMOTE;
00933 
00934   int flindex;
00935   int poll_again = 1;
00936   while ( -1 != (flindex = force_ready_queue[force_ready_queue_next]) ) {
00937     //    CkPrintf("Pe %d forces ready %d is index %d at %lf\n",
00938     //       CkMyPe(), force_ready_queue_next, flindex, walltime);
00939     force_ready_queue[force_ready_queue_next] = -1;
00940     ++force_ready_queue_next;
00941     check_count = 0;
00942     if ( force_ready_queue_next == force_ready_queue_len ) {
00943       poll_again = 0;
00944       CUDA_TRACE_LOCAL(kernel_time,walltime);
00945       kernel_time = walltime - kernel_time;
00946       // need to guarantee this finishes before the last patch message!
00947       ((ComputeNonbondedCUDA *) arg)->workStarted = 0;
00948       ((ComputeNonbondedCUDA *) arg)->finishReductions();
00949     }
00950     ((ComputeNonbondedCUDA *) arg)->messageFinishPatch(flindex);
00951     if ( force_ready_queue_next == force_ready_queue_len ) break;
00952   }
00953   if ( ++check_count >= count_limit ) {
00954     char errmsg[256];
00955     sprintf(errmsg,"cuda_check_progress polled %d times over %f s on step %d",
00956             check_count, walltime - remote_submit_time,
00957             ((ComputeNonbondedCUDA *) arg)->step);
00958     cuda_errcheck(errmsg);
00959     NAMD_die(errmsg);
00960   }
00961   if ( poll_again ) {
00962     CcdCallBacksReset(0,walltime);  // fix Charm++
00963     CUDA_POLL(cuda_check_progress, arg);
00964   }
00965 }

void cuda_check_remote_calc (void *arg, double walltime)

Definition at line 1606 of file ComputeNonbondedCUDA.C.

References CcdCallBacksReset(), computeMgr, CUDA_POLL, deviceCUDA, end_remote_download, DeviceCUDA::getNextPeSharingGpu(), and ComputeMgr::sendYieldDevice().

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

01606                                                         {
01607   // in theory we only need end_remote_calc, but overlap isn't reliable
01608   // if ( cudaEventQuery(end_remote_calc) == cudaSuccess ) {
01609   if ( cudaEventQuery(end_remote_download) == cudaSuccess ) {
01610 // CkPrintf("Pe %d yielding to %d after remote calc\n", CkMyPe(), next_pe_sharing_gpu);
01611     computeMgr->sendYieldDevice(deviceCUDA->getNextPeSharingGpu());
01612 // CkPrintf("Pe %d yielded to %d after remote calc\n", CkMyPe(), next_pe_sharing_gpu);
01613   } else {
01614     CcdCallBacksReset(0,walltime);  // fix Charm++
01615     CUDA_POLL(cuda_check_remote_calc, arg);
01616   }
01617 }

void cuda_check_remote_progress (void *arg, double walltime)

Definition at line 967 of file ComputeNonbondedCUDA.C.

References CcdCallBacksReset(), check_local_count, check_remote_count, count_limit, cuda_errcheck(), CUDA_POLL, CUDA_TRACE_POLL_REMOTE, CUDA_TRACE_REMOTE, deviceCUDA, end_remote_download, DeviceCUDA::getMergeGrids(), kernel_time, local_submit_time, WorkDistrib::messageFinishCUDA(), NAMD_bug(), NAMD_die(), and remote_submit_time.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

00967                                                             {
00968 
00969   CUDA_TRACE_POLL_REMOTE;
00970   cudaError_t err = cudaEventQuery(end_remote_download);
00971   if ( err == cudaSuccess ) {
00972     local_submit_time = walltime;
00973     CUDA_TRACE_REMOTE(remote_submit_time,local_submit_time);
00974     if ( deviceCUDA->getMergeGrids() ) {  // no local
00975       kernel_time = local_submit_time - kernel_time;
00976     }
00977     check_remote_count = 0;
00978     cuda_errcheck("at cuda remote stream completed");
00979     WorkDistrib::messageFinishCUDA((ComputeNonbondedCUDA *) arg);
00980   } else if ( err != cudaErrorNotReady ) {
00981     cuda_errcheck("in cuda_check_remote_progress");
00982     NAMD_bug("cuda_errcheck missed error in cuda_check_remote_progress");
00983   } else if ( ++check_remote_count >= count_limit ) {
00984     char errmsg[256];
00985     sprintf(errmsg,"cuda_check_remote_progress polled %d times over %f s on step %d",
00986             check_remote_count, walltime - remote_submit_time,
00987             ((ComputeNonbondedCUDA *) arg)->step);
00988     cuda_errcheck(errmsg);
00989     NAMD_die(errmsg);
00990   } else if ( check_local_count ) {
00991     NAMD_bug("nonzero check_local_count in cuda_check_remote_progress");
00992   } else {
00993     CcdCallBacksReset(0,walltime);  // fix Charm++
00994     CUDA_POLL(cuda_check_remote_progress, arg);
00995   }
00996 }

void cuda_errcheck ( const char *  msg  ) 

Definition at line 41 of file ComputeNonbondedCUDA.C.

References NAMD_die().

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputePmeMgr::ComputePmeMgr(), cuda_bind_atom_params(), cuda_bind_atoms(), cuda_bind_exclusions(), cuda_bind_force_table(), cuda_bind_forces(), cuda_bind_GBIS_bornRad(), cuda_bind_GBIS_dEdaSum(), cuda_bind_GBIS_dHdrPrefix(), cuda_bind_GBIS_energy(), cuda_bind_GBIS_intRad(), cuda_bind_GBIS_psiSum(), cuda_bind_lj_table(), cuda_bind_patch_pairs(), cuda_bind_vdw_types(), cuda_bind_virials(), cuda_check_local_progress(), cuda_check_pme_charges(), cuda_check_pme_forces(), cuda_check_progress(), cuda_check_remote_progress(), cuda_GBIS_P1(), cuda_GBIS_P2(), cuda_GBIS_P3(), cuda_init(), cuda_nonbonded_forces(), ComputeNonbondedCUDA::doWork(), ComputePmeMgr::initialize(), ComputePmeMgr::initialize_computes(), and ComputePmeMgr::ungridCalc().

00041                                     {
00042   cudaError_t err;
00043   if ((err = cudaGetLastError()) != cudaSuccess) {
00044     char host[128];
00045 #ifdef NOHOSTNAME
00046     sprintf(host,"physical node %d", CmiPhysicalNodeID(CkMyPe()));
00047 #else
00048     gethostname(host, 128);  host[127] = 0;
00049 #endif
00050     char devstr[128] = "";
00051     int devnum;
00052     if ( cudaGetDevice(&devnum) == cudaSuccess ) {
00053       sprintf(devstr, " device %d", devnum);
00054     }
00055     cudaDeviceProp deviceProp;
00056     if ( cudaGetDeviceProperties(&deviceProp, devnum) == cudaSuccess ) {
00057       sprintf(devstr, " device %d pci %x:%x:%x", devnum,
00058         deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID);
00059     }
00060     char errmsg[1024];
00061     sprintf(errmsg,"CUDA error %s on Pe %d (%s%s): %s", msg, CkMyPe(), host, devstr, cudaGetErrorString(err));
00062     NAMD_die(errmsg);
00063   }
00064 }

void init_arrays (  ) 

Definition at line 890 of file ComputeNonbondedCUDA.C.

References atom_params, atom_params_size, block_order, block_order_size, bornRadH, bornRadH_size, dHdrPrefixH, dHdrPrefixH_size, dummy_dev, dummy_size, energy_gbis, energy_gbis_size, force_ready_queue, force_ready_queue_len, force_ready_queue_next, force_ready_queue_size, intRad0H, intRad0H_size, intRadSH, intRadSH_size, num_atoms, num_local_atoms, num_remote_atoms, num_virials, vdw_types, vdw_types_size, virials, and virials_size.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA().

00890                    {
00891 
00892   atom_params_size = 0;
00893   atom_params = NULL;
00894 
00895   vdw_types_size = 0;
00896   vdw_types = NULL;
00897   
00898   dummy_size = 0;
00899   dummy_dev = NULL;
00900 
00901   force_ready_queue_size = 0;
00902   force_ready_queue = NULL;
00903   force_ready_queue_len = 0;
00904   force_ready_queue_next = 0;
00905   
00906   block_order_size = 0;
00907   block_order = NULL;
00908   
00909   num_atoms = 0;
00910   num_local_atoms = 0;
00911   num_remote_atoms = 0;
00912 
00913   virials_size = 0;
00914   virials = NULL;
00915   num_virials = 0;
00916 
00917   energy_gbis_size = 0;
00918   energy_gbis = NULL;
00919 
00920   intRad0H_size = 0;
00921   intRad0H = NULL;
00922   intRadSH_size = 0;
00923   intRadSH = NULL;
00924   bornRadH_size = 0;
00925   bornRadH = NULL;
00926   dHdrPrefixH_size = 0;
00927   dHdrPrefixH = NULL;
00928 
00929 }

void register_cuda_compute_pair (ComputeID c, PatchID pid[], int t[])

Definition at line 394 of file ComputeNonbondedCUDA.C.

References ComputeNonbondedCUDA::compute_record::c, PatchMap::center(), cudaCompute, ComputeNonbondedCUDA::localComputeRecords, NAMD_bug(), ComputeNonbondedCUDA::compute_record::offset, ComputeNonbondedCUDA::patchMap, ComputeNonbondedCUDA::patchRecords, ComputeNonbondedCUDA::compute_record::pid, ComputeNonbondedCUDA::remoteComputeRecords, ComputeNonbondedCUDA::requirePatch(), Vector::x, Vector::y, and Vector::z.

Referenced by ComputeNonbondedPair::initialize().

00394                                                                      {
00395 
00396   if ( ! cudaCompute ) NAMD_bug("register_pair called early");
00397  
00398   cudaCompute->requirePatch(pid[0]);
00399   cudaCompute->requirePatch(pid[1]);
00400 
00401   ComputeNonbondedCUDA::compute_record cr;
00402   cr.c = c; 
00403   cr.pid[0] = pid[0];  cr.pid[1] = pid[1];
00404 
00405   int t1 = t[0];
00406   int t2 = t[1];
00407   Vector offset = cudaCompute->patchMap->center(pid[0])
00408                 - cudaCompute->patchMap->center(pid[1]);
00409   offset.x += (t1%3-1) - (t2%3-1);
00410   offset.y += ((t1/3)%3-1) - ((t2/3)%3-1);
00411   offset.z += (t1/9-1) - (t2/9-1);
00412   cr.offset = offset;
00413 
00414   if ( cudaCompute->patchRecords[pid[0]].isLocal ) {
00415     cudaCompute->localComputeRecords.add(cr);
00416   } else {
00417     cudaCompute->remoteComputeRecords.add(cr);
00418   }
00419 }

void register_cuda_compute_self (ComputeID c, PatchID pid)

Definition at line 377 of file ComputeNonbondedCUDA.C.

References ComputeNonbondedCUDA::compute_record::c, cudaCompute, ComputeNonbondedCUDA::localComputeRecords, NAMD_bug(), ComputeNonbondedCUDA::compute_record::offset, ComputeNonbondedCUDA::patchRecords, ComputeNonbondedCUDA::compute_record::pid, ComputeNonbondedCUDA::remoteComputeRecords, and ComputeNonbondedCUDA::requirePatch().

Referenced by ComputeNonbondedSelf::initialize().

00377                                                           {
00378 
00379   if ( ! cudaCompute ) NAMD_bug("register_self called early");
00380 
00381   cudaCompute->requirePatch(pid);
00382 
00383   ComputeNonbondedCUDA::compute_record cr;
00384   cr.c = c;
00385   cr.pid[0] = pid;  cr.pid[1] = pid;
00386   cr.offset = 0.;
00387   if ( cudaCompute->patchRecords[pid].isLocal ) {
00388     cudaCompute->localComputeRecords.add(cr);
00389   } else {
00390     cudaCompute->remoteComputeRecords.add(cr);
00391   }
00392 }

void send_build_cuda_force_table (  ) 

Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved.

Definition at line 77 of file ComputeNonbondedCUDA.C.

References computeMgr, and ComputeMgr::sendBuildCudaForceTable().

Referenced by ComputeNonbondedUtil::select().

00077                                    {
00078   computeMgr->sendBuildCudaForceTable();
00079 }

static bool sortop_bitreverse (int a, int b) [inline, static]

Definition at line 66 of file ComputeNonbondedCUDA.C.

Referenced by cr_sortop_reverse_priority::pid_compare_priority().

00066                                                    {
00067   if ( a == b ) return 0; 
00068   for ( int bit = 1; bit; bit *= 2 ) {
00069     if ( (a&bit) != (b&bit) ) return ((a&bit) < (b&bit));
00070   }
00071   return 0;
00072 }

void unregister_cuda_compute ( ComputeID  c  ) 

Definition at line 421 of file ComputeNonbondedCUDA.C.

References NAMD_bug().

00421                                           {  // static
00422 
00423   NAMD_bug("unregister_compute unimplemented");
00424 
00425 }


Variable Documentation

__thread atom_param* atom_params [static]

Definition at line 830 of file ComputeNonbondedCUDA.C.

Referenced by cuda_bind_atom_params(), cuda_bind_patch_pairs(), cuda_init(), ComputeNonbondedCUDA::doWork(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int atom_params_size [static]

Definition at line 829 of file ComputeNonbondedCUDA.C.

Referenced by cuda_bind_patch_pairs(), cuda_init(), ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int* block_order [static]

Definition at line 844 of file ComputeNonbondedCUDA.C.

Referenced by cuda_bind_virials(), ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishReductions(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int block_order_size [static]

Definition at line 843 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread float* bornRadH [static]

Definition at line 865 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int bornRadH_size [static]

Definition at line 864 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int check_count [static]

Definition at line 886 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_progress().

__thread int check_local_count [static]

Definition at line 888 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_local_progress(), and cuda_check_remote_progress().

__thread int check_remote_count [static]

Definition at line 887 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_local_progress(), and cuda_check_remote_progress().

__thread ComputeMgr* computeMgr = 0 [static]

Definition at line 75 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::assignPatches(), ComputePmeMgr::chargeGridSubmitted(), ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputeMgr::createComputes(), cuda_check_local_calc(), cuda_check_remote_calc(), LdbCoordinator::ExecuteMigrations(), ComputeNonbondedCUDA::finishWork(), isMICProcessor(), ComputeNonbondedCUDA::messageFinishPatch(), ComputeNonbondedCUDA::noWork(), send_build_cuda_force_table(), ComputeMgr::sendBuildCudaExclusions(), ComputeMgr::sendBuildCudaForceTable(), ComputeMgr::sendBuildMICForceTable(), ComputeMgr::sendComputeDPMEData(), ComputeMgr::sendComputeDPMEResults(), ComputeMgr::sendComputeEwaldData(), ComputeMgr::sendComputeEwaldResults(), ComputeMgr::sendComputeGlobalData(), ComputeMgr::sendCreateNonbondedCUDASlave(), ComputeMgr::sendCreateNonbondedMICSlave(), ComputeMgr::sendMICPEData(), ComputeMgr::sendNonbondedCUDASlaveReady(), ComputeMgr::sendNonbondedMICSlaveReady(), and ComputeMgr::sendYieldDevice().

__thread int cuda_timer_count [static]

Definition at line 869 of file ComputeNonbondedCUDA.C.

__thread double cuda_timer_total [static]

Definition at line 870 of file ComputeNonbondedCUDA.C.

__thread ComputeNonbondedCUDA* cudaCompute = 0 [static]

Definition at line 74 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputeNonbondedCUDA::doWork(), register_cuda_compute_pair(), and register_cuda_compute_self().

__thread DeviceCUDA* deviceCUDA

Definition at line 18 of file DeviceCUDA.C.

__thread float* dHdrPrefixH [static]

Definition at line 867 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int dHdrPrefixH_size [static]

Definition at line 866 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread float* dummy_dev [static]

Definition at line 836 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int dummy_size [static]

Definition at line 835 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread cudaEvent_t end_local_download [static]

Definition at line 430 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), cuda_check_local_calc(), cuda_check_local_progress(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread cudaEvent_t end_remote_download [static]

Definition at line 429 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), cuda_check_remote_calc(), cuda_check_remote_progress(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread float* energy_gbis [static]

Definition at line 857 of file ComputeNonbondedCUDA.C.

Referenced by cuda_bind_GBIS_energy(), cuda_GBIS_P2(), cuda_init(), ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishReductions(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int energy_gbis_size [static]

Definition at line 856 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int2* exclusionsByAtom [static]

Definition at line 257 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::build_exclusions(), ComputeNonbondedCUDA::doWork(), and Molecule::get_exclusions_for_atom().

__thread int* force_ready_queue [static]

Definition at line 839 of file ComputeNonbondedCUDA.C.

Referenced by cuda_bind_virials(), cuda_check_progress(), cuda_init(), ComputeNonbondedCUDA::doWork(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int force_ready_queue_len [static]

Definition at line 840 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_progress(), ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int force_ready_queue_next [static]

Definition at line 841 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_progress(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int force_ready_queue_size [static]

Definition at line 838 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread float* intRad0H [static]

Definition at line 861 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int intRad0H_size [static]

Definition at line 860 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread float* intRadSH [static]

Definition at line 863 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int intRadSH_size [static]

Definition at line 862 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int kernel_launch_state = 0 [static]

Definition at line 1044 of file ComputeNonbondedCUDA.C.

__thread double kernel_time [static]

Definition at line 871 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_local_progress(), cuda_check_progress(), and cuda_check_remote_progress().

__thread double local_submit_time [static]

Definition at line 873 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_local_progress(), and cuda_check_remote_progress().

__thread int max_grid_size

Definition at line 198 of file ComputeNonbondedCUDAKernel.cu.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), cuda_GBIS_P1(), cuda_GBIS_P2(), cuda_GBIS_P3(), cuda_init(), and cuda_nonbonded_forces().

__thread int num_atoms [static]

Definition at line 846 of file ComputeNonbondedCUDA.C.

Referenced by cuda_bind_atom_params(), cuda_bind_atoms(), cuda_bind_GBIS_bornRad(), cuda_bind_GBIS_dHdrPrefix(), cuda_bind_GBIS_intRad(), cuda_bind_patch_pairs(), cuda_bind_vdw_types(), cuda_GBIS_P1(), cuda_GBIS_P2(), cuda_nonbonded_forces(), ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int num_local_atoms [static]

Definition at line 847 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int num_remote_atoms [static]

Definition at line 848 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int num_virials [static]

Definition at line 852 of file ComputeNonbondedCUDA.C.

Referenced by cuda_bind_patch_pairs(), cuda_bind_virials(), ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishReductions(), and init_arrays().

__thread ResizeArray<int>* patch_pair_num_ptr [static]

Definition at line 433 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), and ComputeNonbondedCUDA::doWork().

__thread ResizeArray<patch_pair>* patch_pairs_ptr [static]

Definition at line 432 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), and ComputeNonbondedCUDA::doWork().

__thread double remote_submit_time [static]

Definition at line 872 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_progress(), cuda_check_remote_progress(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread float* slow_virials [static]

Definition at line 854 of file ComputeNonbondedCUDA.C.

Referenced by cuda_bind_virials(), and ComputeNonbondedCUDA::doWork().

__thread cudaEvent_t start_calc [static]

Definition at line 428 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread cudaStream_t stream

Definition at line 200 of file ComputeNonbondedCUDAKernel.cu.

Referenced by clear_device_array(), ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputePmeMgr::ComputePmeMgr(), copy3D_DtoD(), copy3D_DtoH(), copy3D_HtoD(), copy3D_PeerDtoD(), copy_DtoD(), copy_DtoH(), copy_HtoD(), copy_PeerDtoD(), cuda_bind_atom_params(), cuda_bind_atoms(), cuda_bind_GBIS_bornRad(), cuda_bind_GBIS_dHdrPrefix(), cuda_bind_GBIS_intRad(), cuda_bind_vdw_types(), ComputePmeCUDAMgr::initialize_pencils(), CudaPmePencilXYZ::initializeDevice(), ComputeNonbondedCUDA::recvYieldDevice(), and ComputePmeCUDAMgr::~ComputePmeCUDAMgr().

__thread cudaStream_t stream2

Definition at line 201 of file ComputeNonbondedCUDAKernel.cu.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int* vdw_types [static]

Definition at line 833 of file ComputeNonbondedCUDA.C.

Referenced by cuda_bind_patch_pairs(), cuda_bind_vdw_types(), cuda_init(), ComputeNonbondedCUDA::doWork(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int vdw_types_size [static]

Definition at line 832 of file ComputeNonbondedCUDA.C.

Referenced by cuda_bind_patch_pairs(), cuda_init(), ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread float* virials [static]

Definition at line 851 of file ComputeNonbondedCUDA.C.

Referenced by cuda_bind_virials(), ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishReductions(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int virials_size [static]

Definition at line 850 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().


Generated on Tue May 22 01:17:17 2018 for NAMD by  doxygen 1.4.7