ComputeNonbondedCUDA.C File Reference

#include "common.h"
#include "charm++.h"
#include <cuda_runtime.h>
#include <cuda.h>
#include "WorkDistrib.h"
#include "ComputeMgr.h"
#include "ProxyMgr.h"
#include "ComputeNonbondedCUDAKernel.h"
#include "ComputeNonbondedCUDA.h"
#include "LJTable.h"
#include "ObjectArena.h"
#include "SortAtoms.h"
#include "Priorities.h"
#include <algorithm>
#include "NamdTypes.h"
#include "DeviceCUDA.h"
#include "CudaUtils.h"

Go to the source code of this file.

Classes

struct  exlist_sortop
struct  pid_sortop_reverse_priority
struct  cr_sortop_distance
struct  cr_sortop_reverse_priority

Defines

#define SET_EXCL(EXCL, BASE, DIFF)   (EXCL)[((BASE)+(DIFF))>>5] |= (1<<(((BASE)+(DIFF))&31))
#define CUDA_POLL(FN, ARG)   CcdCallFnAfter(FN,ARG,0.1)
#define GBISP()
#define count_limit   1000000

Functions

void cuda_errcheck (const char *msg)
static bool sortop_bitreverse (int a, int b)
void send_build_cuda_force_table ()
void build_cuda_force_table ()
void build_cuda_exclusions ()
void register_cuda_compute_self (ComputeID c, PatchID pid)
void register_cuda_compute_pair (ComputeID c, PatchID pid[], int t[])
void unregister_cuda_compute (ComputeID c)
void init_arrays ()
void CcdCallBacksReset (void *ignored, double curWallTime)
void cuda_check_progress (void *arg, double walltime)
void cuda_check_remote_progress (void *arg, double walltime)
void cuda_check_local_progress (void *arg, double walltime)
void cuda_check_remote_calc (void *arg, double walltime)
void cuda_check_local_calc (void *arg, double walltime)

Variables

__thread int max_grid_size
__thread cudaStream_t stream
__thread cudaStream_t stream2
__thread DeviceCUDA * deviceCUDA
static __thread ComputeNonbondedCUDA * cudaCompute = 0
static __thread ComputeMgr * computeMgr = 0
static __thread int2 * exclusionsByAtom
static __thread cudaEvent_t start_calc
static __thread cudaEvent_t end_remote_download
static __thread cudaEvent_t end_local_download
static __thread ResizeArray< patch_pair > * patch_pairs_ptr
static __thread ResizeArray< int > * patch_pair_num_ptr
static __thread int atom_params_size
static __thread atom_param * atom_params
static __thread int vdw_types_size
static __thread int * vdw_types
static __thread int dummy_size
static __thread float * dummy_dev
static __thread int force_ready_queue_size
static __thread int * force_ready_queue
static __thread int force_ready_queue_len
static __thread int force_ready_queue_next
static __thread int block_order_size
static __thread int * block_order
static __thread int num_atoms
static __thread int num_local_atoms
static __thread int num_remote_atoms
static __thread int virials_size
static __thread float * virials
static __thread int num_virials
static __thread float * slow_virials
static __thread int energy_gbis_size
static __thread float * energy_gbis
static __thread int intRad0H_size
static __thread float * intRad0H
static __thread int intRadSH_size
static __thread float * intRadSH
static __thread int bornRadH_size
static __thread float * bornRadH
static __thread int dHdrPrefixH_size
static __thread float * dHdrPrefixH
static __thread int cuda_timer_count
static __thread double cuda_timer_total
static __thread double kernel_time
static __thread double remote_submit_time
static __thread double local_submit_time
static __thread int check_count
static __thread int check_remote_count
static __thread int check_local_count
static __thread int kernel_launch_state = 0


Define Documentation

#define count_limit   1000000

Definition at line 1209 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_local_progress(), cuda_check_pme_charges(), cuda_check_pme_forces(), cuda_check_progress(), and cuda_check_remote_progress().

#define CUDA_POLL(FN, ARG)   CcdCallFnAfter(FN,ARG,0.1)

Definition at line 1199 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_local_calc(), cuda_check_local_progress(), cuda_check_pme_charges(), cuda_check_progress(), cuda_check_remote_calc(), cuda_check_remote_progress(), ComputePmeMgr::pollChargeGridReady(), ComputePmeMgr::pollForcesReady(), and ComputeNonbondedCUDA::recvYieldDevice().

 
#define GBISP()

Definition at line 1206 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishWork(), ComputeNonbondedCUDA::noWork(), and ComputeNonbondedCUDA::recvYieldDevice().

#define SET_EXCL(EXCL, BASE, DIFF)   (EXCL)[((BASE)+(DIFF))>>5] |= (1<<(((BASE)+(DIFF))&31))

Referenced by ComputeNonbondedCUDA::build_exclusions().


Function Documentation

void build_cuda_exclusions (  ) 

Definition at line 576 of file ComputeNonbondedCUDA.C.

References ComputeNonbondedCUDA::build_exclusions(), deviceCUDA, and DeviceCUDA::getMasterPe().

Referenced by ComputeMgr::recvBuildCudaExclusions().

00576                              {
00577   if ( deviceCUDA->getMasterPe() != CkMyPe() ) return;
00578   ComputeNonbondedCUDA::build_exclusions();
00579 }

void build_cuda_force_table (  ) 

Definition at line 405 of file ComputeNonbondedCUDA.C.

References ComputeNonbondedCUDA::build_force_table(), ComputeNonbondedCUDA::build_lj_table(), deviceCUDA, and DeviceCUDA::getMasterPe().

Referenced by ComputeMgr::recvBuildCudaForceTable().

00405                               {
00406   if ( deviceCUDA->getMasterPe() != CkMyPe() ) return;
00407   ComputeNonbondedCUDA::build_lj_table();
00408   ComputeNonbondedCUDA::build_force_table();
00409 }

void CcdCallBacksReset (void *ignored, double curWallTime)

Referenced by cuda_check_local_calc(), cuda_check_local_progress(), cuda_check_pme_charges(), cuda_check_progress(), cuda_check_remote_calc(), cuda_check_remote_progress(), CudaPmeKSpaceCompute::energyAndVirialSetCallback(), CudaPmeRealSpaceCompute::gatherForceSetCallback(), ComputePmeMgr::pollChargeGridReady(), ComputePmeMgr::pollForcesReady(), and ComputeNonbondedCUDA::recvYieldDevice().

void cuda_check_local_calc (void *arg, double walltime)

Definition at line 1943 of file ComputeNonbondedCUDA.C.

References CcdCallBacksReset(), computeMgr, CUDA_POLL, deviceCUDA, end_local_download, DeviceCUDA::getNextPeSharingGpu(), and ComputeMgr::sendYieldDevice().

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

01943                                                        {
01944   // in theory we only need end_local_calc, but overlap isn't reliable
01945   // if ( cudaEventQuery(end_local_calc) == cudaSuccess ) {
01946   if ( cudaEventQuery(end_local_download) == cudaSuccess ) {
01947 // CkPrintf("Pe %d yielding to %d after local calc\n", CkMyPe(), next_pe_sharing_gpu);
01948     computeMgr->sendYieldDevice(deviceCUDA->getNextPeSharingGpu());
01949 // CkPrintf("Pe %d yielded to %d after local calc\n", CkMyPe(), next_pe_sharing_gpu);
01950   } else {
01951     CcdCallBacksReset(0,walltime);  // fix Charm++
01952     CUDA_POLL(cuda_check_local_calc, arg);
01953   }
01954 }

void cuda_check_local_progress (void *arg, double walltime)

Definition at line 1322 of file ComputeNonbondedCUDA.C.

References CcdCallBacksReset(), check_local_count, check_remote_count, count_limit, cuda_errcheck(), CUDA_POLL, CUDA_TRACE_LOCAL, CUDA_TRACE_POLL_LOCAL, end_local_download, kernel_time, local_submit_time, WorkDistrib::messageFinishCUDA(), NAMD_bug(), and NAMD_die().

Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice().

01322                                                            {
01323 
01324   CUDA_TRACE_POLL_LOCAL;
01325   cudaError_t err = cudaEventQuery(end_local_download);
01326   if ( err == cudaSuccess ) {
01327     CUDA_TRACE_LOCAL(local_submit_time,walltime);
01328     kernel_time = walltime - kernel_time;
01329     check_local_count = 0;
01330     cuda_errcheck("at cuda local stream completed");
01331     WorkDistrib::messageFinishCUDA((ComputeNonbondedCUDA *) arg);
01332   } else if ( err != cudaErrorNotReady ) {
01333     cuda_errcheck("in cuda_check_local_progress");
01334     NAMD_bug("cuda_errcheck missed error in cuda_check_local_progress");
01335   } else if ( ++check_local_count >= count_limit ) {
01336     char errmsg[256];
01337     sprintf(errmsg,"cuda_check_local_progress polled %d times over %f s on step %d",
01338             check_local_count, walltime - local_submit_time,
01339             ((ComputeNonbondedCUDA *) arg)->step);
01340     cuda_errcheck(errmsg);
01341     NAMD_die(errmsg);
01342   } else if ( check_remote_count ) {
01343     NAMD_bug("nonzero check_remote_count in cuda_check_local_progress");
01344   } else {
01345     CcdCallBacksReset(0,walltime);  // fix Charm++
01346     CUDA_POLL(cuda_check_local_progress, arg);
01347   }
01348 }

void cuda_check_progress (void *arg, double walltime)

Definition at line 1255 of file ComputeNonbondedCUDA.C.

References CcdCallBacksReset(), check_count, count_limit, cuda_errcheck(), CUDA_POLL, CUDA_TRACE_LOCAL, CUDA_TRACE_POLL_REMOTE, force_ready_queue, force_ready_queue_len, force_ready_queue_next, kernel_time, NAMD_die(), and remote_submit_time.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

01255                                                      {
01256   CUDA_TRACE_POLL_REMOTE;
01257 
01258   int flindex;
01259   int poll_again = 1;
01260   while ( -1 != (flindex = force_ready_queue[force_ready_queue_next]) ) {
01261     //    CkPrintf("Pe %d forces ready %d is index %d at %lf\n",
01262     //       CkMyPe(), force_ready_queue_next, flindex, walltime);
01263     force_ready_queue[force_ready_queue_next] = -1;
01264     ++force_ready_queue_next;
01265     check_count = 0;
01266     if ( force_ready_queue_next == force_ready_queue_len ) {
01267       poll_again = 0;
01268       CUDA_TRACE_LOCAL(kernel_time,walltime);
01269       kernel_time = walltime - kernel_time;
01270       // need to guarantee this finishes before the last patch message!
01271       ((ComputeNonbondedCUDA *) arg)->workStarted = 0;
01272       ((ComputeNonbondedCUDA *) arg)->finishReductions();
01273     }
01274     ((ComputeNonbondedCUDA *) arg)->messageFinishPatch(flindex);
01275     if ( force_ready_queue_next == force_ready_queue_len ) break;
01276   }
01277   if ( ++check_count >= count_limit ) {
01278     char errmsg[256];
01279     sprintf(errmsg,"cuda_check_progress polled %d times over %f s on step %d",
01280             check_count, walltime - remote_submit_time,
01281             ((ComputeNonbondedCUDA *) arg)->step);
01282     cuda_errcheck(errmsg);
01283     NAMD_die(errmsg);
01284   }
01285   if ( poll_again ) {
01286     CcdCallBacksReset(0,walltime);  // fix Charm++
01287     CUDA_POLL(cuda_check_progress, arg);
01288   }
01289 }

void cuda_check_remote_calc (void *arg, double walltime)

Definition at line 1930 of file ComputeNonbondedCUDA.C.

References CcdCallBacksReset(), computeMgr, CUDA_POLL, deviceCUDA, end_remote_download, DeviceCUDA::getNextPeSharingGpu(), and ComputeMgr::sendYieldDevice().

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

01930                                                         {
01931   // in theory we only need end_remote_calc, but overlap isn't reliable
01932   // if ( cudaEventQuery(end_remote_calc) == cudaSuccess ) {
01933   if ( cudaEventQuery(end_remote_download) == cudaSuccess ) {
01934 // CkPrintf("Pe %d yielding to %d after remote calc\n", CkMyPe(), next_pe_sharing_gpu);
01935     computeMgr->sendYieldDevice(deviceCUDA->getNextPeSharingGpu());
01936 // CkPrintf("Pe %d yielded to %d after remote calc\n", CkMyPe(), next_pe_sharing_gpu);
01937   } else {
01938     CcdCallBacksReset(0,walltime);  // fix Charm++
01939     CUDA_POLL(cuda_check_remote_calc, arg);
01940   }
01941 }

void cuda_check_remote_progress (void *arg, double walltime)

Definition at line 1291 of file ComputeNonbondedCUDA.C.

References CcdCallBacksReset(), check_local_count, check_remote_count, count_limit, cuda_errcheck(), CUDA_POLL, CUDA_TRACE_POLL_REMOTE, CUDA_TRACE_REMOTE, deviceCUDA, end_remote_download, DeviceCUDA::getMergeGrids(), kernel_time, local_submit_time, WorkDistrib::messageFinishCUDA(), NAMD_bug(), NAMD_die(), and remote_submit_time.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

01291                                                             {
01292 
01293   CUDA_TRACE_POLL_REMOTE;
01294   cudaError_t err = cudaEventQuery(end_remote_download);
01295   if ( err == cudaSuccess ) {
01296     local_submit_time = walltime;
01297     CUDA_TRACE_REMOTE(remote_submit_time,local_submit_time);
01298     if ( deviceCUDA->getMergeGrids() ) {  // no local
01299       kernel_time = local_submit_time - kernel_time;
01300     }
01301     check_remote_count = 0;
01302     cuda_errcheck("at cuda remote stream completed");
01303     WorkDistrib::messageFinishCUDA((ComputeNonbondedCUDA *) arg);
01304   } else if ( err != cudaErrorNotReady ) {
01305     cuda_errcheck("in cuda_check_remote_progress");
01306     NAMD_bug("cuda_errcheck missed error in cuda_check_remote_progress");
01307   } else if ( ++check_remote_count >= count_limit ) {
01308     char errmsg[256];
01309     sprintf(errmsg,"cuda_check_remote_progress polled %d times over %f s on step %d",
01310             check_remote_count, walltime - remote_submit_time,
01311             ((ComputeNonbondedCUDA *) arg)->step);
01312     cuda_errcheck(errmsg);
01313     NAMD_die(errmsg);
01314   } else if ( check_local_count ) {
01315     NAMD_bug("nonzero check_local_count in cuda_check_remote_progress");
01316   } else {
01317     CcdCallBacksReset(0,walltime);  // fix Charm++
01318     CUDA_POLL(cuda_check_remote_progress, arg);
01319   }
01320 }

void cuda_errcheck ( const char *  msg  ) 

Definition at line 41 of file ComputeNonbondedCUDA.C.

References NAMD_die().

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputePmeMgr::ComputePmeMgr(), cuda_bind_atom_params(), cuda_bind_atoms(), cuda_bind_exclusions(), cuda_bind_force_table(), cuda_bind_forces(), cuda_bind_GBIS_bornRad(), cuda_bind_GBIS_dEdaSum(), cuda_bind_GBIS_dHdrPrefix(), cuda_bind_GBIS_energy(), cuda_bind_GBIS_intRad(), cuda_bind_GBIS_psiSum(), cuda_bind_lj_table(), cuda_bind_patch_pairs(), cuda_bind_vdw_types(), cuda_bind_virials(), cuda_check_local_progress(), cuda_check_pme_charges(), cuda_check_pme_forces(), cuda_check_progress(), cuda_check_remote_progress(), cuda_GBIS_P1(), cuda_GBIS_P2(), cuda_GBIS_P3(), cuda_init(), cuda_nonbonded_forces(), ComputeNonbondedCUDA::doWork(), ComputePmeMgr::initialize(), ComputePmeMgr::initialize_computes(), and ComputePmeMgr::ungridCalc().

00041                                     {
00042   cudaError_t err;
00043   if ((err = cudaGetLastError()) != cudaSuccess) {
00044     char host[128];
00045 #ifdef NOHOSTNAME
00046     sprintf(host,"physical node %d", CmiPhysicalNodeID(CkMyPe()));
00047 #else
00048     gethostname(host, 128);  host[127] = 0;
00049 #endif
00050     char devstr[128] = "";
00051     int devnum;
00052     if ( cudaGetDevice(&devnum) == cudaSuccess ) {
00053       sprintf(devstr, " device %d", devnum);
00054     }
00055     char errmsg[1024];
00056     sprintf(errmsg,"CUDA error %s on Pe %d (%s%s): %s", msg, CkMyPe(), host, devstr, cudaGetErrorString(err));
00057     NAMD_die(errmsg);
00058   }
00059 }

void init_arrays (  ) 

Definition at line 1214 of file ComputeNonbondedCUDA.C.

References atom_params, atom_params_size, block_order, block_order_size, bornRadH, bornRadH_size, dHdrPrefixH, dHdrPrefixH_size, dummy_dev, dummy_size, energy_gbis, energy_gbis_size, force_ready_queue, force_ready_queue_len, force_ready_queue_next, force_ready_queue_size, intRad0H, intRad0H_size, intRadSH, intRadSH_size, num_atoms, num_local_atoms, num_remote_atoms, num_virials, vdw_types, vdw_types_size, virials, and virials_size.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA().

01214                    {
01215 
01216   atom_params_size = 0;
01217   atom_params = NULL;
01218 
01219   vdw_types_size = 0;
01220   vdw_types = NULL;
01221   
01222   dummy_size = 0;
01223   dummy_dev = NULL;
01224 
01225   force_ready_queue_size = 0;
01226   force_ready_queue = NULL;
01227   force_ready_queue_len = 0;
01228   force_ready_queue_next = 0;
01229   
01230   block_order_size = 0;
01231   block_order = NULL;
01232   
01233   num_atoms = 0;
01234   num_local_atoms = 0;
01235   num_remote_atoms = 0;
01236 
01237   virials_size = 0;
01238   virials = NULL;
01239   num_virials = 0;
01240 
01241   energy_gbis_size = 0;
01242   energy_gbis = NULL;
01243 
01244   intRad0H_size = 0;
01245   intRad0H = NULL;
01246   intRadSH_size = 0;
01247   intRadSH = NULL;
01248   bornRadH_size = 0;
01249   bornRadH = NULL;
01250   dHdrPrefixH_size = 0;
01251   dHdrPrefixH = NULL;
01252 
01253 }

void register_cuda_compute_pair (ComputeID c, PatchID pid[], int t[])

Definition at line 718 of file ComputeNonbondedCUDA.C.

References ComputeNonbondedCUDA::compute_record::c, PatchMap::center(), cudaCompute, ComputeNonbondedCUDA::localComputeRecords, NAMD_bug(), ComputeNonbondedCUDA::compute_record::offset, ComputeNonbondedCUDA::patchMap, ComputeNonbondedCUDA::patchRecords, ComputeNonbondedCUDA::compute_record::pid, ComputeNonbondedCUDA::remoteComputeRecords, ComputeNonbondedCUDA::requirePatch(), Vector::x, Vector::y, and Vector::z.

Referenced by ComputeNonbondedPair::initialize().

00718                                                                      {
00719 
00720   if ( ! cudaCompute ) NAMD_bug("register_pair called early");
00721  
00722   cudaCompute->requirePatch(pid[0]);
00723   cudaCompute->requirePatch(pid[1]);
00724 
00725   ComputeNonbondedCUDA::compute_record cr;
00726   cr.c = c; 
00727   cr.pid[0] = pid[0];  cr.pid[1] = pid[1];
00728 
00729   int t1 = t[0];
00730   int t2 = t[1];
00731   Vector offset = cudaCompute->patchMap->center(pid[0])
00732                 - cudaCompute->patchMap->center(pid[1]);
00733   offset.x += (t1%3-1) - (t2%3-1);
00734   offset.y += ((t1/3)%3-1) - ((t2/3)%3-1);
00735   offset.z += (t1/9-1) - (t2/9-1);
00736   cr.offset = offset;
00737 
00738   if ( cudaCompute->patchRecords[pid[0]].isLocal ) {
00739     cudaCompute->localComputeRecords.add(cr);
00740   } else {
00741     cudaCompute->remoteComputeRecords.add(cr);
00742   }
00743 }

void register_cuda_compute_self (ComputeID c, PatchID pid)

Definition at line 701 of file ComputeNonbondedCUDA.C.

References ComputeNonbondedCUDA::compute_record::c, cudaCompute, ComputeNonbondedCUDA::localComputeRecords, NAMD_bug(), ComputeNonbondedCUDA::compute_record::offset, ComputeNonbondedCUDA::patchRecords, ComputeNonbondedCUDA::compute_record::pid, ComputeNonbondedCUDA::remoteComputeRecords, and ComputeNonbondedCUDA::requirePatch().

Referenced by ComputeNonbondedSelf::initialize().

00701                                                           {
00702 
00703   if ( ! cudaCompute ) NAMD_bug("register_self called early");
00704 
00705   cudaCompute->requirePatch(pid);
00706 
00707   ComputeNonbondedCUDA::compute_record cr;
00708   cr.c = c;
00709   cr.pid[0] = pid;  cr.pid[1] = pid;
00710   cr.offset = 0.;
00711   if ( cudaCompute->patchRecords[pid].isLocal ) {
00712     cudaCompute->localComputeRecords.add(cr);
00713   } else {
00714     cudaCompute->remoteComputeRecords.add(cr);
00715   }
00716 }

void send_build_cuda_force_table (  ) 

Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved.

Definition at line 401 of file ComputeNonbondedCUDA.C.

References computeMgr, and ComputeMgr::sendBuildCudaForceTable().

Referenced by ComputeNonbondedUtil::select().

00401                                    {
00402   computeMgr->sendBuildCudaForceTable();
00403 }

static bool sortop_bitreverse (int a, int b) [inline, static]

Definition at line 129 of file ComputeNonbondedCUDA.C.

Referenced by cr_sortop_reverse_priority::pid_compare_priority().

00129                                                    {
00130   if ( a == b ) return 0; 
00131   for ( int bit = 1; bit; bit *= 2 ) {
00132     if ( (a&bit) != (b&bit) ) return ((a&bit) < (b&bit));
00133   }
00134   return 0;
00135 }

void unregister_cuda_compute ( ComputeID  c  ) 

Definition at line 745 of file ComputeNonbondedCUDA.C.

References NAMD_bug().

00745                                           {  // static
00746 
00747   NAMD_bug("unregister_compute unimplemented");
00748 
00749 }


Variable Documentation

__thread atom_param* atom_params [static]

Definition at line 1154 of file ComputeNonbondedCUDA.C.

Referenced by cuda_bind_atom_params(), cuda_bind_patch_pairs(), cuda_init(), ComputeNonbondedCUDA::doWork(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int atom_params_size [static]

Definition at line 1153 of file ComputeNonbondedCUDA.C.

Referenced by cuda_bind_patch_pairs(), cuda_init(), ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int* block_order [static]

Definition at line 1168 of file ComputeNonbondedCUDA.C.

Referenced by cuda_bind_virials(), ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishReductions(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int block_order_size [static]

Definition at line 1167 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread float* bornRadH [static]

Definition at line 1189 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int bornRadH_size [static]

Definition at line 1188 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int check_count [static]

Definition at line 1210 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_progress().

__thread int check_local_count [static]

Definition at line 1212 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_local_progress(), and cuda_check_remote_progress().

__thread int check_remote_count [static]

Definition at line 1211 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_local_progress(), and cuda_check_remote_progress().

__thread ComputeMgr* computeMgr = 0 [static]

Definition at line 399 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::assignPatches(), ComputePmeMgr::chargeGridSubmitted(), ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputeMgr::createComputes(), cuda_check_local_calc(), cuda_check_remote_calc(), LdbCoordinator::ExecuteMigrations(), ComputeNonbondedCUDA::finishWork(), isMICProcessor(), ComputeNonbondedCUDA::messageFinishPatch(), ComputeNonbondedCUDA::noWork(), send_build_cuda_force_table(), ComputeMgr::sendBuildCudaExclusions(), ComputeMgr::sendBuildCudaForceTable(), ComputeMgr::sendBuildMICForceTable(), ComputeMgr::sendComputeDPMEData(), ComputeMgr::sendComputeDPMEResults(), ComputeMgr::sendComputeEwaldData(), ComputeMgr::sendComputeEwaldResults(), ComputeMgr::sendComputeGlobalData(), ComputeMgr::sendCreateNonbondedCUDASlave(), ComputeMgr::sendCreateNonbondedMICSlave(), ComputeMgr::sendMICPEData(), ComputeMgr::sendNonbondedCUDASlaveReady(), ComputeMgr::sendNonbondedMICSlaveReady(), and ComputeMgr::sendYieldDevice().

__thread int cuda_timer_count [static]

Definition at line 1193 of file ComputeNonbondedCUDA.C.

__thread double cuda_timer_total [static]

Definition at line 1194 of file ComputeNonbondedCUDA.C.

__thread ComputeNonbondedCUDA* cudaCompute = 0 [static]

Definition at line 398 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputeNonbondedCUDA::doWork(), register_cuda_compute_pair(), and register_cuda_compute_self().

__thread DeviceCUDA* deviceCUDA

Definition at line 18 of file DeviceCUDA.C.

__thread float* dHdrPrefixH [static]

Definition at line 1191 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int dHdrPrefixH_size [static]

Definition at line 1190 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread float* dummy_dev [static]

Definition at line 1160 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int dummy_size [static]

Definition at line 1159 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread cudaEvent_t end_local_download [static]

Definition at line 754 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), cuda_check_local_calc(), cuda_check_local_progress(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread cudaEvent_t end_remote_download [static]

Definition at line 753 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), cuda_check_remote_calc(), cuda_check_remote_progress(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread float* energy_gbis [static]

Definition at line 1181 of file ComputeNonbondedCUDA.C.

Referenced by cuda_bind_GBIS_energy(), cuda_GBIS_P2(), cuda_init(), ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishReductions(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int energy_gbis_size [static]

Definition at line 1180 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int2* exclusionsByAtom [static]

Definition at line 581 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::build_exclusions(), ComputeNonbondedCUDA::doWork(), and Molecule::get_exclusions_for_atom().

__thread int* force_ready_queue [static]

Definition at line 1163 of file ComputeNonbondedCUDA.C.

Referenced by cuda_bind_virials(), cuda_check_progress(), cuda_init(), ComputeNonbondedCUDA::doWork(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int force_ready_queue_len [static]

Definition at line 1164 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_progress(), ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int force_ready_queue_next [static]

Definition at line 1165 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_progress(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int force_ready_queue_size [static]

Definition at line 1162 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread float* intRad0H [static]

Definition at line 1185 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int intRad0H_size [static]

Definition at line 1184 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread float* intRadSH [static]

Definition at line 1187 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int intRadSH_size [static]

Definition at line 1186 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int kernel_launch_state = 0 [static]

Definition at line 1368 of file ComputeNonbondedCUDA.C.

__thread double kernel_time [static]

Definition at line 1195 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_local_progress(), cuda_check_progress(), and cuda_check_remote_progress().

__thread double local_submit_time [static]

Definition at line 1197 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_local_progress(), and cuda_check_remote_progress().

__thread int max_grid_size

Definition at line 198 of file ComputeNonbondedCUDAKernel.cu.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), cuda_GBIS_P1(), cuda_GBIS_P2(), cuda_GBIS_P3(), cuda_init(), and cuda_nonbonded_forces().

__thread int num_atoms [static]

Definition at line 1170 of file ComputeNonbondedCUDA.C.

Referenced by cuda_bind_atom_params(), cuda_bind_atoms(), cuda_bind_GBIS_bornRad(), cuda_bind_GBIS_dHdrPrefix(), cuda_bind_GBIS_intRad(), cuda_bind_patch_pairs(), cuda_bind_vdw_types(), cuda_GBIS_P1(), cuda_GBIS_P2(), cuda_nonbonded_forces(), ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int num_local_atoms [static]

Definition at line 1171 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int num_remote_atoms [static]

Definition at line 1172 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int num_virials [static]

Definition at line 1176 of file ComputeNonbondedCUDA.C.

Referenced by cuda_bind_patch_pairs(), cuda_bind_virials(), ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishReductions(), and init_arrays().

__thread ResizeArray<int>* patch_pair_num_ptr [static]

Definition at line 757 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), and ComputeNonbondedCUDA::doWork().

__thread ResizeArray<patch_pair>* patch_pairs_ptr [static]

Definition at line 756 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), and ComputeNonbondedCUDA::doWork().

__thread double remote_submit_time [static]

Definition at line 1196 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_progress(), cuda_check_remote_progress(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread float* slow_virials [static]

Definition at line 1178 of file ComputeNonbondedCUDA.C.

Referenced by cuda_bind_virials(), and ComputeNonbondedCUDA::doWork().

__thread cudaEvent_t start_calc [static]

Definition at line 752 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread cudaStream_t stream

Definition at line 200 of file ComputeNonbondedCUDAKernel.cu.

Referenced by clear_device_array(), ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputePmeMgr::ComputePmeMgr(), copy3D_DtoD(), copy3D_DtoH(), copy3D_HtoD(), copy3D_PeerDtoD(), copy_DtoD(), copy_DtoH(), copy_HtoD(), copy_PeerDtoD(), cuda_bind_atom_params(), cuda_bind_atoms(), cuda_bind_GBIS_bornRad(), cuda_bind_GBIS_dHdrPrefix(), cuda_bind_GBIS_intRad(), cuda_bind_vdw_types(), ComputePmeCUDAMgr::initialize_pencils(), CudaPmePencilXYZ::initializeDevice(), ComputeNonbondedCUDA::recvYieldDevice(), and ComputePmeCUDAMgr::~ComputePmeCUDAMgr().

__thread cudaStream_t stream2

Definition at line 201 of file ComputeNonbondedCUDAKernel.cu.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int* vdw_types [static]

Definition at line 1157 of file ComputeNonbondedCUDA.C.

Referenced by cuda_bind_patch_pairs(), cuda_bind_vdw_types(), cuda_init(), ComputeNonbondedCUDA::doWork(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int vdw_types_size [static]

Definition at line 1156 of file ComputeNonbondedCUDA.C.

Referenced by cuda_bind_patch_pairs(), cuda_init(), ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread float* virials [static]

Definition at line 1175 of file ComputeNonbondedCUDA.C.

Referenced by cuda_bind_virials(), ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishReductions(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int virials_size [static]

Definition at line 1174 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().


Generated on Thu Sep 21 01:17:15 2017 for NAMD by  doxygen 1.4.7