#include "common.h"
#include "charm++.h"
#include "HipDefines.h"
#include <cuda_runtime.h>
#include <cuda.h>
#include "WorkDistrib.h"
#include "ComputeMgr.h"
#include "ProxyMgr.h"
#include "ComputeNonbondedCUDAKernel.h"
#include "ComputeNonbondedCUDA.h"
#include "LJTable.h"
#include "ObjectArena.h"
#include "SortAtoms.h"
#include "Priorities.h"
#include <algorithm>
#include <cstdio>
#include "NamdTypes.h"
#include "DeviceCUDA.h"
#include "CudaUtils.h"

Classes
struct	exlist_sortop

struct	pid_sortop_reverse_priority

struct	cr_sortop_distance

struct	cr_sortop_reverse_priority

Macros
#define	SET_EXCL(EXCL, BASE, DIFF) (EXCL)[((BASE)+(DIFF))>>5] |= (1<<(((BASE)+(DIFF))&31))

#define	CUDA_POLL(FN, ARG) CcdCallFnAfter(FN,ARG,0.1)

#define	GBISP(...)

#define	count_limit 1000000

Functions
void	cuda_errcheck (const char *msg)

static bool	sortop_bitreverse (int a, int b)

void	send_build_cuda_force_table ()

void	build_cuda_force_table ()

void	build_cuda_exclusions ()

void	register_cuda_compute_self (ComputeID c, PatchID pid)

void	register_cuda_compute_pair (ComputeID c, PatchID pid[], int t[])

void	unregister_cuda_compute (ComputeID c)

void	init_arrays ()

void	CcdCallBacksReset (void *ignored, double curWallTime)

void	cuda_check_progress (void *arg, double walltime)

void	cuda_check_remote_progress (void *arg, double walltime)

void	cuda_check_local_progress (void *arg, double walltime)

void	cuda_check_remote_calc (void *arg, double walltime)

void	cuda_check_local_calc (void *arg, double walltime)

Variables
__thread int	max_grid_size

__thread cudaStream_t	stream

__thread cudaStream_t	stream2

__thread DeviceCUDA *	deviceCUDA

static __thread ComputeNonbondedCUDA *	cudaCompute = 0

static __thread ComputeMgr *	computeMgr = 0

static __thread int2 *	exclusionsByAtom

static __thread cudaEvent_t	start_calc

static __thread cudaEvent_t	end_remote_download

static __thread cudaEvent_t	end_local_download

static __thread ResizeArray < patch_pair > *	patch_pairs_ptr

static __thread ResizeArray < int > *	patch_pair_num_ptr

static __thread int	atom_params_size

static __thread atom_param *	atom_params

static __thread int	vdw_types_size

static __thread int *	vdw_types

static __thread int	dummy_size

static __thread float *	dummy_dev

static __thread int	force_ready_queue_size

static __thread int *	force_ready_queue

static __thread int	force_ready_queue_len

static __thread int	force_ready_queue_next

static __thread int	block_order_size

static __thread int *	block_order

static __thread int	num_atoms

static __thread int	num_local_atoms

static __thread int	num_remote_atoms

static __thread int	virials_size

static __thread float *	virials

static __thread int	num_virials

static __thread float *	slow_virials

static __thread int	energy_gbis_size

static __thread float *	energy_gbis

static __thread int	intRad0H_size

static __thread float *	intRad0H

static __thread int	intRadSH_size

static __thread float *	intRadSH

static __thread int	bornRadH_size

static __thread float *	bornRadH

static __thread int	dHdrPrefixH_size

static __thread float *	dHdrPrefixH

static __thread int	cuda_timer_count

static __thread double	cuda_timer_total

static __thread double	kernel_time

static __thread double	remote_submit_time

static __thread double	local_submit_time

static __thread int	check_count

static __thread int	check_remote_count

static __thread int	check_local_count

static __thread int	kernel_launch_state = 0

Macro Definition Documentation

#define count_limit 1000000

Definition at line 885 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_local_progress(), cuda_check_progress(), and cuda_check_remote_progress().

#define CUDA_POLL	(	FN,
		ARG
	)	CcdCallFnAfter(FN,ARG,0.1)

Definition at line 875 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_local_calc(), cuda_check_local_progress(), cuda_check_progress(), cuda_check_remote_calc(), cuda_check_remote_progress(), and ComputeNonbondedCUDA::recvYieldDevice().

#define GBISP ( ... )

Definition at line 882 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishWork(), ComputeNonbondedCUDA::noWork(), and ComputeNonbondedCUDA::recvYieldDevice().

#define SET_EXCL	(	EXCL,
		BASE,
		DIFF
	)	(EXCL)[((BASE)+(DIFF))>>5] |= (1<<(((BASE)+(DIFF))&31))

Referenced by ComputeNonbondedCUDA::build_exclusions().

Function Documentation

void build_cuda_exclusions ( )

Definition at line 252 of file ComputeNonbondedCUDA.C.

References ComputeNonbondedCUDA::build_exclusions(), deviceCUDA, and DeviceCUDA::getMasterPe().

Referenced by ComputeMgr::recvBuildCudaExclusions().

                              {
   if ( deviceCUDA->getMasterPe() != CkMyPe() ) return;
   ComputeNonbondedCUDA::build_exclusions();
 }

void build_cuda_force_table ( )

Definition at line 81 of file ComputeNonbondedCUDA.C.

References ComputeNonbondedCUDA::build_force_table(), ComputeNonbondedCUDA::build_lj_table(), deviceCUDA, and DeviceCUDA::getMasterPe().

Referenced by ComputeMgr::recvBuildCudaForceTable().

                               {
   if ( deviceCUDA->getMasterPe() != CkMyPe() ) return;
   ComputeNonbondedCUDA::build_lj_table();
   ComputeNonbondedCUDA::build_force_table();
 }

void CcdCallBacksReset	(	void *	ignored,
		double	curWallTime
	)

Referenced by cuda_check_local_calc(), cuda_check_local_progress(), cuda_check_pme_charges(), cuda_check_pme_forces(), cuda_check_progress(), cuda_check_remote_calc(), cuda_check_remote_progress(), CudaPmeKSpaceCompute::energyAndVirialSetCallback(), CudaPmeRealSpaceCompute::gatherForceSetCallback(), ComputePmeMgr::pollChargeGridReady(), ComputePmeMgr::pollForcesReady(), and ComputeNonbondedCUDA::recvYieldDevice().

void cuda_check_local_calc	(	void *	arg,
		double	walltime
	)

Definition at line 1622 of file ComputeNonbondedCUDA.C.

References CcdCallBacksReset(), computeMgr, CUDA_POLL, deviceCUDA, end_local_download, DeviceCUDA::getNextPeSharingGpu(), and ComputeMgr::sendYieldDevice().

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

                                                        {
   // in theory we only need end_local_calc, but overlap isn't reliable
   // if ( cudaEventQuery(end_local_calc) == cudaSuccess ) {
   if ( cudaEventQuery(end_local_download) == cudaSuccess ) {
 // CkPrintf("Pe %d yielding to %d after local calc\n", CkMyPe(), next_pe_sharing_gpu);
     computeMgr->sendYieldDevice(deviceCUDA->getNextPeSharingGpu());
 // CkPrintf("Pe %d yielded to %d after local calc\n", CkMyPe(), next_pe_sharing_gpu);
   } else {
     CcdCallBacksReset(0,walltime);  // fix Charm++
     CUDA_POLL(cuda_check_local_calc, arg);
   }
 }

void cuda_check_local_progress	(	void *	arg,
		double	walltime
	)

Definition at line 999 of file ComputeNonbondedCUDA.C.

References CcdCallBacksReset(), check_local_count, check_remote_count, count_limit, cuda_errcheck(), CUDA_POLL, CUDA_TRACE_LOCAL, CUDA_TRACE_POLL_LOCAL, cudaDie(), end_local_download, kernel_time, local_submit_time, WorkDistrib::messageFinishCUDA(), and NAMD_bug().

Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice().

                                                            {
 
   CUDA_TRACE_POLL_LOCAL;
   cudaError_t err = cudaEventQuery(end_local_download);
   if ( err == cudaSuccess ) {
     CUDA_TRACE_LOCAL(local_submit_time,walltime);
     kernel_time = walltime - kernel_time;
     check_local_count = 0;
     cuda_errcheck("at cuda local stream completed");
     WorkDistrib::messageFinishCUDA((ComputeNonbondedCUDA *) arg);
   } else if ( err != cudaErrorNotReady ) {
     char errmsg[256];
     sprintf(errmsg,"in cuda_check_local_progress after polling %d times over %f s on step %d",
             check_local_count, walltime - local_submit_time,
             ((ComputeNonbondedCUDA *) arg)->step);
     cudaDie(errmsg,err);
   } else if ( ++check_local_count >= count_limit ) {
     char errmsg[256];
     sprintf(errmsg,"cuda_check_local_progress polled %d times over %f s on step %d",
             check_local_count, walltime - local_submit_time,
             ((ComputeNonbondedCUDA *) arg)->step);
     cudaDie(errmsg,err);
   } else if ( check_remote_count ) {
     NAMD_bug("nonzero check_remote_count in cuda_check_local_progress");
   } else {
     CcdCallBacksReset(0,walltime);  // fix Charm++
     CUDA_POLL(cuda_check_local_progress, arg);
   }
 }

void cuda_check_progress	(	void *	arg,
		double	walltime
	)

Definition at line 931 of file ComputeNonbondedCUDA.C.

References CcdCallBacksReset(), check_count, count_limit, CUDA_POLL, CUDA_TRACE_LOCAL, CUDA_TRACE_POLL_REMOTE, cudaDie(), force_ready_queue, force_ready_queue_len, force_ready_queue_next, kernel_time, and remote_submit_time.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

                                                      {
   CUDA_TRACE_POLL_REMOTE;
 
   int flindex;
   int poll_again = 1;
   while ( -1 != (flindex = force_ready_queue[force_ready_queue_next]) ) {
     //    CkPrintf("Pe %d forces ready %d is index %d at %lf\n",
     //       CkMyPe(), force_ready_queue_next, flindex, walltime);
     force_ready_queue[force_ready_queue_next] = -1;
     ++force_ready_queue_next;
     check_count = 0;
     if ( force_ready_queue_next == force_ready_queue_len ) {
       poll_again = 0;
       CUDA_TRACE_LOCAL(kernel_time,walltime);
       kernel_time = walltime - kernel_time;
       // need to guarantee this finishes before the last patch message!
       ((ComputeNonbondedCUDA *) arg)->workStarted = 0;
       ((ComputeNonbondedCUDA *) arg)->finishReductions();
     }
     ((ComputeNonbondedCUDA *) arg)->messageFinishPatch(flindex);
     if ( force_ready_queue_next == force_ready_queue_len ) break;
   }
   if ( ++check_count >= count_limit ) {
     char errmsg[256];
     sprintf(errmsg,"cuda_check_progress polled %d times over %f s on step %d",
             check_count, walltime - remote_submit_time,
             ((ComputeNonbondedCUDA *) arg)->step);
     cudaDie(errmsg,cudaSuccess);
   }
   if ( poll_again ) {
     CcdCallBacksReset(0,walltime);  // fix Charm++
     CUDA_POLL(cuda_check_progress, arg);
   }
 }

void cuda_check_remote_calc	(	void *	arg,
		double	walltime
	)

Definition at line 1609 of file ComputeNonbondedCUDA.C.

References CcdCallBacksReset(), computeMgr, CUDA_POLL, deviceCUDA, end_remote_download, DeviceCUDA::getNextPeSharingGpu(), and ComputeMgr::sendYieldDevice().

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

                                                         {
   // in theory we only need end_remote_calc, but overlap isn't reliable
   // if ( cudaEventQuery(end_remote_calc) == cudaSuccess ) {
   if ( cudaEventQuery(end_remote_download) == cudaSuccess ) {
 // CkPrintf("Pe %d yielding to %d after remote calc\n", CkMyPe(), next_pe_sharing_gpu);
     computeMgr->sendYieldDevice(deviceCUDA->getNextPeSharingGpu());
 // CkPrintf("Pe %d yielded to %d after remote calc\n", CkMyPe(), next_pe_sharing_gpu);
   } else {
     CcdCallBacksReset(0,walltime);  // fix Charm++
     CUDA_POLL(cuda_check_remote_calc, arg);
   }
 }

void cuda_check_remote_progress	(	void *	arg,
		double	walltime
	)

Definition at line 966 of file ComputeNonbondedCUDA.C.

References CcdCallBacksReset(), check_local_count, check_remote_count, count_limit, cuda_errcheck(), CUDA_POLL, CUDA_TRACE_POLL_REMOTE, CUDA_TRACE_REMOTE, cudaDie(), deviceCUDA, end_remote_download, DeviceCUDA::getMergeGrids(), kernel_time, local_submit_time, WorkDistrib::messageFinishCUDA(), NAMD_bug(), and remote_submit_time.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

                                                             {
 
   CUDA_TRACE_POLL_REMOTE;
   cudaError_t err = cudaEventQuery(end_remote_download);
   if ( err == cudaSuccess ) {
     local_submit_time = walltime;
     CUDA_TRACE_REMOTE(remote_submit_time,local_submit_time);
     if ( deviceCUDA->getMergeGrids() ) {  // no local
       kernel_time = local_submit_time - kernel_time;
     }
     check_remote_count = 0;
     cuda_errcheck("at cuda remote stream completed");
     WorkDistrib::messageFinishCUDA((ComputeNonbondedCUDA *) arg);
   } else if ( err != cudaErrorNotReady ) {
     char errmsg[256];
     sprintf(errmsg,"in cuda_check_remote_progress after polling %d times over %f s on step %d",
             check_remote_count, walltime - remote_submit_time,
             ((ComputeNonbondedCUDA *) arg)->step);
     cudaDie(errmsg,err);
   } else if ( ++check_remote_count >= count_limit ) {
     char errmsg[256];
     sprintf(errmsg,"cuda_check_remote_progress polled %d times over %f s on step %d",
             check_remote_count, walltime - remote_submit_time,
             ((ComputeNonbondedCUDA *) arg)->step);
     cudaDie(errmsg,err);
   } else if ( check_local_count ) {
     NAMD_bug("nonzero check_local_count in cuda_check_remote_progress");
   } else {
     CcdCallBacksReset(0,walltime);  // fix Charm++
     CUDA_POLL(cuda_check_remote_progress, arg);
   }
 }

void cuda_errcheck ( const char * msg )

Definition at line 45 of file ComputeNonbondedCUDA.C.

References NAMD_die().

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputePmeMgr::ComputePmeMgr(), cuda_bind_atom_params(), cuda_bind_atoms(), cuda_bind_exclusions(), cuda_bind_force_table(), cuda_bind_forces(), cuda_bind_GBIS_bornRad(), cuda_bind_GBIS_dEdaSum(), cuda_bind_GBIS_dHdrPrefix(), cuda_bind_GBIS_energy(), cuda_bind_GBIS_intRad(), cuda_bind_GBIS_psiSum(), cuda_bind_lj_table(), cuda_bind_patch_pairs(), cuda_bind_vdw_types(), cuda_bind_virials(), cuda_check_local_progress(), cuda_check_remote_progress(), cuda_GBIS_P1(), cuda_GBIS_P2(), cuda_GBIS_P3(), cuda_init(), cuda_nonbonded_forces(), ComputePme::doWork(), ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishReductions(), ComputePmeMgr::initialize(), ComputePmeMgr::initialize_computes(), and ComputePmeMgr::ungridCalc().

                                     {
   cudaError_t err;
   if ((err = cudaGetLastError()) != cudaSuccess) {
     char host[128];
     gethostname(host, 128);  host[127] = 0;
     char devstr[128] = "";
     int devnum;
     if ( cudaGetDevice(&devnum) == cudaSuccess ) {
       sprintf(devstr, " device %d", devnum);
     }
     cudaDeviceProp deviceProp;
     if ( cudaGetDeviceProperties(&deviceProp, devnum) == cudaSuccess ) {
       sprintf(devstr, " device %d pci %x:%x:%x", devnum,
         deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID);
     }
     char errmsg[1024];
     sprintf(errmsg,"CUDA error %s on Pe %d (%s%s): %s", msg, CkMyPe(), host, devstr, cudaGetErrorString(err));
     NAMD_die(errmsg);
   }
 }

void init_arrays ( )

Definition at line 890 of file ComputeNonbondedCUDA.C.

References atom_params, atom_params_size, block_order, block_order_size, bornRadH, bornRadH_size, dHdrPrefixH, dHdrPrefixH_size, dummy_dev, dummy_size, energy_gbis, energy_gbis_size, force_ready_queue, force_ready_queue_len, force_ready_queue_next, force_ready_queue_size, intRad0H, intRad0H_size, intRadSH, intRadSH_size, num_atoms, num_local_atoms, num_remote_atoms, num_virials, vdw_types, vdw_types_size, virials, and virials_size.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA().

                    {
 
   atom_params_size = 0;
   atom_params = NULL;
 
   vdw_types_size = 0;
   vdw_types = NULL;
   
   dummy_size = 0;
   dummy_dev = NULL;
 
   force_ready_queue_size = 0;
   force_ready_queue = NULL;
   force_ready_queue_len = 0;
   force_ready_queue_next = 0;
   
   block_order_size = 0;
   block_order = NULL;
   
   num_atoms = 0;
   num_local_atoms = 0;
   num_remote_atoms = 0;
 
   virials_size = 0;
   virials = NULL;
   num_virials = 0;
 
   energy_gbis_size = 0;
   energy_gbis = NULL;
 
   intRad0H_size = 0;
   intRad0H = NULL;
   intRadSH_size = 0;
   intRadSH = NULL;
   bornRadH_size = 0;
   bornRadH = NULL;
   dHdrPrefixH_size = 0;
   dHdrPrefixH = NULL;
 
 }

void register_cuda_compute_pair	(	ComputeID	c,
		PatchID	pid[],
		int	t[]
	)

Definition at line 394 of file ComputeNonbondedCUDA.C.

References ComputeNonbondedCUDA::compute_record::c, PatchMap::center(), cudaCompute, ComputeNonbondedCUDA::localComputeRecords, NAMD_bug(), ComputeNonbondedCUDA::compute_record::offset, ComputeNonbondedCUDA::patchMap, ComputeNonbondedCUDA::patchRecords, ComputeNonbondedCUDA::compute_record::pid, ComputeNonbondedCUDA::remoteComputeRecords, ComputeNonbondedCUDA::requirePatch(), Vector::x, Vector::y, and Vector::z.

Referenced by ComputeNonbondedPair::initialize().

                                                                      {
 
   if ( ! cudaCompute ) NAMD_bug("register_pair called early");
  
   cudaCompute->requirePatch(pid[0]);
   cudaCompute->requirePatch(pid[1]);
 
   ComputeNonbondedCUDA::compute_record cr;
   cr.c = c; 
   cr.pid[0] = pid[0];  cr.pid[1] = pid[1];
 
   int t1 = t[0];
   int t2 = t[1];
   Vector offset = cudaCompute->patchMap->center(pid[0])
                 - cudaCompute->patchMap->center(pid[1]);
   offset.x += (t1%3-1) - (t2%3-1);
   offset.y += ((t1/3)%3-1) - ((t2/3)%3-1);
   offset.z += (t1/9-1) - (t2/9-1);
   cr.offset = offset;
 
   if ( cudaCompute->patchRecords[pid[0]].isLocal ) {
     cudaCompute->localComputeRecords.add(cr);
   } else {
     cudaCompute->remoteComputeRecords.add(cr);
   }
 }

void register_cuda_compute_self	(	ComputeID	c,
		PatchID	pid
	)

Definition at line 377 of file ComputeNonbondedCUDA.C.

References ComputeNonbondedCUDA::compute_record::c, cudaCompute, ComputeNonbondedCUDA::localComputeRecords, NAMD_bug(), ComputeNonbondedCUDA::compute_record::offset, ComputeNonbondedCUDA::patchRecords, ComputeNonbondedCUDA::compute_record::pid, ComputeNonbondedCUDA::remoteComputeRecords, and ComputeNonbondedCUDA::requirePatch().

Referenced by ComputeNonbondedSelf::initialize().

                                                           {
 
   if ( ! cudaCompute ) NAMD_bug("register_self called early");
 
   cudaCompute->requirePatch(pid);
 
   ComputeNonbondedCUDA::compute_record cr;
   cr.c = c;
   cr.pid[0] = pid;  cr.pid[1] = pid;
   cr.offset = 0.;
   if ( cudaCompute->patchRecords[pid].isLocal ) {
     cudaCompute->localComputeRecords.add(cr);
   } else {
     cudaCompute->remoteComputeRecords.add(cr);
   }
 }

void send_build_cuda_force_table ( )

Definition at line 77 of file ComputeNonbondedCUDA.C.

References computeMgr, and ComputeMgr::sendBuildCudaForceTable().

Referenced by ComputeNonbondedUtil::select().

                                    {
   // Forwards the request to the thread-local computeMgr.  There is no
   // null check here and computeMgr is declared with an initial value of 0,
   // so it must have been assigned before this function is called.
   computeMgr->sendBuildCudaForceTable();
 }

static bool sortop_bitreverse	(	int	a,
		int	b
	)

inline static

Definition at line 66 of file ComputeNonbondedCUDA.C.

Referenced by cr_sortop_reverse_priority::pid_compare_priority().

                                                    {
   if ( a == b ) return 0; 
   for ( int bit = 1; bit; bit *= 2 ) {
     if ( (a&bit) != (b&bit) ) return ((a&bit) < (b&bit));
   }
   return 0;
 }

void unregister_cuda_compute ( ComputeID c )

Definition at line 421 of file ComputeNonbondedCUDA.C.

References NAMD_bug().

                                           {  // static
 
   // Unregistering is not supported: the argument c is ignored and any
   // call is reported through NAMD_bug.
   NAMD_bug("unregister_compute unimplemented");
 
 }

Variable Documentation

__thread atom_param* atom_params

static

Definition at line 830 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int atom_params_size

static

Definition at line 829 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int* block_order

static

Definition at line 844 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishReductions(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int block_order_size

static

Definition at line 843 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread float* bornRadH

static

Definition at line 865 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), init_arrays(), ComputeNonbondedCUDA::recvYieldDevice(), and CudaComputeGBISKernel::updateBornRad().

__thread int bornRadH_size

static

Definition at line 864 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int check_count

static

Definition at line 886 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_progress().

__thread int check_local_count

static

Definition at line 888 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_local_progress(), and cuda_check_remote_progress().

__thread int check_remote_count

static

Definition at line 887 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_local_progress(), and cuda_check_remote_progress().

__thread ComputeMgr* computeMgr = 0

static

Definition at line 75 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::assignPatches(), ComputePmeMgr::chargeGridSubmitted(), ComputeNonbondedCUDA::ComputeNonbondedCUDA(), cuda_check_local_calc(), cuda_check_remote_calc(), LdbCoordinator::ExecuteMigrations(), ComputeNonbondedCUDA::finishWork(), ComputeNonbondedCUDA::messageFinishPatch(), ComputeNonbondedCUDA::noWork(), and send_build_cuda_force_table().

__thread int cuda_timer_count

static

Definition at line 869 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::finishReductions().

__thread double cuda_timer_total

static

Definition at line 870 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::finishReductions(), and ComputeNonbondedCUDA::finishWork().

__thread ComputeNonbondedCUDA* cudaCompute = 0

static

Definition at line 74 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputeNonbondedCUDA::doWork(), register_cuda_compute_pair(), and register_cuda_compute_self().

__thread DeviceCUDA* deviceCUDA

Definition at line 22 of file DeviceCUDA.C.

__thread float* dHdrPrefixH

static

Definition at line 867 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), init_arrays(), ComputeNonbondedCUDA::recvYieldDevice(), and CudaComputeGBISKernel::update_dHdrPrefix().

__thread int dHdrPrefixH_size

static

Definition at line 866 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread float* dummy_dev

static

Definition at line 836 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int dummy_size

static

Definition at line 835 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread cudaEvent_t end_local_download

static

Definition at line 430 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), cuda_check_local_calc(), cuda_check_local_progress(), ComputeNonbondedCUDA::finishReductions(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread cudaEvent_t end_remote_download

static

Definition at line 429 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), cuda_check_remote_calc(), cuda_check_remote_progress(), ComputeNonbondedCUDA::finishReductions(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread float* energy_gbis

static

Definition at line 857 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishReductions(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int energy_gbis_size

static

Definition at line 856 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int2* exclusionsByAtom

static

Definition at line 257 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::build_exclusions(), ComputeNonbondedCUDA::doWork(), Molecule::get_exclusions_for_atom(), and Molecule::~Molecule().

__thread int* force_ready_queue

static

Definition at line 839 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_progress(), ComputeNonbondedCUDA::doWork(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int force_ready_queue_len

static

Definition at line 840 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_progress(), ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int force_ready_queue_next

static

Definition at line 841 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_progress(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int force_ready_queue_size

static

Definition at line 838 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread float* intRad0H

static

Definition at line 861 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), init_arrays(), ComputeNonbondedCUDA::recvYieldDevice(), and CudaComputeGBISKernel::updateIntRad().

__thread int intRad0H_size

static

Definition at line 860 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread float* intRadSH

static

Definition at line 863 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), init_arrays(), ComputeNonbondedCUDA::recvYieldDevice(), and CudaComputeGBISKernel::updateIntRad().

__thread int intRadSH_size

static

Definition at line 862 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int kernel_launch_state = 0

static

Definition at line 1047 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

__thread double kernel_time

static

Definition at line 871 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_local_progress(), cuda_check_progress(), cuda_check_remote_progress(), ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::finishWork().

__thread double local_submit_time

static

Definition at line 873 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_local_progress(), and cuda_check_remote_progress().

__thread int max_grid_size

Definition at line 198 of file ComputeNonbondedCUDAKernel.cu.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), cuda_GBIS_P1(), cuda_GBIS_P2(), cuda_GBIS_P3(), cuda_init(), and cuda_nonbonded_forces().

__thread int num_atoms

static

Definition at line 846 of file ComputeNonbondedCUDA.C.

Referenced by HomePatch::addForceToMomentum(), HomePatch::addForceToMomentum3(), HomePatch::addVelocityToPosition(), ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int num_local_atoms

static

Definition at line 847 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int num_remote_atoms

static

Definition at line 848 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int num_virials

static

Definition at line 852 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishReductions(), and init_arrays().

__thread ResizeArray<int>* patch_pair_num_ptr

static

Definition at line 433 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), and ComputeNonbondedCUDA::doWork().

__thread ResizeArray<patch_pair>* patch_pairs_ptr

static

Definition at line 432 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), and ComputeNonbondedCUDA::doWork().

__thread double remote_submit_time

static

Definition at line 872 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_progress(), cuda_check_remote_progress(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread float* slow_virials

static

Definition at line 854 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishReductions(), and void().

__thread cudaEvent_t start_calc

static

Definition at line 428 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputeNonbondedCUDA::finishReductions(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread cudaStream_t stream

Definition at line 200 of file ComputeNonbondedCUDAKernel.cu.

Referenced by ComputeBondedCUDAKernel::bondedForce(), CudaTileListKernel::buildTileLists(), clear_device_array(), CudaTileListKernel::clearTileListStat(), ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputePmeMgr::ComputePmeMgr(), copy3D_DtoD(), copy3D_DtoH(), copy3D_HtoD(), copy3D_PeerDtoD(), copy_DtoD(), copy_DtoH(), copy_HtoD(), copy_PeerDtoD(), cuda_bind_atom_params(), cuda_bind_atoms(), cuda_bind_GBIS_bornRad(), cuda_bind_GBIS_dHdrPrefix(), cuda_bind_GBIS_intRad(), cuda_bind_vdw_types(), CudaTileListKernel::finishTileList(), CudaComputeGBISKernel::GBISphase1(), CudaComputeGBISKernel::GBISphase2(), ComputePmeCUDAMgr::initialize_pencils(), CudaPmePencilXYZ::initializeDevice(), CudaComputeNonbondedKernel::nonbondedForce(), CudaTileListKernel::prepareTileList(), ComputeNonbondedCUDA::recvYieldDevice(), CudaComputeNonbondedKernel::reduceVirialEnergy(), ComputeBondedCUDAKernel::update(), CudaComputeGBISKernel::update_dHdrPrefix(), CudaComputeGBISKernel::updateBornRad(), CudaTileListKernel::updateComputes(), CudaComputeGBISKernel::updateIntRad(), CudaComputeNonbondedKernel::updateVdwTypesExcl(), writeComplexToDisk(), writeRealToDisk(), and ComputePmeCUDAMgr::~ComputePmeCUDAMgr().

__thread cudaStream_t stream2

Definition at line 201 of file ComputeNonbondedCUDAKernel.cu.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int* vdw_types

static

Definition at line 833 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), init_arrays(), and ComputeNonbondedCUDA::recvYieldDevice().

__thread int vdw_types_size

static

Definition at line 832 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread float* virials

static

Definition at line 851 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishReductions(), init_arrays(), ComputeNonbondedCUDA::recvYieldDevice(), and void().

__thread int virials_size

static

Definition at line 850 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

Classes

Macros

Functions

Variables

Macro Definition Documentation

Function Documentation

Variable Documentation