NAMD
Classes | Macros | Functions | Variables
ComputeNonbondedCUDA.C File Reference
#include "common.h"
#include "charm++.h"
#include "HipDefines.h"
#include <cuda_runtime.h>
#include <cuda.h>
#include "WorkDistrib.h"
#include "ComputeMgr.h"
#include "ProxyMgr.h"
#include "ComputeNonbondedCUDAKernel.h"
#include "ComputeNonbondedCUDA.h"
#include "LJTable.h"
#include "ObjectArena.h"
#include "SortAtoms.h"
#include "Priorities.h"
#include <algorithm>
#include "NamdTypes.h"
#include "DeviceCUDA.h"
#include "CudaUtils.h"

Go to the source code of this file.

Classes

struct  exlist_sortop
 
struct  pid_sortop_reverse_priority
 
struct  cr_sortop_distance
 
struct  cr_sortop_reverse_priority
 

Macros

#define SET_EXCL(EXCL, BASE, DIFF)   (EXCL)[((BASE)+(DIFF))>>5] |= (1<<(((BASE)+(DIFF))&31))
 
#define CUDA_POLL(FN, ARG)   CcdCallFnAfter(FN,ARG,0.1)
 
#define GBISP(...)
 
#define count_limit   1000000
 

Functions

void cuda_errcheck (const char *msg)
 
static bool sortop_bitreverse (int a, int b)
 
void send_build_cuda_force_table ()
 
void build_cuda_force_table ()
 
void build_cuda_exclusions ()
 
void register_cuda_compute_self (ComputeID c, PatchID pid)
 
void register_cuda_compute_pair (ComputeID c, PatchID pid[], int t[])
 
void unregister_cuda_compute (ComputeID c)
 
void init_arrays ()
 
void CcdCallBacksReset (void *ignored, double curWallTime)
 
void cuda_check_progress (void *arg, double walltime)
 
void cuda_check_remote_progress (void *arg, double walltime)
 
void cuda_check_local_progress (void *arg, double walltime)
 
void cuda_check_remote_calc (void *arg, double walltime)
 
void cuda_check_local_calc (void *arg, double walltime)
 

Variables

__thread int max_grid_size
 
__thread cudaStream_t stream
 
__thread cudaStream_t stream2
 
__thread DeviceCUDA * deviceCUDA
 
static __thread
ComputeNonbondedCUDA * 
cudaCompute = 0
 
static __thread ComputeMgr * computeMgr = 0
 
static __thread int2 * exclusionsByAtom
 
static __thread cudaEvent_t start_calc
 
static __thread cudaEvent_t end_remote_download
 
static __thread cudaEvent_t end_local_download
 
static __thread ResizeArray
< patch_pair > * 
patch_pairs_ptr
 
static __thread ResizeArray
< int > * 
patch_pair_num_ptr
 
static __thread int atom_params_size
 
static __thread atom_param * atom_params
 
static __thread int vdw_types_size
 
static __thread int * vdw_types
 
static __thread int dummy_size
 
static __thread float * dummy_dev
 
static __thread int force_ready_queue_size
 
static __thread int * force_ready_queue
 
static __thread int force_ready_queue_len
 
static __thread int force_ready_queue_next
 
static __thread int block_order_size
 
static __thread int * block_order
 
static __thread int num_atoms
 
static __thread int num_local_atoms
 
static __thread int num_remote_atoms
 
static __thread int virials_size
 
static __thread float * virials
 
static __thread int num_virials
 
static __thread float * slow_virials
 
static __thread int energy_gbis_size
 
static __thread float * energy_gbis
 
static __thread int intRad0H_size
 
static __thread float * intRad0H
 
static __thread int intRadSH_size
 
static __thread float * intRadSH
 
static __thread int bornRadH_size
 
static __thread float * bornRadH
 
static __thread int dHdrPrefixH_size
 
static __thread float * dHdrPrefixH
 
static __thread int cuda_timer_count
 
static __thread double cuda_timer_total
 
static __thread double kernel_time
 
static __thread double remote_submit_time
 
static __thread double local_submit_time
 
static __thread int check_count
 
static __thread int check_remote_count
 
static __thread int check_local_count
 
static __thread int kernel_launch_state = 0
 

Macro Definition Documentation

#define count_limit   1000000
#define CUDA_POLL(FN, ARG)   CcdCallFnAfter(FN,ARG,0.1)
#define GBISP(...)
#define SET_EXCL(EXCL, BASE, DIFF)   (EXCL)[((BASE)+(DIFF))>>5] |= (1<<(((BASE)+(DIFF))&31))

Function Documentation

void build_cuda_exclusions ( )

Definition at line 252 of file ComputeNonbondedCUDA.C.

References ComputeNonbondedCUDA::build_exclusions(), deviceCUDA, and DeviceCUDA::getMasterPe().

Referenced by ComputeMgr::recvBuildCudaExclusions().

252  {
253  if ( deviceCUDA->getMasterPe() != CkMyPe() ) return;
255 }
int getMasterPe()
Definition: DeviceCUDA.h:105
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:22
void build_cuda_force_table ( )
void CcdCallBacksReset ( void * ignored,
double  curWallTime 
)
void cuda_check_local_calc ( void * arg,
double  walltime 
)

Definition at line 1622 of file ComputeNonbondedCUDA.C.

References CcdCallBacksReset(), computeMgr, CUDA_POLL, deviceCUDA, end_local_download, DeviceCUDA::getNextPeSharingGpu(), and ComputeMgr::sendYieldDevice().

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

1622  {
1623  // in theory we only need end_local_calc, but overlap isn't reliable
1624  // if ( cudaEventQuery(end_local_calc) == cudaSuccess ) {
1625  if ( cudaEventQuery(end_local_download) == cudaSuccess ) {
1626 // CkPrintf("Pe %d yielding to %d after local calc\n", CkMyPe(), next_pe_sharing_gpu);
1628 // CkPrintf("Pe %d yielded to %d after local calc\n", CkMyPe(), next_pe_sharing_gpu);
1629  } else {
1630  CcdCallBacksReset(0,walltime); // fix Charm++
1632  }
1633 }
void sendYieldDevice(int pe)
Definition: ComputeMgr.C:1434
static __thread ComputeMgr * computeMgr
void CcdCallBacksReset(void *ignored, double curWallTime)
static __thread cudaEvent_t end_local_download
void cuda_check_local_calc(void *arg, double walltime)
#define CUDA_POLL(FN, ARG)
int getNextPeSharingGpu()
Definition: DeviceCUDA.h:104
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:22
void cuda_check_local_progress ( void * arg,
double  walltime 
)

Definition at line 999 of file ComputeNonbondedCUDA.C.

References CcdCallBacksReset(), check_local_count, check_remote_count, count_limit, cuda_errcheck(), CUDA_POLL, CUDA_TRACE_LOCAL, CUDA_TRACE_POLL_LOCAL, cudaDie(), end_local_download, kernel_time, local_submit_time, WorkDistrib::messageFinishCUDA(), and NAMD_bug().

Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice().

999  {
1000 
1002  cudaError_t err = cudaEventQuery(end_local_download);
1003  if ( err == cudaSuccess ) {
1005  kernel_time = walltime - kernel_time;
1006  check_local_count = 0;
1007  cuda_errcheck("at cuda local stream completed");
1009  } else if ( err != cudaErrorNotReady ) {
1010  char errmsg[256];
1011  sprintf(errmsg,"in cuda_check_local_progress after polling %d times over %f s on step %d",
1012  check_local_count, walltime - local_submit_time,
1013  ((ComputeNonbondedCUDA *) arg)->step);
1014  cudaDie(errmsg,err);
1015  } else if ( ++check_local_count >= count_limit ) {
1016  char errmsg[256];
1017  sprintf(errmsg,"cuda_check_local_progress polled %d times over %f s on step %d",
1018  check_local_count, walltime - local_submit_time,
1019  ((ComputeNonbondedCUDA *) arg)->step);
1020  cudaDie(errmsg,err);
1021  } else if ( check_remote_count ) {
1022  NAMD_bug("nonzero check_remote_count in cuda_check_local_progress");
1023  } else {
1024  CcdCallBacksReset(0,walltime); // fix Charm++
1026  }
1027 }
static void messageFinishCUDA(Compute *)
Definition: WorkDistrib.C:2901
static __thread int check_remote_count
void cuda_check_local_progress(void *arg, double walltime)
#define count_limit
void CcdCallBacksReset(void *ignored, double curWallTime)
static __thread cudaEvent_t end_local_download
#define CUDA_POLL(FN, ARG)
static __thread double kernel_time
void NAMD_bug(const char *err_msg)
Definition: common.C:129
void cudaDie(const char *msg, cudaError_t err=cudaSuccess)
Definition: CudaUtils.C:9
static __thread double local_submit_time
#define CUDA_TRACE_POLL_LOCAL
Definition: DeviceCUDA.h:25
void cuda_errcheck(const char *msg)
#define CUDA_TRACE_LOCAL(START, END)
Definition: DeviceCUDA.h:31
static __thread int check_local_count
void cuda_check_progress ( void * arg,
double  walltime 
)

Definition at line 931 of file ComputeNonbondedCUDA.C.

References CcdCallBacksReset(), check_count, count_limit, CUDA_POLL, CUDA_TRACE_LOCAL, CUDA_TRACE_POLL_REMOTE, cudaDie(), force_ready_queue, force_ready_queue_len, force_ready_queue_next, kernel_time, and remote_submit_time.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

931  {
933 
934  int flindex;
935  int poll_again = 1;
936  while ( -1 != (flindex = force_ready_queue[force_ready_queue_next]) ) {
937  // CkPrintf("Pe %d forces ready %d is index %d at %lf\n",
938  // CkMyPe(), force_ready_queue_next, flindex, walltime);
941  check_count = 0;
942  if ( force_ready_queue_next == force_ready_queue_len ) {
943  poll_again = 0;
944  CUDA_TRACE_LOCAL(kernel_time,walltime);
945  kernel_time = walltime - kernel_time;
946  // need to guarantee this finishes before the last patch message!
947  ((ComputeNonbondedCUDA *) arg)->workStarted = 0;
948  ((ComputeNonbondedCUDA *) arg)->finishReductions();
949  }
950  ((ComputeNonbondedCUDA *) arg)->messageFinishPatch(flindex);
951  if ( force_ready_queue_next == force_ready_queue_len ) break;
952  }
953  if ( ++check_count >= count_limit ) {
954  char errmsg[256];
955  sprintf(errmsg,"cuda_check_progress polled %d times over %f s on step %d",
956  check_count, walltime - remote_submit_time,
957  ((ComputeNonbondedCUDA *) arg)->step);
958  cudaDie(errmsg,cudaSuccess);
959  }
960  if ( poll_again ) {
961  CcdCallBacksReset(0,walltime); // fix Charm++
963  }
964 }
static __thread int check_count
void cuda_check_progress(void *arg, double walltime)
#define CUDA_TRACE_POLL_REMOTE
Definition: DeviceCUDA.h:22
#define count_limit
void CcdCallBacksReset(void *ignored, double curWallTime)
static __thread int force_ready_queue_next
#define CUDA_POLL(FN, ARG)
static __thread double kernel_time
void cudaDie(const char *msg, cudaError_t err=cudaSuccess)
Definition: CudaUtils.C:9
static __thread double remote_submit_time
static __thread int force_ready_queue_len
static __thread int * force_ready_queue
#define CUDA_TRACE_LOCAL(START, END)
Definition: DeviceCUDA.h:31
void cuda_check_remote_calc ( void * arg,
double  walltime 
)

Definition at line 1609 of file ComputeNonbondedCUDA.C.

References CcdCallBacksReset(), computeMgr, CUDA_POLL, deviceCUDA, end_remote_download, DeviceCUDA::getNextPeSharingGpu(), and ComputeMgr::sendYieldDevice().

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

1609  {
1610  // in theory we only need end_remote_calc, but overlap isn't reliable
1611  // if ( cudaEventQuery(end_remote_calc) == cudaSuccess ) {
1612  if ( cudaEventQuery(end_remote_download) == cudaSuccess ) {
1613 // CkPrintf("Pe %d yielding to %d after remote calc\n", CkMyPe(), next_pe_sharing_gpu);
1615 // CkPrintf("Pe %d yielded to %d after remote calc\n", CkMyPe(), next_pe_sharing_gpu);
1616  } else {
1617  CcdCallBacksReset(0,walltime); // fix Charm++
1619  }
1620 }
void sendYieldDevice(int pe)
Definition: ComputeMgr.C:1434
static __thread cudaEvent_t end_remote_download
static __thread ComputeMgr * computeMgr
void CcdCallBacksReset(void *ignored, double curWallTime)
#define CUDA_POLL(FN, ARG)
int getNextPeSharingGpu()
Definition: DeviceCUDA.h:104
void cuda_check_remote_calc(void *arg, double walltime)
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:22
void cuda_check_remote_progress ( void * arg,
double  walltime 
)

Definition at line 966 of file ComputeNonbondedCUDA.C.

References CcdCallBacksReset(), check_local_count, check_remote_count, count_limit, cuda_errcheck(), CUDA_POLL, CUDA_TRACE_POLL_REMOTE, CUDA_TRACE_REMOTE, cudaDie(), deviceCUDA, end_remote_download, DeviceCUDA::getMergeGrids(), kernel_time, local_submit_time, WorkDistrib::messageFinishCUDA(), NAMD_bug(), and remote_submit_time.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

966  {
967 
969  cudaError_t err = cudaEventQuery(end_remote_download);
970  if ( err == cudaSuccess ) {
971  local_submit_time = walltime;
973  if ( deviceCUDA->getMergeGrids() ) { // no local
975  }
976  check_remote_count = 0;
977  cuda_errcheck("at cuda remote stream completed");
979  } else if ( err != cudaErrorNotReady ) {
980  char errmsg[256];
981  sprintf(errmsg,"in cuda_check_remote_progress after polling %d times over %f s on step %d",
982  check_remote_count, walltime - remote_submit_time,
983  ((ComputeNonbondedCUDA *) arg)->step);
984  cudaDie(errmsg,err);
985  } else if ( ++check_remote_count >= count_limit ) {
986  char errmsg[256];
987  sprintf(errmsg,"cuda_check_remote_progress polled %d times over %f s on step %d",
988  check_remote_count, walltime - remote_submit_time,
989  ((ComputeNonbondedCUDA *) arg)->step);
990  cudaDie(errmsg,err);
991  } else if ( check_local_count ) {
992  NAMD_bug("nonzero check_local_count in cuda_check_remote_progress");
993  } else {
994  CcdCallBacksReset(0,walltime); // fix Charm++
996  }
997 }
static void messageFinishCUDA(Compute *)
Definition: WorkDistrib.C:2901
static __thread int check_remote_count
#define CUDA_TRACE_POLL_REMOTE
Definition: DeviceCUDA.h:22
static __thread cudaEvent_t end_remote_download
#define CUDA_TRACE_REMOTE(START, END)
Definition: DeviceCUDA.h:28
int getMergeGrids()
Definition: DeviceCUDA.h:100
#define count_limit
void CcdCallBacksReset(void *ignored, double curWallTime)
#define CUDA_POLL(FN, ARG)
static __thread double kernel_time
void NAMD_bug(const char *err_msg)
Definition: common.C:129
void cudaDie(const char *msg, cudaError_t err=cudaSuccess)
Definition: CudaUtils.C:9
static __thread double remote_submit_time
static __thread double local_submit_time
void cuda_check_remote_progress(void *arg, double walltime)
void cuda_errcheck(const char *msg)
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:22
static __thread int check_local_count
void cuda_errcheck ( const char *  msg)

Definition at line 45 of file ComputeNonbondedCUDA.C.

References NAMD_die().

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputePmeMgr::ComputePmeMgr(), cuda_bind_atom_params(), cuda_bind_atoms(), cuda_bind_exclusions(), cuda_bind_force_table(), cuda_bind_forces(), cuda_bind_GBIS_bornRad(), cuda_bind_GBIS_dEdaSum(), cuda_bind_GBIS_dHdrPrefix(), cuda_bind_GBIS_energy(), cuda_bind_GBIS_intRad(), cuda_bind_GBIS_psiSum(), cuda_bind_lj_table(), cuda_bind_patch_pairs(), cuda_bind_vdw_types(), cuda_bind_virials(), cuda_check_local_progress(), cuda_check_remote_progress(), cuda_GBIS_P1(), cuda_GBIS_P2(), cuda_GBIS_P3(), cuda_init(), cuda_nonbonded_forces(), ComputePme::doWork(), ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishReductions(), ComputePmeMgr::initialize(), ComputePmeMgr::initialize_computes(), and ComputePmeMgr::ungridCalc().

45  {
46  cudaError_t err;
47  if ((err = cudaGetLastError()) != cudaSuccess) {
48  char host[128];
49  gethostname(host, 128); host[127] = 0;
50  char devstr[128] = "";
51  int devnum;
52  if ( cudaGetDevice(&devnum) == cudaSuccess ) {
53  sprintf(devstr, " device %d", devnum);
54  }
55  cudaDeviceProp deviceProp;
56  if ( cudaGetDeviceProperties(&deviceProp, devnum) == cudaSuccess ) {
57  sprintf(devstr, " device %d pci %x:%x:%x", devnum,
58  deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID);
59  }
60  char errmsg[1024];
61  sprintf(errmsg,"CUDA error %s on Pe %d (%s%s): %s", msg, CkMyPe(), host, devstr, cudaGetErrorString(err));
62  NAMD_die(errmsg);
63  }
64 }
void NAMD_die(const char *err_msg)
Definition: common.C:85
void init_arrays ( )

Definition at line 890 of file ComputeNonbondedCUDA.C.

References atom_params, atom_params_size, block_order, block_order_size, bornRadH, bornRadH_size, dHdrPrefixH, dHdrPrefixH_size, dummy_dev, dummy_size, energy_gbis, energy_gbis_size, force_ready_queue, force_ready_queue_len, force_ready_queue_next, force_ready_queue_size, intRad0H, intRad0H_size, intRadSH, intRadSH_size, num_atoms, num_local_atoms, num_remote_atoms, num_virials, vdw_types, vdw_types_size, virials, and virials_size.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA().

890  {
891 
892  atom_params_size = 0;
893  atom_params = NULL;
894 
895  vdw_types_size = 0;
896  vdw_types = NULL;
897 
898  dummy_size = 0;
899  dummy_dev = NULL;
900 
902  force_ready_queue = NULL;
905 
906  block_order_size = 0;
907  block_order = NULL;
908 
909  num_atoms = 0;
910  num_local_atoms = 0;
911  num_remote_atoms = 0;
912 
913  virials_size = 0;
914  virials = NULL;
915  num_virials = 0;
916 
917  energy_gbis_size = 0;
918  energy_gbis = NULL;
919 
920  intRad0H_size = 0;
921  intRad0H = NULL;
922  intRadSH_size = 0;
923  intRadSH = NULL;
924  bornRadH_size = 0;
925  bornRadH = NULL;
926  dHdrPrefixH_size = 0;
927  dHdrPrefixH = NULL;
928 
929 }
static __thread int * block_order
static __thread int intRadSH_size
static __thread int dummy_size
static __thread float * bornRadH
static __thread float * dHdrPrefixH
static __thread int dHdrPrefixH_size
static __thread int force_ready_queue_next
static __thread int intRad0H_size
static __thread float * intRadSH
static __thread int force_ready_queue_size
static __thread int num_remote_atoms
static __thread int virials_size
static __thread float * virials
static __thread int bornRadH_size
static __thread int num_virials
static __thread int vdw_types_size
static __thread int force_ready_queue_len
static __thread int energy_gbis_size
static __thread float * dummy_dev
static __thread float * energy_gbis
static __thread atom_param * atom_params
static __thread int block_order_size
static __thread int * vdw_types
static __thread int atom_params_size
static __thread int * force_ready_queue
static __thread int num_atoms
static __thread int num_local_atoms
static __thread float * intRad0H
void register_cuda_compute_pair ( ComputeID  c,
PatchID  pid[],
int  t[] 
)

Definition at line 394 of file ComputeNonbondedCUDA.C.

References ComputeNonbondedCUDA::compute_record::c, PatchMap::center(), cudaCompute, ComputeNonbondedCUDA::localComputeRecords, NAMD_bug(), ComputeNonbondedCUDA::compute_record::offset, ComputeNonbondedCUDA::patchMap, ComputeNonbondedCUDA::patchRecords, ComputeNonbondedCUDA::compute_record::pid, ComputeNonbondedCUDA::remoteComputeRecords, ComputeNonbondedCUDA::requirePatch(), Vector::x, Vector::y, and Vector::z.

Referenced by ComputeNonbondedPair::initialize().

394  {
395 
396  if ( ! cudaCompute ) NAMD_bug("register_pair called early");
397 
398  cudaCompute->requirePatch(pid[0]);
399  cudaCompute->requirePatch(pid[1]);
400 
402  cr.c = c;
403  cr.pid[0] = pid[0]; cr.pid[1] = pid[1];
404 
405  int t1 = t[0];
406  int t2 = t[1];
407  Vector offset = cudaCompute->patchMap->center(pid[0])
408  - cudaCompute->patchMap->center(pid[1]);
409  offset.x += (t1%3-1) - (t2%3-1);
410  offset.y += ((t1/3)%3-1) - ((t2/3)%3-1);
411  offset.z += (t1/9-1) - (t2/9-1);
412  cr.offset = offset;
413 
414  if ( cudaCompute->patchRecords[pid[0]].isLocal ) {
416  } else {
418  }
419 }
Definition: Vector.h:64
BigReal z
Definition: Vector.h:66
void NAMD_bug(const char *err_msg)
Definition: common.C:129
BigReal x
Definition: Vector.h:66
ResizeArray< compute_record > localComputeRecords
ResizeArray< compute_record > remoteComputeRecords
ScaledPosition center(int pid) const
Definition: PatchMap.h:99
BigReal y
Definition: Vector.h:66
ResizeArray< patch_record > patchRecords
static __thread ComputeNonbondedCUDA * cudaCompute
void register_cuda_compute_self ( ComputeID  c,
PatchID  pid 
)

Definition at line 377 of file ComputeNonbondedCUDA.C.

References ComputeNonbondedCUDA::compute_record::c, cudaCompute, ComputeNonbondedCUDA::localComputeRecords, NAMD_bug(), ComputeNonbondedCUDA::compute_record::offset, ComputeNonbondedCUDA::patchRecords, ComputeNonbondedCUDA::compute_record::pid, ComputeNonbondedCUDA::remoteComputeRecords, and ComputeNonbondedCUDA::requirePatch().

Referenced by ComputeNonbondedSelf::initialize().

377  {
378 
379  if ( ! cudaCompute ) NAMD_bug("register_self called early");
380 
382 
384  cr.c = c;
385  cr.pid[0] = pid; cr.pid[1] = pid;
386  cr.offset = 0.;
387  if ( cudaCompute->patchRecords[pid].isLocal ) {
389  } else {
391  }
392 }
void NAMD_bug(const char *err_msg)
Definition: common.C:129
ResizeArray< compute_record > localComputeRecords
ResizeArray< compute_record > remoteComputeRecords
ResizeArray< patch_record > patchRecords
static __thread ComputeNonbondedCUDA * cudaCompute
void send_build_cuda_force_table ( )

Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved.

Definition at line 77 of file ComputeNonbondedCUDA.C.

References computeMgr, and ComputeMgr::sendBuildCudaForceTable().

Referenced by ComputeNonbondedUtil::select().

77  {
79 }
void sendBuildCudaForceTable()
Definition: ComputeMgr.C:1467
static __thread ComputeMgr * computeMgr
static bool sortop_bitreverse ( int  a,
int  b 
)
inline static

Definition at line 66 of file ComputeNonbondedCUDA.C.

Referenced by cr_sortop_reverse_priority::pid_compare_priority().

66  {
67  if ( a == b ) return 0;
68  for ( int bit = 1; bit; bit *= 2 ) {
69  if ( (a&bit) != (b&bit) ) return ((a&bit) < (b&bit));
70  }
71  return 0;
72 }
void unregister_cuda_compute ( ComputeID  c)

Definition at line 421 of file ComputeNonbondedCUDA.C.

References NAMD_bug().

421  { // static
422 
423  NAMD_bug("unregister_compute unimplemented");
424 
425 }
void NAMD_bug(const char *err_msg)
Definition: common.C:129

Variable Documentation

__thread atom_param* atom_params
static
__thread int atom_params_size
static

Definition at line 829 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int* block_order
static
__thread int block_order_size
static

Definition at line 843 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread float* bornRadH
static
__thread int bornRadH_size
static

Definition at line 864 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int check_count
static

Definition at line 886 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_progress().

__thread int check_local_count
static
__thread int check_remote_count
static
__thread ComputeMgr* computeMgr = 0
static
__thread int cuda_timer_count
static

Definition at line 869 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::finishReductions().

__thread double cuda_timer_total
static
__thread ComputeNonbondedCUDA* cudaCompute = 0
static
__thread DeviceCUDA* deviceCUDA

Definition at line 22 of file DeviceCUDA.C.

__thread float* dHdrPrefixH
static
__thread int dHdrPrefixH_size
static

Definition at line 866 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread float* dummy_dev
static
__thread int dummy_size
static
__thread cudaEvent_t end_local_download
static
__thread cudaEvent_t end_remote_download
static
__thread float* energy_gbis
static
__thread int energy_gbis_size
static

Definition at line 856 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int2* exclusionsByAtom
static
__thread int* force_ready_queue
static
__thread int force_ready_queue_len
static
__thread int force_ready_queue_next
static
__thread int force_ready_queue_size
static

Definition at line 838 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread float* intRad0H
static
__thread int intRad0H_size
static

Definition at line 860 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread float* intRadSH
static
__thread int intRadSH_size
static

Definition at line 862 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int kernel_launch_state = 0
static

Definition at line 1047 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

__thread double kernel_time
static
__thread double local_submit_time
static
__thread int max_grid_size
__thread int num_atoms
static
__thread int num_local_atoms
static

Definition at line 847 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int num_remote_atoms
static

Definition at line 848 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int num_virials
static
__thread ResizeArray<int>* patch_pair_num_ptr
static
__thread ResizeArray<patch_pair>* patch_pairs_ptr
static
__thread double remote_submit_time
static
__thread float* slow_virials
static
__thread cudaEvent_t start_calc
static
__thread cudaStream_t stream
__thread cudaStream_t stream2
__thread int* vdw_types
static
__thread int vdw_types_size
static

Definition at line 832 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread float* virials
static
__thread int virials_size
static

Definition at line 850 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().