00001 #include "ComputeNonbondedUtil.h"
00002 #include "ComputeHomeTuples.h"
00003
// Forward declarations: these types are only used through pointers in this
// header, so their full definitions are not needed here.
class ComputeMgr;
class ComputeNonbondedCUDAKernel;
class float4;

// Free functions declared here and defined elsewhere.
// NOTE(review): judging by the names, these report which PE drives the CUDA
// device and whether the device is shared with the given PE -- confirm
// against the definitions.
int cuda_device_pe();
bool cuda_device_shared_with_pe(int pe);
00013
00014 class ComputeNonbondedCUDA : public Compute, private ComputeNonbondedUtil {
00015 public:
00016
00017 struct compute_record {
00018 ComputeID c;
00019 PatchID pid[2];
00020 Vector offset;
00021 };
00022
00023 struct patch_record {
00024 int localIndex;
00025 int localStart;
00026 int numAtoms;
00027 int numFreeAtoms;
00028 int refCount;
00029 int isLocal;
00030 int hostPe;
00031 PatchID patchID;
00032 Patch *p;
00033 Box<Patch,CompAtom> *positionBox;
00034 Box<Patch,Results> *forceBox;
00035 Box<Patch,Real> *intRadBox;
00036 Box<Patch,GBReal> *psiSumBox;
00037 Box<Patch,Real> *bornRadBox;
00038 Box<Patch,GBReal> *dEdaSumBox;
00039 Box<Patch,Real> *dHdrPrefixBox;
00040 CompAtom *x;
00041 CompAtomExt *xExt;
00042 Results *r;
00043 Force *f;
00044 Real *intRad;
00045 GBReal *psiSum;
00046 Real *bornRad;
00047 GBReal *dEdaSum;
00048 Real *dHdrPrefix;
00049
00050 patch_record() { refCount = 0; }
00051 };
00052
00053
00054 ComputeNonbondedCUDA(ComputeID c, ComputeMgr *mgr,
00055 ComputeNonbondedCUDA *m = 0, int idx = -1);
00056 ~ComputeNonbondedCUDA();
00057
00058 void atomUpdate();
00059 void doWork();
00060 int noWork();
00061
00062 void recvYieldDevice(int pe);
00063 LocalWorkMsg *localWorkMsg2;
00064
00065 int workStarted;
00066 Lattice lattice;
00067 int doSlow, doEnergy;
00068 int step;
00069 int finishWork();
00070 void messageFinishWork();
00071
00072 static void build_lj_table();
00073 static void build_force_table();
00074
00075 void build_exclusions();
00076
00077 void requirePatch(int pid);
00078 void assignPatches();
00079 void registerPatches();
00080 ResizeArray<int> activePatches, localActivePatches, remoteActivePatches;
00081 ResizeArray<int> hostedPatches, localHostedPatches, remoteHostedPatches;
00082 ResizeArray<patch_record> patchRecords;
00083 ResizeArray<compute_record> computeRecords;
00084 ResizeArray<compute_record> localComputeRecords, remoteComputeRecords;
00085
00086 int num_atom_records;
00087 int num_local_atom_records;
00088 int num_remote_atom_records;
00089 int num_force_records;
00090
00091 float4 *forces;
00092 float4 *slow_forces;
00093 GBReal *psiSumH;
00094 GBReal *dEdaSumH;
00095
00096 PatchMap *patchMap;
00097 AtomMap *atomMap;
00098 SubmitReduction *reduction;
00099
00100 ComputeNonbondedCUDAKernel *kernel;
00101
00102 ComputeNonbondedCUDA *master;
00103 int masterPe;
00104 int slaveIndex;
00105 ComputeNonbondedCUDA **slaves;
00106 int *slavePes;
00107 int numSlaves;
00108 };
00109
00110