NAMD
Public Member Functions | Friends | List of all members
ComputePme Class Reference

#include <ComputePme.h>

Inheritance diagram for ComputePme:
Compute ComputePmeUtil

Public Member Functions

 ComputePme (ComputeID c, PatchID pid)
 
virtual ~ComputePme ()
 
void initialize ()
 
void atomUpdate ()
 
int noWork ()
 
void doWork ()
 
void doQMWork ()
 
void ungridForces ()
 
void setMgr (ComputePmeMgr *mgr)
 
- Public Member Functions inherited from Compute
 Compute (ComputeID)
 
int type ()
 
virtual ~Compute ()
 
void setNumPatches (int n)
 
int getNumPatches ()
 
virtual void patchReady (PatchID, int doneMigration, int seq)
 
virtual void finishPatch (int)
 
int sequence (void)
 
int priority (void)
 
int getGBISPhase (void)
 
virtual void gbisP2PatchReady (PatchID, int seq)
 
virtual void gbisP3PatchReady (PatchID, int seq)
 
- Public Member Functions inherited from ComputePmeUtil
 ComputePmeUtil ()
 
 ~ComputePmeUtil ()
 

Friends

class ComputePmeMgr
 

Additional Inherited Members

- Static Public Member Functions inherited from ComputePmeUtil
static void select (void)
 
- Public Attributes inherited from Compute
const ComputeID cid
 
LDObjHandle ldObjHandle
 
LocalWorkMsg *const localWorkMsg
 
- Static Public Attributes inherited from ComputePmeUtil
static int numGrids
 
static Bool alchOn
 
static Bool alchFepOn
 
static Bool alchThermIntOn
 
static Bool alchDecouple
 
static BigReal alchElecLambdaStart
 
static Bool lesOn
 
static int lesFactor
 
static Bool pairOn
 
static Bool selfOn
 
- Protected Member Functions inherited from Compute
void enqueueWork ()
 
- Protected Attributes inherited from Compute
int computeType
 
int basePriority
 
int gbisPhase
 
int gbisPhasePriority [3]
 

Detailed Description

Definition at line 46 of file ComputePme.h.

Constructor & Destructor Documentation

ComputePme::ComputePme ( ComputeID  c,
PatchID  pid 
)

Definition at line 2673 of file ComputePme.C.

References Compute::basePriority, DebugM, PmeGrid::dim2, PmeGrid::dim3, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, ComputePmeUtil::numGrids, Node::Object(), PmeGrid::order, PME_PRIORITY, SimParameters::PMEGridSizeX, SimParameters::PMEGridSizeY, SimParameters::PMEGridSizeZ, SimParameters::PMEInterpOrder, SimParameters::PMEOffload, SimParameters::qmForcesOn, Compute::setNumPatches(), and Node::simParameters.

2673  : Compute(c), patchID(pid)
2674 {
2675  DebugM(4,"ComputePme created.\n");
2677  setNumPatches(1);
2678 
2679  CProxy_ComputePmeMgr::ckLocalBranch(
2680  CkpvAccess(BOCclass_group).computePmeMgr)->addCompute(this);
2681 
2683 
2684  qmForcesOn = simParams->qmForcesOn;
2685  offload = simParams->PMEOffload;
2686 
2687  numGridsMax = numGrids;
2688 
2689  myGrid.K1 = simParams->PMEGridSizeX;
2690  myGrid.K2 = simParams->PMEGridSizeY;
2691  myGrid.K3 = simParams->PMEGridSizeZ;
2692  myGrid.order = simParams->PMEInterpOrder;
2693  myGrid.dim2 = myGrid.K2;
2694  myGrid.dim3 = 2 * (myGrid.K3/2 + 1);
2695 
2696 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
2697  cuda_atoms_offset = 0;
2698  f_data_host = 0;
2699  f_data_dev = 0;
2700  if ( ! offload )
2701 #endif
2702  {
2703  for ( int g=0; g<numGrids; ++g ) myRealSpace[g] = new PmeRealSpace(myGrid);
2704  }
2705 
2706  atomsChanged = 0;
2707 
2708  qmLoclIndx = 0;
2709  qmLocalCharges = 0;
2710 }
static Node * Object()
Definition: Node.h:86
void setNumPatches(int n)
Definition: Compute.h:52
int dim2
Definition: PmeBase.h:19
int dim3
Definition: PmeBase.h:19
int K2
Definition: PmeBase.h:18
SimParameters * simParameters
Definition: Node.h:178
int K1
Definition: PmeBase.h:18
#define DebugM(x, y)
Definition: Debug.h:59
static int numGrids
Definition: ComputePme.h:32
#define PME_PRIORITY
Definition: Priorities.h:29
int order
Definition: PmeBase.h:20
#define simParams
Definition: Output.C:127
int K3
Definition: PmeBase.h:18
int basePriority
Definition: Compute.h:37
Compute(ComputeID)
Definition: Compute.C:33
ComputePme::~ComputePme ( )
virtual

Definition at line 2946 of file ComputePme.C.

2947 {
2948 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
2949  if ( ! offload )
2950 #endif
2951  {
2952  for ( int g=0; g<numGridsMax; ++g ) delete myRealSpace[g];
2953  }
2954 }

Member Function Documentation

void ComputePme::atomUpdate ( void  )
virtual

Reimplemented from Compute.

Definition at line 2671 of file ComputePme.C.

2671 { atomsChanged = 1; }
void ComputePme::doQMWork ( )

Definition at line 3039 of file ComputePme.C.

References doWork(), Molecule::get_numQMAtoms(), Molecule::get_qmAtmChrg(), Molecule::get_qmAtmIndx(), Molecule::get_qmAtomGroup(), Patch::getCompAtomExtInfo(), Patch::getNumAtoms(), Node::molecule, and Node::Object().

3039  {
3040 
3041 // iout << CkMyPe() << ") ----> PME doQMWork.\n" << endi ;
3042 
3043 
3044  int numQMAtms = Node::Object()->molecule->get_numQMAtoms();
3045  const Real *qmAtmChrg = Node::Object()->molecule->get_qmAtmChrg() ;
3046  const int *qmAtmIndx = Node::Object()->molecule->get_qmAtmIndx() ;
3047  const Real *qmAtomGroup = Node::Object()->molecule->get_qmAtomGroup() ;
3048 
3049  const CompAtomExt *xExt = patch->getCompAtomExtInfo();
3050 
3051  // Determine number of qm atoms in this patch for the current step.
3052  numLocalQMAtoms = 0;
3053  for (int paIter=0; paIter<patch->getNumAtoms(); paIter++) {
3054  if ( qmAtomGroup[xExt[paIter].id] != 0 ) {
3055  numLocalQMAtoms++;
3056  }
3057  }
3058 
3059  // We prepare a charge vector with QM charges for use in the PME calculation.
3060 
3061  // Clears data from last step, if there is any.
3062  if (qmLoclIndx != 0)
3063  delete [] qmLoclIndx;
3064  if (qmLocalCharges != 0)
3065  delete [] qmLocalCharges;
3066 
3067  qmLoclIndx = new int[numLocalQMAtoms] ;
3068  qmLocalCharges = new Real[numLocalQMAtoms] ;
3069 
3070  // I am assuming there will be (in general) more QM atoms among all QM groups
3071  // than MM atoms in a patch.
3072  int procAtms = 0;
3073 
3074  for (int paIter=0; paIter<patch->getNumAtoms(); paIter++) {
3075 
3076  for (int i=0; i<numQMAtms; i++) {
3077 
3078  if (qmAtmIndx[i] == xExt[paIter].id) {
3079 
3080  qmLoclIndx[procAtms] = paIter ;
3081  qmLocalCharges[procAtms] = qmAtmChrg[i];
3082 
3083  procAtms++;
3084  break;
3085  }
3086 
3087  }
3088 
3089  if (procAtms == numLocalQMAtoms)
3090  break;
3091  }
3092 
3093  doWork();
3094  return ;
3095 }
static Node * Object()
Definition: Node.h:86
void doWork()
Definition: ComputePme.C:3097
const int * get_qmAtmIndx()
Definition: Molecule.h:824
int get_numQMAtoms()
Definition: Molecule.h:826
float Real
Definition: common.h:109
const Real * get_qmAtomGroup() const
Definition: Molecule.h:820
Real * get_qmAtmChrg()
Definition: Molecule.h:823
int getNumAtoms()
Definition: Patch.h:105
Molecule * molecule
Definition: Node.h:176
CompAtomExt * getCompAtomExtInfo()
Definition: Patch.h:117
void ComputePme::doWork ( void  )
virtual

Reimplemented from Compute.

Definition at line 3097 of file ComputePme.C.

References ComputePmeMgr::a_data_dev, ComputePmeMgr::a_data_host, ComputePmeUtil::alchDecouple, ComputePmeUtil::alchOn, Compute::basePriority, ResizeArray< T >::begin(), PmeParticle::cg, CompAtom::charge, ComputePmeMgr::chargeGridReady(), Compute::cid, Box< Owner, Data >::close(), COMPUTE_HOME_PRIORITY, COULOMB, ComputePmeMgr::cuda_atoms_alloc, ComputePmeMgr::cuda_atoms_count, ComputePmeMgr::cuda_busy, cuda_errcheck(), ComputePmeMgr::cuda_lock, ComputePmeMgr::cuda_submit_charges(), ComputePmeMgr::cuda_submit_charges_deque, DebugM, deviceCUDA, ComputeNonbondedUtil::dielectric_1, Flags::doMolly, ComputeNonbondedUtil::ewaldcof, PmeRealSpace::fill_charges(), Patch::flags, DeviceCUDA::getDeviceID(), DeviceCUDA::getMasterPe(), Patch::getNumAtoms(), PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, Flags::lattice, ComputePmeMgr::cuda_submit_charges_args::lattice, ComputePmeUtil::lesOn, ComputePmeMgr::cuda_submit_charges_args::mgr, NAMD_bug(), ComputePmeUtil::numGrids, Box< Owner, Data >::open(), PmeGrid::order, ComputePmeUtil::pairOn, CompAtom::partition, PATCH_PRIORITY, PME_OFFLOAD_PRIORITY, PME_PRIORITY, ComputePmeMgr::pmeComputes, CompAtom::position, ResizeArray< T >::resize(), scale_coordinates(), ComputeNonbondedUtil::scaling, ComputePmeUtil::selfOn, Compute::sequence(), ComputePmeMgr::cuda_submit_charges_args::sequence, PmeRealSpace::set_num_atoms(), ResizeArray< T >::size(), SQRT_PI, ComputePmeMgr::submitReductions(), TRACE_COMPOBJ_IDOFFSET, ungridForces(), PmeParticle::x, Vector::x, PmeParticle::y, Vector::y, PmeParticle::z, and Vector::z.

Referenced by doQMWork().

3098 {
3099  DebugM(4,"Entering ComputePme::doWork().\n");
3100 
3102 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
3104 #else
3106 #endif
3107  ungridForces();
3108  // CkPrintf("doWork 2 pe %d %d %d\n", CkMyPe(), myMgr->ungridForcesCount, myMgr->recipEvirCount);
3109  if ( ! --(myMgr->ungridForcesCount) && ! myMgr->recipEvirCount ) myMgr->submitReductions();
3110  return;
3111  }
3113  // CkPrintf("doWork 1 pe %d %d %d\n", CkMyPe(), myMgr->ungridForcesCount, myMgr->recipEvirCount);
3114 
3115 #ifdef TRACE_COMPUTE_OBJECTS
3116  double traceObjStartTime = CmiWallTimer();
3117 #endif
3118 
3119 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
3120  if ( offload ) cudaSetDevice(deviceCUDA->getDeviceID());
3121 #endif
3122 
3123  // allocate storage
3124  numLocalAtoms = patch->getNumAtoms();
3125 
3126  Lattice &lattice = patch->flags.lattice;
3127 
3128  localData_alloc.resize(numLocalAtoms*(numGrids+ ((numGrids>1 || selfOn)?1:0)));
3129  localData = localData_alloc.begin();
3130  localPartition_alloc.resize(numLocalAtoms);
3131  localPartition = localPartition_alloc.begin();
3132 
3133  int g;
3134  for ( g=0; g<numGrids; ++g ) {
3135  localGridData[g] = localData + numLocalAtoms*(g+1);
3136  }
3137 
3138  // get positions and charges
3139  PmeParticle * data_ptr = localData;
3140  unsigned char * part_ptr = localPartition;
3141  const BigReal coulomb_sqrt = sqrt( COULOMB * ComputeNonbondedUtil::scaling
3143 
3144  {
3145  CompAtom *x = positionBox->open();
3146  // CompAtomExt *xExt = patch->getCompAtomExtInfo();
3147  if ( patch->flags.doMolly ) {
3148  positionBox->close(&x);
3149  x = avgPositionBox->open();
3150  }
3151  int numAtoms = patch->getNumAtoms();
3152 
3153  for(int i=0; i<numAtoms; ++i)
3154  {
3155  data_ptr->x = x[i].position.x;
3156  data_ptr->y = x[i].position.y;
3157  data_ptr->z = x[i].position.z;
3158  data_ptr->cg = coulomb_sqrt * x[i].charge;
3159  ++data_ptr;
3160  *part_ptr = x[i].partition;
3161  ++part_ptr;
3162  }
3163 
3164  // QM loop to overwrite charges of QM atoms.
3165  // They are zero for NAMD, but are updated in ComputeQM.
3166  if ( qmForcesOn ) {
3167 
3168  for(int i=0; i<numLocalQMAtoms; ++i)
3169  {
3170  localData[qmLoclIndx[i]].cg = coulomb_sqrt * qmLocalCharges[i];
3171  }
3172 
3173  }
3174 
3175  if ( patch->flags.doMolly ) { avgPositionBox->close(&x); }
3176  else { positionBox->close(&x); }
3177  }
3178 
3179  // copy to other grids if needed
3180  if ( (alchOn && (!alchDecouple)) || lesOn ) {
3181  for ( g=0; g<numGrids; ++g ) {
3182  PmeParticle *lgd = localGridData[g];
3183  if (g < 2) {
3184  int nga = 0;
3185  for(int i=0; i<numLocalAtoms; ++i) {
3186  if ( localPartition[i] == 0 || localPartition[i] == (g+1) || localPartition[i] == (g+3)) {
3187  // for FEP/TI: grid 0 gets non-alch + partition 1 + partition 3;
3188  // grid 1 gets non-alch + partition 2 + partition 4;
3189  lgd[nga++] = localData[i];
3190  }
3191  }
3192  numGridAtoms[g] = nga;
3193  } else {
3194  int nga = 0;
3195  for(int i=0; i<numLocalAtoms; ++i) {
3196  if ( localPartition[i] == 0 ) {
3197  // grid 2 (only if called for with numGrids=3) gets only non-alch
3198  lgd[nga++] = localData[i];
3199  }
3200  }
3201  numGridAtoms[g] = nga;
3202  }
3203  }
3204  } else if ( alchOn && alchDecouple) {
3205  // alchemical decoupling: four grids
3206  // g=0: partition 0 and partition 1
3207  // g=1: partition 0 and partition 2
3208  // g=2: only partition 1 atoms
3209  // g=3: only partition 2 atoms
3210  // plus one grid g=4, only partition 0, if numGrids=5
3211  for ( g=0; g<2; ++g ) { // same as before for first 2
3212  PmeParticle *lgd = localGridData[g];
3213  int nga = 0;
3214  for(int i=0; i<numLocalAtoms; ++i) {
3215  if ( localPartition[i] == 0 || localPartition[i] == (g+1) ) {
3216  lgd[nga++] = localData[i];
3217  }
3218  }
3219  numGridAtoms[g] = nga;
3220  }
3221  for (g=2 ; g<4 ; ++g ) { // only alchemical atoms for these 2
3222  PmeParticle *lgd = localGridData[g];
3223  int nga = 0;
3224  for(int i=0; i<numLocalAtoms; ++i) {
3225  if ( localPartition[i] == (g-1) ) {
3226  lgd[nga++] = localData[i];
3227  }
3228  }
3229  numGridAtoms[g] = nga;
3230  }
3231  for (g=4 ; g<numGrids ; ++g ) { // only non-alchemical atoms
3232  // numGrids=5 only if alchElecLambdaStart > 0
3233  PmeParticle *lgd = localGridData[g];
3234  int nga = 0;
3235  for(int i=0; i<numLocalAtoms; ++i) {
3236  if ( localPartition[i] == 0 ) {
3237  lgd[nga++] = localData[i];
3238  }
3239  }
3240  numGridAtoms[g] = nga;
3241  }
3242  } else if ( selfOn ) {
3243  if ( numGrids != 1 ) NAMD_bug("ComputePme::doWork assertion 1 failed");
3244  g = 0;
3245  PmeParticle *lgd = localGridData[g];
3246  int nga = 0;
3247  for(int i=0; i<numLocalAtoms; ++i) {
3248  if ( localPartition[i] == 1 ) {
3249  lgd[nga++] = localData[i];
3250  }
3251  }
3252  numGridAtoms[g] = nga;
3253  } else if ( pairOn ) {
3254  if ( numGrids != 3 ) NAMD_bug("ComputePme::doWork assertion 2 failed");
3255  g = 0;
3256  PmeParticle *lgd = localGridData[g];
3257  int nga = 0;
3258  for(int i=0; i<numLocalAtoms; ++i) {
3259  if ( localPartition[i] == 1 || localPartition[i] == 2 ) {
3260  lgd[nga++] = localData[i];
3261  }
3262  }
3263  numGridAtoms[g] = nga;
3264  for ( g=1; g<3; ++g ) {
3265  PmeParticle *lgd = localGridData[g];
3266  int nga = 0;
3267  for(int i=0; i<numLocalAtoms; ++i) {
3268  if ( localPartition[i] == g ) {
3269  lgd[nga++] = localData[i];
3270  }
3271  }
3272  numGridAtoms[g] = nga;
3273  }
3274  } else {
3275  if ( numGrids != 1 ) NAMD_bug("ComputePme::doWork assertion 3 failed");
3276  localGridData[0] = localData;
3277  numGridAtoms[0] = numLocalAtoms;
3278  }
3279 
3280  if ( ! myMgr->doWorkCount ) {
3281  myMgr->doWorkCount = myMgr->pmeComputes.size();
3282 
3283 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
3284  if ( ! offload )
3285 #endif // NAMD_CUDA
3286  {
3287  memset( (void*) myMgr->fz_arr, 0, (myGrid.K3+myGrid.order-1) * sizeof(char) );
3288 
3289  for (int i=0; i<myMgr->q_count; ++i) {
3290  memset( (void*) (myMgr->q_list[i]), 0, (myGrid.K3+myGrid.order-1) * sizeof(float) );
3291  }
3292  }
3293 
3294  for ( g=0; g<numGrids; ++g ) {
3295  myMgr->evir[g] = 0;
3296  }
3297 
3298  myMgr->strayChargeErrors = 0;
3299 
3300  myMgr->compute_sequence = sequence();
3301  }
3302 
3303  if ( sequence() != myMgr->compute_sequence ) NAMD_bug("ComputePme sequence mismatch in doWork()");
3304 
3305  int strayChargeErrors = 0;
3306 
3307  // calculate self energy
3309  for ( g=0; g<numGrids; ++g ) {
3310  BigReal selfEnergy = 0;
3311  data_ptr = localGridData[g];
3312  int i;
3313  for(i=0; i<numGridAtoms[g]; ++i)
3314  {
3315  selfEnergy += data_ptr->cg * data_ptr->cg;
3316  ++data_ptr;
3317  }
3318  selfEnergy *= -1. * ewaldcof / SQRT_PI;
3319  myMgr->evir[g][0] += selfEnergy;
3320 
3321  float **q = myMgr->q_arr + g*myMgr->fsize;
3322  char *f = myMgr->f_arr + g*myMgr->fsize;
3323 
3324  scale_coordinates(localGridData[g], numGridAtoms[g], lattice, myGrid);
3325 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
3326  if ( offload ) {
3327  if ( myMgr->cuda_atoms_alloc == 0 ) { // first call
3328  int na = myMgr->cuda_atoms_alloc = 1.2 * (myMgr->cuda_atoms_count + 1000);
3329  cuda_errcheck("before malloc atom data for pme");
3330  cudaMallocHost((void**) &(myMgr->a_data_host), 7*na*sizeof(float));
3331  cudaMalloc((void**) &(myMgr->a_data_dev), 7*na*sizeof(float));
3332  cuda_errcheck("malloc atom data for pme");
3333  myMgr->cuda_atoms_count = 0;
3334  }
3335  cuda_atoms_offset = myMgr->cuda_atoms_count;
3336  int n = numGridAtoms[g];
3337  myMgr->cuda_atoms_count += n;
3338  if ( myMgr->cuda_atoms_count > myMgr->cuda_atoms_alloc ) {
3339  CkPrintf("Pe %d expanding CUDA PME atoms allocation because %d > %d\n",
3340  CkMyPe(), myMgr->cuda_atoms_count, myMgr->cuda_atoms_alloc);
3341  cuda_errcheck("before malloc expanded atom data for pme");
3342  int na = myMgr->cuda_atoms_alloc = 1.2 * (myMgr->cuda_atoms_count + 1000);
3343  const float *a_data_host_old = myMgr->a_data_host;
3344  cudaMallocHost((void**) &(myMgr->a_data_host), 7*na*sizeof(float));
3345  cuda_errcheck("malloc expanded host atom data for pme");
3346  memcpy(myMgr->a_data_host, a_data_host_old, 7*cuda_atoms_offset*sizeof(float));
3347  cudaFreeHost((void*) a_data_host_old);
3348  cuda_errcheck("free expanded host atom data for pme");
3349  cudaFree(myMgr->a_data_dev);
3350  cuda_errcheck("free expanded dev atom data for pme");
3351  cudaMalloc((void**) &(myMgr->a_data_dev), 7*na*sizeof(float));
3352  cuda_errcheck("malloc expanded dev atom data for pme");
3353  }
3354  float *a_data_host = myMgr->a_data_host + 7 * cuda_atoms_offset;
3355  data_ptr = localGridData[g];
3356  double order_1 = myGrid.order - 1;
3357  double K1 = myGrid.K1;
3358  double K2 = myGrid.K2;
3359  double K3 = myGrid.K3;
3360  int found_negative = 0;
3361  for ( int i=0; i<n; ++i ) {
3362  if ( data_ptr[i].x < 0 || data_ptr[i].y < 0 || data_ptr[i].z < 0 ) {
3363  found_negative = 1;
3364  // CkPrintf("low coord: %f %f %f\n", data_ptr[i].x, data_ptr[i].y, data_ptr[i].z);
3365  }
3366  double x_int = (int) data_ptr[i].x;
3367  double y_int = (int) data_ptr[i].y;
3368  double z_int = (int) data_ptr[i].z;
3369  a_data_host[7*i ] = data_ptr[i].x - x_int; // subtract in double precision
3370  a_data_host[7*i+1] = data_ptr[i].y - y_int;
3371  a_data_host[7*i+2] = data_ptr[i].z - z_int;
3372  a_data_host[7*i+3] = data_ptr[i].cg;
3373  x_int -= order_1; if ( x_int < 0 ) x_int += K1;
3374  y_int -= order_1; if ( y_int < 0 ) y_int += K2;
3375  z_int -= order_1; if ( z_int < 0 ) z_int += K3;
3376  a_data_host[7*i+4] = x_int;
3377  a_data_host[7*i+5] = y_int;
3378  a_data_host[7*i+6] = z_int;
3379  }
3380  if ( found_negative ) NAMD_bug("found negative atom coordinate in ComputePme::doWork");
3381  } else
3382 #endif // NAMD_CUDA
3383  {
3384  myRealSpace[g]->set_num_atoms(numGridAtoms[g]);
3385  myRealSpace[g]->fill_charges(q, myMgr->q_list, myMgr->q_count, strayChargeErrors, f, myMgr->fz_arr, localGridData[g]);
3386  }
3387  }
3388  myMgr->strayChargeErrors += strayChargeErrors;
3389 
3390 #ifdef TRACE_COMPUTE_OBJECTS
3391  traceUserBracketEvent(TRACE_COMPOBJ_IDOFFSET+this->cid, traceObjStartTime, CmiWallTimer());
3392 #endif
3393 
3394  if ( --(myMgr->doWorkCount) == 0 ) {
3395 // cudaDeviceSynchronize(); // XXXX
3396 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
3397  if ( offload ) {
3399  args.mgr = myMgr;
3400  args.lattice = &lattice;
3401  args.sequence = sequence();
3402  CmiLock(ComputePmeMgr::cuda_lock);
3403  if ( ComputePmeMgr::cuda_busy ) {
3405  } else if ( CkMyPe() == deviceCUDA->getMasterPe() ) {
3406  // avoid adding work to nonbonded data preparation pe
3407  args.mgr->cuda_submit_charges(*args.lattice, args.sequence);
3408  } else {
3409  ComputePmeMgr::cuda_busy = true;
3410  while ( 1 ) {
3411  CmiUnlock(ComputePmeMgr::cuda_lock);
3412  args.mgr->cuda_submit_charges(*args.lattice, args.sequence);
3413  CmiLock(ComputePmeMgr::cuda_lock);
3417  } else {
3418  ComputePmeMgr::cuda_busy = false;
3419  break;
3420  }
3421  }
3422  }
3423  CmiUnlock(ComputePmeMgr::cuda_lock);
3424  } else
3425 #endif // NAMD_CUDA
3426  {
3427  myMgr->chargeGridReady(lattice,sequence());
3428  }
3429  }
3430  atomsChanged = 0;
3431 }
static void scale_coordinates(PmeParticle p[], int N, Lattice lattice, PmeGrid grid)
Definition: PmeBase.inl:17
unsigned char partition
Definition: NamdTypes.h:56
void ungridForces()
Definition: ComputePme.C:4018
int sequence(void)
Definition: Compute.h:64
float * a_data_dev
Definition: ComputePme.C:425
#define TRACE_COMPOBJ_IDOFFSET
Definition: Compute.h:77
double x
Definition: PmeBase.h:26
int K2
Definition: PmeBase.h:18
int K1
Definition: PmeBase.h:18
#define COULOMB
Definition: common.h:46
#define DebugM(x, y)
Definition: Debug.h:59
static int numGrids
Definition: ComputePme.h:32
BigReal z
Definition: Vector.h:66
Position position
Definition: NamdTypes.h:53
static Bool alchOn
Definition: ComputePme.h:33
double cg
Definition: PmeBase.h:27
double z
Definition: PmeBase.h:26
double y
Definition: PmeBase.h:26
Flags flags
Definition: Patch.h:127
#define SQRT_PI
Definition: ComputeExt.C:30
Charge charge
Definition: NamdTypes.h:54
void chargeGridReady(Lattice &lattice, int sequence)
Definition: ComputePme.C:3554
int cuda_atoms_alloc
Definition: ComputePme.C:429
#define PME_PRIORITY
Definition: Priorities.h:29
int order
Definition: PmeBase.h:20
#define COMPUTE_HOME_PRIORITY
Definition: Priorities.h:76
int getMasterPe()
Definition: DeviceCUDA.h:105
void NAMD_bug(const char *err_msg)
Definition: common.C:129
void fill_charges(float **q_arr, float **q_arr_list, int &q_arr_count, int &stray_count, char *f_arr, char *fz_arr, PmeParticle p[])
Definition: PmeRealSpace.C:47
gridSize z
static std::deque< cuda_submit_charges_args > cuda_submit_charges_deque
Definition: ComputePme.C:447
BigReal x
Definition: Vector.h:66
#define PME_OFFLOAD_PRIORITY
Definition: Priorities.h:41
static Bool alchDecouple
Definition: ComputePme.h:36
void submitReductions()
Definition: ComputePme.C:4214
int getDeviceID()
Definition: DeviceCUDA.h:112
static Bool pairOn
Definition: ComputePme.h:40
int K3
Definition: PmeBase.h:18
int cuda_atoms_count
Definition: ComputePme.C:428
static Bool lesOn
Definition: ComputePme.h:38
void resize(int i)
Definition: ResizeArray.h:84
void cuda_errcheck(const char *msg)
BigReal y
Definition: Vector.h:66
int getNumAtoms()
Definition: Patch.h:105
ResizeArray< ComputePme * > pmeComputes
Definition: ComputePme.C:460
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:22
static Bool selfOn
Definition: ComputePme.h:41
Lattice lattice
Definition: PatchTypes.h:44
void set_num_atoms(int natoms)
Definition: PmeRealSpace.C:20
gridSize y
int basePriority
Definition: Compute.h:37
int size(void) const
Definition: ResizeArray.h:127
gridSize x
void cuda_submit_charges(Lattice &lattice, int sequence)
Definition: ComputePme.C:3435
Data * open(void)
Definition: Box.h:39
const ComputeID cid
Definition: Compute.h:43
void close(Data **const t)
Definition: Box.h:49
int doMolly
Definition: PatchTypes.h:24
float * a_data_host
Definition: ComputePme.C:424
static bool cuda_busy
Definition: ComputePme.C:448
double BigReal
Definition: common.h:114
#define PATCH_PRIORITY(PID)
Definition: Priorities.h:25
static CmiNodeLock cuda_lock
Definition: ComputePme.C:430
iterator begin(void)
Definition: ResizeArray.h:36
void ComputePme::initialize ( void  )
virtual

Reimplemented from Compute.

Definition at line 2712 of file ComputePme.C.

References ComputePmeMgr::cuda_atoms_count, Patch::getNumAtoms(), NAMD_bug(), PatchMap::Object(), Patch::registerAvgPositionPickup(), Patch::registerForceDeposit(), and Patch::registerPositionPickup().

2712  {
2713  if (!(patch = PatchMap::Object()->patch(patchID))) {
2714  NAMD_bug("ComputePme used with unknown patch.");
2715  }
2716  positionBox = patch->registerPositionPickup(this);
2717  avgPositionBox = patch->registerAvgPositionPickup(this);
2718  forceBox = patch->registerForceDeposit(this);
2719 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
2720  if ( offload ) {
2721  myMgr->cuda_atoms_count += patch->getNumAtoms();
2722  }
2723 #endif
2724 }
Box< Patch, CompAtom > * registerAvgPositionPickup(Compute *cid)
Definition: Patch.C:134
static PatchMap * Object()
Definition: PatchMap.h:27
void NAMD_bug(const char *err_msg)
Definition: common.C:129
int cuda_atoms_count
Definition: ComputePme.C:428
int getNumAtoms()
Definition: Patch.h:105
Box< Patch, CompAtom > * registerPositionPickup(Compute *cid)
Definition: Patch.C:107
Box< Patch, Results > * registerForceDeposit(Compute *cid)
Definition: Patch.C:228
int ComputePme::noWork ( )
virtual

Reimplemented from Compute.

Definition at line 3000 of file ComputePme.C.

References ResizeArray< T >::add(), Flags::doFullElectrostatics, Patch::flags, ComputePmeMgr::pmeComputes, ResizeArray< T >::size(), Box< Owner, Data >::skip(), and SubmitReduction::submit().

3000  {
3001 
3002  if ( patch->flags.doFullElectrostatics ) {
3003  // In QM/MM simulations, atom charges from QM regions need special treatment.
3004  if ( qmForcesOn ) {
3005  return 1;
3006  }
3007  if ( ! myMgr->ungridForcesCount && ! myMgr->recipEvirCount ) return 0; // work to do, enqueue as usual
3008  myMgr->heldComputes.add(this);
3009  return 1; // don't enqueue yet
3010  }
3011 
3012  positionBox->skip();
3013  forceBox->skip();
3014 
3015  if ( ++(myMgr->noWorkCount) == myMgr->pmeComputes.size() ) {
3016  myMgr->noWorkCount = 0;
3017  myMgr->reduction->submit();
3018  }
3019 
3020  atomsChanged = 0;
3021 
3022  return 1; // no work for this step
3023 }
Flags flags
Definition: Patch.h:127
int doFullElectrostatics
Definition: PatchTypes.h:23
void skip(void)
Definition: Box.h:63
int add(const Elem &elem)
Definition: ResizeArray.h:97
ResizeArray< ComputePme * > pmeComputes
Definition: ComputePme.C:460
void submit(void)
Definition: ReductionMgr.h:323
int size(void) const
Definition: ResizeArray.h:127
void ComputePme::setMgr ( ComputePmeMgr mgr)
inline

Definition at line 56 of file ComputePme.h.

56 { myMgr = mgr; }
void ComputePme::ungridForces ( )

Definition at line 4018 of file ComputePme.C.

References ADD_VECTOR_OBJECT, ComputePmeUtil::alchDecouple, ComputePmeUtil::alchFepOn, ComputePmeUtil::alchOn, ResizeArray< T >::begin(), Box< Owner, Data >::close(), SimParameters::commOnly, PmeRealSpace::compute_forces(), endi(), Results::f, Patch::flags, SimParameters::getCurrentLambda(), SimParameters::getCurrentLambda2(), SimParameters::getElecLambda(), Patch::getNumAtoms(), iERROR(), iout, Flags::lattice, ComputePmeUtil::lesFactor, ComputePmeUtil::lesOn, NAMD_bug(), ComputePmeUtil::numGrids, Node::Object(), Box< Owner, Data >::open(), ComputePmeUtil::pairOn, ResizeArray< T >::resize(), scale_forces(), ComputePmeUtil::selfOn, Compute::sequence(), Node::simParameters, Results::slow, Flags::step, Vector::x, Vector::y, and Vector::z.

Referenced by doWork().

4018  {
4019 
4020  if ( sequence() != myMgr->compute_sequence ) NAMD_bug("ComputePme sequence mismatch in ungridForces()");
4021 
4023 
4024  localResults_alloc.resize(numLocalAtoms* ((numGrids>1 || selfOn)?2:1));
4025  Vector *localResults = localResults_alloc.begin();
4026  Vector *gridResults;
4027 
4028  if ( alchOn || lesOn || selfOn || pairOn ) {
4029  for(int i=0; i<numLocalAtoms; ++i) { localResults[i] = 0.; }
4030  gridResults = localResults + numLocalAtoms;
4031  } else {
4032  gridResults = localResults;
4033  }
4034 
4035  Vector pairForce = 0.;
4036  Lattice &lattice = patch->flags.lattice;
4037  int g = 0;
4038  if(!simParams->commOnly) {
4039  for ( g=0; g<numGrids; ++g ) {
4040 #ifdef NETWORK_PROGRESS
4041  CmiNetworkProgress();
4042 #endif
4043 
4044 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
4045  if ( offload ) {
4046  int errfound = 0;
4047  for ( int n=numGridAtoms[g], i=0; i<n; ++i ) {
4048  // Neither isnan() nor x != x worked when testing on Cray; this does.
4049  if ( ((int*)f_data_host)[3*i] == 0x7fffffff ) { errfound = 1; } // CUDA NaN
4050  gridResults[i].x = f_data_host[3*i];
4051  gridResults[i].y = f_data_host[3*i+1];
4052  gridResults[i].z = f_data_host[3*i+2];
4053  }
4054  if ( errfound ) {
4055  int errcount = 0;
4056  for ( int n=numGridAtoms[g], i=0; i<n; ++i ) {
4057  float f = f_data_host[3*i];
4058  if ( ((int*)f_data_host)[3*i] == 0x7fffffff ) { // CUDA NaN
4059  ++errcount;
4060  gridResults[i] = 0.;
4061  }
4062  }
4063  iout << iERROR << "Stray PME grid charges detected: "
4064  << errcount << " atoms on pe " << CkMyPe() << "\n" << endi;
4065  }
4066  } else
4067 #endif // NAMD_CUDA
4068  {
4069  myRealSpace[g]->compute_forces(myMgr->q_arr+g*myMgr->fsize, localGridData[g], gridResults);
4070  }
4071  scale_forces(gridResults, numGridAtoms[g], lattice);
4072 
4073  if (alchOn) {
4074  float scale = 1.;
4075  BigReal elecLambdaUp, elecLambdaDown;
4076  BigReal alchLambda = simParams->getCurrentLambda(patch->flags.step);
4077  myMgr->alchLambda = alchLambda;
4078  BigReal alchLambda2 = simParams->getCurrentLambda2(patch->flags.step);
4079  myMgr->alchLambda2 = alchLambda2;
4080  elecLambdaUp = simParams->getElecLambda(alchLambda);
4081  elecLambdaDown = simParams->getElecLambda(1. - alchLambda);
4082 
4083  if ( g == 0 ) scale = elecLambdaUp;
4084  else if ( g == 1 ) scale = elecLambdaDown;
4085  else if ( g == 2 ) scale = (elecLambdaUp + elecLambdaDown - 1)*(-1);
4086 
4087  if (alchDecouple) {
4088  if ( g == 2 ) scale = 1 - elecLambdaUp;
4089  else if ( g == 3 ) scale = 1 - elecLambdaDown;
4090  else if ( g == 4 ) scale = (elecLambdaUp + elecLambdaDown - 1)*(-1);
4091  }
4092  int nga = 0;
4093  if (!alchDecouple) {
4094  if (g < 2 ) {
4095  for(int i=0; i<numLocalAtoms; ++i) {
4096  if ( localPartition[i] == 0 || localPartition[i] == (g+1) || localPartition[i] == (g+3) ) {
4097  // (g=0: only partition 0 and partition 1 and partition 3)
4098  // (g=1: only partition 0 and partition 2 and partition 4)
4099  localResults[i] += gridResults[nga++] * scale;
4100  }
4101  }
4102  } else {
4103  for(int i=0; i<numLocalAtoms; ++i) {
4104  if ( localPartition[i] == 0 ) {
4105  // (g=2: only partition 0)
4106  localResults[i] += gridResults[nga++] * scale;
4107  }
4108  }
4109  }
4110  } else { // alchDecouple
4111  if ( g < 2 ) {
4112  for(int i=0; i<numLocalAtoms; ++i) {
4113  if ( localPartition[i] == 0 || localPartition[i] == (g+1) ) {
4114  // g = 0: partition 0 or partition 1
4115  // g = 1: partition 0 or partition 2
4116  localResults[i] += gridResults[nga++] * scale;
4117  }
4118  }
4119  }
4120  else {
4121  for(int i=0; i<numLocalAtoms; ++i) {
4122  if ( localPartition[i] == (g-1) || localPartition[i] == (g-4)) {
4123  // g = 2: partition 1 only
4124  // g = 3: partition 2 only
4125  // g = 4: partition 0 only
4126  localResults[i] += gridResults[nga++] * scale;
4127  }
4128  }
4129  }
4130  }
4131  } else if ( lesOn ) {
4132  float scale = 1.;
4133  if ( alchFepOn ) {
4134  BigReal alchLambda = simParams->getCurrentLambda(patch->flags.step);
4135  myMgr->alchLambda = alchLambda;
4136  BigReal alchLambda2 = simParams->getCurrentLambda2(patch->flags.step);
4137  myMgr->alchLambda2 = alchLambda2;
4138  if ( g == 0 ) scale = alchLambda;
4139  else if ( g == 1 ) scale = 1. - alchLambda;
4140  } else if ( lesOn ) {
4141  scale = 1.0 / (float)lesFactor;
4142  }
4143  int nga = 0;
4144  for(int i=0; i<numLocalAtoms; ++i) {
4145  if ( localPartition[i] == 0 || localPartition[i] == (g+1) ) {
4146  localResults[i] += gridResults[nga++] * scale;
4147  }
4148  }
4149  } else if ( selfOn ) {
4150  PmeParticle *lgd = localGridData[g];
4151  int nga = 0;
4152  for(int i=0; i<numLocalAtoms; ++i) {
4153  if ( localPartition[i] == 1 ) {
4154  pairForce += gridResults[nga]; // should add up to almost zero
4155  localResults[i] += gridResults[nga++];
4156  }
4157  }
4158  } else if ( pairOn ) {
4159  if ( g == 0 ) {
4160  int nga = 0;
4161  for(int i=0; i<numLocalAtoms; ++i) {
4162  if ( localPartition[i] == 1 ) {
4163  pairForce += gridResults[nga];
4164  }
4165  if ( localPartition[i] == 1 || localPartition[i] == 2 ) {
4166  localResults[i] += gridResults[nga++];
4167  }
4168  }
4169  } else if ( g == 1 ) {
4170  int nga = 0;
4171  for(int i=0; i<numLocalAtoms; ++i) {
4172  if ( localPartition[i] == g ) {
4173  pairForce -= gridResults[nga]; // should add up to almost zero
4174  localResults[i] -= gridResults[nga++];
4175  }
4176  }
4177  } else {
4178  int nga = 0;
4179  for(int i=0; i<numLocalAtoms; ++i) {
4180  if ( localPartition[i] == g ) {
4181  localResults[i] -= gridResults[nga++];
4182  }
4183  }
4184  }
4185  }
4186  }
4187  }
4188 
4189  Vector *results_ptr = localResults;
4190 
4191  // add in forces
4192  {
4193  Results *r = forceBox->open();
4194  Force *f = r->f[Results::slow];
4195  int numAtoms = patch->getNumAtoms();
4196 
4197  if ( ! myMgr->strayChargeErrors && ! simParams->commOnly ) {
4198  for(int i=0; i<numAtoms; ++i) {
4199  f[i].x += results_ptr->x;
4200  f[i].y += results_ptr->y;
4201  f[i].z += results_ptr->z;
4202  ++results_ptr;
4203  }
4204  }
4205  forceBox->close(&r);
4206  }
4207 
4208  if ( pairOn || selfOn ) {
4209  ADD_VECTOR_OBJECT(myMgr->reduction,REDUCTION_PAIR_ELECT_FORCE,pairForce);
4210  }
4211 
4212 }
static Node * Object()
Definition: Node.h:86
void compute_forces(const float *const *q_arr, const PmeParticle p[], Vector f[])
Definition: PmeRealSpace.C:141
int sequence(void)
Definition: Compute.h:64
Definition: Vector.h:64
SimParameters * simParameters
Definition: Node.h:178
static int numGrids
Definition: ComputePme.h:32
std::ostream & endi(std::ostream &s)
Definition: InfoStream.C:54
BigReal z
Definition: Vector.h:66
static Bool alchOn
Definition: ComputePme.h:33
#define iout
Definition: InfoStream.h:51
Flags flags
Definition: Patch.h:127
void NAMD_bug(const char *err_msg)
Definition: common.C:129
BigReal getCurrentLambda2(const int)
Force * f[maxNumForces]
Definition: PatchTypes.h:67
static void scale_forces(Vector f[], int N, Lattice &lattice)
Definition: PmeBase.inl:60
BigReal x
Definition: Vector.h:66
static Bool alchDecouple
Definition: ComputePme.h:36
static int lesFactor
Definition: ComputePme.h:39
#define simParams
Definition: Output.C:127
static Bool pairOn
Definition: ComputePme.h:40
static Bool lesOn
Definition: ComputePme.h:38
void resize(int i)
Definition: ResizeArray.h:84
BigReal y
Definition: Vector.h:66
int getNumAtoms()
Definition: Patch.h:105
BigReal getCurrentLambda(const int)
#define ADD_VECTOR_OBJECT(R, RL, D)
Definition: ReductionMgr.h:27
static Bool selfOn
Definition: ComputePme.h:41
Lattice lattice
Definition: PatchTypes.h:44
std::ostream & iERROR(std::ostream &s)
Definition: InfoStream.C:83
static Bool alchFepOn
Definition: ComputePme.h:34
Data * open(void)
Definition: Box.h:39
void close(Data **const t)
Definition: Box.h:49
BigReal getElecLambda(const BigReal)
double BigReal
Definition: common.h:114
int step
Definition: PatchTypes.h:16
iterator begin(void)
Definition: ResizeArray.h:36

Friends And Related Function Documentation

friend class ComputePmeMgr
friend

Definition at line 58 of file ComputePme.h.


The documentation for this class was generated from the following files: