NAMD
ComputePme Class Reference

#include <ComputePme.h>

Inheritance diagram for ComputePme: ComputePme inherits from Compute and ComputePmeUtil.

Public Member Functions

 ComputePme (ComputeID c, PatchID pid)
 
virtual ~ComputePme ()
 
void initialize ()
 
void atomUpdate ()
 
int noWork ()
 
void doWork ()
 
void doQMWork ()
 
void ungridForces ()
 
void setMgr (ComputePmeMgr *mgr)
 
- Public Member Functions inherited from Compute
 Compute (ComputeID)
 
int type ()
 
virtual ~Compute ()
 
void setNumPatches (int n)
 
int getNumPatches ()
 
virtual void patchReady (PatchID, int doneMigration, int seq)
 
virtual void finishPatch (int)
 
int sequence (void)
 
int priority (void)
 
int getGBISPhase (void)
 
virtual void gbisP2PatchReady (PatchID, int seq)
 
virtual void gbisP3PatchReady (PatchID, int seq)
 
- Public Member Functions inherited from ComputePmeUtil
 ComputePmeUtil ()
 
 ~ComputePmeUtil ()
 

Friends

class ComputePmeMgr
 

Additional Inherited Members

- Static Public Member Functions inherited from ComputePmeUtil
static void select (void)
 
- Public Attributes inherited from Compute
const ComputeID cid
 
LDObjHandle ldObjHandle
 
LocalWorkMsg *const localWorkMsg
 
- Static Public Attributes inherited from ComputePmeUtil
static int numGrids
 
static Bool alchOn
 
static Bool alchFepOn
 
static Bool alchThermIntOn
 
static Bool alchDecouple
 
static BigReal alchElecLambdaStart
 
static Bool lesOn
 
static int lesFactor
 
static Bool pairOn
 
static Bool selfOn
 
- Protected Member Functions inherited from Compute
void enqueueWork ()
 
- Protected Attributes inherited from Compute
int computeType
 
int basePriority
 
int gbisPhase
 
int gbisPhasePriority [3]
 

Detailed Description

Definition at line 46 of file ComputePme.h.
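ComputePme handles the per-patch side of particle-mesh Ewald: noWork() decides each step whether the compute should be enqueued, doWork() gathers the patch's charges and spreads them onto the PME grid(s), and ungridForces() later deposits the reciprocal-space forces back onto the atoms, with ComputePmeMgr coordinating the FFT work in between. The listing below is a minimal, hypothetical driver sketch of that call pattern in plain standalone C++; none of the types or behavior here are NAMD code.

#include <cstdio>

// Hypothetical stand-in for a ComputePme-like object; only the
// noWork()/doWork()/ungridForces() call pattern mirrors this page.
struct PmeComputeLike {
    bool gridded = false;
    int  noWork()       { return 0; }                  // 0 = enqueue work this step
    void ungridForces() { std::puts("deposit reciprocal-space forces"); }
    void doWork() {
        if (gridded) { ungridForces(); gridded = false; return; }
        std::puts("spread patch charges onto PME grid(s)");
        gridded = true;                                 // FFT is done elsewhere (manager)
    }
};

int main() {
    PmeComputeLike pme;
    if (!pme.noWork()) {
        pme.doWork();   // first call: charges onto the grid
        // ... the manager performs the reciprocal-space FFT work ...
        pme.doWork();   // second call: forces back onto the atoms
    }
}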

Constructor & Destructor Documentation

◆ ComputePme()

ComputePme::ComputePme ( ComputeID c, PatchID pid )

Definition at line 2703 of file ComputePme.C.

References Compute::basePriority, DebugM, PmeGrid::dim2, PmeGrid::dim3, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, ComputePmeUtil::numGrids, Node::Object(), PmeGrid::order, PME_PRIORITY, Compute::setNumPatches(), Node::simParameters, and simParams.

2703  : Compute(c), patchID(pid)
2704 {
2705  DebugM(4,"ComputePme created.\n");
2706  basePriority = PME_PRIORITY;
2707  setNumPatches(1);
2708 
2709  CProxy_ComputePmeMgr::ckLocalBranch(
2710  CkpvAccess(BOCclass_group).computePmeMgr)->addCompute(this);
2711 
2712  SimParameters *simParams = Node::Object()->simParameters;
2713 
2714  qmForcesOn = simParams->qmForcesOn;
2715  offload = simParams->PMEOffload;
2716 
2717  numGridsMax = numGrids;
2718 
2719  myGrid.K1 = simParams->PMEGridSizeX;
2720  myGrid.K2 = simParams->PMEGridSizeY;
2721  myGrid.K3 = simParams->PMEGridSizeZ;
2722  myGrid.order = simParams->PMEInterpOrder;
2723  myGrid.dim2 = myGrid.K2;
2724  myGrid.dim3 = 2 * (myGrid.K3/2 + 1);
2725 
2726 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
2727  cuda_atoms_offset = 0;
2728  f_data_host = 0;
2729  f_data_dev = 0;
2730  if ( ! offload )
2731 #endif
2732  {
2733  for ( int g=0; g<numGrids; ++g ) myRealSpace[g] = new PmeRealSpace(myGrid);
2734  }
2735 
2736  atomsChanged = 0;
2737 
2738  qmLoclIndx = 0;
2739  qmLocalCharges = 0;
2740 }
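The constructor sizes the charge grid from the simulation parameters and pads the last dimension as dim3 = 2 * (K3/2 + 1), the usual layout when K3 real values are transformed in place into K3/2 + 1 complex coefficients by a real-to-complex FFT. Below is a small standalone sketch of that sizing; the ToyGrid struct and the grid values are stand-ins for illustration, not NAMD's PmeGrid.

#include <cstdio>

// Toy version of the PmeGrid sizing performed in the constructor above.
// The field names mirror PmeGrid; the sizes are arbitrary examples.
struct ToyGrid { int K1, K2, K3, order, dim2, dim3; };

int main() {
    ToyGrid g{};
    g.K1 = 64; g.K2 = 64; g.K3 = 60;   // PMEGridSizeX/Y/Z stand-ins
    g.order = 4;                       // PMEInterpOrder stand-in
    g.dim2 = g.K2;
    // K3 real values transform to K3/2 + 1 complex values, i.e.
    // 2 * (K3/2 + 1) floats, so the last dimension is padded accordingly.
    g.dim3 = 2 * (g.K3 / 2 + 1);
    std::printf("K3 = %d, padded dim3 = %d\n", g.K3, g.dim3);  // K3 = 60, dim3 = 62
}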

◆ ~ComputePme()

ComputePme::~ComputePme ( )
virtual

Definition at line 2976 of file ComputePme.C.

2977 {
2978 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
2979  if ( ! offload )
2980 #endif
2981  {
2982  for ( int g=0; g<numGridsMax; ++g ) delete myRealSpace[g];
2983  }
2984 }

Member Function Documentation

◆ atomUpdate()

void ComputePme::atomUpdate ( void  )
virtual

Reimplemented from Compute.

Definition at line 2701 of file ComputePme.C.

2701 { atomsChanged = 1; }

◆ doQMWork()

void ComputePme::doQMWork ( )

Definition at line 3069 of file ComputePme.C.

References doWork(), Molecule::get_numQMAtoms(), Molecule::get_qmAtmChrg(), Molecule::get_qmAtmIndx(), Molecule::get_qmAtomGroup(), Patch::getCompAtomExtInfo(), Patch::getNumAtoms(), Node::molecule, and Node::Object().

3069  {
3070 
3071 // iout << CkMyPe() << ") ----> PME doQMWork.\n" << endi ;
3072 
3073 
3074  int numQMAtms = Node::Object()->molecule->get_numQMAtoms();
3075  const Real *qmAtmChrg = Node::Object()->molecule->get_qmAtmChrg() ;
3076  const int *qmAtmIndx = Node::Object()->molecule->get_qmAtmIndx() ;
3077  const Real *qmAtomGroup = Node::Object()->molecule->get_qmAtomGroup() ;
3078 
3079  const CompAtomExt *xExt = patch->getCompAtomExtInfo();
3080 
3081  // Determine number of qm atoms in this patch for the current step.
3082  numLocalQMAtoms = 0;
3083  for (int paIter=0; paIter<patch->getNumAtoms(); paIter++) {
3084  if ( qmAtomGroup[xExt[paIter].id] != 0 ) {
3085  numLocalQMAtoms++;
3086  }
3087  }
3088 
3089  // We prepare a charge vector with QM charges for use in the PME calculation.
3090 
3091  // Clears data from last step, if there is any.
3092  if (qmLoclIndx != 0)
3093  delete [] qmLoclIndx;
3094  if (qmLocalCharges != 0)
3095  delete [] qmLocalCharges;
3096 
3097  qmLoclIndx = new int[numLocalQMAtoms] ;
3098  qmLocalCharges = new Real[numLocalQMAtoms] ;
3099 
3100  // I am assuming there will be (in general) more QM atoms among all QM groups
3101  // than MM atoms in a patch.
3102  int procAtms = 0;
3103 
3104  for (int paIter=0; paIter<patch->getNumAtoms(); paIter++) {
3105 
3106  for (int i=0; i<numQMAtms; i++) {
3107 
3108  if (qmAtmIndx[i] == xExt[paIter].id) {
3109 
3110  qmLoclIndx[procAtms] = paIter ;
3111  qmLocalCharges[procAtms] = qmAtmChrg[i];
3112 
3113  procAtms++;
3114  break;
3115  }
3116 
3117  }
3118 
3119  if (procAtms == numLocalQMAtoms)
3120  break;
3121  }
3122 
3123  doWork();
3124  return ;
3125 }
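doQMWork() scans the patch for atoms whose global index appears in the QM atom list and caches their local indices together with the QM charges; doWork() then uses these to overwrite the charges of QM atoms before gridding. The sketch below reproduces that matching step with plain standard-library containers; gatherLocalQMAtoms and its argument names are invented for illustration and are not NAMD interfaces.

#include <cstdio>
#include <vector>

struct LocalQM { std::vector<int> localIndex; std::vector<float> charge; };

// For every atom in the patch whose global id is a QM atom, record its
// local (per-patch) index and the QM charge to use for PME.
LocalQM gatherLocalQMAtoms(const std::vector<int>   &patchGlobalIds,
                           const std::vector<int>   &qmAtomIds,
                           const std::vector<float> &qmAtomCharges) {
    LocalQM out;
    for (int local = 0; local < (int)patchGlobalIds.size(); ++local) {
        for (std::size_t j = 0; j < qmAtomIds.size(); ++j) {
            if (qmAtomIds[j] == patchGlobalIds[local]) {
                out.localIndex.push_back(local);
                out.charge.push_back(qmAtomCharges[j]);
                break;
            }
        }
    }
    return out;
}

int main() {
    LocalQM qm = gatherLocalQMAtoms({10, 42, 7, 99}, {42, 99}, {-0.3f, 0.1f});
    for (std::size_t i = 0; i < qm.localIndex.size(); ++i)
        std::printf("local atom %d uses QM charge %g\n", qm.localIndex[i], qm.charge[i]);
}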

◆ doWork()

void ComputePme::doWork ( void  )
virtual

Reimplemented from Compute.

Definition at line 3127 of file ComputePme.C.

References ComputePmeMgr::a_data_dev, ComputePmeMgr::a_data_host, ComputePmeUtil::alchDecouple, ComputePmeUtil::alchOn, Compute::basePriority, ResizeArray< Elem >::begin(), PmeParticle::cg, CompAtom::charge, ComputePmeMgr::chargeGridReady(), Compute::cid, Box< Owner, Data >::close(), COMPUTE_HOME_PRIORITY, COULOMB, ComputePmeMgr::cuda_atoms_alloc, ComputePmeMgr::cuda_atoms_count, ComputePmeMgr::cuda_busy, cuda_errcheck(), ComputePmeMgr::cuda_lock, ComputePmeMgr::cuda_submit_charges(), ComputePmeMgr::cuda_submit_charges_deque, DebugM, deviceCUDA, ComputeNonbondedUtil::dielectric_1, Flags::doMolly, ComputeNonbondedUtil::ewaldcof, PmeRealSpace::fill_charges(), Patch::flags, DeviceCUDA::getDeviceID(), DeviceCUDA::getMasterPe(), Patch::getNumAtoms(), PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, Flags::lattice, ComputePmeMgr::cuda_submit_charges_args::lattice, ComputePmeUtil::lesOn, ComputePmeMgr::cuda_submit_charges_args::mgr, NAMD_bug(), ComputePmeUtil::numGrids, Box< Owner, Data >::open(), PmeGrid::order, ComputePmeUtil::pairOn, CompAtom::partition, PATCH_PRIORITY, PME_OFFLOAD_PRIORITY, PME_PRIORITY, ComputePmeMgr::pmeComputes, CompAtom::position, ResizeArray< Elem >::resize(), scale_coordinates(), ComputeNonbondedUtil::scaling, ComputePmeUtil::selfOn, Compute::sequence(), ComputePmeMgr::cuda_submit_charges_args::sequence, PmeRealSpace::set_num_atoms(), ResizeArray< Elem >::size(), SQRT_PI, ComputePmeMgr::submitReductions(), TRACE_COMPOBJ_IDOFFSET, ungridForces(), PmeParticle::x, Vector::x, PmeParticle::y, Vector::y, PmeParticle::z, and Vector::z.

Referenced by doQMWork().

3128 {
3129  DebugM(4,"Entering ComputePme::doWork().\n");
3130 
3132 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
3134 #else
3136 #endif
3137  ungridForces();
3138  // CkPrintf("doWork 2 pe %d %d %d\n", CkMyPe(), myMgr->ungridForcesCount, myMgr->recipEvirCount);
3139  if ( ! --(myMgr->ungridForcesCount) && ! myMgr->recipEvirCount ) myMgr->submitReductions();
3140  return;
3141  }
3143  // CkPrintf("doWork 1 pe %d %d %d\n", CkMyPe(), myMgr->ungridForcesCount, myMgr->recipEvirCount);
3144 
3145 #ifdef TRACE_COMPUTE_OBJECTS
3146  double traceObjStartTime = CmiWallTimer();
3147 #endif
3148 
3149 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
3150  if ( offload ) cudaSetDevice(deviceCUDA->getDeviceID());
3151 #endif
3152 
3153  // allocate storage
3154  numLocalAtoms = patch->getNumAtoms();
3155 
3156  Lattice &lattice = patch->flags.lattice;
3157 
3158  localData_alloc.resize(numLocalAtoms*(numGrids+ ((numGrids>1 || selfOn)?1:0)));
3159  localData = localData_alloc.begin();
3160  localPartition_alloc.resize(numLocalAtoms);
3161  localPartition = localPartition_alloc.begin();
3162 
3163  int g;
3164  for ( g=0; g<numGrids; ++g ) {
3165  localGridData[g] = localData + numLocalAtoms*(g+1);
3166  }
3167 
3168  // get positions and charges
3169  PmeParticle * data_ptr = localData;
3170  unsigned char * part_ptr = localPartition;
3171  const BigReal coulomb_sqrt = sqrt( COULOMB * ComputeNonbondedUtil::scaling
3172  * ComputeNonbondedUtil::dielectric_1 );
3173 
3174  {
3175  CompAtom *x = positionBox->open();
3176  // CompAtomExt *xExt = patch->getCompAtomExtInfo();
3177  if ( patch->flags.doMolly ) {
3178  positionBox->close(&x);
3179  x = avgPositionBox->open();
3180  }
3181  int numAtoms = patch->getNumAtoms();
3182 
3183  for(int i=0; i<numAtoms; ++i)
3184  {
3185  data_ptr->x = x[i].position.x;
3186  data_ptr->y = x[i].position.y;
3187  data_ptr->z = x[i].position.z;
3188  data_ptr->cg = coulomb_sqrt * x[i].charge;
3189  ++data_ptr;
3190  *part_ptr = x[i].partition;
3191  ++part_ptr;
3192  }
3193 
3194  // QM loop to overwrite charges of QM atoms.
3195  // They are zero for NAMD, but are updated in ComputeQM.
3196  if ( qmForcesOn ) {
3197 
3198  for(int i=0; i<numLocalQMAtoms; ++i)
3199  {
3200  localData[qmLoclIndx[i]].cg = coulomb_sqrt * qmLocalCharges[i];
3201  }
3202 
3203  }
3204 
3205  if ( patch->flags.doMolly ) { avgPositionBox->close(&x); }
3206  else { positionBox->close(&x); }
3207  }
3208 
3209  // copy to other grids if needed
3210  if ( (alchOn && (!alchDecouple)) || lesOn ) {
3211  for ( g=0; g<numGrids; ++g ) {
3212  PmeParticle *lgd = localGridData[g];
3213  if (g < 2) {
3214  int nga = 0;
3215  for(int i=0; i<numLocalAtoms; ++i) {
3216  if ( localPartition[i] == 0 || localPartition[i] == (g+1) || localPartition[i] == (g+3)) {
3217  // for FEP/TI: grid 0 gets non-alch + partition 1 + partition 3;
3218  // grid 1 gets non-alch + partition 2 + + partition 4;
3219  lgd[nga++] = localData[i];
3220  }
3221  }
3222  numGridAtoms[g] = nga;
3223  } else {
3224  int nga = 0;
3225  for(int i=0; i<numLocalAtoms; ++i) {
3226  if ( localPartition[i] == 0 ) {
3227  // grid 2 (only if called for with numGrids=3) gets only non-alch
3228  lgd[nga++] = localData[i];
3229  }
3230  }
3231  numGridAtoms[g] = nga;
3232  }
3233  }
3234  } else if ( alchOn && alchDecouple) {
3235  // alchemical decoupling: four grids
3236  // g=0: partition 0 and partition 1
3237  // g=1: partition 0 and partition 2
3238  // g=2: only partition 1 atoms
3239  // g=3: only partition 2 atoms
3240  // plus one grid g=4, only partition 0, if numGrids=5
3241  for ( g=0; g<2; ++g ) { // same as before for first 2
3242  PmeParticle *lgd = localGridData[g];
3243  int nga = 0;
3244  for(int i=0; i<numLocalAtoms; ++i) {
3245  if ( localPartition[i] == 0 || localPartition[i] == (g+1) ) {
3246  lgd[nga++] = localData[i];
3247  }
3248  }
3249  numGridAtoms[g] = nga;
3250  }
3251  for (g=2 ; g<4 ; ++g ) { // only alchemical atoms for these 2
3252  PmeParticle *lgd = localGridData[g];
3253  int nga = 0;
3254  for(int i=0; i<numLocalAtoms; ++i) {
3255  if ( localPartition[i] == (g-1) ) {
3256  lgd[nga++] = localData[i];
3257  }
3258  }
3259  numGridAtoms[g] = nga;
3260  }
3261  for (g=4 ; g<numGrids ; ++g ) { // only non-alchemical atoms
3262  // numGrids=5 only if alchElecLambdaStart > 0
3263  PmeParticle *lgd = localGridData[g];
3264  int nga = 0;
3265  for(int i=0; i<numLocalAtoms; ++i) {
3266  if ( localPartition[i] == 0 ) {
3267  lgd[nga++] = localData[i];
3268  }
3269  }
3270  numGridAtoms[g] = nga;
3271  }
3272  } else if ( selfOn ) {
3273  if ( numGrids != 1 ) NAMD_bug("ComputePme::doWork assertion 1 failed");
3274  g = 0;
3275  PmeParticle *lgd = localGridData[g];
3276  int nga = 0;
3277  for(int i=0; i<numLocalAtoms; ++i) {
3278  if ( localPartition[i] == 1 ) {
3279  lgd[nga++] = localData[i];
3280  }
3281  }
3282  numGridAtoms[g] = nga;
3283  } else if ( pairOn ) {
3284  if ( numGrids != 3 ) NAMD_bug("ComputePme::doWork assertion 2 failed");
3285  g = 0;
3286  PmeParticle *lgd = localGridData[g];
3287  int nga = 0;
3288  for(int i=0; i<numLocalAtoms; ++i) {
3289  if ( localPartition[i] == 1 || localPartition[i] == 2 ) {
3290  lgd[nga++] = localData[i];
3291  }
3292  }
3293  numGridAtoms[g] = nga;
3294  for ( g=1; g<3; ++g ) {
3295  PmeParticle *lgd = localGridData[g];
3296  int nga = 0;
3297  for(int i=0; i<numLocalAtoms; ++i) {
3298  if ( localPartition[i] == g ) {
3299  lgd[nga++] = localData[i];
3300  }
3301  }
3302  numGridAtoms[g] = nga;
3303  }
3304  } else {
3305  if ( numGrids != 1 ) NAMD_bug("ComputePme::doWork assertion 3 failed");
3306  localGridData[0] = localData;
3307  numGridAtoms[0] = numLocalAtoms;
3308  }
3309 
3310  if ( ! myMgr->doWorkCount ) {
3311  myMgr->doWorkCount = myMgr->pmeComputes.size();
3312 
3313 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
3314  if ( ! offload )
3315 #endif // NAMD_CUDA
3316  {
3317  memset( (void*) myMgr->fz_arr, 0, (myGrid.K3+myGrid.order-1) * sizeof(char) );
3318 
3319  for (int i=0; i<myMgr->q_count; ++i) {
3320  memset( (void*) (myMgr->q_list[i]), 0, (myGrid.K3+myGrid.order-1) * sizeof(float) );
3321  }
3322  }
3323 
3324  for ( g=0; g<numGrids; ++g ) {
3325  myMgr->evir[g] = 0;
3326  }
3327 
3328  myMgr->strayChargeErrors = 0;
3329 
3330  myMgr->compute_sequence = sequence();
3331  }
3332 
3333  if ( sequence() != myMgr->compute_sequence ) NAMD_bug("ComputePme sequence mismatch in doWork()");
3334 
3335  int strayChargeErrors = 0;
3336 
3337  // calculate self energy
3338  BigReal ewaldcof = ComputeNonbondedUtil::ewaldcof;
3339  for ( g=0; g<numGrids; ++g ) {
3340  BigReal selfEnergy = 0;
3341  data_ptr = localGridData[g];
3342  int i;
3343  for(i=0; i<numGridAtoms[g]; ++i)
3344  {
3345  selfEnergy += data_ptr->cg * data_ptr->cg;
3346  ++data_ptr;
3347  }
3348  selfEnergy *= -1. * ewaldcof / SQRT_PI;
3349  myMgr->evir[g][0] += selfEnergy;
3350 
3351  float **q = myMgr->q_arr + g*myMgr->fsize;
3352  char *f = myMgr->f_arr + g*myMgr->fsize;
3353 
3354  scale_coordinates(localGridData[g], numGridAtoms[g], lattice, myGrid);
3355 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
3356  if ( offload ) {
3357  if ( myMgr->cuda_atoms_alloc == 0 ) { // first call
3358  int na = myMgr->cuda_atoms_alloc = 1.2 * (myMgr->cuda_atoms_count + 1000);
3359  cuda_errcheck("before malloc atom data for pme");
3360  cudaMallocHost((void**) &(myMgr->a_data_host), 7*na*sizeof(float));
3361  cudaMalloc((void**) &(myMgr->a_data_dev), 7*na*sizeof(float));
3362  cuda_errcheck("malloc atom data for pme");
3363  myMgr->cuda_atoms_count = 0;
3364  }
3365  cuda_atoms_offset = myMgr->cuda_atoms_count;
3366  int n = numGridAtoms[g];
3367  myMgr->cuda_atoms_count += n;
3368  if ( myMgr->cuda_atoms_count > myMgr->cuda_atoms_alloc ) {
3369  CkPrintf("Pe %d expanding CUDA PME atoms allocation because %d > %d\n",
3370  CkMyPe(), myMgr->cuda_atoms_count, myMgr->cuda_atoms_alloc);
3371  cuda_errcheck("before malloc expanded atom data for pme");
3372  int na = myMgr->cuda_atoms_alloc = 1.2 * (myMgr->cuda_atoms_count + 1000);
3373  const float *a_data_host_old = myMgr->a_data_host;
3374  cudaMallocHost((void**) &(myMgr->a_data_host), 7*na*sizeof(float));
3375  cuda_errcheck("malloc expanded host atom data for pme");
3376  memcpy(myMgr->a_data_host, a_data_host_old, 7*cuda_atoms_offset*sizeof(float));
3377  cudaFreeHost((void*) a_data_host_old);
3378  cuda_errcheck("free expanded host atom data for pme");
3379  cudaFree(myMgr->a_data_dev);
3380  cuda_errcheck("free expanded dev atom data for pme");
3381  cudaMalloc((void**) &(myMgr->a_data_dev), 7*na*sizeof(float));
3382  cuda_errcheck("malloc expanded dev atom data for pme");
3383  }
3384  float *a_data_host = myMgr->a_data_host + 7 * cuda_atoms_offset;
3385  data_ptr = localGridData[g];
3386  double order_1 = myGrid.order - 1;
3387  double K1 = myGrid.K1;
3388  double K2 = myGrid.K2;
3389  double K3 = myGrid.K3;
3390  int found_negative = 0;
3391  for ( int i=0; i<n; ++i ) {
3392  if ( data_ptr[i].x < 0 || data_ptr[i].y < 0 || data_ptr[i].z < 0 ) {
3393  found_negative = 1;
3394  // CkPrintf("low coord: %f %f %f\n", data_ptr[i].x, data_ptr[i].y, data_ptr[i].z);
3395  }
3396  double x_int = (int) data_ptr[i].x;
3397  double y_int = (int) data_ptr[i].y;
3398  double z_int = (int) data_ptr[i].z;
3399  a_data_host[7*i ] = data_ptr[i].x - x_int; // subtract in double precision
3400  a_data_host[7*i+1] = data_ptr[i].y - y_int;
3401  a_data_host[7*i+2] = data_ptr[i].z - z_int;
3402  a_data_host[7*i+3] = data_ptr[i].cg;
3403  x_int -= order_1; if ( x_int < 0 ) x_int += K1;
3404  y_int -= order_1; if ( y_int < 0 ) y_int += K2;
3405  z_int -= order_1; if ( z_int < 0 ) z_int += K3;
3406  a_data_host[7*i+4] = x_int;
3407  a_data_host[7*i+5] = y_int;
3408  a_data_host[7*i+6] = z_int;
3409  }
3410  if ( found_negative ) NAMD_bug("found negative atom coordinate in ComputePme::doWork");
3411  } else
3412 #endif // NAMD_CUDA
3413  {
3414  myRealSpace[g]->set_num_atoms(numGridAtoms[g]);
3415  myRealSpace[g]->fill_charges(q, myMgr->q_list, myMgr->q_count, strayChargeErrors, f, myMgr->fz_arr, localGridData[g]);
3416  }
3417  }
3418  myMgr->strayChargeErrors += strayChargeErrors;
3419 
3420 #ifdef TRACE_COMPUTE_OBJECTS
3421  traceUserBracketEvent(TRACE_COMPOBJ_IDOFFSET+this->cid, traceObjStartTime, CmiWallTimer());
3422 #endif
3423 
3424  if ( --(myMgr->doWorkCount) == 0 ) {
3425 // cudaDeviceSynchronize(); // XXXX
3426 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
3427  if ( offload ) {
3428  ComputePmeMgr::cuda_submit_charges_args args;
3429  args.mgr = myMgr;
3430  args.lattice = &lattice;
3431  args.sequence = sequence();
3432  CmiLock(ComputePmeMgr::cuda_lock);
3433  if ( ComputePmeMgr::cuda_busy ) {
3434  ComputePmeMgr::cuda_submit_charges_deque.push_back(args);
3435  } else if ( CkMyPe() == deviceCUDA->getMasterPe() ) {
3436  // avoid adding work to nonbonded data preparation pe
3437  args.mgr->cuda_submit_charges(*args.lattice, args.sequence);
3438  } else {
3439  ComputePmeMgr::cuda_busy = true;
3440  while ( 1 ) {
3441  CmiUnlock(ComputePmeMgr::cuda_lock);
3442  args.mgr->cuda_submit_charges(*args.lattice, args.sequence);
3443  CmiLock(ComputePmeMgr::cuda_lock);
3444  if ( ComputePmeMgr::cuda_submit_charges_deque.size() ) {
3445  args = ComputePmeMgr::cuda_submit_charges_deque.front();
3446  ComputePmeMgr::cuda_submit_charges_deque.pop_front();
3447  } else {
3448  ComputePmeMgr::cuda_busy = false;
3449  break;
3450  }
3451  }
3452  }
3453  CmiUnlock(ComputePmeMgr::cuda_lock);
3454  } else
3455 #endif // NAMD_CUDA
3456  {
3457  myMgr->chargeGridReady(lattice,sequence());
3458  }
3459  }
3460  atomsChanged = 0;
3461 }
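Besides spreading charges, doWork() accumulates the Ewald self-energy correction for each grid, selfEnergy = -(ewaldcof / sqrt(pi)) * sum of cg*cg, where cg is the charge already scaled by sqrt(COULOMB * scaling * dielectric_1). A minimal standalone version of that accumulation follows; the charge values and the Ewald coefficient are example numbers only.

#include <cmath>
#include <cstdio>
#include <vector>

// Ewald self-energy correction as accumulated per grid in doWork():
// E_self = -(kappa / sqrt(pi)) * sum_i q_i^2
double pmeSelfEnergy(const std::vector<double> &q, double ewaldcof) {
    double sumq2 = 0.0;
    for (double qi : q) sumq2 += qi * qi;
    return -ewaldcof / std::sqrt(std::acos(-1.0)) * sumq2;
}

int main() {
    std::vector<double> charges{0.4, -0.8, 0.4};   // example (already scaled) charges
    double kappa = 0.32;                           // example Ewald coefficient
    std::printf("self energy = %f\n", pmeSelfEnergy(charges, kappa));
}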

◆ initialize()

void ComputePme::initialize ( void  )
virtual

Reimplemented from Compute.

Definition at line 2742 of file ComputePme.C.

References ComputePmeMgr::cuda_atoms_count, Patch::getNumAtoms(), NAMD_bug(), PatchMap::Object(), Patch::registerAvgPositionPickup(), Patch::registerForceDeposit(), and Patch::registerPositionPickup().

2742  {
2743  if (!(patch = PatchMap::Object()->patch(patchID))) {
2744  NAMD_bug("ComputePme used with unknown patch.");
2745  }
2746  positionBox = patch->registerPositionPickup(this);
2747  avgPositionBox = patch->registerAvgPositionPickup(this);
2748  forceBox = patch->registerForceDeposit(this);
2749 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
2750  if ( offload ) {
2751  myMgr->cuda_atoms_count += patch->getNumAtoms();
2752  }
2753 #endif
2754 }

◆ noWork()

int ComputePme::noWork ( )
virtual

Reimplemented from Compute.

Definition at line 3030 of file ComputePme.C.

References ResizeArray< Elem >::add(), Flags::doFullElectrostatics, Patch::flags, ComputePmeMgr::pmeComputes, ResizeArray< Elem >::size(), Box< Owner, Data >::skip(), and SubmitReduction::submit().

3030  {
3031 
3032  if ( patch->flags.doFullElectrostatics ) {
3033  // In QM/MM simulations, atom charges form QM regions need special treatment.
3034  if ( qmForcesOn ) {
3035  return 1;
3036  }
3037  if ( ! myMgr->ungridForcesCount && ! myMgr->recipEvirCount ) return 0; // work to do, enqueue as usual
3038  myMgr->heldComputes.add(this);
3039  return 1; // don't enqueue yet
3040  }
3041 
3042  positionBox->skip();
3043  forceBox->skip();
3044 
3045  if ( ++(myMgr->noWorkCount) == myMgr->pmeComputes.size() ) {
3046  myMgr->noWorkCount = 0;
3047  myMgr->reduction->submit();
3048  }
3049 
3050  atomsChanged = 0;
3051 
3052  return 1; // no work for this step
3053 }
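When full electrostatics is requested but results from the previous reciprocal-space pass have not yet been consumed, noWork() parks the compute in the manager's heldComputes list and reports that there is no work yet; the held computes are enqueued later once the outstanding counts drain. The following is a schematic of that hold-and-release pattern using only the standard library; the types and method names are invented and are not the ComputePmeMgr interface.

#include <cstdio>
#include <vector>

struct ToyCompute { int id; void enqueue() { std::printf("enqueue compute %d\n", id); } };

struct ToyMgr {
    int pendingUngrids = 0;                 // plays the role of ungridForcesCount/recipEvirCount
    std::vector<ToyCompute*> heldComputes;

    // Returns 1 ("no work yet") when the compute must wait, 0 when it can be enqueued.
    int noWork(ToyCompute *c) {
        if (pendingUngrids == 0) return 0;  // safe to enqueue as usual
        heldComputes.push_back(c);          // hold until pending work drains
        return 1;
    }
    void workDrained() {                    // called once the outstanding counts reach zero
        pendingUngrids = 0;
        for (ToyCompute *c : heldComputes) c->enqueue();
        heldComputes.clear();
    }
};

int main() {
    ToyMgr mgr; ToyCompute a{1};
    mgr.pendingUngrids = 2;
    if (mgr.noWork(&a)) std::printf("compute %d held\n", a.id);
    mgr.workDrained();                      // releases held computes
}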

◆ setMgr()

void ComputePme::setMgr ( ComputePmeMgr * mgr )
inline

Definition at line 56 of file ComputePme.h.

56 { myMgr = mgr; }

◆ ungridForces()

void ComputePme::ungridForces ( )

Definition at line 4043 of file ComputePme.C.

References ADD_VECTOR_OBJECT, ComputePmeUtil::alchDecouple, ComputePmeUtil::alchFepOn, ComputePmeUtil::alchOn, ResizeArray< Elem >::begin(), Box< Owner, Data >::close(), PmeRealSpace::compute_forces(), endi(), Results::f, Patch::flags, Patch::getNumAtoms(), iERROR(), iout, Flags::lattice, ComputePmeUtil::lesFactor, ComputePmeUtil::lesOn, NAMD_bug(), ComputePmeUtil::numGrids, Node::Object(), Box< Owner, Data >::open(), ComputePmeUtil::pairOn, ResizeArray< Elem >::resize(), scale_forces(), ComputePmeUtil::selfOn, Compute::sequence(), Node::simParameters, simParams, Results::slow, Flags::step, Vector::x, Vector::y, and Vector::z.

Referenced by doWork().

4043  {
4044 
4045  if ( sequence() != myMgr->compute_sequence ) NAMD_bug("ComputePme sequence mismatch in ungridForces()");
4046 
4046 
4047  SimParameters *simParams = Node::Object()->simParameters;
4048 
4049  localResults_alloc.resize(numLocalAtoms* ((numGrids>1 || selfOn)?2:1));
4050  Vector *localResults = localResults_alloc.begin();
4051  Vector *gridResults;
4052 
4053  if ( alchOn || lesOn || selfOn || pairOn ) {
4054  for(int i=0; i<numLocalAtoms; ++i) { localResults[i] = 0.; }
4055  gridResults = localResults + numLocalAtoms;
4056  } else {
4057  gridResults = localResults;
4058  }
4059 
4060  Vector pairForce = 0.;
4061  Lattice &lattice = patch->flags.lattice;
4062  int g = 0;
4063  if(!simParams->commOnly) {
4064  for ( g=0; g<numGrids; ++g ) {
4065 #ifdef NETWORK_PROGRESS
4066  CmiNetworkProgress();
4067 #endif
4068 
4069 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
4070  if ( offload ) {
4071  int errfound = 0;
4072  for ( int n=numGridAtoms[g], i=0; i<n; ++i ) {
4073  // Neither isnan() nor x != x worked when testing on Cray; this does.
4074  if ( ((int*)f_data_host)[3*i] == 0x7fffffff ) { errfound = 1; } // CUDA NaN
4075  gridResults[i].x = f_data_host[3*i];
4076  gridResults[i].y = f_data_host[3*i+1];
4077  gridResults[i].z = f_data_host[3*i+2];
4078  }
4079  if ( errfound ) {
4080  int errcount = 0;
4081  for ( int n=numGridAtoms[g], i=0; i<n; ++i ) {
4082  float f = f_data_host[3*i];
4083  if ( ((int*)f_data_host)[3*i] == 0x7fffffff ) { // CUDA NaN
4084  ++errcount;
4085  gridResults[i] = 0.;
4086  }
4087  }
4088  iout << iERROR << "Stray PME grid charges detected: "
4089  << errcount << " atoms on pe " << CkMyPe() << "\n" << endi;
4090  }
4091  } else
4092 #endif // NAMD_CUDA
4093  {
4094  myRealSpace[g]->compute_forces(myMgr->q_arr+g*myMgr->fsize, localGridData[g], gridResults);
4095  }
4096  scale_forces(gridResults, numGridAtoms[g], lattice);
4097 
4098  if (alchOn) {
4099  float scale = 1.;
4100  BigReal elecLambdaUp, elecLambdaDown;
4101  BigReal alchLambda = simParams->getCurrentLambda(patch->flags.step);
4102  myMgr->alchLambda = alchLambda;
4103  BigReal alchLambda2 = simParams->getCurrentLambda2(patch->flags.step);
4104  myMgr->alchLambda2 = alchLambda2;
4105  elecLambdaUp = simParams->getElecLambda(alchLambda);
4106  elecLambdaDown = simParams->getElecLambda(1. - alchLambda);
4107 
4108  if ( g == 0 ) scale = elecLambdaUp;
4109  else if ( g == 1 ) scale = elecLambdaDown;
4110  else if ( g == 2 ) scale = (elecLambdaUp + elecLambdaDown - 1)*(-1);
4111 
4112  if (alchDecouple) {
4113  if ( g == 2 ) scale = 1 - elecLambdaUp;
4114  else if ( g == 3 ) scale = 1 - elecLambdaDown;
4115  else if ( g == 4 ) scale = (elecLambdaUp + elecLambdaDown - 1)*(-1);
4116  }
4117  int nga = 0;
4118  if (!alchDecouple) {
4119  if (g < 2 ) {
4120  for(int i=0; i<numLocalAtoms; ++i) {
4121  if ( localPartition[i] == 0 || localPartition[i] == (g+1) || localPartition[i] == (g+3) ) {
4122  // (g=0: only partition 0 and partiton 1 and partion 3)
4123  // (g=1: only partition 0 and partiton 2 and partion 4)
4124  localResults[i] += gridResults[nga++] * scale;
4125  }
4126  }
4127  } else {
4128  for(int i=0; i<numLocalAtoms; ++i) {
4129  if ( localPartition[i] == 0 ) {
4130  // (g=2: only partition 0)
4131  localResults[i] += gridResults[nga++] * scale;
4132  }
4133  }
4134  }
4135  } else { // alchDecouple
4136  if ( g < 2 ) {
4137  for(int i=0; i<numLocalAtoms; ++i) {
4138  if ( localPartition[i] == 0 || localPartition[i] == (g+1) ) {
4139  // g = 0: partition 0 or partition 1
4140  // g = 1: partition 0 or partition 2
4141  localResults[i] += gridResults[nga++] * scale;
4142  }
4143  }
4144  }
4145  else {
4146  for(int i=0; i<numLocalAtoms; ++i) {
4147  if ( localPartition[i] == (g-1) || localPartition[i] == (g-4)) {
4148  // g = 2: partition 1 only
4149  // g = 3: partition 2 only
4150  // g = 4: partition 0 only
4151  localResults[i] += gridResults[nga++] * scale;
4152  }
4153  }
4154  }
4155  }
4156  } else if ( lesOn ) {
4157  float scale = 1.;
4158  if ( alchFepOn ) {
4159  BigReal alchLambda = simParams->getCurrentLambda(patch->flags.step);
4160  myMgr->alchLambda = alchLambda;
4161  BigReal alchLambda2 = simParams->getCurrentLambda2(patch->flags.step);
4162  myMgr->alchLambda2 = alchLambda2;
4163  if ( g == 0 ) scale = alchLambda;
4164  else if ( g == 1 ) scale = 1. - alchLambda;
4165  } else if ( lesOn ) {
4166  scale = 1.0 / (float)lesFactor;
4167  }
4168  int nga = 0;
4169  for(int i=0; i<numLocalAtoms; ++i) {
4170  if ( localPartition[i] == 0 || localPartition[i] == (g+1) ) {
4171  localResults[i] += gridResults[nga++] * scale;
4172  }
4173  }
4174  } else if ( selfOn ) {
4175  PmeParticle *lgd = localGridData[g];
4176  int nga = 0;
4177  for(int i=0; i<numLocalAtoms; ++i) {
4178  if ( localPartition[i] == 1 ) {
4179  pairForce += gridResults[nga]; // should add up to almost zero
4180  localResults[i] += gridResults[nga++];
4181  }
4182  }
4183  } else if ( pairOn ) {
4184  if ( g == 0 ) {
4185  int nga = 0;
4186  for(int i=0; i<numLocalAtoms; ++i) {
4187  if ( localPartition[i] == 1 ) {
4188  pairForce += gridResults[nga];
4189  }
4190  if ( localPartition[i] == 1 || localPartition[i] == 2 ) {
4191  localResults[i] += gridResults[nga++];
4192  }
4193  }
4194  } else if ( g == 1 ) {
4195  int nga = 0;
4196  for(int i=0; i<numLocalAtoms; ++i) {
4197  if ( localPartition[i] == g ) {
4198  pairForce -= gridResults[nga]; // should add up to almost zero
4199  localResults[i] -= gridResults[nga++];
4200  }
4201  }
4202  } else {
4203  int nga = 0;
4204  for(int i=0; i<numLocalAtoms; ++i) {
4205  if ( localPartition[i] == g ) {
4206  localResults[i] -= gridResults[nga++];
4207  }
4208  }
4209  }
4210  }
4211  }
4212  }
4213 
4214  Vector *results_ptr = localResults;
4215 
4216  // add in forces
4217  {
4218  Results *r = forceBox->open();
4219  Force *f = r->f[Results::slow];
4220  int numAtoms = patch->getNumAtoms();
4221 
4222  if ( ! myMgr->strayChargeErrors && ! simParams->commOnly ) {
4223  for(int i=0; i<numAtoms; ++i) {
4224  f[i].x += results_ptr->x;
4225  f[i].y += results_ptr->y;
4226  f[i].z += results_ptr->z;
4227  ++results_ptr;
4228  }
4229  }
4230  forceBox->close(&r);
4231  }
4232 
4233  if ( pairOn || selfOn ) {
4234  ADD_VECTOR_OBJECT(myMgr->reduction,REDUCTION_PAIR_ELECT_FORCE,pairForce);
4235  }
4236 
4237 }
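In alchemical runs ungridForces() weights each grid's contribution by an electrostatic lambda factor before accumulating it into the per-atom results: grid 0 by elecLambdaUp, grid 1 by elecLambdaDown, and grid 2 by -(elecLambdaUp + elecLambdaDown - 1) in the non-decoupled three-grid case (alchDecouple adds further grids). Below is a compact sketch of that weighting; gridScale and the example lambda values are illustrative only, and in NAMD the real factors come from simParams->getElecLambda().

#include <cstdio>

// Per-grid electrostatic scale factors for the non-decoupled three-grid
// alchemical case, mirroring the g == 0/1/2 branches in ungridForces().
double gridScale(int g, double elecLambdaUp, double elecLambdaDown) {
    if (g == 0) return elecLambdaUp;
    if (g == 1) return elecLambdaDown;
    return -(elecLambdaUp + elecLambdaDown - 1.0);   // g == 2: non-alchemical grid
}

int main() {
    double up = 0.75, down = 0.25;   // example lambda-dependent weights
    for (int g = 0; g < 3; ++g)
        std::printf("grid %d scale %.2f\n", g, gridScale(g, up, down));
    // Each grid's forces are multiplied by its scale before being
    // accumulated into the per-atom localResults, as in the loops above.
}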

Friends And Related Function Documentation

◆ ComputePmeMgr

friend class ComputePmeMgr
friend

Definition at line 58 of file ComputePme.h.


The documentation for this class was generated from the following files:

ComputePme.h
ComputePme.C