00001
00007 #ifdef NAMD_FFTW
00008 #ifdef NAMD_FFTW_3
00009 #include <fftw3.h>
00010 #else
00011 #ifdef NAMD_FFTW_NO_TYPE_PREFIX
00012 #include <fftw.h>
00013 #include <rfftw.h>
00014 #else
00015 #include <sfftw.h>
00016 #include <srfftw.h>
00017 #endif
00018 #endif
00019 #endif
00020
00021 #include <assert.h>
00022
00023 #include "InfoStream.h"
00024 #include "Node.h"
00025 #include "PatchMap.h"
00026 #include "PatchMap.inl"
00027 #include "AtomMap.h"
00028 #include "OptPme.h"
00029 #include "OptPmeMgr.decl.h"
00030 #include "OptPmeRealSpace.h"
00031 #include "PmeKSpace.h"
00032 #include "ComputeNonbondedUtil.h"
00033 #include "PatchMgr.h"
00034 #include "Molecule.h"
00035 #include "ReductionMgr.h"
00036
00037 #include "ComputeMgr.decl.h"
00038
00039 #define MIN_DEBUG_LEVEL 3
00040 #include "Debug.h"
00041 #include "SimParameters.h"
00042 #include "WorkDistrib.h"
00043 #include "varsizemsg.h"
00044 #include "Random.h"
00045 #include "Priorities.h"
00046 #include "PmeBase.inl"
00047
00048 extern char *pencilPMEProcessors;
00049
00050 #include "fftlib.h"
00051 #include "fftmap.h"
00052
00053
00054 int many_to_many_start = 0x7fffffff;
00055
00056 class OptPmeMgr : public BOCclass {
00057 public:
00058 friend class OptPmeCompute;
00059 OptPmeMgr();
00060 ~OptPmeMgr();
00061
00062 void initialize(CkQdMsg*);
00063 void initialize_pencils(CkQdMsg*);
00064 void activate_pencils(CkQdMsg*);
00065 void recvArrays(CProxy_OptPmeXPencil, CProxy_OptPmeYPencil, CProxy_OptPmeZPencil);
00066
00067 void recvUngrid(OptPmeGridMsg *);
00068 void ungridCalc(OptPmeDummyMsg *);
00069 void ungridCalc_subcompute(OptPmeSubComputeMsg *);
00070 void ungridCalc_subcompute_done(OptPmeSubComputeMsg *);
00071 void doWorkOnPeer(OptPmeSubComputeMsg *);
00072 void recvEvir (CkReductionMsg *msg);
00073
00074 void setCompute(OptPmeCompute *c) { pmeCompute = c; c->setMgr(this); }
00075
00076 private:
00077 CProxy_OptPmeMgr pmeProxy;
00078 CProxy_OptPmeMgr pmeProxyDir;
00079 OptPmeCompute *pmeCompute;
00080 PmeGrid myGrid;
00081 PmeKSpace *myKSpace;
00082
00083 CProxy_OptPmeXPencil xPencil;
00084 CProxy_OptPmeYPencil yPencil;
00085 CProxy_OptPmeZPencil zPencil;
00086 int numPencilsActive;
00087 int ungrid_count;
00088 SubmitReduction *reduction;
00089 SubmitReduction *amd_reduction;
00090 int _iter;
00091 void *handle;
00092 bool constant_pressure;
00093 int subcompute_count;
00094
00095 int peersAllocated;
00096 int peers [SUBCOMPUTE_NPAR];
00097 OptPmeSubComputeMsg *subcompute_msgs[SUBCOMPUTE_NPAR];
00098 };
00099
00100
00101 void pme_f2d (double *dst, float *src, int N);
00102 void pme_d2f (float *dst, double *src, int N);
00103
00104 static inline void initializePmeGrid (SimParameters *simParams, PmeGrid &grid) {
00105 int xBlocks = 0, yBlocks = 0, zBlocks= 0;
00106
00107 if ( simParams->PMEPencils > 1 ) {
00108 xBlocks = yBlocks = zBlocks = simParams->PMEPencils;
00109 } else {
00110 int nb2 = ( simParams->PMEGridSizeX * simParams->PMEGridSizeY
00111 * simParams->PMEGridSizeZ ) / simParams->PMEMinPoints;
00112 if ( nb2 > CkNumPes() ) nb2 = CkNumPes();
00113 if ( nb2 < 1 ) nb2 = 1;
00114 int nb = (int) sqrt((float)nb2);
00115 if ( nb < 1 ) nb = 1;
00116 xBlocks = zBlocks = nb;
00117 yBlocks = nb2 / nb;
00118 }
00119
00120 int dimx = simParams->PMEGridSizeX;
00121 int bx = 1 + ( dimx - 1 ) / xBlocks;
00122 xBlocks = 1 + ( dimx - 1 ) / bx;
00123
00124 int dimy = simParams->PMEGridSizeY;
00125 int by = 1 + ( dimy - 1 ) / yBlocks;
00126 yBlocks = 1 + ( dimy - 1 ) / by;
00127
00128 int dimz = simParams->PMEGridSizeZ / 2 + 1;
00129 int bz = 1 + ( dimz - 1 ) / zBlocks;
00130 zBlocks = 1 + ( dimz - 1 ) / bz;
00131
00132 grid.xBlocks = xBlocks;
00133 grid.yBlocks = yBlocks;
00134 grid.zBlocks = zBlocks;
00135
00136 grid.K1 = simParams->PMEGridSizeX;
00137 grid.K2 = simParams->PMEGridSizeY;
00138 grid.K3 = simParams->PMEGridSizeZ;
00139 grid.order = simParams->PMEInterpOrder;
00140 grid.dim2 = grid.K2;
00141 grid.dim3 = 2 * (grid.K3/2 + 1);
00142
00143 grid.block1 = ( grid.K1 + xBlocks - 1 ) / xBlocks;
00144 grid.block2 = ( grid.K2 + yBlocks - 1 ) / yBlocks;
00145 grid.block3 = ( grid.K3/2 + 1 + zBlocks - 1 ) / zBlocks;
00146 }
00147
00148 #ifdef NAMD_FFTW
00149 static CmiNodeLock fftw_plan_lock;
00150 #endif
00151
00152
00153 static inline void scale_n_copy_coordinates(CompAtom *x, PmeParticle p[],
00154 int &N,
00155 Lattice &lattice, PmeGrid grid,
00156
00157 double xmin, double xlen,
00158 double ymin, double ylen,
00159 double zmin, double zlen,
00160 int &scg) {
00161 Vector origin = lattice.origin();
00162 Vector recip1 = lattice.a_r();
00163 Vector recip2 = lattice.b_r();
00164 Vector recip3 = lattice.c_r();
00165 double ox = origin.x;
00166 double oy = origin.y;
00167 double oz = origin.z;
00168 double r1x = recip1.x;
00169 double r1y = recip1.y;
00170 double r1z = recip1.z;
00171 double r2x = recip2.x;
00172 double r2y = recip2.y;
00173 double r2z = recip2.z;
00174 double r3x = recip3.x;
00175 double r3y = recip3.y;
00176 double r3z = recip3.z;
00177 int K1 = grid.K1;
00178 int K2 = grid.K2;
00179 int K3 = grid.K3;
00180
00181 const BigReal coulomb_sqrt = sqrt( COULOMB * ComputeNonbondedUtil::scaling
00182 * ComputeNonbondedUtil::dielectric_1 );
00183
00184
00185 int natoms = 0;
00186 for (int i=0; i<N; i++) {
00187 double px = x[i].position.x - ox;
00188 double py = x[i].position.y - oy;
00189 double pz = x[i].position.z - oz;
00190 double sx = px*r1x + py*r1y + pz*r1z;
00191 double sy = px*r2x + py*r2y + pz*r2z;
00192 double sz = px*r3x + py*r3y + pz*r3z;
00193 p[natoms].x = K1 * ( sx - floor(sx) );
00194 p[natoms].y = K2 * ( sy - floor(sy) );
00195 p[natoms].z = K3 * ( sz - floor(sz) );
00196 #ifndef ARCH_POWERPC
00197
00198
00199 if ( p[natoms].x == K1 ) p[natoms].x = 0;
00200 if ( p[natoms].y == K2 ) p[natoms].y = 0;
00201 if ( p[natoms].z == K3 ) p[natoms].z = 0;
00202 #endif
00203
00204 #if 1 //stray charge detection
00205 BigReal u1,u2,u3;
00206 u1 = (int) (p[natoms].x - xmin);
00207 if (u1 >= grid.K1) u1 -= grid.K1;
00208 u2 = (int) (p[natoms].y - ymin);
00209 if (u2 >= grid.K2) u2 -= grid.K2;
00210 u3 = (int) (p[natoms].z - zmin);
00211 if (u3 >= grid.K3) u3 -= grid.K3;
00212
00213 if ( (u1 < 0.0) || (u1 >= xlen) ||
00214 (u2 < 0.0) || (u2 >= ylen) ||
00215 (u3 < 0.0) || (u3 >= zlen) ) {
00216 scg ++;
00217 continue;
00218 }
00219 #endif
00220
00221 p[natoms].cg = coulomb_sqrt * x[i].charge;
00222 natoms ++;
00223 }
00224 N = natoms;
00225 }
00226
00227
00228 OptPmeMgr::OptPmeMgr() : pmeProxy(thisgroup),
00229 pmeProxyDir(thisgroup), pmeCompute(0) {
00230
00231 CkpvAccess(BOCclass_group).computePmeMgr = thisgroup;
00232
00233 myKSpace = 0;
00234 ungrid_count = 0;
00235 peersAllocated = 0;
00236
00237 #ifdef NAMD_FFTW
00238 if ( CmiMyRank() == 0 ) {
00239 fftw_plan_lock = CmiCreateLock();
00240 }
00241 #endif
00242 }
00243
00244
00245 void OptPmeMgr::recvArrays(CProxy_OptPmeXPencil x, CProxy_OptPmeYPencil y, CProxy_OptPmeZPencil z) {
00246 xPencil = x; yPencil = y; zPencil = z;
00247 }
00248
00249 void OptPmeMgr::initialize(CkQdMsg *msg) {
00250 delete msg;
00251
00252 _iter = 0;
00253
00254 #if CHARM_VERSION > 60000
00255 handle = CmiDirect_manytomany_allocate_handle ();
00256 #endif
00257
00258 SimParameters *simParams = Node::Object()->simParameters;
00259 PatchMap *patchMap = PatchMap::Object();
00260
00261 initializePmeGrid (simParams, myGrid);
00262
00263 if (simParams->langevinPistonOn || simParams->berendsenPressureOn)
00264 constant_pressure = true;
00265 else
00266 constant_pressure = false;
00267
00268 bool useManyToMany = simParams->useManyToMany;
00269
00270
00271
00272
00273
00274
00275 int npes = patchMap->numPatches();
00276 if (npes < myGrid.xBlocks * myGrid.yBlocks)
00277 npes = myGrid.xBlocks * myGrid.yBlocks;
00278 if (npes < myGrid.zBlocks * myGrid.xBlocks)
00279 npes = myGrid.zBlocks * myGrid.xBlocks;
00280 if (npes < myGrid.yBlocks * myGrid.zBlocks)
00281 npes = myGrid.yBlocks * myGrid.zBlocks;
00282
00283 if (npes >= CkNumPes()) {
00284 if (CkMyPe() == 0)
00285 printf ("Warning : Not enough processors for the many-to-many optimization \n");
00286 useManyToMany = false;
00287 }
00288
00289 #if CHARM_VERSION > 60000
00290 if (useManyToMany) {
00291 if (CkMyPe() == 0)
00292 printf ("Enabling the Many-to-many optimization\n");
00293
00294 many_to_many_start = MANY_TO_MANY_START;
00295 }
00296 #endif
00297
00298 if (CkMyRank() == 0) {
00299 pencilPMEProcessors = new char [CkNumPes()];
00300 memset (pencilPMEProcessors, 0, sizeof(char) * CkNumPes());
00301 }
00302
00303 if ( CkMyPe() == 0) {
00304 iout << iINFO << "PME using " << myGrid.xBlocks << " x " <<
00305 myGrid.yBlocks << " x " << myGrid.zBlocks <<
00306 " pencil grid for FFT and reciprocal sum.\n" << endi;
00307
00308 CProxy_OptPmePencilMapZ mapz;
00309 CProxy_OptPmePencilMapY mapy;
00310 CProxy_OptPmePencilMapX mapx;
00311
00312 mapz = CProxy_OptPmePencilMapZ::ckNew(myGrid.xBlocks, myGrid.yBlocks, myGrid.zBlocks);
00313 mapy = CProxy_OptPmePencilMapY::ckNew(myGrid.xBlocks, myGrid.yBlocks, myGrid.zBlocks);
00314 mapx = CProxy_OptPmePencilMapX::ckNew(myGrid.xBlocks, myGrid.yBlocks, myGrid.zBlocks);
00315
00316 CkArrayOptions optsz;
00317 optsz.setMap (mapz);
00318 CkArrayOptions optsy;
00319 optsy.setMap (mapy);
00320 CkArrayOptions optsx;
00321 optsx.setMap (mapx);
00322
00323 zPencil = CProxy_OptPmeZPencil::ckNew(optsz);
00324 yPencil = CProxy_OptPmeYPencil::ckNew(optsy);
00325 xPencil = CProxy_OptPmeXPencil::ckNew(optsx);
00326
00327 int x,y,z;
00328 for (x = 0; x < myGrid.xBlocks; ++x)
00329 for (y = 0; y < myGrid.yBlocks; ++y ) {
00330 zPencil(x,y,0).insert();
00331 }
00332 zPencil.doneInserting();
00333
00334 for (z = 0; z < myGrid.zBlocks; ++z )
00335 for (x = 0; x < myGrid.xBlocks; ++x ) {
00336 yPencil(x,0,z).insert();
00337 }
00338 yPencil.doneInserting();
00339
00340 for (y = 0; y < myGrid.yBlocks; ++y )
00341 for (z = 0; z < myGrid.zBlocks; ++z ) {
00342 xPencil(0,y,z).insert();
00343 }
00344 xPencil.doneInserting();
00345
00346 pmeProxy.recvArrays(xPencil,yPencil,zPencil);
00347 OptPmePencilInitMsgData msgdata;
00348 msgdata.grid = myGrid;
00349 msgdata.xBlocks = myGrid.xBlocks;
00350 msgdata.yBlocks = myGrid.yBlocks;
00351 msgdata.zBlocks = myGrid.zBlocks;
00352 msgdata.xPencil = xPencil;
00353 msgdata.yPencil = yPencil;
00354 msgdata.zPencil = zPencil;
00355 msgdata.constant_pressure = constant_pressure;
00356
00357 CkCallback cb (CkIndex_OptPmeMgr::recvEvir(NULL), thisProxy[0]);
00358 msgdata.cb_energy = cb;
00359
00360 msgdata.pmeProxy = pmeProxyDir;
00361 xPencil.init(new OptPmePencilInitMsg(msgdata));
00362 yPencil.init(new OptPmePencilInitMsg(msgdata));
00363 zPencil.init(new OptPmePencilInitMsg(msgdata));
00364
00365 #if 0
00366 reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_BASIC);
00367 if (simParams->accelMDOn) {
00368 amd_reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_AMD);
00369 } else {
00370 amd_reduction = NULL;
00371 }
00372 #endif
00373
00374 #ifndef NAMD_FFTW
00375 NAMD_die("Sorry, FFTW must be compiled in to use PME.");
00376 #endif
00377 }
00378
00379 }
00380
00381 void OptPmeMgr::initialize_pencils(CkQdMsg *msg) {
00382 delete msg;
00383
00384 SimParameters *simParams = Node::Object()->simParameters;
00385
00386 PatchMap *patchMap = PatchMap::Object();
00387 Lattice lattice = simParams->lattice;
00388 BigReal sysdima = lattice.a_r().unit() * lattice.a();
00389 BigReal sysdimb = lattice.b_r().unit() * lattice.b();
00390 BigReal cutoff = simParams->cutoff;
00391 BigReal patchdim = simParams->patchDimension;
00392 int numPatches = patchMap->numPatches();
00393
00394
00395
00396 char *pencilActive = new char[myGrid.xBlocks*myGrid.yBlocks];
00397 for ( int i=0; i<myGrid.xBlocks; ++i ) {
00398 for ( int j=0; j<myGrid.yBlocks; ++j ) {
00399 pencilActive[i*myGrid.yBlocks+j] = 0;
00400 }
00401 }
00402
00403
00404 assert (patchMap->numPatchesOnNode(CkMyPe()) <= 1);
00405 for ( int pid=0; pid < numPatches; ++pid ) {
00406 int pnode = patchMap->node(pid);
00407 if ( pnode != CkMyPe() ) continue;
00408
00409 BigReal minx = patchMap->min_a(pid);
00410 BigReal maxx = patchMap->max_a(pid);
00411 BigReal margina = 0.5 * ( patchdim - cutoff ) / sysdima;
00412
00413 int min1 = ((int) floor(myGrid.K1 * (minx - margina))) - myGrid.order + 1;
00414 int max1 = ((int) floor(myGrid.K1 * (maxx + margina)));
00415
00416 BigReal miny = patchMap->min_b(pid);
00417 BigReal maxy = patchMap->max_b(pid);
00418 BigReal marginb = 0.5 * ( patchdim - cutoff ) / sysdimb;
00419
00420 int min2 = ((int) floor(myGrid.K2 * (miny - marginb))) - myGrid.order + 1;
00421 int max2 = ((int) floor(myGrid.K2 * (maxy + marginb)));
00422
00423 for ( int i=min1; i<=max1; ++i ) {
00424 int ix = i;
00425 while ( ix >= myGrid.K1 ) ix -= myGrid.K1;
00426 while ( ix < 0 ) ix += myGrid.K1;
00427 for ( int j=min2; j<=max2; ++j ) {
00428 int jy = j;
00429 while ( jy >= myGrid.K2 ) jy -= myGrid.K2;
00430 while ( jy < 0 ) jy += myGrid.K2;
00431 pencilActive[(ix / myGrid.block1)*myGrid.yBlocks + (jy / myGrid.block2)] = 1;
00432 }
00433 }
00434 }
00435
00436 numPencilsActive = 0;
00437 for ( int i=0; i<myGrid.xBlocks; ++i ) {
00438 for ( int j=0; j<myGrid.yBlocks; ++j ) {
00439 if ( pencilActive[i*myGrid.yBlocks+j] ) {
00440 ++numPencilsActive;
00441
00442 zPencil(i,j,0).dummyRecvGrid(CkMyPe(),0);
00443 }
00444 }
00445 }
00446
00447 ungrid_count = numPencilsActive;
00448 delete [] pencilActive;
00449 }
00450
00451
00452 void OptPmeMgr::activate_pencils(CkQdMsg *msg) {
00453 if ( CkMyPe() == 0 ) zPencil.dummyRecvGrid(CkMyPe(),1);
00454 }
00455
00456
00457 OptPmeMgr::~OptPmeMgr() {
00458 delete myKSpace;
00459 }
00460
00461 void OptPmeMgr::recvUngrid(OptPmeGridMsg *msg) {
00462 if ( ungrid_count == 0 ) {
00463 NAMD_bug("Message order failure in OptPmeMgr::recvUngrid\n");
00464 }
00465
00466 pmeCompute->copyPencils(msg);
00467 delete msg;
00468 --ungrid_count;
00469
00470 if ( ungrid_count == 0 ) {
00471
00472 ungridCalc(NULL);
00473 }
00474 }
00475
00476 void OptPmeMgr::ungridCalc(OptPmeDummyMsg *dmsg) {
00477 pmeCompute->ungridForces_init();
00478 if ( CmiMyNodeSize() >= SUBCOMPUTE_NPAR ) {
00479 int npar = SUBCOMPUTE_NPAR;
00480 OptPmeSubComputeMsg *smsg = NULL;
00481
00482 if (!peersAllocated) {
00483 peersAllocated = 1;
00484 int next_rank = CmiMyRank();
00485 PatchMap *patchMap = PatchMap::Object();
00486
00487 for (int i = 1; i < npar; ++i) {
00488 smsg = new (PRIORITY_SIZE) OptPmeSubComputeMsg;
00489 subcompute_msgs[i] = smsg;
00490 smsg->src_pe = CkMyPe();
00491 smsg->compute = pmeCompute;
00492
00493 next_rank ++;
00494 if (next_rank >= CmiMyNodeSize())
00495 next_rank = 0;
00496 int n = 0;
00497 int nr = next_rank;
00498 while(n < CmiMyNodeSize() &&
00499 patchMap->numPatchesOnNode(CmiNodeFirst(CmiMyNode())+nr) > 0)
00500 {
00501 nr ++;
00502 if (nr >= CmiMyNodeSize())
00503 nr = 0;
00504 n++;
00505 }
00506 if (n < CmiMyNodeSize())
00507 next_rank = nr;
00508
00509 smsg->dest = next_rank;
00510 }
00511
00512
00513 smsg = new (PRIORITY_SIZE) OptPmeSubComputeMsg;
00514 subcompute_msgs[0] = smsg;
00515 smsg->src_pe = CkMyPe();
00516 smsg->compute = pmeCompute;
00517 smsg->dest = CmiMyRank();
00518 }
00519
00520 int start = 0;
00521 int nlocal = pmeCompute->getNumLocalAtoms();
00522
00523 if (nlocal < npar)
00524 npar = nlocal;
00525 if (npar == 0)
00526 npar = 1;
00527 int n_per_iter = nlocal / npar;
00528
00529 subcompute_count = npar;
00530
00531 for (int i = 0; i < npar; ++i) {
00532 smsg = subcompute_msgs[i];
00533 smsg->start = start;
00534 smsg->end = start + n_per_iter;
00535 start += n_per_iter;
00536 if (i == npar - 1)
00537 smsg->end = nlocal;
00538 pmeProxy[CmiNodeFirst(CmiMyNode())+smsg->dest].ungridCalc_subcompute(smsg);
00539 }
00540 }
00541 else {
00542 pmeCompute->ungridForces_compute(0, 0);
00543 pmeCompute->ungridForces_finalize();
00544 ungrid_count = numPencilsActive;
00545 }
00546 }
00547
00548 void OptPmeMgr::ungridCalc_subcompute(OptPmeSubComputeMsg *msg){
00549 OptPmeCompute *compute = (OptPmeCompute *) msg->compute;
00550 compute->ungridForces_compute(msg->start, msg->end);
00551 pmeProxy[msg->src_pe].ungridCalc_subcompute_done(msg);
00552 }
00553
00554 void OptPmeMgr::ungridCalc_subcompute_done(OptPmeSubComputeMsg *msg){
00555 subcompute_count --;
00556
00557 if (subcompute_count == 0) {
00558 pmeCompute->ungridForces_finalize();
00559 ungrid_count = numPencilsActive;
00560 }
00561 }
00562
00563 void OptPmeMgr::doWorkOnPeer(OptPmeSubComputeMsg *msg) {
00564 OptPmeCompute *compute = (OptPmeCompute *) msg->compute;
00565 compute->doWorkOnPeer();
00566
00567 }
00568
00569 void OptPmeMgr::recvEvir (CkReductionMsg *msg) {
00570
00571 assert (CkMyPe() == 0);
00572
00573 double *data = (double *) msg->getData();
00574 assert (msg->getSize() == 7 * sizeof(double));
00575
00576
00577
00578 double scale = 1.;
00579 reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += data[0] * scale;
00580 reduction->item(REDUCTION_VIRIAL_SLOW_XX) += data[1] * scale;
00581 reduction->item(REDUCTION_VIRIAL_SLOW_XY) += data[2] * scale;
00582 reduction->item(REDUCTION_VIRIAL_SLOW_XZ) += data[3] * scale;
00583 reduction->item(REDUCTION_VIRIAL_SLOW_YX) += data[2] * scale;
00584 reduction->item(REDUCTION_VIRIAL_SLOW_YY) += data[4] * scale;
00585 reduction->item(REDUCTION_VIRIAL_SLOW_YZ) += data[5] * scale;
00586 reduction->item(REDUCTION_VIRIAL_SLOW_ZX) += data[3] * scale;
00587 reduction->item(REDUCTION_VIRIAL_SLOW_ZY) += data[5] * scale;
00588 reduction->item(REDUCTION_VIRIAL_SLOW_ZZ) += data[6] * scale;
00589
00590 if (amd_reduction) {
00591 amd_reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += data[0] * scale;
00592 amd_reduction->item(REDUCTION_VIRIAL_SLOW_XX) += data[1] * scale;
00593 amd_reduction->item(REDUCTION_VIRIAL_SLOW_XY) += data[2] * scale;
00594 amd_reduction->item(REDUCTION_VIRIAL_SLOW_XZ) += data[3] * scale;
00595 amd_reduction->item(REDUCTION_VIRIAL_SLOW_YX) += data[2] * scale;
00596 amd_reduction->item(REDUCTION_VIRIAL_SLOW_YY) += data[4] * scale;
00597 amd_reduction->item(REDUCTION_VIRIAL_SLOW_YZ) += data[5] * scale;
00598 amd_reduction->item(REDUCTION_VIRIAL_SLOW_ZX) += data[3] * scale;
00599 amd_reduction->item(REDUCTION_VIRIAL_SLOW_ZY) += data[5] * scale;
00600 amd_reduction->item(REDUCTION_VIRIAL_SLOW_ZZ) += data[6] * scale;
00601 }
00602
00603 delete msg;
00604
00605 SimParameters *simParams = Node::Object()->simParameters;
00606 int fef = simParams->fullElectFrequency;
00607 for (int i = 0; i < fef; i++) {
00608 reduction->submit();
00609 if (amd_reduction) amd_reduction->submit();
00610 }
00611 }
00612
00613 OptPmeCompute::OptPmeCompute(ComputeID c) :
00614 ComputeHomePatches(c)
00615 {
00616 DebugM(4,"OptPmeCompute created.\n");
00617
00618 CProxy_OptPmeMgr::ckLocalBranch(
00619 CkpvAccess(BOCclass_group).computePmeMgr)->setCompute(this);
00620
00621 _initialized = false;
00622
00623 useAvgPositions = 1;
00624 localResults = NULL;
00625
00626 reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_BASIC);
00627 SimParameters *simParams = Node::Object()->simParameters;
00628 if (simParams->accelMDOn) {
00629 amd_reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_AMD);
00630 } else {
00631 amd_reduction = NULL;
00632 }
00633 }
00634
00635 void recv_ungrid_done (void *m) {
00636 OptPmeDummyMsg *msg = (OptPmeDummyMsg *) m;
00637 CProxy_OptPmeMgr pmeProxy (CkpvAccess(BOCclass_group).computePmeMgr);
00638 pmeProxy[msg->to_pe].ungridCalc (msg);
00639 }
00640
00641
00642 void OptPmeCompute::resetPatchCoordinates (const Lattice &lattice) {
00643 PatchMap *patchMap = PatchMap::Object();
00644 SimParameters *simParams = Node::Object()->simParameters;
00645
00646 BigReal sysdima = lattice.a_r().unit() * lattice.a();
00647 BigReal sysdimb = lattice.b_r().unit() * lattice.b();
00648 BigReal sysdimc = lattice.c_r().unit() * lattice.c();
00649 BigReal cutoff = simParams->cutoff;
00650 BigReal patchdim = simParams->patchDimension;
00651
00652 int pid = patchList[0].patchID;
00653 assert (patchList.size() == 1);
00654
00655
00656
00657 BigReal minx = patchMap->min_a(pid);
00658 BigReal maxx = patchMap->max_a(pid);
00659 BigReal margina = 0.5 * ( patchdim - cutoff ) / sysdima;
00660
00661 int min1 = ((int) floor(myGrid.K1 * (minx - margina))) - myGrid.order + 1;
00662 int max1 = ((int) floor(myGrid.K1 * (maxx + margina)));
00663
00664 BigReal miny = patchMap->min_b(pid);
00665 BigReal maxy = patchMap->max_b(pid);
00666 BigReal marginb = 0.5 * ( patchdim - cutoff ) / sysdimb;
00667
00668 int min2 = ((int) floor(myGrid.K2 * (miny - marginb))) - myGrid.order + 1;
00669 int max2 = ((int) floor(myGrid.K2 * (maxy + marginb)));
00670
00671 BigReal minz = patchMap->min_c(pid);
00672 BigReal maxz = patchMap->max_c(pid);
00673 BigReal marginc = 0.5 * ( patchdim - cutoff ) / sysdimc;
00674
00675 int min3 = ((int) floor(myGrid.K3 * (minz - marginc))) - myGrid.order + 1;
00676 int max3 = ((int) floor(myGrid.K3 * (maxz + marginc)));
00677
00678 xlen = max1 - min1 + 1;
00679 xstart = min1;
00680
00681 ylen = max2 - min2 + 1;
00682 ystart = min2;
00683
00684 zlen = max3 - min3 + 1;
00685 zstart = min3;
00686 }
00687
00688 void OptPmeCompute::initializeOptPmeCompute () {
00689
00690 _initialized = true;
00691
00692 strayChargeErrors = 0;
00693
00694 SimParameters *simParams = Node::Object()->simParameters;
00695 PatchMap *patchMap = PatchMap::Object();
00696
00697 initializePmeGrid (simParams, myGrid);
00698
00699 alchFepOn = simParams->alchFepOn;
00700 alchThermIntOn = simParams->alchThermIntOn;
00701 lesOn = simParams->lesOn;
00702 pairOn = simParams->pairInteractionOn;
00703
00704 assert (!alchFepOn);
00705 assert (!alchThermIntOn);
00706 assert (!lesOn);
00707 assert (!pairOn);
00708
00709 qsize = myGrid.K1 * myGrid.dim2 * myGrid.dim3;
00710 fsize = myGrid.K1 * myGrid.dim2;
00711 q_arr = new double*[fsize];
00712 memset( (void*) q_arr, 0, fsize * sizeof(double*) );
00713
00714 assert (myMgr != NULL);
00715
00716 const Lattice lattice = simParams->lattice;
00717 resetPatchCoordinates (lattice);
00718
00719 PencilElement *pencilActive = new PencilElement [myGrid.xBlocks * myGrid.yBlocks];
00720 memset (pencilActive, 0, myGrid.xBlocks * myGrid.yBlocks * sizeof(PencilElement));
00721
00722 for ( int i=xstart; i<=xstart+xlen-1; ++i ) {
00723 int ix = i;
00724 while ( ix >= myGrid.K1 ) ix -= myGrid.K1;
00725 while ( ix < 0 ) ix += myGrid.K1;
00726 for ( int j=ystart; j<=ystart+ylen-1; ++j ) {
00727 int jy = j;
00728 while ( jy >= myGrid.K2 ) jy -= myGrid.K2;
00729 while ( jy < 0 ) jy += myGrid.K2;
00730
00731 int pencil_idx = (ix / myGrid.block1)*myGrid.yBlocks + (jy / myGrid.block2);
00732
00733 if (! pencilActive[pencil_idx].isActive) {
00734 pencilActive[pencil_idx].isActive = 1;
00735 pencilActive[pencil_idx].xmin = ix;
00736 pencilActive[pencil_idx].xmax = ix;
00737 pencilActive[pencil_idx].ymin = jy;
00738 pencilActive[pencil_idx].ymax = jy;
00739 pencilActive[pencil_idx].ib = ix / myGrid.block1;
00740 pencilActive[pencil_idx].jb = jy / myGrid.block2;
00741 }
00742 else {
00743 if (pencilActive[pencil_idx].xmin > ix)
00744 pencilActive[pencil_idx].xmin = ix;
00745 if (pencilActive[pencil_idx].xmax < ix)
00746 pencilActive[pencil_idx].xmax = ix;
00747 if (pencilActive[pencil_idx].ymin > jy)
00748 pencilActive[pencil_idx].ymin = jy;
00749 if (pencilActive[pencil_idx].ymax < jy)
00750 pencilActive[pencil_idx].ymax = jy;
00751 }
00752 }
00753 }
00754
00755 int nactive = 0;
00756 for (int ib=0; ib<myGrid.xBlocks; ++ib)
00757 for (int jb=0; jb<myGrid.yBlocks; ++jb)
00758 if (pencilActive[ib * myGrid.yBlocks + jb].isActive)
00759 nactive ++;
00760
00761 assert (nactive == myMgr->numPencilsActive);
00762
00763 nzlines = xlen * ylen;
00764 zline_storage = new double [nzlines * zlen];
00765 sp_zstorage = new float [nzlines * zlen];
00766
00767
00768
00769
00770 assert (zline_storage != NULL);
00771 double * zblock = zline_storage;
00772
00773 for (int ib=0; ib<myGrid.xBlocks; ++ib) {
00774 for (int jb=0; jb<myGrid.yBlocks; ++jb) {
00775 int index = ib * myGrid.yBlocks + jb;
00776 if (pencilActive[index].isActive) {
00777 pencilActive[index].data = zblock;
00778 for (int i = pencilActive[index].xmin; i <= pencilActive[index].xmax; i++)
00779 for (int j = pencilActive[index].ymin; j <= pencilActive[index].ymax; j++) {
00780 q_arr[i*myGrid.dim2 + j] = zblock;
00781 zblock += zlen;
00782 assert ((char *) zblock <= (char *)(zline_storage + nzlines*zlen));
00783 }
00784 }
00785 }
00786 }
00787
00788 pencilVec.resize (nactive);
00789 nactive = 0;
00790 for (int ib=0; ib<myGrid.xBlocks; ++ib)
00791 for (int jb=0; jb<myGrid.yBlocks; ++jb)
00792 if (pencilActive[ib*myGrid.yBlocks + jb].isActive)
00793 pencilVec[nactive ++] = pencilActive [ib * myGrid.yBlocks + jb];
00794
00795
00796 delete [] pencilActive;
00797
00798 #if CHARM_VERSION > 60000
00799
00800 OptPmeDummyMsg *m = new (PRIORITY_SIZE) OptPmeDummyMsg;
00801 m->to_pe = CkMyPe();
00802 CmiDirect_manytomany_initialize_recvbase (myMgr->handle, PHASE_UG,
00803 recv_ungrid_done, m, (char *)sp_zstorage,
00804 myGrid.xBlocks*myGrid.yBlocks, -1);
00805
00806 CkCallback cbi (CkCallback::ignore);
00807 CmiDirect_manytomany_initialize_sendbase (myMgr->handle, PHASE_GR, NULL, NULL,
00808 (char *)sp_zstorage,
00809 pencilVec.size(), patchList[0].patchID);
00810
00811 for (int idx = 0; idx < pencilVec.size(); idx++) {
00812 int ib = pencilVec[idx].ib;
00813 int jb = pencilVec[idx].jb;
00814 double * data = pencilVec[idx].data;
00815 int offset = (data - zline_storage)*sizeof(float);
00816 int xlen = pencilVec[idx].xmax - pencilVec[idx].xmin + 1;
00817 int ylen = pencilVec[idx].ymax - pencilVec[idx].ymin + 1;
00818 int fcount = xlen * ylen * zlen;
00819
00820 CkArrayIndex3D index (ib, jb, 0);
00821 CProxy_OptPmePencilMapZ zproxy (global_map_z);
00822 int pe = zproxy.ckLocalBranch()->procNum(0, index);
00823 CmiDirect_manytomany_initialize_send (myMgr->handle, PHASE_GR, idx,
00824 offset, fcount*sizeof(float), pe);
00825
00826 int srcnode = ib * myGrid.yBlocks + jb;
00827 CmiDirect_manytomany_initialize_recv (myMgr->handle, PHASE_UG,
00828 srcnode, offset, fcount*sizeof(float), pe);
00829 }
00830
00831 #endif
00832
00833 }
00834
00835 OptPmeCompute::~OptPmeCompute()
00836 {
00837 delete [] zline_storage;
00838 delete [] sp_zstorage;
00839 delete [] q_arr;
00840 }
00841
00842
00843 void OptPmeCompute::doWork()
00844 {
00845 DebugM(4,"Entering OptPmeCompute::doWork().\n");
00846
00847 #ifdef TRACE_COMPUTE_OBJECTS
00848 double traceObjStartTime = CmiWallTimer();
00849 #endif
00850
00851 if (!_initialized) initializeOptPmeCompute();
00852
00853 ResizeArrayIter<PatchElem> ap(patchList);
00854
00855
00856 if ( ! patchList[0].p->flags.doFullElectrostatics )
00857 {
00858 for (ap = ap.begin(); ap != ap.end(); ap++) {
00859 CompAtom *x = (*ap).positionBox->open();
00860 Results *r = (*ap).forceBox->open();
00861 (*ap).positionBox->close(&x);
00862 (*ap).forceBox->close(&r);
00863 }
00864 reduction->submit();
00865 if (amd_reduction) amd_reduction->submit();
00866 return;
00867 }
00868
00869 myMgr->_iter ++;
00870
00871
00872 numLocalAtoms = 0;
00873 for (ap = ap.begin(); ap != ap.end(); ap++) {
00874 numLocalAtoms += (*ap).p->getNumAtoms();
00875 }
00876
00877 Lattice &lattice = patchList[0].p->flags.lattice;
00878
00879 localData = new PmeParticle[numLocalAtoms];
00880
00881 PmeParticle * data_ptr = localData;
00882
00883 int natoms = 0;
00884
00885
00886
00887 for (ap = ap.begin(); ap != ap.end(); ap++) {
00888 #ifdef NETWORK_PROGRESS
00889 CmiNetworkProgress();
00890 #endif
00891
00892 CompAtom *x = (*ap).positionBox->open();
00893 if ( patchList[0].p->flags.doMolly ) {
00894 (*ap).positionBox->close(&x);
00895 x = (*ap).avgPositionBox->open();
00896 }
00897 int numAtoms = (*ap).p->getNumAtoms();
00898 int order_1 = myGrid.order - 1;
00899 scale_n_copy_coordinates(x, localData, numAtoms,
00900 lattice, myGrid,
00901 xstart + order_1, xlen - order_1,
00902 ystart + order_1, ylen - order_1,
00903 zstart + order_1, zlen - order_1,
00904 strayChargeErrors);
00905 natoms += numAtoms;
00906
00907 if ( patchList[0].p->flags.doMolly ) { (*ap).avgPositionBox->close(&x); }
00908 else { (*ap).positionBox->close(&x); }
00909 }
00910
00911 numLocalAtoms = natoms;
00912
00913
00914 BigReal ewaldcof = ComputeNonbondedUtil::ewaldcof;
00915 evir = 0;
00916 BigReal selfEnergy = 0;
00917 data_ptr = localData;
00918 int i;
00919 for(i=0; i<numLocalAtoms; ++i)
00920 {
00921 selfEnergy += data_ptr->cg * data_ptr->cg;
00922 ++data_ptr;
00923 }
00924 selfEnergy *= -1. * ewaldcof / SQRT_PI;
00925 evir[0] += selfEnergy;
00926
00927 #if 0
00928 if (myMgr->_iter > many_to_many_start) {
00929 OptPmeSubComputeMsg *smsg = myMgr->subcompute_msgs[1];
00930 CProxy_OptPmeMgr pmeProxy (CkpvAccess(BOCclass_group).computePmeMgr);
00931 pmeProxy[CmiNodeFirst(CmiMyNode())+smsg->dest].doWorkOnPeer(smsg);
00932 }
00933 else
00934 #endif
00935 doWorkOnPeer();
00936
00937 #ifdef TRACE_COMPUTE_OBJECTS
00938 traceUserBracketEvent(TRACE_COMPOBJ_IDOFFSET+this->cid, traceObjStartTime, CmiWallTimer());
00939 #endif
00940
00941 }
00942
00943 void OptPmeCompute::doWorkOnPeer()
00944 {
00945 Lattice &lattice = patchList[0].p->flags.lattice;
00946 double **q = q_arr;
00947 memset( (void*) zline_storage, 0, zlen * nzlines * sizeof(double) );
00948
00949 myRealSpace = new OptPmeRealSpace(myGrid,numLocalAtoms);
00950 if (!strayChargeErrors)
00951 myRealSpace->fill_charges(q, localData, zstart, zlen);
00952
00953 if (myMgr->constant_pressure && patchList[0].patchID == 0)
00954 myMgr->xPencil.recvLattice (lattice);
00955
00956 if (myMgr->_iter <= many_to_many_start)
00957 sendPencils();
00958 #if CHARM_VERSION > 60000
00959 else {
00960 pme_d2f (sp_zstorage, zline_storage, nzlines * zlen);
00961 CmiDirect_manytomany_start (myMgr->handle, PHASE_GR);
00962 }
00963 #endif
00964 }
00965
00966
00967
00968 void OptPmeCompute::sendPencils() {
00969
00970
00971
00972 int xBlocks = myGrid.xBlocks;
00973 int yBlocks = myGrid.yBlocks;
00974 int zBlocks = myGrid.zBlocks;
00975
00976 int K1 = myGrid.K1;
00977 int K2 = myGrid.K2;
00978 int dim2 = myGrid.dim2;
00979 int dim3 = myGrid.dim3;
00980 int block1 = myGrid.block1;
00981 int block2 = myGrid.block2;
00982
00983
00984
00985 int nactive = 0;
00986 for (int idx = 0; idx < pencilVec.size(); idx++) {
00987 int xstart = pencilVec[idx].xmin;
00988 int ystart = pencilVec[idx].ymin;
00989 int xlen = pencilVec[idx].xmax - pencilVec[idx].xmin + 1;
00990 int ylen = pencilVec[idx].ymax - pencilVec[idx].ymin + 1;
00991 int ib = pencilVec[idx].ib;
00992 int jb = pencilVec[idx].jb;
00993 double *data = pencilVec[idx].data;
00994
00995 int fcount = xlen * ylen * zlen;
00996 OptPmeGridMsg *msg = new (fcount, PRIORITY_SIZE) OptPmeGridMsg;
00997 msg->zstart = zstart;
00998 msg->zlen = zlen;
00999 msg->xstart = xstart;
01000 msg->xlen = xlen;
01001 msg->ystart = ystart;
01002 msg->ylen = ylen;
01003 msg->sourceNode = CkMyPe();
01004 msg->patchID = patchList[0].patchID;
01005
01006 float *qmsg = msg->qgrid;
01007 #pragma disjoint (*data, *qmsg)
01008 #pragma unroll(8)
01009 for ( int k=0; k< fcount; ++k )
01010 *(qmsg++) = data[k];
01011
01012 myMgr->zPencil(ib,jb,0).recvGrid(msg);
01013 }
01014 }
01015
01016
01017 void OptPmeCompute::copyPencils(OptPmeGridMsg *msg) {
01018
01019 if (!_initialized) initializeOptPmeCompute();
01020
01021 int ibegin = msg->xstart;
01022 int iend = msg->xstart + msg->xlen;
01023 int jbegin = msg->ystart;
01024 int jend = msg->ylen;
01025 int fcount = zlen * msg->xlen * msg->ylen;
01026
01027 float *qmsg = msg->qgrid;
01028 double *data = q_arr[ibegin * myGrid.dim2 + jbegin];
01029
01030 #pragma disjoint (*qmsg, *data)
01031 #pragma unroll(8)
01032 for ( int k=0; k<fcount; ++k )
01033 data[k] = *(qmsg++);
01034 }
01035
01036 void OptPmeCompute::ungridForces_init() {
01037
01038
01039
01040 if (myMgr->_iter > many_to_many_start)
01041 pme_f2d (zline_storage, sp_zstorage, nzlines * zlen);
01042
01043 localResults = new Vector[numLocalAtoms];
01044
01045 }
01046
01047 void OptPmeCompute::ungridForces_compute(int istart,
01048 int iend)
01049 {
01050 Vector *gridResults;
01051 gridResults = localResults;
01052
01053 if (iend == 0)
01054 iend = numLocalAtoms;
01055
01056 SimParameters *simParams = Node::Object()->simParameters;
01057 Vector pairForce = 0.;
01058 Lattice &lattice = patchList[0].p->flags.lattice;
01059 if(!simParams->commOnly) {
01060 #ifdef NETWORK_PROGRESS
01061 CmiNetworkProgress();
01062 #endif
01063 if (!strayChargeErrors) {
01064 myRealSpace->compute_forces(q_arr, localData, gridResults,
01065 zstart, zlen, istart, iend);
01066 scale_forces(gridResults + istart, iend - istart, lattice);
01067 }
01068 }
01069 }
01070
01071 void OptPmeCompute::ungridForces_finalize() {
01072 SimParameters *simParams = Node::Object()->simParameters;
01073 delete myRealSpace;
01074
01075 delete [] localData;
01076
01077
01078 Vector *results_ptr = localResults;
01079 ResizeArrayIter<PatchElem> ap(patchList);
01080
01081
01082 for (ap = ap.begin(); ap != ap.end(); ap++) {
01083 Results *r = (*ap).forceBox->open();
01084 Force *f = r->f[Results::slow];
01085 int numAtoms = (*ap).p->getNumAtoms();
01086
01087 if ( ! strayChargeErrors && ! simParams->commOnly ) {
01088 for(int i=0; i<numAtoms; ++i) {
01089 f[i].x += results_ptr->x;
01090 f[i].y += results_ptr->y;
01091 f[i].z += results_ptr->z;
01092 ++results_ptr;
01093 }
01094 }
01095
01096 (*ap).forceBox->close(&r);
01097 }
01098
01099 delete [] localResults;
01100
01101 double scale = 1.;
01102
01103 reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += evir[0] * scale;
01104 reduction->item(REDUCTION_STRAY_CHARGE_ERRORS) += strayChargeErrors;
01105 strayChargeErrors = 0;
01106 reduction->submit();
01107 if (amd_reduction) {
01108 amd_reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += evir[0] * scale;
01109 amd_reduction->submit();
01110 }
01111 }
01112
01113
01114
01115 #include "fftlib.C"
01116 #include "OptPmeMgr.def.h"
01117
01118 void pme_f2d (double *dst, float *src, int N) {
01119 #pragma disjoint (*src, *dst)
01120 #pragma unroll (8)
01121 for (int i = 0; i < N; i++)
01122 dst[i] = src[i];
01123 }
01124
01125 void pme_d2f (float *dst, double *src, int N) {
01126 #pragma disjoint (*src, *dst)
01127 #pragma unroll (8)
01128 for (int i = 0; i < N; i++)
01129 dst[i] = src[i];
01130 }