00001
00007 #ifdef NAMD_FFTW
00008 #ifdef NAMD_FFTW_3
00009 #include <fftw3.h>
00010 #else
00011 #ifdef NAMD_FFTW_NO_TYPE_PREFIX
00012 #include <fftw.h>
00013 #include <rfftw.h>
00014 #else
00015 #include <sfftw.h>
00016 #include <srfftw.h>
00017 #endif
00018 #endif
00019 #endif
00020
00021 #include <assert.h>
00022
00023 #include "InfoStream.h"
00024 #include "Node.h"
00025 #include "PatchMap.h"
00026 #include "PatchMap.inl"
00027 #include "AtomMap.h"
00028 #include "OptPme.h"
00029 #include "OptPmeMgr.decl.h"
00030 #include "OptPmeRealSpace.h"
00031 #include "PmeKSpace.h"
00032 #include "ComputeNonbondedUtil.h"
00033 #include "PatchMgr.h"
00034 #include "Molecule.h"
00035 #include "ReductionMgr.h"
00036
00037 #include "ComputeMgr.decl.h"
00038
00039 #define MIN_DEBUG_LEVEL 3
00040 #include "Debug.h"
00041 #include "SimParameters.h"
00042 #include "WorkDistrib.h"
00043 #include "varsizemsg.h"
00044 #include "Random.h"
00045 #include "Priorities.h"
00046
00047
00048 extern char *pencilPMEProcessors;
00049
00050 #include "fftlib.h"
00051 #include "fftmap.h"
00052
00053
00054 int many_to_many_start = 0x7fffffff;
00055
00056 class OptPmeMgr : public BOCclass {
00057 public:
00058 friend class OptPmeCompute;
00059 OptPmeMgr();
00060 ~OptPmeMgr();
00061
00062 void initialize(CkQdMsg*);
00063 void initialize_pencils(CkQdMsg*);
00064 void activate_pencils(CkQdMsg*);
00065 void recvArrays(CProxy_OptPmeXPencil, CProxy_OptPmeYPencil, CProxy_OptPmeZPencil);
00066
00067 void recvUngrid(OptPmeGridMsg *);
00068 void ungridCalc(OptPmeDummyMsg *);
00069 void recvEvir (CkReductionMsg *msg);
00070
00071 void setCompute(OptPmeCompute *c) { pmeCompute = c; c->setMgr(this); }
00072
00073 private:
00074 CProxy_OptPmeMgr pmeProxy;
00075 CProxy_OptPmeMgr pmeProxyDir;
00076 OptPmeCompute *pmeCompute;
00077 PmeGrid myGrid;
00078 PmeKSpace *myKSpace;
00079
00080 CProxy_OptPmeXPencil xPencil;
00081 CProxy_OptPmeYPencil yPencil;
00082 CProxy_OptPmeZPencil zPencil;
00083 int numPencilsActive;
00084 int ungrid_count, usePencils;
00085 SubmitReduction *reduction;
00086 SubmitReduction *amd_reduction;
00087 int _iter;
00088 void *handle;
00089 bool constant_pressure;
00090 };
00091
00092
00093 void pme_f2d (double *dst, float *src, int N);
00094 void pme_d2f (float *dst, double *src, int N);
00095
00096 static inline void initializePmeGrid (SimParameters *simParams, PmeGrid &grid) {
00097 int xBlocks = 0, yBlocks = 0, zBlocks= 0;
00098
00099 if ( simParams->PMEPencils > 1 ) {
00100 xBlocks = yBlocks = zBlocks = simParams->PMEPencils;
00101 } else {
00102 int nb2 = ( simParams->PMEGridSizeX * simParams->PMEGridSizeY
00103 * simParams->PMEGridSizeZ ) / simParams->PMEMinPoints;
00104 if ( nb2 > CkNumPes() ) nb2 = CkNumPes();
00105 int nb = (int) sqrt((float)nb2);
00106 xBlocks = zBlocks = nb;
00107 yBlocks = nb2 / nb;
00108 }
00109
00110 int dimx = simParams->PMEGridSizeX;
00111 int bx = 1 + ( dimx - 1 ) / xBlocks;
00112 xBlocks = 1 + ( dimx - 1 ) / bx;
00113
00114 int dimy = simParams->PMEGridSizeY;
00115 int by = 1 + ( dimy - 1 ) / yBlocks;
00116 yBlocks = 1 + ( dimy - 1 ) / by;
00117
00118 int dimz = simParams->PMEGridSizeZ / 2 + 1;
00119 int bz = 1 + ( dimz - 1 ) / zBlocks;
00120 zBlocks = 1 + ( dimz - 1 ) / bz;
00121
00122 grid.xBlocks = xBlocks;
00123 grid.yBlocks = yBlocks;
00124 grid.zBlocks = zBlocks;
00125
00126 grid.K1 = simParams->PMEGridSizeX;
00127 grid.K2 = simParams->PMEGridSizeY;
00128 grid.K3 = simParams->PMEGridSizeZ;
00129 grid.order = simParams->PMEInterpOrder;
00130 grid.dim2 = grid.K2;
00131 grid.dim3 = 2 * (grid.K3/2 + 1);
00132
00133 grid.block1 = ( grid.K1 + xBlocks - 1 ) / xBlocks;
00134 grid.block2 = ( grid.K2 + yBlocks - 1 ) / yBlocks;
00135 grid.block3 = ( grid.K3/2 + 1 + zBlocks - 1 ) / zBlocks;
00136 }
00137
00138 #ifdef NAMD_FFTW
00139 static CmiNodeLock fftw_plan_lock;
00140 #endif
00141
00142
00143 static inline void scale_n_copy_coordinates(CompAtom *x, PmeParticle p[],
00144 int &N,
00145 Lattice &lattice, PmeGrid grid,
00146
00147 double xmin, double xlen,
00148 double ymin, double ylen,
00149 double zmin, double zlen,
00150 int &scg) {
00151 Vector origin = lattice.origin();
00152 Vector recip1 = lattice.a_r();
00153 Vector recip2 = lattice.b_r();
00154 Vector recip3 = lattice.c_r();
00155 double ox = origin.x;
00156 double oy = origin.y;
00157 double oz = origin.z;
00158 double r1x = recip1.x;
00159 double r1y = recip1.y;
00160 double r1z = recip1.z;
00161 double r2x = recip2.x;
00162 double r2y = recip2.y;
00163 double r2z = recip2.z;
00164 double r3x = recip3.x;
00165 double r3y = recip3.y;
00166 double r3z = recip3.z;
00167 int K1 = grid.K1;
00168 int K2 = grid.K2;
00169 int K3 = grid.K3;
00170
00171 const BigReal coulomb_sqrt = sqrt( COULOMB * ComputeNonbondedUtil::scaling
00172 * ComputeNonbondedUtil::dielectric_1 );
00173
00174
00175 int natoms = 0;
00176 for (int i=0; i<N; i++) {
00177 double px = x[i].position.x - ox;
00178 double py = x[i].position.y - oy;
00179 double pz = x[i].position.z - oz;
00180 double sx = px*r1x + py*r1y + pz*r1z;
00181 double sy = px*r2x + py*r2y + pz*r2z;
00182 double sz = px*r3x + py*r3y + pz*r3z;
00183 p[natoms].x = K1 * ( sx - floor(sx) );
00184 p[natoms].y = K2 * ( sy - floor(sy) );
00185 p[natoms].z = K3 * ( sz - floor(sz) );
00186
00187
00188 if ( p[natoms].x == K1 ) p[natoms].x = 0;
00189 if ( p[natoms].y == K2 ) p[natoms].y = 0;
00190 if ( p[natoms].z == K3 ) p[natoms].z = 0;
00191
00192 BigReal u1,u2,u3;
00193 u1 = (int) (p[natoms].x - xmin);
00194 if (u1 >= grid.K1) u1 -= grid.K1;
00195 u2 = (int) (p[natoms].y - ymin);
00196 if (u2 >= grid.K2) u2 -= grid.K2;
00197 u3 = (int) (p[natoms].z - zmin);
00198 if (u3 >= grid.K3) u3 -= grid.K3;
00199
00200 if ( (u1 < 0.0) || (u1 >= xlen) ||
00201 (u2 < 0.0) || (u2 >= ylen) ||
00202 (u3 < 0.0) || (u3 >= zlen) ) {
00203 scg ++;
00204 continue;
00205 }
00206
00207 p[natoms].cg = coulomb_sqrt * x[i].charge;
00208 natoms ++;
00209 }
00210 N = natoms;
00211 }
00212
00213
00214 OptPmeMgr::OptPmeMgr() : pmeProxy(thisgroup),
00215 pmeProxyDir(thisgroup), pmeCompute(0) {
00216
00217 CkpvAccess(BOCclass_group).computePmeMgr = thisgroup;
00218
00219 myKSpace = 0;
00220 ungrid_count = 0;
00221 usePencils = 0;
00222
00223 #ifdef NAMD_FFTW
00224 if ( CmiMyRank() == 0 ) {
00225 fftw_plan_lock = CmiCreateLock();
00226 }
00227 #endif
00228 }
00229
00230
00231 void OptPmeMgr::recvArrays(CProxy_OptPmeXPencil x, CProxy_OptPmeYPencil y, CProxy_OptPmeZPencil z) {
00232 xPencil = x; yPencil = y; zPencil = z;
00233 }
00234
00235 void OptPmeMgr::initialize(CkQdMsg *msg) {
00236 delete msg;
00237
00238 _iter = 0;
00239
00240 #if CHARM_VERSION > 60000
00241 handle = CmiDirect_manytomany_allocate_handle ();
00242 #endif
00243
00244 SimParameters *simParams = Node::Object()->simParameters;
00245 PatchMap *patchMap = PatchMap::Object();
00246
00247 if (simParams->PMEPencils > 0)
00248 usePencils = 1;
00249 else return;
00250
00251 initializePmeGrid (simParams, myGrid);
00252
00253 if (simParams->langevinPistonOn || simParams->berendsenPressureOn)
00254 constant_pressure = true;
00255 else
00256 constant_pressure = false;
00257
00258 bool useManyToMany = simParams->useManyToMany;
00259
00260 int npes = patchMap->numPatches() +
00261 myGrid.xBlocks * myGrid.yBlocks +
00262 myGrid.zBlocks * myGrid.xBlocks +
00263 myGrid.yBlocks * myGrid.zBlocks;
00264
00265 if (npes >= CkNumPes()) {
00266 if (CkMyPe() == 0)
00267 printf ("Warning : Not enough processors for the many-to-many optimization \n");
00268 useManyToMany = false;
00269 }
00270
00271 #if CHARM_VERSION > 60000
00272 if (useManyToMany) {
00273 if (CkMyPe() == 0)
00274 printf ("Enabling the Many-to-many optimization\n");
00275
00276 many_to_many_start = MANY_TO_MANY_START;
00277 }
00278 #endif
00279
00280 if ( CkMyPe() == 0) {
00281 iout << iINFO << "PME using " << myGrid.xBlocks << " x " <<
00282 myGrid.yBlocks << " x " << myGrid.zBlocks <<
00283 " pencil grid for FFT and reciprocal sum.\n" << endi;
00284
00285 pencilPMEProcessors = new char [CkNumPes()];
00286 memset (pencilPMEProcessors, 0, sizeof(char) * CkNumPes());
00287
00288 CProxy_OptPmePencilMapZ mapz;
00289 CProxy_OptPmePencilMapY mapy;
00290 CProxy_OptPmePencilMapX mapx;
00291
00292 mapz = CProxy_OptPmePencilMapZ::ckNew(myGrid.xBlocks, myGrid.yBlocks, myGrid.zBlocks);
00293 mapy = CProxy_OptPmePencilMapY::ckNew(myGrid.xBlocks, myGrid.yBlocks, myGrid.zBlocks);
00294 mapx = CProxy_OptPmePencilMapX::ckNew(myGrid.xBlocks, myGrid.yBlocks, myGrid.zBlocks);
00295
00296 CkArrayOptions optsz;
00297 optsz.setMap (mapz);
00298 CkArrayOptions optsy;
00299 optsy.setMap (mapy);
00300 CkArrayOptions optsx;
00301 optsx.setMap (mapx);
00302
00303 zPencil = CProxy_OptPmeZPencil::ckNew(optsz);
00304 yPencil = CProxy_OptPmeYPencil::ckNew(optsy);
00305 xPencil = CProxy_OptPmeXPencil::ckNew(optsx);
00306
00307 int x,y,z;
00308 for (x = 0; x < myGrid.xBlocks; ++x)
00309 for (y = 0; y < myGrid.yBlocks; ++y ) {
00310 zPencil(x,y,0).insert();
00311 }
00312 zPencil.doneInserting();
00313
00314 for (z = 0; z < myGrid.zBlocks; ++z )
00315 for (x = 0; x < myGrid.xBlocks; ++x ) {
00316 yPencil(x,0,z).insert();
00317 }
00318 yPencil.doneInserting();
00319
00320 for (y = 0; y < myGrid.yBlocks; ++y )
00321 for (z = 0; z < myGrid.zBlocks; ++z ) {
00322 xPencil(0,y,z).insert();
00323 }
00324 xPencil.doneInserting();
00325
00326 pmeProxy.recvArrays(xPencil,yPencil,zPencil);
00327 OptPmePencilInitMsgData msgdata;
00328 msgdata.grid = myGrid;
00329 msgdata.xBlocks = myGrid.xBlocks;
00330 msgdata.yBlocks = myGrid.yBlocks;
00331 msgdata.zBlocks = myGrid.zBlocks;
00332 msgdata.xPencil = xPencil;
00333 msgdata.yPencil = yPencil;
00334 msgdata.zPencil = zPencil;
00335 msgdata.constant_pressure = constant_pressure;
00336
00337 CkCallback cb (CkIndex_OptPmeMgr::recvEvir(NULL), thisProxy[0]);
00338 msgdata.cb_energy = cb;
00339
00340 msgdata.pmeProxy = pmeProxyDir;
00341 xPencil.init(new OptPmePencilInitMsg(msgdata));
00342 yPencil.init(new OptPmePencilInitMsg(msgdata));
00343 zPencil.init(new OptPmePencilInitMsg(msgdata));
00344
00345 reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_BASIC);
00346 if (simParams->accelMDOn) {
00347 amd_reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_AMD);
00348 } else {
00349 amd_reduction = NULL;
00350 }
00351
00352 #ifndef NAMD_FFTW
00353 NAMD_die("Sorry, FFTW must be compiled in to use PME.");
00354 #endif
00355 }
00356
00357 }
00358
00359 void OptPmeMgr::initialize_pencils(CkQdMsg *msg) {
00360 delete msg;
00361
00362 SimParameters *simParams = Node::Object()->simParameters;
00363
00364 PatchMap *patchMap = PatchMap::Object();
00365 Lattice lattice = simParams->lattice;
00366 BigReal sysdima = lattice.a_r().unit() * lattice.a();
00367 BigReal sysdimb = lattice.b_r().unit() * lattice.b();
00368 BigReal cutoff = simParams->cutoff;
00369 BigReal patchdim = simParams->patchDimension;
00370 int numPatches = patchMap->numPatches();
00371
00372 char *pencilActive = new char[myGrid.xBlocks*myGrid.yBlocks];
00373 for ( int i=0; i<myGrid.xBlocks; ++i ) {
00374 for ( int j=0; j<myGrid.yBlocks; ++j ) {
00375 pencilActive[i*myGrid.yBlocks+j] = 0;
00376 }
00377 }
00378
00379
00380 assert (patchMap->numPatchesOnNode(CkMyPe()) <= 1);
00381 for ( int pid=0; pid < numPatches; ++pid ) {
00382 int pnode = patchMap->node(pid);
00383 if ( pnode != CkMyPe() ) continue;
00384
00385 BigReal minx = patchMap->min_a(pid);
00386 BigReal maxx = patchMap->max_a(pid);
00387 BigReal margina = 0.5 * ( patchdim - cutoff ) / sysdima;
00388
00389 int min1 = ((int) floor(myGrid.K1 * (minx - margina))) - myGrid.order + 1;
00390 int max1 = ((int) floor(myGrid.K1 * (maxx + margina)));
00391
00392 BigReal miny = patchMap->min_b(pid);
00393 BigReal maxy = patchMap->max_b(pid);
00394 BigReal marginb = 0.5 * ( patchdim - cutoff ) / sysdimb;
00395
00396 int min2 = ((int) floor(myGrid.K2 * (miny - marginb))) - myGrid.order + 1;
00397 int max2 = ((int) floor(myGrid.K2 * (maxy + marginb)));
00398
00399 for ( int i=min1; i<=max1; ++i ) {
00400 int ix = i;
00401 while ( ix >= myGrid.K1 ) ix -= myGrid.K1;
00402 while ( ix < 0 ) ix += myGrid.K1;
00403 for ( int j=min2; j<=max2; ++j ) {
00404 int jy = j;
00405 while ( jy >= myGrid.K2 ) jy -= myGrid.K2;
00406 while ( jy < 0 ) jy += myGrid.K2;
00407 pencilActive[(ix / myGrid.block1)*myGrid.yBlocks + (jy / myGrid.block2)] = 1;
00408 }
00409 }
00410 }
00411
00412 numPencilsActive = 0;
00413 for ( int i=0; i<myGrid.xBlocks; ++i ) {
00414 for ( int j=0; j<myGrid.yBlocks; ++j ) {
00415 if ( pencilActive[i*myGrid.yBlocks+j] ) {
00416 ++numPencilsActive;
00417
00418 zPencil(i,j,0).dummyRecvGrid(CkMyPe(),0);
00419 }
00420 }
00421 }
00422
00423 ungrid_count = numPencilsActive;
00424 delete [] pencilActive;
00425 }
00426
00427
00428 void OptPmeMgr::activate_pencils(CkQdMsg *msg) {
00429 if ( ! usePencils ) return;
00430 if ( CkMyPe() == 0 ) zPencil.dummyRecvGrid(CkMyPe(),1);
00431 }
00432
00433
00434 OptPmeMgr::~OptPmeMgr() {
00435 delete myKSpace;
00436 }
00437
00438 void OptPmeMgr::recvUngrid(OptPmeGridMsg *msg) {
00439 if ( ungrid_count == 0 ) {
00440 NAMD_bug("Message order failure in OptPmeMgr::recvUngrid\n");
00441 }
00442
00443 pmeCompute->copyPencils(msg);
00444 delete msg;
00445 --ungrid_count;
00446
00447 if ( ungrid_count == 0 ) {
00448
00449 ungridCalc(NULL);
00450 }
00451 }
00452
00453 void OptPmeMgr::ungridCalc(OptPmeDummyMsg *msg) {
00454 pmeCompute->ungridForces();
00455 ungrid_count = numPencilsActive;
00456 }
00457
00458
00459 void OptPmeMgr::recvEvir (CkReductionMsg *msg) {
00460
00461 assert (CkMyPe() == 0);
00462
00463 double *data = (double *) msg->getData();
00464 assert (msg->getSize() == 7 * sizeof(double));
00465
00466
00467
00468 double scale = 1.;
00469 reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += data[0] * scale;
00470 reduction->item(REDUCTION_VIRIAL_SLOW_XX) += data[1] * scale;
00471 reduction->item(REDUCTION_VIRIAL_SLOW_XY) += data[2] * scale;
00472 reduction->item(REDUCTION_VIRIAL_SLOW_XZ) += data[3] * scale;
00473 reduction->item(REDUCTION_VIRIAL_SLOW_YX) += data[2] * scale;
00474 reduction->item(REDUCTION_VIRIAL_SLOW_YY) += data[4] * scale;
00475 reduction->item(REDUCTION_VIRIAL_SLOW_YZ) += data[5] * scale;
00476 reduction->item(REDUCTION_VIRIAL_SLOW_ZX) += data[3] * scale;
00477 reduction->item(REDUCTION_VIRIAL_SLOW_ZY) += data[5] * scale;
00478 reduction->item(REDUCTION_VIRIAL_SLOW_ZZ) += data[6] * scale;
00479
00480 if (amd_reduction) {
00481 amd_reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += data[0] * scale;
00482 amd_reduction->item(REDUCTION_VIRIAL_SLOW_XX) += data[1] * scale;
00483 amd_reduction->item(REDUCTION_VIRIAL_SLOW_XY) += data[2] * scale;
00484 amd_reduction->item(REDUCTION_VIRIAL_SLOW_XZ) += data[3] * scale;
00485 amd_reduction->item(REDUCTION_VIRIAL_SLOW_YX) += data[2] * scale;
00486 amd_reduction->item(REDUCTION_VIRIAL_SLOW_YY) += data[4] * scale;
00487 amd_reduction->item(REDUCTION_VIRIAL_SLOW_YZ) += data[5] * scale;
00488 amd_reduction->item(REDUCTION_VIRIAL_SLOW_ZX) += data[3] * scale;
00489 amd_reduction->item(REDUCTION_VIRIAL_SLOW_ZY) += data[5] * scale;
00490 amd_reduction->item(REDUCTION_VIRIAL_SLOW_ZZ) += data[6] * scale;
00491 }
00492
00493 delete msg;
00494
00495 SimParameters *simParams = Node::Object()->simParameters;
00496 int fef = simParams->fullElectFrequency;
00497 for (int i = 0; i < fef; i++) {
00498 reduction->submit();
00499 if (amd_reduction) amd_reduction->submit();
00500 }
00501 }
00502
00503 OptPmeCompute::OptPmeCompute(ComputeID c) :
00504 ComputeHomePatches(c)
00505 {
00506 DebugM(4,"OptPmeCompute created.\n");
00507
00508 CProxy_OptPmeMgr::ckLocalBranch(
00509 CkpvAccess(BOCclass_group).computePmeMgr)->setCompute(this);
00510
00511 _initialized = false;
00512
00513 useAvgPositions = 1;
00514
00515 reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_BASIC);
00516 SimParameters *simParams = Node::Object()->simParameters;
00517 if (simParams->accelMDOn) {
00518 amd_reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_AMD);
00519 } else {
00520 amd_reduction = NULL;
00521 }
00522 }
00523
00524 void recv_ungrid_done (void *m) {
00525 OptPmeDummyMsg *msg = (OptPmeDummyMsg *) m;
00526 CProxy_OptPmeMgr pmeProxy (CkpvAccess(BOCclass_group).computePmeMgr);
00527 pmeProxy[CkMyPe()].ungridCalc (msg);
00528 }
00529
00530
00531 void OptPmeCompute::resetPatchCoordinates (const Lattice &lattice) {
00532 PatchMap *patchMap = PatchMap::Object();
00533 SimParameters *simParams = Node::Object()->simParameters;
00534
00535 BigReal sysdima = lattice.a_r().unit() * lattice.a();
00536 BigReal sysdimb = lattice.b_r().unit() * lattice.b();
00537 BigReal sysdimc = lattice.c_r().unit() * lattice.c();
00538 BigReal cutoff = simParams->cutoff;
00539 BigReal patchdim = simParams->patchDimension;
00540
00541 int pid = patchList[0].patchID;
00542 assert (patchList.size() == 1);
00543
00544
00545
00546 BigReal minx = patchMap->min_a(pid);
00547 BigReal maxx = patchMap->max_a(pid);
00548 BigReal margina = 0.5 * ( patchdim - cutoff ) / sysdima;
00549
00550 int min1 = ((int) floor(myGrid.K1 * (minx - margina))) - myGrid.order + 1;
00551 int max1 = ((int) floor(myGrid.K1 * (maxx + margina)));
00552
00553 BigReal miny = patchMap->min_b(pid);
00554 BigReal maxy = patchMap->max_b(pid);
00555 BigReal marginb = 0.5 * ( patchdim - cutoff ) / sysdimb;
00556
00557 int min2 = ((int) floor(myGrid.K2 * (miny - marginb))) - myGrid.order + 1;
00558 int max2 = ((int) floor(myGrid.K2 * (maxy + marginb)));
00559
00560 BigReal minz = patchMap->min_c(pid);
00561 BigReal maxz = patchMap->max_c(pid);
00562 BigReal marginc = 0.5 * ( patchdim - cutoff ) / sysdimc;
00563
00564 int min3 = ((int) floor(myGrid.K3 * (minz - marginc))) - myGrid.order + 1;
00565 int max3 = ((int) floor(myGrid.K3 * (maxz + marginc)));
00566
00567 xlen = max1 - min1 + 1;
00568 xstart = min1;
00569
00570 ylen = max2 - min2 + 1;
00571 ystart = min2;
00572
00573 zlen = max3 - min3 + 1;
00574 zstart = min3;
00575 }
00576
00577 void OptPmeCompute::initializeOptPmeCompute () {
00578
00579 _initialized = true;
00580
00581 strayChargeErrors = 0;
00582
00583 SimParameters *simParams = Node::Object()->simParameters;
00584 PatchMap *patchMap = PatchMap::Object();
00585
00586 initializePmeGrid (simParams, myGrid);
00587
00588 alchFepOn = simParams->alchFepOn;
00589 alchThermIntOn = simParams->alchThermIntOn;
00590 lesOn = simParams->lesOn;
00591 pairOn = simParams->pairInteractionOn;
00592
00593 assert (!alchFepOn);
00594 assert (!alchThermIntOn);
00595 assert (!lesOn);
00596 assert (!pairOn);
00597
00598 qsize = myGrid.K1 * myGrid.dim2 * myGrid.dim3;
00599 fsize = myGrid.K1 * myGrid.dim2;
00600 q_arr = new double*[fsize];
00601 memset( (void*) q_arr, 0, fsize * sizeof(double*) );
00602
00603 assert (myMgr != NULL);
00604
00605 const Lattice lattice = simParams->lattice;
00606 resetPatchCoordinates (lattice);
00607
00608 PencilElement *pencilActive = new PencilElement [myGrid.xBlocks * myGrid.yBlocks];
00609 memset (pencilActive, 0, myGrid.xBlocks * myGrid.yBlocks * sizeof(PencilElement));
00610
00611 for ( int i=xstart; i<=xstart+xlen-1; ++i ) {
00612 int ix = i;
00613 while ( ix >= myGrid.K1 ) ix -= myGrid.K1;
00614 while ( ix < 0 ) ix += myGrid.K1;
00615 for ( int j=ystart; j<=ystart+ylen-1; ++j ) {
00616 int jy = j;
00617 while ( jy >= myGrid.K2 ) jy -= myGrid.K2;
00618 while ( jy < 0 ) jy += myGrid.K2;
00619
00620 int pencil_idx = (ix / myGrid.block1)*myGrid.yBlocks + (jy / myGrid.block2);
00621
00622 if (! pencilActive[pencil_idx].isActive) {
00623 pencilActive[pencil_idx].isActive = 1;
00624 pencilActive[pencil_idx].xmin = ix;
00625 pencilActive[pencil_idx].xmax = ix;
00626 pencilActive[pencil_idx].ymin = jy;
00627 pencilActive[pencil_idx].ymax = jy;
00628 pencilActive[pencil_idx].ib = ix / myGrid.block1;
00629 pencilActive[pencil_idx].jb = jy / myGrid.block2;
00630 }
00631 else {
00632 if (pencilActive[pencil_idx].xmin > ix)
00633 pencilActive[pencil_idx].xmin = ix;
00634 if (pencilActive[pencil_idx].xmax < ix)
00635 pencilActive[pencil_idx].xmax = ix;
00636 if (pencilActive[pencil_idx].ymin > jy)
00637 pencilActive[pencil_idx].ymin = jy;
00638 if (pencilActive[pencil_idx].ymax < jy)
00639 pencilActive[pencil_idx].ymax = jy;
00640 }
00641 }
00642 }
00643
00644 int nactive = 0;
00645 for (int ib=0; ib<myGrid.xBlocks; ++ib)
00646 for (int jb=0; jb<myGrid.yBlocks; ++jb)
00647 if (pencilActive[ib * myGrid.yBlocks + jb].isActive)
00648 nactive ++;
00649
00650 assert (nactive == myMgr->numPencilsActive);
00651
00652 nzlines = xlen * ylen;
00653 zline_storage = new double [nzlines * zlen];
00654 sp_zstorage = new float [nzlines * zlen];
00655
00656
00657
00658
00659 assert (zline_storage != NULL);
00660 double * zblock = zline_storage;
00661
00662 for (int ib=0; ib<myGrid.xBlocks; ++ib) {
00663 for (int jb=0; jb<myGrid.yBlocks; ++jb) {
00664 int index = ib * myGrid.yBlocks + jb;
00665 if (pencilActive[index].isActive) {
00666 pencilActive[index].data = zblock;
00667 for (int i = pencilActive[index].xmin; i <= pencilActive[index].xmax; i++)
00668 for (int j = pencilActive[index].ymin; j <= pencilActive[index].ymax; j++) {
00669 q_arr[i*myGrid.dim2 + j] = zblock;
00670 zblock += zlen;
00671 assert ((char *) zblock <= (char *)(zline_storage + nzlines*zlen));
00672 }
00673 }
00674 }
00675 }
00676
00677 pencilVec.resize (nactive);
00678 nactive = 0;
00679 for (int ib=0; ib<myGrid.xBlocks; ++ib)
00680 for (int jb=0; jb<myGrid.yBlocks; ++jb)
00681 if (pencilActive[ib*myGrid.yBlocks + jb].isActive)
00682 pencilVec[nactive ++] = pencilActive [ib * myGrid.yBlocks + jb];
00683
00684
00685 delete [] pencilActive;
00686
00687 #if CHARM_VERSION > 60000
00688
00689 OptPmeDummyMsg *m = new (PRIORITY_SIZE) OptPmeDummyMsg;
00690 CmiDirect_manytomany_initialize_recvbase (myMgr->handle, PHASE_UG,
00691 recv_ungrid_done, m, (char *)sp_zstorage,
00692 myGrid.xBlocks*myGrid.yBlocks, -1);
00693
00694 CkCallback cbi (CkCallback::ignore);
00695 CmiDirect_manytomany_initialize_sendbase (myMgr->handle, PHASE_GR, NULL, NULL,
00696 (char *)sp_zstorage,
00697 pencilVec.size(), patchList[0].patchID);
00698
00699 for (int idx = 0; idx < pencilVec.size(); idx++) {
00700 int ib = pencilVec[idx].ib;
00701 int jb = pencilVec[idx].jb;
00702 double * data = pencilVec[idx].data;
00703 int offset = (data - zline_storage)*sizeof(float);
00704 int xlen = pencilVec[idx].xmax - pencilVec[idx].xmin + 1;
00705 int ylen = pencilVec[idx].ymax - pencilVec[idx].ymin + 1;
00706 int fcount = xlen * ylen * zlen;
00707
00708 CkArrayIndex3D index (ib, jb, 0);
00709 CProxy_OptPmePencilMapZ zproxy (global_map_z);
00710 int pe = zproxy.ckLocalBranch()->procNum(0, index);
00711 CmiDirect_manytomany_initialize_send (myMgr->handle, PHASE_GR, idx,
00712 offset, fcount*sizeof(float), pe);
00713
00714 int srcnode = ib * myGrid.yBlocks + jb;
00715 CmiDirect_manytomany_initialize_recv (myMgr->handle, PHASE_UG,
00716 srcnode, offset, fcount*sizeof(float), pe);
00717 }
00718
00719 #endif
00720
00721 }
00722
00723 OptPmeCompute::~OptPmeCompute()
00724 {
00725 delete [] zline_storage;
00726 delete [] q_arr;
00727 }
00728
00729
00730 void OptPmeCompute::doWork()
00731 {
00732 DebugM(4,"Entering OptPmeCompute::doWork().\n");
00733
00734 #ifdef TRACE_COMPUTE_OBJECTS
00735 double traceObjStartTime = CmiWallTimer();
00736 #endif
00737
00738 if (!_initialized) initializeOptPmeCompute();
00739
00740 ResizeArrayIter<PatchElem> ap(patchList);
00741
00742
00743 if ( ! patchList[0].p->flags.doFullElectrostatics )
00744 {
00745 for (ap = ap.begin(); ap != ap.end(); ap++) {
00746 CompAtom *x = (*ap).positionBox->open();
00747 Results *r = (*ap).forceBox->open();
00748 (*ap).positionBox->close(&x);
00749 (*ap).forceBox->close(&r);
00750 }
00751 reduction->submit();
00752 if (amd_reduction) amd_reduction->submit();
00753 return;
00754 }
00755
00756 myMgr->_iter ++;
00757
00758
00759 numLocalAtoms = 0;
00760 for (ap = ap.begin(); ap != ap.end(); ap++) {
00761 numLocalAtoms += (*ap).p->getNumAtoms();
00762 }
00763
00764 Lattice &lattice = patchList[0].p->flags.lattice;
00765
00766 localData = new PmeParticle[numLocalAtoms];
00767
00768 PmeParticle * data_ptr = localData;
00769
00770 int natoms = 0;
00771 if (myMgr->constant_pressure)
00772 resetPatchCoordinates(lattice);
00773
00774 for (ap = ap.begin(); ap != ap.end(); ap++) {
00775 #ifdef NETWORK_PROGRESS
00776 CmiNetworkProgress();
00777 #endif
00778
00779 CompAtom *x = (*ap).positionBox->open();
00780 if ( patchList[0].p->flags.doMolly ) {
00781 (*ap).positionBox->close(&x);
00782 x = (*ap).avgPositionBox->open();
00783 }
00784 int numAtoms = (*ap).p->getNumAtoms();
00785 int order_1 = myGrid.order - 1;
00786 scale_n_copy_coordinates(x, localData, numAtoms,
00787 lattice, myGrid,
00788 xstart + order_1, xlen - order_1,
00789 ystart + order_1, ylen - order_1,
00790 zstart + order_1, zlen - order_1,
00791 strayChargeErrors);
00792 natoms += numAtoms;
00793
00794 if ( patchList[0].p->flags.doMolly ) { (*ap).avgPositionBox->close(&x); }
00795 else { (*ap).positionBox->close(&x); }
00796 }
00797
00798 numLocalAtoms = natoms;
00799
00800
00801 BigReal ewaldcof = ComputeNonbondedUtil::ewaldcof;
00802 evir = 0;
00803 BigReal selfEnergy = 0;
00804 data_ptr = localData;
00805 int i;
00806 for(i=0; i<numLocalAtoms; ++i)
00807 {
00808 selfEnergy += data_ptr->cg * data_ptr->cg;
00809 ++data_ptr;
00810 }
00811 selfEnergy *= -1. * ewaldcof / SQRT_PI;
00812 evir[0] += selfEnergy;
00813
00814 double **q = q_arr;
00815 memset( (void*) zline_storage, 0, zlen * nzlines * sizeof(double) );
00816
00817 myRealSpace = new OptPmeRealSpace(myGrid,numLocalAtoms);
00818
00819 if (!strayChargeErrors)
00820 myRealSpace->fill_charges(q, localData, zstart, zlen);
00821
00822 #ifdef TRACE_COMPUTE_OBJECTS
00823 traceUserBracketEvent(TRACE_COMPOBJ_IDOFFSET+this->cid, traceObjStartTime, CmiWallTimer());
00824 #endif
00825
00826 if (myMgr->constant_pressure && patchList[0].patchID == 0)
00827 myMgr->xPencil.recvLattice (lattice);
00828
00829 if (myMgr->_iter <= many_to_many_start)
00830 sendPencils();
00831 #if CHARM_VERSION > 60000
00832 else {
00833 pme_d2f (sp_zstorage, zline_storage, nzlines * zlen);
00834 CmiDirect_manytomany_start (myMgr->handle, PHASE_GR);
00835 }
00836 #endif
00837 }
00838
00839
00840
00841 void OptPmeCompute::sendPencils() {
00842
00843 assert ( myMgr->usePencils );
00844
00845
00846
00847 int xBlocks = myGrid.xBlocks;
00848 int yBlocks = myGrid.yBlocks;
00849 int zBlocks = myGrid.zBlocks;
00850
00851 int K1 = myGrid.K1;
00852 int K2 = myGrid.K2;
00853 int dim2 = myGrid.dim2;
00854 int dim3 = myGrid.dim3;
00855 int block1 = myGrid.block1;
00856 int block2 = myGrid.block2;
00857
00858
00859
00860 int nactive = 0;
00861 for (int idx = 0; idx < pencilVec.size(); idx++) {
00862 int xstart = pencilVec[idx].xmin;
00863 int ystart = pencilVec[idx].ymin;
00864 int xlen = pencilVec[idx].xmax - pencilVec[idx].xmin + 1;
00865 int ylen = pencilVec[idx].ymax - pencilVec[idx].ymin + 1;
00866 int ib = pencilVec[idx].ib;
00867 int jb = pencilVec[idx].jb;
00868 double *data = pencilVec[idx].data;
00869
00870 int fcount = xlen * ylen * zlen;
00871 OptPmeGridMsg *msg = new (fcount, PRIORITY_SIZE) OptPmeGridMsg;
00872 msg->zstart = zstart;
00873 msg->zlen = zlen;
00874 msg->xstart = xstart;
00875 msg->xlen = xlen;
00876 msg->ystart = ystart;
00877 msg->ylen = ylen;
00878 msg->sourceNode = CkMyPe();
00879 msg->patchID = patchList[0].patchID;
00880
00881 float *qmsg = msg->qgrid;
00882 #pragma disjoint (*data, *qmsg)
00883 #pragma unroll(8)
00884 for ( int k=0; k< fcount; ++k )
00885 *(qmsg++) = data[k];
00886
00887 myMgr->zPencil(ib,jb,0).recvGrid(msg);
00888 }
00889 }
00890
00891
00892 void OptPmeCompute::copyPencils(OptPmeGridMsg *msg) {
00893
00894 if (!_initialized) initializeOptPmeCompute();
00895
00896 int ibegin = msg->xstart;
00897 int iend = msg->xstart + msg->xlen;
00898 int jbegin = msg->ystart;
00899 int jend = msg->ylen;
00900 int fcount = zlen * msg->xlen * msg->ylen;
00901
00902 float *qmsg = msg->qgrid;
00903 double *data = q_arr[ibegin * myGrid.dim2 + jbegin];
00904
00905 #pragma disjoint (*qmsg, *data)
00906 #pragma unroll(8)
00907 for ( int k=0; k<fcount; ++k )
00908 data[k] = *(qmsg++);
00909 }
00910
00911 void OptPmeCompute::ungridForces() {
00912
00913
00914
00915 if (myMgr->_iter > many_to_many_start)
00916 pme_f2d (zline_storage, sp_zstorage, nzlines * zlen);
00917
00918 SimParameters *simParams = Node::Object()->simParameters;
00919 Vector *localResults = new Vector[numLocalAtoms];
00920
00921
00922 Vector *gridResults;
00923 gridResults = localResults;
00924
00925 Vector pairForce = 0.;
00926 Lattice lattice = patchList[0].p->flags.lattice;
00927 if(!simParams->commOnly) {
00928 #ifdef NETWORK_PROGRESS
00929 CmiNetworkProgress();
00930 #endif
00931
00932 if (!strayChargeErrors) {
00933 myRealSpace->compute_forces(q_arr, localData, gridResults, zstart, zlen);
00934 scale_forces(gridResults, numLocalAtoms, lattice);
00935 }
00936 delete myRealSpace;
00937 }
00938 delete [] localData;
00939
00940
00941 Vector *results_ptr = localResults;
00942 ResizeArrayIter<PatchElem> ap(patchList);
00943
00944
00945 for (ap = ap.begin(); ap != ap.end(); ap++) {
00946 Results *r = (*ap).forceBox->open();
00947 Force *f = r->f[Results::slow];
00948 int numAtoms = (*ap).p->getNumAtoms();
00949
00950 if ( ! strayChargeErrors && ! simParams->commOnly ) {
00951 for(int i=0; i<numAtoms; ++i) {
00952 f[i].x += results_ptr->x;
00953 f[i].y += results_ptr->y;
00954 f[i].z += results_ptr->z;
00955 ++results_ptr;
00956 }
00957 }
00958
00959 (*ap).forceBox->close(&r);
00960 }
00961
00962 delete [] localResults;
00963
00964 double scale = 1.;
00965
00966 reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += evir[0] * scale;
00967 reduction->item(REDUCTION_STRAY_CHARGE_ERRORS) += strayChargeErrors;
00968 strayChargeErrors = 0;
00969 reduction->submit();
00970 if (amd_reduction) {
00971 amd_reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += evir[0] * scale;
00972 amd_reduction->submit();
00973 }
00974 }
00975
00976
00977
00978 #include "fftlib.C"
00979 #include "OptPmeMgr.def.h"
00980
00981 void pme_f2d (double *dst, float *src, int N) {
00982 #pragma disjoint (*src, *dst)
00983 #pragma unroll (8)
00984 for (int i = 0; i < N; i++)
00985 dst[i] = src[i];
00986 }
00987
00988 void pme_d2f (float *dst, double *src, int N) {
00989 #pragma disjoint (*src, *dst)
00990 #pragma unroll (8)
00991 for (int i = 0; i < N; i++)
00992 dst[i] = src[i];
00993 }