00001
00007 #ifdef NAMD_FFTW
00008 #ifdef NAMD_FFTW_NO_TYPE_PREFIX
00009 #include <fftw.h>
00010 #include <rfftw.h>
00011 #else
00012 #include <sfftw.h>
00013 #include <srfftw.h>
00014 #endif
00015 #endif
00016
00017 #include <assert.h>
00018
00019 #include "InfoStream.h"
00020 #include "Node.h"
00021 #include "PatchMap.h"
00022 #include "PatchMap.inl"
00023 #include "AtomMap.h"
00024 #include "OptPme.h"
00025 #include "OptPmeMgr.decl.h"
00026 #include "OptPmeRealSpace.h"
00027 #include "PmeKSpace.h"
00028 #include "ComputeNonbondedUtil.h"
00029 #include "PatchMgr.h"
00030 #include "Molecule.h"
00031 #include "ReductionMgr.h"
00032
00033 #include "ComputeMgr.decl.h"
00034
00035 #define MIN_DEBUG_LEVEL 3
00036 #include "Debug.h"
00037 #include "SimParameters.h"
00038 #include "WorkDistrib.h"
00039 #include "varsizemsg.h"
00040 #include "Random.h"
00041 #include "Priorities.h"
00042
00043
00044 extern char *pencilPMEProcessors;
00045
00046 #include "fftlib.h"
00047 #include "fftmap.h"
00048
00049
00050 int many_to_many_start = 0x7fffffff;
00051
00052 class OptPmeMgr : public BOCclass {
00053 public:
00054 friend class OptPmeCompute;
00055 OptPmeMgr();
00056 ~OptPmeMgr();
00057
00058 void initialize(CkQdMsg*);
00059 void initialize_pencils(CkQdMsg*);
00060 void activate_pencils(CkQdMsg*);
00061 void recvArrays(CProxy_OptPmeXPencil, CProxy_OptPmeYPencil, CProxy_OptPmeZPencil);
00062
00063 void recvUngrid(OptPmeGridMsg *);
00064 void ungridCalc(OptPmeDummyMsg *);
00065 void recvEvir (CkReductionMsg *msg);
00066
00067 void setCompute(OptPmeCompute *c) { pmeCompute = c; c->setMgr(this); }
00068
00069 private:
00070 CProxy_OptPmeMgr pmeProxy;
00071 CProxy_OptPmeMgr pmeProxyDir;
00072 OptPmeCompute *pmeCompute;
00073 PmeGrid myGrid;
00074 PmeKSpace *myKSpace;
00075
00076 CProxy_OptPmeXPencil xPencil;
00077 CProxy_OptPmeYPencil yPencil;
00078 CProxy_OptPmeZPencil zPencil;
00079 int numPencilsActive;
00080 int ungrid_count, usePencils;
00081 SubmitReduction *reduction;
00082 int _iter;
00083 void *handle;
00084 bool constant_pressure;
00085 };
00086
00087
00088 void pme_f2d (double *dst, float *src, int N);
00089 void pme_d2f (float *dst, double *src, int N);
00090
00091 static inline void initializePmeGrid (SimParameters *simParams, PmeGrid &grid) {
00092 int xBlocks = 0, yBlocks = 0, zBlocks= 0;
00093
00094 if ( simParams->PMEPencils > 1 ) {
00095 xBlocks = yBlocks = zBlocks = simParams->PMEPencils;
00096 } else {
00097 int nb2 = ( simParams->PMEGridSizeX * simParams->PMEGridSizeY
00098 * simParams->PMEGridSizeZ ) / simParams->PMEMinPoints;
00099 if ( nb2 > CkNumPes() ) nb2 = CkNumPes();
00100 int nb = (int) sqrt((float)nb2);
00101 xBlocks = zBlocks = nb;
00102 yBlocks = nb2 / nb;
00103 }
00104
00105 int dimx = simParams->PMEGridSizeX;
00106 int bx = 1 + ( dimx - 1 ) / xBlocks;
00107 xBlocks = 1 + ( dimx - 1 ) / bx;
00108
00109 int dimy = simParams->PMEGridSizeY;
00110 int by = 1 + ( dimy - 1 ) / yBlocks;
00111 yBlocks = 1 + ( dimy - 1 ) / by;
00112
00113 int dimz = simParams->PMEGridSizeZ / 2 + 1;
00114 int bz = 1 + ( dimz - 1 ) / zBlocks;
00115 zBlocks = 1 + ( dimz - 1 ) / bz;
00116
00117 grid.xBlocks = xBlocks;
00118 grid.yBlocks = yBlocks;
00119 grid.zBlocks = zBlocks;
00120
00121 grid.K1 = simParams->PMEGridSizeX;
00122 grid.K2 = simParams->PMEGridSizeY;
00123 grid.K3 = simParams->PMEGridSizeZ;
00124 grid.order = simParams->PMEInterpOrder;
00125 grid.dim2 = grid.K2;
00126 grid.dim3 = 2 * (grid.K3/2 + 1);
00127
00128 grid.block1 = ( grid.K1 + xBlocks - 1 ) / xBlocks;
00129 grid.block2 = ( grid.K2 + yBlocks - 1 ) / yBlocks;
00130 grid.block3 = ( grid.K3/2 + 1 + zBlocks - 1 ) / zBlocks;
00131 }
00132
00133 #ifdef NAMD_FFTW
00134 static CmiNodeLock fftw_plan_lock;
00135 #endif
00136
00137
00138 static inline void scale_n_copy_coordinates(CompAtom *x, PmeParticle p[],
00139 int &N,
00140 Lattice &lattice, PmeGrid grid,
00141
00142 double xmin, double xlen,
00143 double ymin, double ylen,
00144 double zmin, double zlen,
00145 int &scg) {
00146 Vector origin = lattice.origin();
00147 Vector recip1 = lattice.a_r();
00148 Vector recip2 = lattice.b_r();
00149 Vector recip3 = lattice.c_r();
00150 double ox = origin.x;
00151 double oy = origin.y;
00152 double oz = origin.z;
00153 double r1x = recip1.x;
00154 double r1y = recip1.y;
00155 double r1z = recip1.z;
00156 double r2x = recip2.x;
00157 double r2y = recip2.y;
00158 double r2z = recip2.z;
00159 double r3x = recip3.x;
00160 double r3y = recip3.y;
00161 double r3z = recip3.z;
00162 int K1 = grid.K1;
00163 int K2 = grid.K2;
00164 int K3 = grid.K3;
00165
00166 const BigReal coloumb_sqrt = sqrt( COLOUMB * ComputeNonbondedUtil::scaling
00167 * ComputeNonbondedUtil::dielectric_1 );
00168
00169
00170 int natoms = 0;
00171 for (int i=0; i<N; i++) {
00172 double px = x[i].position.x - ox;
00173 double py = x[i].position.y - oy;
00174 double pz = x[i].position.z - oz;
00175 double sx = px*r1x + py*r1y + pz*r1z;
00176 double sy = px*r2x + py*r2y + pz*r2z;
00177 double sz = px*r3x + py*r3y + pz*r3z;
00178 p[natoms].x = K1 * ( sx - floor(sx) );
00179 p[natoms].y = K2 * ( sy - floor(sy) );
00180 p[natoms].z = K3 * ( sz - floor(sz) );
00181
00182
00183 if ( p[natoms].x == K1 ) p[natoms].x = 0;
00184 if ( p[natoms].y == K2 ) p[natoms].y = 0;
00185 if ( p[natoms].z == K3 ) p[natoms].z = 0;
00186
00187 BigReal u1,u2,u3;
00188 u1 = (int) (p[natoms].x - xmin);
00189 if (u1 >= grid.K1) u1 -= grid.K1;
00190 u2 = (int) (p[natoms].y - ymin);
00191 if (u2 >= grid.K2) u2 -= grid.K2;
00192 u3 = (int) (p[natoms].z - zmin);
00193 if (u3 >= grid.K3) u3 -= grid.K3;
00194
00195 if ( (u1 < 0.0) || (u1 >= xlen) ||
00196 (u2 < 0.0) || (u2 >= ylen) ||
00197 (u3 < 0.0) || (u3 >= zlen) ) {
00198 scg ++;
00199 continue;
00200 }
00201
00202 p[natoms].cg = coloumb_sqrt * x[i].charge;
00203 natoms ++;
00204 }
00205 N = natoms;
00206 }
00207
00208
00209 OptPmeMgr::OptPmeMgr() : pmeProxy(thisgroup),
00210 pmeProxyDir(thisgroup), pmeCompute(0) {
00211
00212 CkpvAccess(BOCclass_group).computePmeMgr = thisgroup;
00213
00214 myKSpace = 0;
00215 ungrid_count = 0;
00216 usePencils = 0;
00217
00218 #ifdef NAMD_FFTW
00219 if ( CmiMyRank() == 0 ) {
00220 fftw_plan_lock = CmiCreateLock();
00221 }
00222 #endif
00223 }
00224
00225
00226 void OptPmeMgr::recvArrays(CProxy_OptPmeXPencil x, CProxy_OptPmeYPencil y, CProxy_OptPmeZPencil z) {
00227 xPencil = x; yPencil = y; zPencil = z;
00228 }
00229
00230 void OptPmeMgr::initialize(CkQdMsg *msg) {
00231 delete msg;
00232
00233 _iter = 0;
00234
00235 #if CHARM_VERSION > 60000
00236 handle = CmiDirect_manytomany_allocate_handle ();
00237 #endif
00238
00239 SimParameters *simParams = Node::Object()->simParameters;
00240 PatchMap *patchMap = PatchMap::Object();
00241
00242 if (simParams->PMEPencils > 0)
00243 usePencils = 1;
00244 else return;
00245
00246 initializePmeGrid (simParams, myGrid);
00247
00248 if (simParams->langevinPistonOn || simParams->berendsenPressureOn)
00249 constant_pressure = true;
00250 else
00251 constant_pressure = false;
00252
00253 bool useManyToMany = simParams->useManyToMany;
00254
00255 int npes = patchMap->numPatches() +
00256 myGrid.xBlocks * myGrid.yBlocks +
00257 myGrid.zBlocks * myGrid.xBlocks +
00258 myGrid.yBlocks * myGrid.zBlocks;
00259
00260 if (npes >= CkNumPes()) {
00261 if (CkMyPe() == 0)
00262 printf ("Warning : Not enough processors for the many-to-many optimization \n");
00263 useManyToMany = false;
00264 }
00265
00266 #if CHARM_VERSION > 60000
00267 if (useManyToMany) {
00268 if (CkMyPe() == 0)
00269 printf ("Enabling the Many-to-many optimization\n");
00270
00271 many_to_many_start = MANY_TO_MANY_START;
00272 }
00273 #endif
00274
00275 if ( CkMyPe() == 0) {
00276 iout << iINFO << "PME using " << myGrid.xBlocks << " x " <<
00277 myGrid.yBlocks << " x " << myGrid.zBlocks <<
00278 " pencil grid for FFT and reciprocal sum.\n" << endi;
00279
00280 pencilPMEProcessors = new char [CkNumPes()];
00281 memset (pencilPMEProcessors, 0, sizeof(char) * CkNumPes());
00282
00283 CProxy_OptPmePencilMapZ mapz;
00284 CProxy_OptPmePencilMapY mapy;
00285 CProxy_OptPmePencilMapX mapx;
00286
00287 mapz = CProxy_OptPmePencilMapZ::ckNew(myGrid.xBlocks, myGrid.yBlocks, myGrid.zBlocks);
00288 mapy = CProxy_OptPmePencilMapY::ckNew(myGrid.xBlocks, myGrid.yBlocks, myGrid.zBlocks);
00289 mapx = CProxy_OptPmePencilMapX::ckNew(myGrid.xBlocks, myGrid.yBlocks, myGrid.zBlocks);
00290
00291 CkArrayOptions optsz;
00292 optsz.setMap (mapz);
00293 CkArrayOptions optsy;
00294 optsy.setMap (mapy);
00295 CkArrayOptions optsx;
00296 optsx.setMap (mapx);
00297
00298 zPencil = CProxy_OptPmeZPencil::ckNew(optsz);
00299 yPencil = CProxy_OptPmeYPencil::ckNew(optsy);
00300 xPencil = CProxy_OptPmeXPencil::ckNew(optsx);
00301
00302 int x,y,z;
00303 for (x = 0; x < myGrid.xBlocks; ++x)
00304 for (y = 0; y < myGrid.yBlocks; ++y ) {
00305 zPencil(x,y,0).insert();
00306 }
00307 zPencil.doneInserting();
00308
00309 for (z = 0; z < myGrid.zBlocks; ++z )
00310 for (x = 0; x < myGrid.xBlocks; ++x ) {
00311 yPencil(x,0,z).insert();
00312 }
00313 yPencil.doneInserting();
00314
00315 for (y = 0; y < myGrid.yBlocks; ++y )
00316 for (z = 0; z < myGrid.zBlocks; ++z ) {
00317 xPencil(0,y,z).insert();
00318 }
00319 xPencil.doneInserting();
00320
00321 pmeProxy.recvArrays(xPencil,yPencil,zPencil);
00322 OptPmePencilInitMsgData msgdata;
00323 msgdata.grid = myGrid;
00324 msgdata.xBlocks = myGrid.xBlocks;
00325 msgdata.yBlocks = myGrid.yBlocks;
00326 msgdata.zBlocks = myGrid.zBlocks;
00327 msgdata.xPencil = xPencil;
00328 msgdata.yPencil = yPencil;
00329 msgdata.zPencil = zPencil;
00330 msgdata.constant_pressure = constant_pressure;
00331
00332 CkCallback cb (CkIndex_OptPmeMgr::recvEvir(NULL), thisProxy[0]);
00333 msgdata.cb_energy = cb;
00334
00335 msgdata.pmeProxy = pmeProxyDir;
00336 xPencil.init(new OptPmePencilInitMsg(msgdata));
00337 yPencil.init(new OptPmePencilInitMsg(msgdata));
00338 zPencil.init(new OptPmePencilInitMsg(msgdata));
00339
00340 reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_BASIC);
00341
00342 #ifndef NAMD_FFTW
00343 NAMD_die("Sorry, FFTW must be compiled in to use PME.");
00344 #endif
00345 }
00346
00347 }
00348
00349 void OptPmeMgr::initialize_pencils(CkQdMsg *msg) {
00350 delete msg;
00351
00352 SimParameters *simParams = Node::Object()->simParameters;
00353
00354 PatchMap *patchMap = PatchMap::Object();
00355 Lattice lattice = simParams->lattice;
00356 BigReal sysdima = lattice.a_r().unit() * lattice.a();
00357 BigReal sysdimb = lattice.b_r().unit() * lattice.b();
00358 BigReal cutoff = simParams->cutoff;
00359 BigReal patchdim = simParams->patchDimension;
00360 int numPatches = patchMap->numPatches();
00361
00362 char *pencilActive = new char[myGrid.xBlocks*myGrid.yBlocks];
00363 for ( int i=0; i<myGrid.xBlocks; ++i ) {
00364 for ( int j=0; j<myGrid.yBlocks; ++j ) {
00365 pencilActive[i*myGrid.yBlocks+j] = 0;
00366 }
00367 }
00368
00369
00370 assert (patchMap->numPatchesOnNode(CkMyPe()) <= 1);
00371 for ( int pid=0; pid < numPatches; ++pid ) {
00372 int pnode = patchMap->node(pid);
00373 if ( pnode != CkMyPe() ) continue;
00374
00375 BigReal minx = patchMap->min_a(pid);
00376 BigReal maxx = patchMap->max_a(pid);
00377 BigReal margina = 0.5 * ( patchdim - cutoff ) / sysdima;
00378
00379 int min1 = ((int) floor(myGrid.K1 * (minx - margina))) - myGrid.order + 1;
00380 int max1 = ((int) floor(myGrid.K1 * (maxx + margina)));
00381
00382 BigReal miny = patchMap->min_b(pid);
00383 BigReal maxy = patchMap->max_b(pid);
00384 BigReal marginb = 0.5 * ( patchdim - cutoff ) / sysdimb;
00385
00386 int min2 = ((int) floor(myGrid.K2 * (miny - marginb))) - myGrid.order + 1;
00387 int max2 = ((int) floor(myGrid.K2 * (maxy + marginb)));
00388
00389 for ( int i=min1; i<=max1; ++i ) {
00390 int ix = i;
00391 while ( ix >= myGrid.K1 ) ix -= myGrid.K1;
00392 while ( ix < 0 ) ix += myGrid.K1;
00393 for ( int j=min2; j<=max2; ++j ) {
00394 int jy = j;
00395 while ( jy >= myGrid.K2 ) jy -= myGrid.K2;
00396 while ( jy < 0 ) jy += myGrid.K2;
00397 pencilActive[(ix / myGrid.block1)*myGrid.yBlocks + (jy / myGrid.block2)] = 1;
00398 }
00399 }
00400 }
00401
00402 numPencilsActive = 0;
00403 for ( int i=0; i<myGrid.xBlocks; ++i ) {
00404 for ( int j=0; j<myGrid.yBlocks; ++j ) {
00405 if ( pencilActive[i*myGrid.yBlocks+j] ) {
00406 ++numPencilsActive;
00407
00408 zPencil(i,j,0).dummyRecvGrid(CkMyPe(),0);
00409 }
00410 }
00411 }
00412
00413 ungrid_count = numPencilsActive;
00414 delete [] pencilActive;
00415 }
00416
00417
00418 void OptPmeMgr::activate_pencils(CkQdMsg *msg) {
00419 if ( ! usePencils ) return;
00420 if ( CkMyPe() == 0 ) zPencil.dummyRecvGrid(CkMyPe(),1);
00421 }
00422
00423
00424 OptPmeMgr::~OptPmeMgr() {
00425 delete myKSpace;
00426 }
00427
00428 void OptPmeMgr::recvUngrid(OptPmeGridMsg *msg) {
00429 if ( ungrid_count == 0 ) {
00430 NAMD_bug("Message order failure in OptPmeMgr::recvUngrid\n");
00431 }
00432
00433 pmeCompute->copyPencils(msg);
00434 delete msg;
00435 --ungrid_count;
00436
00437 if ( ungrid_count == 0 ) {
00438
00439 ungridCalc(NULL);
00440 }
00441 }
00442
00443 void OptPmeMgr::ungridCalc(OptPmeDummyMsg *msg) {
00444 pmeCompute->ungridForces();
00445 ungrid_count = numPencilsActive;
00446 }
00447
00448
00449 void OptPmeMgr::recvEvir (CkReductionMsg *msg) {
00450
00451 assert (CkMyPe() == 0);
00452
00453 double *data = (double *) msg->getData();
00454 assert (msg->getSize() == 7 * sizeof(double));
00455
00456
00457
00458 double scale = 1.;
00459 reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += data[0] * scale;
00460 reduction->item(REDUCTION_VIRIAL_SLOW_XX) += data[1] * scale;
00461 reduction->item(REDUCTION_VIRIAL_SLOW_XY) += data[2] * scale;
00462 reduction->item(REDUCTION_VIRIAL_SLOW_XZ) += data[3] * scale;
00463 reduction->item(REDUCTION_VIRIAL_SLOW_YX) += data[2] * scale;
00464 reduction->item(REDUCTION_VIRIAL_SLOW_YY) += data[4] * scale;
00465 reduction->item(REDUCTION_VIRIAL_SLOW_YZ) += data[5] * scale;
00466 reduction->item(REDUCTION_VIRIAL_SLOW_ZX) += data[3] * scale;
00467 reduction->item(REDUCTION_VIRIAL_SLOW_ZY) += data[5] * scale;
00468 reduction->item(REDUCTION_VIRIAL_SLOW_ZZ) += data[6] * scale;
00469
00470 delete msg;
00471
00472 SimParameters *simParams = Node::Object()->simParameters;
00473 int fef = simParams->fullElectFrequency;
00474 for (int i = 0; i < fef; i++)
00475 reduction->submit();
00476 }
00477
00478 OptPmeCompute::OptPmeCompute(ComputeID c) :
00479 ComputeHomePatches(c)
00480 {
00481 DebugM(4,"OptPmeCompute created.\n");
00482
00483 CProxy_OptPmeMgr::ckLocalBranch(
00484 CkpvAccess(BOCclass_group).computePmeMgr)->setCompute(this);
00485
00486 _initialized = false;
00487
00488 useAvgPositions = 1;
00489
00490 reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_BASIC);
00491 }
00492
00493 void recv_ungrid_done (void *m) {
00494 OptPmeDummyMsg *msg = (OptPmeDummyMsg *) m;
00495 CProxy_OptPmeMgr pmeProxy (CkpvAccess(BOCclass_group).computePmeMgr);
00496 pmeProxy[CkMyPe()].ungridCalc (msg);
00497 }
00498
00499
00500 void OptPmeCompute::resetPatchCoordinates (const Lattice &lattice) {
00501 PatchMap *patchMap = PatchMap::Object();
00502 SimParameters *simParams = Node::Object()->simParameters;
00503
00504 BigReal sysdima = lattice.a_r().unit() * lattice.a();
00505 BigReal sysdimb = lattice.b_r().unit() * lattice.b();
00506 BigReal sysdimc = lattice.c_r().unit() * lattice.c();
00507 BigReal cutoff = simParams->cutoff;
00508 BigReal patchdim = simParams->patchDimension;
00509
00510 int pid = patchList[0].patchID;
00511 assert (patchList.size() == 1);
00512
00513
00514
00515 BigReal minx = patchMap->min_a(pid);
00516 BigReal maxx = patchMap->max_a(pid);
00517 BigReal margina = 0.5 * ( patchdim - cutoff ) / sysdima;
00518
00519 int min1 = ((int) floor(myGrid.K1 * (minx - margina))) - myGrid.order + 1;
00520 int max1 = ((int) floor(myGrid.K1 * (maxx + margina)));
00521
00522 BigReal miny = patchMap->min_b(pid);
00523 BigReal maxy = patchMap->max_b(pid);
00524 BigReal marginb = 0.5 * ( patchdim - cutoff ) / sysdimb;
00525
00526 int min2 = ((int) floor(myGrid.K2 * (miny - marginb))) - myGrid.order + 1;
00527 int max2 = ((int) floor(myGrid.K2 * (maxy + marginb)));
00528
00529 BigReal minz = patchMap->min_c(pid);
00530 BigReal maxz = patchMap->max_c(pid);
00531 BigReal marginc = 0.5 * ( patchdim - cutoff ) / sysdimc;
00532
00533 int min3 = ((int) floor(myGrid.K3 * (minz - marginc))) - myGrid.order + 1;
00534 int max3 = ((int) floor(myGrid.K3 * (maxz + marginc)));
00535
00536 xlen = max1 - min1 + 1;
00537 xstart = min1;
00538
00539 ylen = max2 - min2 + 1;
00540 ystart = min2;
00541
00542 zlen = max3 - min3 + 1;
00543 zstart = min3;
00544 }
00545
00546 void OptPmeCompute::initializeOptPmeCompute () {
00547
00548 _initialized = true;
00549
00550 strayChargeErrors = 0;
00551
00552 SimParameters *simParams = Node::Object()->simParameters;
00553 PatchMap *patchMap = PatchMap::Object();
00554
00555 initializePmeGrid (simParams, myGrid);
00556
00557 alchFepOn = simParams->alchFepOn;
00558 alchThermIntOn = simParams->alchThermIntOn;
00559 lesOn = simParams->lesOn;
00560 pairOn = simParams->pairInteractionOn;
00561
00562 assert (!alchFepOn);
00563 assert (!alchThermIntOn);
00564 assert (!lesOn);
00565 assert (!pairOn);
00566
00567 qsize = myGrid.K1 * myGrid.dim2 * myGrid.dim3;
00568 fsize = myGrid.K1 * myGrid.dim2;
00569 q_arr = new double*[fsize];
00570 memset( (void*) q_arr, 0, fsize * sizeof(double*) );
00571
00572 assert (myMgr != NULL);
00573
00574 const Lattice lattice = simParams->lattice;
00575 resetPatchCoordinates (lattice);
00576
00577 PencilElement *pencilActive = new PencilElement [myGrid.xBlocks * myGrid.yBlocks];
00578 memset (pencilActive, 0, myGrid.xBlocks * myGrid.yBlocks * sizeof(PencilElement));
00579
00580 for ( int i=xstart; i<=xstart+xlen-1; ++i ) {
00581 int ix = i;
00582 while ( ix >= myGrid.K1 ) ix -= myGrid.K1;
00583 while ( ix < 0 ) ix += myGrid.K1;
00584 for ( int j=ystart; j<=ystart+ylen-1; ++j ) {
00585 int jy = j;
00586 while ( jy >= myGrid.K2 ) jy -= myGrid.K2;
00587 while ( jy < 0 ) jy += myGrid.K2;
00588
00589 int pencil_idx = (ix / myGrid.block1)*myGrid.yBlocks + (jy / myGrid.block2);
00590
00591 if (! pencilActive[pencil_idx].isActive) {
00592 pencilActive[pencil_idx].isActive = 1;
00593 pencilActive[pencil_idx].xmin = ix;
00594 pencilActive[pencil_idx].xmax = ix;
00595 pencilActive[pencil_idx].ymin = jy;
00596 pencilActive[pencil_idx].ymax = jy;
00597 pencilActive[pencil_idx].ib = ix / myGrid.block1;
00598 pencilActive[pencil_idx].jb = jy / myGrid.block2;
00599 }
00600 else {
00601 if (pencilActive[pencil_idx].xmin > ix)
00602 pencilActive[pencil_idx].xmin = ix;
00603 if (pencilActive[pencil_idx].xmax < ix)
00604 pencilActive[pencil_idx].xmax = ix;
00605 if (pencilActive[pencil_idx].ymin > jy)
00606 pencilActive[pencil_idx].ymin = jy;
00607 if (pencilActive[pencil_idx].ymax < jy)
00608 pencilActive[pencil_idx].ymax = jy;
00609 }
00610 }
00611 }
00612
00613 int nactive = 0;
00614 for (int ib=0; ib<myGrid.xBlocks; ++ib)
00615 for (int jb=0; jb<myGrid.yBlocks; ++jb)
00616 if (pencilActive[ib * myGrid.yBlocks + jb].isActive)
00617 nactive ++;
00618
00619 assert (nactive == myMgr->numPencilsActive);
00620
00621 nzlines = xlen * ylen;
00622 zline_storage = new double [nzlines * zlen];
00623 sp_zstorage = new float [nzlines * zlen];
00624
00625
00626
00627
00628 assert (zline_storage != NULL);
00629 double * zblock = zline_storage;
00630
00631 for (int ib=0; ib<myGrid.xBlocks; ++ib) {
00632 for (int jb=0; jb<myGrid.yBlocks; ++jb) {
00633 int index = ib * myGrid.yBlocks + jb;
00634 if (pencilActive[index].isActive) {
00635 pencilActive[index].data = zblock;
00636 for (int i = pencilActive[index].xmin; i <= pencilActive[index].xmax; i++)
00637 for (int j = pencilActive[index].ymin; j <= pencilActive[index].ymax; j++) {
00638 q_arr[i*myGrid.dim2 + j] = zblock;
00639 zblock += zlen;
00640 assert ((char *) zblock <= (char *)(zline_storage + nzlines*zlen));
00641 }
00642 }
00643 }
00644 }
00645
00646 pencilVec.resize (nactive);
00647 nactive = 0;
00648 for (int ib=0; ib<myGrid.xBlocks; ++ib)
00649 for (int jb=0; jb<myGrid.yBlocks; ++jb)
00650 if (pencilActive[ib*myGrid.yBlocks + jb].isActive)
00651 pencilVec[nactive ++] = pencilActive [ib * myGrid.yBlocks + jb];
00652
00653
00654 delete [] pencilActive;
00655
00656 #if CHARM_VERSION > 60000
00657
00658 OptPmeDummyMsg *m = new (PRIORITY_SIZE) OptPmeDummyMsg;
00659 CmiDirect_manytomany_initialize_recvbase (myMgr->handle, PHASE_UG,
00660 recv_ungrid_done, m, (char *)sp_zstorage,
00661 myGrid.xBlocks*myGrid.yBlocks, -1);
00662
00663 CkCallback cbi (CkCallback::ignore);
00664 CmiDirect_manytomany_initialize_sendbase (myMgr->handle, PHASE_GR, NULL, NULL,
00665 (char *)sp_zstorage,
00666 pencilVec.size(), patchList[0].patchID);
00667
00668 for (int idx = 0; idx < pencilVec.size(); idx++) {
00669 int ib = pencilVec[idx].ib;
00670 int jb = pencilVec[idx].jb;
00671 double * data = pencilVec[idx].data;
00672 int offset = (data - zline_storage)*sizeof(float);
00673 int xlen = pencilVec[idx].xmax - pencilVec[idx].xmin + 1;
00674 int ylen = pencilVec[idx].ymax - pencilVec[idx].ymin + 1;
00675 int fcount = xlen * ylen * zlen;
00676
00677 CkArrayIndex3D index (ib, jb, 0);
00678 CProxy_OptPmePencilMapZ zproxy (global_map_z);
00679 int pe = zproxy.ckLocalBranch()->procNum(0, index);
00680 CmiDirect_manytomany_initialize_send (myMgr->handle, PHASE_GR, idx,
00681 offset, fcount*sizeof(float), pe);
00682
00683 int srcnode = ib * myGrid.yBlocks + jb;
00684 CmiDirect_manytomany_initialize_recv (myMgr->handle, PHASE_UG,
00685 srcnode, offset, fcount*sizeof(float), pe);
00686 }
00687
00688 #endif
00689
00690 }
00691
00692 OptPmeCompute::~OptPmeCompute()
00693 {
00694 delete [] zline_storage;
00695 delete [] q_arr;
00696 }
00697
00698
00699 void OptPmeCompute::doWork()
00700 {
00701 DebugM(4,"Entering OptPmeCompute::doWork().\n");
00702
00703 #ifdef TRACE_COMPUTE_OBJECTS
00704 double traceObjStartTime = CmiWallTimer();
00705 #endif
00706
00707 if (!_initialized) initializeOptPmeCompute();
00708
00709 ResizeArrayIter<PatchElem> ap(patchList);
00710
00711
00712 if ( ! patchList[0].p->flags.doFullElectrostatics )
00713 {
00714 for (ap = ap.begin(); ap != ap.end(); ap++) {
00715 CompAtom *x = (*ap).positionBox->open();
00716 Results *r = (*ap).forceBox->open();
00717 (*ap).positionBox->close(&x);
00718 (*ap).forceBox->close(&r);
00719 }
00720 reduction->submit();
00721 return;
00722 }
00723
00724 myMgr->_iter ++;
00725
00726
00727 numLocalAtoms = 0;
00728 for (ap = ap.begin(); ap != ap.end(); ap++) {
00729 numLocalAtoms += (*ap).p->getNumAtoms();
00730 }
00731
00732 Lattice &lattice = patchList[0].p->flags.lattice;
00733
00734 localData = new PmeParticle[numLocalAtoms];
00735
00736 PmeParticle * data_ptr = localData;
00737
00738 int natoms = 0;
00739 if (myMgr->constant_pressure)
00740 resetPatchCoordinates(lattice);
00741
00742 for (ap = ap.begin(); ap != ap.end(); ap++) {
00743 #ifdef NETWORK_PROGRESS
00744 CmiNetworkProgress();
00745 #endif
00746
00747 CompAtom *x = (*ap).positionBox->open();
00748 if ( patchList[0].p->flags.doMolly ) {
00749 (*ap).positionBox->close(&x);
00750 x = (*ap).avgPositionBox->open();
00751 }
00752 int numAtoms = (*ap).p->getNumAtoms();
00753 int order_1 = myGrid.order - 1;
00754 scale_n_copy_coordinates(x, localData, numAtoms,
00755 lattice, myGrid,
00756 xstart + order_1, xlen - order_1,
00757 ystart + order_1, ylen - order_1,
00758 zstart + order_1, zlen - order_1,
00759 strayChargeErrors);
00760 natoms += numAtoms;
00761
00762 if ( patchList[0].p->flags.doMolly ) { (*ap).avgPositionBox->close(&x); }
00763 else { (*ap).positionBox->close(&x); }
00764 }
00765
00766 numLocalAtoms = natoms;
00767
00768
00769 BigReal ewaldcof = ComputeNonbondedUtil::ewaldcof;
00770 evir[0] = 0;
00771 BigReal selfEnergy = 0;
00772 data_ptr = localData;
00773 int i;
00774 for(i=0; i<numLocalAtoms; ++i)
00775 {
00776 selfEnergy += data_ptr->cg * data_ptr->cg;
00777 ++data_ptr;
00778 }
00779 selfEnergy *= -1. * ewaldcof / SQRT_PI;
00780 evir[0][0] += selfEnergy;
00781
00782 double **q = q_arr;
00783 memset( (void*) zline_storage, 0, zlen * nzlines * sizeof(double) );
00784
00785 myRealSpace[0] = new OptPmeRealSpace(myGrid,numLocalAtoms);
00786
00787 if (!strayChargeErrors)
00788 myRealSpace[0]->fill_charges(q, localData, zstart, zlen);
00789
00790 #ifdef TRACE_COMPUTE_OBJECTS
00791 traceUserBracketEvent(TRACE_COMPOBJ_IDOFFSET+this->cid, traceObjStartTime, CmiWallTimer());
00792 #endif
00793
00794 if (myMgr->constant_pressure && patchList[0].patchID == 0)
00795 myMgr->xPencil.recvLattice (lattice);
00796
00797 if (myMgr->_iter <= many_to_many_start)
00798 sendPencils();
00799 #if CHARM_VERSION > 60000
00800 else {
00801 pme_d2f (sp_zstorage, zline_storage, nzlines * zlen);
00802 CmiDirect_manytomany_start (myMgr->handle, PHASE_GR);
00803 }
00804 #endif
00805 }
00806
00807
00808
00809 void OptPmeCompute::sendPencils() {
00810
00811 assert ( myMgr->usePencils );
00812
00813
00814
00815 int xBlocks = myGrid.xBlocks;
00816 int yBlocks = myGrid.yBlocks;
00817 int zBlocks = myGrid.zBlocks;
00818
00819 int K1 = myGrid.K1;
00820 int K2 = myGrid.K2;
00821 int dim2 = myGrid.dim2;
00822 int dim3 = myGrid.dim3;
00823 int block1 = myGrid.block1;
00824 int block2 = myGrid.block2;
00825
00826
00827
00828 int nactive = 0;
00829 for (int idx = 0; idx < pencilVec.size(); idx++) {
00830 int xstart = pencilVec[idx].xmin;
00831 int ystart = pencilVec[idx].ymin;
00832 int xlen = pencilVec[idx].xmax - pencilVec[idx].xmin + 1;
00833 int ylen = pencilVec[idx].ymax - pencilVec[idx].ymin + 1;
00834 int ib = pencilVec[idx].ib;
00835 int jb = pencilVec[idx].jb;
00836 double *data = pencilVec[idx].data;
00837
00838 int fcount = xlen * ylen * zlen;
00839 OptPmeGridMsg *msg = new (fcount, PRIORITY_SIZE) OptPmeGridMsg;
00840 msg->zstart = zstart;
00841 msg->zlen = zlen;
00842 msg->xstart = xstart;
00843 msg->xlen = xlen;
00844 msg->ystart = ystart;
00845 msg->ylen = ylen;
00846 msg->sourceNode = CkMyPe();
00847 msg->patchID = patchList[0].patchID;
00848
00849 float *qmsg = msg->qgrid;
00850 #pragma disjoint (*data, *qmsg)
00851 #pragma unroll(8)
00852 for ( int k=0; k< fcount; ++k )
00853 *(qmsg++) = data[k];
00854
00855 myMgr->zPencil(ib,jb,0).recvGrid(msg);
00856 }
00857 }
00858
00859
00860 void OptPmeCompute::copyPencils(OptPmeGridMsg *msg) {
00861
00862 if (!_initialized) initializeOptPmeCompute();
00863
00864 int ibegin = msg->xstart;
00865 int iend = msg->xstart + msg->xlen;
00866 int jbegin = msg->ystart;
00867 int jend = msg->ylen;
00868 int fcount = zlen * msg->xlen * msg->ylen;
00869
00870 float *qmsg = msg->qgrid;
00871 double *data = q_arr[ibegin * myGrid.dim2 + jbegin];
00872
00873 #pragma disjoint (*qmsg, *data)
00874 #pragma unroll(8)
00875 for ( int k=0; k<fcount; ++k )
00876 data[k] = *(qmsg++);
00877 }
00878
00879 void OptPmeCompute::ungridForces() {
00880
00881
00882
00883 if (myMgr->_iter > many_to_many_start)
00884 pme_f2d (zline_storage, sp_zstorage, nzlines * zlen);
00885
00886 SimParameters *simParams = Node::Object()->simParameters;
00887 Vector *localResults = new Vector[numLocalAtoms];
00888
00889
00890 Vector *gridResults;
00891 gridResults = localResults;
00892
00893 Vector pairForce = 0.;
00894 Lattice lattice = patchList[0].p->flags.lattice;
00895 if(!simParams->commOnly) {
00896 #ifdef NETWORK_PROGRESS
00897 CmiNetworkProgress();
00898 #endif
00899
00900 if (!strayChargeErrors) {
00901 myRealSpace[0]->compute_forces(q_arr, localData, gridResults, zstart, zlen);
00902 scale_forces(gridResults, numLocalAtoms, lattice);
00903 }
00904 delete myRealSpace[0];
00905 }
00906 delete [] localData;
00907
00908
00909 Vector *results_ptr = localResults;
00910 ResizeArrayIter<PatchElem> ap(patchList);
00911
00912
00913 for (ap = ap.begin(); ap != ap.end(); ap++) {
00914 Results *r = (*ap).forceBox->open();
00915 Force *f = r->f[Results::slow];
00916 int numAtoms = (*ap).p->getNumAtoms();
00917
00918 if ( ! strayChargeErrors && ! simParams->commOnly ) {
00919 for(int i=0; i<numAtoms; ++i) {
00920 f[i].x += results_ptr->x;
00921 f[i].y += results_ptr->y;
00922 f[i].z += results_ptr->z;
00923 ++results_ptr;
00924 }
00925 }
00926
00927 (*ap).forceBox->close(&r);
00928 }
00929
00930 delete [] localResults;
00931
00932 double scale = 1.;
00933
00934 reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += evir[0][0] * scale;
00935 reduction->item(REDUCTION_STRAY_CHARGE_ERRORS) += strayChargeErrors;
00936 strayChargeErrors = 0;
00937 reduction->submit();
00938 }
00939
00940
00941
00942 #include "fftlib.C"
00943 #include "OptPmeMgr.def.h"
00944
00945 void pme_f2d (double *dst, float *src, int N) {
00946 #pragma disjoint (*src, *dst)
00947 #pragma unroll (8)
00948 for (int i = 0; i < N; i++)
00949 dst[i] = src[i];
00950 }
00951
00952 void pme_d2f (float *dst, double *src, int N) {
00953 #pragma disjoint (*src, *dst)
00954 #pragma unroll (8)
00955 for (int i = 0; i < N; i++)
00956 dst[i] = src[i];
00957 }