00001
00007
00008
00009
00010
00011 #include "Node.h"
00012 #include "PatchMap.h"
00013 #include "PatchMap.inl"
00014 #include "AtomMap.h"
00015 #include "ComputeEwald.h"
00016 #include "PatchMgr.h"
00017 #include "Molecule.h"
00018 #include "ReductionMgr.h"
00019 #include "ComputeMgr.h"
00020 #include "ComputeMgr.decl.h"
00021 #include "ComputeNonbondedUtil.h"
00022 #include "SimParameters.h"
00023 #include "PmeBase.h"
00024 #include <stdio.h>
00025
00026
00027 #define MIN_DEBUG_LEVEL 1
00028 #include "Debug.h"
00029
00030 class EwaldParticle {
00031 public:
00032 float x, y, z;
00033 float cg;
00034 };
00035
00036
00037
00038
00039
00040 static int *generateAtomTypeTable(int ntypes) {
00041 int *table = new int[ntypes*ntypes];
00042 int ind = 0;
00043 for (int i=0; i<ntypes; i++) {
00044 for (int j=i; j<ntypes; j++) {
00045 table[ntypes*i+j] = table[ntypes*j+i] = ind++;
00046 }
00047 }
00048 return table;
00049 }
00050
00051 ComputeEwald::ComputeEwald(ComputeID c, ComputeMgr *m)
00052 : ComputeHomePatches(c)
00053 {
00054 DebugM(3,"Constructing client\n");
00055 comm = m;
00056 SimParameters *sp = Node::Object()->simParameters;
00057 kxmax = sp->pressureProfileEwaldX;
00058 kymax = sp->pressureProfileEwaldY;
00059 kzmax = sp->pressureProfileEwaldZ;
00060
00061 ktot = (1+kxmax) * (2*kymax+1) * (2*kzmax+1);
00062 kappa = ComputeNonbondedUtil::ewaldcof;
00063 pressureProfileSlabs = sp->pressureProfileSlabs;
00064 numAtomTypes = sp->pressureProfileAtomTypes;
00065 int nelements = 3*pressureProfileSlabs * (numAtomTypes*(numAtomTypes+1))/2;
00066 pressureProfileData = new float[nelements];
00067 reduction = ReductionMgr::Object()->willSubmit(
00068 REDUCTIONS_PPROF_NONBONDED, nelements);
00069
00070
00071 numWorkingPes = (PatchMap::Object())->numNodesWithPatches();
00072 masterNode = numWorkingPes - 1;
00073
00074 recvCount = 0;
00075 localAtoms = NULL;
00076 localPartitions = NULL;
00077
00078 expx = new float[kxmax+1];
00079 expy = new float[kymax+1];
00080 expz = new float[kzmax+1];
00081
00082 if (CkMyPe() == masterNode) {
00083 eiktotal = new float[2 * ktot * numAtomTypes];
00084 memset(eiktotal, 0, 2 * ktot * numAtomTypes*sizeof(float));
00085 } else {
00086 eiktotal = NULL;
00087 }
00088
00089 eiky = new floatcomplex[2*kymax+1];
00090
00091 eikz = new floatcomplex[2*kzmax+1];
00092 Qk = new float[3*ktot];
00093
00094 gridsForAtomType = generateAtomTypeTable(numAtomTypes);
00095 }
00096
00097 ComputeEwald::~ComputeEwald()
00098 {
00099 delete reduction;
00100 delete [] expx;
00101 delete [] expy;
00102 delete [] expz;
00103 delete [] eiktotal;
00104 delete [] eiky;
00105 delete [] eikz;
00106 delete [] pressureProfileData;
00107 delete [] Qk;
00108 delete [] gridsForAtomType;
00109
00110 if (localAtoms) free(localAtoms);
00111 if (localPartitions) free(localPartitions);
00112 }
00113
00114 void ComputeEwald::doWork() {
00115 ResizeArrayIter<PatchElem> ap(patchList);
00116
00117 if ( ! patchList[0].p->flags.doFullElectrostatics )
00118 {
00119 for (ap = ap.begin(); ap != ap.end(); ap++) {
00120 CompAtom *x = (*ap).positionBox->open();
00121 Results *r = (*ap).forceBox->open();
00122 (*ap).positionBox->close(&x);
00123 (*ap).forceBox->close(&r);
00124 }
00125 reduction->submit();
00126 return;
00127 }
00128
00129
00130 lattice = patchList[0].p->lattice;
00131 Vector o = lattice.origin();
00132
00133
00134 pressureProfileThickness = lattice.c().z / pressureProfileSlabs;
00135 pressureProfileMin = lattice.origin().z - 0.5*lattice.c().z;
00136
00137 const BigReal coloumb_sqrt = sqrt( COLOUMB * ComputeNonbondedUtil::scaling
00138 * ComputeNonbondedUtil::dielectric_1 );
00139
00140
00141 numLocalAtoms = 0;
00142 for (ap = ap.begin(); ap != ap.end(); ap++) {
00143 numLocalAtoms += (*ap).p->getNumAtoms();
00144 }
00145 localAtoms = (EwaldParticle *)realloc(localAtoms, numLocalAtoms*sizeof(EwaldParticle));
00146 localPartitions = (int *)realloc(localPartitions, numLocalAtoms*sizeof(int));
00147
00148 EwaldParticle *data_ptr = localAtoms;
00149 int *part_ptr = localPartitions;
00150
00151 for (ap = ap.begin(); ap != ap.end(); ap++) {
00152 CompAtom *x = (*ap).positionBox->open();
00153
00154 Results *r = (*ap).forceBox->open();
00155 if ( patchList[0].p->flags.doMolly ) {
00156 (*ap).positionBox->close(&x);
00157 x = (*ap).avgPositionBox->open();
00158 }
00159 int numAtoms = (*ap).p->getNumAtoms();
00160
00161 for(int i=0; i<numAtoms; ++i) {
00162
00163 Vector pos = x[i].position;
00164 pos += lattice.wrap_delta(pos) - o;
00165 *part_ptr++ = x[i].partition;
00166 data_ptr->x = pos.x;
00167 data_ptr->y = pos.y;
00168 data_ptr->z = pos.z;
00169 data_ptr->cg = coloumb_sqrt * x[i].charge;
00170 ++data_ptr;
00171 }
00172 (*ap).positionBox->close(&x);
00173 (*ap).forceBox->close(&r);
00174 }
00175
00176
00177
00178 int msgsize = 2 * numAtomTypes * ktot;
00179 ComputeEwaldMsg *msg = new (msgsize,0) ComputeEwaldMsg;
00180 memset(msg->eik, 0, msgsize*sizeof(float));
00181 compute_structurefactor(msg->eik);
00182
00183
00184 comm->sendComputeEwaldData(msg);
00185 }
00186
00187 void ComputeEwald::recvData(ComputeEwaldMsg *msg) {
00188
00189 int nvecs = 2 * ktot * numAtomTypes;
00190 for (int i=0; i<nvecs; i++) {
00191 eiktotal[i] += msg->eik[i];
00192 }
00193 delete msg;
00194 if (++recvCount == numWorkingPes) {
00195 recvCount = 0;
00196 int msgsize = 2 * ktot * numAtomTypes;
00197 msg = new (msgsize,0) ComputeEwaldMsg;
00198 memcpy(msg->eik, eiktotal, msgsize*sizeof(float));
00199 memset(eiktotal, 0, msgsize*sizeof(float));
00200 comm->sendComputeEwaldResults(msg);
00201 }
00202 }
00203
00204 void ComputeEwald::recvResults(ComputeEwaldMsg *msg) {
00205
00206 computePprofile(msg->eik);
00207 delete msg;
00208 float scalefac = 1.0 / (2 * M_PI * lattice.volume());
00209
00210 int nelements = 3*pressureProfileSlabs * (numAtomTypes*(numAtomTypes+1))/2;
00211 for (int i=0; i<nelements; i++) {
00212 reduction->item(i) += pressureProfileData[i] * scalefac;
00213 }
00214 reduction->submit();
00215 }
00216
00217 void ComputeEwald::compute_structurefactor(float *eik) {
00218
00219 float recipx = lattice.a_r().x;
00220 float recipy = lattice.b_r().y;
00221 float recipz = lattice.c_r().z;
00222 int i, j;
00223 for (i=0; i<numLocalAtoms; i++) {
00224
00225 float krx = 2 * M_PI * localAtoms[i].x * recipx;
00226 float kry = 2 * M_PI * localAtoms[i].y * recipy;
00227 float krz = 2 * M_PI * localAtoms[i].z * recipz;
00228 float cg = localAtoms[i].cg;
00229
00230 const int offset = 2*ktot*localPartitions[i];
00231
00232 floatcomplex eikx1(cos(krx), sin(krx));
00233 floatcomplex eiky1(cos(kry), sin(kry));
00234 floatcomplex eikz1(cos(krz), sin(krz));
00235
00236
00237 floatcomplex *ptr = eiky + kymax;
00238
00239 ptr[0] = 1;
00240 floatcomplex y(1,0);
00241 for (j=1; j<= kymax; j++) {
00242 y *= eiky1;
00243 ptr[j] = y;
00244 ptr[-j] = y.star();
00245 }
00246
00247
00248 ptr = eikz + kzmax;
00249
00250 ptr[0] = 1;
00251 floatcomplex z(1,0);
00252 for (j=1; j<= kzmax; j++) {
00253 z *= eikz1;
00254 ptr[j] = z;
00255 ptr[-j] = z.star();
00256 }
00257
00258
00259 floatcomplex ex(cg);
00260 int ind = offset;
00261 for (int kx=0; kx <= kxmax; kx++) {
00262 for (int ky=0; ky <= 2*kymax; ky++) {
00263 floatcomplex exy = eiky[ky];
00264 exy *= ex;
00265 const int max = 2*kzmax;
00266 const float exyr = exy.r;
00267 const float exyi = exy.i;
00268 const float *eikzd = (const float *)eikz;
00269 #pragma vector always
00270 for (int kz=0; kz <= max; kz++) {
00271 float ezr = *eikzd++;
00272 float ezi = *eikzd++;
00273 float eikr = ezr*exyr - ezi*exyi;
00274 float eiki = ezr*exyi + ezi*exyr;
00275
00276
00277 eik[ind ] += eikr;
00278 eik[ind+1] += eiki;
00279
00280
00281 ind += 2;
00282 }
00283 }
00284 ex *= eikx1;
00285 }
00286 }
00287 }
00288
00289
00290 static void init_exp(float *xp, int K, float recip, float kappa) {
00291 float piob = M_PI / kappa;
00292 piob *= piob;
00293 float fac = -piob*recip*recip;
00294 for (int i=0; i<= K; i++)
00295 xp[i] = exp(fac*i*i);
00296 }
00297
00298 void ComputeEwald::computePprofile(const float *eik) const {
00299 float recipx = lattice.a_r().x;
00300 float recipy = lattice.b_r().y;
00301 float recipz = lattice.c_r().z;
00302
00303 init_exp(expx, kxmax, recipx, kappa);
00304 init_exp(expy, kymax, recipy, kappa);
00305 init_exp(expz, kzmax, recipz, kappa);
00306
00307
00308 float piob = M_PI / kappa;
00309 piob *= piob;
00310
00311
00312 int ind = 0;
00313 for (int kx=0; kx <= kxmax; kx++) {
00314 float m11 = recipx * kx;
00315 m11 *= m11;
00316 float xfac = expx[kx] * (kx ? 2 : 1);
00317 for (int ky=-kymax; ky <= kymax; ky++) {
00318 float m22 = recipy * ky;
00319 m22 *= m22;
00320 float xyfac = expy[abs(ky)] * xfac;
00321 for (int kz=-kzmax; kz <= kzmax; kz++) {
00322 float m33 = recipz * kz;
00323 m33 *= m33;
00324 float msq = m11 + m22 + m33;
00325 float imsq = msq ? 1.0 / msq : 0;
00326 float fac = expz[abs(kz)] * xyfac * imsq;
00327
00328 float pfac = 2*(imsq + piob);
00329 Qk[ind++] = fac*(1-pfac*m11);
00330 Qk[ind++] = fac*(1-pfac*m22);
00331 Qk[ind++] = fac*(1-pfac*m33);
00332 }
00333 }
00334 }
00335
00336 const int nslabs = pressureProfileSlabs;
00337
00338 int nelements = 3*nslabs * (numAtomTypes*(numAtomTypes+1))/2;
00339 memset(pressureProfileData, 0, nelements*sizeof(float));
00340 int i, j;
00341 for (i=0; i<numLocalAtoms; i++) {
00342
00343 float krx = 2 * M_PI * localAtoms[i].x * recipx;
00344 float kry = 2 * M_PI * localAtoms[i].y * recipy;
00345 float krz = 2 * M_PI * localAtoms[i].z * recipz;
00346 float cg = localAtoms[i].cg;
00347 int atype = localPartitions[i];
00348 const int *grids = gridsForAtomType+atype*numAtomTypes;
00349
00350
00351 int slab = (int)floor((localAtoms[i].z - pressureProfileMin)/pressureProfileThickness);
00352 if (slab < 0) slab += nslabs;
00353 else if (slab >= nslabs) slab -= nslabs;
00354 float *pprofptr = pressureProfileData + 3*slab;
00355
00356 floatcomplex eikx1(cos(krx), sin(krx));
00357 floatcomplex eiky1(cos(kry), sin(kry));
00358 floatcomplex eikz1(cos(krz), sin(krz));
00359
00360
00361 floatcomplex *ptr = eiky + kymax;
00362
00363 ptr[0] = 1;
00364 floatcomplex y(1,0);
00365 for (j=1; j<= kymax; j++) {
00366 y *= eiky1;
00367 ptr[j] = y;
00368 ptr[-j] = y.star();
00369 }
00370
00371
00372 ptr = eikz + kzmax;
00373
00374 ptr[0] = 1;
00375 floatcomplex z(1,0);
00376 for (j=1; j<= kzmax; j++) {
00377 z *= eikz1;
00378 ptr[j] = z;
00379 ptr[-j] = z.star();
00380 }
00381
00382 int ind = 0;
00383 const float *Qkptr = Qk;
00384 floatcomplex ex(cg);
00385 const float *eikptr = eik;
00386 for (int kx=0; kx <= kxmax; kx++) {
00387 for (int ky=0; ky <= 2*kymax; ky++) {
00388 floatcomplex exy = eiky[ky];
00389 exy *= ex;
00390 const int kzmax2 = 2*kzmax+1;
00391 const float exyr = exy.r;
00392 const float exyi = exy.i;
00393 const float *eikzptr = (const float *)eikz;
00394 #pragma vector always
00395 for (int kz=0; kz < kzmax2; kz++) {
00396 float ezr = *eikzptr++;
00397 float ezi = *eikzptr++;
00398 float exyzr = ezr * exyr - ezi * exyi;
00399 float exyzi = ezr * exyi + ezi * exyr;
00400
00401
00402
00403
00404
00405 for (int igrid=0; igrid<numAtomTypes; igrid++) {
00406 float eikr = eikptr[igrid*2*ktot ];
00407 float eiki = eikptr[igrid*2*ktot+1];
00408 int grid = grids[igrid];
00409 int offset = 3*nslabs*grid;
00410
00411 float E = exyzr * eikr + exyzi * eiki;
00412 float vx = Qkptr[0] * E;
00413 float vy = Qkptr[1] * E;
00414 float vz = Qkptr[2] * E;
00415 pprofptr[offset ] += vx;
00416 pprofptr[offset+1] += vy;
00417 pprofptr[offset+2] += vz;
00418 }
00419
00420 eikptr += 2;
00421 Qkptr += 3;
00422 }
00423 }
00424 ex *= eikx1;
00425 }
00426 }
00427 }
00428