PmeRealSpace.C

Go to the documentation of this file.
00001 
00007 #include <string.h>
00008 #include "PmeBase.inl"
00009 #include "PmeRealSpace.h"
00010 #include "Node.h"
00011 #include "SimParameters.h"
00012 
00013 PmeRealSpace::PmeRealSpace(PmeGrid grid)
00014   : myGrid(grid) {
00015 }
00016 
00017 PmeRealSpace::~PmeRealSpace() {
00018 }
00019 
00020 void PmeRealSpace::set_num_atoms(int natoms) {
00021   N = natoms;
00022   int order = myGrid.order;
00023   M_alloc.resize(3*N*order);
00024   M = M_alloc.begin();
00025   dM_alloc.resize(3*N*order);
00026   dM = dM_alloc.begin();
00027 }
00028 
00029 template <int order>
00030 void PmeRealSpace::fill_b_spline(PmeParticle p[]) {
00031   float fr[3]; 
00032   float *Mi, *dMi;
00033   int i, stride;
00034 
00035   stride = 3*order;
00036   Mi = M; dMi = dM;
00037   for (i=0; i<N; i++) {
00038     fr[0] = (float)(p[i].x - (double)(int)(p[i].x));  // subtract in double precision
00039     fr[1] = (float)(p[i].y - (double)(int)(p[i].y));
00040     fr[2] = (float)(p[i].z - (double)(int)(p[i].z));
00041     compute_b_spline(fr, Mi, dMi, order);
00042     Mi += stride;
00043     dMi += stride;
00044   }
00045 }
00046 
00047 void PmeRealSpace::fill_charges(float **q_arr, float **q_arr_list, int &q_arr_count, 
00048                        int &stray_count, char *f_arr, char *fz_arr, PmeParticle p[]) {
00049 
00050   switch (myGrid.order) {
00051   case 4:
00052     fill_charges_order4(q_arr, q_arr_list, q_arr_count, stray_count, f_arr, fz_arr, p);
00053     break;
00054   case 6:
00055     fill_charges_order<6>(q_arr, q_arr_list, q_arr_count, stray_count, f_arr, fz_arr, p);
00056     break;
00057   case 8:
00058     fill_charges_order<8>(q_arr, q_arr_list, q_arr_count, stray_count, f_arr, fz_arr, p);
00059     break;
00060   case 10:
00061     fill_charges_order<10>(q_arr, q_arr_list, q_arr_count, stray_count, f_arr, fz_arr, p);
00062     break;
00063   default: NAMD_die("unsupported PMEInterpOrder");
00064   }
00065 
00066 }
00067   
00068 template <int order>
00069 void PmeRealSpace::fill_charges_order(float **q_arr, float **q_arr_list, int &q_arr_count, 
00070                        int &stray_count, char *f_arr, char *fz_arr, PmeParticle p[]) {
00071   
00072   int i, j, k, l;
00073   int stride;
00074   int K1, K2, K3, dim2, dim3;
00075 
00076   if ( order != myGrid.order ) NAMD_bug("fill_charges_order template mismatch");
00077 
00078   if ( order == 4 ) {
00079     fill_charges_order4(q_arr, q_arr_list, q_arr_count, stray_count, f_arr, fz_arr, p);
00080     return;
00081   }
00082 
00083   float * __restrict Mi;
00084   Mi = M;
00085   K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3; dim2=myGrid.dim2; dim3=myGrid.dim3;
00086   stride = 3*order;
00087 
00088   fill_b_spline<order>(p);
00089 
00090   for (i=0; i<N; i++) {
00091     float q;
00092     int u1, u2, u2i, u3i;
00093     q = p[i].cg;
00094     u1 = (int)(p[i].x);
00095     u2i = (int)(p[i].y);
00096     u3i = (int)(p[i].z);
00097     u1 -= order;
00098     u2i -= order;
00099     u3i -= order;
00100     u3i += 1;
00101     if ( u3i < 0 ) u3i += K3;
00102     for (j=0; j<order; j++) {
00103       float m1;
00104       int ind1;
00105       m1 = Mi[j]*q;
00106       u1++;
00107       ind1 = (u1 + (u1 < 0 ? K1 : 0))*dim2;
00108       u2 = u2i;
00109       for (k=0; k<order; k++) {
00110         float m1m2;
00111         int ind2;
00112         m1m2 = m1*Mi[order+k];
00113         u2++;
00114         ind2 = ind1 + (u2 + (u2 < 0 ? K2 : 0));
00115         float * __restrict qline = q_arr[ind2];
00116         if ( ! qline ) {
00117           if ( f_arr[ind2] ) {
00118             f_arr[ind2] = 3;
00119             ++stray_count;
00120             continue;
00121           }
00122           qline = q_arr[ind2] = q_arr_list[q_arr_count++]
00123                                         = new float[K3+order-1];
00124           memset( (void*) qline, 0, (K3+order-1) * sizeof(float) );
00125         }
00126         f_arr[ind2] = 1;
00127         for (l=0; l<order; l++) {
00128           qline[u3i+l] += m1m2 * Mi[2*order + l];
00129         }
00130       }
00131     }
00132     Mi += stride;
00133     for (l=0; l<order; l++) {
00134       int u3 = u3i + l;
00135       int ind = u3 + (u3 < 0 ? K3 : 0);
00136       fz_arr[ind] = 1;
00137     }
00138   }
00139 }
00140 
00141 void PmeRealSpace::compute_forces(const float * const *q_arr,
00142                                 const PmeParticle p[], Vector f[]) {
00143 
00144   switch (myGrid.order) {
00145   case 4:
00146     compute_forces_order4(q_arr, p, f);
00147     break;
00148   case 6:
00149     compute_forces_order<6>(q_arr, p, f);
00150     break;
00151   case 8:
00152     compute_forces_order<8>(q_arr, p, f);
00153     break;
00154   case 10:
00155     compute_forces_order<10>(q_arr, p, f);
00156     break;
00157   default: NAMD_die("unsupported PMEInterpOrder");
00158   }
00159 
00160 }
00161   
00162 template <int order>
00163 void PmeRealSpace::compute_forces_order(const float * const *q_arr,
00164                                 const PmeParticle p[], Vector f[]) {
00165   
00166   int i, j, k, l, stride;
00167   float f1, f2, f3;
00168   float *Mi, *dMi;
00169   int K1, K2, K3, dim2;
00170 
00171   if ( order != myGrid.order ) NAMD_bug("compute_forces_order template mismatch");
00172 
00173   if ( order == 4 ) {
00174     compute_forces_order4(q_arr, p, f);
00175     return;
00176   }
00177 
00178   K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3; dim2=myGrid.dim2;
00179   stride=3*order;
00180   Mi = M; dMi = dM;
00181  
00182   for (i=0; i<N; i++) {
00183     float q;
00184     int u1, u2, u2i, u3i;
00185     q = p[i].cg;
00186     f1=f2=f3=0.0;
00187     u1 = (int)(p[i].x);
00188     u2i = (int)(p[i].y);
00189     u3i = (int)(p[i].z);
00190     u1 -= order;
00191     u2i -= order;
00192     u3i -= order;
00193     u3i += 1;
00194     if ( u3i < 0 ) u3i += K3;
00195     for (j=0; j<order; j++) {
00196       float m1, d1;
00197       int ind1;
00198       m1=Mi[j]*q;
00199       d1=K1*dMi[j]*q;
00200       u1++;
00201       ind1 = (u1 + (u1 < 0 ? K1 : 0))*dim2; 
00202       u2 = u2i;
00203       for (k=0; k<order; k++) {
00204         float m2, d2, m1m2, m1d2, d1m2;
00205         int ind2;
00206         m2=Mi[order+k];
00207         d2=K2*dMi[order+k];
00208         m1m2=m1*m2;
00209         m1d2=m1*d2;
00210         d1m2=d1*m2;
00211         u2++;
00212         ind2 = ind1 + (u2 + (u2 < 0 ? K2 : 0));
00213         const float *qline = q_arr[ind2];
00214         if ( ! qline ) continue;
00215         for (l=0; l<order; l++) {
00216           float term, m3, d3;
00217           m3=Mi[2*order+l];
00218           d3=K3*dMi[2*order+l];
00219           term = qline[u3i+l];
00220           f1 -= d1m2 * m3 * term;
00221           f2 -= m1d2 * m3 * term;
00222           f3 -= m1m2 * d3 * term;
00223         }
00224       }
00225     }
00226     Mi += stride;
00227     dMi += stride;
00228     f[i].x = f1;
00229     f[i].y = f2;
00230     f[i].z = f3;
00231   }
00232 }
00233 
00234 // this should definitely help the compiler
00235 #define order 4
00236 
00237 void PmeRealSpace::fill_charges_order4(float **q_arr, float **q_arr_list, int &q_arr_count, 
00238                        int &stray_count, char *f_arr, char *fz_arr, PmeParticle p[]) {
00239   
00240   int i, j, k, l;
00241   int stride;
00242   int K1, K2, K3, dim2, dim3;
00243   float * __restrict Mi, * __restrict dMi;
00244   Mi = M; dMi = dM;
00245   K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3; dim2=myGrid.dim2; dim3=myGrid.dim3;
00246   // order = myGrid.order;
00247   stride = 3*order;
00248 
00249   // fill_b_spline(p);   leave this off!  replaced with code below
00250 
00251 #ifdef NAMD_CUDA
00252   for ( int istart = 0; istart < N; istart += 1000 ) {
00253    int iend = istart + 1000;
00254    if ( iend > N ) iend = N;
00255    CmiNetworkProgress();
00256    for (i=istart; i<iend; i++) {
00257 #else
00258   {
00259    for (i=0; i<N; i++) {
00260 #endif
00261     float q;
00262     int u1, u2, u2i, u3i;
00263     u1 = (int)p[i].x;
00264     u2i = (int)p[i].y;
00265     u3i = (int)p[i].z;
00266     float fr1 = (float)(p[i].x - (double)u1);  // subtract in double precision
00267     float fr2 = (float)(p[i].y - (double)u2i);
00268     float fr3 = (float)(p[i].z - (double)u3i);
00269     q = p[i].cg;
00270 
00271     // calculate b_spline for order = 4
00272     Mi[0] = ( ( (-1./6.) * fr1 + 0.5 ) * fr1 - 0.5 ) * fr1 + (1./6.);
00273     Mi[1] = ( ( 0.5 * fr1 - 1.0 ) * fr1 ) * fr1 + (2./3.);
00274     Mi[2] = ( ( -0.5 * fr1 + 0.5 ) * fr1 + 0.5 ) * fr1 + (1./6.);
00275     Mi[3] = (1./6.) * fr1 * fr1 * fr1;
00276     dMi[0] = ( -0.5 * fr1 + 1.0 )* fr1 - 0.5;
00277     dMi[1] = ( 1.5 * fr1 - 2.0 ) * fr1;
00278     dMi[2] = ( -1.5 * fr1 + 1.0 ) * fr1 + 0.5;
00279     dMi[3] = 0.5 * fr1 * fr1;
00280     Mi[4] = ( ( (-1./6.) * fr2 + 0.5 ) * fr2 - 0.5 ) * fr2 + (1./6.);
00281     Mi[5] = ( ( 0.5 * fr2 - 1.0 ) * fr2 ) * fr2 + (2./3.);
00282     Mi[6] = ( ( -0.5 * fr2 + 0.5 ) * fr2 + 0.5 ) * fr2 + (1./6.);
00283     Mi[7] = (1./6.) * fr2 * fr2 * fr2;
00284     dMi[4] = ( -0.5 * fr2 + 1.0 )* fr2 - 0.5;
00285     dMi[5] = ( 1.5 * fr2 - 2.0 ) * fr2;
00286     dMi[6] = ( -1.5 * fr2 + 1.0 ) * fr2 + 0.5;
00287     dMi[7] = 0.5 * fr2 * fr2;
00288     Mi[8] = ( ( (-1./6.) * fr3 + 0.5 ) * fr3 - 0.5 ) * fr3 + (1./6.);
00289     Mi[9] = ( ( 0.5 * fr3 - 1.0 ) * fr3 ) * fr3 + (2./3.);
00290     Mi[10] = ( ( -0.5 * fr3 + 0.5 ) * fr3 + 0.5 ) * fr3 + (1./6.);
00291     Mi[11] = (1./6.) * fr3 * fr3 * fr3;
00292     dMi[8] = ( -0.5 * fr3 + 1.0 )* fr3 - 0.5;
00293     dMi[9] = ( 1.5 * fr3 - 2.0 ) * fr3;
00294     dMi[10] = ( -1.5 * fr3 + 1.0 ) * fr3 + 0.5;
00295     dMi[11] = 0.5 * fr3 * fr3;
00296 
00297     u1 -= order;
00298     u2i -= order;
00299     u3i -= order;
00300     u3i += 1;
00301     if ( u3i < 0 ) u3i += K3;
00302     for (j=0; j<order; j++) {
00303       float m1;
00304       int ind1;
00305       m1 = Mi[j]*q;
00306       u1++;
00307       ind1 = (u1 + (u1 < 0 ? K1 : 0))*dim2;
00308       u2 = u2i;
00309       for (k=0; k<order; k++) {
00310         float m1m2;
00311         int ind2;
00312         m1m2 = m1*Mi[order+k];
00313         u2++;
00314         ind2 = ind1 + (u2 + (u2 < 0 ? K2 : 0));
00315         float * __restrict qline = q_arr[ind2];
00316         if ( ! qline ) {
00317           if ( f_arr[ind2] ) {
00318             f_arr[ind2] = 3;
00319             ++stray_count;
00320             continue;
00321           }
00322           qline = q_arr[ind2] = q_arr_list[q_arr_count++]
00323                                         = new float[K3+order-1];
00324           memset( (void*) qline, 0, (K3+order-1) * sizeof(float) );
00325         }
00326         f_arr[ind2] = 1;
00327         for (l=0; l<order; l++) {
00328           qline[u3i+l] += m1m2 * Mi[2*order + l];
00329         }
00330       }
00331     }
00332     Mi += stride;
00333     dMi += stride;
00334     for (l=0; l<order; l++) {
00335       int u3 = u3i + l;
00336       int ind = u3 + (u3 < 0 ? K3 : 0);
00337       fz_arr[ind] = 1;
00338     }
00339    }
00340   }
00341 }
00342 
00343 static inline void compute_forces_order4_helper(int first, int last, void *result, int paraNum, void *param){
00344     void **params = (void **)param;
00345     PmeRealSpace *rs = (PmeRealSpace *)params[0];
00346     const float * const *q_arr = (const float * const *)params[1];
00347     const PmeParticle *p = (const PmeParticle *)params[2];
00348     Vector *f = (Vector *)params[3];
00349     rs->compute_forces_order4_partial(first, last, q_arr, p, f);
00350 }
00351 
00352 void PmeRealSpace::compute_forces_order4(const float * const *q_arr,
00353                                 const PmeParticle p[], Vector f[]) {
00354   
00355   int i, j, k, l, stride;
00356   float f1, f2, f3;
00357   float *Mi, *dMi;
00358   int K1, K2, K3, dim2;
00359 
00360 #if     CMK_SMP && USE_CKLOOP
00361   int useCkLoop = Node::Object()->simParameters->useCkLoop;
00362   if(useCkLoop>=CKLOOP_CTRL_PME_UNGRIDCALC){          
00363       //compute_forces_order4_partial(0, N-1, q_arr, p, f);
00364       void *params[] = {(void *)this, (void *)q_arr, (void *)p, (void *)f};
00365       CkLoop_Parallelize(compute_forces_order4_helper, 4, (void *)params, CkMyNodeSize(), 0, N-1);
00366       return;
00367   }
00368 #endif
00369   K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3; dim2=myGrid.dim2;
00370   // order = myGrid.order;
00371   stride=3*order;
00372   Mi = M; dMi = dM;
00373  
00374   for (i=0; i<N; i++) {
00375     float q;
00376     int u1, u2, u2i, u3i;
00377     q = p[i].cg;
00378     f1=f2=f3=0.0;
00379     u1 = (int)(p[i].x);
00380     u2i = (int)(p[i].y);
00381     u3i = (int)(p[i].z);
00382     u1 -= order;
00383     u2i -= order;
00384     u3i -= order;
00385     u3i += 1;
00386     if ( u3i < 0 ) u3i += K3;
00387     for (j=0; j<order; j++) {
00388       float m1, d1;
00389       int ind1;
00390       m1=Mi[j]*q;
00391       d1=K1*dMi[j]*q;
00392       u1++;
00393       ind1 = (u1 + (u1 < 0 ? K1 : 0))*dim2; 
00394       u2 = u2i;
00395       for (k=0; k<order; k++) {
00396         float m2, d2, m1m2, m1d2, d1m2;
00397         int ind2;
00398         m2=Mi[order+k];
00399         d2=K2*dMi[order+k];
00400         m1m2=m1*m2;
00401         m1d2=m1*d2;
00402         d1m2=d1*m2;
00403         u2++;
00404         ind2 = ind1 + (u2 + (u2 < 0 ? K2 : 0));
00405         const float *qline = q_arr[ind2];
00406         if ( ! qline ) continue;
00407         for (l=0; l<order; l++) {
00408           float term, m3, d3;
00409           m3=Mi[2*order+l];
00410           d3=K3*dMi[2*order+l];
00411           term = qline[u3i+l];
00412           f1 -= d1m2 * m3 * term;
00413           f2 -= m1d2 * m3 * term;
00414           f3 -= m1m2 * d3 * term;
00415         }
00416       }
00417     }
00418     Mi += stride;
00419     dMi += stride;
00420     f[i].x = f1;
00421     f[i].y = f2;
00422     f[i].z = f3;
00423   }
00424 }
00425 
00426 void PmeRealSpace::compute_forces_order4_partial(int first, int last, 
00427                 const float * const *q_arr,
00428                                 const PmeParticle p[], Vector f[]) {
00429   
00430   int i, j, k, l, stride;
00431   float f1, f2, f3;
00432   float *Mi, *dMi;
00433   int K1, K2, K3, dim2;
00434 
00435   K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3; dim2=myGrid.dim2;
00436   // order = myGrid.order;
00437   stride=3*order;
00438   Mi = M; dMi = dM;
00439  
00440   for (i=first; i<=last; i++) {
00441     Mi = M + i*stride;
00442     dMi = dM + i*stride;
00443     float q;
00444     int u1, u2, u2i, u3i;
00445     q = p[i].cg;
00446     f1=f2=f3=0.0;
00447     u1 = (int)(p[i].x);
00448     u2i = (int)(p[i].y);
00449     u3i = (int)(p[i].z);
00450     u1 -= order;
00451     u2i -= order;
00452     u3i -= order;
00453     u3i += 1;
00454     if ( u3i < 0 ) u3i += K3;
00455     for (j=0; j<order; j++) {
00456       float m1, d1;
00457       int ind1;
00458       m1=Mi[j]*q;
00459       d1=K1*dMi[j]*q;
00460       u1++;
00461       ind1 = (u1 + (u1 < 0 ? K1 : 0))*dim2; 
00462       u2 = u2i;
00463       for (k=0; k<order; k++) {
00464         float m2, d2, m1m2, m1d2, d1m2;
00465         int ind2;
00466         m2=Mi[order+k];
00467         d2=K2*dMi[order+k];
00468         m1m2=m1*m2;
00469         m1d2=m1*d2;
00470         d1m2=d1*m2;
00471         u2++;
00472         ind2 = ind1 + (u2 + (u2 < 0 ? K2 : 0));
00473         const float *qline = q_arr[ind2];
00474         if ( ! qline ) continue;
00475         for (l=0; l<order; l++) {
00476           float term, m3, d3;
00477           m3=Mi[2*order+l];
00478           d3=K3*dMi[2*order+l];
00479           term = qline[u3i+l];
00480           f1 -= d1m2 * m3 * term;
00481           f2 -= m1d2 * m3 * term;
00482           f3 -= m1m2 * d3 * term;
00483         }
00484       }
00485     }
00486     f[i].x = f1;
00487     f[i].y = f2;
00488     f[i].z = f3;
00489   }
00490 }

Generated on Tue Nov 21 01:17:14 2017 for NAMD by  doxygen 1.4.7