00001
00007 #include <string.h>
00008 #include "PmeBase.inl"
00009 #include "PmeRealSpace.h"
00010 #include "Node.h"
00011 #include "SimParameters.h"
00012
00013 PmeRealSpace::PmeRealSpace(PmeGrid grid)
00014 : myGrid(grid) {
00015 }
00016
00017 PmeRealSpace::~PmeRealSpace() {
00018 }
00019
00020 void PmeRealSpace::set_num_atoms(int natoms) {
00021 N = natoms;
00022 int order = myGrid.order;
00023 M_alloc.resize(3*N*order);
00024 M = M_alloc.begin();
00025 dM_alloc.resize(3*N*order);
00026 dM = dM_alloc.begin();
00027 }
00028
00029 template <int order>
00030 void PmeRealSpace::fill_b_spline(PmeParticle p[]) {
00031 float fr[3];
00032 float *Mi, *dMi;
00033 int i, stride;
00034
00035 stride = 3*order;
00036 Mi = M; dMi = dM;
00037 for (i=0; i<N; i++) {
00038 fr[0] = (float)(p[i].x - (double)(int)(p[i].x));
00039 fr[1] = (float)(p[i].y - (double)(int)(p[i].y));
00040 fr[2] = (float)(p[i].z - (double)(int)(p[i].z));
00041 compute_b_spline(fr, Mi, dMi, order);
00042 Mi += stride;
00043 dMi += stride;
00044 }
00045 }
00046
00047 void PmeRealSpace::fill_charges(float **q_arr, float **q_arr_list, int &q_arr_count,
00048 int &stray_count, char *f_arr, char *fz_arr, PmeParticle p[]) {
00049
00050 switch (myGrid.order) {
00051 case 4:
00052 fill_charges_order4(q_arr, q_arr_list, q_arr_count, stray_count, f_arr, fz_arr, p);
00053 break;
00054 case 6:
00055 fill_charges_order<6>(q_arr, q_arr_list, q_arr_count, stray_count, f_arr, fz_arr, p);
00056 break;
00057 case 8:
00058 fill_charges_order<8>(q_arr, q_arr_list, q_arr_count, stray_count, f_arr, fz_arr, p);
00059 break;
00060 case 10:
00061 fill_charges_order<10>(q_arr, q_arr_list, q_arr_count, stray_count, f_arr, fz_arr, p);
00062 break;
00063 default: NAMD_die("unsupported PMEInterpOrder");
00064 }
00065
00066 }
00067
00068 template <int order>
00069 void PmeRealSpace::fill_charges_order(float **q_arr, float **q_arr_list, int &q_arr_count,
00070 int &stray_count, char *f_arr, char *fz_arr, PmeParticle p[]) {
00071
00072 int i, j, k, l;
00073 int stride;
00074 int K1, K2, K3, dim2, dim3;
00075
00076 if ( order != myGrid.order ) NAMD_bug("fill_charges_order template mismatch");
00077
00078 if ( order == 4 ) {
00079 fill_charges_order4(q_arr, q_arr_list, q_arr_count, stray_count, f_arr, fz_arr, p);
00080 return;
00081 }
00082
00083 float * __restrict Mi;
00084 Mi = M;
00085 K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3; dim2=myGrid.dim2; dim3=myGrid.dim3;
00086 stride = 3*order;
00087
00088 fill_b_spline<order>(p);
00089
00090 for (i=0; i<N; i++) {
00091 float q;
00092 int u1, u2, u2i, u3i;
00093 q = p[i].cg;
00094 u1 = (int)(p[i].x);
00095 u2i = (int)(p[i].y);
00096 u3i = (int)(p[i].z);
00097 u1 -= order;
00098 u2i -= order;
00099 u3i -= order;
00100 u3i += 1;
00101 if ( u3i < 0 ) u3i += K3;
00102 for (j=0; j<order; j++) {
00103 float m1;
00104 int ind1;
00105 m1 = Mi[j]*q;
00106 u1++;
00107 ind1 = (u1 + (u1 < 0 ? K1 : 0))*dim2;
00108 u2 = u2i;
00109 for (k=0; k<order; k++) {
00110 float m1m2;
00111 int ind2;
00112 m1m2 = m1*Mi[order+k];
00113 u2++;
00114 ind2 = ind1 + (u2 + (u2 < 0 ? K2 : 0));
00115 float * __restrict qline = q_arr[ind2];
00116 if ( ! qline ) {
00117 if ( f_arr[ind2] ) {
00118 f_arr[ind2] = 3;
00119 ++stray_count;
00120 continue;
00121 }
00122 qline = q_arr[ind2] = q_arr_list[q_arr_count++]
00123 = new float[K3+order-1];
00124 memset( (void*) qline, 0, (K3+order-1) * sizeof(float) );
00125 }
00126 f_arr[ind2] = 1;
00127 for (l=0; l<order; l++) {
00128 qline[u3i+l] += m1m2 * Mi[2*order + l];
00129 }
00130 }
00131 }
00132 Mi += stride;
00133 for (l=0; l<order; l++) {
00134 int u3 = u3i + l;
00135 int ind = u3 + (u3 < 0 ? K3 : 0);
00136 fz_arr[ind] = 1;
00137 }
00138 }
00139 }
00140
00141 void PmeRealSpace::compute_forces(const float * const *q_arr,
00142 const PmeParticle p[], Vector f[]) {
00143
00144 switch (myGrid.order) {
00145 case 4:
00146 compute_forces_order4(q_arr, p, f);
00147 break;
00148 case 6:
00149 compute_forces_order<6>(q_arr, p, f);
00150 break;
00151 case 8:
00152 compute_forces_order<8>(q_arr, p, f);
00153 break;
00154 case 10:
00155 compute_forces_order<10>(q_arr, p, f);
00156 break;
00157 default: NAMD_die("unsupported PMEInterpOrder");
00158 }
00159
00160 }
00161
00162 template <int order>
00163 void PmeRealSpace::compute_forces_order(const float * const *q_arr,
00164 const PmeParticle p[], Vector f[]) {
00165
00166 int i, j, k, l, stride;
00167 float f1, f2, f3;
00168 float *Mi, *dMi;
00169 int K1, K2, K3, dim2;
00170
00171 if ( order != myGrid.order ) NAMD_bug("compute_forces_order template mismatch");
00172
00173 if ( order == 4 ) {
00174 compute_forces_order4(q_arr, p, f);
00175 return;
00176 }
00177
00178 K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3; dim2=myGrid.dim2;
00179 stride=3*order;
00180 Mi = M; dMi = dM;
00181
00182 for (i=0; i<N; i++) {
00183 float q;
00184 int u1, u2, u2i, u3i;
00185 q = p[i].cg;
00186 f1=f2=f3=0.0;
00187 u1 = (int)(p[i].x);
00188 u2i = (int)(p[i].y);
00189 u3i = (int)(p[i].z);
00190 u1 -= order;
00191 u2i -= order;
00192 u3i -= order;
00193 u3i += 1;
00194 if ( u3i < 0 ) u3i += K3;
00195 for (j=0; j<order; j++) {
00196 float m1, d1;
00197 int ind1;
00198 m1=Mi[j]*q;
00199 d1=K1*dMi[j]*q;
00200 u1++;
00201 ind1 = (u1 + (u1 < 0 ? K1 : 0))*dim2;
00202 u2 = u2i;
00203 for (k=0; k<order; k++) {
00204 float m2, d2, m1m2, m1d2, d1m2;
00205 int ind2;
00206 m2=Mi[order+k];
00207 d2=K2*dMi[order+k];
00208 m1m2=m1*m2;
00209 m1d2=m1*d2;
00210 d1m2=d1*m2;
00211 u2++;
00212 ind2 = ind1 + (u2 + (u2 < 0 ? K2 : 0));
00213 const float *qline = q_arr[ind2];
00214 if ( ! qline ) continue;
00215 for (l=0; l<order; l++) {
00216 float term, m3, d3;
00217 m3=Mi[2*order+l];
00218 d3=K3*dMi[2*order+l];
00219 term = qline[u3i+l];
00220 f1 -= d1m2 * m3 * term;
00221 f2 -= m1d2 * m3 * term;
00222 f3 -= m1m2 * d3 * term;
00223 }
00224 }
00225 }
00226 Mi += stride;
00227 dMi += stride;
00228 f[i].x = f1;
00229 f[i].y = f2;
00230 f[i].z = f3;
00231 }
00232 }
00233
00234
00235 #define order 4
00236
00237 void PmeRealSpace::fill_charges_order4(float **q_arr, float **q_arr_list, int &q_arr_count,
00238 int &stray_count, char *f_arr, char *fz_arr, PmeParticle p[]) {
00239
00240 int i, j, k, l;
00241 int stride;
00242 int K1, K2, K3, dim2, dim3;
00243 float * __restrict Mi, * __restrict dMi;
00244 Mi = M; dMi = dM;
00245 K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3; dim2=myGrid.dim2; dim3=myGrid.dim3;
00246
00247 stride = 3*order;
00248
00249
00250
00251 #ifdef NAMD_CUDA
00252 for ( int istart = 0; istart < N; istart += 1000 ) {
00253 int iend = istart + 1000;
00254 if ( iend > N ) iend = N;
00255 CmiNetworkProgress();
00256 for (i=istart; i<iend; i++) {
00257 #else
00258 {
00259 for (i=0; i<N; i++) {
00260 #endif
00261 float q;
00262 int u1, u2, u2i, u3i;
00263 u1 = (int)p[i].x;
00264 u2i = (int)p[i].y;
00265 u3i = (int)p[i].z;
00266 float fr1 = (float)(p[i].x - (double)u1);
00267 float fr2 = (float)(p[i].y - (double)u2i);
00268 float fr3 = (float)(p[i].z - (double)u3i);
00269 q = p[i].cg;
00270
00271
00272 Mi[0] = ( ( (-1./6.) * fr1 + 0.5 ) * fr1 - 0.5 ) * fr1 + (1./6.);
00273 Mi[1] = ( ( 0.5 * fr1 - 1.0 ) * fr1 ) * fr1 + (2./3.);
00274 Mi[2] = ( ( -0.5 * fr1 + 0.5 ) * fr1 + 0.5 ) * fr1 + (1./6.);
00275 Mi[3] = (1./6.) * fr1 * fr1 * fr1;
00276 dMi[0] = ( -0.5 * fr1 + 1.0 )* fr1 - 0.5;
00277 dMi[1] = ( 1.5 * fr1 - 2.0 ) * fr1;
00278 dMi[2] = ( -1.5 * fr1 + 1.0 ) * fr1 + 0.5;
00279 dMi[3] = 0.5 * fr1 * fr1;
00280 Mi[4] = ( ( (-1./6.) * fr2 + 0.5 ) * fr2 - 0.5 ) * fr2 + (1./6.);
00281 Mi[5] = ( ( 0.5 * fr2 - 1.0 ) * fr2 ) * fr2 + (2./3.);
00282 Mi[6] = ( ( -0.5 * fr2 + 0.5 ) * fr2 + 0.5 ) * fr2 + (1./6.);
00283 Mi[7] = (1./6.) * fr2 * fr2 * fr2;
00284 dMi[4] = ( -0.5 * fr2 + 1.0 )* fr2 - 0.5;
00285 dMi[5] = ( 1.5 * fr2 - 2.0 ) * fr2;
00286 dMi[6] = ( -1.5 * fr2 + 1.0 ) * fr2 + 0.5;
00287 dMi[7] = 0.5 * fr2 * fr2;
00288 Mi[8] = ( ( (-1./6.) * fr3 + 0.5 ) * fr3 - 0.5 ) * fr3 + (1./6.);
00289 Mi[9] = ( ( 0.5 * fr3 - 1.0 ) * fr3 ) * fr3 + (2./3.);
00290 Mi[10] = ( ( -0.5 * fr3 + 0.5 ) * fr3 + 0.5 ) * fr3 + (1./6.);
00291 Mi[11] = (1./6.) * fr3 * fr3 * fr3;
00292 dMi[8] = ( -0.5 * fr3 + 1.0 )* fr3 - 0.5;
00293 dMi[9] = ( 1.5 * fr3 - 2.0 ) * fr3;
00294 dMi[10] = ( -1.5 * fr3 + 1.0 ) * fr3 + 0.5;
00295 dMi[11] = 0.5 * fr3 * fr3;
00296
00297 u1 -= order;
00298 u2i -= order;
00299 u3i -= order;
00300 u3i += 1;
00301 if ( u3i < 0 ) u3i += K3;
00302 for (j=0; j<order; j++) {
00303 float m1;
00304 int ind1;
00305 m1 = Mi[j]*q;
00306 u1++;
00307 ind1 = (u1 + (u1 < 0 ? K1 : 0))*dim2;
00308 u2 = u2i;
00309 for (k=0; k<order; k++) {
00310 float m1m2;
00311 int ind2;
00312 m1m2 = m1*Mi[order+k];
00313 u2++;
00314 ind2 = ind1 + (u2 + (u2 < 0 ? K2 : 0));
00315 float * __restrict qline = q_arr[ind2];
00316 if ( ! qline ) {
00317 if ( f_arr[ind2] ) {
00318 f_arr[ind2] = 3;
00319 ++stray_count;
00320 continue;
00321 }
00322 qline = q_arr[ind2] = q_arr_list[q_arr_count++]
00323 = new float[K3+order-1];
00324 memset( (void*) qline, 0, (K3+order-1) * sizeof(float) );
00325 }
00326 f_arr[ind2] = 1;
00327 for (l=0; l<order; l++) {
00328 qline[u3i+l] += m1m2 * Mi[2*order + l];
00329 }
00330 }
00331 }
00332 Mi += stride;
00333 dMi += stride;
00334 for (l=0; l<order; l++) {
00335 int u3 = u3i + l;
00336 int ind = u3 + (u3 < 0 ? K3 : 0);
00337 fz_arr[ind] = 1;
00338 }
00339 }
00340 }
00341 }
00342
00343 static inline void compute_forces_order4_helper(int first, int last, void *result, int paraNum, void *param){
00344 void **params = (void **)param;
00345 PmeRealSpace *rs = (PmeRealSpace *)params[0];
00346 const float * const *q_arr = (const float * const *)params[1];
00347 const PmeParticle *p = (const PmeParticle *)params[2];
00348 Vector *f = (Vector *)params[3];
00349 rs->compute_forces_order4_partial(first, last, q_arr, p, f);
00350 }
00351
00352 void PmeRealSpace::compute_forces_order4(const float * const *q_arr,
00353 const PmeParticle p[], Vector f[]) {
00354
00355 int i, j, k, l, stride;
00356 float f1, f2, f3;
00357 float *Mi, *dMi;
00358 int K1, K2, K3, dim2;
00359
00360 #if USE_CKLOOP
00361 int useCkLoop = Node::Object()->simParameters->useCkLoop;
00362 if(useCkLoop>=CKLOOP_CTRL_PME_UNGRIDCALC){
00363
00364 void *params[] = {(void *)this, (void *)q_arr, (void *)p, (void *)f};
00365 CkLoop_Parallelize(compute_forces_order4_helper, 4, (void *)params, CkMyNodeSize(), 0, N-1);
00366 return;
00367 }
00368 #endif
00369 K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3; dim2=myGrid.dim2;
00370
00371 stride=3*order;
00372 Mi = M; dMi = dM;
00373
00374 for (i=0; i<N; i++) {
00375 float q;
00376 int u1, u2, u2i, u3i;
00377 q = p[i].cg;
00378 f1=f2=f3=0.0;
00379 u1 = (int)(p[i].x);
00380 u2i = (int)(p[i].y);
00381 u3i = (int)(p[i].z);
00382 u1 -= order;
00383 u2i -= order;
00384 u3i -= order;
00385 u3i += 1;
00386 if ( u3i < 0 ) u3i += K3;
00387 for (j=0; j<order; j++) {
00388 float m1, d1;
00389 int ind1;
00390 m1=Mi[j]*q;
00391 d1=K1*dMi[j]*q;
00392 u1++;
00393 ind1 = (u1 + (u1 < 0 ? K1 : 0))*dim2;
00394 u2 = u2i;
00395 for (k=0; k<order; k++) {
00396 float m2, d2, m1m2, m1d2, d1m2;
00397 int ind2;
00398 m2=Mi[order+k];
00399 d2=K2*dMi[order+k];
00400 m1m2=m1*m2;
00401 m1d2=m1*d2;
00402 d1m2=d1*m2;
00403 u2++;
00404 ind2 = ind1 + (u2 + (u2 < 0 ? K2 : 0));
00405 const float *qline = q_arr[ind2];
00406 if ( ! qline ) continue;
00407 for (l=0; l<order; l++) {
00408 float term, m3, d3;
00409 m3=Mi[2*order+l];
00410 d3=K3*dMi[2*order+l];
00411 term = qline[u3i+l];
00412 f1 -= d1m2 * m3 * term;
00413 f2 -= m1d2 * m3 * term;
00414 f3 -= m1m2 * d3 * term;
00415 }
00416 }
00417 }
00418 Mi += stride;
00419 dMi += stride;
00420 f[i].x = f1;
00421 f[i].y = f2;
00422 f[i].z = f3;
00423 }
00424 }
00425
00426 void PmeRealSpace::compute_forces_order4_partial(int first, int last,
00427 const float * const *q_arr,
00428 const PmeParticle p[], Vector f[]) {
00429
00430 int i, j, k, l, stride;
00431 float f1, f2, f3;
00432 float *Mi, *dMi;
00433 int K1, K2, K3, dim2;
00434
00435 K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3; dim2=myGrid.dim2;
00436
00437 stride=3*order;
00438 Mi = M; dMi = dM;
00439
00440 for (i=first; i<=last; i++) {
00441 Mi = M + i*stride;
00442 dMi = dM + i*stride;
00443 float q;
00444 int u1, u2, u2i, u3i;
00445 q = p[i].cg;
00446 f1=f2=f3=0.0;
00447 u1 = (int)(p[i].x);
00448 u2i = (int)(p[i].y);
00449 u3i = (int)(p[i].z);
00450 u1 -= order;
00451 u2i -= order;
00452 u3i -= order;
00453 u3i += 1;
00454 if ( u3i < 0 ) u3i += K3;
00455 for (j=0; j<order; j++) {
00456 float m1, d1;
00457 int ind1;
00458 m1=Mi[j]*q;
00459 d1=K1*dMi[j]*q;
00460 u1++;
00461 ind1 = (u1 + (u1 < 0 ? K1 : 0))*dim2;
00462 u2 = u2i;
00463 for (k=0; k<order; k++) {
00464 float m2, d2, m1m2, m1d2, d1m2;
00465 int ind2;
00466 m2=Mi[order+k];
00467 d2=K2*dMi[order+k];
00468 m1m2=m1*m2;
00469 m1d2=m1*d2;
00470 d1m2=d1*m2;
00471 u2++;
00472 ind2 = ind1 + (u2 + (u2 < 0 ? K2 : 0));
00473 const float *qline = q_arr[ind2];
00474 if ( ! qline ) continue;
00475 for (l=0; l<order; l++) {
00476 float term, m3, d3;
00477 m3=Mi[2*order+l];
00478 d3=K3*dMi[2*order+l];
00479 term = qline[u3i+l];
00480 f1 -= d1m2 * m3 * term;
00481 f2 -= m1d2 * m3 * term;
00482 f3 -= m1m2 * d3 * term;
00483 }
00484 }
00485 }
00486 f[i].x = f1;
00487 f[i].y = f2;
00488 f[i].z = f3;
00489 }
00490 }