26 dM = dM_alloc.
begin();
38 fr[0] = (float)(p[i].x - (
double)(int)(p[i].x));
39 fr[1] = (float)(p[i].y - (
double)(int)(p[i].y));
40 fr[2] = (float)(p[i].z - (
double)(int)(p[i].z));
48 int &stray_count,
char *f_arr,
char *fz_arr,
PmeParticle p[]) {
50 switch (myGrid.
order) {
52 fill_charges_order4(q_arr, q_arr_list, q_arr_count, stray_count, f_arr, fz_arr, p);
55 fill_charges_order<6>(q_arr, q_arr_list, q_arr_count, stray_count, f_arr, fz_arr, p);
58 fill_charges_order<8>(q_arr, q_arr_list, q_arr_count, stray_count, f_arr, fz_arr, p);
61 fill_charges_order<10>(q_arr, q_arr_list, q_arr_count, stray_count, f_arr, fz_arr, p);
63 default:
NAMD_die(
"unsupported PMEInterpOrder");
69 void PmeRealSpace::fill_charges_order(
float **q_arr,
float **q_arr_list,
int &q_arr_count,
70 int &stray_count,
char *f_arr,
char *fz_arr,
PmeParticle p[]) {
74 int K1, K2, K3, dim2, dim3;
79 fill_charges_order4(q_arr, q_arr_list, q_arr_count, stray_count, f_arr, fz_arr, p);
83 float * __restrict Mi;
85 K1=myGrid.
K1; K2=myGrid.
K2; K3=myGrid.
K3; dim2=myGrid.
dim2; dim3=myGrid.
dim3;
88 fill_b_spline<order>(p);
101 if ( u3i < 0 ) u3i += K3;
102 for (j=0; j<
order; j++) {
107 ind1 = (u1 + (u1 < 0 ? K1 : 0))*dim2;
109 for (k=0; k<
order; k++) {
112 m1m2 = m1*Mi[
order+k];
114 ind2 = ind1 + (u2 + (u2 < 0 ? K2 : 0));
115 float * __restrict qline = q_arr[ind2];
122 qline = q_arr[ind2] = q_arr_list[q_arr_count++]
123 =
new float[K3+
order-1];
124 memset( (
void*) qline, 0, (K3+
order-1) *
sizeof(
float) );
127 for (l=0; l<
order; l++) {
128 qline[u3i+l] += m1m2 * Mi[2*
order + l];
133 for (l=0; l<
order; l++) {
135 int ind = u3 + (u3 < 0 ? K3 : 0);
144 switch (myGrid.
order) {
146 compute_forces_order4(q_arr, p, f);
149 compute_forces_order<6>(q_arr, p, f);
152 compute_forces_order<8>(q_arr, p, f);
155 compute_forces_order<10>(q_arr, p, f);
157 default:
NAMD_die(
"unsupported PMEInterpOrder");
163 void PmeRealSpace::compute_forces_order(
const float *
const *q_arr,
166 int i, j, k, l, stride;
169 int K1, K2, K3, dim2;
174 compute_forces_order4(q_arr, p, f);
178 K1=myGrid.
K1; K2=myGrid.
K2; K3=myGrid.
K3; dim2=myGrid.
dim2;
182 for (i=0; i<N; i++) {
184 int u1, u2, u2i, u3i;
194 if ( u3i < 0 ) u3i += K3;
195 for (j=0; j<
order; j++) {
201 ind1 = (u1 + (u1 < 0 ? K1 : 0))*dim2;
203 for (k=0; k<
order; k++) {
204 float m2, d2, m1m2, m1d2, d1m2;
212 ind2 = ind1 + (u2 + (u2 < 0 ? K2 : 0));
213 const float *qline = q_arr[ind2];
214 if ( ! qline )
continue;
215 for (l=0; l<
order; l++) {
218 d3=K3*dMi[2*
order+l];
220 f1 -= d1m2 * m3 * term;
221 f2 -= m1d2 * m3 * term;
222 f3 -= m1m2 * d3 * term;
237 void PmeRealSpace::fill_charges_order4(
float **q_arr,
float **q_arr_list,
int &q_arr_count,
238 int &stray_count,
char *f_arr,
char *fz_arr,
PmeParticle p[]) {
242 int K1, K2, K3, dim2, dim3;
243 float * __restrict Mi, * __restrict dMi;
245 K1=myGrid.
K1; K2=myGrid.
K2; K3=myGrid.
K3; dim2=myGrid.
dim2; dim3=myGrid.
dim3;
251 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 252 for (
int istart = 0; istart < N; istart += 1000 ) {
253 int iend = istart + 1000;
254 if ( iend > N ) iend = N;
255 CmiNetworkProgress();
256 for (i=istart; i<iend; i++) {
259 for (i=0; i<N; i++) {
262 int u1, u2, u2i, u3i;
266 float fr1 = (float)(p[i].x - (
double)u1);
267 float fr2 = (float)(p[i].y - (
double)u2i);
268 float fr3 = (float)(p[i].z - (
double)u3i);
272 Mi[0] = ( ( (-1./6.) * fr1 + 0.5 ) * fr1 - 0.5 ) * fr1 + (1./6.);
273 Mi[1] = ( ( 0.5 * fr1 - 1.0 ) * fr1 ) * fr1 + (2./3.);
274 Mi[2] = ( ( -0.5 * fr1 + 0.5 ) * fr1 + 0.5 ) * fr1 + (1./6.);
275 Mi[3] = (1./6.) * fr1 * fr1 * fr1;
276 dMi[0] = ( -0.5 * fr1 + 1.0 )* fr1 - 0.5;
277 dMi[1] = ( 1.5 * fr1 - 2.0 ) * fr1;
278 dMi[2] = ( -1.5 * fr1 + 1.0 ) * fr1 + 0.5;
279 dMi[3] = 0.5 * fr1 * fr1;
280 Mi[4] = ( ( (-1./6.) * fr2 + 0.5 ) * fr2 - 0.5 ) * fr2 + (1./6.);
281 Mi[5] = ( ( 0.5 * fr2 - 1.0 ) * fr2 ) * fr2 + (2./3.);
282 Mi[6] = ( ( -0.5 * fr2 + 0.5 ) * fr2 + 0.5 ) * fr2 + (1./6.);
283 Mi[7] = (1./6.) * fr2 * fr2 * fr2;
284 dMi[4] = ( -0.5 * fr2 + 1.0 )* fr2 - 0.5;
285 dMi[5] = ( 1.5 * fr2 - 2.0 ) * fr2;
286 dMi[6] = ( -1.5 * fr2 + 1.0 ) * fr2 + 0.5;
287 dMi[7] = 0.5 * fr2 * fr2;
288 Mi[8] = ( ( (-1./6.) * fr3 + 0.5 ) * fr3 - 0.5 ) * fr3 + (1./6.);
289 Mi[9] = ( ( 0.5 * fr3 - 1.0 ) * fr3 ) * fr3 + (2./3.);
290 Mi[10] = ( ( -0.5 * fr3 + 0.5 ) * fr3 + 0.5 ) * fr3 + (1./6.);
291 Mi[11] = (1./6.) * fr3 * fr3 * fr3;
292 dMi[8] = ( -0.5 * fr3 + 1.0 )* fr3 - 0.5;
293 dMi[9] = ( 1.5 * fr3 - 2.0 ) * fr3;
294 dMi[10] = ( -1.5 * fr3 + 1.0 ) * fr3 + 0.5;
295 dMi[11] = 0.5 * fr3 * fr3;
301 if ( u3i < 0 ) u3i += K3;
302 for (j=0; j<
order; j++) {
307 ind1 = (u1 + (u1 < 0 ? K1 : 0))*dim2;
309 for (k=0; k<
order; k++) {
312 m1m2 = m1*Mi[
order+k];
314 ind2 = ind1 + (u2 + (u2 < 0 ? K2 : 0));
315 float * __restrict qline = q_arr[ind2];
322 qline = q_arr[ind2] = q_arr_list[q_arr_count++]
323 =
new float[K3+
order-1];
324 memset( (
void*) qline, 0, (K3+
order-1) *
sizeof(
float) );
327 for (l=0; l<
order; l++) {
328 qline[u3i+l] += m1m2 * Mi[2*
order + l];
334 for (l=0; l<
order; l++) {
336 int ind = u3 + (u3 < 0 ? K3 : 0);
344 void **params = (
void **)param;
346 const float *
const *q_arr = (
const float *
const *)params[1];
352 void PmeRealSpace::compute_forces_order4(
const float *
const *q_arr,
355 int i, j, k, l, stride;
358 int K1, K2, K3, dim2;
360 #if CMK_SMP && USE_CKLOOP 364 void *params[] = {(
void *)
this, (
void *)q_arr, (
void *)p, (
void *)f};
369 K1=myGrid.
K1; K2=myGrid.
K2; K3=myGrid.
K3; dim2=myGrid.
dim2;
374 for (i=0; i<N; i++) {
376 int u1, u2, u2i, u3i;
386 if ( u3i < 0 ) u3i += K3;
387 for (j=0; j<
order; j++) {
393 ind1 = (u1 + (u1 < 0 ? K1 : 0))*dim2;
395 for (k=0; k<
order; k++) {
396 float m2, d2, m1m2, m1d2, d1m2;
404 ind2 = ind1 + (u2 + (u2 < 0 ? K2 : 0));
405 const float *qline = q_arr[ind2];
406 if ( ! qline )
continue;
407 for (l=0; l<
order; l++) {
410 d3=K3*dMi[2*
order+l];
412 f1 -= d1m2 * m3 * term;
413 f2 -= m1d2 * m3 * term;
414 f3 -= m1m2 * d3 * term;
427 const float *
const *q_arr,
430 int i, j, k, l, stride;
433 int K1, K2, K3, dim2;
435 K1=myGrid.
K1; K2=myGrid.
K2; K3=myGrid.
K3; dim2=myGrid.
dim2;
440 for (i=first; i<=last; i++) {
444 int u1, u2, u2i, u3i;
454 if ( u3i < 0 ) u3i += K3;
455 for (j=0; j<
order; j++) {
461 ind1 = (u1 + (u1 < 0 ? K1 : 0))*dim2;
463 for (k=0; k<
order; k++) {
464 float m2, d2, m1m2, m1d2, d1m2;
472 ind2 = ind1 + (u2 + (u2 < 0 ? K2 : 0));
473 const float *qline = q_arr[ind2];
474 if ( ! qline )
continue;
475 for (l=0; l<
order; l++) {
478 d3=K3*dMi[2*
order+l];
480 f1 -= d1m2 * m3 * term;
481 f2 -= m1d2 * m3 * term;
482 f3 -= m1m2 * d3 * term;
void compute_forces(const float *const *q_arr, const PmeParticle p[], Vector f[])
SimParameters * simParameters
void NAMD_bug(const char *err_msg)
void fill_charges(float **q_arr, float **q_arr_list, int &q_arr_count, int &stray_count, char *f_arr, char *fz_arr, PmeParticle p[])
void NAMD_die(const char *err_msg)
static void compute_forces_order4_helper(int first, int last, void *result, int paraNum, void *param)
PmeRealSpace(PmeGrid grid)
#define CKLOOP_CTRL_PME_UNGRIDCALC
void set_num_atoms(int natoms)
static void compute_b_spline(REAL *__restrict frac, REAL *M, REAL *dM, int order)
void compute_forces_order4_partial(int first, int last, const float *const *q_arr, const PmeParticle p[], Vector f[])