00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #if !defined(__PGIC__)
00023
00024
00025 #if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_AMD64))
00026 #if !defined(__SSE2__) && defined(_WIN64)
00027 #define __SSE2__ 1
00028 #endif
00029 #endif
00030
00031 #define VMDQSURFUSESSE 1
00032 #endif
00033
00034
00035
00036
00037
00038
00039
00040
00041 #if !defined(__GNUC__) && defined(__VEC__)
00042 #define VMDQSURFUSEVSX 1
00043 #endif
00044
00045 #include <stdio.h>
00046 #include <stdlib.h>
00047 #if VMDQSURFUSESSE && defined(__SSE2__)
00048 #include <emmintrin.h>
00049 #endif
00050
00051 #if (defined(VMDQSURFUSEVSX) && defined(__VSX__))
00052 #if defined(__GNUC__) && defined(__VEC__)
00053 #include <altivec.h>
00054 #endif
00055 #endif
00056
00057 #include <string.h>
00058 #include <math.h>
00059 #include "QuickSurf.h"
00060 #if defined(VMDCUDA)
00061 #include "CUDAQuickSurf.h"
00062 #endif
00063 #include "Measure.h"
00064 #include "Inform.h"
00065 #include "utilities.h"
00066 #include "WKFUtils.h"
00067 #include "VolumetricData.h"
00068
00069 #include "VMDDisplayList.h"
00070 #include "Displayable.h"
00071 #include "DispCmds.h"
00072 #include "ProfileHooks.h"
00073 #include "VMDApp.h"
00074
00075 #define MIN(X,Y) (((X)<(Y))? (X) : (Y))
00076 #define MAX(X,Y) (((X)>(Y))? (X) : (Y))
00077
00078
00079 void vmd_gaussdensity_avx2(int verbose,
00080 int natoms, const float *xyzr,
00081 const float *atomicnum,
00082 const float *colors,
00083 float *densitymap, float *voltexmap,
00084 const int *numvoxels,
00085 float radscale, float gridspacing,
00086 float isovalue, float gausslim);
00087
00088 void vmd_gaussdensity_neon(int verbose,
00089 int natoms, const float *xyzr,
00090 const float *atomicnum,
00091 const float *colors,
00092 float *densitymap, float *voltexmap,
00093 const int *numvoxels,
00094 float radscale, float gridspacing,
00095 float isovalue, float gausslim);
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136 #if defined(__GNUC__) && ! defined(__INTEL_COMPILER)
00137 #define __align(X) __attribute__((aligned(X) ))
00138 #if (__GNUC__ < 4)
00139 #define MISSING_mm_cvtsd_f64
00140 #endif
00141 #else
00142 #define __align(X) __declspec(align(X) )
00143 #endif
00144
00145 #define MLOG2EF -1.44269504088896f
00146
00147
00148
00149
00150
00151 #define SCEXP0 1.0000000000000000f
00152 #define SCEXP1 0.6987082824680118f
00153 #define SCEXP2 0.2633174272827404f
00154 #define SCEXP3 0.0923611991471395f
00155 #define SCEXP4 0.0277520543324108f
00156
00157
00158 #define EXPOBIAS 127
00159 #define EXPOSHIFT 23
00160
00161
00162 #define ACUTOFF -10
00163
00164 typedef union flint_t {
00165 float f;
00166 int n;
00167 } flint;
00168
00169 #if VMDQSURFUSESSE && defined(__SSE2__)
00170
00171 typedef union SSEreg_t {
00172 __m128 f;
00173 __m128i i;
00174 } SSEreg;
00175 #endif
00176
00177
00178 #if 0
00179 static float aexpfnx(float x) {
00180
00181 float mb;
00182 int mbflr;
00183 float d;
00184 float sy;
00185 flint scalfac;
00186
00187 if (x < ACUTOFF) return 0.f;
00188
00189 mb = x * MLOG2EF;
00190 mbflr = (int) mb;
00191 d = mbflr - mb;
00192 sy = SCEXP0 + d*(SCEXP1 + d*(SCEXP2 + d*(SCEXP3 + d*SCEXP4)));
00193
00194 scalfac.n = (EXPOBIAS - mbflr) << EXPOSHIFT;
00195 return (sy * scalfac.f);
00196 }
00197
00198
00199 static void vmd_gaussdensity(int verbose,
00200 int natoms, const float *xyzr,
00201 const float *atomicnum,
00202 const float *colors,
00203 float *densitymap, float *voltexmap,
00204 const int *numvoxels,
00205 float radscale, float gridspacing,
00206 float isovalue, float gausslim) {
00207 int i, x, y, z;
00208 int maxvoxel[3];
00209 maxvoxel[0] = numvoxels[0]-1;
00210 maxvoxel[1] = numvoxels[1]-1;
00211 maxvoxel[2] = numvoxels[2]-1;
00212 const float invgridspacing = 1.0f / gridspacing;
00213
00214
00215 if (voltexmap != NULL) {
00216 float invisovalue = 1.0f / isovalue;
00217
00218 for (i=0; i<natoms; i++) {
00219 if (verbose && ((i & 0x3fff) == 0)) {
00220 printf(".");
00221 fflush(stdout);
00222 }
00223
00224 ptrdiff_t ind = i*4L;
00225 float scaledrad = xyzr[ind + 3L] * radscale;
00226
00227
00228 float atomicnumfactor = 1.0f;
00229 if (atomicnum != NULL) {
00230 atomicnumfactor = atomicnum[i];
00231 }
00232
00233 float arinv = 1.0f/(2.0f*scaledrad*scaledrad);
00234 float radlim = gausslim * scaledrad;
00235 float radlim2 = radlim * radlim;
00236 radlim *= invgridspacing;
00237
00238 float tmp;
00239 tmp = xyzr[ind ] * invgridspacing;
00240 int xmin = MAX((int) (tmp - radlim), 0);
00241 int xmax = MIN((int) (tmp + radlim), maxvoxel[0]);
00242 tmp = xyzr[ind+1] * invgridspacing;
00243 int ymin = MAX((int) (tmp - radlim), 0);
00244 int ymax = MIN((int) (tmp + radlim), maxvoxel[1]);
00245 tmp = xyzr[ind+2] * invgridspacing;
00246 int zmin = MAX((int) (tmp - radlim), 0);
00247 int zmax = MIN((int) (tmp + radlim), maxvoxel[2]);
00248
00249 float dz = zmin*gridspacing - xyzr[ind+2];
00250 for (z=zmin; z<=zmax; z++,dz+=gridspacing) {
00251 float dy = ymin*gridspacing - xyzr[ind+1];
00252 for (y=ymin; y<=ymax; y++,dy+=gridspacing) {
00253 float dy2dz2 = dy*dy + dz*dz;
00254
00255
00256 if (dy2dz2 >= radlim2)
00257 continue;
00258
00259 int addr = z * numvoxels[0] * numvoxels[1] + y * numvoxels[0];
00260 float dx = xmin*gridspacing - xyzr[ind];
00261 for (x=xmin; x<=xmax; x++,dx+=gridspacing) {
00262 float r2 = dx*dx + dy2dz2;
00263 float expval = -r2 * arinv;
00264 #if VMDUSEFULLEXP
00265
00266 float density = exp(expval);
00267 #else
00268
00269 float density = aexpfnx(expval);
00270 #endif
00271
00272 density *= atomicnumfactor;
00273
00274
00275 densitymap[addr + x] += density;
00276
00277
00278
00279
00280 density *= invisovalue;
00281 ptrdiff_t caddr = (addr + x) * 3L;
00282
00283
00284 voltexmap[caddr ] += density * colors[ind ];
00285 voltexmap[caddr + 1] += density * colors[ind + 1];
00286 voltexmap[caddr + 2] += density * colors[ind + 2];
00287 }
00288 }
00289 }
00290 }
00291 } else {
00292
00293 for (i=0; i<natoms; i++) {
00294 if (verbose && ((i & 0x3fff) == 0)) {
00295 printf(".");
00296 fflush(stdout);
00297 }
00298
00299 ptrdiff_t ind = i*4L;
00300 float scaledrad = xyzr[ind + 3] * radscale;
00301
00302
00303 float atomicnumfactor = 1.0f;
00304 if (atomicnum != NULL) {
00305 atomicnumfactor = atomicnum[i];
00306 }
00307
00308 float arinv = 1.0f/(2.0f*scaledrad*scaledrad);
00309 float radlim = gausslim * scaledrad;
00310 float radlim2 = radlim * radlim;
00311 radlim *= invgridspacing;
00312
00313 float tmp;
00314 tmp = xyzr[ind ] * invgridspacing;
00315 int xmin = MAX((int) (tmp - radlim), 0);
00316 int xmax = MIN((int) (tmp + radlim), maxvoxel[0]);
00317 tmp = xyzr[ind+1] * invgridspacing;
00318 int ymin = MAX((int) (tmp - radlim), 0);
00319 int ymax = MIN((int) (tmp + radlim), maxvoxel[1]);
00320 tmp = xyzr[ind+2] * invgridspacing;
00321 int zmin = MAX((int) (tmp - radlim), 0);
00322 int zmax = MIN((int) (tmp + radlim), maxvoxel[2]);
00323
00324 float dz = zmin*gridspacing - xyzr[ind+2];
00325 for (z=zmin; z<=zmax; z++,dz+=gridspacing) {
00326 float dy = ymin*gridspacing - xyzr[ind+1];
00327 for (y=ymin; y<=ymax; y++,dy+=gridspacing) {
00328 float dy2dz2 = dy*dy + dz*dz;
00329
00330
00331 if (dy2dz2 >= radlim2)
00332 continue;
00333
00334 int addr = z * numvoxels[0] * numvoxels[1] + y * numvoxels[0];
00335 float dx = xmin*gridspacing - xyzr[ind];
00336 for (x=xmin; x<=xmax; x++,dx+=gridspacing) {
00337 float r2 = dx*dx + dy2dz2;
00338 float expval = -r2 * arinv;
00339 #if VMDUSEFULLEXP
00340
00341 float density = exp(expval);
00342 #else
00343
00344 float density = aexpfnx(expval);
00345 #endif
00346
00347 density *= atomicnumfactor;
00348
00349
00350 densitymap[addr + x] += density;
00351 }
00352 }
00353 }
00354 }
00355 }
00356 }
00357 #endif
00358
00359
00360
00361 static void vmd_gaussdensity_opt(wkf_cpu_caps_t *cpucaps, int verbose,
00362 int natoms, const float *xyzr,
00363 const float *atomicnum,
00364 const float *colors,
00365 float *densitymap, float *voltexmap,
00366 const int *numvoxels,
00367 float radscale, float gridspacing,
00368 float isovalue, float gausslim) {
00369 int i, x, y, z;
00370 int maxvoxel[3];
00371 maxvoxel[0] = numvoxels[0]-1;
00372 maxvoxel[1] = numvoxels[1]-1;
00373 maxvoxel[2] = numvoxels[2]-1;
00374 const float invgridspacing = 1.0f / gridspacing;
00375
00376
00377
00378
00379
00380
00381 #if defined(VMDCPUDISPATCH)
00382
00383
00384 #if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_AMD64))
00385
00386 if ((cpucaps != NULL) &&
00387 (cpucaps->flags & CPU_FMA) && (cpucaps->flags & CPU_AVX2) &&
00388 (getenv("VMDNOAVX2")==NULL)) {
00389 if (verbose)
00390 printf("vmd_gaussdensity_avx2()\n");
00391
00392 vmd_gaussdensity_avx2(verbose, natoms, xyzr, atomicnum, colors,
00393 densitymap, voltexmap, numvoxels, radscale,
00394 gridspacing, isovalue, gausslim);
00395 return;
00396 }
00397 #endif
00398
00399 #if defined(VMDUSENEON)
00400
00401 if (1 && (getenv("VMDNONEON") == NULL)) {
00402 if (verbose)
00403 printf("vmd_gaussdensity_neon()\n");
00404
00405 vmd_gaussdensity_neon(verbose, natoms, xyzr, atomicnum, colors,
00406 densitymap, voltexmap, numvoxels, radscale,
00407 gridspacing, isovalue, gausslim);
00408 return;
00409 }
00410 #endif
00411
00412
00413 if (verbose)
00414 printf("vmd_gaussdensity_opt()\n");
00415 #endif
00416
00417 #if VMDQSURFUSESSE && defined(__SSE2__)
00418 int usesse=1;
00419 if (getenv("VMDNOSSE")) {
00420 usesse=0;
00421 }
00422 #endif
00423
00424 #if VMDQSURFUSEVSX && defined(__VEC__)
00425 int usevsx=1;
00426 if (getenv("VMDNOVSX")) {
00427 usevsx=0;
00428 }
00429 #endif
00430
00431 #if VMDQSURFUSESSE && defined(__SSE2__)
00432
00433 __m128 gridspacing4_4;
00434 __align(16) float sxdelta4[4];
00435
00436 if (usesse) {
00437 gridspacing4_4 = _mm_set1_ps(gridspacing * 4.0f);
00438 for (x=0; x<4; x++)
00439 sxdelta4[x] = ((float) x) * gridspacing;
00440 }
00441 #endif
00442
00443 #if VMDQSURFUSEVSX && defined(__VEC__)
00444
00445 vector float gridspacing4_4;
00446 __attribute__((aligned(16))) float sxdelta4[4];
00447
00448 if (usevsx) {
00449 gridspacing4_4 = vec_splats(gridspacing * 4.0f);
00450 for (x=0; x<4; x++)
00451 sxdelta4[x] = ((float) x) * gridspacing;
00452 }
00453 #endif
00454
00455
00456 if (voltexmap != NULL) {
00457 float invisovalue = 1.0f / isovalue;
00458
00459 for (i=0; i<natoms; i++) {
00460 if (verbose && ((i & 0x3fff) == 0)) {
00461 printf(".");
00462 fflush(stdout);
00463 }
00464
00465 ptrdiff_t ind = i*4L;
00466 float scaledrad = xyzr[ind + 3] * radscale;
00467
00468
00469 float atomicnumfactor = 1.0f;
00470 if (atomicnum != NULL) {
00471 atomicnumfactor = atomicnum[i];
00472 }
00473
00474
00475 float arinv = -(1.0f/(2.0f*scaledrad*scaledrad)) * MLOG2EF;
00476 float radlim = gausslim * scaledrad;
00477 float radlim2 = radlim * radlim;
00478 radlim *= invgridspacing;
00479
00480 #if VMDQSURFUSESSE && defined(__SSE2__)
00481 __m128 atomicnumfactor_4 = { 0 };
00482 __m128 arinv_4;
00483 if (usesse) {
00484 atomicnumfactor_4 = _mm_set1_ps(atomicnumfactor);
00485 #if VMDUSESVMLEXP
00486
00487 arinv_4 = _mm_set1_ps(arinv * (2.718281828f/2.0f) / MLOG2EF);
00488 #else
00489
00490 arinv_4 = _mm_set1_ps(arinv);
00491 #endif
00492 }
00493 #endif
00494
00495 float tmp;
00496 tmp = xyzr[ind ] * invgridspacing;
00497 int xmin = MAX((int) (tmp - radlim), 0);
00498 int xmax = MIN((int) (tmp + radlim), maxvoxel[0]);
00499 tmp = xyzr[ind+1] * invgridspacing;
00500 int ymin = MAX((int) (tmp - radlim), 0);
00501 int ymax = MIN((int) (tmp + radlim), maxvoxel[1]);
00502 tmp = xyzr[ind+2] * invgridspacing;
00503 int zmin = MAX((int) (tmp - radlim), 0);
00504 int zmax = MIN((int) (tmp + radlim), maxvoxel[2]);
00505
00506 float dz = zmin*gridspacing - xyzr[ind+2];
00507 for (z=zmin; z<=zmax; z++,dz+=gridspacing) {
00508 float dy = ymin*gridspacing - xyzr[ind+1];
00509 for (y=ymin; y<=ymax; y++,dy+=gridspacing) {
00510 float dy2dz2 = dy*dy + dz*dz;
00511
00512
00513 if (dy2dz2 >= radlim2)
00514 continue;
00515
00516 ptrdiff_t addr = ptrdiff_t(z * numvoxels[0]) * ptrdiff_t(numvoxels[1]) + ptrdiff_t(y * numvoxels[0]);
00517 float dx = xmin*gridspacing - xyzr[ind];
00518 x=xmin;
00519
00520 #if VMDQSURFUSESSE && defined(__SSE2__)
00521
00522
00523 if (usesse) {
00524 __align(16) SSEreg n;
00525 __align(16) SSEreg y;
00526 __m128 dy2dz2_4 = _mm_set1_ps(dy2dz2);
00527 __m128 dx_4 = _mm_add_ps(_mm_set1_ps(dx), _mm_load_ps(&sxdelta4[0]));
00528
00529 for (; (x+3)<=xmax; x+=4,dx_4=_mm_add_ps(dx_4, gridspacing4_4)) {
00530 __m128 r2 = _mm_add_ps(_mm_mul_ps(dx_4, dx_4), dy2dz2_4);
00531 __m128 d;
00532 #if VMDUSESVMLEXP
00533
00534 y.f = _mm_exp2_ps(_mm_mul_ps(r2, arinv_4));
00535 #else
00536
00537 y.f = _mm_mul_ps(r2, arinv_4);
00538 n.i = _mm_cvttps_epi32(y.f);
00539 d = _mm_cvtepi32_ps(n.i);
00540 d = _mm_sub_ps(d, y.f);
00541
00542
00543
00544 #if 0
00545
00546 y.f = _mm_fmadd_ps(d, _mm_set1_ps(SCEXP4), _mm_set1_ps(SCEXP3));
00547 y.f = _mm_fmadd_ps(y.f, d, _mm_set1_ps(SCEXP2));
00548 y.f = _mm_fmadd_ps(y.f, d, _mm_set1_ps(SCEXP1));
00549 y.f = _mm_fmadd_ps(y.f, d, _mm_set1_ps(SCEXP0));
00550 #else
00551 y.f = _mm_mul_ps(d, _mm_set_ps1(SCEXP4));
00552 y.f = _mm_add_ps(y.f, _mm_set_ps1(SCEXP3));
00553 y.f = _mm_mul_ps(y.f, d);
00554 y.f = _mm_add_ps(y.f, _mm_set_ps1(SCEXP2));
00555 y.f = _mm_mul_ps(y.f, d);
00556 y.f = _mm_add_ps(y.f, _mm_set_ps1(SCEXP1));
00557 y.f = _mm_mul_ps(y.f, d);
00558 y.f = _mm_add_ps(y.f, _mm_set_ps1(SCEXP0));
00559 #endif
00560
00561
00562
00563 n.i = _mm_sub_epi32(_mm_set1_epi32(EXPOBIAS), n.i);
00564 n.i = _mm_slli_epi32(n.i, EXPOSHIFT);
00565 y.f = _mm_mul_ps(y.f, n.f);
00566 #endif
00567
00568
00569
00570 float *ufptr = &densitymap[addr + x];
00571 d = _mm_loadu_ps(ufptr);
00572 y.f = _mm_mul_ps(y.f, atomicnumfactor_4);
00573 _mm_storeu_ps(ufptr, _mm_add_ps(d, y.f));
00574
00575
00576
00577
00578 d = _mm_mul_ps(y.f, _mm_set_ps1(invisovalue));
00579 ptrdiff_t caddr = (addr + x) * 3L;
00580
00581 #if 1
00582 float *txptr = &voltexmap[caddr];
00583
00584 __m128 r0g0b0r1 = _mm_loadu_ps(txptr+0);
00585 __m128 g1b1r2g2 = _mm_loadu_ps(txptr+4);
00586 __m128 b2r3g3b3 = _mm_loadu_ps(txptr+8);
00587
00588
00589 __m128 r2g2r3g3 = _mm_shuffle_ps(g1b1r2g2, b2r3g3b3, _MM_SHUFFLE(2, 1, 3, 2));
00590 __m128 g0b0g1b1 = _mm_shuffle_ps(r0g0b0r1, g1b1r2g2, _MM_SHUFFLE(1, 0, 2, 1));
00591 __m128 r = _mm_shuffle_ps(r0g0b0r1, r2g2r3g3, _MM_SHUFFLE(2, 0, 3, 0));
00592 __m128 g = _mm_shuffle_ps(g0b0g1b1, r2g2r3g3, _MM_SHUFFLE(3, 1, 2, 0));
00593 __m128 b = _mm_shuffle_ps(g0b0g1b1, b2r3g3b3, _MM_SHUFFLE(3, 0, 3, 1));
00594
00595
00596 r = _mm_add_ps(r, _mm_mul_ps(d, _mm_set_ps1(colors[ind ])));
00597 g = _mm_add_ps(g, _mm_mul_ps(d, _mm_set_ps1(colors[ind + 1])));
00598 b = _mm_add_ps(b, _mm_mul_ps(d, _mm_set_ps1(colors[ind + 2])));
00599
00600
00601 __m128 r0r2g0g2 = _mm_shuffle_ps(r, g, _MM_SHUFFLE(2, 0, 2, 0));
00602 __m128 g1g3b1b3 = _mm_shuffle_ps(g, b, _MM_SHUFFLE(3, 1, 3, 1));
00603 __m128 b0b2r1r3 = _mm_shuffle_ps(b, r, _MM_SHUFFLE(3, 1, 2, 0));
00604
00605 __m128 rr0g0b0r1 = _mm_shuffle_ps(r0r2g0g2, b0b2r1r3, _MM_SHUFFLE(2, 0, 2, 0));
00606 __m128 rg1b1r2g2 = _mm_shuffle_ps(g1g3b1b3, r0r2g0g2, _MM_SHUFFLE(3, 1, 2, 0));
00607 __m128 rb2r3g3b3 = _mm_shuffle_ps(b0b2r1r3, g1g3b1b3, _MM_SHUFFLE(3, 1, 3, 1));
00608
00609
00610 _mm_storeu_ps(txptr+0, rr0g0b0r1);
00611 _mm_storeu_ps(txptr+4, rg1b1r2g2);
00612 _mm_storeu_ps(txptr+8, rb2r3g3b3);
00613
00614 #else
00615
00616
00617 float r, g, b;
00618 r = colors[ind ];
00619 g = colors[ind + 1];
00620 b = colors[ind + 2];
00621
00622 SSEreg tmp;
00623 tmp.f = d;
00624 float density;
00625 density = tmp.floatreg.r0;
00626 voltexmap[caddr ] += density * r;
00627 voltexmap[caddr + 1] += density * g;
00628 voltexmap[caddr + 2] += density * b;
00629
00630 density = tmp.floatreg.r1;
00631 voltexmap[caddr + 3] += density * r;
00632 voltexmap[caddr + 4] += density * g;
00633 voltexmap[caddr + 5] += density * b;
00634
00635 density = tmp.floatreg.r2;
00636 voltexmap[caddr + 6] += density * r;
00637 voltexmap[caddr + 7] += density * g;
00638 voltexmap[caddr + 8] += density * b;
00639
00640 density = tmp.floatreg.r3;
00641 voltexmap[caddr + 9] += density * r;
00642 voltexmap[caddr + 10] += density * g;
00643 voltexmap[caddr + 11] += density * b;
00644 #endif
00645 }
00646 }
00647 #endif
00648
00649
00650 for (; x<=xmax; x++,dx+=gridspacing) {
00651 float r2 = dx*dx + dy2dz2;
00652
00653
00654 float mb = r2 * arinv;
00655 int mbflr = (int) mb;
00656 float d = mbflr - mb;
00657
00658
00659 float sy = SCEXP0 + d*(SCEXP1 + d*(SCEXP2 + d*(SCEXP3 + d*SCEXP4)));
00660
00661
00662 flint scalfac;
00663 scalfac.n = (EXPOBIAS - mbflr) << EXPOSHIFT;
00664
00665
00666 float density = (sy * scalfac.f);
00667
00668 density *= atomicnumfactor;
00669
00670
00671 densitymap[addr + x] += density;
00672
00673
00674
00675
00676 density *= invisovalue;
00677 ptrdiff_t caddr = (addr + x) * 3L;
00678
00679
00680 voltexmap[caddr ] += density * colors[ind ];
00681 voltexmap[caddr + 1] += density * colors[ind + 1];
00682 voltexmap[caddr + 2] += density * colors[ind + 2];
00683 }
00684 }
00685 }
00686 }
00687 } else {
00688
00689 for (i=0; i<natoms; i++) {
00690 if (verbose && ((i & 0x3fff) == 0)) {
00691 printf(".");
00692 fflush(stdout);
00693 }
00694
00695 ptrdiff_t ind = i*4L;
00696 float scaledrad = xyzr[ind+3] * radscale;
00697
00698
00699 float atomicnumfactor = 1.0f;
00700 if (atomicnum != NULL) {
00701 atomicnumfactor = atomicnum[i];
00702 }
00703
00704
00705 float arinv = -(1.0f/(2.0f*scaledrad*scaledrad)) * MLOG2EF;
00706 float radlim = gausslim * scaledrad;
00707 float radlim2 = radlim * radlim;
00708 radlim *= invgridspacing;
00709
00710 #if VMDQSURFUSESSE && defined(__SSE2__)
00711 __m128 atomicnumfactor_4 = { 0 };
00712 __m128 arinv_4;
00713 if (usesse) {
00714 atomicnumfactor_4 = _mm_set1_ps(atomicnumfactor);
00715 #if VMDUSESVMLEXP
00716
00717 arinv_4 = _mm_set1_ps(arinv * (2.718281828f/2.0f) / MLOG2EF);
00718 #else
00719
00720 arinv_4 = _mm_set1_ps(arinv);
00721 #endif
00722 }
00723 #endif
00724
00725 #if VMDQSURFUSEVSX && defined(__VEC__)
00726 vector float atomicnumfactor_4;
00727 vector float arinv_4;
00728 if (usevsx) {
00729 atomicnumfactor_4 = vec_splats(atomicnumfactor);
00730
00731
00732 arinv_4 = vec_splats(arinv);
00733 }
00734 #endif
00735
00736 float tmp;
00737 tmp = xyzr[ind ] * invgridspacing;
00738 int xmin = MAX((int) (tmp - radlim), 0);
00739 int xmax = MIN((int) (tmp + radlim), maxvoxel[0]);
00740 tmp = xyzr[ind+1] * invgridspacing;
00741 int ymin = MAX((int) (tmp - radlim), 0);
00742 int ymax = MIN((int) (tmp + radlim), maxvoxel[1]);
00743 tmp = xyzr[ind+2] * invgridspacing;
00744 int zmin = MAX((int) (tmp - radlim), 0);
00745 int zmax = MIN((int) (tmp + radlim), maxvoxel[2]);
00746
00747 float dz = zmin*gridspacing - xyzr[ind+2];
00748 for (z=zmin; z<=zmax; z++,dz+=gridspacing) {
00749 float dy = ymin*gridspacing - xyzr[ind+1];
00750 for (y=ymin; y<=ymax; y++,dy+=gridspacing) {
00751 float dy2dz2 = dy*dy + dz*dz;
00752
00753
00754 if (dy2dz2 >= radlim2)
00755 continue;
00756
00757 ptrdiff_t addr = ptrdiff_t(z * numvoxels[0]) * ptrdiff_t(numvoxels[1]) + ptrdiff_t(y * numvoxels[0]);
00758 float dx = xmin*gridspacing - xyzr[ind];
00759 x=xmin;
00760
00761 #if VMDQSURFUSESSE && defined(__SSE2__)
00762
00763
00764 if (usesse) {
00765 __align(16) SSEreg n;
00766 __align(16) SSEreg y;
00767 __m128 dy2dz2_4 = _mm_set1_ps(dy2dz2);
00768 __m128 dx_4 = _mm_add_ps(_mm_set1_ps(dx), _mm_load_ps(&sxdelta4[0]));
00769
00770 for (; (x+3)<=xmax; x+=4,dx_4=_mm_add_ps(dx_4, gridspacing4_4)) {
00771 __m128 r2 = _mm_add_ps(_mm_mul_ps(dx_4, dx_4), dy2dz2_4);
00772 __m128 d;
00773 #if VMDUSESVMLEXP
00774
00775 y.f = _mm_exp2_ps(_mm_mul_ps(r2, arinv_4));
00776 #else
00777
00778 y.f = _mm_mul_ps(r2, arinv_4);
00779 n.i = _mm_cvttps_epi32(y.f);
00780 d = _mm_cvtepi32_ps(n.i);
00781 d = _mm_sub_ps(d, y.f);
00782
00783
00784
00785 #if 0
00786
00787 y.f = _mm_fmadd_ps(d, _mm_set1_ps(SCEXP4), _mm_set1_ps(SCEXP3));
00788 y.f = _mm_fmadd_ps(y.f, d, _mm_set1_ps(SCEXP2));
00789 y.f = _mm_fmadd_ps(y.f, d, _mm_set1_ps(SCEXP1));
00790 y.f = _mm_fmadd_ps(y.f, d, _mm_set1_ps(SCEXP0));
00791 #else
00792 y.f = _mm_mul_ps(d, _mm_set_ps1(SCEXP4));
00793 y.f = _mm_add_ps(y.f, _mm_set_ps1(SCEXP3));
00794 y.f = _mm_mul_ps(y.f, d);
00795 y.f = _mm_add_ps(y.f, _mm_set_ps1(SCEXP2));
00796 y.f = _mm_mul_ps(y.f, d);
00797 y.f = _mm_add_ps(y.f, _mm_set_ps1(SCEXP1));
00798 y.f = _mm_mul_ps(y.f, d);
00799 y.f = _mm_add_ps(y.f, _mm_set_ps1(SCEXP0));
00800 #endif
00801
00802
00803
00804 n.i = _mm_sub_epi32(_mm_set1_epi32(EXPOBIAS), n.i);
00805 n.i = _mm_slli_epi32(n.i, EXPOSHIFT);
00806 y.f = _mm_mul_ps(y.f, n.f);
00807 y.f = _mm_mul_ps(y.f, atomicnumfactor_4);
00808 #endif
00809
00810
00811
00812 float *ufptr = &densitymap[addr + x];
00813 d = _mm_loadu_ps(ufptr);
00814 _mm_storeu_ps(ufptr, _mm_add_ps(d, y.f));
00815 }
00816 }
00817 #endif
00818
00819
00820 #if VMDQSURFUSEVSX && defined(__VEC__)
00821
00822
00823
00824
00825
00826
00827
00828 if (usevsx) {
00829 vector float dy2dz2_4 = vec_splats(dy2dz2);
00830 vector float tmpvsxdelta4 = *((__vector float *) &sxdelta4[0]);
00831 vector float dx_4 = vec_add(vec_splats(dx), tmpvsxdelta4);
00832
00833 for (; (x+3)<=xmax; x+=4,dx_4=vec_add(dx_4, gridspacing4_4)) {
00834 vector float r2 = vec_add(vec_mul(dx_4, dx_4), dy2dz2_4);
00835
00836
00837 vector float mb = vec_mul(r2, arinv_4);
00838 vector float mbflr = vec_floor(mb);
00839 vector float d = vec_sub(mbflr, mb);
00840 vector float y;
00841
00842
00843
00844 y = vec_madd(d, vec_splats(SCEXP4), vec_splats(SCEXP3));
00845 y = vec_madd(y, d, vec_splats(SCEXP2));
00846 y = vec_madd(y, d, vec_splats(SCEXP1));
00847 y = vec_madd(y, d, vec_splats(SCEXP0));
00848
00849
00850
00851 y = vec_mul(y, vec_expte(-mbflr));
00852 y = vec_mul(y, atomicnumfactor_4);
00853
00854
00855
00856 float *ufptr = &densitymap[addr + x];
00857 d = *((__vector float *) &ufptr[0]);
00858
00859
00860
00861 d = vec_add(d, y);
00862
00863 ufptr[0] = d[0];
00864 ufptr[1] = d[1];
00865 ufptr[2] = d[2];
00866 ufptr[3] = d[3];
00867 }
00868 }
00869 #endif
00870
00871
00872 for (; x<=xmax; x++,dx+=gridspacing) {
00873 float r2 = dx*dx + dy2dz2;
00874
00875
00876 float mb = r2 * arinv;
00877 int mbflr = (int) mb;
00878 float d = mbflr - mb;
00879
00880
00881 float sy = SCEXP0 + d*(SCEXP1 + d*(SCEXP2 + d*(SCEXP3 + d*SCEXP4)));
00882
00883
00884 flint scalfac;
00885 scalfac.n = (EXPOBIAS - mbflr) << EXPOSHIFT;
00886
00887
00888 float density = (sy * scalfac.f);
00889
00890 density *= atomicnumfactor;
00891
00892 densitymap[addr + x] += density;
00893 }
00894 }
00895 }
00896 }
00897 }
00898 }
00899
00900
00901 typedef struct {
00902 wkf_cpu_caps_t *cpucaps;
00903 int verbose;
00904 int natoms;
00905 float radscale;
00906 float gridspacing;
00907 float isovalue;
00908 float gausslim;
00909 const int *numvoxels;
00910 const float *xyzr;
00911 const float *atomicnum;
00912 const float *colors;
00913 float **thrdensitymaps;
00914 float **thrvoltexmaps;
00915 } densitythrparms;
00916
00917
00918 static void * densitythread(void *voidparms) {
00919 wkf_tasktile_t tile;
00920 densitythrparms *parms = NULL;
00921 int threadid;
00922
00923 wkf_threadlaunch_getid(voidparms, &threadid, NULL);
00924 wkf_threadlaunch_getdata(voidparms, (void **) &parms);
00925
00926 while (wkf_threadlaunch_next_tile(voidparms, 16384, &tile) != WKF_SCHED_DONE) {
00927 int natoms = tile.end-tile.start;
00928 const float *atomicnum = (parms->atomicnum == NULL) ? NULL : &parms->atomicnum[tile.start];
00929 vmd_gaussdensity_opt(parms->cpucaps,
00930 parms->verbose, natoms,
00931 &parms->xyzr[4L*tile.start],
00932 atomicnum,
00933 (parms->thrvoltexmaps[0]!=NULL) ? &parms->colors[4L*tile.start] : NULL,
00934 parms->thrdensitymaps[threadid],
00935 parms->thrvoltexmaps[threadid],
00936 parms->numvoxels,
00937 parms->radscale,
00938 parms->gridspacing,
00939 parms->isovalue,
00940 parms->gausslim);
00941 }
00942
00943 return NULL;
00944 }
00945
00946
00947 static void * reductionthread(void *voidparms) {
00948 wkf_tasktile_t tile;
00949 densitythrparms *parms = NULL;
00950 int threadid, numthreads;
00951
00952 wkf_threadlaunch_getid(voidparms, &threadid, &numthreads);
00953 wkf_threadlaunch_getdata(voidparms, (void **) &parms);
00954
00955 while (wkf_threadlaunch_next_tile(voidparms, 1, &tile) != WKF_SCHED_DONE) {
00956
00957 ptrdiff_t planesz = ptrdiff_t(parms->numvoxels[0]) * ptrdiff_t(parms->numvoxels[1]);
00958 ptrdiff_t start = ptrdiff_t(tile.start) * planesz;
00959 ptrdiff_t end = ptrdiff_t(tile.end) * planesz;
00960
00961 ptrdiff_t i, x;
00962 for (x=start; x<end; x++) {
00963 float tmp = 0.0f;
00964 for (i=1; i<numthreads; i++) {
00965 tmp += parms->thrdensitymaps[i][x];
00966 }
00967 parms->thrdensitymaps[0][x] += tmp;
00968 }
00969
00970
00971 if (parms->thrvoltexmaps[0] != NULL) {
00972 for (x=start*3L; x<end*3L; x++) {
00973 float tmp = 0.0f;
00974 for (i=1; i<numthreads; i++) {
00975 tmp += parms->thrvoltexmaps[i][x];
00976 }
00977 parms->thrvoltexmaps[0][x] += tmp;
00978 }
00979 }
00980 }
00981
00982 return NULL;
00983 }
00984
00985
00986 static int vmd_gaussdensity_threaded(wkf_cpu_caps_t *cpucaps, int verbose,
00987 int natoms, const float *xyzr,
00988 const float *atomicnum,
00989 const float *colors,
00990 float *densitymap, float *voltexmap,
00991 const int *numvoxels,
00992 float radscale, float gridspacing,
00993 float isovalue, float gausslim) {
00994 densitythrparms parms;
00995 memset(&parms, 0, sizeof(parms));
00996
00997 parms.cpucaps = cpucaps;
00998 parms.verbose = verbose;
00999 parms.natoms = natoms;
01000 parms.radscale = radscale;
01001 parms.gridspacing = gridspacing;
01002 parms.isovalue = isovalue;
01003 parms.gausslim = gausslim;
01004 parms.numvoxels = numvoxels;
01005 parms.xyzr = xyzr;
01006 parms.atomicnum = atomicnum;
01007 parms.colors = colors;
01008
01009 int physprocs = wkf_thread_numprocessors();
01010 int maxprocs = physprocs;
01011
01012
01013
01014
01015
01016
01017
01018
01019 while (maxprocs > 8)
01020 maxprocs /= 2;
01021
01022
01023
01024
01025
01026
01027 ptrdiff_t volsz = ptrdiff_t(numvoxels[0]) *
01028 ptrdiff_t(numvoxels[1]) * ptrdiff_t(numvoxels[2]);
01029 ptrdiff_t volmemsz = sizeof(float) * volsz;
01030 ptrdiff_t volmemszkb = volmemsz / 1024;
01031 ptrdiff_t volmemtexszkb = volmemszkb + ((voltexmap != NULL) ? 3L*volmemszkb : 0);
01032
01033
01034
01035
01036 ptrdiff_t vmdcorefree = -1;
01037
01038 #if defined(ARCH_BLUEWATERS) || defined(ARCH_CRAY_XC) || defined(ARCH_CRAY_XK) || defined(ARCH_LINUXAMD64) || defined(ARCH_SOLARIS2_64) || defined(ARCH_SOLARISX86_64) || defined(ARCH_AIX6_64) || defined(MACOSXARM64) || defined(ARCH_MACOSXX86_64)
01039
01040
01041
01042
01043
01044
01045 vmdcorefree = vmd_get_avail_physmem_mb();
01046 #endif
01047
01048 if (vmdcorefree >= 0) {
01049
01050
01051
01052
01053 while ((volmemtexszkb * maxprocs) > (1024L*vmdcorefree/4)) {
01054 maxprocs /= 2;
01055 }
01056 } else {
01057
01058 while ((volmemtexszkb * maxprocs) > (2L * 1024L * 1024L))
01059 maxprocs /= 2;
01060 }
01061
01062 if (maxprocs < 1)
01063 maxprocs = 1;
01064
01065
01066
01067 parms.thrdensitymaps = (float **) calloc(1,maxprocs * sizeof(float *));
01068 parms.thrvoltexmaps = (float **) calloc(1, maxprocs * sizeof(float *));
01069
01070
01071 parms.thrdensitymaps[0] = densitymap;
01072 parms.thrvoltexmaps[0] = voltexmap;
01073
01074 int i;
01075 int numprocs = maxprocs;
01076 for (i=1; i<maxprocs; i++) {
01077 parms.thrdensitymaps[i] = (float *) calloc(1, volmemsz);
01078 if (parms.thrdensitymaps[i] == NULL) {
01079 numprocs = i;
01080 break;
01081 }
01082 if (voltexmap != NULL) {
01083 parms.thrvoltexmaps[i] = (float *) calloc(1, 3L * volmemsz);
01084 if (parms.thrvoltexmaps[i] == NULL) {
01085 free(parms.thrdensitymaps[i]);
01086 parms.thrdensitymaps[i] = NULL;
01087 numprocs = i;
01088 break;
01089 }
01090 }
01091 }
01092
01093
01094 wkf_tasktile_t tile;
01095 tile.start = 0;
01096 tile.end = natoms;
01097 wkf_threadlaunch(numprocs, &parms, densitythread, &tile);
01098
01099
01100 tile.start = 0;
01101 tile.end = numvoxels[2];
01102 wkf_threadlaunch(numprocs, &parms, reductionthread, &tile);
01103
01104
01105 for (i=1; i<maxprocs; i++) {
01106 if (parms.thrdensitymaps[i] != NULL)
01107 free(parms.thrdensitymaps[i]);
01108
01109 if (parms.thrvoltexmaps[i] != NULL)
01110 free(parms.thrvoltexmaps[i]);
01111 }
01112 free(parms.thrdensitymaps);
01113 free(parms.thrvoltexmaps);
01114
01115 return 0;
01116 }
01117
01118 QuickSurf::QuickSurf(int forcecpuonly) {
01119 volmap = NULL;
01120 voltexmap = NULL;
01121 s.clear();
01122 isovalue = 0.5f;
01123
01124 numvoxels[0] = 128;
01125 numvoxels[1] = 128;
01126 numvoxels[2] = 128;
01127
01128 origin[0] = 0.0f;
01129 origin[1] = 0.0f;
01130 origin[2] = 0.0f;
01131
01132 xaxis[0] = 1.0f;
01133 xaxis[1] = 0.0f;
01134 xaxis[2] = 0.0f;
01135
01136 yaxis[0] = 0.0f;
01137 yaxis[1] = 1.0f;
01138 yaxis[2] = 0.0f;
01139
01140 zaxis[0] = 0.0f;
01141 zaxis[1] = 0.0f;
01142 zaxis[2] = 1.0f;
01143
01144 cudaqs = NULL;
01145 force_cpuonly = forcecpuonly;
01146 #if defined(VMDCUDA)
01147 if (!force_cpuonly && !getenv("VMDNOCUDA")) {
01148 cudaqs = new CUDAQuickSurf();
01149 }
01150 #endif
01151
01152 timer = wkf_timer_create();
01153 }
01154
01155
01156 void QuickSurf::free_gpu_memory(void) {
01157 if (cudaqs) {
01158 #if defined(VMDCUDA)
01159 delete cudaqs;
01160 #endif
01161 cudaqs = NULL;
01162 }
01163 }
01164
01165
01166 int QuickSurf::calc_surf(AtomSel *atomSel, DrawMolecule *mol,
01167 const float *atompos, const float *atomradii,
01168 int quality, float radscale, float gridspacing,
01169 float isoval, const int *colidx, const float *cmap,
01170 VMDDisplayList *cmdList) {
01171 PROFILE_PUSH_RANGE("QuickSurf", 3);
01172
01173 wkf_timer_start(timer);
01174 int colorperatom = (colidx != NULL && cmap != NULL);
01175 int usebeads=0;
01176
01177 int verbose = (getenv("VMDQUICKSURFVERBOSE") != NULL);
01178
01179
01180
01181
01182 const float *atomicnum = NULL;
01183
01184
01185 if (voltexmap != NULL)
01186 free(voltexmap);
01187 voltexmap = NULL;
01188
01189 ResizeArray<float> beadpos(64 + (3L * atomSel->selected) / 20);
01190 ResizeArray<float> beadradii(64 + (3L * atomSel->selected) / 20);
01191 ResizeArray<float> beadcolors(64 + (3L * atomSel->selected) / 20);
01192
01193 if (getenv("VMDQUICKSURFBEADS")) {
01194 usebeads=1;
01195 if (verbose)
01196 printf("QuickSurf using residue beads representation...\n");
01197 }
01198
01199 int numbeads = 0;
01200 if (usebeads) {
01201 int i, resid, numres;
01202
01203
01204 numres = mol->residueList.num();
01205 for (resid=0; resid<numres; resid++) {
01206 float com[3] = {0.0, 0.0, 0.0};
01207 const ResizeArray<int> &atoms = mol->residueList[resid]->atoms;
01208 int numatoms = atoms.num();
01209 int oncount = 0;
01210
01211
01212 for (i=0; i<numatoms; i++) {
01213 int idx = atoms[i];
01214 if (atomSel->on[idx]) {
01215 oncount++;
01216 vec_add(com, com, atompos + 3L*idx);
01217 }
01218 }
01219
01220 if (oncount < 1)
01221 continue;
01222
01223 vec_scale(com, 1.0f / (float) oncount, com);
01224
01225
01226 int atomcolorindex=0;
01227 float boundradsq = 0.0f;
01228 for (i=0; i<numatoms; i++) {
01229 int idx = atoms[i];
01230 if (atomSel->on[idx]) {
01231 float tmpdist[3];
01232 atomcolorindex = idx;
01233 vec_sub(tmpdist, com, atompos + 3L*idx);
01234 float distsq = dot_prod(tmpdist, tmpdist);
01235 if (distsq > boundradsq) {
01236 boundradsq = distsq;
01237 }
01238 }
01239 }
01240 beadpos.append3(&com[0]);
01241 beadradii.append(sqrtf(boundradsq) + 1.0f);
01242
01243 if (colorperatom) {
01244 const float *cp = &cmap[colidx[atomcolorindex] * 3L];
01245 beadcolors.append3(&cp[0]);
01246 }
01247
01248
01249 }
01250
01251 numbeads = beadpos.num() / 3;
01252 }
01253
01254
01255 isovalue=isoval;
01256
01257
01258
01259
01260
01261 if (cmap != NULL)
01262 vec_copy(solidcolor, cmap);
01263
01264
01265
01266 float minx, miny, minz, maxx, maxy, maxz;
01267 float minrad, maxrad;
01268 int i;
01269 if (usebeads) {
01270 minx = maxx = beadpos[0];
01271 miny = maxy = beadpos[1];
01272 minz = maxz = beadpos[2];
01273 minrad = maxrad = beadradii[0];
01274 for (i=0; i<numbeads; i++) {
01275 ptrdiff_t ind = i * 3L;
01276 float tmpx = beadpos[ind ];
01277 float tmpy = beadpos[ind+1];
01278 float tmpz = beadpos[ind+2];
01279
01280 minx = (tmpx < minx) ? tmpx : minx;
01281 maxx = (tmpx > maxx) ? tmpx : maxx;
01282
01283 miny = (tmpy < miny) ? tmpy : miny;
01284 maxy = (tmpy > maxy) ? tmpy : maxy;
01285
01286 minz = (tmpz < minz) ? tmpz : minz;
01287 maxz = (tmpz > maxz) ? tmpz : maxz;
01288
01289
01290
01291 float r = beadradii[i];
01292 minrad = (r < minrad) ? r : minrad;
01293 maxrad = (r > maxrad) ? r : maxrad;
01294 }
01295 } else {
01296 minx = maxx = atompos[atomSel->firstsel*3L ];
01297 miny = maxy = atompos[atomSel->firstsel*3L+1];
01298 minz = maxz = atompos[atomSel->firstsel*3L+2];
01299
01300
01301 mol->get_radii_minmax(minrad, maxrad);
01302
01303
01304
01305
01306 if (minrad <= 0.001 || maxrad/minrad > 2.5) {
01307 minrad = maxrad = atomradii[atomSel->firstsel];
01308 for (i=atomSel->firstsel; i<=atomSel->lastsel; i++) {
01309 if (atomSel->on[i]) {
01310 ptrdiff_t ind = i * 3L;
01311 float tmpx = atompos[ind ];
01312 float tmpy = atompos[ind+1];
01313 float tmpz = atompos[ind+2];
01314
01315 minx = (tmpx < minx) ? tmpx : minx;
01316 maxx = (tmpx > maxx) ? tmpx : maxx;
01317
01318 miny = (tmpy < miny) ? tmpy : miny;
01319 maxy = (tmpy > maxy) ? tmpy : maxy;
01320
01321 minz = (tmpz < minz) ? tmpz : minz;
01322 maxz = (tmpz > maxz) ? tmpz : maxz;
01323
01324 float r = atomradii[i];
01325 minrad = (r < minrad) ? r : minrad;
01326 maxrad = (r > maxrad) ? r : maxrad;
01327 }
01328 }
01329 } else {
01330 #if 1
01331 float fmin[3], fmax[3];
01332 minmax_selected_3fv_aligned(atompos, atomSel->on, atomSel->num_atoms,
01333 atomSel->firstsel, atomSel->lastsel,
01334 fmin, fmax);
01335 minx = fmin[0];
01336 miny = fmin[1];
01337 minz = fmin[2];
01338
01339 maxx = fmax[0];
01340 maxy = fmax[1];
01341 maxz = fmax[2];
01342 #else
01343 for (i=atomSel->firstsel; i<=atomSel->lastsel; i++) {
01344 if (atomSel->on[i]) {
01345 ptrdiff_t ind = i * 3L;
01346 float tmpx = atompos[ind ];
01347 float tmpy = atompos[ind+1];
01348 float tmpz = atompos[ind+2];
01349
01350 minx = (tmpx < minx) ? tmpx : minx;
01351 maxx = (tmpx > maxx) ? tmpx : maxx;
01352
01353 miny = (tmpy < miny) ? tmpy : miny;
01354 maxy = (tmpy > maxy) ? tmpy : maxy;
01355
01356 minz = (tmpz < minz) ? tmpz : minz;
01357 maxz = (tmpz > maxz) ? tmpz : maxz;
01358 }
01359 }
01360 #endif
01361 }
01362 }
01363
01364 float mincoord[3], maxcoord[3];
01365 mincoord[0] = minx;
01366 mincoord[1] = miny;
01367 mincoord[2] = minz;
01368 maxcoord[0] = maxx;
01369 maxcoord[1] = maxy;
01370 maxcoord[2] = maxz;
01371
01372
01373
01374 float gridpadding = radscale * maxrad * 1.70f;
01375 float padrad = gridpadding;
01376 padrad = 0.65f * sqrtf(4.0f/3.0f*((float) VMD_PI)*padrad*padrad*padrad);
01377 gridpadding = MAX(gridpadding, padrad);
01378
01379
01380
01381
01382
01383
01384
01385
01386
01387
01388 if (minrad > 4.0f) {
01389 gridspacing *= minrad;
01390 }
01391
01392 if (verbose) {
01393 printf("QuickSurf: R*%.1f, I=%.1f, H=%.1f Pad: %.1f minR: %.1f maxR: %.1f)\n",
01394 radscale, isovalue, gridspacing, gridpadding, minrad, maxrad);
01395 }
01396
01397 mincoord[0] -= gridpadding;
01398 mincoord[1] -= gridpadding;
01399 mincoord[2] -= gridpadding;
01400 maxcoord[0] += gridpadding;
01401 maxcoord[1] += gridpadding;
01402 maxcoord[2] += gridpadding;
01403
01404
01405 numvoxels[0] = (int) ceil((maxcoord[0]-mincoord[0]) / gridspacing);
01406 numvoxels[1] = (int) ceil((maxcoord[1]-mincoord[1]) / gridspacing);
01407 numvoxels[2] = (int) ceil((maxcoord[2]-mincoord[2]) / gridspacing);
01408
01409
01410 xaxis[0] = (numvoxels[0]-1) * gridspacing;
01411 yaxis[1] = (numvoxels[1]-1) * gridspacing;
01412 zaxis[2] = (numvoxels[2]-1) * gridspacing;
01413 maxcoord[0] = mincoord[0] + xaxis[0];
01414 maxcoord[1] = mincoord[1] + yaxis[1];
01415 maxcoord[2] = mincoord[2] + zaxis[2];
01416
01417 int boundserr=0;
01418 for (i=0; i<3; i++) {
01419 if (isnan(mincoord[i]) || isnan(maxcoord[i]) || (numvoxels[i] < 1))
01420 boundserr = 1;
01421 }
01422
01423 if (boundserr)
01424 msgErr << "QuickSurf) NaN or illegal bounding box, grid size" << sendmsg;
01425
01426 if (verbose || boundserr) {
01427 printf(" GridSZ: (%4d %4d %4d) BBox: (%.1f %.1f %.1f)->(%.1f %.1f %.1f)\n",
01428 numvoxels[0], numvoxels[1], numvoxels[2],
01429 mincoord[0], mincoord[1], mincoord[2],
01430 maxcoord[0], maxcoord[1], maxcoord[2]);
01431 }
01432
01433 if (boundserr)
01434 return -1;
01435
01436 vec_copy(origin, mincoord);
01437
01438
01439 float *xyzr = NULL;
01440 float *colors = NULL;
01441 if (usebeads) {
01442 int ind =0;
01443 int ind4=0;
01444 xyzr = (float *) malloc(numbeads * sizeof(float) * 4L);
01445 if (colorperatom) {
01446 colors = (float *) malloc(numbeads * sizeof(float) * 4L);
01447
01448
01449 for (i=0; i<numbeads; i++) {
01450 const float *fp = &beadpos[0] + ind;
01451 xyzr[ind4 ] = fp[0]-origin[0];
01452 xyzr[ind4 + 1] = fp[1]-origin[1];
01453 xyzr[ind4 + 2] = fp[2]-origin[2];
01454 xyzr[ind4 + 3] = beadradii[i];
01455
01456 const float *cp = &beadcolors[0] + ind;
01457 colors[ind4 ] = cp[0];
01458 colors[ind4 + 1] = cp[1];
01459 colors[ind4 + 2] = cp[2];
01460 colors[ind4 + 3] = 1.0f;
01461 ind4 += 4;
01462 ind += 3;
01463 }
01464 } else {
01465
01466 for (i=0; i<numbeads; i++) {
01467 const float *fp = &beadpos[0] + ind;
01468 xyzr[ind4 ] = fp[0]-origin[0];
01469 xyzr[ind4 + 1] = fp[1]-origin[1];
01470 xyzr[ind4 + 2] = fp[2]-origin[2];
01471 xyzr[ind4 + 3] = beadradii[i];
01472 ind4 += 4;
01473 ind += 3;
01474 }
01475 }
01476 } else {
01477 ptrdiff_t ind = atomSel->firstsel * 3L;
01478 ptrdiff_t ind4=0;
01479 xyzr = (float *) malloc(atomSel->selected * sizeof(float) * 4L);
01480 if (colorperatom) {
01481 colors = (float *) malloc(atomSel->selected * sizeof(float) * 4L);
01482
01483
01484 for (i=atomSel->firstsel; i <= atomSel->lastsel; i++) {
01485 if (atomSel->on[i]) {
01486 const float *fp = atompos + ind;
01487 xyzr[ind4 ] = fp[0]-origin[0];
01488 xyzr[ind4 + 1] = fp[1]-origin[1];
01489 xyzr[ind4 + 2] = fp[2]-origin[2];
01490 xyzr[ind4 + 3] = atomradii[i];
01491
01492 const float *cp = &cmap[colidx[i] * 3L];
01493 colors[ind4 ] = cp[0];
01494 colors[ind4 + 1] = cp[1];
01495 colors[ind4 + 2] = cp[2];
01496 colors[ind4 + 3] = 1.0f;
01497 ind4 += 4;
01498 }
01499 ind += 3;
01500 }
01501 } else {
01502
01503 for (i=atomSel->firstsel; i <= atomSel->lastsel; i++) {
01504 if (atomSel->on[i]) {
01505 const float *fp = atompos + ind;
01506 xyzr[ind4 ] = fp[0]-origin[0];
01507 xyzr[ind4 + 1] = fp[1]-origin[1];
01508 xyzr[ind4 + 2] = fp[2]-origin[2];
01509 xyzr[ind4 + 3] = atomradii[i];
01510 ind4 += 4;
01511 }
01512 ind += 3;
01513 }
01514 }
01515 }
01516
01517
01518 float gausslim = 2.0f;
01519 switch (quality) {
01520 case 3: gausslim = 4.0f; break;
01521
01522 case 2: gausslim = 3.0f; break;
01523
01524 case 1: gausslim = 2.5f; break;
01525
01526 case 0:
01527 default: gausslim = 2.0f;
01528 break;
01529 }
01530
01531 pretime = wkf_timer_timenow(timer);
01532
01533 #if defined(VMDCUDA)
01534 if (!force_cpuonly && !getenv("VMDNOCUDA")) {
01535
01536 if (cudaqs == NULL)
01537 cudaqs = new CUDAQuickSurf();
01538
01539
01540 CUDAQuickSurf::VolTexFormat voltexformat = CUDAQuickSurf::RGB3F;
01541 switch (quality) {
01542 case 3: voltexformat = CUDAQuickSurf::RGB3F; break;
01543
01544 case 2: voltexformat = CUDAQuickSurf::RGB3F; break;
01545
01546 case 1: voltexformat = CUDAQuickSurf::RGB3F; break;
01547
01548 case 0:
01549 default: voltexformat = CUDAQuickSurf::RGB4U;
01550 break;
01551 }
01552
01553
01554 int pcount = (usebeads) ? numbeads : atomSel->selected;
01555 int rc = cudaqs->calc_surf(pcount, &xyzr[0],
01556 (colorperatom) ? &colors[0] : &cmap[0],
01557 colorperatom, voltexformat,
01558 origin, numvoxels, maxrad,
01559 radscale, gridspacing, isovalue, gausslim,
01560 cmdList);
01561
01562
01563
01564
01565
01566 if (getenv("VMDQUICKSURFMINMEM")) {
01567 free_gpu_memory();
01568 }
01569
01570 if (rc == 0) {
01571 free(xyzr);
01572 if (colors)
01573 free(colors);
01574
01575 voltime = wkf_timer_timenow(timer);
01576
01577 PROFILE_POP_RANGE();
01578
01579 return 0;
01580 }
01581 }
01582 #endif
01583
01584 if (verbose) {
01585 printf(" Computing density map grid on CPUs ");
01586 }
01587
01588 ptrdiff_t volsz = numvoxels[0] * numvoxels[1] * numvoxels[2];
01589 volmap = new float[volsz];
01590 if (colidx != NULL && cmap != NULL) {
01591 voltexmap = (float*) calloc(1, 3L * sizeof(float) * numvoxels[0] * numvoxels[1] * numvoxels[2]);
01592 }
01593
01594 fflush(stdout);
01595 memset(volmap, 0, sizeof(float) * volsz);
01596 if ((volsz * atomSel->selected) > 20000000) {
01597 vmd_gaussdensity_threaded(mol->app->cpucaps, verbose,
01598 atomSel->selected, &xyzr[0], atomicnum,
01599 (voltexmap!=NULL) ? &colors[0] : NULL,
01600 volmap, voltexmap, numvoxels, radscale,
01601 gridspacing, isovalue, gausslim);
01602 } else {
01603 vmd_gaussdensity_opt(mol->app->cpucaps, verbose,
01604 atomSel->selected, &xyzr[0], atomicnum,
01605 (voltexmap!=NULL) ? &colors[0] : NULL,
01606 volmap, voltexmap, numvoxels, radscale,
01607 gridspacing, isovalue, gausslim);
01608 }
01609
01610 free(xyzr);
01611 if (colors)
01612 free(colors);
01613
01614 voltime = wkf_timer_timenow(timer);
01615
01616
01617 if (cmdList != NULL) {
01618 draw_trimesh(cmdList);
01619 }
01620
01621 if (verbose) {
01622 printf(" Done.\n");
01623 }
01624
01625 PROFILE_POP_RANGE();
01626
01627 return 0;
01628 }
01629
01630
01631
01632 VolumetricData * QuickSurf::calc_density_map(AtomSel * atomSel,
01633 DrawMolecule *mymol,
01634 const float *atompos,
01635 const float *atomradii,
01636 int quality, float radscale,
01637 float gridspacing) {
01638 if (!calc_surf(atomSel, mymol, atompos, atomradii, quality,
01639 radscale, gridspacing, 1.0f, NULL, NULL, NULL)) {
01640 VolumetricData *surfvol;
01641 surfvol = new VolumetricData("density map", origin, xaxis, yaxis, zaxis,
01642 numvoxels[0], numvoxels[1], numvoxels[2],
01643 volmap);
01644 return surfvol;
01645 }
01646
01647 return NULL;
01648 }
01649
01650
01651
01652 int QuickSurf::get_trimesh(int &numverts,
01653 float *&v3fv, float *&n3fv, float *&c3fv,
01654 int &numfacets, int *&fiv) {
01655
01656 int verbose = (getenv("VMDQUICKSURFVERBOSE") != NULL);
01657
01658 if (verbose)
01659 printf("Running marching cubes on CPU...\n");
01660
01661 VolumetricData *surfvol;
01662 surfvol = new VolumetricData("molecular surface",
01663 origin, xaxis, yaxis, zaxis,
01664 numvoxels[0], numvoxels[1], numvoxels[2],
01665 volmap);
01666
01667
01668
01669
01670 surfvol->compute_volume_gradient();
01671 gradtime = wkf_timer_timenow(timer);
01672
01673
01674 const int stepsize = 1;
01675 s.clear();
01676 s.compute(surfvol, isovalue, stepsize);
01677
01678 mctime = wkf_timer_timenow(timer);
01679
01680 s.vertexfusion(9, 9);
01681 s.normalize();
01682
01683 if (s.numtriangles > 0) {
01684 if (voltexmap != NULL) {
01685
01686 s.set_color_voltex_rgb3fv(voltexmap);
01687 } else {
01688
01689 s.set_color_rgb3fv(solidcolor);
01690 }
01691 }
01692
01693 numverts = s.v.num() / 3;
01694 v3fv=&s.v[0];
01695 n3fv=&s.n[0];
01696 c3fv=&s.c[0];
01697
01698 numfacets = s.numtriangles;
01699 fiv=&s.f[0];
01700
01701 delete surfvol;
01702
01703 mcverttime = wkf_timer_timenow(timer);
01704 reptime = mcverttime;
01705
01706 if (verbose) {
01707 char strmsg[1024];
01708 sprintf(strmsg, "QuickSurf: %.3f [pre:%.3f vol:%.3f gr:%.3f mc:%.2f mcv:%.3f]",
01709 reptime, pretime, voltime-pretime, gradtime-voltime,
01710 mctime-gradtime, mcverttime-mctime);
01711
01712 msgInfo << strmsg << sendmsg;
01713 }
01714
01715 return 0;
01716 }
01717
01718
01719 int QuickSurf::draw_trimesh(VMDDisplayList *cmdList) {
01720 DispCmdTriMesh cmdTriMesh;
01721
01722 int numverts=0;
01723 float *v=NULL, *n=NULL, *c=NULL;
01724 int numfacets=0;
01725 int *f=NULL;
01726
01727 get_trimesh(numverts, v, n, c, numfacets, f);
01728
01729
01730 if (numfacets > 0) {
01731 cmdTriMesh.putdata(v, n, c, numverts, f, numfacets, 0, cmdList);
01732 }
01733
01734 return 0;
01735 }
01736
01737
01738 QuickSurf::~QuickSurf() {
01739 #if defined(VMDCUDA)
01740 free_gpu_memory();
01741 #endif
01742
01743 if (voltexmap != NULL)
01744 free(voltexmap);
01745 voltexmap = NULL;
01746
01747 wkf_timer_destroy(timer);
01748 }
01749
01750