Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Related Pages  

QuickSurf.C

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr                                                                       
00003  *cr            (C) Copyright 1995-2011 The Board of Trustees of the           
00004  *cr                        University of Illinois                       
00005  *cr                         All Rights Reserved                        
00006  *cr                                                                   
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: QuickSurf.C,v $
00013  *      $Author: johns $        $Locker:  $             $State: Exp $
00014  *      $Revision: 1.63 $       $Date: 2012/03/20 15:41:03 $
00015  *
00016  ***************************************************************************
00017  * DESCRIPTION:
00018  *   Fast gaussian surface representation
00019  ***************************************************************************/
00020 
00021 #include <stdio.h>
00022 #include <stdlib.h>
00023 #include <string.h>
00024 #include <math.h>
00025 #include "QuickSurf.h"
00026 #include "CUDAQuickSurf.h"
00027 #include "Measure.h"
00028 #include "Inform.h"
00029 #include "utilities.h"
00030 #include "WKFUtils.h"
00031 #include "VolumetricData.h"
00032 
00033 #include "VMDDisplayList.h"
00034 #include "Displayable.h"
00035 #include "DispCmds.h"
00036 
00037 #define MIN(X,Y) (((X)<(Y))? (X) : (Y))
00038 #define MAX(X,Y) (((X)>(Y))? (X) : (Y))
00039 
00040 
00041 /*
00042  * David J. Hardy
00043  * 12 Dec 2008
00044  *
00045  * aexpfnx() - Approximate expf() for negative x.
00046  *
00047  * Assumes that x <= 0.
00048  *
00049  * Assumes IEEE format for single precision float, specifically:
00050  * 1 sign bit, 8 exponent bits biased by 127, and 23 mantissa bits.
00051  *
00052  * Interpolates exp() on interval (-1/log2(e), 0], then shifts it by
00053  * multiplication of a fast calculation for 2^(-N).  The interpolation
00054  * uses a linear blending of 3rd degree Taylor polynomials at the end
00055  * points, so the approximation is once differentiable.
00056  *
00057  * The error is small (max relative error per interval is calculated
00058  * to be 0.131%, with a max absolute error of -0.000716).
00059  *
00060  * The cutoff is chosen so as to speed up the computation by early
00061  * exit from function, with the value chosen to give less than the
00062  * the max absolute error.  Use of a cutoff is unnecessary, except
00063  * for needing to shift smallest floating point numbers to zero,
00064  * i.e. you could remove cutoff and replace by:
00065  *
00066  * #define MINXNZ  -88.0296919311130  // -127 * log(2)
00067  *
00068  *   if (x < MINXNZ) return 0.f;
00069  *
00070  * Use of a cutoff causes a discontinuity which can be eliminated
00071  * through the use of a switching function.
00072  *
00073  * We can obtain arbitrarily smooth approximation by taking k+1 nodes on
00074  * the interval and weighting their respective Taylor polynomials by the
00075  * kth order Lagrange interpolant through those nodes.  The wiggle in the
00076  * polynomial interpolation due to equidistant nodes (Runge's phenomenon)
00077  * can be reduced by using Chebyshev nodes.
00078  */
00079 
00080 #define MLOG2EF    -1.44269504088896f
00081 
00082 /*
00083  * Interpolating coefficients for linear blending of the
00084  * 3rd degree Taylor expansion of 2^x about 0 and -1.
00085  */
00086 #define SCEXP0     1.0000000000000000f
00087 #define SCEXP1     0.6987082824680118f
00088 #define SCEXP2     0.2633174272827404f
00089 #define SCEXP3     0.0923611991471395f
00090 #define SCEXP4     0.0277520543324108f
00091 
00092 /* for single precision float */
00093 #define EXPOBIAS   127
00094 #define EXPOSHIFT   23
00095 
00096 /* cutoff is optional, but can help avoid unnecessary work */
00097 #define ACUTOFF    -10
00098 
00099 typedef union flint_t {
00100   float f;
00101   int n;
00102 } flint;
00103 
00104 static float aexpfnx(float x) {
00105   /* assume x <= 0 */
00106   float mb;
00107   int mbflr;
00108   float d;
00109   float sy;
00110   flint scalfac;
00111 
00112   if (x < ACUTOFF) return 0.f;
00113 
00114   mb = x * MLOG2EF;    /* change base to 2, mb >= 0 */
00115   mbflr = (int) mb;    /* get int part, floor() */
00116   d = mbflr - mb;      /* remaining exponent, -1 < d <= 0 */
00117   sy = SCEXP0 + d*(SCEXP1 + d*(SCEXP2 + d*(SCEXP3 + d*SCEXP4)));
00118                        /* approx with linear blend of Taylor polys */
00119   scalfac.n = (EXPOBIAS - mbflr) << EXPOSHIFT;  /* 2^(-mbflr) */
00120   return (sy * scalfac.f);  /* scaled approx */
00121 }
00122 
00123 
00124 static void vmd_gaussdensity(int natoms, const float *xyzr,
00125                              const float *colors,
00126                              float *densitymap, float *voltexmap, 
00127                              const int *numvoxels, 
00128                              float radscale, float gridspacing, 
00129                              float isovalue, float gausslim) {
00130   int i, x, y, z;
00131   int maxvoxel[3];
00132   maxvoxel[0] = numvoxels[0]-1; 
00133   maxvoxel[1] = numvoxels[1]-1; 
00134   maxvoxel[2] = numvoxels[2]-1; 
00135   const float invgridspacing = 1.0f / gridspacing;
00136 
00137   // compute colors only if necessary, since they are costly
00138   if (voltexmap != NULL) {
00139     float invisovalue = 1.0f / isovalue;
00140     // compute both density map and floating point color texture map
00141     for (i=0; i<natoms; i++) {
00142 #if !defined(ARCH_BLUEWATERS)
00143       if ((i & 0x3fff) == 0) {
00144         printf("."); 
00145         fflush(stdout);
00146       }
00147 #endif
00148 
00149       int ind = i*4;
00150       float scaledrad = xyzr[ind + 3] * radscale;
00151       float arinv = 1.0f/(2.0f*scaledrad*scaledrad);
00152       float radlim = gausslim * scaledrad;
00153       float radlim2 = radlim * radlim;
00154 
00155       float tmp;
00156       radlim *= invgridspacing;
00157       tmp = xyzr[ind  ] * invgridspacing;
00158       int xmin = MAX((int) (tmp - radlim), 0);
00159       int xmax = MIN((int) (tmp + radlim), maxvoxel[0]);
00160       tmp = xyzr[ind+1] * invgridspacing;
00161       int ymin = MAX((int) (tmp - radlim), 0);
00162       int ymax = MIN((int) (tmp + radlim), maxvoxel[1]);
00163       tmp = xyzr[ind+2] * invgridspacing;
00164       int zmin = MAX((int) (tmp - radlim), 0);
00165       int zmax = MIN((int) (tmp + radlim), maxvoxel[2]);
00166 
00167       float dz = zmin*gridspacing - xyzr[ind+2];
00168       for (z=zmin; z<=zmax; z++,dz+=gridspacing) {
00169         float dy = ymin*gridspacing - xyzr[ind+1];
00170         for (y=ymin; y<=ymax; y++,dy+=gridspacing) {
00171           float dy2dz2 = dy*dy + dz*dz;
00172 
00173           // early-exit when outside the cutoff radius in the Y-Z plane
00174           if (dy2dz2 >= radlim2) 
00175             continue;
00176 
00177           int addr = z * numvoxels[0] * numvoxels[1] + y * numvoxels[0];
00178           float dx = xmin*gridspacing - xyzr[ind];
00179           for (x=xmin; x<=xmax; x++,dx+=gridspacing) {
00180             float r2 = dx*dx + dy2dz2;
00181             float expval = -r2 * arinv;
00182 #if 0
00183             // use the math library exponential routine
00184             float density = exp(expval);
00185 #else
00186             // use our (much faster) fast exponential approximation
00187             float density = aexpfnx(expval);
00188 #endif
00189 
00190             // accumulate density value to density map
00191             densitymap[addr + x] += density;
00192 
00193             // Accumulate density-weighted color to texture map.
00194             // Pre-multiply colors by the inverse isovalue we will extract   
00195             // the surface on, to cause the final color to be normalized.
00196             density *= invisovalue;
00197             int caddr = (addr + x) * 3;
00198 
00199             // color by atom colors
00200             voltexmap[caddr    ] += density * colors[ind    ];
00201             voltexmap[caddr + 1] += density * colors[ind + 1];
00202             voltexmap[caddr + 2] += density * colors[ind + 2];
00203           }
00204         }
00205       }
00206     }
00207   } else {
00208     // compute density map only
00209     for (i=0; i<natoms; i++) {
00210 #if !defined(ARCH_BLUEWATERS)
00211       if ((i & 0x3fff) == 0) {
00212         printf("."); 
00213         fflush(stdout);
00214       }
00215 #endif
00216 
00217       int ind = i*4;
00218       float scaledrad = xyzr[ind + 3] * radscale;
00219       float arinv = 1.0f/(2.0f*scaledrad*scaledrad);
00220       float radlim = gausslim * scaledrad;
00221       float radlim2 = radlim * radlim;
00222 
00223       float tmp;
00224       radlim *= invgridspacing;
00225       tmp = xyzr[ind  ] * invgridspacing;
00226       int xmin = MAX((int) (tmp - radlim), 0);
00227       int xmax = MIN((int) (tmp + radlim), maxvoxel[0]);
00228       tmp = xyzr[ind+1] * invgridspacing;
00229       int ymin = MAX((int) (tmp - radlim), 0);
00230       int ymax = MIN((int) (tmp + radlim), maxvoxel[1]);
00231       tmp = xyzr[ind+2] * invgridspacing;
00232       int zmin = MAX((int) (tmp - radlim), 0);
00233       int zmax = MIN((int) (tmp + radlim), maxvoxel[2]);
00234 
00235       float dz = zmin*gridspacing - xyzr[ind+2];
00236       for (z=zmin; z<=zmax; z++,dz+=gridspacing) {
00237         float dy = ymin*gridspacing - xyzr[ind+1];
00238         for (y=ymin; y<=ymax; y++,dy+=gridspacing) {
00239           float dy2dz2 = dy*dy + dz*dz;
00240 
00241           // early-exit when outside the cutoff radius in the Y-Z plane
00242           if (dy2dz2 >= radlim2) 
00243             continue;
00244 
00245           int addr = z * numvoxels[0] * numvoxels[1] + y * numvoxels[0];
00246           float dx = xmin*gridspacing - xyzr[ind];
00247           for (x=xmin; x<=xmax; x++,dx+=gridspacing) {
00248             float r2 = dx*dx + dy2dz2;
00249             float expval = -r2 * arinv;
00250 #if 0
00251             // use the math library exponential routine
00252             float density = exp(expval);
00253 #else
00254             // use our (much faster) fast exponential approximation
00255             float density = aexpfnx(expval);
00256 #endif
00257             densitymap[addr + x] += density;
00258           }
00259         }
00260       }
00261     }
00262   }
00263 }
00264 
00265 
00266 static void vmd_gaussdensity_opt(int natoms, const float *xyzr,
00267                                  const float *colors,
00268                                  float *densitymap, float *voltexmap, 
00269                                  const int *numvoxels, 
00270                                  float radscale, float gridspacing, 
00271                                  float isovalue, float gausslim) {
00272   int i, x, y, z;
00273   int maxvoxel[3];
00274   maxvoxel[0] = numvoxels[0]-1; 
00275   maxvoxel[1] = numvoxels[1]-1; 
00276   maxvoxel[2] = numvoxels[2]-1; 
00277   const float invgridspacing = 1.0f / gridspacing;
00278 
00279   // compute colors only if necessary, since they are costly
00280   if (voltexmap != NULL) {
00281     float invisovalue = 1.0f / isovalue;
00282     // compute both density map and floating point color texture map
00283     for (i=0; i<natoms; i++) {
00284 #if !defined(ARCH_BLUEWATERS)
00285       if ((i & 0x3fff) == 0) {
00286         printf("."); 
00287         fflush(stdout);
00288       }
00289 #endif
00290 
00291       int ind = i*4;
00292       float scaledrad = xyzr[ind + 3] * radscale;
00293       // negate, precompute reciprocal, and change to base 2 from the outset
00294       float arinv = -(1.0f/(2.0f*scaledrad*scaledrad)) * MLOG2EF;
00295       float radlim = gausslim * scaledrad;
00296       float radlim2 = radlim * radlim;
00297 
00298       float tmp;
00299       radlim *= invgridspacing;
00300       tmp = xyzr[ind  ] * invgridspacing;
00301       int xmin = MAX((int) (tmp - radlim), 0);
00302       int xmax = MIN((int) (tmp + radlim), maxvoxel[0]);
00303       tmp = xyzr[ind+1] * invgridspacing;
00304       int ymin = MAX((int) (tmp - radlim), 0);
00305       int ymax = MIN((int) (tmp + radlim), maxvoxel[1]);
00306       tmp = xyzr[ind+2] * invgridspacing;
00307       int zmin = MAX((int) (tmp - radlim), 0);
00308       int zmax = MIN((int) (tmp + radlim), maxvoxel[2]);
00309 
00310       float dz = zmin*gridspacing - xyzr[ind+2];
00311       for (z=zmin; z<=zmax; z++,dz+=gridspacing) {
00312         float dy = ymin*gridspacing - xyzr[ind+1];
00313         for (y=ymin; y<=ymax; y++,dy+=gridspacing) {
00314           float dy2dz2 = dy*dy + dz*dz;
00315 
00316           // early-exit when outside the cutoff radius in the Y-Z plane
00317           if (dy2dz2 >= radlim2) 
00318             continue;
00319 
00320           int addr = z * numvoxels[0] * numvoxels[1] + y * numvoxels[0];
00321           float dx = xmin*gridspacing - xyzr[ind];
00322           for (x=xmin; x<=xmax; x++,dx+=gridspacing) {
00323             float r2 = dx*dx + dy2dz2;
00324 
00325             // use our (much faster) fully inlined exponential approximation
00326             float mb = r2 * arinv;         /* already negated and in base 2 */
00327             int mbflr = (int) mb;          /* get int part, floor() */
00328             float d = mbflr - mb;          /* remaining exponent, -1 < d <= 0 */
00329 
00330             /* approx with linear blend of Taylor polys */
00331             float sy = SCEXP0 + d*(SCEXP1 + d*(SCEXP2 + d*(SCEXP3 + d*SCEXP4)));
00332 
00333             /* 2^(-mbflr) */
00334             flint scalfac;
00335             scalfac.n = (EXPOBIAS - mbflr) << EXPOSHIFT;  
00336 
00337             // XXX assume we are never beyond the cutoff value in this loop
00338             float density = (sy * scalfac.f);
00339 
00340             // accumulate density value to density map
00341             densitymap[addr + x] += density;
00342 
00343             // Accumulate density-weighted color to texture map.
00344             // Pre-multiply colors by the inverse isovalue we will extract   
00345             // the surface on, to cause the final color to be normalized.
00346             density *= invisovalue;
00347             int caddr = (addr + x) * 3;
00348 
00349             // color by atom colors
00350             voltexmap[caddr    ] += density * colors[ind    ];
00351             voltexmap[caddr + 1] += density * colors[ind + 1];
00352             voltexmap[caddr + 2] += density * colors[ind + 2];
00353           }
00354         }
00355       }
00356     }
00357   } else {
00358     // compute density map only
00359     for (i=0; i<natoms; i++) {
00360 #if !defined(ARCH_BLUEWATERS)
00361       if ((i & 0x3fff) == 0) {
00362         printf("."); 
00363         fflush(stdout);
00364       }
00365 #endif
00366 
00367       int ind = i*4;
00368       float scaledrad = xyzr[ind+3] * radscale;
00369 
00370       // negate, precompute reciprocal, and change to base 2 from the outset
00371       float arinv = -(1.0f/(2.0f*scaledrad*scaledrad)) * MLOG2EF;
00372       float radlim = gausslim * scaledrad;
00373       float radlim2 = radlim * radlim;
00374 
00375       float tmp;
00376       radlim *= invgridspacing;
00377       tmp = xyzr[ind  ] * invgridspacing;
00378       int xmin = MAX((int) (tmp - radlim), 0);
00379       int xmax = MIN((int) (tmp + radlim), maxvoxel[0]);
00380       tmp = xyzr[ind+1] * invgridspacing;
00381       int ymin = MAX((int) (tmp - radlim), 0);
00382       int ymax = MIN((int) (tmp + radlim), maxvoxel[1]);
00383       tmp = xyzr[ind+2] * invgridspacing;
00384       int zmin = MAX((int) (tmp - radlim), 0);
00385       int zmax = MIN((int) (tmp + radlim), maxvoxel[2]);
00386 
00387       float dz = zmin*gridspacing - xyzr[ind+2];
00388       for (z=zmin; z<=zmax; z++,dz+=gridspacing) {
00389         float dy = ymin*gridspacing - xyzr[ind+1];
00390         for (y=ymin; y<=ymax; y++,dy+=gridspacing) {
00391           float dy2dz2 = dy*dy + dz*dz;
00392 
00393           // early-exit when outside the cutoff radius in the Y-Z plane
00394           if (dy2dz2 >= radlim2) 
00395             continue;
00396 
00397           int addr = z * numvoxels[0] * numvoxels[1] + y * numvoxels[0];
00398           float dx = xmin*gridspacing - xyzr[ind];
00399           for (x=xmin; x<=xmax; x++,dx+=gridspacing) {
00400             float r2 = dx*dx + dy2dz2;
00401 
00402             // use our (much faster) fully inlined exponential approximation
00403             float mb = r2 * arinv;         /* already negated and in base 2 */
00404             int mbflr = (int) mb;          /* get int part, floor() */
00405             float d = mbflr - mb;          /* remaining exponent, -1 < d <= 0 */
00406 
00407             /* approx with linear blend of Taylor polys */
00408             float sy = SCEXP0 + d*(SCEXP1 + d*(SCEXP2 + d*(SCEXP3 + d*SCEXP4)));
00409 
00410             /* 2^(-mbflr) */
00411             flint scalfac;
00412             scalfac.n = (EXPOBIAS - mbflr) << EXPOSHIFT;  
00413 
00414             // XXX assume we are never beyond the cutoff value in this loop
00415             float density = (sy * scalfac.f);
00416 
00417             densitymap[addr + x] += density;
00418           }
00419         }
00420       }
00421     }
00422   }
00423 }
00424 
00425 
00426 typedef struct {
00427   int natoms;
00428   float radscale;
00429   float gridspacing;
00430   float isovalue;
00431   float gausslim;
00432   const int *numvoxels;
00433   const float *xyzr; 
00434   const float *colors;
00435   float **thrdensitymaps;
00436   float **thrvoltexmaps;
00437 } densitythrparms;
00438 
00439 
00440 static void * densitythread(void *voidparms) {
00441   wkf_tasktile_t tile;
00442   densitythrparms *parms = NULL;
00443   int threadid;
00444 
00445   wkf_threadlaunch_getid(voidparms, &threadid, NULL);
00446   wkf_threadlaunch_getdata(voidparms, (void **) &parms);
00447 
00448   while (wkf_threadlaunch_next_tile(voidparms, 16384, &tile) != WKF_SCHED_DONE) {
00449     int natoms = tile.end-tile.start;
00450     vmd_gaussdensity_opt(natoms, 
00451                          &parms->xyzr[4*tile.start], 
00452                          (parms->thrvoltexmaps[0]!=NULL) ? &parms->colors[4*tile.start] : NULL,
00453                          parms->thrdensitymaps[threadid], 
00454                          parms->thrvoltexmaps[threadid], 
00455                          parms->numvoxels, 
00456                          parms->radscale, 
00457                          parms->gridspacing, 
00458                          parms->isovalue, 
00459                          parms->gausslim);
00460   }
00461 
00462   return NULL;
00463 }
00464 
00465 
00466 static void * reductionthread(void *voidparms) {
00467   wkf_tasktile_t tile;
00468   densitythrparms *parms = NULL;
00469   int threadid, numthreads;
00470 
00471   wkf_threadlaunch_getid(voidparms, &threadid, &numthreads);
00472   wkf_threadlaunch_getdata(voidparms, (void **) &parms);
00473 
00474   while (wkf_threadlaunch_next_tile(voidparms, 16384, &tile) != WKF_SCHED_DONE) {
00475     // do a reduction over each of the individual density grids
00476     int i, x;
00477     for (x=tile.start; x<tile.end; x++) {
00478       float tmp = 0.0f;
00479       for (i=1; i<numthreads; i++) {
00480         tmp += parms->thrdensitymaps[i][x];
00481       }
00482       parms->thrdensitymaps[0][x] += tmp;
00483     }
00484 
00485     // do a reduction over each of the individual texture grids
00486     if (parms->thrvoltexmaps[0] != NULL) {
00487       for (x=tile.start*3; x<tile.end*3; x++) {
00488         float tmp = 0.0f;
00489         for (i=1; i<numthreads; i++) {
00490           tmp += parms->thrvoltexmaps[i][x];
00491         }
00492         parms->thrvoltexmaps[0][x] += tmp;
00493       }
00494     }
00495   }
00496 
00497   return NULL;
00498 }
00499 
00500 
00501 static int vmd_gaussdensity_threaded(int natoms, const float *xyzr,
00502                                      const float *colors,
00503                                      float *densitymap, float *voltexmap, 
00504                                      const int *numvoxels, 
00505                                      float radscale, float gridspacing, 
00506                                      float isovalue, float gausslim) {
00507   densitythrparms parms;
00508   memset(&parms, 0, sizeof(parms));
00509 
00510   parms.natoms = natoms;
00511   parms.radscale = radscale;
00512   parms.gridspacing = gridspacing;
00513   parms.isovalue = isovalue;
00514   parms.gausslim = gausslim;
00515   parms.numvoxels = numvoxels;
00516   parms.xyzr = xyzr;
00517   parms.colors = colors;
00518  
00519   int physprocs = wkf_thread_numprocessors();
00520   int maxprocs = physprocs;
00521 
00522   // We can productively use only a few cores per socket due to the
00523   // limited memory bandwidth per socket. Also, hyperthreading
00524   // actually hurts performance.  These two considerations combined
00525   // with the linear increase in memory use prevent us from using large
00526   // numbers of cores with this simple approach, so if we've got more 
00527   // than 8 CPU cores, we'll iteratively cutting the core count in 
00528   // half until we're under 8 cores.
00529   while (maxprocs > 8) 
00530     maxprocs /= 2;
00531 
00532   // Limit the number of CPU cores used so we don't run the 
00533   // machine out of memory during surface computation.
00534   // For now we'll set a practical maximum memory use limit to 
00535   // 2GB total for all cores. 
00536   long volsz = numvoxels[0] * numvoxels[1] * numvoxels[2];
00537   long volmemsz = sizeof(float) * volsz;
00538   long volmemszkb = volmemsz / 1024;
00539   long volmemtexszkb = (volmemszkb + (voltexmap != NULL) ? 3*volmemszkb : 0);
00540 #if defined(ARCH_BLUEWATERS)
00541   while ((volmemtexszkb * maxprocs) > (4 * 1024 * 1024))
00542     maxprocs /= 2;
00543 #else
00544   while ((volmemtexszkb * maxprocs) > (2 * 1024 * 1024))
00545     maxprocs /= 2;
00546 #endif
00547 
00548   if (maxprocs < 1) 
00549     maxprocs = 1;
00550 
00551   // Loop over number of physical processors and try to create 
00552   // per-thread volumetric maps for each of them.
00553   parms.thrdensitymaps = (float **) calloc(1,maxprocs * sizeof(float *));
00554   parms.thrvoltexmaps = (float **) calloc(1, maxprocs * sizeof(float *));
00555 
00556   // first thread is already ready to go
00557   parms.thrdensitymaps[0] = densitymap;
00558   parms.thrvoltexmaps[0] = voltexmap;
00559 
00560   int i;
00561   int numprocs = maxprocs; // ever the optimist
00562   for (i=1; i<maxprocs; i++) {
00563     parms.thrdensitymaps[i] = (float *) calloc(1, volmemsz);
00564     if (parms.thrdensitymaps[i] == NULL) {
00565       numprocs = i;
00566       break;
00567     }
00568     if (voltexmap != NULL) {
00569       parms.thrvoltexmaps[i] = (float *) calloc(1, 3 * volmemsz);
00570       if (parms.thrvoltexmaps[i] == NULL) {
00571         free(parms.thrdensitymaps[i]);
00572         parms.thrdensitymaps[i] = NULL;
00573         numprocs = i;
00574         break;
00575       }
00576     }
00577   }
00578 
00579   // launch independent thread calculations
00580   wkf_tasktile_t tile;
00581   tile.start = 0;
00582   tile.end = natoms;
00583   wkf_threadlaunch(numprocs, &parms, densitythread, &tile);
00584 
00585   // do a parallel reduction of the resulting density maps
00586   tile.start = 0;
00587   tile.end = volsz;
00588   wkf_threadlaunch(numprocs, &parms, reductionthread, &tile);
00589 
00590   // free work area
00591   for (i=1; i<maxprocs; i++) {
00592     if (parms.thrdensitymaps[i] != NULL)
00593       free(parms.thrdensitymaps[i]);
00594 
00595     if (parms.thrvoltexmaps[i] != NULL)
00596       free(parms.thrvoltexmaps[i]);
00597   }
00598   free(parms.thrdensitymaps);
00599   free(parms.thrvoltexmaps);
00600 
00601   return 0;
00602 }
00603 
00604 QuickSurf::QuickSurf() {
00605   volmap = NULL;
00606   voltexmap = NULL;
00607   s.clear();
00608   isovalue = 0.5f;
00609 
00610   numvoxels[0] = 128;
00611   numvoxels[1] = 128;
00612   numvoxels[2] = 128;
00613 
00614   origin[0] = 0.0f;
00615   origin[1] = 0.0f;
00616   origin[2] = 0.0f;
00617 
00618   xaxis[0] = 1.0f;
00619   xaxis[1] = 0.0f;
00620   xaxis[2] = 0.0f;
00621 
00622   yaxis[0] = 0.0f;
00623   yaxis[1] = 1.0f;
00624   yaxis[2] = 0.0f;
00625 
00626   zaxis[0] = 0.0f;
00627   zaxis[1] = 0.0f;
00628   zaxis[2] = 1.0f;
00629    
00630   cudaqs = NULL;
00631 #if defined(VMDCUDA)
00632   if (!getenv("VMDNOCUDA")) {
00633     cudaqs = new CUDAQuickSurf();
00634   }
00635 #endif
00636 
00637   timer = wkf_timer_create();
00638 }
00639 
00640 int QuickSurf::calc_surf(AtomSel *atomSel, DrawMolecule *mol,
00641                          const float *atompos, const float *atomradii,
00642                          int quality, float radscale, float gridspacing,
00643                          float isoval, const int *colidx, const float *cmap,
00644                          VMDDisplayList *cmdList) {
00645   wkf_timer_start(timer);
00646   int colorperatom = (colidx != NULL && cmap != NULL);
00647   int usebeads=0;
00648 
00649   // clean up any existing CPU arrays before going any further...
00650   if (voltexmap != NULL)
00651     free(voltexmap);
00652   voltexmap = NULL;
00653 
00654   ResizeArray<float> beadpos(64 + (3 * atomSel->selected) / 20);
00655   ResizeArray<float> beadradii(64 + (3 * atomSel->selected) / 20);
00656   ResizeArray<float> beadcolors(64 + (3 * atomSel->selected) / 20);
00657 
00658   if (getenv("VMDQUICKSURFBEADS")) {
00659     usebeads=1;
00660 #if !defined(ARCH_BLUEWATERS)
00661     printf("QuickSurf using residue beads representation...\n");
00662 #endif
00663   }
00664 
00665   int numbeads = 0;
00666   if (usebeads) {
00667     int i, resid, numres;
00668 
00669     // draw a bead for each residue
00670     numres = mol->residueList.num();
00671     for (resid=0; resid<numres; resid++) {
00672       float com[3] = {0.0, 0.0, 0.0};
00673       const ResizeArray<int> &atoms = mol->residueList[resid]->atoms;
00674       int numatoms = atoms.num();
00675       int oncount = 0;
00676    
00677       // find COM for residue
00678       for (i=0; i<numatoms; i++) {
00679         int idx = atoms[i];
00680         if (atomSel->on[idx]) {
00681           oncount++;
00682           vec_add(com, com, atompos + 3*idx);
00683         }
00684       }
00685 
00686       if (oncount < 1)
00687         continue; // exit if there weren't any atoms
00688 
00689       vec_scale(com, 1.0f / (float) oncount, com);
00690 
00691       // find radius of bounding sphere and save last atom index for color
00692       int atomcolorindex=0; // initialize, to please compilers
00693       float boundradsq = 0.0f;
00694       for (i=0; i<numatoms; i++) {
00695         int idx = atoms[i];
00696         if (atomSel->on[idx]) {
00697           float tmpdist[3];
00698           atomcolorindex = idx;
00699           vec_sub(tmpdist, com, atompos + 3*idx);
00700           float distsq = dot_prod(tmpdist, tmpdist);
00701           if (distsq > boundradsq) {
00702             boundradsq = distsq;
00703           }
00704         }
00705       }
00706       beadpos.append(com[0]);
00707       beadpos.append(com[1]);
00708       beadpos.append(com[2]);
00709 
00710       beadradii.append(sqrtf(boundradsq) + 1.0f);
00711 
00712       if (colorperatom) {
00713         const float *cp = &cmap[colidx[atomcolorindex] * 3];
00714         beadcolors.append(cp[0]);
00715         beadcolors.append(cp[1]);
00716         beadcolors.append(cp[2]);
00717       }
00718 
00719       // XXX still need to add pick points...
00720     }
00721 
00722     numbeads = beadpos.num() / 3;
00723   }
00724 
00725   // initialize class variables
00726   isovalue=isoval;
00727 
00728   // If no volumetric texture will be computed we will use the cmap
00729   // parameter to pass in the solid color to be applied to all vertices
00730   vec_copy(solidcolor, cmap);
00731 
00732   // compute min/max atom radius, build list of selected atom radii,
00733   // and compute bounding box for the selected atoms
00734   float minx, miny, minz, maxx, maxy, maxz;
00735   float minrad, maxrad;
00736   int i;
00737   if (usebeads) {
00738     minx = maxx = beadpos[0];
00739     miny = maxy = beadpos[1];
00740     minz = maxz = beadpos[2];
00741     minrad = maxrad = beadradii[0];
00742     for (i=0; i<numbeads; i++) {
00743       int ind = i * 3;
00744       float tmpx = beadpos[ind  ];
00745       float tmpy = beadpos[ind+1];
00746       float tmpz = beadpos[ind+2];
00747 
00748       minx = (tmpx < minx) ? tmpx : minx;
00749       maxx = (tmpx > maxx) ? tmpx : maxx;
00750 
00751       miny = (tmpy < miny) ? tmpy : miny;
00752       maxy = (tmpy > maxy) ? tmpy : maxy;
00753 
00754       minz = (tmpz < minz) ? tmpz : minz;
00755       maxz = (tmpz > maxz) ? tmpz : maxz;
00756  
00757       // we always have to compute the rmin/rmax for beads
00758       // since these radii are defined on-the-fly
00759       float r = beadradii[i];
00760       minrad = (r < minrad) ? r : minrad;
00761       maxrad = (r > maxrad) ? r : maxrad;
00762     }
00763   } else {
00764     minx = maxx = atompos[atomSel->firstsel*3  ];
00765     miny = maxy = atompos[atomSel->firstsel*3+1];
00766     minz = maxz = atompos[atomSel->firstsel*3+2];
00767 
00768     // Query min/max atom radii for the entire molecule
00769     mol->get_radii_minmax(minrad, maxrad);
00770 
00771     // We only compute rmin/rmax for the actual group of selected atoms if 
00772     // (rmax/rmin > 2.5) for the whole molecule, otherwise it's a small 
00773     // enough range that we don't care since it won't hurt our performance. 
00774     if (minrad <= 0.001 || maxrad/minrad > 2.5) {
00775       minrad = maxrad = atomradii[atomSel->firstsel];
00776       for (i=atomSel->firstsel; i<=atomSel->lastsel; i++) {
00777         if (atomSel->on[i]) {
00778           int ind = i * 3;
00779           float tmpx = atompos[ind  ];
00780           float tmpy = atompos[ind+1];
00781           float tmpz = atompos[ind+2];
00782 
00783           minx = (tmpx < minx) ? tmpx : minx;
00784           maxx = (tmpx > maxx) ? tmpx : maxx;
00785 
00786           miny = (tmpy < miny) ? tmpy : miny;
00787           maxy = (tmpy > maxy) ? tmpy : maxy;
00788 
00789           minz = (tmpz < minz) ? tmpz : minz;
00790           maxz = (tmpz > maxz) ? tmpz : maxz;
00791   
00792           float r = atomradii[i];
00793           minrad = (r < minrad) ? r : minrad;
00794           maxrad = (r > maxrad) ? r : maxrad;
00795         }
00796       }
00797     } else {
00798       for (i=atomSel->firstsel; i<=atomSel->lastsel; i++) {
00799         if (atomSel->on[i]) {
00800           int ind = i * 3;
00801           float tmpx = atompos[ind  ];
00802           float tmpy = atompos[ind+1];
00803           float tmpz = atompos[ind+2];
00804 
00805           minx = (tmpx < minx) ? tmpx : minx;
00806           maxx = (tmpx > maxx) ? tmpx : maxx;
00807 
00808           miny = (tmpy < miny) ? tmpy : miny;
00809           maxy = (tmpy > maxy) ? tmpy : maxy;
00810 
00811           minz = (tmpz < minz) ? tmpz : minz;
00812           maxz = (tmpz > maxz) ? tmpz : maxz;
00813         }
00814       }
00815     }
00816   }
00817 
00818   float mincoord[3], maxcoord[3];
00819   mincoord[0] = minx;
00820   mincoord[1] = miny;
00821   mincoord[2] = minz;
00822   maxcoord[0] = maxx;
00823   maxcoord[1] = maxy;
00824   maxcoord[2] = maxz;
00825 
00826   // crude estimate of the grid padding we require to prevent the
00827   // resulting isosurface from being clipped
00828   float gridpadding = radscale * maxrad * 1.70f;
00829   float padrad = gridpadding;
00830   padrad = 0.65f * sqrtf(4.0f/3.0f*((float) VMD_PI)*padrad*padrad*padrad);
00831   gridpadding = MAX(gridpadding, padrad);
00832 
00833   // Handle coarse-grained structures and whole-cell models
00834   // XXX The switch at 4.0A from an assumed all-atom scale structure to 
00835   //     CG or cell models is a simple heuristic at a somewhat arbitrary 
00836   //     threshold value.  
00837   //     For all-atom models the units shown in the GUI are in Angstroms
00838   //     and are absolute, but for CG or cell models the units in the GUI 
00839   //     are relative to the atom with the minimum radius.
00840   //     This code doesn't do anything to handle structures with a minrad 
00841   //     of zero, where perhaps only one particle has an unset radius.
00842   if (minrad > 4.0f) {
00843     gridspacing *= minrad;
00844   }
00845 
00846 #if !defined(ARCH_BLUEWATERS)
00847   printf("QuickSurf: R*%.1f, I=%.1f, H=%.1f Pad: %.1f minR: %.1f maxR: %.1f)\n",
00848          radscale, isovalue, gridspacing, gridpadding, minrad, maxrad);
00849 #endif
00850 
00851   mincoord[0] -= gridpadding;
00852   mincoord[1] -= gridpadding;
00853   mincoord[2] -= gridpadding;
00854   maxcoord[0] += gridpadding;
00855   maxcoord[1] += gridpadding;
00856   maxcoord[2] += gridpadding;
00857 
00858   // compute the real grid dimensions from the selected atoms
00859   xaxis[0] = maxcoord[0]-mincoord[0];
00860   yaxis[1] = maxcoord[1]-mincoord[1];
00861   zaxis[2] = maxcoord[2]-mincoord[2];
00862   numvoxels[0] = (int) ceil(xaxis[0] / gridspacing);
00863   numvoxels[1] = (int) ceil(yaxis[1] / gridspacing);
00864   numvoxels[2] = (int) ceil(zaxis[2] / gridspacing);
00865 
00866   // recalc the grid dimensions from rounded/padded voxel counts
00867   xaxis[0] = (numvoxels[0]-1) * gridspacing;
00868   yaxis[1] = (numvoxels[1]-1) * gridspacing;
00869   zaxis[2] = (numvoxels[2]-1) * gridspacing;
00870   maxcoord[0] = mincoord[0] + xaxis[0];
00871   maxcoord[1] = mincoord[1] + yaxis[1];
00872   maxcoord[2] = mincoord[2] + zaxis[2];
00873 
00874 #if !defined(ARCH_BLUEWATERS)
00875   printf("  GridSZ: (%4d %4d %4d)  BBox: (%.1f %.1f %.1f)->(%.1f %.1f %.1f)\n",
00876          numvoxels[0], numvoxels[1], numvoxels[2],
00877          mincoord[0], mincoord[1], mincoord[2],
00878          maxcoord[0], maxcoord[1], maxcoord[2]);
00879 #endif
00880 
00881   vec_copy(origin, mincoord);
00882 
00883   // build compacted lists of bead coordinates, radii, and colors
00884   float *xyzr = NULL;
00885   float *colors = NULL;
00886   if (usebeads) { 
00887     int ind =0;
00888     int ind4=0; 
00889     xyzr = (float *) malloc(numbeads * sizeof(float) * 4);
00890     if (colorperatom) {
00891       colors = (float *) malloc(numbeads * sizeof(float) * 4);
00892 
00893       // build compacted lists of bead coordinates, radii, and colors
00894       for (i=0; i<numbeads; i++) {
00895         const float *fp = &beadpos[0] + ind;
00896         xyzr[ind4    ] = fp[0]-origin[0];
00897         xyzr[ind4 + 1] = fp[1]-origin[1];
00898         xyzr[ind4 + 2] = fp[2]-origin[2];
00899         xyzr[ind4 + 3] = beadradii[i];
00900  
00901         const float *cp = &beadcolors[0] + ind;
00902         colors[ind4    ] = cp[0];
00903         colors[ind4 + 1] = cp[1];
00904         colors[ind4 + 2] = cp[2];
00905         colors[ind4 + 3] = 1.0f;
00906         ind4 += 4;
00907         ind += 3;
00908       }
00909     } else {
00910       // build compacted lists of bead coordinates and radii only
00911       for (i=0; i<numbeads; i++) {
00912         const float *fp = &beadpos[0] + ind;
00913         xyzr[ind4    ] = fp[0]-origin[0];
00914         xyzr[ind4 + 1] = fp[1]-origin[1];
00915         xyzr[ind4 + 2] = fp[2]-origin[2];
00916         xyzr[ind4 + 3] = beadradii[i];
00917         ind4 += 4;
00918         ind += 3;
00919       }
00920     }
00921   } else {
00922     int ind = atomSel->firstsel * 3;
00923     int ind4=0; 
00924     xyzr = (float *) malloc(atomSel->selected * sizeof(float) * 4);
00925     if (colorperatom) {
00926       colors = (float *) malloc(atomSel->selected * sizeof(float) * 4);
00927 
00928       // build compacted lists of atom coordinates, radii, and colors
00929       for (i=atomSel->firstsel; i <= atomSel->lastsel; i++) {
00930         if (atomSel->on[i]) {
00931           const float *fp = atompos + ind;
00932           xyzr[ind4    ] = fp[0]-origin[0];
00933           xyzr[ind4 + 1] = fp[1]-origin[1];
00934           xyzr[ind4 + 2] = fp[2]-origin[2];
00935           xyzr[ind4 + 3] = atomradii[i];
00936  
00937           const float *cp = &cmap[colidx[i] * 3];
00938           colors[ind4    ] = cp[0];
00939           colors[ind4 + 1] = cp[1];
00940           colors[ind4 + 2] = cp[2];
00941           colors[ind4 + 3] = 1.0f;
00942           ind4 += 4;
00943         }
00944         ind += 3;
00945       }
00946     } else {
00947       // build compacted lists of atom coordinates and radii only
00948       for (i=atomSel->firstsel; i <= atomSel->lastsel; i++) {
00949         if (atomSel->on[i]) {
00950           const float *fp = atompos + ind;
00951           xyzr[ind4    ] = fp[0]-origin[0];
00952           xyzr[ind4 + 1] = fp[1]-origin[1];
00953           xyzr[ind4 + 2] = fp[2]-origin[2];
00954           xyzr[ind4 + 3] = atomradii[i];
00955           ind4 += 4;
00956         }
00957         ind += 3;
00958       }
00959     }
00960   }
00961 
00962   // set gaussian window size based on user-specified quality parameter
00963   float gausslim = 2.0f;
00964   switch (quality) {
00965     case 3: gausslim = 4.0f; break; // max quality
00966 
00967     case 2: gausslim = 3.0f; break; // high quality
00968 
00969     case 1: gausslim = 2.5f; break; // medium quality
00970 
00971     case 0: 
00972     default: gausslim = 2.0f; // low quality
00973       break;
00974   }
00975 
00976   pretime = wkf_timer_timenow(timer);
00977 
00978 #if defined(VMDCUDA)
00979   if (!getenv("VMDNOCUDA")) {
00980     // compute both density map and floating point color texture map
00981     int pcount = (usebeads) ? numbeads : atomSel->selected; 
00982     int rc = cudaqs->calc_surf(pcount, &xyzr[0],
00983                                (colorperatom) ? &colors[0] : &cmap[0],
00984                                colorperatom, origin, numvoxels, maxrad,
00985                                radscale, gridspacing, isovalue, gausslim,
00986                                cmdList);
00987 
00988     if (rc == 0) {
00989       free(xyzr);
00990       if (colors)
00991         free(colors);
00992   
00993       voltime = wkf_timer_timenow(timer);
00994       return 0;
00995     }
00996   }
00997 #endif
00998 
00999 #if !defined(ARCH_BLUEWATERS)
01000   printf("  Computing density map grid on CPUs ");
01001 #endif
01002 
01003   long volsz = numvoxels[0] * numvoxels[1] * numvoxels[2];
01004   volmap = new float[volsz];
01005   if (colidx != NULL && cmap != NULL) {
01006     voltexmap = (float*) calloc(1, 3 * sizeof(float) * numvoxels[0] * numvoxels[1] * numvoxels[2]);
01007   }
01008 
01009   fflush(stdout);
01010   memset(volmap, 0, sizeof(float) * volsz);
01011   if ((volsz * atomSel->selected) > 20000000) {
01012     vmd_gaussdensity_threaded(atomSel->selected, &xyzr[0],
01013                               (voltexmap!=NULL) ? &colors[0] : NULL,
01014                               volmap, voltexmap, numvoxels, radscale, 
01015                               gridspacing, isovalue, gausslim);
01016   } else {
01017     vmd_gaussdensity_opt(atomSel->selected, &xyzr[0],
01018                          (voltexmap!=NULL) ? &colors[0] : NULL,
01019                          volmap, voltexmap, 
01020                          numvoxels, radscale, gridspacing, isovalue, gausslim);
01021   }
01022 
01023   free(xyzr);
01024   if (colors)
01025     free(colors);
01026 
01027   voltime = wkf_timer_timenow(timer);
01028 
01029   // draw the surface
01030   draw_trimesh(cmdList);
01031 
01032 #if !defined(ARCH_BLUEWATERS)
01033   printf(" Done.\n");
01034 #endif
01035   return 0;
01036 }
01037 
01038 
01039 // Extract the isosurface from the QuickSurf density map
01040 int QuickSurf::get_trimesh(int &numverts, 
01041                            float *&v3fv, float *&n3fv, float *&c3fv, 
01042                            int &numfacets, int *&fiv) {
01043 #if !defined(ARCH_BLUEWATERS)
01044   printf("Running marching cubes on CPU...\n");
01045 #endif
01046 
01047   VolumetricData *surfvol; 
01048   surfvol = new VolumetricData("molecular surface",
01049                                origin, xaxis, yaxis, zaxis,
01050                                numvoxels[0], numvoxels[1], numvoxels[2],
01051                                volmap);
01052 
01053   // XXX we should calculate the volume gradient only for those
01054   //     vertices we extract, since for this rep any changes to settings
01055   //     will require recomputation of the entire volume
01056   surfvol->compute_volume_gradient(); // calc gradients: smooth vertex normals
01057   gradtime = wkf_timer_timenow(timer);
01058 
01059   // trimesh polygonalized surface, max of 6 triangles per voxel
01060   const int stepsize = 1;
01061   s.clear();                              // initialize isosurface data
01062   s.compute(surfvol, isovalue, stepsize); // compute the isosurface
01063 
01064   mctime = wkf_timer_timenow(timer);
01065 
01066   s.vertexfusion(surfvol, 9, 9);          // identify and eliminate duplicated vertices
01067   s.normalize();                          // normalize interpolated gradient/surface normals
01068 
01069   if (s.numtriangles > 0) {
01070     if (voltexmap != NULL) {
01071       // assign per-vertex colors by a 3-D texture map
01072       s.set_color_voltex_rgb3fv(voltexmap);
01073     } else {
01074       // use a single color for the entire mesh
01075       s.set_color_rgb3fv(solidcolor);
01076     }
01077   }
01078 
01079   numverts = s.v.num() / 3;
01080   v3fv=&s.v[0];
01081   n3fv=&s.n[0];
01082   c3fv=&s.c[0];
01083 
01084   numfacets = s.numtriangles;
01085   fiv=&s.f[0];
01086 
01087   delete surfvol;
01088 
01089   mcverttime = wkf_timer_timenow(timer);
01090   reptime = mcverttime;
01091 
01092 #if !defined(ARCH_BLUEWATERS)
01093   char strmsg[1024];
01094   sprintf(strmsg, "QuickSurf: %.3f [pre:%.3f vol:%.3f gr:%.3f mc:%.2f mcv:%.3f]",
01095           reptime, pretime, voltime-pretime, gradtime-voltime, 
01096           mctime-gradtime, mcverttime-mctime);
01097 
01098   msgInfo << strmsg << sendmsg;
01099 #endif
01100  
01101   return 0;
01102 }
01103 
01104 
01105 int QuickSurf::draw_trimesh(VMDDisplayList *cmdList) {
01106   DispCmdTriMesh cmdTriMesh;
01107 
01108   int numverts=0;
01109   float *v=NULL, *n=NULL, *c=NULL;
01110   int numfacets=0;
01111   int *f=NULL;
01112 
01113   get_trimesh(numverts, v, n, c, numfacets, f);
01114 
01115   // Create a triangle mesh
01116   if (numfacets > 0) {
01117     cmdTriMesh.putdata(v, n, c, numverts, f, numfacets, 0, cmdList);
01118   }
01119  
01120   return 0;
01121 }
01122 
01123 
01124 QuickSurf::~QuickSurf() {
01125 #if defined(VMDCUDA)
01126   if (cudaqs) {
01127     delete cudaqs;
01128   }
01129 #endif
01130 
01131   if (voltexmap != NULL)
01132     free(voltexmap);
01133   voltexmap = NULL;
01134 
01135   wkf_timer_destroy(timer);
01136 }
01137 
01138 

Generated on Sat May 26 01:48:25 2012 for VMD (current) by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002