MeasureCluster.C Source File

00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 1995-2019 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: MeasureCluster.C,v $
00013  *      $Author: johns $        $Locker:  $             $State: Exp $
00014  *      $Revision: 1.20 $       $Date: 2020/07/23 03:27:52 $
00015  *
00016  ***************************************************************************
00017  * DESCRIPTION:
00018  *   Code to find clusters in MD trajectories.
00019  * Current implementation is based on the quality threshold (QT) algorithm:
00020  *   http://dx.doi.org/10.1101/gr.9.11.1106
00021  *   http://en.wikipedia.org/wiki/Cluster_analysis#QT_clustering_algorithm
00022  *
00023  ***************************************************************************/
00024 
00025 #include <stdio.h>
00026 #include <stdlib.h>
00027 #include <math.h>
00028 #include "Measure.h"
00029 #include "AtomSel.h"
00030 #include "utilities.h"
00031 #include "ResizeArray.h"
00032 #include "MoleculeList.h"
00033 #include "Inform.h"
00034 #include "Timestep.h"
00035 #include "VMDApp.h"
00036 #include "WKFThreads.h"
00037 #include "WKFUtils.h"
00038 #include "SpatialSearch.h"
00039 
00040 class AtomSelThr : public AtomSel
00041 {
00042 public:
00043   AtomSelThr(VMDApp *vmdapp, AtomSel *osel, wkf_mutex_t *olock)
00044       : AtomSel(vmdapp, NULL, osel->molid()),
00045         sel(osel), lock(olock) {
00046     if (sel) {
00047       selected=sel->selected;
00048       num_atoms=sel->num_atoms;
00049       which_frame=sel->which_frame;
00050       if (sel->on) {
00051         on = new int[num_atoms];
00052         memcpy(on,sel->on,num_atoms*sizeof(int));
00053       }
00054     } else {
00055       selected=-1;
00056       num_atoms=-1;
00057       which_frame=-1;
00058     }
00059   }
00060 
00061   ~AtomSelThr() {
00062     sel=NULL;
00063   }
00064 
00065   // disable these methods
00066 private:
00067 //  AtomSelThr() : AtomSel(NULL, NULL, -1) {};
00068   AtomSelThr& operator=(const AtomSelThr &) { return *this; };
00069 //  AtomSelThr(AtomSelThr &) : AtomSel(NULL, NULL, -1) {};
00070   int change(const  char *newcmd, DrawMolecule *mol) { return NO_PARSE; }
00071 
00072 public:
00073 
00074   /* thread safe selection update */
00075   void update(/* const */ DrawMolecule *mol, const int frame) {
00076     if (!sel) return;
00077     
00078     wkf_mutex_lock(lock);
00079 
00080     sel->which_frame=frame;
00081     which_frame=frame;
00082 
00083     if (sel->change(NULL, mol) != AtomSel::PARSE_SUCCESS)
00084       msgErr << "AtomSelThr::update(): failed to evaluate atom selection update";
00085 
00086     num_atoms=sel->num_atoms;
00087     selected=sel->selected;
00088     if (!on) on = new int[num_atoms];
00089     memcpy(on,sel->on,num_atoms*sizeof(int));
00090     
00091     wkf_mutex_unlock(lock);
00092   }
00093   
00094 protected:
00095   AtomSel *sel;
00096   wkf_mutex_t *lock;
00097 };
00098 
00099 /* 
00100    XXX: below is a custom version of MatrixFitRMS. unlike the
00101         original in fitrms.c, this one computes and provides
00102         the RMS and does not output the transformation matrix
00103         (not needed below).
00104 
00105         this needs to go away as soon as an improved general
00106         version of MatrixFitRMS is available, where this feature
00107         would be made an option. 
00108 */
00109 
00110 /*
00111 
00112 Code in this file was taken from PyMol v0.90 and used by permissing under
00113 the following license agreement contained in the PyMol distribution.  
00114 Trivial modifications have been made to permit incorporation into VMD.
00115 
00116 
00117 
00118 PyMOL Copyright Notice
00119 ======================
00120 
00121 The PyMOL source code is copyrighted, but you can freely use and copy
00122 it as long as you don't change or remove any of the copyright notices.
00123 
00124 ----------------------------------------------------------------------
00125 PyMOL is Copyright 1998-2003 by Warren L. DeLano of 
00126 DeLano Scientific LLC, San Carlos, CA, USA (www.delanoscientific.com).
00127 
00128                         All Rights Reserved
00129 
00130 Permission to use, copy, modify, distribute, and distribute modified 
00131 versions of this software and its documentation for any purpose and 
00132 without fee is hereby granted, provided that the above copyright 
00133 notice appear in all copies and that both the copyright notice and 
00134 this permission notice appear in supporting documentation, and that 
00135 the names of Warren L. DeLano or DeLano Scientific LLC not be used in 
00136 advertising or publicity pertaining to distribution of the software 
00137 without specific, written prior permission.
00138 
00139 WARREN LYFORD DELANO AND DELANO SCIENTIFIC LLC DISCLAIM ALL WARRANTIES 
00140 WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
00141 MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL WARREN LYFORD DELANO
00142 OR DELANO SCIENTIFIC LLC BE LIABLE FOR ANY SPECIAL, INDIRECT OR 
00143 CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
00144 OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
00145 OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE 
00146 USE OR PERFORMANCE OF THIS SOFTWARE.
00147 ----------------------------------------------------------------------
00148 
00149 Where indicated, portions of the PyMOL system are instead protected
00150 under the copyrights of the respective authors.  However, all code in
00151 the PyMOL system is released as non-restrictive open-source software
00152 under the above license or an equivalent license.  
00153 
00154 PyMOL Trademark Notice
00155 ======================
00156 
00157 PyMOL(TM) is a trademark of DeLano Scientific LLC.  Derivate software
00158 which contains PyMOL source code must be plainly distinguished from
00159 the PyMOL package distributed by DeLano Scientific LLC in all publicity,
00160 advertising, and documentation.
00161 
00162 The slogans, "Includes PyMOL(TM).", "Based on PyMOL(TM) technology.",
00163 "Contains PyMOL(TM) source code.", and "Built using PyMOL(TM).", may
00164 be used in advertising, publicity, and documentation of derivate
00165 software provided that the notice, "PyMOL is a trademark of DeLano
00166 Scientific LLC.", is included in a footnote or at the end of the document.
00167 
00168 All other endorsements employing the PyMOL trademark require specific,
00169 written prior permission.
00170 
00171 --Warren L. DeLano (warren@delanoscientific.com)
00172 
00173 */
00174 
00175 #ifdef __cplusplus
00176 extern "C" {
00177 #endif
00178 
00179 #ifdef R_SMALL
00180 #undef R_SMALL
00181 #endif
00182 #define R_SMALL 0.000000001
00183 
00184 static void normalize3d(double *v) {
00185   double vlen;
00186   vlen = sqrt(v[0]*v[0] + v[1]*v[1] + v[2]*v[2]);
00187   if (vlen > R_SMALL) {
00188     v[0] /= vlen;
00189     v[1] /= vlen;
00190     v[2] /= vlen;
00191   } else {
00192     v[0] = 0;
00193     v[1] = 0;
00194     v[2] = 0;
00195   }
00196 }
00197 
00198 /*========================================================================*/
00199   static float MyMatrixFitRMS(int n, float *v1, float *v2, const float *wt, const double tol)
00200 {
00201   /*
00202         Subroutine to do the actual RMS fitting of two sets of vector coordinates
00203         This routine does not rotate the actual coordinates, but instead returns 
00204         the RMS fitting value, along with the center-of-mass translation vectors 
00205         T1 and T2 and the rotation vector M, which rotates the translated 
00206         coordinates of molecule 2 onto the translated coordinates of molecule 1.
00207   */
00208 
00209   float *vv1,*vv2;
00210   double m[3][3],aa[3][3];
00211   double sumwt, sig, gam;
00212   double sg, bb, cc, tmp, err, etmp;
00213   int a, b, c, maxiter, iters, iy, iz;
00214   double t1[3],t2[3];
00215   double aatmp[9];
00216 
00217   /* Initialize arrays. */
00218 
00219   for(a=0;a<3;a++) {
00220     for(b=0;b<3;b++) {
00221       m[a][b] = 0.0;
00222       aa[a][b] = 0.0;
00223       aatmp[3*a+b] = 0;
00224     }
00225     m[a][a] = 1.0;
00226     t1[a]=0.0;
00227     t2[a]=0.0;
00228   }
00229 
00230   /* maximum number of fitting iterations */
00231   maxiter = 1000;
00232 
00233   /* Calculate center-of-mass vectors */
00234 
00235   vv1=v1;
00236   vv2=v2;
00237   sumwt = 0.0;
00238 
00239   for(c=0;c<n;c++) {
00240     double w = wt ? wt[c] : 1;
00241     t1[0] += w * vv1[0];
00242     t1[1] += w * vv1[1];
00243     t1[2] += w * vv1[2];
00244     t2[0] += w * vv2[0];
00245     t2[1] += w * vv2[1];
00246     t2[2] += w * vv2[2];
00247     sumwt += w;
00248     vv1+=3;
00249     vv2+=3;
00250   }
00251   for(a=0;a<3;a++) {
00252     t1[a] /= sumwt;
00253     t2[a] /= sumwt;
00254   }
00255 
00256   /* Calculate correlation matrix */
00257   vv1=v1;
00258   vv2=v2;
00259   for(c=0;c<n;c++) {
00260     double w = wt ? wt[c] : 1;
00261     double x1 = w * (vv1[0] - t1[0]);
00262     double y1 = w * (vv1[1] - t1[1]);
00263     double z1 = w * (vv1[2] - t1[2]);
00264 
00265     /* don't multply x2/y2/z2 by w, otherwise weights get squared */
00266     double x2 =     (vv2[0] - t2[0]); 
00267     double y2 =     (vv2[1] - t2[1]);
00268     double z2 =     (vv2[2] - t2[2]);
00269     aatmp[0] += x2 * x1;
00270     aatmp[1] += x2 * y1;
00271     aatmp[2] += x2 * z1;
00272     aatmp[3] += y2 * x1;
00273     aatmp[4] += y2 * y1;
00274     aatmp[5] += y2 * z1;
00275     aatmp[6] += z2 * x1;
00276     aatmp[7] += z2 * y1;
00277     aatmp[8] += z2 * z1;
00278     vv1+=3;
00279     vv2+=3;
00280   }
00281 
00282   for (a=0; a<3; a++) 
00283     for (b=0; b<3; b++) 
00284       aa[a][b] = aatmp[3*a+b];
00285 
00286   if(n>1) {
00287     /* Primary iteration scheme to determine rotation matrix for molecule 2 */
00288     iters = 0;
00289     while(1) {
00290       /* IX, IY, and IZ rotate 1-2-3, 2-3-1, 3-1-2, etc.*/
00291       iz = (iters+1) % 3;
00292       iy = (iz+1) % 3;
00293       // unused...
00294       // ix = (iy+1) % 3;
00295       sig = aa[iz][iy] - aa[iy][iz];
00296       gam = aa[iy][iy] + aa[iz][iz];
00297 
00298       if(iters>=maxiter) {
00299         fprintf(stderr,
00300                 " Matrix: Warning: no convergence (%.15f>%.15f after %d iterations).\n",
00301                 fabs(sig),tol*fabs(gam),iters);
00302         break;
00303       }
00304 
00305       /* Determine size of off-diagonal element.  If off-diagonals exceed the
00306          diagonal elements * tolerance, perform Jacobi rotation. */
00307       tmp = sig*sig + gam*gam;
00308       sg = sqrt(tmp);
00309       if( (sg > 0.0) && (fabs(sig)>(tol*fabs(gam))) ) {
00310         sg = 1.0 / sg;
00311         for(a=0;a<3;a++) {
00312           bb = gam*aa[iy][a] + sig*aa[iz][a];
00313           cc = gam*aa[iz][a] - sig*aa[iy][a];
00314           aa[iy][a] = bb*sg;
00315           aa[iz][a] = cc*sg;
00316 
00317           bb = gam*m[iy][a] + sig*m[iz][a];
00318           cc = gam*m[iz][a] - sig*m[iy][a];
00319           m[iy][a] = bb*sg;
00320           m[iz][a] = cc*sg;
00321         }
00322       } else {
00323         break;
00324       }
00325       iters++;
00326     }
00327   }
00328   /* At this point, we should have a converged rotation matrix (M).  Calculate
00329          the weighted RMS error. */
00330   err=0.0;
00331   vv1=v1;
00332   vv2=v2;
00333 
00334   normalize3d(m[0]);
00335   normalize3d(m[1]);
00336   normalize3d(m[2]);
00337 
00338   for(c=0;c<n;c++) {
00339         etmp = 0.0;
00340         for(a=0;a<3;a++) {
00341           tmp = m[a][0]*(vv2[0]-t2[0])
00342                 + m[a][1]*(vv2[1]-t2[1])
00343                 + m[a][2]*(vv2[2]-t2[2]);
00344           tmp = (vv1[a]-t1[a])-tmp;
00345           etmp += tmp*tmp;
00346         }
00347 
00348         if(wt)
00349           err += wt[c] * etmp;
00350         else 
00351           err += etmp;
00352 
00353         vv1+=3;
00354         vv2+=3;
00355   }
00356 
00357   err=err/sumwt;
00358   err=sqrt(err);
00359   return (float)err;
00360 }
00361 
00362 #ifdef __cplusplus
00363 }
00364 #endif
00365 
00366 /* XXX: end of customized MatrixFitRMS */
00367 
00368 
00369 // compute weighted RMSD between selected atoms in two frames
00370 // this is a simple wrapper around measure_rmsd.
00371 static float cluster_get_rmsd(const float *Frame1Pos, const float *Frame2Pos, 
00372                               AtomSel *sel, float *weights) {
00373   float distance = 0.0f;
00374   measure_rmsd(sel, sel, sel->num_atoms, Frame1Pos, Frame2Pos, weights, &distance);
00375   return distance;
00376 }
00377 
00378 
00379 // compute weighted difference between radius of gyration
00380 // of the selected atoms in the two frames.
00381 static float cluster_get_rgyrd(const float *Frame1Pos, const float *Frame2Pos, 
00382                                AtomSel *sel, float *weights) {
00383 
00384   float distance = 10000000.0f;
00385 
00386   // compute the center of mass with the current weights
00387   float com1[3], com2[3];
00388   int ret_val;
00389 
00390   ret_val = measure_center(sel, Frame1Pos, weights, com1);
00391   if (ret_val < 0) 
00392     return distance;
00393 
00394   ret_val = measure_center(sel, Frame2Pos, weights, com2);
00395   if (ret_val < 0) 
00396     return distance;
00397   
00398   // measure center of gyration
00399   int i, j;
00400   float total_w, w, sum1, sum2;
00401   total_w=sum1=sum2=0.0f;
00402   for (j=0,i=sel->firstsel; i<=sel->lastsel; i++) {
00403     if (sel->on[i]) {
00404       w = weights[j];
00405       total_w += w;
00406       sum1 += w * distance2(Frame1Pos + 3*i, com1);
00407       sum2 += w * distance2(Frame2Pos + 3*i, com2);
00408       j++;
00409     }
00410   }
00411 
00412   if (total_w == 0.0f)
00413     return distance;
00414 
00415   // and finalize the computation
00416   distance = sqrtf(sum1/total_w) - sqrtf(sum2/total_w);
00417   return fabsf(distance);
00418 }
00419 
00420 
00421 // This is a stripped down version of measure fit supporting only
00422 // selections with 4 atoms or larger. not much value in clustering
00423 // smaller systems with fit and rmsd.
00424 // This algorithm comes from Kabsch, Acta Cryst. (1978) A34, 827-828.
00425 static float cluster_get_fitrmsd(const float *Frame1Pos, const float *Frame2Pos, 
00426                                  AtomSel *sel, float *weights, const double tol) {
00427 
00428   int num = sel->selected;
00429   
00430   // failure of the fit+rmsd is indicated by a very large distance.
00431   float distance = 10000000.0f;
00432 
00433   // use the new RMS fit implementation only
00434   // fit+clustering with 3 or less atoms doesn't make much sense.
00435   // the Kabsch method won't work of the number of atoms is less than 4
00436   // (and won't work in some cases of n > 4; I think it works so long as
00437   // three or more planes are needed to intersect all the data points
00438 
00439   if (sel->selected < 4)
00440     return distance;
00441     
00442   int i, j, k;
00443   float *v1, *v2, *wt;
00444   v1 = new float[3*num];
00445   v2 = new float[3*num];
00446   wt = new float[num];
00447   for (j=0,k=0,i=sel->firstsel; i<=sel->lastsel; i++) {
00448     if (sel->on[i]) {
00449       int ind = 3 * i;
00450       wt[j] = weights[i];
00451       ++j;
00452       v1[k] = Frame1Pos[ind];
00453       v2[k] = Frame2Pos[ind];
00454       v1[k+1] = Frame1Pos[ind+1];
00455       v2[k+1] = Frame2Pos[ind+1];
00456       v1[k+2] = Frame1Pos[ind+2];
00457       v2[k+2] = Frame2Pos[ind+2];
00458       k+=3;
00459     }
00460   }
00461   distance = MyMatrixFitRMS(num, v1, v2, wt, tol);
00462 
00463   delete [] v1;
00464   delete [] v2;
00465   delete [] wt;
00466 
00467   return distance;
00468 }
00469 
00470 
00471 
00472 
00473 typedef struct {
00474   int threadid;
00475   int threadcount;
00476 
00477   int max_cluster_size;
00478   const int *skip_list;
00479   int *new_skip_list;
00480   int *max_cluster;
00481 
00482   int istart;
00483   int iend;
00484   int *frames_list;
00485   int numframes;
00486 
00487   AtomSelThr *sel;
00488   Molecule *mol;
00489   int selupdate;
00490   float cutoff;
00491   int likeness;
00492   float *weights;
00493 } clusterparms_t;
00494 
00495 
00496 // cluster search thread worker function
00497 extern "C" void * find_cluster_thr(void *voidparms)
00498 {
00499 
00500   clusterparms_t *parms = (clusterparms_t *)voidparms;
00501   const int istart = parms->istart;
00502   const int iend   = parms->iend;
00503   int *framesList = parms->frames_list;
00504   const int numframes = parms->numframes;
00505 
00506   const int selupdate = parms->selupdate;
00507   const int likeness = parms->likeness;
00508   float cutoff = parms->cutoff;
00509   float *weights = parms->weights;
00510 
00511   AtomSelThr *sel = parms->sel;
00512   Molecule *mymol = parms->mol;
00513   const int *skipList = parms->skip_list;
00514 
00515   int *maxCluster = parms->max_cluster;
00516   memset(maxCluster, 0, numframes*sizeof(int));
00517   int *newSkipList = parms->new_skip_list;
00518   memset(newSkipList, 0, numframes*sizeof(int));
00519 
00520   int maxClusterSize = 0, tempClusterSize = 0;
00521   int *tempCluster = new int[numframes];
00522   int *tempSkipList = new int[numframes];
00523 
00524 
00525   // MatrixFitRMS returns RMS distance of fitted molecule. 
00526   /* RMS fit tolerance */
00527   double tol = 1e-15;
00528   const char *TOL = getenv( "VMDFITRMSTOLERANCE" );
00529   if (TOL)
00530     tol = atof(TOL);
00531 
00532   // Loops through assigned frames find the one with the max cluster size
00533   int i,j;
00534   for (i = istart; i < iend; i++) {
00535     memset(tempSkipList, 0, numframes*sizeof(int));
00536     memset(tempCluster, 0, numframes*sizeof(int));
00537 
00538     if (skipList[i]==0) {
00539       if (selupdate)
00540         sel->update(mymol,framesList[i]);
00541       
00542       const Timestep *tsMain = mymol->get_frame(framesList[i]);
00543       const float *framePos = tsMain->pos;
00544 
00545       tempCluster[0] = i;
00546       tempSkipList[i] = 1;
00547       tempClusterSize = 1;
00548 
00549       // Loops through all frames other then frame i and computes frame i's distance to them
00550       for (j = 0; j < numframes; j++) {
00551         if (skipList[j]==0 && j != i) {
00552           const Timestep *ts2;
00553           ts2 = mymol->get_frame(framesList[j]);
00554           float distance;
00555 
00556           // branch to the implemented likeness algorithms
00557           switch(likeness) {
00558             case MEASURE_DIST_RMSD:
00559               distance = cluster_get_rmsd(framePos, ts2->pos, sel, weights);
00560               break;
00561 
00562             case MEASURE_DIST_FITRMSD:
00563               distance = cluster_get_fitrmsd(framePos, ts2->pos, sel, weights, tol);
00564               break;
00565               
00566             case MEASURE_DIST_RGYRD:
00567               distance = cluster_get_rgyrd(framePos, ts2->pos, sel, weights);
00568               break;
00569               
00570             default:
00571               distance = 10000000.0f;
00572           }
00573 
00574           if (distance <= cutoff) {
00575             tempCluster[tempClusterSize] = j;
00576             ++tempClusterSize;
00577             tempSkipList[j] = 1;
00578           }
00579         }
00580       }
00581 
00582       // If size of temp cluster > max cluster, temp cluster becomes max cluster
00583       if (tempClusterSize > maxClusterSize) {
00584         int *temp;
00585         maxClusterSize = tempClusterSize;
00586 
00587         temp = maxCluster;
00588         maxCluster = tempCluster;
00589         tempCluster = temp;
00590 
00591         temp = newSkipList;
00592         newSkipList = tempSkipList;
00593         tempSkipList = temp;
00594       }
00595     }
00596   }
00597 
00598   // update parameter struct with results
00599   parms->max_cluster_size = maxClusterSize;
00600   parms->max_cluster = maxCluster;
00601   parms->new_skip_list = newSkipList;
00602   
00603   // cleanup
00604   delete[] tempCluster;
00605   delete[] tempSkipList;
00606 
00607   return MEASURE_NOERR;
00608 }
00609     
00610 
00612 static int *find_next_cluster(Molecule *mymol, int *framesList, const int numframes, 
00613                               const int remframes, const int *skipList, int **newSkipList,
00614                               const int likeness, AtomSel *sel, const int selupdate, 
00615                               const double cutoff, float *weights)
00616 {
00617   int i,j;
00618   
00619   // threading setup.
00620   wkf_thread_t   *threads;
00621   clusterparms_t *parms;
00622 
00623 #if defined(VMDTHREADS)
00624   int numprocs = wkf_thread_numprocessors();
00625 #else
00626   int numprocs = 1;
00627 #endif
00628 
00629   int delta = remframes / numprocs;
00630   int istart = 0;
00631   int iend = 0;
00632   
00633   // not enough work to do, force serial execution
00634   if (delta < 1) {
00635     numprocs=1;
00636     delta=numframes;
00637   }
00638 
00639   threads = new wkf_thread_t[numprocs];
00640   memset(threads, 0, numprocs * sizeof(wkf_thread_t));
00641   wkf_mutex_t *atomsel_lock = new wkf_mutex_t;
00642   wkf_mutex_init(atomsel_lock);
00643 
00644   // allocate and (partially) initialize array of per-thread parameters
00645   parms = new clusterparms_t[numprocs];
00646   for (i=0; i<numprocs; ++i) {
00647     parms[i].threadid = i;
00648     parms[i].threadcount = numprocs;
00649 
00650     parms[i].max_cluster_size = 1;
00651     parms[i].skip_list = skipList;
00652     parms[i].new_skip_list = new int[numframes];
00653     parms[i].max_cluster = new int[numframes];
00654 
00655     // use a thread-safe wrapper to access "the one" 
00656     // AtomSel class. The wrapper uses mutexes to 
00657     // prevent from updating the global selection from
00658     // multiple threads at the same time. The whole data 
00659     // access infrastructure in VMD is currently not thread-safe.
00660     parms[i].sel = new AtomSelThr(mymol->app, sel, atomsel_lock);
00661     parms[i].mol = mymol;
00662 
00663     // load balancing. scatter the remaining frames evenly
00664     // by skipping over eliminated frames.
00665     parms[i].istart = istart;
00666     int nframe=0;
00667     for (j=istart; (j < numframes) && (nframe < delta); ++j) {
00668       if (skipList[framesList[j]]==0) 
00669         ++nframe;
00670       iend=j;
00671     }
00672     parms[i].iend = iend;
00673     istart=iend;
00674                    
00675     parms[i].frames_list = framesList;
00676     parms[i].numframes = numframes;
00677     parms[i].selupdate = selupdate;
00678     parms[i].likeness = likeness;
00679     parms[i].cutoff = (float) cutoff;
00680     parms[i].weights = weights;
00681   }
00682   parms[numprocs-1].iend=numframes;
00683 
00684 
00685 #if defined(VMDTHREADS)
00686   if (numprocs > 1) {
00687     for (i=0; i<numprocs; ++i) {
00688       wkf_thread_create(&threads[i], find_cluster_thr, &parms[i]);
00689     }
00690     for (i=0; i<numprocs; ++i) {
00691       wkf_thread_join(threads[i], NULL);
00692     }
00693   } else
00694 #endif
00695   find_cluster_thr(&parms[0]);
00696   
00697   int maxClusterSize = parms[0].max_cluster_size;
00698   int *maxCluster = parms[0].max_cluster;
00699   delete[] *newSkipList;
00700   *newSkipList= parms[0].new_skip_list;
00701 
00702   // retrieve results from additional threads,
00703   // override, if needed, and free temporary storage.
00704   if (numprocs > 1) {
00705     for (i = 1; i < numprocs; i++) {
00706       if (parms[i].max_cluster_size > maxClusterSize) {
00707         maxClusterSize = parms[i].max_cluster_size;
00708         delete[] maxCluster;
00709         maxCluster = parms[i].max_cluster;
00710         delete[] *newSkipList;
00711         *newSkipList = parms[i].new_skip_list;
00712       } else {
00713         delete[] parms[i].max_cluster;
00714         delete[] parms[i].new_skip_list;
00715       }
00716     }
00717   }
00718     
00719   // Transform cluster list back to real frame numbers
00720   for (i = 0; i < numframes; i++) {
00721     maxCluster[i] = framesList[maxCluster[i]];
00722   }
00723 
00724   // cleanup.
00725   wkf_mutex_destroy(atomsel_lock);
00726   delete atomsel_lock;
00727 
00728   if (selupdate) {
00729     for (i=0; i<numprocs; ++i)
00730       delete parms[i].sel;
00731   }
00732   delete[] threads;
00733   delete[] parms;
00734 
00735   return maxCluster;
00736 }
00737 
00738 int measure_cluster(AtomSel *sel, MoleculeList *mlist,
00739                     const int numcluster, const int algorithm,
00740                     const int likeness, const double cutoff,
00741                     int *clustersize, int **clusterlist,
00742                     int first, int last, int step, int selupdate, 
00743                     float *weights)
00744 {
00745   Molecule *mymol = mlist->mol_from_id(sel->molid());
00746   int maxframe = mymol->numframes()-1;
00747 
00748   if (last == -1) last = maxframe;
00749 
00750   if ((last < first) || (last < 0) || (step <=0) || (first < 0)
00751       || (last > maxframe)) {
00752     msgErr << "measure cluster: bad frame range given."
00753            << " max. allowed frame#: " << maxframe << sendmsg;
00754     return MEASURE_ERR_BADFRAMERANGE;
00755   }
00756 
00757   int numframes = (last-first+1)/step;
00758   int remframes = numframes;
00759 
00760   // create list with frames numbers selected to process
00761   int *framesList = new int[numframes];
00762   int frame_count = 0;
00763   int n;
00764 
00765   for(n = first; n <= last; n += step)
00766     framesList[frame_count++] = n;
00767 
00768   // accumulated list of frames to skip because they belong to a cluster
00769   int *skipList = new int[numframes];
00770   // new list of frames to skip to be added to the existing skip list.
00771   int *newSkipList = new int[numframes];
00772   // initially we want all frames.
00773   memset(skipList, 0, numframes*sizeof(int));
00774   // timer for progress messages
00775   wkfmsgtimer *msgtp = wkf_msg_timer_create(5);
00776 
00777   // compute the numcluster largest clusters
00778   for(n = 0; n < numcluster; ++n){
00779 
00780     // wipe out list of frames to be added to the global skiplist
00781     memset(newSkipList, 0, numframes*sizeof(int));
00782     clusterlist[n] = find_next_cluster(mymol, framesList, numframes, remframes,
00783                                        skipList, &newSkipList, likeness,
00784                                        sel, selupdate, cutoff, weights);
00785     int n_cluster=0;
00786     for(int i = 0; i < numframes; ++i){
00787       if (newSkipList[i] == 1) {
00788         skipList[i] = 1;
00789         n_cluster++;
00790       }
00791     }
00792     clustersize[n]=n_cluster;
00793     remframes -= n_cluster;
00794 
00795     // print progress messages for long running tasks
00796     if (msgtp && wkf_msg_timer_timeout(msgtp)) {
00797       char tmpbuf[1024];
00798       sprintf(tmpbuf, "cluster %d of %d: (%6.2f%% complete). %d frames of %d left.", 
00799               n+1, numcluster, 100.0f*(n+1)/((float) numcluster), remframes, numframes);
00800       msgInfo << "measure cluster: " << tmpbuf << sendmsg;
00801     }
00802   }
00803 
00804   // combine unclustered frames to form the last cluster
00805   int *unclustered = new int[numframes];
00806   int numunclustered = 0;
00807   for (n = 0; n < numframes; ++n) {
00808     if (skipList[n] == 0) {
00809       unclustered[numunclustered] = framesList[n];
00810       ++numunclustered;
00811     }
00812   }
00813   // NOTE: both lists have been allocated
00814   // to the length of numcluster+1
00815   clusterlist[numcluster] = unclustered;
00816   clustersize[numcluster] = numunclustered;
00817 
00818   // Cleanup
00819   delete[] newSkipList;
00820   delete[] skipList;
00821   wkf_msg_timer_destroy(msgtp);
00822 
00823   return MEASURE_NOERR;
00824 }
00825 
00826 
00827 /**************************************************************************/
00828 
00829 typedef ResizeArray<int> intlist;
00830 
00831 // helper function for cluster size analysis
00832 // build index list for the next cluster.
00833 static void assemble_cluster(intlist &cluster_list, intlist &candidate_list,
00834                intlist **neighbor_grid, int atom, int numshared, int *idxmap) {
00835   int idx, nn, i,j;
00836   
00837   // clear lists and add initial atom pairs to candidates list.
00838   candidate_list.clear();
00839   cluster_list.clear();
00840 
00841   idx = idxmap[atom];
00842   nn  = neighbor_grid[idx]->num();
00843   for (i = 0; i < nn; i++) {
00844     int bn = (*neighbor_grid[idx])[i];
00845     if (neighbor_grid[idxmap[bn]]) {
00846       candidate_list.append(atom);
00847       candidate_list.append(bn);
00848     }
00849   }
00850 
00851   // pointer to the currently processed cluster candidate list entry.
00852   int curidx=0;
00853   
00854   while (curidx < candidate_list.num()) {
00855 
00856     // a pair of neighbors has to share at least numshared
00857     // neighbors to be added to the cluster.
00858     // at least numshared neighbors.
00859     int count = 0;
00860 
00861     if (numshared > 0) {
00862       int ida = idxmap[candidate_list[curidx]];
00863       int idb = idxmap[candidate_list[curidx+1]];
00864       int nna  = neighbor_grid[ida]->num();
00865       int nnb  = neighbor_grid[idb]->num();
00866 
00867       for (i = 0; i < nna; i++) {
00868         if (neighbor_grid[ida]) {
00869           for (j = 0; j < nnb; j++) {
00870             if (neighbor_grid[idb]) {
00871               if ( (*neighbor_grid[ida])[i] == (*neighbor_grid[idb])[j] ) {
00872                 ++count;
00873                 if (count == numshared) 
00874                   goto exit;
00875               }
00876             }
00877           }
00878         }
00879       }
00880     }
00881     exit:
00882     
00883     if (count == numshared) {
00884 
00885       // add central atom of group of neighbors.
00886       // its neighbors had already been added to
00887       // the candidate list.
00888       int atma = candidate_list[curidx];
00889       if (cluster_list.find(atma) < 0) 
00890         cluster_list.append(atma);
00891 
00892       // add neighbor of central atom to cluster and
00893       // add neighbors of this atom to candidate list, 
00894       // if they are not already in it.
00895       int atmb = candidate_list[curidx+1];
00896       idx = idxmap[atmb];
00897 
00898       if (cluster_list.find(atmb) < 0) {
00899         cluster_list.append(atmb);
00900         
00901         int nnb = neighbor_grid[idx]->num();
00902         for (i = 0; i < nnb; i++) {
00903           int bn = (*neighbor_grid[idx])[i];
00904           if ((neighbor_grid[idxmap[bn]]) && (cluster_list.find(bn) < 0)) {
00905             candidate_list.append(atmb);
00906             candidate_list.append(bn);
00907           }
00908         }
00909       }
00910     }
00911 
00912     ++curidx;++curidx; // next candidate pair
00913   }
00914   
00915   return;
00916 }
00917 
00918 // perform cluster size analysis
00919 int measure_clustsize(const AtomSel *sel, MoleculeList *mlist,
00920                       const double cutoff, int *clustersize,
00921                       int *clusternum, int *clusteridx, 
00922                       int minsize, int numshared, int usepbc) {
00923   int i,j;
00924 
00925   const float *framepos  = sel->coordinates(mlist);
00926   const int num_selected = sel->selected;
00927   const int num_atoms    = sel->num_atoms;
00928   const int *selected    = sel->on;
00929 
00930   // forward and reverse index maps for relative position
00931   // of atoms in the selection and in the global arrays.
00932   int *idxmap      = new int[num_atoms];
00933   int *idxrev      = new int[num_selected];
00934 
00935   for (i=0; i<sel->firstsel; i++) 
00936     idxmap[i]=-1;
00937 
00938   for(j=0,i=sel->firstsel; i<=sel->lastsel; i++) {
00939     if (sel->on[i]) {
00940       idxrev[j]=i;
00941       idxmap[i]=j++;
00942     } else {
00943       idxmap[i]=-1;
00944     }
00945   }
00946 
00947   for (i=sel->lastsel+1; i<sel->num_atoms; i++) 
00948     idxmap[i]=-1;
00949 
00950   // allocate list of neighbor lists.
00951   intlist **neighbor_grid;
00952   neighbor_grid = new intlist *[num_selected];
00953   for (i = 0; i < num_selected; i++)
00954     neighbor_grid[i] = new intlist;
00955 
00956   // compile list of pairs for selection.
00957   GridSearchPair *pairlist, *currentPair, *nextPair;
00958   pairlist = vmd_gridsearch1(framepos, num_atoms, selected, (float)cutoff, 0, -1);
00959 
00960   // populate the neighborlist grid.
00961   for (currentPair = pairlist; currentPair != NULL; currentPair = nextPair) {
00962     neighbor_grid[idxmap[currentPair->ind1]]->append(currentPair->ind2);
00963     neighbor_grid[idxmap[currentPair->ind2]]->append(currentPair->ind1);
00964     nextPair = currentPair->next;
00965     free(currentPair);
00966   }
00967 
00968   // collect the cluster size information.
00969   int currentClusterNum = 0;
00970   int currentPosition = 0;
00971   intlist cluster_list(64);
00972   intlist candidate_list(128);
00973 
00974   while (currentPosition < num_selected) {
00975     if (neighbor_grid[currentPosition]) {
00976       // pick next atom that has not been processed yet and build list of 
00977       // all indices for this cluster. by looping over the neighbors of 
00978       // the first atom and adding all pairs of neighbors that share at
00979       // least numshared neighbors. continue with neighbors of neighbors 
00980       // accordingly until no more new unique neighbors are found.
00981       // entries of atoms added to a cluster are removed from neighbor_grid
00982       if (neighbor_grid[currentPosition]->num() > numshared) {
00983         assemble_cluster(cluster_list, candidate_list, neighbor_grid, 
00984                          idxrev[currentPosition], numshared, idxmap);
00985 
00986         if (minsize <= cluster_list.num()) {
00987           // these atoms have been processed. remove from global list
00988           for (i = 0; i < cluster_list.num(); i++) {
00989             int idx = idxmap[cluster_list[i]];
00990             delete neighbor_grid[idx];
00991             neighbor_grid[idx] = 0;
00992           }
00993       
00994           // store the cluster size, cluster index and atom index information in 
00995           // the designated arrays.
00996           for (i = 0; i < cluster_list.num(); i++) {
00997             int atom = idxmap[cluster_list[i]];
00998             clusteridx[atom] = cluster_list[i];
00999             clusternum[atom] = currentClusterNum;
01000             clustersize[atom] = cluster_list.num();
01001           }
01002           currentClusterNum++;
01003         }
01004       }
01005     }
01006     ++currentPosition;
01007   }
01008 
01009   for(i=0; i < num_selected; ++i) {
01010     if (neighbor_grid[i])
01011       delete neighbor_grid[i];
01012   }
01013   delete[] neighbor_grid;
01014 
01015   return MEASURE_NOERR;
01016 }