Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

BondSearch.C

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr                                                                       
00003  *cr            (C) Copyright 1995-2019 The Board of Trustees of the           
00004  *cr                        University of Illinois                       
00005  *cr                         All Rights Reserved                        
00006  *cr                                                                   
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: BondSearch.C,v $
00013  *      $Author: johns $        $Locker:  $             $State: Exp $
00014  *      $Revision: 1.77 $       $Date: 2020/07/08 04:22:12 $
00015  *
00016  ***************************************************************************
00017  * DESCRIPTION:
00018  *
00019  * Distance based bond search code 
00020  *
00021  ***************************************************************************/
00022 
00023 #include <stdio.h>
00024 #include <math.h>
00025 #include <stdlib.h>
00026 #include "BondSearch.h"
00027 #include "Timestep.h"
00028 #include "BaseMolecule.h"
00029 #include "Molecule.h"
00030 #include "Inform.h"
00031 #include "WKFThreads.h"
00032 #include "WKFUtils.h"
00033 #include <ctype.h>         // needed for isdigit()
00034 #include <string.h>
00035 
00036 
00037 GridSearchPairlist *vmd_gridsearch_bonds(const float *pos, const float *radii,
00038                                    int natoms, float pairdist, int maxpairs) {
00039   float min[3], max[3];
00040   int i, xb, yb, zb, xytotb, totb;
00041   int **boxatom, *numinbox, *maxinbox, **nbrlist;
00042   int numon = 0;
00043   float sidelen[3], volume;
00044   int paircount = 0;
00045 
00046   // find bounding box for selected atoms, and number of atoms in selection.
00047 #if 1
00048   minmax_3fv_aligned(pos, natoms, min, max);
00049 #else
00050   find_minmax_all(pos, natoms, min, max);
00051 #endif
00052 
00053   // check for NaN coordinates propagating to the bounding box result
00054   if (!(max[0] >= min[0] && max[1] >= min[1] && max[2] >= min[2])) {
00055     msgErr << "vmd_gridsearch_bonds: NaN coordinates in bounds, aborting!" << sendmsg;
00056     return NULL;
00057   }
00058 
00059   // do sanity checks and complain if we've got bogus atom coordinates,
00060   // we shouldn't ever have density higher than 0.1 atom/A^3, but we'll
00061   // be generous and allow much higher densities.  
00062   if (maxpairs != -1) {
00063     vec_sub(sidelen, max, min);
00064     // include estimate for atom radius (1 Angstrom) in volume determination
00065     volume = fabsf((sidelen[0] + 2.0f) * (sidelen[1] + 2.0f) * (sidelen[2] + 2.0f));
00066     if ((numon / volume) > 1.0) {
00067       msgWarn << "vmd_gridsearch_bonds: insane atom density" << sendmsg;
00068     }
00069   }
00070 
00071   // I don't want the grid to get too large, otherwise I could run out
00072   // of memory.  Octrees would be cool, but I'll just limit the grid size
00073   // and let the performance degrade a little for pathological systems.
00074   // Note that pairdist^2 is what gets used for the actual distance checks;
00075   // from here on out pairdist is only used to set the grid size, so we 
00076   // can set it to anything larger than the original pairdist.
00077   const int MAXBOXES = 4000000;
00078   totb = MAXBOXES + 1;
00079 
00080   float newpairdist = pairdist;
00081   float xrange = max[0]-min[0];
00082   float yrange = max[1]-min[1];
00083   float zrange = max[2]-min[2];
00084   do {
00085     pairdist = newpairdist;
00086     const float invpairdist = 1.0f / pairdist; 
00087     xb = ((int)(xrange*invpairdist))+1;
00088     yb = ((int)(yrange*invpairdist))+1;
00089     zb = ((int)(zrange*invpairdist))+1;
00090     xytotb = yb * xb;
00091     totb = xytotb * zb;
00092     newpairdist = pairdist * 1.26f; // cbrt(2) is about 1.26
00093   } while (totb > MAXBOXES || totb < 1); // check for integer wraparound too
00094  
00095   // 2. Sort each atom into appropriate bins
00096   boxatom = (int **) calloc(1, totb*sizeof(int *));
00097   numinbox = (int *) calloc(1, totb*sizeof(int));
00098   maxinbox = (int *) calloc(1, totb*sizeof(int));
00099   if (boxatom == NULL || numinbox == NULL || maxinbox == NULL) {
00100     if (boxatom != NULL)
00101       free(boxatom);
00102     if (numinbox != NULL)
00103       free(numinbox);
00104     if (maxinbox != NULL)
00105       free(maxinbox);
00106     msgErr << "Bondsearch memory allocation failed, bailing out" << sendmsg;
00107     return NULL; // ran out of memory, bail out!
00108   }
00109 
00110   const float invpairdist = 1.0f / pairdist; 
00111   for (i=0; i<natoms; i++) {
00112     int axb, ayb, azb, aindex, num;
00113 
00114     // compute box index for new atom
00115     const float *loc = pos + 3L*i;
00116     axb = (int)((loc[0] - min[0])*invpairdist);
00117     ayb = (int)((loc[1] - min[1])*invpairdist);
00118     azb = (int)((loc[2] - min[2])*invpairdist);
00119 
00120     // clamp box indices to valid range in case of FP error
00121     if (axb >= xb) axb = xb-1;
00122     if (ayb >= yb) ayb = yb-1;
00123     if (azb >= zb) azb = zb-1;
00124 
00125     aindex = azb * xytotb + ayb * xb + axb;
00126 
00127     // grow box if necessary 
00128     if ((num = numinbox[aindex]) == maxinbox[aindex]) {
00129       boxatom[aindex] = (int *) realloc(boxatom[aindex], (num+4)*sizeof(int));
00130       maxinbox[aindex] += 4;
00131     }
00132 
00133     // store atom index in box
00134     boxatom[aindex][num] = i;
00135     numinbox[aindex]++;
00136   }
00137   free(maxinbox);
00138  
00139   nbrlist = (int **) calloc(1, totb*sizeof(int *));
00140   if (make_neighborlist(nbrlist, xb, yb, zb)) {
00141     if (boxatom != NULL) {
00142       for (i=0; i<totb; i++) {
00143         if (boxatom[i] != NULL) free(boxatom[i]);
00144       }
00145       free(boxatom);
00146     }
00147     if (nbrlist != NULL) {
00148       for (i=0; i<totb; i++) {
00149         if (nbrlist[i] != NULL) free(nbrlist[i]);
00150       }
00151       free(nbrlist);
00152     }
00153     free(numinbox);
00154     msgErr << "Bondsearch memory allocation failed, bailing out" << sendmsg;
00155     return NULL; // ran out of memory, bail out!
00156   }
00157 
00158   // if maxpairs is "unlimited", set it to the biggest positive int
00159   if (maxpairs < 0) {
00160     maxpairs = 2147483647;
00161   }
00162 
00163   // setup head of pairlist
00164   GridSearchPairlist *head, *cur;
00165   head = (GridSearchPairlist *) malloc(sizeof(GridSearchPairlist));
00166   head->next = NULL;
00167   paircount = vmd_bondsearch_thr(pos, radii, head, totb, 
00168                                  boxatom, numinbox, nbrlist, 
00169                                  maxpairs, pairdist);
00170 
00171   for (i=0; i<totb; i++) {
00172     free(boxatom[i]);
00173     free(nbrlist[i]);
00174   }
00175   free(boxatom);
00176   free(nbrlist);
00177   free(numinbox);
00178 
00179   cur = head->next;
00180   free(head);
00181 
00182   if (paircount > maxpairs) 
00183     msgErr << "vmdgridsearch_bonds: exceeded pairlist sanity check, aborted" << sendmsg;
00184 
00185   return cur;
00186 }
00187 
00188 
00189 
00190 // bond search thread parameter structure
00191 typedef struct {
00192   int threadid;
00193   int threadcount;
00194   wkf_mutex_t *pairlistmutex;
00195   GridSearchPairlist * head;
00196   float *pos;
00197   float *radii;
00198   int totb;
00199   int **boxatom;
00200   int *numinbox;
00201   int **nbrlist;  
00202   int maxpairs;
00203   float pairdist;
00204 } bondsearchthrparms;
00205 
00206 // thread prototype 
00207 extern "C" void * bondsearchthread(void *);
00208 
00209 // setup and launch bond search threads
00210 int vmd_bondsearch_thr(const float *pos, const float *radii,
00211                        GridSearchPairlist * head, 
00212                        int totb, int **boxatom, 
00213                        int *numinbox, int **nbrlist, int maxpairs, 
00214                        float pairdist) {
00215   int i;
00216   bondsearchthrparms *parms;
00217   wkf_thread_t * threads;
00218   wkf_mutex_t pairlistmutex; 
00219   wkf_mutex_init(&pairlistmutex); // init mutex before use
00220 
00221   int numprocs = wkf_thread_numprocessors();
00222 
00223   /* allocate array of threads */
00224   threads = (wkf_thread_t *) calloc(numprocs * sizeof(wkf_thread_t), 1);
00225 
00226   /* allocate and initialize array of thread parameters */
00227   parms = (bondsearchthrparms *) malloc(numprocs * sizeof(bondsearchthrparms));
00228   for (i=0; i<numprocs; i++) {
00229     parms[i].threadid = i;
00230     parms[i].threadcount = numprocs;
00231     parms[i].pairlistmutex = &pairlistmutex;
00232     parms[i].head = NULL;
00233     parms[i].pos = (float *) pos;
00234     parms[i].radii = (float *) radii;
00235     parms[i].totb = totb;
00236     parms[i].boxatom = boxatom;
00237     parms[i].numinbox = numinbox;
00238     parms[i].nbrlist = nbrlist;  
00239     parms[i].maxpairs = maxpairs;
00240     parms[i].pairdist = pairdist;
00241   }
00242 
00243 #if defined(VMDTHREADS)
00244   /* spawn child threads to do the work */
00245   for (i=0; i<numprocs; i++) {
00246     wkf_thread_create(&threads[i], bondsearchthread, &parms[i]);
00247   }
00248 
00249   /* join the threads after work is done */
00250   for (i=0; i<numprocs; i++) {
00251     wkf_thread_join(threads[i], NULL);
00252   }
00253 #else
00254   bondsearchthread(&parms[0]); // single-threaded code
00255 #endif
00256 
00257   // assemble final pairlist from sublists
00258   for (i=0; i<numprocs; i++) {
00259     if (parms[i].head != NULL) {
00260       GridSearchPairlist *tmp = head->next;
00261       head->next = parms[i].head;
00262       parms[i].head->next = tmp;
00263     }
00264   }
00265 
00266   wkf_mutex_destroy(&pairlistmutex); // destroy mutex when finished
00267 
00268   /* free thread parms */
00269   free(parms);
00270   free(threads);
00271 
00272   return 0;
00273 }
00274 
00275 extern "C" void * bondsearchthread(void *voidparms) {
00276   int i, j, aindex;
00277   int paircount = 0;
00278 
00279   bondsearchthrparms *parms = (bondsearchthrparms *) voidparms;
00280 
00281   const int threadid = parms->threadid;
00282   const int threadcount = parms->threadcount;
00283   wkf_mutex_t *pairlistmutex = parms->pairlistmutex;
00284   const float *pos = parms->pos;
00285   const float *radii = parms->radii;
00286   const int totb = parms->totb;
00287   const int **boxatom = (const int **) parms->boxatom;
00288   const int *numinbox = parms->numinbox;
00289   const int **nbrlist = (const int **) parms->nbrlist; 
00290   const int maxpairs = parms->maxpairs;
00291   const float pairdist = parms->pairdist;
00292 
00293   ResizeArray<int> *pairs = new ResizeArray<int>;
00294   float sqdist = pairdist * pairdist;
00295 
00296   wkfmsgtimer *msgt = wkf_msg_timer_create(5);
00297   for (aindex = threadid; (aindex < totb) && (paircount < maxpairs); aindex+=threadcount) {
00298     const int *tmpbox, *nbr;
00299     tmpbox = boxatom[aindex];
00300     int anbox = numinbox[aindex];
00301 
00302     if (((aindex & 255) == 0) && wkf_msg_timer_timeout(msgt)) {
00303       char tmpbuf[128] = { 0 };
00304       sprintf(tmpbuf, "%6.2f", (100.0f * aindex) / (float) totb);
00305 //  XXX: we have to use printf here as msgInfo is not thread-safe.
00306 //  msgInfo << "vmd_gridsearch_bonds (thread " << threadid << "): " 
00307 //          << tmpbuf << "% complete" << sendmsg;
00308       printf("vmd_gridsearch_bonds (thread %d): %s %% complete\n", 
00309         threadid, tmpbuf); 
00310     }
00311 
00312     for (nbr = nbrlist[aindex]; (*nbr != -1) && (paircount < maxpairs); nbr++) {
00313       int nnbr=*nbr;
00314       const int *nbrbox = boxatom[nnbr];
00315       int nbox=numinbox[nnbr];
00316       int self = (aindex == nnbr) ? 1 : 0;
00317 
00318       for (i=0; (i<anbox) && (paircount < maxpairs); i++) {
00319         int ind1 = tmpbox[i];
00320         const float *p1 = pos + 3L*ind1;
00321 
00322         // skip over self and already-tested atoms
00323         int startj = (self) ? i+1 : 0;
00324 
00325         for (j=startj; (j<nbox) && (paircount < maxpairs); j++) {
00326           int ind2 = nbrbox[j];
00327           const float *p2 = pos + 3L*ind2;
00328           float dx = p1[0] - p2[0];
00329           float dy = p1[1] - p2[1];
00330           float dz = p1[2] - p2[2];
00331           float ds2 = dx*dx + dy*dy + dz*dz;
00332 
00333           // perform distance test, but ignore pairs between atoms
00334           // with nearly identical coords
00335           if ((ds2 > sqdist) || (ds2 < 0.001))
00336             continue;
00337 
00338           if (radii) { // Do atom-specific distance check
00339             float cut = 0.6f * (radii[ind1] + radii[ind2]);
00340             if (ds2 > cut*cut)
00341               continue;
00342           }
00343 
00344           pairs->append(ind1);
00345           pairs->append(ind2);
00346           paircount++;
00347         }
00348       }
00349     }
00350   }
00351 
00352   // setup results pairlist node
00353   GridSearchPairlist *head;
00354   head = (GridSearchPairlist *) malloc(sizeof(GridSearchPairlist));
00355   head->next = NULL;
00356   head->pairlist = pairs;
00357 
00358   wkf_mutex_lock(pairlistmutex);   // lock pairlist before update
00359   parms->head = head;
00360   wkf_mutex_unlock(pairlistmutex); // unlock pairlist after update
00361 
00362   wkf_msg_timer_destroy(msgt);
00363 
00364   return NULL;
00365 }
00366 
00367 
00368 
00369 
00370 
00371 // determine bonds from position of atoms previously read.
00372 // If cutoff < 0, use vdw radius to determine if bonded.
00373 int vmd_bond_search(BaseMolecule *mol, const Timestep *ts, 
00374                     float cutoff, int dupcheck) {
00375   const float *pos;
00376   int natoms;
00377   int i;
00378   const float *radius = mol->radius();
00379  
00380   if (ts == NULL) {
00381     msgErr << "Internal Error: NULL Timestep in vmd_bond_search" << sendmsg;
00382     return 0;
00383   }
00384 
00385   natoms = mol->nAtoms; 
00386   if (natoms == 0 || cutoff == 0.0)
00387     return 1;
00388 
00389   msgInfo << "Determining bond structure from distance search ..." << sendmsg;
00390 
00391   if (dupcheck)
00392     msgInfo << "Eliminating bonds duplicated from existing structure..." << sendmsg;
00393 
00394   // Set box distance to either the cutoff, or 1.2 times the largest VDW radius
00395   float dist = cutoff; 
00396   if (cutoff < 0.0) {
00397     // set minimum cutoff distance for the case when the loaded molecule
00398     // has radii set to zero.  This must be >0.0 or the grid search will hang.
00399     dist = 0.833f;
00400     for (i=0; i<natoms; i++) {  
00401       float rad = radius[i];
00402       if (rad > dist) dist = rad;
00403     }
00404     dist = 1.2f * dist;
00405   }
00406   
00407   pos = ts->pos; 
00408  
00409   // Call the bond search to get all atom pairs within the specified distance
00410   // XXX set maxpairs to 27 bonds per atom, which ought to be ridiculously high
00411   //     for any real structure someone would load, but well below N^2
00412   GridSearchPairlist *pairlist = vmd_gridsearch_bonds(pos, 
00413                                                  (cutoff < 0) ? radius : NULL,
00414                                                  natoms, dist, natoms * 27L);
00415 
00416   // Go through the pairlist adding validated bonds freeing nodes as we go. 
00417   GridSearchPairlist *p, *tmp; 
00418   for (p = pairlist; p != NULL; p = tmp) {
00419     int numpairs = p->pairlist->num() / 2;
00420     
00421     for (i=0; i<numpairs; i++) {
00422       int ind1 = (*p->pairlist)[i*2L  ]; 
00423       int ind2 = (*p->pairlist)[i*2L+1];
00424 
00425       MolAtom *atom1 = mol->atom(ind1);
00426       MolAtom *atom2 = mol->atom(ind2);
00427 
00428       // don't bond atoms that aren't part of the same conformation
00429       // or that aren't in the all-conformations part of the structure
00430       if ((atom1->altlocindex != atom2->altlocindex) &&
00431           ((mol->altlocNames.name(atom1->altlocindex)[0] != '\0') && 
00432            (mol->altlocNames.name(atom2->altlocindex)[0] != '\0'))) {
00433         continue;
00434       }
00435 
00436       // Prevent hydrogens from bonding with each other.
00437 #if 1
00438       // XXX must use the atom name strings presently because the
00439       // hydrogen flags aren't necessarily set by the time the bond search
00440       // code executes.  It may soon be time to do something a different
00441       // with per-atom flag storage so that the atom types can be setup
00442       // during the earliest phase of structure analysis, eliminating this
00443       // and other potential gotchas.
00444       if (!IS_HYDROGEN(mol->atomNames.name(atom1->nameindex)) ||
00445           !IS_HYDROGEN(mol->atomNames.name(atom2->nameindex)) ) {
00446 #else
00447       // Use atomType info derived during initial molecule analysis for speed.
00448       if (!(atom1->atomType == ATOMHYDROGEN) ||
00449           !(atom2->atomType == ATOMHYDROGEN)) {
00450 #endif
00451         // Add a bond, bondorder defaults to 1, bond type to -1
00452         if (dupcheck)
00453           mol->add_bond_dupcheck(ind1, ind2, 1, -1);
00454         else
00455           mol->add_bond(ind1, ind2, 1, -1);
00456       }
00457     }
00458 
00459     // free this pairlist node and its ResizeArray of pairs
00460     tmp = p->next;
00461     delete p->pairlist;
00462     free(p);
00463   }
00464 
00465   return 1;
00466 }
00467 
00468 
00469 
00470 

Generated on Fri Oct 4 02:43:25 2024 for VMD (current) by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002