From: Jeff Comer (jeffcomer_at_gmail.com)
Date: Tue Feb 24 2015 - 15:39:12 CST
I wrote some code find clusters a while back. You make a selection
with "selectText" that contains the residues that you want to find
clusters for. The code iterates through the frames of a list of dcd
files and uses pbwithin to determine which residues in the selection
neighbor another. The proc "searchOverlap" finds clusters of connected
residues. There are four output files that store the result for each
frame, specifically the mean cluster size, the number of clusters, the
size of the largest cluster, and a list of the number of residues in
each cluster. You can set the parameters of the script at the top. You
may have to modify the code for your application. Here's the code.
# Author: Jeff Comer <jeffcomer at gmail>
set name test
set selectText "resname PEG and resid 11"
set threshDist 4.5
set psf nh_stride5_eqSalt_sio20min_peg19.psf
set dcdFreq 25000
set timestep 2
set stride 1
set outDir Cluster
set dcdList {nh_stride5_eqSalt_sio20min_peg19.dcd}
#######################################################
# Cluster progs
# Do two lists have any of the same elements?
proc checkOverlap {aList bList} {
foreach a $aList {
set i [lsearch -sorted -integer $bList $a]
if {$i >= 0} {
return 1
}
}
return 0
}
proc searchOverlap {neighListList} {
set n [llength $neighListList]
set n1 [expr {$n-1}]
# Loop over pairs of elements.
for {set i 0} {$i < $n1} {incr i} {
set li [lindex $neighListList $i]
for {set j [expr {$i+1}]} {$j < $n} {incr j} {
set lj [lindex $neighListList $j]
if {[checkOverlap $li $lj]} {
# They overlap. Merge the lists.
set lij [lsort -unique -integer [concat $li $lj]]
# Delete the other list.
# (Do this first since it's higher in the array.)
set neighListList [lreplace $neighListList $j $j]
# Insert the merged list.
set neighListList [lreplace $neighListList $i $i $lij]
return $neighListList
}
}
}
return $neighListList
}
# Make unique clusters from lists of neighbors.
proc getClusters {neighListList} {
set n [llength $neighListList]
set n0 0
while {$n != $n0} {
set n0 $n
set neighListList [searchOverlap $neighListList]
set n [llength $neighListList]
}
return $neighListList
}
proc mean {l} {
set sum 0.0
foreach i $l {
set sum [expr {$sum + $i}]
}
return [expr {$sum/[llength $l]}]
}
set maxFrames 100
# Compute the interval in nanoseconds.
set frameInterval [expr {1e-6*$timestep*$dcdFreq}]
set displayPeriod 10
if {$stride < 1} { set stride 1 }
# Get the time change between frames in nanoseconds.
set dt [expr {$frameInterval*$stride}]
# Open the output file.
set out [open $outDir/clusterSizeMean_${name}.dat w]
set outN [open $outDir/clusterNum_${name}.dat w]
set outDist [open $outDir/clusterDist_${name}.dat w]
set outMax [open $outDir/clusterSizeMax_${name}.dat w]
# Load the system.
mol load psf $psf
set sel [atomselect top $selectText]
# "residue" requires that the bonds be assigned correctly
foreach silent {0} {
set resList [lsort -unique -integer [$sel get residue]]
}
$sel delete
# Loop over the dcd files.
set nFrames0 0
foreach dcd $dcdList {
# Load the trajectory.
set nFrames $maxFrames
set startFrame 0
while {$nFrames == $maxFrames} {
set endFrame [expr {$startFrame + $stride*($maxFrames - 1)}]
puts "Reading from $dcd from frame $startFrame to $endFrame inclusive"
animate delete all
mol addfile $dcd first $startFrame last $endFrame step $stride waitfor all
set nFrames [molinfo top get numframes]
puts "Reading $nFrames frames."
set startFrame [expr {$endFrame + 1}]
# Move forward computing at every step.
for {set f 0} {$f < $nFrames} {incr f} {
# Get the time in nanoseconds for this frame.
set t [expr {($nFrames0+$f)*$dt}]
# Get the positions at the current frame.
molinfo top set frame $f
# Make a list of all of the neighbors of each residue.
set neighListList {}
foreach res $resList {
set s [atomselect top "($selectText) and pbwithin $threshDist
of residue $res"]
set neighList [lsort -unique -integer [$s get residue]]
lappend neighListList $neighList
$s delete
}
# Get the clusters and their sizes.
set clusterList [getClusters $neighListList]
set clusterNum [llength $clusterList]
if {$clusterNum == 0} {
puts "ERROR: Found zero clusters. Problem with the selection text?"
exit
}
set sizeList {}
foreach cluster $clusterList {
lappend sizeList [llength $cluster]
}
set sizeList [lsort -integer $sizeList]
# Write the sizes.
puts $outDist $sizeList
set meanSize [mean $sizeList]
puts $out "$t $meanSize"
puts $outMax "$t [lindex $sizeList end]"
puts $outN "$t $clusterNum"
if {$f % $displayPeriod == 0} {
puts "FRAME $f: $t clusterNum $clusterNum meanSize $meanSize
minSize [lindex $sizeList 0] maxSize [lindex $sizeList end]"
}
}
set nFrames0 [expr {$nFrames+$nFrames0}]
}
}
close $out
close $outN
close $outMax
close $outDist
mol delete top
exit
–––––––––––––––––––––––––––––––––––———————
Jeffrey Comer, PhD
Assistant Professor
Institute of Computational Comparative Medicine
Nanotechnology Innovation Center of Kansas State
Kansas State University
Office: P-213 Mosier Hall
Phone: 785-532-6311
On Tue, Feb 24, 2015 at 9:39 AM, Jérôme Hénin <jerome.henin_at_ibpc.fr> wrote:
> Hi,
>
> There is a Gromacs tool, g_clustsize, that might do what you want.
>
> Jerome
>
> On 24 February 2015 at 03:28, Aditya Ranganathan <aditya.sia_at_gmail.com>
> wrote:
>>
>> Dear All,
>>
>> I`m doing some protein aggregation related simulations using the residue
>> based coarse graining approach. I was wondering if there are any scripts
>> related to protein aggregation related trajectory analysis. For example, I
>> want to keep track of the cluster sizes (monomer, dimers, trimers etc) with
>> time. Are there any scripts for the same.
>>
>> Thanks and Regards
>>
>> Srivastav Ranganathan
>> IIT Bombay
>> Mumbai
>> India
>
>
This archive was generated by hypermail 2.1.6 : Thu Dec 31 2015 - 23:21:40 CST