# ColorCoord 1.0 # -------------- # routines for coloring of the structure according to the coordination # intended for analysis of ice structures # water molecules are colored according to the number of h-bonded neighbors # lubos vrbka, 2006 # ********************************************************************************* # this file is heavily commented so you shouldn't have any problems with it # however feel free to contact me in case of any problems # usage: # set all CC_water... variables according to your simulation # modify the topology/psf names if applicable # you have to have NAMD topology and psf file for your system - you can use autopsf for it # then you can run the analysis itself # create the array containing number of hbonds per water molecule in the unit cell # this can take really long time! # parameters are distance/angle criterion for hbonds, first and last frame (starting # from 1) # array set n_hbonds_array [GetMolBonds 3.0 20 1 10] # you can save/restore the results of the analysis to/from a file # the following proc will save the n_hbonds array and the global variables # to a file # WriteMolHbondsFile [array get n_hbonds_array] n_hbonds_filename # it can be later safely retrieved # array set loaded_n_bonds_array [ReadMolHbondsFile n_hbonds_filename] # you can perform a kind of averaging (pseudo-running average) on the arrays with hbonds # check the code of the proc # array set n_hbonds_array_averaged [ # set the user field according to the number of hbonds # SetMolHbondsUser [array get n_hbonds_array] # you can use something like user > 3.5 in the selection field for graphical representation # ********************************************************************************* puts "tools for the analysis of the number of h-bonded neighbors" puts "lubos vrbka, 2006" puts "" puts "defined procedures/functions are:" puts "GetMolHbonds {distance angle first last} (returns array)" puts "WriteMolHbondsFile {n_hbonds_array filename}" puts "ReadMolHbondsFile {filename} (returns array)" puts "AverageMolHbonds {n_hbonds_array average} (returns array)" puts "SetMolHbondsUser {n_hbonds_array}" puts "" puts "check the source/docs for usage / more info" # the following global variables control the analysis # identification of frames in the trajectory # these are overwritten from the procedure GetMolHbonds/ReadMolHbondsFile set CC_first_frame 1 set CC_last_frame 1 set CC_total_frames 1 set CC_mol_id 0 # you need to change the following to change the names, ... # names for atom selections set CC_water_resname "NE6" set CC_water_oxygen_name "OW" # you can set more, e.g., the lone pairs, ... set CC_water_all "OW \"HW.\"" # total number of particles per residue according to CC_water_all selection set CC_water_all_nparticles 3 # variables for PBC handling and analysis # name of the temporary file - you need to have a write access to its location! set CC_tmpname "_temporary.pdb" set CC_psfname "_ne6_namd.psf" set CC_topname "_ne6_namd.top" set CC_pbc_threshold 4.0 # ********************************************************************************* # ********************************************************************************* proc GetMolHbonds { distance angle first last} { # get water coordination numbers and return an "array" with the data # parameters: # criteria for the hbond, first/last frame information # note that first frame has number 1 global CC_first_frame global CC_last_frame global CC_total_frames global CC_mol_id global CC_water_resname global CC_water_oxygen_name global CC_water_all global CC_tmpname global CC_psfname global CC_topname global CC_pbc_threshold # get ID of the currently active molecule # if you need another molecule -> set it here set mol_ID [molinfo top] set CC_mol_id $mol_ID # ************************************************************************ # handle frame selection # set global variables to zero set CC_first_frame 0 set CC_last_frame 0 set CC_total_frames 0 # get number of frames set numframes [molinfo $mol_ID get numframes] puts "total number of frames in trajectory: $numframes" set startframe $first set endframe $last if {$startframe <= 0 || $startframe > $numframes} { puts "illegal value of startframe, changing to first frame" set startframe 1 } if {$endframe < $startframe || $endframe > $numframes} { puts "illegal value of endframe, changing to last frame" set endframe $numframes } set totframes [expr ($endframe - $startframe + 1)] puts "analysis will be performed on $totframes frame(s) ($startframe to $endframe)" # set the global variables set CC_first_frame $startframe set CC_last_frame $endframe set CC_total_frames $totframes # now subtract 1 from all frame indexes - numbering starts with 0 set startframe [expr $startframe - 1] set endframe [expr $endframe - 1] # ************************************************************************ # set some selections set wat [atomselect $mol_ID "resname $CC_water_resname"] set cell_oxygen [atomselect $mol_ID "name $CC_water_oxygen_name"] set cell_water [atomselect $mol_ID "name $CC_water_all"] # create array (a dictionary) that will store number of hbonds for every oxygen # for every frame, i.e. # oxygen1 [ no_frame1 no_frame2 ...] oxygen2 [ no_frame1 no_frame2 ...] ... # commented out: # also create array that will contain all hbonds # oxygen1 [{indexes_frame1} {indexes_frame2} {indexes_frame3} ...] oxygen2 [...] ... # maybe there will be some use for it in the future :) foreach ox_index [$cell_oxygen list] { array set n_hbonds [list $ox_index ""] } # repeat analysis for all required frames for {set i $startframe} {$i <= $endframe} {incr i} { # update selections for the original geometry and get some residue mapping $wat frame $i $wat update set wat_unique_res [lsort -unique -integer [$wat get resid]] set wat_res [$wat get resid] set wat_name [$wat get name] $cell_oxygen frame $i $cell_oxygen update $cell_water frame $i $cell_water update set box [molinfo $mol_ID get {a b c}] # ************************************************************************ # handle the PBC - create the images of the central unit cell # requires psfgen to work package require psfgen # read the psf with topology for our system # has to be created beforehand mol load psf $CC_psfname # read the topology (was used to construct the psf file) topology $CC_topname set psf_ID [molinfo top] set n 0 set seglist {} # force segid original structure - if empty then it doesn't work # $wat set segid R$n # for all 27 different combinations of central cell shift foreach x [list 0.0 -1.0 1.0] { foreach y [list 0.0 -1.0 1.0] { foreach z [list 0.0 -1.0 1.0] { set vec [list [expr {$x * [lindex $box 0]} ] [expr {$y * [lindex $box 1]} ] [expr {$z * [lindex $box 2]}]] $wat moveby $vec puts "shifting by $vec" segment R$n { first NONE last NONE foreach res $wat_unique_res { residue $res NE6 } } lappend seglist R$n foreach resid $wat_res name $wat_name pos [$wat get {x y z}] { coord R$n $resid $name $pos } incr n $wat moveby [vecinvert $vec] # clean up unset vec } } } # just to be sure and clean up unset seglist # write the structure to a temporary file writepdb $CC_tmpname # load the temporary file mol load pdb $CC_tmpname # ************************************************************************ # do the analysis # write a message to indicate the progress # the following part takes the longest time... puts "analyzing frame [expr $i + 1] ([expr $i - $startframe + 1] of $totframes; requested frames [expr $startframe + 1] - [expr $endframe + 1])" # create the necessary atom selections # oxygens in the central cell set cc_oxygen [atomselect top "segid R0 and name $CC_water_oxygen_name"] # all oxygens in the system # we need to decrease the complexity -> we are taking into account only # oxygens within some distance of the central cell since we're interested only # in the hydrogen bonds of the central cell anyway # set $CC_pbc_threshold to something large to 'turn' this simplication off set restricted_oxygen [atomselect top "name $CC_water_oxygen_name and within $CC_pbc_threshold of (resname $CC_water_resname and segid R0)"] foreach o_index [$cc_oxygen list] { #foreach o_index [list 5] {} # set target oxygen in the central cell - we calculate number of # hbonds for this species set tgt_oxygen [atomselect top "index $o_index"] # measure hbonds requires list of donors and acceptors to be "orthogonal" # we choose all but the target oxygen; this is done for all cells # to ensure that hbonds at cell boundaries are correctly treated set exc_oxygen [atomselect top [format "%s %s %s" "([$restricted_oxygen text])" "and not" "([$tgt_oxygen text])"]] # do the hbond measurement # first, target oxygen acts as donor foreach {d_donor d_acceptor d_hydrogen} [measure hbonds $distance $angle $tgt_oxygen $exc_oxygen] break # second it acts as an acceptor foreach {a_donor a_acceptor a_hydrogen} [measure hbonds $distance $angle $exc_oxygen $tgt_oxygen] break # merge results to get total number of hbonds for the molecule set tot_hbonds [concat $d_acceptor $a_donor] lappend n_hbonds([$tgt_oxygen list]) [llength $tot_hbonds] # clean up unset a_donor a_acceptor a_hydrogen d_donor d_acceptor d_hydrogen unset tot_hbonds $tgt_oxygen delete $exc_oxygen delete } # clean up resetpsf unset wat_unique_res unset wat_res unset wat_name $cc_oxygen delete $restricted_oxygen delete # remove the molecule with PBC and psf mol delete top mol delete $psf_ID # repeat for another frame } # clean up $wat delete $cell_oxygen delete $cell_water delete puts "done" array set CC_n_hbonds [array get n_hbonds] return [array get n_hbonds] } # ********************************************************************************* # ********************************************************************************* proc WriteMolHbondsFile {n_hbonds_array filename} { # stores the global variables and n_hbonds array in a file # 'export' global variables global CC_first_frame global CC_last_frame global CC_total_frames global CC_mol_id global CC_water_resname global CC_water_oxygen_name global CC_water_all global CC_water_all_nparticles global CC_tmpname global CC_psfname global CC_topname global CC_pbc_threshold array set n_hbonds $n_hbonds_array # open the file set fw [open $filename w] puts $fw "MolHbondsCoord" puts $fw "$CC_first_frame $CC_last_frame $CC_total_frames $CC_mol_id" puts $fw $CC_water_resname puts $fw $CC_water_oxygen_name puts $fw $CC_water_all puts $fw $CC_water_all_nparticles puts $fw $CC_tmpname puts $fw $CC_psfname puts $fw $CC_topname puts $fw $CC_pbc_threshold puts $fw "numatoms [llength [array name n_hbonds]]" foreach {o_index h_bonds_list} [array get n_hbonds] { puts $fw $o_index puts $fw $h_bonds_list } close $fw puts "variables stored" } # ********************************************************************************* # ********************************************************************************* proc ReadMolHbondsFile {filename} { # restores the global variables and n_hbonds array from a file # 'export' global variables global CC_first_frame global CC_last_frame global CC_total_frames global CC_mol_id global CC_water_resname global CC_water_oxygen_name global CC_water_all global CC_water_all_nparticles global CC_tmpname global CC_psfname global CC_topname global CC_pbc_threshold # open the file set fr [open $filename r] # check the header gets $fr line if [eof $fr] { puts "premature eof in header" close $fr return } if {$line != "MolHbondsCoord"} { puts "unrecognized header" close $fr return } # read and set frame/molecule related variables gets $fr line if [eof $fr] { puts "premature eof in frame info" close $fr return } if {[llength $line] != 4} { puts "error in frame info line" close $fr return } set CC_first_frame [lindex $line 0]; set CC_last_frame [lindex $line 1] set CC_total_frames [lindex $line 2]; # set the CC_mol_id to the id of the TOP molecule # i.e., the stored value is ignored set CC_mol_id [molinfo top] # read and set the selection strings gets $fr CC_water_resname if [eof $fr] { puts "premature eof in water_resname string" close $fr return } gets $fr CC_water_oxygen_name if [eof $fr] { puts "premature eof in water_oxygen_name string" close $fr return } gets $fr CC_water_all if [eof $fr] { puts "premature eof in water_all string" close $fr return } gets $fr CC_water_all_nparticles if [eof $fr] { puts "premature eof in water_all_nparticles" close $fr return } # read and set the filenames etc... gets $fr CC_tmpname if [eof $fr] { puts "premature eof in tmpname" close $fr return } gets $fr CC_psfname if [eof $fr] { puts "premature eof in psfname" close $fr return } gets $fr CC_topname if [eof $fr] { puts "premature eof in topname" close $fr return } gets $fr CC_pbc_threshold if [eof $fr] { puts "premature eof in pbc_threshold" close $fr return } # read the n_hbonds array #firstly, the number of stored records gets $fr line if [eof $fr] { puts "premature eof in pbc_threshold" close $fr return } if {[llength $line] != 2} { puts "error in number of molecules (records) line" close $fr return } set n_records [lindex $line 1] for {set i 1} {$i <= $n_records} {incr i} { gets $fr o_index if [eof $fr] { puts "premature eof hbonds (atom index in set $i)" close $fr return } gets $fr h_bonds_list if [eof $fr] { puts "premature eof hbonds (atom index in set $i)" close $fr return } array set n_hbonds [list $o_index $h_bonds_list] } close $fr puts "variables restored:" puts "" puts "frames - first: $CC_first_frame last: $CC_last_frame total: $CC_total_frames mol_ID: $CC_mol_id" puts "water resname selection string: $CC_water_resname" puts "water oxygen name selection string: $CC_water_oxygen_name" puts "water all atoms selection string $CC_water_all" puts "number of particles in all_atoms per molecule: $CC_water_all_nparticles" puts "temporary file name: $CC_tmpname" puts "psf file name: $CC_psfname" puts "topology file name: $CC_topname" puts "PBC threshold for hbonds analysis: $CC_pbc_threshold" return [array get n_hbonds] } # ********************************************************************************* # ********************************************************************************* proc AverageMolHbonds {n_hbonds_array average} { # does a kind of running average # 'average' gives the number of samples that should be taken # it takes the respective value +- average values # if there are less values than required, average over smaller number of samples is used # returns an array with averaged data global CC_total_frames array set n_hbonds $n_hbonds_array # set some values set totframes $CC_total_frames set average_half [expr $average / 2] puts "averaging total $totframes frames, running average over $average samples" foreach {o_index n_hbonds_list} [array get n_hbonds] { # averaging is done for all oxygen atoms # create new array for the averaged values array set avg_n_hbonds [list $o_index ""] # check whether the list is ok - it should have $totframes samples # where totframes is the total number of analyzed frames if { [llength $n_hbonds_list] != $totframes } { puts "critical error - not enough samples in the h_bonds list!" return } # go through the list for {set i 0} {$i < $totframes} {incr i} { # set limits for averaging set start [expr $i - $average_half] if {$start < 0} { set start 0 } set stop [expr $i + $average_half] if {$stop >= $totframes} { set stop [expr $totframes - 1] } set samples [expr $stop - $start + 1] # do the respective average set nhb_value 0.0 for {set n $start} {$n <= $stop} {incr n} { set nhb_value [expr $nhb_value + [lindex $n_hbonds_list $n]] } set nhb_value [expr $nhb_value / $samples] # add the value to the new array lappend avg_n_hbonds($o_index) $nhb_value } } # return the averaged array return [array get avg_n_hbonds] } # ********************************************************************************* # ********************************************************************************* proc SetMolHbondsUser {n_hbonds_array} { # set the user field for the selected molecules/atoms to the value of n_hbonds global CC_first_frame global CC_last_frame global CC_total_frames global CC_mol_id global CC_water_all global CC_water_all_nparticles array set n_hbonds $n_hbonds_array # get frame info from global variable and # subtract 1 from all frame indexes - numbering starts with 0 set startframe [expr $CC_first_frame - 1] set endframe [expr $CC_last_frame - 1] set totframes [expr ($endframe - $startframe + 1)] set mol_ID $CC_mol_id # set some selections set cell_water [atomselect $mol_ID "name $CC_water_all"] # go over the trajectory # set original molecule user field according to the analysis for {set i $startframe; set listindex 0} {$i <= $endframe} {incr i; incr listindex} { # process the n_hbonds array # replicate the appropriate record (n_hbonds for the given oxygen and frame) # to provide CC_water_all_nparticles (the same) numbers. # we expect oxygens and hydrogens to be at the same # place in the topology, so if we have # atomselect "name OW" and atomselect "name OW \"HW.\"" # then the indexes for atoms belonging to the same molecule will be grouped # together and we can just use n_hbonds to mark the appropriate hydrogens(lone pairs,...) as well # if they are put to the same selection $cell_water frame $i $cell_water update set frame_user {} foreach o_index [lsort -integer [array name n_hbonds]] { set nhb_value [lindex $n_hbonds($o_index) $i] for {set x 0} {$x < $CC_water_all_nparticles} {incr x} { set frame_user [concat $frame_user $nhb_value] } } $cell_water set user $frame_user # clean up unset nhb_value frame_user # repeat for next frame } $cell_water delete }