From: Ackad, Edward (eackad_at_siue.edu)
Date: Sun May 22 2022 - 10:54:55 CDT

Hi all,
I'm trying to write a script to renormalize the SASA measurement from GaMD runs (the scripts provided only do the reweighting for other measures). So I am trying to calculate the SASA for a small set of residues (using -restrict, which is why I can't use mdtraj's version). I can fit the stride=1 trajectory into memory without issue but once I start to calculate the SASA the memory blows up. I use a set for each frame and output to a file so I don't know why the memory requirements keep escalating. Is there a free or wipe memory option or some issue with my code (shown below)? Or any other ideas to make this not-so-memory intensive?
Thanks!

proc run_sasa {oname last_10ps_frame dcdstride } {
  # create the dictionary of the values for each reside
  set hydrophob [dict create ILE -9.73 ALA -9.57 PHE -9.23 LEU -8.66 MET -7.65 PRO -7.55 VAL -8.26 TRP -8.62 CYS -5.63 GLY -4.57 THR -4.06 SER -3.56 TYR -0.05 GLN 2.2 ASN 2.76 HIS 1.41 >

  set rdis 1.4
  set fname "$oname.dat"
  set outfile [open $fname w]
  set molid top
  set nes "protein and resid 40 to 53"
  set nesphobic "protein and resid 40 to 53 and hydrophobic"
  set bucket "protein and resid 48 51"

  for {set frame 0} {$frame < [molinfo $molid get numframes]} {incr frame} {
    set printline [measure sasa $rdis [atomselect $molid "protein" frame $frame]]
    set printline2 [measure sasa $rdis [atomselect $molid "protein" frame $frame] -restrict [atomselect $molid $bucket frame $frame ] ]
    set printline3 [measure sasa $rdis [atomselect $molid "protein" frame $frame] -restrict [atomselect $molid $nes frame $frame ] ]
    set printline4 [measure sasa $rdis [atomselect $molid "protein" frame $frame] -restrict [atomselect $molid $nesphobic frame $frame ] ]

    # calculate the sasa's for each hydrophobic res
    set totalhydrophobic 0.0
    set totalsasa 0.0
    # loop over all the NES residues
    for { set i 40 } {$i < 54 } {incr i} {
      #use just one atom (alpha == name CA) so we only get 1 name back instead of a list
      set ressel [atomselect top "resid $i and alpha"]
      #determine what the coefficent is using the name of the residue as the index for the dictionary
      set coef [dict get $hydrophob [$ressel get resname]]
      set thisres [measure sasa $rdis [atomselect $molid "protein" frame $frame] -restrict [atomselect $molid "protein and resid $i" frame $frame ] ]
      # sum each residues value scaled by the coef
      set totalsasa [expr $totalhydrophobic + $thisres*$coef]
      # if the residue is hydrophoic (coef <0 then sub it so we can get a positive number)
      if {$coef <0 } {
        set totalhydrophobic [expr $totalhydrophobic - $thisres*$coef]
      }
    }

    #set the actual frame of the frame to match with the output in the log file
    if { $frame <= $last_10ps_frame} {
      set outframe [expr $dcdstride*$frame*10000]
    } else {
      set outframe [expr $dcdstride*$frame*100000]
    }

    puts $outfile "$outframe $printline $printline2 $printline3 $printline4 $totalsasa $totalhydrophobic"
  }
  close $outfile

}