############################################################################
#cr                                                                       
#cr            (C) Copyright 1995-1998 The Board of Trustees of the            
#cr                        University of Illinois                         
#cr                         All Rights Reserved                           
#cr                                                                       
############################################################################

############################################################################
# RCS INFORMATION:
#
# 	$RCSfile: stride.tcl,v $
# 	$Author: sergei $	$Locker:  $		$State: Exp $
#	$Revision: 1.3 $	$Date: 1998/02/16 18:10:36 $
#
############################################################################
# DESCRIPTION:
#   Compute the secondary structure.  By default this calls STRIDE but
# it could potentially be changed (e.g. to use the PDB file)
#
############################################################################

# write the protein fragments to a file with the special transformation
# that the chain id == ' ' + 1 + pfrag number .  This sets a limit of
# 127 - 36 = 91 protein fragments allowed (I exclude -,",{ and }), so if that 
# happens I go down the list of proteins one at a time

set vmd_stride_convert_string {!#$%&'()*+,./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz|~}

# This is an autoloaded script.  I can imagine others want to
# redefine this function, so I check to see if it already exists and,
# if so, don't re-define it.
# This is a shenanigan to get around the autoloader
if {[info procs vmd_calculate_structure] == "vmd_calculate_structure"} {
    rename vmd_calculate_structure vmd_calculate_structure_tmp
}
proc vmd_calculate_structure {molid} {
    vmd_stride_full $molid
}
# If there was a rename, replace with the original
if {[info procs vmd_calculate_structure_tmp] == \
	"vmd_calculate_structure_tmp"} {
    rename vmd_calculate_structure {}
    rename vmd_calculate_structure_tmp vmd_calculate_structure
}


set vmd_stride_firsttime 1
proc vmd_stride_full {molid} {
    global vmd_stride_firsttime
    if {$vmd_stride_firsttime} {
	set vmd_stride_firsttime 0
puts "In any publication of scientific results based in part or completely"
puts "on the use of the program STRIDE, please reference:"
puts "  Frishman,D & Argos,P. (1995) Knowledge-based secondary structure"
puts "  assignment.  Proteins:  structure, function and genetics, 23, 566-579."
    }

    global vmd_stride_convert_string env
    molinfo $molid get index
    set pfragsel [atomselect $molid "pfrag >= 0" ]
    $pfragsel set structure coil
    set pfrags [luniq [lsort -integer [$pfragsel get pfrag]]]
    set pnum [lindex $pfrags end]
    if {$pnum > 91} {
	puts "\nThere are $pnum protein fragments, but only 91 chain numbers"
	puts " are available.  Trying one protein fragment at a time"
	return [vmd_stride_simple $molid]
    }
    # are there fewer than 10,000 residues
    set resids [luniq [lsort [$pfragsel get residue]]]
    set num_resids [lindex $resids end]
    if {$num_resids > 9999} {
puts "\nThere are $num_resids protein in the molecule but STRIDE will only"
puts " analyze up to 10,000.  Trying one protein fragment at a time"
        return [vmd_stride_simple $molid]
    }

    # write the molecule to a PDB file
    global env
    set filename [unique_file "$env(TMPDIR)/vmdstride.in"]
    set dataname [unique_file "$env(TMPDIR)/vmdstride.out"]

    vmd_stride_pdb_write $filename $pfrags $molid
    set arch_name [vmdinfo arch]
    set exec_name "$env(VMDDIR)/stride_$arch_name"
    if [catch {exec $exec_name -o $filename | awk {/^LOC/ {print $2,$4,$5,$7,$8}} > $dataname 2> /dev/tty}] {
	puts "First pass through STRIDE didn't work, trying one protein fragment at a time"
	exec /bin/rm -f $filename $dataname
	return [vmd_stride_simple $molid]
    }
    set fileId [open $dataname r]

    while {[gets $fileId line] >= 0} {
	lassign $line ss resid1 chain1 resid2 chain2
	# convert from chain1 to pfrag
	set pfrag [string first $chain1 $vmd_stride_convert_string]
	set sel [atomselect $molid "pfrag $pfrag and resid $resid1 to $resid2"]
	set assign C
	switch -glob -- $ss {
	    AlphaHelix { set assign "H"}
	    310Helix { set assign "G"}
	    PiHelix { set assign "I"}
	    Strand { set assign "E"}
	    Bridge { set assign "B"}
	    Coil { set assign "C"}
	    Turn* { set assign "T"}
	    Gamma* { set assign "T"}
	    Disulfide {}
	    default { puts "unknown stride assignment $ss"}
	}
	$sel set structure [ldup [$sel num] $assign]
    }
    close $fileId
    # get rid of the temp files
    exec /bin/rm -f $filename $dataname
}

proc vmd_stride_pdb_write {filename pfrags molid} {
    global vmd_stride_convert_string
    # open the file (returns error if it couldn't)
    set fileId [open $filename w]
    puts $fileId [list REMARK STRIDE input file from VMD]

    # turn the string into an array
    set tmpstr $vmd_stride_convert_string
    for {set i 0} {$i < 91} {incr i} {
	set arr($i) [string range $vmd_stride_convert_string $i $i]
    }

    # write data from each pfrag
    foreach pfrag $pfrags {
	set sel [atomselect $molid "pfrag $pfrag"]
	set seldata [$sel get {
	 index name resname resid x y z occupancy beta segname}]
	set chain $arr($pfrag)
	foreach data $seldata {
	    lassign $data index name resname resid x y z \
		occupancy beta segname
	    incr index

   # the resname should end in column 3 (of the four) if < 4 characters
	    if {[string length $resname] < 4} {
		set resname "$resname "
	    }
	    puts $fileId [format "ATOM  %5d %-4s%1s%-4s%1s%4d%1s   %8.3f%8.3f%8.3f%6.2f%6.2f %3s  %4s" $index $name " " $resname $chain $resid " " $x $y $z $occupancy $beta "   " $segname]
	}
    }
    puts $fileId END
    close $fileId
}


proc vmd_stride_simple {molid} {
    # check that the molecule exists
    molinfo $molid get index

    # go along each pfrag
    set pfragsel [atomselect $molid "pfrag >= 0"]
    $pfragsel set structure coil
    set pfrags [luniq [lsort -integer [$pfragsel get pfrag]]]

    global env
    set filename [unique_file "$env(TMPDIR)/vmdstride.in"]
    set dataname [unique_file "$env(TMPDIR)/vmdstride.out"]
    foreach pfrag $pfrags {
	# get a pfrag
	set pfragsel [atomselect $molid "pfrag $pfrag"]
	# save to a file
	$pfragsel writepdb $filename

	set arch_name [vmdinfo arch]
	set exec_name  "$env(VMDDIR)/stride_$arch_name"
	if [catch {exec $exec_name -o $filename > $dataname 2> /dev/tty}] {
	    # get rid of the temp files
	    exec /bin/rm $filename $dataname
error "Could not execute STRIDE on data set! No secondary structure assignment"
	}

	# and read the LOC record inputs
	# 2th column for the residue, 4th for the start 7 for the end
	set fileId [open $dataname r]
	while {[gets $fileId line] >= 0} {
	    if [regexp "^LOC" $line] {
		lassign $line head ss name1 resid1 chain1 name2 resid2 chain2
		set sel [atomselect $molid "pfrag $pfrag and resid $resid1 to $resid2"]
                set assign C
		switch -glob -- $ss {
		    AlphaHelix { set assign "H"}
		    310Helix { set assign "G"}
		    PiHelix { set assign "I"}
		    Strand { set assign "E"}
		    Bridge { set assign "B"}
		    Coil { set assign "C"}
		    Turn* { set assign "T"}
		    Gamma* { set assign "T"}
		    default { puts "unknown stride assignment $ss"}
		}
#		puts [list $assign $resid1 $resid2]
		$sel set structure [ldup [$sel num] $assign]
	    }
	}
    }
    # and get rid of the temp files
    exec /bin/rm -f $filename $dataname
}


######## Not properly part of stride, but it is secondary structure related

## and a way to change the structure by hand
# eg: structure top "resid 5 to 12" helix
proc structure {molecule_id atom_selection structure_type} {
    set sel [atomselect $molecule_id $atom_selection]
    $sel set structure $structure_type
}