#!/usr/bin/python3

# by Marcelo Melo (melomcr@gmail.com)

# If you would like to run a Quantum Chemistry software which is not supported by NAMD,
# an interface script can be created in order to convert NAMD's input into the format
# expected by the QC software, and subsequently convert the software's output inro the
# format expected by NAMD.
# 
# This script creates an interface for ORCA, so that it can be ran using the custom 
# QM software option provided by NAMD. It is an example script, since ORCA is supported
# natively by NAMD's QM/MM interface, and is intended to be used by those wishing to 
# test theyr own software in hybrid MD simulations through NAMD.

#######  NAMD Files Format
# INPUT: This input file will contain on the first line the number of QM atoms (X)
# and the number of point charges in the file (Y, which may be 0 or more), separated 
# by a space. The following X+Y lines will have four (4) fields: X, Y and Z coordinates,
# and a fourth field which will depend on the type of entry. For QM atoms, the 
# field will contain the element of the QM atom. For point charge lines, the field
# will contain the charge of the point charge.
# 
# OUTPUT: The expected output file whould be placed in the same directory as the 
# input file, and should be named "*inputfile*.result" (meaning it will have the
# same path and name of the input file, plus the suffix '.result'). This file
# should have, on its first line, the energy of the system, and on the following 
# X lines (where X is the number of QM atoms passed in the input file), four (4) 
# fields: the x, y and z components of the TOTAL FORCE applied on that atom, and 
# on the fourth field, the charge of the atom. If the user indicates that charges
# from the QM software should not be used (see "QMChargeMode"), the fourth field
# should have zeroes, but should not be empty.


#######  ORCA Files Format

# ORCA input file is composed of a variable-sized header, where all info pertaining
# to the simulation is specified, and a corrdinates section, where all atom positions
# are specified, along with some informations regarding the overall system (namely
# the charge and multiplicity of the system).
#
# We will get the header and the initial part of the coordinates section from 
# lines specified in this script. The data would be gathered and writen by NAMD,
# but in this case we need to previously prepare it from hardcoded values or by 
# reading from another file.
# 
# ORCA expects a separate file with point charge information. This info is provided
# by NAMD in the same data file, so we will extract it and paste it in a separate 
# file in the same folder. The name and path to the point charge file needs to be 
# included in the configuration portion of ORCA's input file.

##################  Imports

from sys import argv as sargv
from sys import exit 
from os.path import realpath, dirname, exists, join as mkpath
import subprocess as sp, numpy as np

################## Hardcoded parameters

# Here we set up a template for the header and options of the input file.

# The choice of method and basis-set, as well as parallelization and charge
# output are all made here.
# For ORCA, the keyword "ENGRAD" is essential for the proper output of gradients
# over QM atoms.

orcaConfigLines1 = """\
! HF-3c EnGrad
%pal nprocs 6 end
%method FrozenCore FC_ELECTRONS end
%output 
  PrintLevel Mini 
  Print [ P_Mulliken ] 1
  Print [ P_AtCharges_M ] 1
end

"""

# We set up the template for the point charge file indicator

orcaConfigLines2 = "%pointcharges \""

# And set up the header for the coordinates section of the input file.

orcaConfigLines3 = """\
%coords
  CTyp xyz
  Charge -3.000000
  Mult 1.000000
  Units Angs
  coords

"""

################## Processing of file names

inputFilename = realpath(sargv[1])

#print(inputFilename)

directory = dirname(inputFilename)

# Prepares the name of the configuration file based on full path to data file provided by NAMD
orcaInFileName = directory + "/"
orcaInFileName += "qmmm.input"

# Prepares the name of the point charge file based on full path to data file provided by NAMD
#%pointcharges "/dev/shm/NAMD_test/0/qmmm_0.input.pntchrg"
pcFileName = orcaInFileName
pcFileName += ".pntchrg"

orcaConfigLines2 += pcFileName + "\"\n"


# Prepares the name of the output file based on full path to data file provided by NAMD.
# This is where we will direct all ORCA output, so that we can grab the atom charge
# information to pass back to NAMD.
orcaOutFileName = orcaInFileName
orcaOutFileName += ".TmpOut"

# Prepares the name of the gradient file based on full path to data file provided by NAMD.
# This is where ORCA will write the gradient information on QM atoms.
orcaGradFileName = orcaInFileName
orcaGradFileName += ".engrad"

# Prepares the file name for the file which will be read by NAMD
finalResFileName = inputFilename
finalResFileName += ".result"

#print("orcaInFileName:",orcaInFileName)
#print("pcFileName:",pcFileName)
#print("orcaOutFileName:",orcaOutFileName)
#print("orcaGradFileName:",orcaGradFileName)
#print("finalResFileName:",finalResFileName)

################## Reading and parsing NAMD's data ; Writing ORCA's Input

# Reads NAMD data
infile = open(inputFilename,"r") 

line = infile.readline()

# Gets number of atoms in the Quantum Chemistry region (= QM atoms + Link atoms)
numQMatms = int(line.split()[0])
# Gets number of point charges
numPntChr = int(line.split()[1])

# Prepare a numpy array for current coordinates
QMcoords = np.zeros((numQMatms, 3), dtype=float)

#print("numQMatms:",numQMatms,"; numPntChr",numPntChr)

# stores all lines written to ORCA's input file
outLinesQM = []

# stores all lines written to ORCA's point charge file
outLinesPC = []

# The first line in the point charge file is composed only of the total number
# of point charges in the file.
outLinesPC.append(str(numPntChr) + "\n")

# Identation
ident = "  "

lineIndx = 1
for line in infile:
    
    words = line.split()
    posx, posy, posz = words[0:3]
    
    if lineIndx <= numQMatms:
        
        # ORCA's format requires the fileds to be ordered begining with the
        # atom's element symbol, and followed by the XYZ coordinates.
                
        element = words[3]
        
        outLinesQM.append(ident + " ".join([element,posx,posy,posz]) + "\n")

        # Remember coords in numpy array - the type will be converted
        # str->float
        QMcoords[lineIndx-1][:] = [posx,posy,posz]
        
    else:
        
        # ORCA's format requires the fileds to be ordered begining with the
        # charge, and followed by the XYZ coordinates.
        
        charge = words[3]
        
        outLinesPC.append(" ".join([charge,posx,posy,posz]) + "\n")
    
    lineIndx += 1
    
# Finalizes the formating for ORCA's input file, one "end" to terminate the
# atomic coordinates, and nother to terminate the section.
outLinesQM.append(ident + "end" + "\n")
outLinesQM.append("end" + "\n")

infile.close()


# Check previous coordinates
reuse    = False
prevFile = mkpath(directory, 'prevcoord.npz')
prevEps  = 2e-6
keyForm  = 'step%06d'
frcForm  = 'force%06d'
maxSteps = 500
if exists(prevFile):
   tmp = np.load(prevFile)
   prevCoord = dict(tmp)
   tmp.close()
   steps = sorted([k for k in prevCoord.keys() if k[:4] == 'step'])
   last  = int(steps[-1][4:])
   for s in steps:
      if np.all(np.abs(QMcoords - prevCoord[s]) < prevEps):
         print("REPEATED COORDS from %s at step %d" % (s, last+1))
         reuse = int(s[4:])
   if reuse:
         print("WILL REUSE halved forces from step %d" % reuse)
   while len(steps) > maxSteps: # remove excess of old data
      n = int(steps.pop(0)[4:])
      prevCoord.pop(keyForm % n, None)
      prevCoord.pop(frcForm % n, None)
else:
   last = 0
   prevCoord = {}


# To prevent oscillations with QM/MM minimization with NAMD, when this script
# detects that (almost) exactly the same coordinates it already received in
# the past, it will NOT calculate (same as then) forces. To break the
# oscillatory behaviour, it will output old results but with forces divided
# by 2 to convince NAMD to take a smaller step.
if reuse:
   coords = prevCoord[keyForm % reuse]
   forces = prevCoord[frcForm % reuse]

   finFile = open(finalResFileName,"wb")

   # The first line should contain the energy
   # which was previously saved in the first cell of the last row of forces
   finalEnergy = forces[-1][0]
   finFile.write(b'%.12f\n' % finalEnergy)

   # And all follwoing lines contain forces and partial charges
   # First, divide forces by two
   forces[:numQMatms,:3] /= 2.
   np.savetxt(finFile, forces[:numQMatms], fmt='%18.12f')
    
   finFile.close()

   # Add current coords and forces to previously saved
   prevCoord[keyForm % (last+1)] = QMcoords
   prevCoord[frcForm % (last+1)] = forces
   np.savez(prevFile, **prevCoord)

   exit(0)

################## Run ORCA and collect forces and charges

# This will only be necessary if we have no previous results for the same
# coordinates

# Prepare input file
with open(orcaInFileName,"w") as outQMFile:
    
    outQMFile.write(orcaConfigLines1)
    outQMFile.write(orcaConfigLines2)
    outQMFile.write(orcaConfigLines3)
    
    for line in outLinesQM:
        outQMFile.write(line)

with open(pcFileName,"w") as outPCFile:
    
    for line in outLinesPC:
        outPCFile.write(line)

# The working folder of ORCA will be where NAMD saved the input file
# check=True raises an exception if ORCA fails
# stdout is sent to file directly
# subprocess.run() waits for the process to finish
cmdline = ["/home/kedziers/bin/run_orca_namd", orcaInFileName]
orcaOut = open(orcaOutFileName, 'w')
proc = sp.run(args=cmdline, stdout=orcaOut, cwd=directory, check=True)
orcaOut.close()

################## Process ORCA results
# Gets system energy and gradients from ORCA's output file.

gradFile = open(orcaGradFileName,"r")
    
# Skips to the line with number of atoms
for i in range(3):
    gradFile.readline()

orcaNumQMAtms = int(gradFile.readline())

# Runs basic sanity check
if orcaNumQMAtms != numQMatms:
    print("ERROR: Expected",numQMatms,"but found",orcaNumQMAtms,"atoms in engrad file!")
    exit(1)

# Skips to the line with final system energy
for i in range(3):
    gradFile.readline()

finalEnergy = gradFile.readline().strip()

print("ORCA energy: ", finalEnergy, "Eh")

# All energies are given in Eh (Hartree)
# NAMD needs energies in kcal/mol
# The conversion factor is 627.509469
finalEnergy = float(finalEnergy) * 627.509469

print("ORCA energy: ", str(finalEnergy), "kcal/mol")

# Skips to the lines with gradients
for i in range(3):
    gradFile.readline()

# Stores all gradients (yes, it would be faster with numpy, but this is just an
# example)
# FIXME: convert to numpy and save for reuse. Also, do not run ORCA again above
# if the same coords were received. I will need to save both forces and charges
# to the same array

# last row added to store the energy, too
grads_q = np.zeros((orcaNumQMAtms+1, 4), dtype=float)

for i in range(orcaNumQMAtms):
    
    # All *GRADIENTS* are given in Eh/a0 (Hartree over Bohr radius)
    # NAMD needs *FORCES* in kcal/mol/angstrons
    # The conversion factor is -1*627.509469/0.529177 = -1185.82151
    for j in range(3):
        grads_q[i,j] = float(gradFile.readline()) * -1185.82151

grads_q[orcaNumQMAtms,0] = finalEnergy
gradFile.close()

###

# Gets atom charges from the temporary output file.
tmpOutFile = open(orcaOutFileName,"r")

# Iterates ultil we find the section of output that contains atomic partial 
# charges for QM atoms
chargeSection = False

iterate = True
iq      = 0

while iterate:
    
    line = tmpOutFile.readline()
    
    if line.find("MULLIKEN ATOMIC CHARGES") != -1:
        chargeSection = True
        # Skips a dividing line
        line = tmpOutFile.readline()
        continue
    
    if chargeSection:
        grads_q[iq][3] = line.split()[3]
        iq += 1
    
    if iq == numQMatms:
        break

tmpOutFile.close()

# Writes the final output file that will be read by NAMD.

finFile = open(finalResFileName,"wb")

# The first line constains the enegy
finFile.write(b'%.12f\n' % finalEnergy)

# And all follwoing lines contain gradients and partial charges
np.savetxt(finFile, grads_q[:numQMatms], fmt='%18.12f')
finFile.close()

# Add current coords and forces to previously saved
prevCoord[keyForm % (last+1)] = QMcoords
prevCoord[frcForm % (last+1)] = grads_q
np.savez(prevFile, **prevCoord)


##########
# Makes sure the secondary process has ended before we return to NAMD.
# proc2.wait()

exit(0)