NAMD
ReadAmberParm.h
Go to the documentation of this file.
1 #ifndef READ_AMBER_PARM_H
2 #define READ_AMBER_PARM_H
3 
4 #include <string>
5 #include <vector>
6 #include <fstream>
7 #include <unordered_map>
8 #include <tuple>
9 #include <type_traits>
10 #include <functional>
11 
12 /* Full specification:
13  * https://ambermd.org/FileFormats.php#topology
14  * https://ambermd.org/prmtop.pdf
15  *
16  * Requirements for prmtop parsers Parsers: regardless of the language they are
17  * written in, should conform to a list of attributes to maximize the likelihood
18  * that they are parsed correctly.
19  * 1. Parsers should expect that some 4-character fields (e.g., atom or residue names)
20  * may have some names that have 4 characters and therefore might not be
21  * whitespace-delimited.
22  * 2. Parsers should not expect SECTIONs in the prmtop to be in any particular
23  * order.
24  * 3. Parsers should not expect or require %COMMENT lines to exist, but should
25  * properly parse the file if any number of %COMMENT lines appear as indicated
26  * above
27  * 4. The topology file may be assumed to have been generated ‘correctly’ by tleap or
28  * some other credible source. No graceful error checking is required.
29  *
30  * To meet the requirement 1, this implementation uses a two-stage approach. The first
31  * stage splits the files by sections containing %FLAG, %FORMAT and data into a map,
32  * and it doesn't care about the actual meaning of each section.
33  * The second stage analyses the %POINTER section at first, and then turns the map into
34  * a struct used by NAMD. This stage will match the section names by hard-coded strings,
35  * and do lots of sanity checks and memory allocations.
36  */
37 
38 namespace AmberParm7Reader {
39 
40 #ifdef DOUBLE
41 #define _REAL double
42 #else
43 #define _REAL float
44 #endif
45 
46 using std::vector;
47 using std::string;
48 using std::unordered_map;
49 using std::tuple;
50 using std::make_tuple;
51 
52 /* Frankly speaking, I don't fully understand the whole specification of fortran
53  * format specifiers. I try to implement a few of them, which should be enough for
54  * parsing the AMBER parm7 format.
55  */
57  bool IsPadding; // boolean value to determine if this is a specifier or a string padding
58  string Padding; // support padding characters inside quotation marks
59  char Type; // type of the field
60  int Width; // the length of the field
61  int NumOfFields;// number of fields
62  int Precision; // precision of real type
63 };
64 
65 /* struct for reading fortran data regardless of types
66  * I'd like to use std::variant but that is in C++17 and we are bound to C++11
67  */
68 struct FortranData {
69  enum class FortranDataType {String, Int, Real};
71  string String;
72  int Int;
75  // constructors by types
76  FortranData(const string& str): Type(FortranDataType::String), String(str) {}
77  FortranData(const int& x): Type(FortranDataType::Int), Int(x) {}
79 };
80 
81 /* A map to store the data read from AMBER parm7 format
82  * key: title of a %FLAG section
83  * value: a tuple of a boolean and the read data from the section
84  * the boolean value is used in read_amber_parm_stage2 to check if the
85  * section is really parsed
86  */
87 typedef unordered_map<string, tuple<bool, vector<FortranData>>> AmberTopparMap;
88 
89 /* AMBER data structure used by NAMD
90  */
91 struct Ambertoppar {
92  bool HasData = false;
93  bool IsCharmmFF; // is this a CHARMM force field in AMBER format?
94  // version info
95  string build_time;
96  string version_stamp;
97  string ititl; // title
98  int Natom; // total number of atoms
99  int Ntypes; // total number of distinct atom types
100  int Nbonh; // number of bonds containing hydrogen
101  int Mbona; // number of bonds not containing hydrogen
102  int Ntheth; // number of angles containing hydrogen
103  int Mtheta; // number of angles not containing hydrogen
104  int Nphih; // number of dihedrals containing hydrogen
105  int Mphia; // number of dihedrals not containing hydrogen
106  int Nhparm; // currently not used
107  int Nparm; // used to determine if addles created prmtop
108  int Nnb; // number of excluded atoms
109  int Nres; // number of residues
110  int Nbona; // MBONA + number of constraint bonds
111  int Ntheta; // MTHETA + number of constraint angles
112  int Nphia; // MPHIA + number of constraint dihedrals
113  int Numbnd; // number of unique bond types
114  int Numang; // number of unique angle types
115  int Nptra; // number of unique dihedral types
116  int Natyp; // number of atom types in parameter file, see SOLTY below
117  int Nphb; // number of distinct 10-12 hydrogen bond pair types
118  int Ifpert; // set to 1 if perturbation info is to be read in
119  int Nbper; // number of bonds to be perturbed
120  int Ngper; // number of angles to be perturbed
121  int Ndper; // number of dihedrals to be perturbed
122  int Mbper; // number of bonds with atoms completely in perturbed group
123  int Mgper; // number of angles with atoms completely in perturbed group
124  int Mdper; // number of dihedrals with atoms completely in perturbed groups
125  int Ifbox; // set to 1 if standard periodic box, 2 when truncated octahedral
126  int Nmxrs; // number of atoms in the largest residue
127  int Ifcap; // set to 1 if the CAP option from edit was specified
128  int Numextra; // number of extra points found in topology
129  int Ncopy; // number of PIMD slices / number of beads
130  // parameter data (copied from parm.C)
131  vector<string> AtomNames; // (size: Natom) names
132  vector<_REAL> Charges; // (size: Natom) charges
133  vector<_REAL> Masses; // (size: Natom) masses
134  vector<int> AtomNumbers; // (size: Natom) atomic numbers
135  vector<int> Iac; // (size: Natom) LJ atom type indices
136  vector<int> Iblo; // (size: Ntypes * Ntypes) the number of atoms excluded from NB calculation
137  vector<int> Cno; // (size: Ntypes * Ntypes) LJ atom types
138  vector<string> ResNames; // (size: Nres) residue names
139  vector<int> Ipres; // (size: Nres) the first atoms in residues
140  vector<_REAL> Rk; // (size: Numbnd) bond force constants
141  vector<_REAL> Req; // (size: Numbnd) bond equilibrium distances
142  vector<_REAL> Tk; // (size: Numang) angle force constants
143  vector<_REAL> Teq; // (size: Numang) angle equilibrium angles
144  vector<_REAL> Pk; // (size: Nptra) dihedral angle force constants
145  vector<_REAL> Pn; // (size: Nptra) dihedral angle periodicities
146  vector<_REAL> Phase; // (size: Nptra) dihedral angle phases
147  vector<_REAL> SceeScaleFactor; // (size: Nptra) factors by which 1-4 electrostatic interactions are divided
148  vector<_REAL> ScnbScaleFactor; // (size: Nptra) factors by which 1-4 VdW interactions are divided
149  vector<_REAL> Solty; // (size: Natyp) unused
150  vector<_REAL> Cn1; // (size: Ntypes * (Ntypes + 1) / 2) coefficient As of LJ interactions
151  vector<_REAL> Cn2; // (size: Ntypes * (Ntypes + 1) / 2) coefficient Bs of LJ interactions
152  vector<int> BondHAt1; // (size: Nbonh) atom(1) indices in bonds involving hydrogens
153  vector<int> BondHAt2; // (size: Nbonh) atom(2) indices in bonds involving hydrogens
154  vector<int> BondHNum; // (size: Nbonh) bond indices involving hydrogens
155  vector<int> BondAt1; // (size: Nbona) atom(1) indices in bonds not involving hydrogens
156  vector<int> BondAt2; // (size: Nbona) atom(2) indices in bonds not involving hydrogens
157  vector<int> BondNum; // (size: Nbona) bond indices not involving hydrogens
158  vector<int> AngleHAt1; // (size: Ntheth) atom(1) indices in angles involving at least one H
159  vector<int> AngleHAt2; // (size: Ntheth) atom(2) indices in angles involving at least one H
160  vector<int> AngleHAt3; // (size: Ntheth) atom(3) indices in angles involving at least one H
161  vector<int> AngleHNum; // (size: Ntheth) angle indices involving at least one H
162  vector<int> AngleAt1; // (size: Ntheta) atom(1) indices in angles not involving H
163  vector<int> AngleAt2; // (size: Ntheta) atom(2) indices in angles not involving H
164  vector<int> AngleAt3; // (size: Ntheta) atom(3) indices in angles not involving H
165  vector<int> AngleNum; // (size: Ntheth) angle indices not involving H
166  vector<int> DihHAt1; // (size: Nphih) atom(1) indices in dihedral angles involving at least one H
167  vector<int> DihHAt2; // (size: Nphih) atom(2) indices in dihedral angles involving at least one H
168  vector<int> DihHAt3; // (size: Nphih) atom(3) indices in dihedral angles involving at least one H
169  vector<int> DihHAt4; // (size: Nphih) atom(4) indices in dihedral angles involving at least one H
170  vector<int> DihHNum; // (size: Nphih) dihedral angles indices involving at least one H
171  vector<int> DihAt1; // (size: Nphih) atom(1) indices in dihedral angles not involving H
172  vector<int> DihAt2; // (size: Nphih) atom(2) indices in dihedral angles not involving H
173  vector<int> DihAt3; // (size: Nphih) atom(3) indices in dihedral angles not involving H
174  vector<int> DihAt4; // (size: Nphih) atom(4) indices in dihedral angles not involving H
175  vector<int> DihNum; // (size: Nphih) dihedral angles indices not involving H
176  vector<int> ExclAt; // (size: Nnb) a list for each atom of excluded partners in the NB calculation
177  vector<_REAL> HB12; // (size: Nphb) coefficient As in the 12-10 potential
178  vector<_REAL> HB10; // (size: Nphb) coefficient Bs in the 12-10 potential
179  vector<string> AtomSym; // (size: Natom) atom type names for all atoms
180  vector<string> AtomTree; // (size: Natom) the tree structure of each atom
181  vector<int> TreeJoin; // (size: Natom) unused
182  vector<int> AtomRes; // (size: Natom) unused
183  // solvent pointers (if Ifbox > 0)
184  int Iptres; // the final residue that is part of the solute
185  int Nspm; // the total number of molecules
186  int Nspsol; // the first solvent molecules
187  vector<int> AtomsPerMolecule;// (size: Nspm) how many atoms are present in each molecule
188  vector<_REAL> BoxDimensions; // (size: 4) box angle and dimensions
189  vector<int> CapInfo; // (size: 1) the last atom before the water cap begins
190  vector<_REAL> CapInfo2; // (size: 4) distacne from cap center to outside the cap, coordinates of the cap center
191  vector<string> RadiusSet; // (size: 1) intrinsic implicit solvent radii set
192  vector<_REAL> Radii; // (size: Natom) intrinsic radii of atoms
193  vector<_REAL> Screen; // (size: Natom) screening parameters used in Generalized Born
194  // polarizability
195  int Ipol; // 0: fixed-charge force fields, 1: force fields that contain polarization
196  vector<_REAL> Polarizability; // (size: Natom) atomic polarizabilities
197  // CHAMBER support (CHARMM FF in AMBER format)
198  // Urey-Bradley terms
199  int Nub; // total number of Urey-Bradley terms
200  int Nubtypes; // number of unique Urey-Bradley types
201  vector<int> UreyBradleyAt1; // (size: Nub) atom(1) indices in UB terms
202  vector<int> UreyBradleyAt2; // (size: Nub) atom(2) indices in UB terms
203  vector<int> UreyBradleyNum; // (size: Nub) types of UB terms
204  vector<_REAL> UreyBradleyForceConstants; // (size: Nubtypes) force constants of UB terms
205  vector<_REAL> UreyBradleyEquilValues; // (size: Nubtypes) equilibrium distances of UB terms
206  // impropers
207  int Nimphi; // total number of improper terms
208  vector<int> ImproperAt1; // (size: Nimphi) atom(1) indices in improper terms
209  vector<int> ImproperAt2; // (size: Nimphi) atom(2) indices in improper terms
210  vector<int> ImproperAt3; // (size: Nimphi) atom(3) indices in improper terms
211  vector<int> ImproperAt4; // (size: Nimphi) atom(4) indices in improper terms
212  vector<int> ImproperNum; // (size: Nimphi) types of improper terms
213  int NimprTypes; // total number of the types of improper terms
214  vector<_REAL> ImproperForceConstants; // (size: NimprTypes) force constants of improper terms
215  vector<_REAL> ImproperPhases; // (size: NimprTypes) phases of improper terms
216  // CHARMM scaled 1-4 coefficients
217  vector<_REAL> LJ14ACoefficients; // (size: Ntypes * (Ntypes + 1) / 2) LJ parameter As
218  vector<_REAL> LJ14BCoefficients; // (size: Ntypes * (Ntypes + 1) / 2) LJ parameter Bs
219  // CMAP data in AMBER ff19SB or CHARMM
220  // CMAP meta data cannot be obtained from %FLAG POINTERS
221  bool HasCMAP; // CMAP is not always enabled, we need this flag to check it
222  int CMAPTermCount; // number of CMAP terms
223  int CMAPTypeCount; // number of CMAP types
224  vector<int> CMAPResolution; // (size: CMAPTypeCount) CMAP resolution of each type
225  vector<vector<_REAL>> CMAPParameter; // (size: CMAPTypeCount, CMAPResolution[i] * CMAPResolution[i]) CMAP grid parameters
226  vector<int> CMAPIndex; // (size: CMAPTermCount * 6) atom indices and type of a CMAP term
227 };
228 
244 void parse_fortran_format(const std::string& str, FortranFormatSpecifier& specifier);
245 
251 void parse_fortran_format(const std::string& str, vector<FortranFormatSpecifier>& specifiers);
252 
264 void split_string_by_specifiers(const string& source,
265  const vector<FortranFormatSpecifier>& specifiers,
266  vector<FortranData>& destination);
267 
274 bool read_amber_parm_stage1(const char* filename, AmberTopparMap& toppar_map);
275 
282 bool read_amber_parm_stage2(AmberTopparMap& toppar_map, Ambertoppar& toppar_data);
283 
284 // helper function for read the POINTER section and allocate the memory
285 // according to the sizes in it.
286 bool parse_pointer(const vector<FortranData>& source,
287  Ambertoppar& result);
288 
297 bool parse_section(const vector<FortranData>& source,
298  const int& count,
299  vector<string>& destination,
300  const string& section_name);
301 bool parse_section(const vector<FortranData>& source,
302  const int& count,
303  vector<int>& destination,
304  const string& section_name);
305 bool parse_section(const vector<FortranData>& source,
306  const int& count,
307  vector<_REAL>& destination,
308  const string& section_name);
309 
320 bool parse_section(const vector<FortranData>& source,
321  const int& count,
322  std::initializer_list<std::reference_wrapper<vector<string>>> destination,
323  const string& section_name);
324 bool parse_section(const vector<FortranData>& source,
325  const int& count,
326  std::initializer_list<std::reference_wrapper<vector<int>>> destination,
327  const string& section_name);
328 bool parse_section(const vector<FortranData>& source,
329  const int& count,
330  std::initializer_list<std::reference_wrapper<vector<_REAL>>> destination,
331  const string& section_name);
332 
333 // reader function for NAMD
334 Ambertoppar readparm(const char* filename);
335 
336 }
337 
338 #endif
float Real
Definition: common.h:109
#define _REAL
Definition: ReadAmberParm.h:43
unordered_map< string, tuple< bool, vector< FortranData > > > AmberTopparMap
Definition: ReadAmberParm.h:87
void split_string_by_specifiers(const string &source, const vector< FortranFormatSpecifier > &specifiers, vector< FortranData > &destination)
This function splits a string by a set of fortran format specifiers.
FortranData(const string &str)
Definition: ReadAmberParm.h:76
bool read_amber_parm_stage2(AmberTopparMap &toppar_map, Ambertoppar &toppar_data)
Read an AmberTopparMap into an Ambertoppar struct for NAMD.
void parse_fortran_format(const std::string &str, FortranFormatSpecifier &specifier)
Parse a single fortran format specifier.
Definition: ReadAmberParm.C:15
bool read_amber_parm_stage1(const char *filename, AmberTopparMap &toppar_map)
Read an AMBER topology file into an AmberTopparMap.
bool parse_pointer(const vector< FortranData > &source, Ambertoppar &result)
vector< _REAL > LJ14BCoefficients
vector< _REAL > LJ14ACoefficients
vector< vector< _REAL > > CMAPParameter
vector< _REAL > UreyBradleyEquilValues
Ambertoppar readparm(const char *filename)
gridSize x
vector< _REAL > ImproperForceConstants
vector< _REAL > UreyBradleyForceConstants
bool parse_section(const vector< FortranData > &source, const int &count, vector< string > &destination, const string &section_name)
Copy data from source to destination.