View Javadoc
1   //******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2024.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  //******************************************************************************
38  package ffx.potential.utils;
39  
40  import ffx.numerics.math.DoubleMath;
41  import ffx.potential.bonded.AminoAcidUtils.AminoAcid3;
42  import ffx.potential.bonded.Residue;
43  import java.util.ArrayList;
44  import java.util.HashMap;
45  import java.util.List;
46  import java.util.Locale;
47  import java.util.NavigableMap;
48  import java.util.TreeMap;
49  
50  public class GetProteinFeatures {
51  
52    private static final HashMap<AminoAcid3, String> polarityMap = new HashMap<>();
53    private static final HashMap<AminoAcid3, String> acidityMap = new HashMap<>();
54    private static final NavigableMap<Double, String> phiToStructure = new TreeMap<>();
55    private static final NavigableMap<Double, String> psiToStructure = new TreeMap<>();
56    private static final HashMap<AminoAcid3, Double> standardSurfaceArea = new HashMap<>();
57    private static final HashMap<String, AminoAcid3> aminoAcidCodes = new HashMap<>();
58    private double phi;
59    private double psi;
60    private double omega;
61    private double totalSurfaceArea = 0.0;
62  
63  
64    public GetProteinFeatures() {
65  
66    }
67  
68    static {
69      //Map residue to its polarity
70      polarityMap.put(AminoAcid3.ARG, "polar");
71      polarityMap.put(AminoAcid3.ASN, "polar");
72      polarityMap.put(AminoAcid3.ASP, "polar");
73      polarityMap.put(AminoAcid3.ASH, "polar");
74      polarityMap.put(AminoAcid3.CYS, "polar");
75      polarityMap.put(AminoAcid3.GLN, "polar");
76      polarityMap.put(AminoAcid3.GLU, "polar");
77      polarityMap.put(AminoAcid3.GLH, "polar");
78      polarityMap.put(AminoAcid3.HIS, "polar");
79      polarityMap.put(AminoAcid3.HIE, "polar");
80      polarityMap.put(AminoAcid3.HID, "polar");
81      polarityMap.put(AminoAcid3.LYS, "polar");
82      polarityMap.put(AminoAcid3.LYD, "polar");
83      polarityMap.put(AminoAcid3.SER, "polar");
84      polarityMap.put(AminoAcid3.THR, "polar");
85      polarityMap.put(AminoAcid3.TYR, "polar");
86      polarityMap.put(AminoAcid3.ALA, "nonpolar");
87      polarityMap.put(AminoAcid3.GLY, "nonpolar");
88      polarityMap.put(AminoAcid3.ILE, "nonpolar");
89      polarityMap.put(AminoAcid3.LEU, "nonpolar");
90      polarityMap.put(AminoAcid3.MET, "nonpolar");
91      polarityMap.put(AminoAcid3.PHE, "nonpolar");
92      polarityMap.put(AminoAcid3.PRO, "nonpolar");
93      polarityMap.put(AminoAcid3.TRP, "nonpolar");
94      polarityMap.put(AminoAcid3.VAL, "nonpolar");
95  
96      //Map residue to its acidity
97      acidityMap.put(AminoAcid3.ASP, "acidic");
98      acidityMap.put(AminoAcid3.ASH, "acidic");
99      acidityMap.put(AminoAcid3.ASN, "acidic");
100     acidityMap.put(AminoAcid3.GLU, "acidic");
101     acidityMap.put(AminoAcid3.GLH, "acidic");
102     acidityMap.put(AminoAcid3.ARG, "basic");
103     acidityMap.put(AminoAcid3.HIS, "basic");
104     acidityMap.put(AminoAcid3.HIE, "basic");
105     acidityMap.put(AminoAcid3.HID, "basic");
106     acidityMap.put(AminoAcid3.LYS, "basic");
107     acidityMap.put(AminoAcid3.LYD, "basic");
108     acidityMap.put(AminoAcid3.LEU, "neutral");
109     acidityMap.put(AminoAcid3.GLN, "neutral");
110     acidityMap.put(AminoAcid3.GLY, "neutral");
111     acidityMap.put(AminoAcid3.ALA, "neutral");
112     acidityMap.put(AminoAcid3.VAL, "neutral");
113     acidityMap.put(AminoAcid3.ILE, "neutral");
114     acidityMap.put(AminoAcid3.SER, "neutral");
115     acidityMap.put(AminoAcid3.CYS, "neutral");
116     acidityMap.put(AminoAcid3.THR, "neutral");
117     acidityMap.put(AminoAcid3.MET, "neutral");
118     acidityMap.put(AminoAcid3.PRO, "neutral");
119     acidityMap.put(AminoAcid3.PHE, "neutral");
120     acidityMap.put(AminoAcid3.TYR, "neutral");
121     acidityMap.put(AminoAcid3.TRP, "neutral");
122 
123     //Map phi to the Alpha Helix (L) and (R) and Beta Sheet Range
124     phiToStructure.put(-180.0, "Extended");
125     phiToStructure.put(-150.0, "Structure");
126     phiToStructure.put(-50.0, "Structure");
127     phiToStructure.put(0.0, "Extended");
128     phiToStructure.put(50.0, "Structure");
129     phiToStructure.put(70.0, "Structure");
130     phiToStructure.put(180.0, "Extended");
131 
132     //Map psi to the Alpha Helix (L) and (R) and Beta Sheet Range
133     psiToStructure.put(-180.0, "Extended");
134     psiToStructure.put(-70.0, "Alpha Helix");
135     psiToStructure.put(-50.0, "Alpha Helix");
136     psiToStructure.put(0.0, "Extended");
137     //psiToStructure.put(30.0, "Alpha Helix (L)");
138     //psiToStructure.put(70.0, "Alpha Helix (L)");
139     psiToStructure.put(85.0, "Extended");
140     psiToStructure.put(100.0, "Beta Sheet");
141     psiToStructure.put(150.0, "Beta Sheet");
142     //psiToStructure.put(150.0, "Anti-Parallel Beta Sheet");
143     psiToStructure.put(180.0, "Extended");
144 
145     //Map of exposed surface area to standard from model compounds
146     standardSurfaceArea.put(AminoAcid3.ALA, 127.15871);
147     standardSurfaceArea.put(AminoAcid3.ARG, 269.42558);
148     standardSurfaceArea.put(AminoAcid3.ASH, 158.67385);
149     standardSurfaceArea.put(AminoAcid3.ASP, 159.97984);
150     standardSurfaceArea.put(AminoAcid3.ASN, 159.82709);
151     standardSurfaceArea.put(AminoAcid3.CYS, 154.52801);
152     standardSurfaceArea.put(AminoAcid3.GLH, 195.30608);
153     standardSurfaceArea.put(AminoAcid3.GLN, 197.75170);
154     standardSurfaceArea.put(AminoAcid3.GLU, 195.27081);
155     standardSurfaceArea.put(AminoAcid3.GLY, 95.346188);
156     standardSurfaceArea.put(AminoAcid3.HID, 202.31302);
157     standardSurfaceArea.put(AminoAcid3.HIE, 203.22195);
158     standardSurfaceArea.put(AminoAcid3.HIS, 205.40232);
159     standardSurfaceArea.put(AminoAcid3.ILE, 196.15872);
160     standardSurfaceArea.put(AminoAcid3.LEU, 192.85123);
161     standardSurfaceArea.put(AminoAcid3.LYD, 235.49182);
162     standardSurfaceArea.put(AminoAcid3.LYS, 236.71473);
163     standardSurfaceArea.put(AminoAcid3.MET, 216.53318);
164     standardSurfaceArea.put(AminoAcid3.PHE, 229.75038);
165     standardSurfaceArea.put(AminoAcid3.PRO, 157.30011);
166     standardSurfaceArea.put(AminoAcid3.SER, 137.90720);
167     standardSurfaceArea.put(AminoAcid3.THR, 157.33759);
168     standardSurfaceArea.put(AminoAcid3.TRP, 262.32819);
169     standardSurfaceArea.put(AminoAcid3.TYR, 239.91172);
170     standardSurfaceArea.put(AminoAcid3.VAL, 171.89211);
171 
172     // Map amino acid codes from 1 letter to 3 letter AA3
173     aminoAcidCodes.put("A", AminoAcid3.ALA);
174     aminoAcidCodes.put("R", AminoAcid3.ARG);
175     aminoAcidCodes.put("N", AminoAcid3.ASN);
176     aminoAcidCodes.put("D", AminoAcid3.ASP);
177     aminoAcidCodes.put("C", AminoAcid3.CYS);
178     aminoAcidCodes.put("E", AminoAcid3.GLU);
179     aminoAcidCodes.put("Q", AminoAcid3.GLN);
180     aminoAcidCodes.put("G", AminoAcid3.GLY);
181     aminoAcidCodes.put("H", AminoAcid3.HIS);
182     aminoAcidCodes.put("I", AminoAcid3.ILE);
183     aminoAcidCodes.put("L", AminoAcid3.LEU);
184     aminoAcidCodes.put("K", AminoAcid3.LYS);
185     aminoAcidCodes.put("M", AminoAcid3.MET);
186     aminoAcidCodes.put("F", AminoAcid3.PHE);
187     aminoAcidCodes.put("P", AminoAcid3.PRO);
188     aminoAcidCodes.put("S", AminoAcid3.SER);
189     aminoAcidCodes.put("T", AminoAcid3.THR);
190     aminoAcidCodes.put("W", AminoAcid3.TRP);
191     aminoAcidCodes.put("Y", AminoAcid3.TYR);
192     aminoAcidCodes.put("V", AminoAcid3.VAL);
193   }
194 
195   /**
196    * Make a string array of surface area and additional selected features (phi,psi,omega,and
197    * structure annotations)
198    *
199    * @param residue Residue
200    * @param surfaceArea residue surface area
201    * @param includeAngles select angles
202    * @param includeStructure select structure annotation
203    * @return String array of features
204    */
205   public String[] saveFeatures(Residue residue, double surfaceArea, boolean includeAngles,
206       boolean includeStructure) {
207     int nFeat = 3;
208     if (includeAngles) {
209       nFeat += 3;
210     }
211     if (includeStructure) {
212       nFeat += 1;
213     }
214 
215     String[] features = new String[nFeat];
216     String structure;
217     String phiString;
218     String psiString;
219     String omegaString;
220 
221     if (residue.getNextResidue() == null) {
222       //Since phi, psi angles are determined between two residues, the first and last residue will not have values
223       //and are default labeled as Extended Secondary Structure
224       structure = "Extended";
225       getPhi(residue);
226       phiString = String.valueOf(phi);
227       psiString = null;
228       omegaString = null;
229     } else if (residue.getPreviousResidue() == null) {
230       structure = "Extended";
231       getPsi(residue);
232       getOmega(residue);
233       psiString = String.valueOf(psi);
234       omegaString = String.valueOf(omega);
235       phiString = null;
236     } else {
237       getPhi(residue);
238       getPsi(residue);
239       getOmega(residue);
240       phiString = String.valueOf(phi);
241       psiString = String.valueOf(psi);
242       omegaString = String.valueOf(omega);
243       structure = getSecondaryStructure();
244     }
245 
246     totalSurfaceArea += surfaceArea;
247     String surfaceAreaString = String.valueOf(surfaceArea);
248 
249     double standSurfaceArea = standardSurfaceArea.getOrDefault(residue.getAminoAcid3(), 0.0);
250     String normalizedSA = "";
251     if (standSurfaceArea != 0.0) {
252       normalizedSA = String.valueOf(surfaceArea / standSurfaceArea);
253     }
254     String confidence = String.valueOf(getConfidenceScore(residue));
255 
256     features[0] = surfaceAreaString;
257     features[1] = normalizedSA;
258     features[2] = confidence;
259     if (includeAngles) {
260       features[3] = phiString;
261       features[4] = psiString;
262       features[5] = omegaString;
263       if (includeStructure) {
264         features[6] = structure;
265       }
266     } else if (includeStructure) {
267       features[3] = structure;
268     }
269     return features;
270   }
271 
272   /**
273    * Get the phi angle of a residue
274    *
275    * @param currentRes current residue
276    */
277   public void getPhi(Residue currentRes) {
278     Residue previousRes = currentRes.getPreviousResidue();
279     double[] cCoor = new double[3];
280     double[] nCoor = new double[3];
281     double[] caCoor = new double[3];
282     double[] c2Coor = new double[3];
283     phi = (DoubleMath.dihedralAngle(previousRes.getAtomByName("C", true).getXYZ(cCoor),
284         currentRes.getAtomByName("N", true).getXYZ(nCoor),
285         currentRes.getAtomByName("CA", true).getXYZ(caCoor),
286         currentRes.getAtomByName("C", true).getXYZ(c2Coor))) * 180 / Math.PI;
287   }
288 
289   /**
290    * Get the psi angle of a residue
291    *
292    * @param currentRes current residue
293    */
294   public void getPsi(Residue currentRes) {
295     //res[0] is always current Res
296     Residue nextRes = currentRes.getNextResidue();
297     double[] nCoor = new double[3];
298     double[] caCoor = new double[3];
299     double[] cCoor = new double[3];
300     double[] n2Coor = new double[3];
301     psi = (DoubleMath.dihedralAngle(currentRes.getAtomByName("N", true).getXYZ(nCoor),
302         currentRes.getAtomByName("CA", true).getXYZ(caCoor),
303         currentRes.getAtomByName("C", true).getXYZ(cCoor),
304         nextRes.getAtomByName("N", true).getXYZ(n2Coor))) * 180 / Math.PI;
305   }
306 
307   /**
308    * Get the omega angle of a residue
309    *
310    * @param currentRes current residue
311    */
312   public void getOmega(Residue currentRes) {
313     //res[0] is always current Res
314     Residue nextRes = currentRes.getNextResidue();
315     double[] ca1Coor = new double[3];
316     double[] cCoor = new double[3];
317     double[] nCoor = new double[3];
318     double[] ca2Coor = new double[3];
319     omega = (DoubleMath.dihedralAngle(currentRes.getAtomByName("CA", true).getXYZ(ca1Coor),
320         currentRes.getAtomByName("C", true).getXYZ(cCoor),
321         nextRes.getAtomByName("N", true).getXYZ(nCoor),
322         nextRes.getAtomByName("CA", true).getXYZ(ca2Coor))) * 180 / Math.PI;
323   }
324 
325   //Data on phi,psi and secondary structure correlations is from Tamar Schlick's
326   // Molecular Modeling and Simulation Textbook. Approx. page 108.
327   //Alpha Helix: phi,psi of approximately -60,-50. Tighter helices are around -50,-25.
328   //Looser helices are around -60,-70.
329 
330   //Collagen Helix: phi,psi of approximately -60,+125.
331 
332   //Parallel Beta Sheet: phi,psi of approximately -120,+115
333   //Antiparallel Beta Sheet: phi,psi of approximately -140,+135
334   //Get surface area region to get surface area
335 
336   /**
337    * Get the secondary structure annotation from the ramachandran angle map
338    *
339    * @return string of secondary structure
340    */
341   public String getSecondaryStructure() {
342     String secondaryStructure;
343     //Use phi, psi ranges to determine which is the most likely secondary structure based on ramachandran values
344     Double lowPhiKey = phiToStructure.floorKey(phi);
345     String lowPhiStruct = phiToStructure.get(lowPhiKey);
346     Double highPhiKey = phiToStructure.ceilingKey(phi);
347     String highPhiStruct = phiToStructure.get(highPhiKey);
348     if (lowPhiStruct.equals("Extended") || highPhiStruct.equals("Extended")) {
349       secondaryStructure = "Extended";
350     } else {
351       Double lowPsiKey = psiToStructure.floorKey(psi);
352       String lowPsiStruct = psiToStructure.get(lowPsiKey);
353       Double highPsiKey = psiToStructure.ceilingKey(psi);
354       String highPsiStruct = psiToStructure.get(highPsiKey);
355       if (lowPsiStruct.equals("Extended") || highPsiStruct.equals("Extended")) {
356         secondaryStructure = "Extended";
357       } else {
358         secondaryStructure = lowPsiStruct;
359       }
360     }
361     return secondaryStructure;
362   }
363 
364   /**
365    * Get the total surface area for the protein
366    *
367    * @return The total surface area.
368    */
369   public double getTotalSurfaceArea() {
370     return totalSurfaceArea;
371   }
372 
373   /**
374    * Get the alphafold confidence score or b-factor from an X-ray model.
375    *
376    * @param currentRes current residue
377    * @return confidence/b-factor value
378    */
379   public double getConfidenceScore(Residue currentRes) {
380     return currentRes.getAtomByName("CA", true).getTempFactor();
381   }
382 
383   /**
384    * Use the ddgun output file to get the amino acid changes
385    *
386    * @param ddgun List of lines from ddGun output file
387    * @return List of NP Changes
388    */
389   public List<String> ddgunToNPChange(List<String> ddgun) {
390     List<String> npChanges = new ArrayList<>();
391     for (String s : ddgun) {
392       String[] splits = s.split("\t");
393       String currentNP = splits[2];
394       String wt = String.valueOf(currentNP.charAt(0));
395       String mut = String.valueOf(currentNP.charAt(currentNP.length() - 1));
396       String pos = currentNP.substring(1, currentNP.length() - 1);
397       String wt3Letter = aminoAcidCodes.get(wt).toString();
398       String mut3Letter = aminoAcidCodes.get(mut).toString();
399       String wildType = wt3Letter.charAt(0) + wt3Letter.substring(1, 3).toLowerCase();
400       String mutant = mut3Letter.charAt(0) + mut3Letter.substring(1, 3).toLowerCase();
401       String npChange = "p." + wildType + pos + mutant;
402       npChanges.add(npChange);
403     }
404     return npChanges;
405   }
406 
407   /**
408    * Get ddgun values from ddgun file
409    *
410    * @param ddgun List of lines from ddGun output file
411    * @return List of ddGun values (raw and abs value)
412    */
413   public List<Double[]> getDDGunValues(List<String> ddgun) {
414     List<Double[]> values = new ArrayList<>();
415     for (String s : ddgun) {
416       String[] splits = s.split("\t");
417       Double[] value = new Double[2];
418       value[0] = Double.parseDouble(splits[3]);
419       value[1] = Math.abs(Double.parseDouble(splits[3]));
420       values.add(value);
421     }
422     return values;
423   }
424 
425   /**
426    * Get the polarity and acidity changes
427    *
428    * @param npChanges list of protein changes
429    * @param includePolarity select polarity
430    * @param includeAcidity select acidity
431    * @return list of polarity and acidity changes
432    */
433   public List<String[]> getPolarityAndAcidityChange(List<String> npChanges, boolean includePolarity,
434       boolean includeAcidity) {
435     List<String[]> polarityAndAcidity = new ArrayList<>();
436     for (String npChange : npChanges) {
437       String change = npChange.split("p\\.")[1].toUpperCase(Locale.ROOT);
438       String[] value = new String[2];
439       AminoAcid3 wt = AminoAcid3.valueOf(change.substring(0, 3));
440       AminoAcid3 mut = AminoAcid3.valueOf(change.substring(change.length() - 3));
441       if (includeAcidity) {
442         if (acidityMap.get(wt).equals("basic") && acidityMap.get(mut).equals("neutral")) {
443           value[0] = "bn";
444         } else if (acidityMap.get(wt).equals("neutral") && acidityMap.get(mut).equals("basic")) {
445           value[0] = "nb";
446         } else if (acidityMap.get(wt).equals("acidic") && acidityMap.get(mut).equals("neutral")) {
447           value[0] = "an";
448         } else if (acidityMap.get(wt).equals("neutral") && acidityMap.get(mut).equals("acidic")) {
449           value[0] = "na";
450         } else if (acidityMap.get(wt).equals("basic") && acidityMap.get(mut).equals("acidic")) {
451           value[0] = "ba";
452         } else if (acidityMap.get(wt).equals("acidic") && acidityMap.get(mut).equals("basic")) {
453           value[0] = "ab";
454         } else if (acidityMap.get(wt).equals(acidityMap.get(mut))) {
455           value[0] = "=";
456         }
457       } else {
458         value[0] = null;
459       }
460 
461       if (includePolarity) {
462         if (polarityMap.get(wt).equals("polar") && polarityMap.get(mut).equals("nonpolar")) {
463           value[1] = "-";
464         } else if (polarityMap.get(wt).equals("nonpolar") && polarityMap.get(mut).equals("polar")) {
465           value[1] = "+";
466         } else {
467           value[1] = "=";
468         }
469       } else {
470         value[1] = null;
471       }
472 
473       polarityAndAcidity.add(value);
474     }
475     return polarityAndAcidity;
476   }
477 
478 
479 }