View Javadoc
1   //******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2025.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  //******************************************************************************
38  package ffx.potential.utils;
39  
40  import ffx.numerics.math.DoubleMath;
41  import ffx.potential.MolecularAssembly;
42  import ffx.potential.bonded.AminoAcidUtils.AminoAcid3;
43  import ffx.potential.bonded.Atom;
44  import ffx.potential.bonded.Residue;
45  
46  import java.util.ArrayList;
47  import java.util.Arrays;
48  import java.util.HashMap;
49  import java.util.List;
50  import java.util.Locale;
51  import java.util.NavigableMap;
52  import java.util.TreeMap;
53  import java.util.logging.Logger;
54  
55  public class GetProteinFeatures {
56  
57    private static final HashMap<AminoAcid3, String> polarityMap = new HashMap<>();
58    private static final HashMap<AminoAcid3, String> acidityMap = new HashMap<>();
59    private static final NavigableMap<Double, String> phiToStructure = new TreeMap<>();
60    private static final NavigableMap<Double, String> psiToStructure = new TreeMap<>();
61    private static final HashMap<AminoAcid3, Double> standardSurfaceArea = new HashMap<>();
62    private static final HashMap<String, AminoAcid3> aminoAcidCodes = new HashMap<>();
63    private double phi;
64    private double psi;
65    private double omega;
66    private double totalSurfaceArea = 0.0;
67    MolecularAssembly molecularAssembly;
68    private static final Logger logger = Logger.getLogger(GetProteinFeatures.class.getName());
69  
70  
71    public GetProteinFeatures(MolecularAssembly molecularAssembly) {
72      this.molecularAssembly = molecularAssembly;
73    }
74  
75    static {
76      //Map residue to its polarity
77      polarityMap.put(AminoAcid3.ARG, "polar");
78      polarityMap.put(AminoAcid3.ASN, "polar");
79      polarityMap.put(AminoAcid3.ASP, "polar");
80      polarityMap.put(AminoAcid3.ASH, "polar");
81      polarityMap.put(AminoAcid3.CYS, "polar");
82      polarityMap.put(AminoAcid3.GLN, "polar");
83      polarityMap.put(AminoAcid3.GLU, "polar");
84      polarityMap.put(AminoAcid3.GLH, "polar");
85      polarityMap.put(AminoAcid3.HIS, "polar");
86      polarityMap.put(AminoAcid3.HIE, "polar");
87      polarityMap.put(AminoAcid3.HID, "polar");
88      polarityMap.put(AminoAcid3.LYS, "polar");
89      polarityMap.put(AminoAcid3.LYD, "polar");
90      polarityMap.put(AminoAcid3.SER, "polar");
91      polarityMap.put(AminoAcid3.THR, "polar");
92      polarityMap.put(AminoAcid3.TYR, "polar");
93      polarityMap.put(AminoAcid3.ALA, "nonpolar");
94      polarityMap.put(AminoAcid3.GLY, "nonpolar");
95      polarityMap.put(AminoAcid3.ILE, "nonpolar");
96      polarityMap.put(AminoAcid3.LEU, "nonpolar");
97      polarityMap.put(AminoAcid3.MET, "nonpolar");
98      polarityMap.put(AminoAcid3.PHE, "nonpolar");
99      polarityMap.put(AminoAcid3.PRO, "nonpolar");
100     polarityMap.put(AminoAcid3.TRP, "nonpolar");
101     polarityMap.put(AminoAcid3.VAL, "nonpolar");
102 
103     //Map residue to its acidity
104     acidityMap.put(AminoAcid3.ASP, "acidic");
105     acidityMap.put(AminoAcid3.ASH, "acidic");
106     acidityMap.put(AminoAcid3.ASN, "acidic");
107     acidityMap.put(AminoAcid3.GLU, "acidic");
108     acidityMap.put(AminoAcid3.GLH, "acidic");
109     acidityMap.put(AminoAcid3.ARG, "basic");
110     acidityMap.put(AminoAcid3.HIS, "basic");
111     acidityMap.put(AminoAcid3.HIE, "basic");
112     acidityMap.put(AminoAcid3.HID, "basic");
113     acidityMap.put(AminoAcid3.LYS, "basic");
114     acidityMap.put(AminoAcid3.LYD, "basic");
115     acidityMap.put(AminoAcid3.LEU, "neutral");
116     acidityMap.put(AminoAcid3.GLN, "neutral");
117     acidityMap.put(AminoAcid3.GLY, "neutral");
118     acidityMap.put(AminoAcid3.ALA, "neutral");
119     acidityMap.put(AminoAcid3.VAL, "neutral");
120     acidityMap.put(AminoAcid3.ILE, "neutral");
121     acidityMap.put(AminoAcid3.SER, "neutral");
122     acidityMap.put(AminoAcid3.CYS, "neutral");
123     acidityMap.put(AminoAcid3.THR, "neutral");
124     acidityMap.put(AminoAcid3.MET, "neutral");
125     acidityMap.put(AminoAcid3.PRO, "neutral");
126     acidityMap.put(AminoAcid3.PHE, "neutral");
127     acidityMap.put(AminoAcid3.TYR, "neutral");
128     acidityMap.put(AminoAcid3.TRP, "neutral");
129 
130     //Map phi to the Alpha Helix (L) and (R) and Beta Sheet Range
131     phiToStructure.put(-180.0, "Extended");
132     phiToStructure.put(-150.0, "Structure");
133     phiToStructure.put(-50.0, "Structure");
134     phiToStructure.put(0.0, "Extended");
135     phiToStructure.put(50.0, "Structure");
136     phiToStructure.put(70.0, "Structure");
137     phiToStructure.put(180.0, "Extended");
138 
139     //Map psi to the Alpha Helix (L) and (R) and Beta Sheet Range
140     psiToStructure.put(-180.0, "Extended");
141     psiToStructure.put(-70.0, "Alpha Helix");
142     psiToStructure.put(-50.0, "Alpha Helix");
143     psiToStructure.put(0.0, "Extended");
144     //psiToStructure.put(30.0, "Alpha Helix (L)");
145     //psiToStructure.put(70.0, "Alpha Helix (L)");
146     psiToStructure.put(85.0, "Extended");
147     psiToStructure.put(100.0, "Beta Sheet");
148     psiToStructure.put(150.0, "Beta Sheet");
149     //psiToStructure.put(150.0, "Anti-Parallel Beta Sheet");
150     psiToStructure.put(180.0, "Extended");
151 
152     //Map of exposed surface area to standard from model compounds
153     standardSurfaceArea.put(AminoAcid3.ALA, 127.15871);
154     standardSurfaceArea.put(AminoAcid3.ARG, 269.42558);
155     standardSurfaceArea.put(AminoAcid3.ASH, 158.67385);
156     standardSurfaceArea.put(AminoAcid3.ASP, 159.97984);
157     standardSurfaceArea.put(AminoAcid3.ASN, 159.82709);
158     standardSurfaceArea.put(AminoAcid3.CYS, 154.52801);
159     standardSurfaceArea.put(AminoAcid3.GLH, 195.30608);
160     standardSurfaceArea.put(AminoAcid3.GLN, 197.75170);
161     standardSurfaceArea.put(AminoAcid3.GLU, 195.27081);
162     standardSurfaceArea.put(AminoAcid3.GLY, 95.346188);
163     standardSurfaceArea.put(AminoAcid3.HID, 202.31302);
164     standardSurfaceArea.put(AminoAcid3.HIE, 203.22195);
165     standardSurfaceArea.put(AminoAcid3.HIS, 205.40232);
166     standardSurfaceArea.put(AminoAcid3.ILE, 196.15872);
167     standardSurfaceArea.put(AminoAcid3.LEU, 192.85123);
168     standardSurfaceArea.put(AminoAcid3.LYD, 235.49182);
169     standardSurfaceArea.put(AminoAcid3.LYS, 236.71473);
170     standardSurfaceArea.put(AminoAcid3.MET, 216.53318);
171     standardSurfaceArea.put(AminoAcid3.PHE, 229.75038);
172     standardSurfaceArea.put(AminoAcid3.PRO, 157.30011);
173     standardSurfaceArea.put(AminoAcid3.SER, 137.90720);
174     standardSurfaceArea.put(AminoAcid3.THR, 157.33759);
175     standardSurfaceArea.put(AminoAcid3.TRP, 262.32819);
176     standardSurfaceArea.put(AminoAcid3.TYR, 239.91172);
177     standardSurfaceArea.put(AminoAcid3.VAL, 171.89211);
178 
179     // Map amino acid codes from 1 letter to 3 letter AA3
180     aminoAcidCodes.put("A", AminoAcid3.ALA);
181     aminoAcidCodes.put("R", AminoAcid3.ARG);
182     aminoAcidCodes.put("N", AminoAcid3.ASN);
183     aminoAcidCodes.put("D", AminoAcid3.ASP);
184     aminoAcidCodes.put("C", AminoAcid3.CYS);
185     aminoAcidCodes.put("E", AminoAcid3.GLU);
186     aminoAcidCodes.put("Q", AminoAcid3.GLN);
187     aminoAcidCodes.put("G", AminoAcid3.GLY);
188     aminoAcidCodes.put("H", AminoAcid3.HIS);
189     aminoAcidCodes.put("I", AminoAcid3.ILE);
190     aminoAcidCodes.put("L", AminoAcid3.LEU);
191     aminoAcidCodes.put("K", AminoAcid3.LYS);
192     aminoAcidCodes.put("M", AminoAcid3.MET);
193     aminoAcidCodes.put("F", AminoAcid3.PHE);
194     aminoAcidCodes.put("P", AminoAcid3.PRO);
195     aminoAcidCodes.put("S", AminoAcid3.SER);
196     aminoAcidCodes.put("T", AminoAcid3.THR);
197     aminoAcidCodes.put("W", AminoAcid3.TRP);
198     aminoAcidCodes.put("Y", AminoAcid3.TYR);
199     aminoAcidCodes.put("V", AminoAcid3.VAL);
200   }
201 
202   /**
203    * Make a string array of surface area and additional selected features (phi,psi,omega,and
204    * structure annotations)
205    *
206    * @param residue Residue
207    * @param surfaceArea residue surface area
208    * @param includeAngles select angles
209    * @param includeStructure select structure annotation
210    * @param includePPI select protein-interface annotation
211    * @return String array of features
212    */
213   public String[] saveFeatures(Residue residue, double surfaceArea, boolean includeAngles,
214       boolean includeStructure, boolean includePPI) {
215     int nFeat = 3;
216     if (includeAngles) {
217       nFeat += 3;
218     }
219     if (includeStructure) {
220       nFeat += 1;
221     }
222     if(includePPI){
223       nFeat += 1;
224     }
225 
226     String[] features = new String[nFeat];
227     String structure;
228     String phiString;
229     String psiString;
230     String omegaString;
231     String interfaceResString = "";
232 
233     if (residue.getNextResidue() == null) {
234       //Since phi, psi angles are determined between two residues, the first and last residue will not have values
235       //and are default labeled as Extended Secondary Structure
236       structure = "Extended";
237       getPhi(residue);
238       phiString = String.valueOf(phi);
239       psiString = null;
240       omegaString = null;
241     } else if (residue.getPreviousResidue() == null) {
242       structure = "Extended";
243       getPsi(residue);
244       getOmega(residue);
245       psiString = String.valueOf(psi);
246       omegaString = String.valueOf(omega);
247       phiString = null;
248     } else {
249       getPhi(residue);
250       getPsi(residue);
251       getOmega(residue);
252       phiString = String.valueOf(phi);
253       psiString = String.valueOf(psi);
254       omegaString = String.valueOf(omega);
255       structure = getSecondaryStructure();
256     }
257 
258     totalSurfaceArea += surfaceArea;
259     String surfaceAreaString = String.valueOf(Math.floor(surfaceArea*100)/100);
260 
261     double standSurfaceArea = standardSurfaceArea.getOrDefault(residue.getAminoAcid3(), 0.0);
262     String normalizedSAString = "";
263     if (standSurfaceArea != 0.0) {
264       double normSA = surfaceArea / standSurfaceArea;
265       normSA = Math.floor(normSA * 100) / 100;
266       if(normSA > 1.0){
267         normSA = 1.0;
268       }
269       normalizedSAString = String.valueOf(normSA);
270 
271     }
272     String confidence = String.valueOf(getConfidenceScore(residue));
273     String interactingGene = " ";
274     if(includePPI){
275       interactingGene = getPPI(residue);
276       if(!interactingGene.equals(" ")){
277         interfaceResString = interactingGene;
278       }
279     }
280 
281     features[0] = surfaceAreaString;
282     features[1] = normalizedSAString;
283     features[2] = confidence;
284     if (includeAngles) {
285       features[3] = phiString;
286       features[4] = psiString;
287       features[5] = omegaString;
288       if (includeStructure) {
289         features[6] = structure;
290       }
291       if(includePPI){
292         features[7] = interfaceResString;
293       }
294     } else if (includeStructure) {
295       features[3] = structure;
296       if(includePPI){
297         features[4] = interfaceResString;
298       }
299     } else if(includePPI){
300       features[3] = interfaceResString;
301     }
302     return features;
303   }
304 
305   /**
306    * Get the phi angle of a residue
307    *
308    * @param currentRes current residue
309    */
310   public void getPhi(Residue currentRes) {
311     Residue previousRes = currentRes.getPreviousResidue();
312     double[] cCoor = new double[3];
313     double[] nCoor = new double[3];
314     double[] caCoor = new double[3];
315     double[] c2Coor = new double[3];
316     phi = (DoubleMath.dihedralAngle(previousRes.getAtomByName("C", true).getXYZ(cCoor),
317         currentRes.getAtomByName("N", true).getXYZ(nCoor),
318         currentRes.getAtomByName("CA", true).getXYZ(caCoor),
319         currentRes.getAtomByName("C", true).getXYZ(c2Coor))) * 180 / Math.PI;
320   }
321 
322   /**
323    * Get the psi angle of a residue
324    *
325    * @param currentRes current residue
326    */
327   public void getPsi(Residue currentRes) {
328     //res[0] is always current Res
329     Residue nextRes = currentRes.getNextResidue();
330     double[] nCoor = new double[3];
331     double[] caCoor = new double[3];
332     double[] cCoor = new double[3];
333     double[] n2Coor = new double[3];
334     psi = (DoubleMath.dihedralAngle(currentRes.getAtomByName("N", true).getXYZ(nCoor),
335         currentRes.getAtomByName("CA", true).getXYZ(caCoor),
336         currentRes.getAtomByName("C", true).getXYZ(cCoor),
337         nextRes.getAtomByName("N", true).getXYZ(n2Coor))) * 180 / Math.PI;
338   }
339 
340   /**
341    * Get the omega angle of a residue
342    *
343    * @param currentRes current residue
344    */
345   public void getOmega(Residue currentRes) {
346     //res[0] is always current Res
347     Residue nextRes = currentRes.getNextResidue();
348     double[] ca1Coor = new double[3];
349     double[] cCoor = new double[3];
350     double[] nCoor = new double[3];
351     double[] ca2Coor = new double[3];
352     omega = (DoubleMath.dihedralAngle(currentRes.getAtomByName("CA", true).getXYZ(ca1Coor),
353         currentRes.getAtomByName("C", true).getXYZ(cCoor),
354         nextRes.getAtomByName("N", true).getXYZ(nCoor),
355         nextRes.getAtomByName("CA", true).getXYZ(ca2Coor))) * 180 / Math.PI;
356   }
357 
358   //Data on phi,psi and secondary structure correlations is from Tamar Schlick's
359   // Molecular Modeling and Simulation Textbook. Approx. page 108.
360   //Alpha Helix: phi,psi of approximately -60,-50. Tighter helices are around -50,-25.
361   //Looser helices are around -60,-70.
362 
363   //Collagen Helix: phi,psi of approximately -60,+125.
364 
365   //Parallel Beta Sheet: phi,psi of approximately -120,+115
366   //Antiparallel Beta Sheet: phi,psi of approximately -140,+135
367   //Get surface area region to get surface area
368 
369   /**
370    * Get the secondary structure annotation from the ramachandran angle map
371    *
372    * @return string of secondary structure
373    */
374   public String getSecondaryStructure() {
375     String secondaryStructure;
376     //Use phi, psi ranges to determine which is the most likely secondary structure based on ramachandran values
377     Double lowPhiKey = phiToStructure.floorKey(phi);
378     String lowPhiStruct = phiToStructure.get(lowPhiKey);
379     Double highPhiKey = phiToStructure.ceilingKey(phi);
380     String highPhiStruct = phiToStructure.get(highPhiKey);
381     if (lowPhiStruct.equals("Extended") || highPhiStruct.equals("Extended")) {
382       secondaryStructure = "Extended";
383     } else {
384       Double lowPsiKey = psiToStructure.floorKey(psi);
385       String lowPsiStruct = psiToStructure.get(lowPsiKey);
386       Double highPsiKey = psiToStructure.ceilingKey(psi);
387       String highPsiStruct = psiToStructure.get(highPsiKey);
388       if (lowPsiStruct.equals("Extended") || highPsiStruct.equals("Extended")) {
389         secondaryStructure = "Extended";
390       } else {
391         secondaryStructure = lowPsiStruct;
392       }
393     }
394     return secondaryStructure;
395   }
396 
397   /**
398    * Get the total surface area for the protein
399    *
400    * @return The total surface area.
401    */
402   public double getTotalSurfaceArea() {
403     return Math.floor(totalSurfaceArea);
404   }
405 
406   /**
407    * Get the alphafold confidence score or b-factor from an X-ray model.
408    *
409    * @param currentRes current residue
410    * @return confidence/b-factor value
411    */
412   public double getConfidenceScore(Residue currentRes) {
413     return currentRes.getAtomByName("CA", true).getTempFactor();
414   }
415 
416   /**
417    * Use the ddgun output file to get the amino acid changes
418    *
419    * @param ddgun List of lines from ddGun output file
420    * @return List of NP Changes
421    */
422   public List<String> ddgunToNPChange(List<String> ddgun) {
423     List<String> npChanges = new ArrayList<>();
424     for (String s : ddgun) {
425       String[] splits = s.split("\t");
426       String currentNP = splits[2];
427       String wt = String.valueOf(currentNP.charAt(0));
428       String mut = String.valueOf(currentNP.charAt(currentNP.length() - 1));
429       String pos = currentNP.substring(1, currentNP.length() - 1);
430       String wt3Letter = aminoAcidCodes.get(wt).toString();
431       String mut3Letter = aminoAcidCodes.get(mut).toString();
432       String wildType = wt3Letter.charAt(0) + wt3Letter.substring(1, 3).toLowerCase();
433       String mutant = mut3Letter.charAt(0) + mut3Letter.substring(1, 3).toLowerCase();
434       String npChange = "p." + wildType + pos + mutant;
435       npChanges.add(npChange);
436     }
437     return npChanges;
438   }
439 
440   /**
441    * Get ddgun values from ddgun file
442    *
443    * @param ddgun List of lines from ddGun output file
444    * @return List of ddGun values (raw and abs value)
445    */
446   public List<Double[]> getDDGunValues(List<String> ddgun) {
447     List<Double[]> values = new ArrayList<>();
448     for (String s : ddgun) {
449       String[] splits = s.split("\t");
450       Double[] value = new Double[2];
451       value[0] = Double.parseDouble(splits[3]) * -1.0;
452       value[1] = Math.abs(Double.parseDouble(splits[3]));
453       values.add(value);
454     }
455     return values;
456   }
457 
458   /**
459    * Get the polarity and acidity changes
460    *
461    * @param npChanges list of protein changes
462    * @param includePolarity select polarity
463    * @param includeAcidity select acidity
464    * @return list of polarity and acidity changes
465    */
466   public List<String[]> getPolarityAndAcidityChange(List<String> npChanges, boolean includePolarity,
467       boolean includeAcidity) {
468     List<String[]> polarityAndAcidity = new ArrayList<>();
469     for (String npChange : npChanges) {
470       String change = npChange.split("p\\.")[1].toUpperCase(Locale.ROOT);
471       String[] value = new String[2];
472       AminoAcid3 wt = AminoAcid3.valueOf(change.substring(0, 3));
473       AminoAcid3 mut = AminoAcid3.valueOf(change.substring(change.length() - 3));
474       if (includeAcidity) {
475         if (acidityMap.get(wt).equals("basic") && acidityMap.get(mut).equals("neutral")) {
476           value[0] = "bn";
477         } else if (acidityMap.get(wt).equals("neutral") && acidityMap.get(mut).equals("basic")) {
478           value[0] = "nb";
479         } else if (acidityMap.get(wt).equals("acidic") && acidityMap.get(mut).equals("neutral")) {
480           value[0] = "an";
481         } else if (acidityMap.get(wt).equals("neutral") && acidityMap.get(mut).equals("acidic")) {
482           value[0] = "na";
483         } else if (acidityMap.get(wt).equals("basic") && acidityMap.get(mut).equals("acidic")) {
484           value[0] = "ba";
485         } else if (acidityMap.get(wt).equals("acidic") && acidityMap.get(mut).equals("basic")) {
486           value[0] = "ab";
487         } else if (acidityMap.get(wt).equals(acidityMap.get(mut))) {
488           value[0] = "=";
489         }
490       } else {
491         value[0] = null;
492       }
493 
494       if (includePolarity) {
495         if (polarityMap.get(wt).equals("polar") && polarityMap.get(mut).equals("nonpolar")) {
496           value[1] = "-";
497         } else if (polarityMap.get(wt).equals("nonpolar") && polarityMap.get(mut).equals("polar")) {
498           value[1] = "+";
499         } else {
500           value[1] = "=";
501         }
502       } else {
503         value[1] = null;
504       }
505 
506       polarityAndAcidity.add(value);
507     }
508     return polarityAndAcidity;
509   }
510 
511   public String getPPI(Residue residue){
512     List<String> chainNames = Arrays.stream(molecularAssembly.getChainNames()).toList();
513     if(chainNames.size() == 1){
514       logger.info( " Only one chain in the structure.");
515       return " ";
516     }
517     String name = molecularAssembly.getFile().getName();
518     String[] geneSplit = name.split("_");
519     String[] genes = new String[chainNames.size()];
520 
521     for(int i=1; i < chainNames.size() + 1; i++){
522       genes[i-1] = geneSplit[i];
523     }
524 
525     char chainID = residue.getChainID();
526     for(Atom atom: molecularAssembly.getAtomList()){
527       if(atom.getChainID() != chainID){
528         for(Atom resAtom: residue.getAtomList()){
529           if(resAtom.getXYZ().dist(atom.getXYZ()) <= 10.0){
530             int index = chainNames.indexOf(atom.getChainID().toString());
531             return genes[index];
532           }
533         }
534       }
535 
536     }
537     return " ";
538   }
539 
540 
541 }