1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38 package ffx.potential.utils;
39
40 import ffx.numerics.math.DoubleMath;
41 import ffx.potential.MolecularAssembly;
42 import ffx.potential.bonded.AminoAcidUtils.AminoAcid3;
43 import ffx.potential.bonded.Atom;
44 import ffx.potential.bonded.Residue;
45
46 import java.util.ArrayList;
47 import java.util.Arrays;
48 import java.util.HashMap;
49 import java.util.List;
50 import java.util.Locale;
51 import java.util.NavigableMap;
52 import java.util.TreeMap;
53 import java.util.logging.Logger;
54
55 public class GetProteinFeatures {
56
/** Logger for this class. */
private static final Logger logger = Logger.getLogger(GetProteinFeatures.class.getName());

/** Amino acid type to polarity label ("polar" or "nonpolar"). */
private static final HashMap<AminoAcid3, String> polarityMap = new HashMap<>();
/** Amino acid type to acidity label ("acidic", "basic" or "neutral"). */
private static final HashMap<AminoAcid3, String> acidityMap = new HashMap<>();
/** Phi angle boundaries (degrees) and the label attached to each boundary. */
private static final NavigableMap<Double, String> phiToStructure = new TreeMap<>();
/** Psi angle boundaries (degrees) and the label attached to each boundary. */
private static final NavigableMap<Double, String> psiToStructure = new TreeMap<>();
/** Reference surface area per amino acid type, used to normalize a residue's surface area. */
private static final HashMap<AminoAcid3, Double> standardSurfaceArea = new HashMap<>();
/** One-letter amino acid code to amino acid type. */
private static final HashMap<String, AminoAcid3> aminoAcidCodes = new HashMap<>();

/** The assembly whose chains, atoms and file name are inspected by {@link #getPPI(Residue)}. */
MolecularAssembly molecularAssembly;

/** Most recently computed phi angle in degrees (see {@link #getPhi(Residue)}). */
private double phi;
/** Most recently computed psi angle in degrees (see {@link #getPsi(Residue)}). */
private double psi;
/** Most recently computed omega angle in degrees (see {@link #getOmega(Residue)}). */
private double omega;
/** Running sum of every surface area passed to {@code saveFeatures}. */
private double totalSurfaceArea = 0.0;

/**
 * Creates a feature extractor bound to one molecular assembly.
 *
 * @param molecularAssembly the assembly to extract features from; stored as given.
 */
public GetProteinFeatures(MolecularAssembly molecularAssembly) {
  this.molecularAssembly = molecularAssembly;
}
74
75 static {
76
77 polarityMap.put(AminoAcid3.ARG, "polar");
78 polarityMap.put(AminoAcid3.ASN, "polar");
79 polarityMap.put(AminoAcid3.ASP, "polar");
80 polarityMap.put(AminoAcid3.ASH, "polar");
81 polarityMap.put(AminoAcid3.CYS, "polar");
82 polarityMap.put(AminoAcid3.GLN, "polar");
83 polarityMap.put(AminoAcid3.GLU, "polar");
84 polarityMap.put(AminoAcid3.GLH, "polar");
85 polarityMap.put(AminoAcid3.HIS, "polar");
86 polarityMap.put(AminoAcid3.HIE, "polar");
87 polarityMap.put(AminoAcid3.HID, "polar");
88 polarityMap.put(AminoAcid3.LYS, "polar");
89 polarityMap.put(AminoAcid3.LYD, "polar");
90 polarityMap.put(AminoAcid3.SER, "polar");
91 polarityMap.put(AminoAcid3.THR, "polar");
92 polarityMap.put(AminoAcid3.TYR, "polar");
93 polarityMap.put(AminoAcid3.ALA, "nonpolar");
94 polarityMap.put(AminoAcid3.GLY, "nonpolar");
95 polarityMap.put(AminoAcid3.ILE, "nonpolar");
96 polarityMap.put(AminoAcid3.LEU, "nonpolar");
97 polarityMap.put(AminoAcid3.MET, "nonpolar");
98 polarityMap.put(AminoAcid3.PHE, "nonpolar");
99 polarityMap.put(AminoAcid3.PRO, "nonpolar");
100 polarityMap.put(AminoAcid3.TRP, "nonpolar");
101 polarityMap.put(AminoAcid3.VAL, "nonpolar");
102
103
104 acidityMap.put(AminoAcid3.ASP, "acidic");
105 acidityMap.put(AminoAcid3.ASH, "acidic");
106 acidityMap.put(AminoAcid3.ASN, "acidic");
107 acidityMap.put(AminoAcid3.GLU, "acidic");
108 acidityMap.put(AminoAcid3.GLH, "acidic");
109 acidityMap.put(AminoAcid3.ARG, "basic");
110 acidityMap.put(AminoAcid3.HIS, "basic");
111 acidityMap.put(AminoAcid3.HIE, "basic");
112 acidityMap.put(AminoAcid3.HID, "basic");
113 acidityMap.put(AminoAcid3.LYS, "basic");
114 acidityMap.put(AminoAcid3.LYD, "basic");
115 acidityMap.put(AminoAcid3.LEU, "neutral");
116 acidityMap.put(AminoAcid3.GLN, "neutral");
117 acidityMap.put(AminoAcid3.GLY, "neutral");
118 acidityMap.put(AminoAcid3.ALA, "neutral");
119 acidityMap.put(AminoAcid3.VAL, "neutral");
120 acidityMap.put(AminoAcid3.ILE, "neutral");
121 acidityMap.put(AminoAcid3.SER, "neutral");
122 acidityMap.put(AminoAcid3.CYS, "neutral");
123 acidityMap.put(AminoAcid3.THR, "neutral");
124 acidityMap.put(AminoAcid3.MET, "neutral");
125 acidityMap.put(AminoAcid3.PRO, "neutral");
126 acidityMap.put(AminoAcid3.PHE, "neutral");
127 acidityMap.put(AminoAcid3.TYR, "neutral");
128 acidityMap.put(AminoAcid3.TRP, "neutral");
129
130
131 phiToStructure.put(-180.0, "Extended");
132 phiToStructure.put(-150.0, "Structure");
133 phiToStructure.put(-50.0, "Structure");
134 phiToStructure.put(0.0, "Extended");
135 phiToStructure.put(50.0, "Structure");
136 phiToStructure.put(70.0, "Structure");
137 phiToStructure.put(180.0, "Extended");
138
139
140 psiToStructure.put(-180.0, "Extended");
141 psiToStructure.put(-70.0, "Alpha Helix");
142 psiToStructure.put(-50.0, "Alpha Helix");
143 psiToStructure.put(0.0, "Extended");
144
145
146 psiToStructure.put(85.0, "Extended");
147 psiToStructure.put(100.0, "Beta Sheet");
148 psiToStructure.put(150.0, "Beta Sheet");
149
150 psiToStructure.put(180.0, "Extended");
151
152
153 standardSurfaceArea.put(AminoAcid3.ALA, 127.15871);
154 standardSurfaceArea.put(AminoAcid3.ARG, 269.42558);
155 standardSurfaceArea.put(AminoAcid3.ASH, 158.67385);
156 standardSurfaceArea.put(AminoAcid3.ASP, 159.97984);
157 standardSurfaceArea.put(AminoAcid3.ASN, 159.82709);
158 standardSurfaceArea.put(AminoAcid3.CYS, 154.52801);
159 standardSurfaceArea.put(AminoAcid3.GLH, 195.30608);
160 standardSurfaceArea.put(AminoAcid3.GLN, 197.75170);
161 standardSurfaceArea.put(AminoAcid3.GLU, 195.27081);
162 standardSurfaceArea.put(AminoAcid3.GLY, 95.346188);
163 standardSurfaceArea.put(AminoAcid3.HID, 202.31302);
164 standardSurfaceArea.put(AminoAcid3.HIE, 203.22195);
165 standardSurfaceArea.put(AminoAcid3.HIS, 205.40232);
166 standardSurfaceArea.put(AminoAcid3.ILE, 196.15872);
167 standardSurfaceArea.put(AminoAcid3.LEU, 192.85123);
168 standardSurfaceArea.put(AminoAcid3.LYD, 235.49182);
169 standardSurfaceArea.put(AminoAcid3.LYS, 236.71473);
170 standardSurfaceArea.put(AminoAcid3.MET, 216.53318);
171 standardSurfaceArea.put(AminoAcid3.PHE, 229.75038);
172 standardSurfaceArea.put(AminoAcid3.PRO, 157.30011);
173 standardSurfaceArea.put(AminoAcid3.SER, 137.90720);
174 standardSurfaceArea.put(AminoAcid3.THR, 157.33759);
175 standardSurfaceArea.put(AminoAcid3.TRP, 262.32819);
176 standardSurfaceArea.put(AminoAcid3.TYR, 239.91172);
177 standardSurfaceArea.put(AminoAcid3.VAL, 171.89211);
178
179
180 aminoAcidCodes.put("A", AminoAcid3.ALA);
181 aminoAcidCodes.put("R", AminoAcid3.ARG);
182 aminoAcidCodes.put("N", AminoAcid3.ASN);
183 aminoAcidCodes.put("D", AminoAcid3.ASP);
184 aminoAcidCodes.put("C", AminoAcid3.CYS);
185 aminoAcidCodes.put("E", AminoAcid3.GLU);
186 aminoAcidCodes.put("Q", AminoAcid3.GLN);
187 aminoAcidCodes.put("G", AminoAcid3.GLY);
188 aminoAcidCodes.put("H", AminoAcid3.HIS);
189 aminoAcidCodes.put("I", AminoAcid3.ILE);
190 aminoAcidCodes.put("L", AminoAcid3.LEU);
191 aminoAcidCodes.put("K", AminoAcid3.LYS);
192 aminoAcidCodes.put("M", AminoAcid3.MET);
193 aminoAcidCodes.put("F", AminoAcid3.PHE);
194 aminoAcidCodes.put("P", AminoAcid3.PRO);
195 aminoAcidCodes.put("S", AminoAcid3.SER);
196 aminoAcidCodes.put("T", AminoAcid3.THR);
197 aminoAcidCodes.put("W", AminoAcid3.TRP);
198 aminoAcidCodes.put("Y", AminoAcid3.TYR);
199 aminoAcidCodes.put("V", AminoAcid3.VAL);
200 }
201
202
203
204
205
206
207
208
209
210
211
212
213 public String[] saveFeatures(Residue residue, double surfaceArea, boolean includeAngles,
214 boolean includeStructure, boolean includePPI) {
215 int nFeat = 3;
216 if (includeAngles) {
217 nFeat += 3;
218 }
219 if (includeStructure) {
220 nFeat += 1;
221 }
222 if(includePPI){
223 nFeat += 1;
224 }
225
226 String[] features = new String[nFeat];
227 String structure;
228 String phiString;
229 String psiString;
230 String omegaString;
231 String interfaceResString = "";
232
233 if (residue.getNextResidue() == null) {
234
235
236 structure = "Extended";
237 getPhi(residue);
238 phiString = String.valueOf(phi);
239 psiString = null;
240 omegaString = null;
241 } else if (residue.getPreviousResidue() == null) {
242 structure = "Extended";
243 getPsi(residue);
244 getOmega(residue);
245 psiString = String.valueOf(psi);
246 omegaString = String.valueOf(omega);
247 phiString = null;
248 } else {
249 getPhi(residue);
250 getPsi(residue);
251 getOmega(residue);
252 phiString = String.valueOf(phi);
253 psiString = String.valueOf(psi);
254 omegaString = String.valueOf(omega);
255 structure = getSecondaryStructure();
256 }
257
258 totalSurfaceArea += surfaceArea;
259 String surfaceAreaString = String.valueOf(Math.floor(surfaceArea*100)/100);
260
261 double standSurfaceArea = standardSurfaceArea.getOrDefault(residue.getAminoAcid3(), 0.0);
262 String normalizedSAString = "";
263 if (standSurfaceArea != 0.0) {
264 double normSA = surfaceArea / standSurfaceArea;
265 normSA = Math.floor(normSA * 100) / 100;
266 if(normSA > 1.0){
267 normSA = 1.0;
268 }
269 normalizedSAString = String.valueOf(normSA);
270
271 }
272 String confidence = String.valueOf(getConfidenceScore(residue));
273 String interactingGene = " ";
274 if(includePPI){
275 interactingGene = getPPI(residue);
276 if(!interactingGene.equals(" ")){
277 interfaceResString = interactingGene;
278 }
279 }
280
281 features[0] = surfaceAreaString;
282 features[1] = normalizedSAString;
283 features[2] = confidence;
284 if (includeAngles) {
285 features[3] = phiString;
286 features[4] = psiString;
287 features[5] = omegaString;
288 if (includeStructure) {
289 features[6] = structure;
290 }
291 if(includePPI){
292 features[7] = interfaceResString;
293 }
294 } else if (includeStructure) {
295 features[3] = structure;
296 if(includePPI){
297 features[4] = interfaceResString;
298 }
299 } else if(includePPI){
300 features[3] = interfaceResString;
301 }
302 return features;
303 }
304
305
306
307
308
309
310 public void getPhi(Residue currentRes) {
311 Residue previousRes = currentRes.getPreviousResidue();
312 double[] cCoor = new double[3];
313 double[] nCoor = new double[3];
314 double[] caCoor = new double[3];
315 double[] c2Coor = new double[3];
316 phi = (DoubleMath.dihedralAngle(previousRes.getAtomByName("C", true).getXYZ(cCoor),
317 currentRes.getAtomByName("N", true).getXYZ(nCoor),
318 currentRes.getAtomByName("CA", true).getXYZ(caCoor),
319 currentRes.getAtomByName("C", true).getXYZ(c2Coor))) * 180 / Math.PI;
320 }
321
322
323
324
325
326
327 public void getPsi(Residue currentRes) {
328
329 Residue nextRes = currentRes.getNextResidue();
330 double[] nCoor = new double[3];
331 double[] caCoor = new double[3];
332 double[] cCoor = new double[3];
333 double[] n2Coor = new double[3];
334 psi = (DoubleMath.dihedralAngle(currentRes.getAtomByName("N", true).getXYZ(nCoor),
335 currentRes.getAtomByName("CA", true).getXYZ(caCoor),
336 currentRes.getAtomByName("C", true).getXYZ(cCoor),
337 nextRes.getAtomByName("N", true).getXYZ(n2Coor))) * 180 / Math.PI;
338 }
339
340
341
342
343
344
345 public void getOmega(Residue currentRes) {
346
347 Residue nextRes = currentRes.getNextResidue();
348 double[] ca1Coor = new double[3];
349 double[] cCoor = new double[3];
350 double[] nCoor = new double[3];
351 double[] ca2Coor = new double[3];
352 omega = (DoubleMath.dihedralAngle(currentRes.getAtomByName("CA", true).getXYZ(ca1Coor),
353 currentRes.getAtomByName("C", true).getXYZ(cCoor),
354 nextRes.getAtomByName("N", true).getXYZ(nCoor),
355 nextRes.getAtomByName("CA", true).getXYZ(ca2Coor))) * 180 / Math.PI;
356 }
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374 public String getSecondaryStructure() {
375 String secondaryStructure;
376
377 Double lowPhiKey = phiToStructure.floorKey(phi);
378 String lowPhiStruct = phiToStructure.get(lowPhiKey);
379 Double highPhiKey = phiToStructure.ceilingKey(phi);
380 String highPhiStruct = phiToStructure.get(highPhiKey);
381 if (lowPhiStruct.equals("Extended") || highPhiStruct.equals("Extended")) {
382 secondaryStructure = "Extended";
383 } else {
384 Double lowPsiKey = psiToStructure.floorKey(psi);
385 String lowPsiStruct = psiToStructure.get(lowPsiKey);
386 Double highPsiKey = psiToStructure.ceilingKey(psi);
387 String highPsiStruct = psiToStructure.get(highPsiKey);
388 if (lowPsiStruct.equals("Extended") || highPsiStruct.equals("Extended")) {
389 secondaryStructure = "Extended";
390 } else {
391 secondaryStructure = lowPsiStruct;
392 }
393 }
394 return secondaryStructure;
395 }
396
397
398
399
400
401
/**
 * Returns the accumulated surface area of every residue passed to {@code saveFeatures} so far,
 * rounded down to a whole number.
 *
 * @return the floor of the running surface area total.
 */
public double getTotalSurfaceArea() {
return Math.floor(totalSurfaceArea);
}
405
406
407
408
409
410
411
412 public double getConfidenceScore(Residue currentRes) {
413 return currentRes.getAtomByName("CA", true).getTempFactor();
414 }
415
416
417
418
419
420
421
422 public List<String> ddgunToNPChange(List<String> ddgun) {
423 List<String> npChanges = new ArrayList<>();
424 for (String s : ddgun) {
425 String[] splits = s.split("\t");
426 String currentNP = splits[2];
427 String wt = String.valueOf(currentNP.charAt(0));
428 String mut = String.valueOf(currentNP.charAt(currentNP.length() - 1));
429 String pos = currentNP.substring(1, currentNP.length() - 1);
430 String wt3Letter = aminoAcidCodes.get(wt).toString();
431 String mut3Letter = aminoAcidCodes.get(mut).toString();
432 String wildType = wt3Letter.charAt(0) + wt3Letter.substring(1, 3).toLowerCase();
433 String mutant = mut3Letter.charAt(0) + mut3Letter.substring(1, 3).toLowerCase();
434 String npChange = "p." + wildType + pos + mutant;
435 npChanges.add(npChange);
436 }
437 return npChanges;
438 }
439
440
441
442
443
444
445
446 public List<Double[]> getDDGunValues(List<String> ddgun) {
447 List<Double[]> values = new ArrayList<>();
448 for (String s : ddgun) {
449 String[] splits = s.split("\t");
450 Double[] value = new Double[2];
451 value[0] = Double.parseDouble(splits[3]) * -1.0;
452 value[1] = Math.abs(Double.parseDouble(splits[3]));
453 values.add(value);
454 }
455 return values;
456 }
457
458
459
460
461
462
463
464
465
466 public List<String[]> getPolarityAndAcidityChange(List<String> npChanges, boolean includePolarity,
467 boolean includeAcidity) {
468 List<String[]> polarityAndAcidity = new ArrayList<>();
469 for (String npChange : npChanges) {
470 String change = npChange.split("p\\.")[1].toUpperCase(Locale.ROOT);
471 String[] value = new String[2];
472 AminoAcid3 wt = AminoAcid3.valueOf(change.substring(0, 3));
473 AminoAcid3 mut = AminoAcid3.valueOf(change.substring(change.length() - 3));
474 if (includeAcidity) {
475 if (acidityMap.get(wt).equals("basic") && acidityMap.get(mut).equals("neutral")) {
476 value[0] = "bn";
477 } else if (acidityMap.get(wt).equals("neutral") && acidityMap.get(mut).equals("basic")) {
478 value[0] = "nb";
479 } else if (acidityMap.get(wt).equals("acidic") && acidityMap.get(mut).equals("neutral")) {
480 value[0] = "an";
481 } else if (acidityMap.get(wt).equals("neutral") && acidityMap.get(mut).equals("acidic")) {
482 value[0] = "na";
483 } else if (acidityMap.get(wt).equals("basic") && acidityMap.get(mut).equals("acidic")) {
484 value[0] = "ba";
485 } else if (acidityMap.get(wt).equals("acidic") && acidityMap.get(mut).equals("basic")) {
486 value[0] = "ab";
487 } else if (acidityMap.get(wt).equals(acidityMap.get(mut))) {
488 value[0] = "=";
489 }
490 } else {
491 value[0] = null;
492 }
493
494 if (includePolarity) {
495 if (polarityMap.get(wt).equals("polar") && polarityMap.get(mut).equals("nonpolar")) {
496 value[1] = "-";
497 } else if (polarityMap.get(wt).equals("nonpolar") && polarityMap.get(mut).equals("polar")) {
498 value[1] = "+";
499 } else {
500 value[1] = "=";
501 }
502 } else {
503 value[1] = null;
504 }
505
506 polarityAndAcidity.add(value);
507 }
508 return polarityAndAcidity;
509 }
510
511 public String getPPI(Residue residue){
512 List<String> chainNames = Arrays.stream(molecularAssembly.getChainNames()).toList();
513 if(chainNames.size() == 1){
514 logger.info( " Only one chain in the structure.");
515 return " ";
516 }
517 String name = molecularAssembly.getFile().getName();
518 String[] geneSplit = name.split("_");
519 String[] genes = new String[chainNames.size()];
520
521 for(int i=1; i < chainNames.size() + 1; i++){
522 genes[i-1] = geneSplit[i];
523 }
524
525 char chainID = residue.getChainID();
526 for(Atom atom: molecularAssembly.getAtomList()){
527 if(atom.getChainID() != chainID){
528 for(Atom resAtom: residue.getAtomList()){
529 if(resAtom.getXYZ().dist(atom.getXYZ()) <= 10.0){
530 int index = chainNames.indexOf(atom.getChainID().toString());
531 return genes[index];
532 }
533 }
534 }
535
536 }
537 return " ";
538 }
539
540
541 }