View Javadoc
1   //******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2025.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  //******************************************************************************
38  package ffx.potential.commands;
39  
40  import ffx.potential.ForceFieldEnergy;
41  import ffx.potential.bonded.Residue;
42  import ffx.potential.cli.PotentialCommand;
43  import ffx.potential.utils.GetProteinFeatures;
44  import ffx.utilities.FFXBinding;
45  import picocli.CommandLine.Command;
46  import picocli.CommandLine.Option;
47  import picocli.CommandLine.Parameters;
48  
49  import java.io.BufferedReader;
50  import java.io.BufferedWriter;
51  import java.io.File;
52  import java.io.FileInputStream;
53  import java.io.FileReader;
54  import java.io.FileWriter;
55  import java.io.IOException;
56  import java.io.InputStreamReader;
57  import java.util.ArrayList;
58  import java.util.Arrays;
59  import java.util.List;
60  
61  /**
62   * Create a Feature Map for a given protein structure.
63   *
64   * Usage:
65   *   ffxc FeatureMap [options] <pdb/xyz> <variants.csv> [ddgFile]
66   */
67  @Command(name = "FeatureMap", description = " Create a Feature Map for a given protein structure")
68  public class FeatureMap extends PotentialCommand {
69  
70    @Option(names = {"-d", "--delimiter"}, paramLabel = ",",
71        description = "Delimiter of input variant list file")
72    private String delimiter = ",";
73  
74    @Option(names = {"--iP", "--includePolarity"}, defaultValue = "false",
75        description = "Include polarity change in feature map.")
76    private boolean includePolarity = false;
77  
78    @Option(names = {"--iA", "--includeAcidity"}, defaultValue = "false",
79        description = "Include acidity change in feature map.")
80    private boolean includeAcidity = false;
81  
82    @Option(names = {"--iAng", "--includeAngles"}, defaultValue = "false",
83        description = "Include ramachandran angles in feature map.")
84    private boolean includeAngles = false;
85  
86    @Option(names = {"--iS", "--includeStructure"}, defaultValue = "false",
87        description = "Include secondary structure annotations in feature map.")
88    private boolean includeStructure = false;
89  
90    @Option(names = {"--iPPI", "--includePPI"}, defaultValue = "false",
91        description = "Mark residue as being part of an interaction interface.")
92    private boolean includePPI = false;
93  
94    @Option(names = {"--rR", "--reRun"}, defaultValue = "false",
95        description = "Reading in a CSV a second time. Specifically for gathering ppi information")
96    private boolean rerun = false;
97  
98    @Option(names = {"--ddG", "--ddgFile"}, paramLabel = "file",
99        description = "Read in the folding free energy difference file")
100   private String ddgFile;
101 
102   @Option(names = {"--mI", "--multiple isomers"}, defaultValue = "false",
103       description = "Set this flag if the variant list contains variants from multiple isomers. Isomer should be in name of pdb file")
104   private boolean multipleIsoforms = false;
105 
106   /** The final argument(s) should be one or more filenames. */
107   @Parameters(arity = "1", paramLabel = "file",
108       description = "The atomic coordinate file in PDB or XYZ format, variant list file, and a free energy file")
109   private List<String> filenames = null;
110 
111   private List<Residue> residues;
112 
113   public FeatureMap() { super(); }
114   public FeatureMap(FFXBinding binding) { super(binding); }
115   public FeatureMap(String[] args) { super(args); }
116 
117   @Override
118   public FeatureMap run() {
119     // Init the context and bind variables.
120     if (!init()) {
121       return null;
122     }
123 
124     // Enable GK surface area for confidence calculation as in Groovy script.
125     System.setProperty("gkterm", "true");
126     System.setProperty("cavmodel", "cav");
127     System.setProperty("surface-tension", "1.0");
128 
129     // Load the MolecularAssembly.
130     String structureFile = filenames != null && !filenames.isEmpty() ? filenames.get(0) : null;
131     activeAssembly = getActiveAssembly(structureFile);
132     if (activeAssembly == null) {
133       logger.info(helpString());
134       return null;
135     }
136 
137     ForceFieldEnergy forceFieldEnergy = activeAssembly.getPotentialEnergy();
138     int nVars = forceFieldEnergy.getNumberOfVariables();
139     double[] x = new double[nVars];
140     forceFieldEnergy.getCoordinates(x);
141     forceFieldEnergy.energy(x);
142 
143     residues = activeAssembly.getResidueList();
144     GetProteinFeatures getProteinFeatures = new GetProteinFeatures(activeAssembly);
145 
146     // Handle multiple isoforms by parsing the PDB basename.
147     String fileIsoform = null;
148     if (multipleIsoforms) {
149       String baseName = new File(structureFile).getName();
150       baseName = baseName.replaceFirst("\\.pdb$", "");
151       if (baseName.toUpperCase().contains("ENS")) {
152         String[] geneSplit = baseName.split("_");
153         if (geneSplit.length > 1) {
154           fileIsoform = geneSplit[1];
155         }
156       } else {
157         String[] geneSplit = baseName.split("_");
158         int index = -1;
159         for (int i = 0; i < geneSplit.length; i++) {
160           if (geneSplit[i].equalsIgnoreCase("NP")) {
161             index = i;
162           }
163         }
164         if (index >= 0 && index + 1 < geneSplit.length) {
165           fileIsoform = (geneSplit[index] + '_' + geneSplit[index + 1]).toUpperCase();
166         }
167       }
168     }
169 
170     // Build per-residue feature list (surface area, normalized SA, confidence, optional others).
171     List<String[]> featureList = new ArrayList<>();
172     for (Residue residue : residues) {
173       double residueSurfaceArea = forceFieldEnergy.getGK().getSurfaceAreaRegion().getResidueSurfaceArea(residue);
174       featureList.add(getProteinFeatures.saveFeatures(residue, residueSurfaceArea, includeAngles, includeStructure, includePPI));
175     }
176 
177     // Optional ddG input parsing for ddgun output.
178     List<String> ddgunLines = new ArrayList<>();
179     List<String> npChanges = new ArrayList<>();
180     List<Double[]> ddGun = new ArrayList<>();
181     List<String[]> polarityAndAcidityChange = new ArrayList<>();
182     if (ddgFile != null) {
183       try (BufferedReader txtReader = new BufferedReader(new FileReader(new File(ddgFile)))) {
184         String line;
185         while ((line = txtReader.readLine()) != null) {
186           if (line.contains(".pdb")) {
187             ddgunLines.add(line);
188           }
189         }
190       } catch (IOException e) {
191         logger.warning(" Error reading ddG file: " + e);
192       }
193       ddGun = getProteinFeatures.getDDGunValues(ddgunLines);
194       npChanges = getProteinFeatures.ddgunToNPChange(ddgunLines);
195       if (includePolarity || includeAcidity) {
196         polarityAndAcidityChange = getProteinFeatures.getPolarityAndAcidityChange(npChanges, includePolarity, includeAcidity);
197       }
198     }
199 
200     // Write updated CSV alongside the input variants CSV (filenames[1]).
201     BufferedReader br = null;
202     BufferedWriter bw = null;
203     try {
204       String variantsCSV = filenames.get(1);
205       File inputCSVFile = new File(variantsCSV);
206       String inputCSVPath = inputCSVFile.getParent();
207       if (inputCSVPath == null) inputCSVPath = ".";
208       String newCSVFileName = "update_" + inputCSVFile.getName();
209       File updatedFile = new File(inputCSVPath, newCSVFileName);
210 
211       br = new BufferedReader(new InputStreamReader(new FileInputStream(inputCSVFile)));
212       bw = new BufferedWriter(new FileWriter(updatedFile, true));
213 
214       int i = 0;
215       int npIndex = -1;
216       int isoformIndex = -1;
217       for (String line = br.readLine(); line != null; line = br.readLine(), i++) {
218         StringBuilder newCSVLine = new StringBuilder();
219         if (i == 0) {
220           if (!rerun) {
221             if (updatedFile.length() == 0) {
222               newCSVLine.append(line).append(delimiter)
223                   .append("Surface Area").append(delimiter)
224                   .append("Normalized SA").append(delimiter)
225                   .append("Confidence Score");
226               if (ddgFile != null) {
227                 newCSVLine.append(delimiter).append("ddG").append(delimiter).append("|ddG|");
228               }
229               if (includeAcidity) {
230                 newCSVLine.append(delimiter).append("Acidity Change");
231               }
232               if (includePolarity) {
233                 newCSVLine.append(delimiter).append("Polarity Change");
234               }
235               if (includeAngles) {
236                 newCSVLine.append(delimiter).append("Phi").append(delimiter).append("Psi").append(delimiter).append("Omega");
237               }
238               if (includeStructure) {
239                 newCSVLine.append(delimiter).append("Secondary Structure Annotation");
240               }
241               if (includePPI) {
242                 newCSVLine.append(delimiter).append("Interface Residue");
243               }
244               bw.write(newCSVLine.toString());
245             } else {
246               bw.write(line);
247               bw.newLine();
248             }
249           }
250         } else {
251           String[] splits = line.split(delimiter, -1);
252           if (i == 1) {
253             for (int j = 0; j < splits.length; j++) {
254               if (splits[j].contains("p.")) {
255                 npIndex = j;
256               }
257               if (multipleIsoforms) {
258                 String sj = splits[j];
259                 String sju = sj.toUpperCase();
260                 if (sju.contains("NP") || sju.contains("XP") || sju.contains("ENS")) {
261                   isoformIndex = j;
262                 }
263               }
264             }
265           }
266           int length = splits.length;
267           String npChange = npIndex >= 0 && npIndex < splits.length ? splits[npIndex] : "";
268 
269           String[] feat = featureList.isEmpty() ? new String[0] : new String[featureList.get(0).length];
270           Arrays.fill(feat, null);
271           String[] ddG = new String[]{"null", "null"};
272           String[] pA = new String[]{"null", "null"};
273 
274           if (npChange.contains("del")) {
275             // leave feat as nulls and ddG/pA as "null" strings
276           } else if (!featureList.isEmpty()) {
277             int position = -1;
278             int pIndex = npChange.indexOf('p');
279             if (pIndex >= 0) {
280               String proteinChange = npChange.substring(pIndex);
281               String[] parts = proteinChange.split("p\\.");
282               if (parts.length > 1) {
283                 String sub = parts[1];
284                 if (sub.length() >= 6) {
285                   try {
286                     position = Integer.parseInt(sub.substring(3, sub.length() - 3));
287                   } catch (Exception ignored) {
288                   }
289                 }
290               }
291               if (position > 0 && position <= residues.size()) {
292                 if (ddgFile != null) {
293                   int idx = npChanges.indexOf(proteinChange);
294                   if (idx != -1) {
295                     Double[] d = ddGun.get(idx);
296                     if (d != null && d.length >= 2) {
297                       ddG = new String[]{String.valueOf(d[0]), String.valueOf(d[1])};
298                     }
299                     if (includeAcidity || includePolarity) {
300                       String[] pa = polarityAndAcidityChange.get(idx);
301                       if (pa != null && pa.length >= 2) {
302                         pA = new String[]{pa[0], pa[1]};
303                       }
304                     }
305                   }
306                 }
307                 feat = featureList.get(position - 1);
308               }
309             }
310           }
311 
312           String isoform = null;
313           if (multipleIsoforms && isoformIndex >= 0 && isoformIndex < splits.length) {
314             String isoStr = splits[isoformIndex];
315             isoform = isoStr.contains(":p.") ? isoStr.split(":")[0] : isoStr;
316           }
317 
318           if (splits.length == length) {
319             if (multipleIsoforms) {
320               if (fileIsoform != null) {
321                 String isofU = (isoform == null ? "" : isoform).toUpperCase();
322                 if (!isofU.contains(fileIsoform.toUpperCase())) {
323                   continue;
324                 }
325               }
326             }
327             if (rerun) {
328               newCSVLine.append(line);
329             } else {
330               if (ddgFile != null) {
331                 newCSVLine.append(line).append(delimiter)
332                     .append(safe(feat, 0)).append(delimiter)
333                     .append(safe(feat, 1)).append(delimiter)
334                     .append(safe(feat, 2)).append(delimiter)
335                     .append(ddG[0]).append(delimiter).append(ddG[1]);
336               } else {
337                 newCSVLine.append(line).append(delimiter)
338                     .append(safe(feat, 0)).append(delimiter)
339                     .append(safe(feat, 1)).append(delimiter)
340                     .append(safe(feat, 2));
341               }
342             }
343 
344             if (includeAcidity && !rerun) {
345               newCSVLine.append(delimiter).append(pA[0]);
346             }
347             if (includePolarity && !rerun) {
348               newCSVLine.append(delimiter).append(pA[1]);
349             }
350             if (includeAngles && !rerun) {
351               newCSVLine.append(delimiter).append(safe(feat, 3))
352                   .append(delimiter).append(safe(feat, 4))
353                   .append(delimiter).append(safe(feat, 5));
354               if (includeStructure) {
355                 newCSVLine.append(delimiter).append(safe(feat, 6));
356               }
357               if (includePPI) {
358                 newCSVLine.append(delimiter).append(safe(feat, 7));
359               }
360             } else if (includeStructure && !rerun) {
361               newCSVLine.append(delimiter).append(safe(feat, 3));
362               if (includePPI) {
363                 newCSVLine.append(delimiter).append(safe(feat, 4));
364               }
365             } else if (includePPI) {
366               newCSVLine.append(delimiter).append(safe(feat, 3));
367             }
368             bw.newLine();
369             bw.write(newCSVLine.toString());
370           }
371         }
372       }
373     } catch (Exception e) {
374       logger.warning(" Error updating CSV: " + e);
375     } finally {
376       try {
377         if (br != null) br.close();
378       } catch (IOException ignored) {}
379       try {
380         if (bw != null) bw.close();
381       } catch (IOException ignored) {}
382     }
383 
384     logger.info(" Wrote variants with feature data to update_" + (filenames != null && filenames.size() > 1 ? new File(filenames.get(1)).getName() : "variants.csv"));
385     return this;
386   }
387 
388   private static String safe(String[] arr, int idx) {
389     if (arr == null || idx < 0 || idx >= arr.length) return "";
390     return arr[idx] == null ? "" : arr[idx];
391   }
392 }