1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38 package ffx.potential.commands;
39
40 import ffx.potential.ForceFieldEnergy;
41 import ffx.potential.bonded.Residue;
42 import ffx.potential.cli.PotentialCommand;
43 import ffx.potential.utils.GetProteinFeatures;
44 import ffx.utilities.FFXBinding;
45 import picocli.CommandLine.Command;
46 import picocli.CommandLine.Option;
47 import picocli.CommandLine.Parameters;
48
49 import java.io.BufferedReader;
50 import java.io.BufferedWriter;
51 import java.io.File;
52 import java.io.FileInputStream;
53 import java.io.FileReader;
54 import java.io.FileWriter;
55 import java.io.IOException;
56 import java.io.InputStreamReader;
57 import java.util.ArrayList;
58 import java.util.Arrays;
59 import java.util.List;
60
61
62
63
64
65
66
67 @Command(name = "FeatureMap", description = " Create a Feature Map for a given protein structure")
68 public class FeatureMap extends PotentialCommand {
69
70 @Option(names = {"-d", "--delimiter"}, paramLabel = ",",
71 description = "Delimiter of input variant list file")
72 private String delimiter = ",";
73
74 @Option(names = {"--iP", "--includePolarity"}, defaultValue = "false",
75 description = "Include polarity change in feature map.")
76 private boolean includePolarity = false;
77
78 @Option(names = {"--iA", "--includeAcidity"}, defaultValue = "false",
79 description = "Include acidity change in feature map.")
80 private boolean includeAcidity = false;
81
82 @Option(names = {"--iAng", "--includeAngles"}, defaultValue = "false",
83 description = "Include ramachandran angles in feature map.")
84 private boolean includeAngles = false;
85
86 @Option(names = {"--iS", "--includeStructure"}, defaultValue = "false",
87 description = "Include secondary structure annotations in feature map.")
88 private boolean includeStructure = false;
89
90 @Option(names = {"--iPPI", "--includePPI"}, defaultValue = "false",
91 description = "Mark residue as being part of an interaction interface.")
92 private boolean includePPI = false;
93
94 @Option(names = {"--rR", "--reRun"}, defaultValue = "false",
95 description = "Reading in a CSV a second time. Specifically for gathering ppi information")
96 private boolean rerun = false;
97
98 @Option(names = {"--ddG", "--ddgFile"}, paramLabel = "file",
99 description = "Read in the folding free energy difference file")
100 private String ddgFile;
101
102 @Option(names = {"--mI", "--multiple isomers"}, defaultValue = "false",
103 description = "Set this flag if the variant list contains variants from multiple isomers. Isomer should be in name of pdb file")
104 private boolean multipleIsoforms = false;
105
106
107 @Parameters(arity = "1", paramLabel = "file",
108 description = "The atomic coordinate file in PDB or XYZ format, variant list file, and a free energy file")
109 private List<String> filenames = null;
110
111 private List<Residue> residues;
112
113 public FeatureMap() { super(); }
114 public FeatureMap(FFXBinding binding) { super(binding); }
115 public FeatureMap(String[] args) { super(args); }
116
117 @Override
118 public FeatureMap run() {
119
120 if (!init()) {
121 return null;
122 }
123
124
125 System.setProperty("gkterm", "true");
126 System.setProperty("cavmodel", "cav");
127 System.setProperty("surface-tension", "1.0");
128
129
130 String structureFile = filenames != null && !filenames.isEmpty() ? filenames.get(0) : null;
131 activeAssembly = getActiveAssembly(structureFile);
132 if (activeAssembly == null) {
133 logger.info(helpString());
134 return null;
135 }
136
137 ForceFieldEnergy forceFieldEnergy = activeAssembly.getPotentialEnergy();
138 int nVars = forceFieldEnergy.getNumberOfVariables();
139 double[] x = new double[nVars];
140 forceFieldEnergy.getCoordinates(x);
141 forceFieldEnergy.energy(x);
142
143 residues = activeAssembly.getResidueList();
144 GetProteinFeatures getProteinFeatures = new GetProteinFeatures(activeAssembly);
145
146
147 String fileIsoform = null;
148 if (multipleIsoforms) {
149 String baseName = new File(structureFile).getName();
150 baseName = baseName.replaceFirst("\\.pdb$", "");
151 if (baseName.toUpperCase().contains("ENS")) {
152 String[] geneSplit = baseName.split("_");
153 if (geneSplit.length > 1) {
154 fileIsoform = geneSplit[1];
155 }
156 } else {
157 String[] geneSplit = baseName.split("_");
158 int index = -1;
159 for (int i = 0; i < geneSplit.length; i++) {
160 if (geneSplit[i].equalsIgnoreCase("NP")) {
161 index = i;
162 }
163 }
164 if (index >= 0 && index + 1 < geneSplit.length) {
165 fileIsoform = (geneSplit[index] + '_' + geneSplit[index + 1]).toUpperCase();
166 }
167 }
168 }
169
170
171 List<String[]> featureList = new ArrayList<>();
172 for (Residue residue : residues) {
173 double residueSurfaceArea = forceFieldEnergy.getGK().getSurfaceAreaRegion().getResidueSurfaceArea(residue);
174 featureList.add(getProteinFeatures.saveFeatures(residue, residueSurfaceArea, includeAngles, includeStructure, includePPI));
175 }
176
177
178 List<String> ddgunLines = new ArrayList<>();
179 List<String> npChanges = new ArrayList<>();
180 List<Double[]> ddGun = new ArrayList<>();
181 List<String[]> polarityAndAcidityChange = new ArrayList<>();
182 if (ddgFile != null) {
183 try (BufferedReader txtReader = new BufferedReader(new FileReader(new File(ddgFile)))) {
184 String line;
185 while ((line = txtReader.readLine()) != null) {
186 if (line.contains(".pdb")) {
187 ddgunLines.add(line);
188 }
189 }
190 } catch (IOException e) {
191 logger.warning(" Error reading ddG file: " + e);
192 }
193 ddGun = getProteinFeatures.getDDGunValues(ddgunLines);
194 npChanges = getProteinFeatures.ddgunToNPChange(ddgunLines);
195 if (includePolarity || includeAcidity) {
196 polarityAndAcidityChange = getProteinFeatures.getPolarityAndAcidityChange(npChanges, includePolarity, includeAcidity);
197 }
198 }
199
200
201 BufferedReader br = null;
202 BufferedWriter bw = null;
203 try {
204 String variantsCSV = filenames.get(1);
205 File inputCSVFile = new File(variantsCSV);
206 String inputCSVPath = inputCSVFile.getParent();
207 if (inputCSVPath == null) inputCSVPath = ".";
208 String newCSVFileName = "update_" + inputCSVFile.getName();
209 File updatedFile = new File(inputCSVPath, newCSVFileName);
210
211 br = new BufferedReader(new InputStreamReader(new FileInputStream(inputCSVFile)));
212 bw = new BufferedWriter(new FileWriter(updatedFile, true));
213
214 int i = 0;
215 int npIndex = -1;
216 int isoformIndex = -1;
217 for (String line = br.readLine(); line != null; line = br.readLine(), i++) {
218 StringBuilder newCSVLine = new StringBuilder();
219 if (i == 0) {
220 if (!rerun) {
221 if (updatedFile.length() == 0) {
222 newCSVLine.append(line).append(delimiter)
223 .append("Surface Area").append(delimiter)
224 .append("Normalized SA").append(delimiter)
225 .append("Confidence Score");
226 if (ddgFile != null) {
227 newCSVLine.append(delimiter).append("ddG").append(delimiter).append("|ddG|");
228 }
229 if (includeAcidity) {
230 newCSVLine.append(delimiter).append("Acidity Change");
231 }
232 if (includePolarity) {
233 newCSVLine.append(delimiter).append("Polarity Change");
234 }
235 if (includeAngles) {
236 newCSVLine.append(delimiter).append("Phi").append(delimiter).append("Psi").append(delimiter).append("Omega");
237 }
238 if (includeStructure) {
239 newCSVLine.append(delimiter).append("Secondary Structure Annotation");
240 }
241 if (includePPI) {
242 newCSVLine.append(delimiter).append("Interface Residue");
243 }
244 bw.write(newCSVLine.toString());
245 } else {
246 bw.write(line);
247 bw.newLine();
248 }
249 }
250 } else {
251 String[] splits = line.split(delimiter, -1);
252 if (i == 1) {
253 for (int j = 0; j < splits.length; j++) {
254 if (splits[j].contains("p.")) {
255 npIndex = j;
256 }
257 if (multipleIsoforms) {
258 String sj = splits[j];
259 String sju = sj.toUpperCase();
260 if (sju.contains("NP") || sju.contains("XP") || sju.contains("ENS")) {
261 isoformIndex = j;
262 }
263 }
264 }
265 }
266 int length = splits.length;
267 String npChange = npIndex >= 0 && npIndex < splits.length ? splits[npIndex] : "";
268
269 String[] feat = featureList.isEmpty() ? new String[0] : new String[featureList.get(0).length];
270 Arrays.fill(feat, null);
271 String[] ddG = new String[]{"null", "null"};
272 String[] pA = new String[]{"null", "null"};
273
274 if (npChange.contains("del")) {
275
276 } else if (!featureList.isEmpty()) {
277 int position = -1;
278 int pIndex = npChange.indexOf('p');
279 if (pIndex >= 0) {
280 String proteinChange = npChange.substring(pIndex);
281 String[] parts = proteinChange.split("p\\.");
282 if (parts.length > 1) {
283 String sub = parts[1];
284 if (sub.length() >= 6) {
285 try {
286 position = Integer.parseInt(sub.substring(3, sub.length() - 3));
287 } catch (Exception ignored) {
288 }
289 }
290 }
291 if (position > 0 && position <= residues.size()) {
292 if (ddgFile != null) {
293 int idx = npChanges.indexOf(proteinChange);
294 if (idx != -1) {
295 Double[] d = ddGun.get(idx);
296 if (d != null && d.length >= 2) {
297 ddG = new String[]{String.valueOf(d[0]), String.valueOf(d[1])};
298 }
299 if (includeAcidity || includePolarity) {
300 String[] pa = polarityAndAcidityChange.get(idx);
301 if (pa != null && pa.length >= 2) {
302 pA = new String[]{pa[0], pa[1]};
303 }
304 }
305 }
306 }
307 feat = featureList.get(position - 1);
308 }
309 }
310 }
311
312 String isoform = null;
313 if (multipleIsoforms && isoformIndex >= 0 && isoformIndex < splits.length) {
314 String isoStr = splits[isoformIndex];
315 isoform = isoStr.contains(":p.") ? isoStr.split(":")[0] : isoStr;
316 }
317
318 if (splits.length == length) {
319 if (multipleIsoforms) {
320 if (fileIsoform != null) {
321 String isofU = (isoform == null ? "" : isoform).toUpperCase();
322 if (!isofU.contains(fileIsoform.toUpperCase())) {
323 continue;
324 }
325 }
326 }
327 if (rerun) {
328 newCSVLine.append(line);
329 } else {
330 if (ddgFile != null) {
331 newCSVLine.append(line).append(delimiter)
332 .append(safe(feat, 0)).append(delimiter)
333 .append(safe(feat, 1)).append(delimiter)
334 .append(safe(feat, 2)).append(delimiter)
335 .append(ddG[0]).append(delimiter).append(ddG[1]);
336 } else {
337 newCSVLine.append(line).append(delimiter)
338 .append(safe(feat, 0)).append(delimiter)
339 .append(safe(feat, 1)).append(delimiter)
340 .append(safe(feat, 2));
341 }
342 }
343
344 if (includeAcidity && !rerun) {
345 newCSVLine.append(delimiter).append(pA[0]);
346 }
347 if (includePolarity && !rerun) {
348 newCSVLine.append(delimiter).append(pA[1]);
349 }
350 if (includeAngles && !rerun) {
351 newCSVLine.append(delimiter).append(safe(feat, 3))
352 .append(delimiter).append(safe(feat, 4))
353 .append(delimiter).append(safe(feat, 5));
354 if (includeStructure) {
355 newCSVLine.append(delimiter).append(safe(feat, 6));
356 }
357 if (includePPI) {
358 newCSVLine.append(delimiter).append(safe(feat, 7));
359 }
360 } else if (includeStructure && !rerun) {
361 newCSVLine.append(delimiter).append(safe(feat, 3));
362 if (includePPI) {
363 newCSVLine.append(delimiter).append(safe(feat, 4));
364 }
365 } else if (includePPI) {
366 newCSVLine.append(delimiter).append(safe(feat, 3));
367 }
368 bw.newLine();
369 bw.write(newCSVLine.toString());
370 }
371 }
372 }
373 } catch (Exception e) {
374 logger.warning(" Error updating CSV: " + e);
375 } finally {
376 try {
377 if (br != null) br.close();
378 } catch (IOException ignored) {}
379 try {
380 if (bw != null) bw.close();
381 } catch (IOException ignored) {}
382 }
383
384 logger.info(" Wrote variants with feature data to update_" + (filenames != null && filenames.size() > 1 ? new File(filenames.get(1)).getName() : "variants.csv"));
385 return this;
386 }
387
388 private static String safe(String[] arr, int idx) {
389 if (arr == null || idx < 0 || idx >= arr.length) return "";
390 return arr[idx] == null ? "" : arr[idx];
391 }
392 }