View Javadoc
1   //******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2025.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  //******************************************************************************
38  package ffx.algorithms.commands;
39  
40  import ffx.algorithms.cli.AlgorithmsCommand;
41  import ffx.numerics.Potential;
42  import ffx.potential.MolecularAssembly;
43  import ffx.potential.bonded.AminoAcidUtils.AminoAcid3;
44  import ffx.potential.bonded.Atom;
45  import ffx.potential.bonded.Residue;
46  import ffx.potential.cli.AtomSelectionOptions;
47  import ffx.potential.parsers.SystemFilter;
48  import ffx.utilities.FFXBinding;
49  import org.apache.commons.lang3.StringUtils;
50  import picocli.CommandLine.Command;
51  import picocli.CommandLine.Mixin;
52  import picocli.CommandLine.Option;
53  import picocli.CommandLine.Parameters;
54  
55  import java.util.ArrayList;
56  import java.util.Collections;
57  import java.util.List;
58  
59  /**
60   * The Superpose script superposes molecules in an arc/multiple model pdb file (all versus all or one versus all) or in two pdb/xyz files.
61   * TODO: Create a Superpose Unit Test.
62   *
63   * <br>
64   * Usage:
65   * <br>
66   * ffxc Superpose [options] &lt;filename&gt;
67   */
68  @Command(description = " Superpose frames one or two trajectory files to calculate RMSD.", name = "Superpose")
69  public class Superpose extends AlgorithmsCommand {
70  
71    @Mixin
72    private AtomSelectionOptions atomSelectionOptions;
73  
74    /**
75     * --bb or --backboneSelection Restrict selection to backbone atoms [0 == C_ALPHA or 1 == BACKBONE]. Note C_ALPHA uses N1 or N9 for NA.
76     */
77    @Option(names = {"--bb", "--backboneSelection"}, paramLabel = "-1", defaultValue = "-1",
78        description = "Restrict selection to backbone atoms [0 == C_ALPHA or 1 == BACKBONE]. Note C_ALPHA uses N1 or N9 for NA.")
79    private int backboneSelection;
80  
81    /**
82     * --ih or --includeHydrogen Include hydrogen atoms.
83     */
84    @Option(names = {"--ih", "--includeHydrogen"}, paramLabel = "false", defaultValue = "false",
85        description = "Include hydrogen atoms.")
86    private static boolean includeHydrogen;
87  
88    /**
89     * --ss or --secondaryStructure Restrict selection using a secondary structure string.
90     */
91    @Option(names = {"--ss", "--secondaryStructure"}, paramLabel = "", defaultValue = "",
92        description = "Restrict selection using a secondary structure string.")
93    private String secondaryStructure;
94  
95    /**
96     * --dRMSD Calculate dRMSD in addition to RMSD.
97     */
98    @Option(names = {"--dRMSD"}, paramLabel = "false", defaultValue = "false",
99        description = "Calculate dRMSD in addition to RMSD.")
100   private boolean dRMSD;
101 
102   /**
103    * --ps or --printSymOp Print optimal SymOp to align structure 2 to structure 1.
104    */
105   @Option(names = {"--ps", "--printSymOp"}, paramLabel = "false", defaultValue = "false",
106       description = "Print optimal SymOp to align input structures.")
107   private static boolean printSym;
108 
109   /**
110    * -w or --write Write out the RMSD matrix.
111    */
112   @Option(names = {"-w", "--write"}, paramLabel = "false", defaultValue = "false",
113       description = "Write out the RMSD matrix.")
114   private static boolean write;
115 
116   /**
117    * -r or --restart Attempt to restart from a previously written RMSD matrix.
118    */
119   @Option(names = {"-r", "--restart"}, paramLabel = "false", defaultValue = "false",
120       description = "Attempt to restart from a previously written RMSD matrix.")
121   private static boolean restart;
122 
123   /**
124    * --save or --saveSnapshots Save out superposed snapshots.
125    */
126   @Option(names = {"--save", "--saveSnapshots"}, paramLabel = "false", defaultValue = "false",
127       description = "Save superposed snapshots (only for single process jobs).")
128   private boolean saveSnapshots;
129 
130   /**
131    * -v or --verbose Print out RMSD information.
132    */
133   @Option(names = {"-v", "--verbose"}, paramLabel = "true", defaultValue = "true",
134       description = "Write out RMSD information.")
135   private boolean verbose;
136 
137   /**
138    * The arguments are one or two atomic coordinate files in PDB or XYZ format.
139    */
140   @Parameters(arity = "1..2", paramLabel = "files",
141       description = "Atomic coordinate file(s) to compare in PDB or XYZ format.")
142   List<String> filenames = null;
143 
144   /**
145    * Superpose Constructor.
146    */
147   public Superpose() {
148     super();
149   }
150 
151   /**
152    * Superpose Constructor.
153    *
154    * @param binding Groovy to use.
155    */
156   public Superpose(FFXBinding binding) {
157     super(binding);
158   }
159 
160   /**
161    * Superpose constructor that sets the command line arguments.
162    *
163    * @param args Command line arguments.
164    */
165   public Superpose(String[] args) {
166     super(args);
167   }
168 
169   /**
170    * Execute the script.
171    */
172   @Override
173   public Superpose run() {
174 
175     // Init the context and bind variables.
176     if (!init() || filenames == null || filenames.size() < 1) {
177       return this;
178     }
179 
180     // Turn off non-bonded terms for efficiency.
181     System.setProperty("vdwterm", "false");
182 
183     // Load the MolecularAssembly.
184     activeAssembly = getActiveAssembly(filenames.get(0));
185     if (activeAssembly == null) {
186       logger.info(helpString());
187       return this;
188     }
189 
190     // SystemFilter containing structures stored in file 0.
191     SystemFilter baseFilter = algorithmFunctions.getFilter();
192 
193     // SystemFilter containing structures stored in file 1 (or file 0 if file 1 does not exist).
194     SystemFilter targetFilter;
195     // Number of files to read in.
196     int numFiles = filenames.size();
197     boolean isSymmetric = false;
198     if (numFiles == 1) {
199       logger.info(
200           "\n Superpose will be applied between all pairs of conformations within the supplied file.\n");
201       isSymmetric = true;
202       // If only one file is supplied, compare all structures in that file to each other.
203       algorithmFunctions.openAll(filenames.get(0));
204       targetFilter = algorithmFunctions.getFilter();
205     } else {
206       // Otherwise, compare structures from first file those in the second.
207       logger.info(
208           "\n Superpose will compare all conformations in the first file to all those in the second file.\n");
209       algorithmFunctions.openAll(filenames.get(1));
210       targetFilter = algorithmFunctions.getFilter();
211     }
212 
213     // Apply active and inactive atom selections
214     atomSelectionOptions.setActiveAtoms(activeAssembly);
215 
216     // Load SS definition.
217     List<List<Integer>> helices = null;
218     List<List<Integer>> sheets = null;
219     boolean applySS = false;
220     if (!secondaryStructure.isEmpty()) {
221       secondaryStructure = validateSecondaryStructurePrediction(activeAssembly);
222       checkForAppropriateResidueIdentities(activeAssembly);
223       String helixChar = "H";
224       String sheetChar = "E";
225       helices = extractSecondaryElement(secondaryStructure, helixChar, 2);
226       sheets = extractSecondaryElement(secondaryStructure, sheetChar, 2);
227       if (!helices.isEmpty() || !sheets.isEmpty()) {
228         applySS = true;
229       }
230     }
231 
232     // Loop over atoms and apply remaining selection flags.
233     Atom[] atoms = activeAssembly.getAtomArray();
234     List<Integer> selectedList = new ArrayList<>();
235     for (Atom atom : atoms) {
236       // Restrict active atoms to heavy atoms.
237       if (!includeHydrogen && atom.isActive() && atom.isHydrogen()) {
238         atom.setActive(false);
239       }
240 
241       // Restrict active atoms to the SS selection.
242       if (applySS && atom.isActive()) {
243         if (!isSS(helices, sheets, atom)) {
244           atom.setActive(false);
245         }
246       }
247 
248       // Restrict to the backbone atom selection.
249       if (backboneSelection > -1 && atom.isActive()) {
250         if (backboneSelection == 0) {
251           // CALPHA (uses N1 or N9 for NA).
252           if (!(atom.getName().equals("CA") || atom.getName().equals("N1") || atom.getName().equals("N9"))) {
253             atom.setActive(false);
254           }
255         } else if (backboneSelection == 1) {
256           // BACKBONE (not supported for NA)
257           if (!(atom.getName().equals("CA") || atom.getName().equals("N") || atom.getName().equals("C"))) {
258             atom.setActive(false);
259           }
260         }
261       }
262 
263       // Keep only active atoms.
264       if (atom.isActive()) {
265         // Note that the atom index list must run from 0 to N-1.
266         selectedList.add(atom.getIndex() - 1);
267       }
268     }
269 
270     // Indices of atoms used in alignment and RMSD calculations.
271     int nSelected = selectedList.size();
272     int[] usedIndices = new int[nSelected];
273     for (int i = 0; i < nSelected; i++) {
274       usedIndices[i] = selectedList.get(i);
275     }
276 
277     // Reset all atoms to be active because the Superpose class will apply our "usedIndices" array.
278     for (Atom atom : atoms) {
279       atom.setActive(true);
280     }
281 
282     // Compute the RMSD values.
283     ffx.potential.utils.Superpose superpose =
284         new ffx.potential.utils.Superpose(baseFilter, targetFilter, isSymmetric);
285 
286     // Do the superpositions.
287     superpose.calculateRMSDs(usedIndices, dRMSD, verbose, restart, write, saveSnapshots, printSym);
288 
289     return this;
290   }
291 
292   private static boolean isSS(List<List<Integer>> helices, List<List<Integer>> sheets, Atom atom) {
293     int resNum = atom.getResidueNumber() - 1;
294     boolean isHelix = false;
295     for (List<Integer> helix : helices) {
296       if (resNum >= helix.get(0) && resNum <= helix.get(1)) {
297         isHelix = true;
298         break;
299       }
300     }
301     boolean isSheet = false;
302     for (List<Integer> sheet : sheets) {
303       if (resNum >= sheet.get(0) && resNum <= sheet.get(1)) {
304         isSheet = true;
305         break;
306       }
307     }
308     return isHelix || isSheet;
309   }
310 
311   /**
312    * This method determines the starting and ending indices for secondary elements of the requested type based
313    * on the user-supplied secondary structure restraint predictions. The Dill Group requires that secondary
314    * elements have at least three consecutive residues to be considered a secondary element.
315    *
316    * @param ss             A string of the secondary structure prediction.
317    * @param elementType    Character indicating type of secondary element being searched (helix, coil, sheet).
318    * @param minNumResidues Integer minimum of consecutive secondary structure predictions
319    *                       to create a secondary element.
320    * @return ArrayList<ArrayList<Integer> > Contains starting and ending residues for each secondary element.
321    */
322   static List<List<Integer>> extractSecondaryElement(String ss, String elementType,
323                                                      int minNumResidues) {
324     // Will hold starting and ending indices for all found secondary elements of the requested type.
325     List<List<Integer>> allElements = new ArrayList<>();
326     // Track of the most recent index to have a character matching the requested elementType.
327     int lastMatch = 0;
328     // Iterates through each index in the secondary structure string.
329     int i = 0;
330     while (i < ss.length()) {
331       if (ss.charAt(i) == elementType.charAt(0)) {
332         int elementStartIndex = i;
333         // Set the starting index for the secondary element as soon as the value at the ith index matches the
334         //  requested element type.
335         for (int j = i + 1; j <= ss.length(); j++) {
336           // Use the jth index to iterate through the secondary structure prediction until the end of the
337           // secondary element is found.
338           if (j < ss.length()) {
339             if (ss.charAt(j) != elementType.charAt(0)) {
340               if (j == lastMatch + 1) {
341                 // If the most recent lastMatch is only one index away, then check and store the
342                 // starting and ending indices of the secondary element.
343                 i = j;
344                 // Set i=j so that i begins searching for the next element at the end of the most recent
345                 // secondary element.
346                 int elementLength = j - elementStartIndex;
347                 if (elementLength > minNumResidues) {
348                   // If secondary element is above minimum length, store starting and ending indices
349                   // of secondary element.
350                   List<Integer> currentElement = new ArrayList<>();
351                   currentElement.add(elementStartIndex);
352                   currentElement.add(lastMatch);
353                   allElements.add(currentElement);
354                 }
355                 j = ss.length() + 1;
356                 // Since end of current secondary element has been found, exit inner j loop.
357               }
358             } else {
359               lastMatch = j;
360               i++;
361               // If the jth index in the secondary structure string matches the requested element,
362               // increment j until the end of the secondary element is found.
363             }
364           }
365           if (j == ss.length()) {
366             // Handle the case when a secondary element is at the very end of the secondary structure string.
367             i = ss.length() + 1;
368             if (j == lastMatch + 1) {
369               int elementLength = j - elementStartIndex;
370               if (elementLength > minNumResidues) {
371                 List<Integer> currentElement = new ArrayList<>();
372                 currentElement.add(elementStartIndex);
373                 currentElement.add(lastMatch);
374                 allElements.add(currentElement);
375               }
376               j = ss.length() + 1;
377               // Since end of current secondary element has been found, exit inner j loop.
378             }
379           }
380         }
381       } else {
382         i++;
383         // Increment i until end of secondary structure prediction or until requested secondary element is found.
384       }
385     }
386     return allElements;
387   }
388 
389   /**
390    * This method validates that the user-supplied secondary structure predictions are the correct
391    * length and contain the correct characters.
392    *
393    * @param molecularAssembly The molecular assembly.
394    * @return String containing the validated and edited secondary structure restraints.
395    */
396   String validateSecondaryStructurePrediction(MolecularAssembly molecularAssembly) {
397     // The only characters that should be present in secondary structure restraint string are 'H'
398     // for helix, 'E'
399     // for beta sheet and '.' for coil.
400     if (!secondaryStructure.matches("^[HE.]+") && !secondaryStructure.isEmpty()) {
401       logger.severe(" Secondary structure restraints may only contain characters 'H', 'E' and '.'");
402     }
403 
404     int numResidues = molecularAssembly.getResidueList().size();
405     int numSecondaryStructure = secondaryStructure.length();
406 
407     // Only one secondary structure restraint should exist per residue.
408     if (numSecondaryStructure == 0) {
409       logger.warning(
410           " No secondary structure restraints have been provided. Simulation will proceed "
411               + "with all residues having random coil secondary structure restraints.");
412       String randomCoil = StringUtils.leftPad("", numResidues, ".");
413       return randomCoil;
414     } else if (numSecondaryStructure < numResidues) {
415       logger.warning(
416           " Too few secondary structure restraints exist for number of residues present. "
417               +
418               "Random coil will be added to end residues without provided secondary structure restraints.");
419       String extraCoil = StringUtils.rightPad(secondaryStructure, numResidues, '.');
420       return extraCoil;
421     } else if (numSecondaryStructure == numResidues) {
422       logger.info(" Secondary structure restraints will be added for all residues.");
423       return secondaryStructure;
424     } else if (numSecondaryStructure > numResidues) {
425       logger.warning(
426           " Too many secondary structure restraints exist for number of residues present."
427               + " Provided secondary structure restraints will be truncated.");
428       String truncated = secondaryStructure.substring(0, numResidues);
429       return truncated;
430     } else {
431       logger.severe(" Secondary structure restraints or residues do not exist.");
432       return null;
433     }
434   }
435 
436   /**
437    * This method checks that secondary structure assignments are appropriate for the residue
438    * identity. ACE and NME residues do not have alpha carbons, so they are not compatible with the
439    * alpha helix or beta sheet MELD restraints.
440    *
441    * @param molecularAssembly The molecular assembly.
442    */
443   void checkForAppropriateResidueIdentities(MolecularAssembly molecularAssembly) {
444     List<Residue> residues = molecularAssembly.getResidueList();
445     for (int i = 0; i < secondaryStructure.length(); i++) {
446       Residue residue = residues.get(i);
447       AminoAcid3 aminoAcid3 = residue.getAminoAcid3();
448 
449       String aminoAcidString = aminoAcid3.toString();
450       String NMEString = AminoAcid3.NME.toString();
451       String ACEString = AminoAcid3.ACE.toString();
452 
453       if (aminoAcidString.equals(NMEString) || aminoAcidString.equals(ACEString)) {
454         char character = secondaryStructure.charAt(i);
455         char H = 'H';
456         char E = 'E';
457         if (character == H) {
458           logger.info(
459               " Secondary structure was modified to accommodate non-standard amino acid residue.");
460           secondaryStructure =
461               secondaryStructure.substring(0, i) + '.' + secondaryStructure.substring(i + 1);
462         } else if (character == E) {
463           logger.info(
464               " Secondary structure was modified to accommodate non-standard amino acid residue.");
465           secondaryStructure =
466               secondaryStructure.substring(0, i) + '.' + secondaryStructure.substring(i + 1);
467         }
468       }
469     }
470   }
471 
472   @Override
473   public List<Potential> getPotentials() {
474     return Collections.emptyList();
475   }
476 }