View Javadoc
1   //******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2025.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  //******************************************************************************
38  package ffx.algorithms.commands;
39  
40  import ffx.algorithms.cli.AlgorithmsCommand;
41  import ffx.crystal.Crystal;
42  import ffx.numerics.Potential;
43  import ffx.numerics.math.RunningStatistics;
44  import ffx.potential.ForceFieldEnergy;
45  import ffx.potential.MolecularAssembly;
46  import ffx.potential.parsers.DistanceMatrixFilter;
47  import ffx.potential.parsers.PDBFilter;
48  import ffx.potential.parsers.SystemFilter;
49  import ffx.potential.parsers.XYZFilter;
50  import ffx.potential.utils.Clustering.Conformation;
51  import ffx.utilities.FFXBinding;
52  import org.apache.commons.io.FilenameUtils;
53  import org.apache.commons.math3.ml.clustering.CentroidCluster;
54  import picocli.CommandLine.Command;
55  import picocli.CommandLine.Option;
56  import picocli.CommandLine.Parameters;
57  
58  import java.io.File;
59  import java.io.FileWriter;
60  import java.io.IOException;
61  import java.util.ArrayList;
62  import java.util.Collections;
63  import java.util.List;
64  
65  import static ffx.potential.utils.Clustering.analyzeClusters;
66  import static ffx.potential.utils.Clustering.hierarchicalClustering;
67  import static ffx.potential.utils.Clustering.iterativeClustering;
68  import static ffx.potential.utils.Clustering.kMeansClustering;
69  import static java.lang.String.format;
70  import static org.apache.commons.math3.util.FastMath.floorDiv;
71  
72  /**
73   * The Cluster script clusters structures utilizing RMSD.
74   * <p>
75   * The Iterative Hierarchical Clustering method was contributed by Yuya Kinoshita, Koki Nishimura and
76   * Masatoshi Karashima from Takeda Pharmaceuticals.
77   *
78   * @author Aaron J. Nessler
79   * @author Mallory R. Tollefson
80   * @author Michael J. Schnieders
81   * <br>
82   * Usage:
83   * <br>
84   * ffxc Cluster [options] &lt;filename&gt;
85   */
86  @Command(description = " Cluster structures using an RMSD matrix.", name = "Cluster")
87  public class Cluster extends AlgorithmsCommand {
88  
89    /**
90     * -a or --algorithm Clustering algorithm to use.
91     * Algorithm: Multi-K-Means++ (0), Hierarchical (1), or Iterative (2)
92     */
93    @Option(names = {"-a", "--algorithm"}, paramLabel = "0", defaultValue = "0",
94        description = "Algorithm: Multi-K-Means++ (0), Hierarchical (1), or Iterative (2)")
95    private int algorithm;
96  
97    /**
98     * -t or --trials Number of trials for Multi-K-Means or Iterative.
99     */
100   @Option(names = {"-t", "--trials"}, paramLabel = "1000", defaultValue = "1000",
101       description = "Number of trials for Multi-K-Means or iterative method.")
102   private int trials;
103 
104   /**
105    * --tol or --tolerance Tolerance to determine equivalent points (iterative method).
106    */
107   @Option(names = {"--tol", "--tolerance"}, paramLabel = "0.5", defaultValue = "0.5",
108       description = "Tolerance to determine if points are equivalent (iterative method).")
109   private double tolerance;
110 
111   /**
112    * -k or --clusters Maximum number of kmeans clusters.
113    */
114   @Option(names = {"-k", "--clusters"}, paramLabel = "0", defaultValue = "0",
115       description = "Maximum number of k-means clusters.")
116   private int maxClusters;
117 
118   /**
119    * --rs or --randomSeed Set the random seed for deterministic clustering.
120    */
121   @Option(names = {"--rs", "--randomSeed"}, paramLabel = "-1", defaultValue = "-1",
122       description = "Set the random seed for deterministic clustering (-1 uses the current time).")
123   private long randomSeed;
124 
125   /**
126    * --td or --threshold
127    * RMSD value for dividing clusters from a hierarchical tree.
128    */
129   @Option(names = {"--td", "--threshold"}, paramLabel = "2.0", defaultValue = "2.0",
130       description = "The distance value where a hierarchical tree flattened into clusters.")
131   private double threshold;
132 
133   /**
134    * -w or --write Write out an archive of a representative structure from each cluster.
135    */
136   @Option(names = {"-w", "--write"}, paramLabel = "false", defaultValue = "false",
137       description = "Write an archive containing a representative from each cluster.")
138   private boolean write;
139 
140   /**
141    * The final argument(s) should be one or two filenames.
142    */
143   @Parameters(arity = "1..2", paramLabel = "files",
144       description = "The RMSD distance matrix and optionally the corresponding structure archive (required for --write).")
145   private List<String> filenames = null;
146 
147   /**
148    * Generated list of clusters.
149    */
150   private List<CentroidCluster<Conformation>> clusterList;
151 
152   /**
153    * Cluster constructor.
154    */
155   public Cluster() {
156     super();
157   }
158 
159   /**
160    * Cluster constructor.
161    *
162    * @param binding The Binding to use.
163    */
164   public Cluster(FFXBinding binding) {
165     super(binding);
166   }
167 
168   /**
169    * Cluster constructor that sets the command line arguments.
170    *
171    * @param args Command line arguments.
172    */
173   public Cluster(String[] args) {
174     super(args);
175   }
176 
177   /**
178    * Return the Clusters.
179    *
180    * @return Returns the generated clusters.
181    */
182   public List<CentroidCluster<Conformation>> getClusterList() {
183     return clusterList;
184   }
185 
186   /**
187    * Execute the script.
188    */
189   @Override
190   public Cluster run() {
191 
192     // Init the context and bind variables.
193     if (!init()) {
194       return this;
195     }
196 
197     if (filenames == null || filenames.isEmpty()) {
198       logger.info(helpString());
199       return this;
200     }
201 
202     List<double[]> distMatrix = new ArrayList<>();
203 
204     String filename = filenames.get(0);
205 
206     DistanceMatrixFilter distanceMatrixFilter = new DistanceMatrixFilter();
207     RunningStatistics runningStatistics = distanceMatrixFilter.readDistanceMatrix(filename, distMatrix);
208 
209     logger.info(" RMSD Distance Matrix Statistics");
210     logger.info(format(" RMSD Minimum:  %8.6f", runningStatistics.getMin()));
211     logger.info(format(" RMSD Maximum:  %8.6f", runningStatistics.getMax()));
212     logger.info(format(" RMSD Mean:     %8.6f", runningStatistics.getMean()));
213     double variance = runningStatistics.getVariance();
214     if (!Double.isNaN(variance)) {
215       logger.info(format(" RMSD Variance: %8.6f\n", variance));
216     } else {
217       logger.info("");
218     }
219 
220     if (runningStatistics == null) {
221       logger.info(format(" The distance matrix %s could not be read in.", filename));
222       return this;
223     }
224 
225     // Either use Multi-K-Means++, iterative, or Hierarchical agglomerative clustering.
226     switch (algorithm) {
227       case 1:
228         // Hierarchical clustering.
229         logger.info(" Performing Hierarchical Clustering");
230         logger.info(format("  Cluster separation threshold: %6.4f A.\n", threshold));
231         clusterList = hierarchicalClustering(distMatrix, threshold);
232         break;
233       case 2:
234         // Iterative Clustering (produced by researchers at Takeda)
235         logger.info(" Performing Iterative Clustering");
236         clusterList = iterativeClustering(distMatrix, trials, tolerance);
237         break;
238       default:
239         // K-Means++ and Multi K-Means++.
240         logger.info(" Performing K-Means++ Clustering");
241 
242         // Ensure cluster are within bounds
243         int dim = distMatrix.size();
244         if (maxClusters <= 0 || maxClusters >= dim) {
245           int newSize = floorDiv(dim, 2);
246           maxClusters = newSize;
247         }
248         logger.info(format("  Number of clusters: %d", maxClusters));
249 
250         // Array representing indices of structures that represent their cluster.
251         if (randomSeed == -1) {
252           randomSeed = System.nanoTime();
253         } else {
254           logger.info(format("  Random seed:        %d", randomSeed));
255         }
256         logger.info(format("  Number of trials:   %d\n", trials));
257         clusterList = kMeansClustering(distMatrix, maxClusters, trials, randomSeed);
258     }
259 
260     boolean verbose = true;
261     List<Integer> representativeConformer = new ArrayList<>();
262     analyzeClusters(clusterList, representativeConformer, verbose);
263 
264     if (write) {
265       writeStructures(representativeConformer);
266     }
267 
268     return this;
269   }
270 
271   /**
272    * Write out structures corresponding to the representative of each cluster.
273    *
274    * @param repStructs Array list for index of representative structures.
275    */
276   private void writeStructures(List<Integer> repStructs) {
277     String saveName = null;
278     File structureFile = null;
279     if (filenames.size() < 2) {
280       saveName = FilenameUtils.removeExtension(filenames.get(0)) + ".arc";
281       structureFile = new File(saveName);
282       if (structureFile.exists()) {
283         structureFile = algorithmFunctions.versionFile(structureFile);
284       } else {
285         saveName = FilenameUtils.removeExtension(filenames.get(0)) + ".pdb";
286         structureFile = new File(saveName);
287         if (structureFile.exists()) {
288           structureFile = algorithmFunctions.versionFile(structureFile);
289         } else {
290           logger.info(" Please supply an ARC or PDB file corresponding to the distance matrix.");
291           return;
292         }
293       }
294     }
295 
296     // Turn off nonbonded terms.
297     System.setProperty("vdwterm", "false");
298 
299     if (structureFile == null) {
300       saveName = filenames.get(1);
301       structureFile = new File(algorithmFunctions.versionFile(saveName));
302     }
303     File saveFile = new File(algorithmFunctions.versionFile(saveName));
304     logger.info(format("\n Saving representative cluster members to %s.", structureFile.toString()));
305 
306     MolecularAssembly[] molecularAssemblies = algorithmFunctions.openAll(saveName);
307     activeAssembly = molecularAssemblies[0];
308 
309     SystemFilter systemFilter = algorithmFunctions.getFilter();
310     PDBFilter pdbFilter = null;
311     XYZFilter xyzFilter = null;
312     if (systemFilter instanceof PDBFilter) {
313       pdbFilter = new PDBFilter(structureFile, activeAssembly, activeAssembly.getForceField(),
314           activeAssembly.getProperties());
315     } else if (systemFilter instanceof XYZFilter) {
316       xyzFilter = new XYZFilter(structureFile, activeAssembly, activeAssembly.getForceField(),
317           activeAssembly.getProperties());
318     } else {
319       logger.info(" Unknown file type.");
320       return;
321     }
322 
323     logger.info("\n Conformations: ");
324     int structNum = 0;
325     do {
326       if (repStructs.contains(structNum)) {
327         int clusterNum = repStructs.indexOf(structNum);
328         // Make sure the crystal is is updated.
329         Crystal crystal = activeAssembly.getCrystal();
330         ForceFieldEnergy forceFieldEnergy = activeAssembly.getPotentialEnergy();
331         forceFieldEnergy.setCrystal(crystal);
332         if (crystal.aperiodic()) {
333           logger.info(format(" Conformation %d, Cluster %d)", structNum + 1, clusterNum + 1));
334         } else {
335           logger.info(format(" Conformation %d, Cluster %d (%s)", structNum + 1, clusterNum + 1,
336               crystal.getUnitCell().toShortString()));
337         }
338         if (systemFilter instanceof PDBFilter) {
339           pdbFilter.writeFile(structureFile, true, false, false);
340         } else if (systemFilter instanceof XYZFilter) {
341           String[] message = new String[1];
342           message[0] = format(" Conformation %d, Cluster %d ", structNum + 1, clusterNum + 1);
343           xyzFilter.writeFile(structureFile, true, message);
344         }
345       }
346       structNum++;
347     } while (systemFilter.readNext(false, false));
348 
349     if (systemFilter instanceof PDBFilter) {
350       try (FileWriter fw = new FileWriter(structureFile, true)) {
351         fw.append("END\n");
352       } catch (IOException e) {
353         logger.warning("Error appending END to PDB file: " + e.getMessage());
354       }
355     }
356 
357     systemFilter.closeReader();
358   }
359 
360   /**
361    * {@inheritDoc}
362    */
363   @Override
364   public List<Potential> getPotentials() {
365     return Collections.emptyList();
366   }
367 }