1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38 package ffx.algorithms.commands;
39
40 import ffx.algorithms.cli.AlgorithmsCommand;
41 import ffx.crystal.Crystal;
42 import ffx.numerics.Potential;
43 import ffx.numerics.math.RunningStatistics;
44 import ffx.potential.ForceFieldEnergy;
45 import ffx.potential.MolecularAssembly;
46 import ffx.potential.parsers.DistanceMatrixFilter;
47 import ffx.potential.parsers.PDBFilter;
48 import ffx.potential.parsers.SystemFilter;
49 import ffx.potential.parsers.XYZFilter;
50 import ffx.potential.utils.Clustering.Conformation;
51 import ffx.utilities.FFXBinding;
52 import org.apache.commons.io.FilenameUtils;
53 import org.apache.commons.math3.ml.clustering.CentroidCluster;
54 import picocli.CommandLine.Command;
55 import picocli.CommandLine.Option;
56 import picocli.CommandLine.Parameters;
57
58 import java.io.File;
59 import java.io.FileWriter;
60 import java.io.IOException;
61 import java.util.ArrayList;
62 import java.util.Collections;
63 import java.util.List;
64
65 import static ffx.potential.utils.Clustering.analyzeClusters;
66 import static ffx.potential.utils.Clustering.hierarchicalClustering;
67 import static ffx.potential.utils.Clustering.iterativeClustering;
68 import static ffx.potential.utils.Clustering.kMeansClustering;
69 import static java.lang.String.format;
70 import static org.apache.commons.math3.util.FastMath.floorDiv;
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86 @Command(description = " Cluster structures using an RMSD matrix.", name = "Cluster")
87 public class Cluster extends AlgorithmsCommand {
88
89
90
91
92
93 @Option(names = {"-a", "--algorithm"}, paramLabel = "0", defaultValue = "0",
94 description = "Algorithm: Multi-K-Means++ (0), Hierarchical (1), or Iterative (2)")
95 private int algorithm;
96
97
98
99
100 @Option(names = {"-t", "--trials"}, paramLabel = "1000", defaultValue = "1000",
101 description = "Number of trials for Multi-K-Means or iterative method.")
102 private int trials;
103
104
105
106
107 @Option(names = {"--tol", "--tolerance"}, paramLabel = "0.5", defaultValue = "0.5",
108 description = "Tolerance to determine if points are equivalent (iterative method).")
109 private double tolerance;
110
111
112
113
114 @Option(names = {"-k", "--clusters"}, paramLabel = "0", defaultValue = "0",
115 description = "Maximum number of k-means clusters.")
116 private int maxClusters;
117
118
119
120
121 @Option(names = {"--rs", "--randomSeed"}, paramLabel = "-1", defaultValue = "-1",
122 description = "Set the random seed for deterministic clustering (-1 uses the current time).")
123 private long randomSeed;
124
125
126
127
128
129 @Option(names = {"--td", "--threshold"}, paramLabel = "2.0", defaultValue = "2.0",
130 description = "The distance value where a hierarchical tree flattened into clusters.")
131 private double threshold;
132
133
134
135
136 @Option(names = {"-w", "--write"}, paramLabel = "false", defaultValue = "false",
137 description = "Write an archive containing a representative from each cluster.")
138 private boolean write;
139
140
141
142
143 @Parameters(arity = "1..2", paramLabel = "files",
144 description = "The RMSD distance matrix and optionally the corresponding structure archive (required for --write).")
145 private List<String> filenames = null;
146
147
148
149
150 private List<CentroidCluster<Conformation>> clusterList;
151
152
153
154
155 public Cluster() {
156 super();
157 }
158
159
160
161
162
163
164 public Cluster(FFXBinding binding) {
165 super(binding);
166 }
167
168
169
170
171
172
173 public Cluster(String[] args) {
174 super(args);
175 }
176
177
178
179
180
181
182 public List<CentroidCluster<Conformation>> getClusterList() {
183 return clusterList;
184 }
185
186
187
188
189 @Override
190 public Cluster run() {
191
192
193 if (!init()) {
194 return this;
195 }
196
197 if (filenames == null || filenames.isEmpty()) {
198 logger.info(helpString());
199 return this;
200 }
201
202 List<double[]> distMatrix = new ArrayList<>();
203
204 String filename = filenames.get(0);
205
206 DistanceMatrixFilter distanceMatrixFilter = new DistanceMatrixFilter();
207 RunningStatistics runningStatistics = distanceMatrixFilter.readDistanceMatrix(filename, distMatrix);
208
209 logger.info(" RMSD Distance Matrix Statistics");
210 logger.info(format(" RMSD Minimum: %8.6f", runningStatistics.getMin()));
211 logger.info(format(" RMSD Maximum: %8.6f", runningStatistics.getMax()));
212 logger.info(format(" RMSD Mean: %8.6f", runningStatistics.getMean()));
213 double variance = runningStatistics.getVariance();
214 if (!Double.isNaN(variance)) {
215 logger.info(format(" RMSD Variance: %8.6f\n", variance));
216 } else {
217 logger.info("");
218 }
219
220 if (runningStatistics == null) {
221 logger.info(format(" The distance matrix %s could not be read in.", filename));
222 return this;
223 }
224
225
226 switch (algorithm) {
227 case 1:
228
229 logger.info(" Performing Hierarchical Clustering");
230 logger.info(format(" Cluster separation threshold: %6.4f A.\n", threshold));
231 clusterList = hierarchicalClustering(distMatrix, threshold);
232 break;
233 case 2:
234
235 logger.info(" Performing Iterative Clustering");
236 clusterList = iterativeClustering(distMatrix, trials, tolerance);
237 break;
238 default:
239
240 logger.info(" Performing K-Means++ Clustering");
241
242
243 int dim = distMatrix.size();
244 if (maxClusters <= 0 || maxClusters >= dim) {
245 int newSize = floorDiv(dim, 2);
246 maxClusters = newSize;
247 }
248 logger.info(format(" Number of clusters: %d", maxClusters));
249
250
251 if (randomSeed == -1) {
252 randomSeed = System.nanoTime();
253 } else {
254 logger.info(format(" Random seed: %d", randomSeed));
255 }
256 logger.info(format(" Number of trials: %d\n", trials));
257 clusterList = kMeansClustering(distMatrix, maxClusters, trials, randomSeed);
258 }
259
260 boolean verbose = true;
261 List<Integer> representativeConformer = new ArrayList<>();
262 analyzeClusters(clusterList, representativeConformer, verbose);
263
264 if (write) {
265 writeStructures(representativeConformer);
266 }
267
268 return this;
269 }
270
271
272
273
274
275
276 private void writeStructures(List<Integer> repStructs) {
277 String saveName = null;
278 File structureFile = null;
279 if (filenames.size() < 2) {
280 saveName = FilenameUtils.removeExtension(filenames.get(0)) + ".arc";
281 structureFile = new File(saveName);
282 if (structureFile.exists()) {
283 structureFile = algorithmFunctions.versionFile(structureFile);
284 } else {
285 saveName = FilenameUtils.removeExtension(filenames.get(0)) + ".pdb";
286 structureFile = new File(saveName);
287 if (structureFile.exists()) {
288 structureFile = algorithmFunctions.versionFile(structureFile);
289 } else {
290 logger.info(" Please supply an ARC or PDB file corresponding to the distance matrix.");
291 return;
292 }
293 }
294 }
295
296
297 System.setProperty("vdwterm", "false");
298
299 if (structureFile == null) {
300 saveName = filenames.get(1);
301 structureFile = new File(algorithmFunctions.versionFile(saveName));
302 }
303 File saveFile = new File(algorithmFunctions.versionFile(saveName));
304 logger.info(format("\n Saving representative cluster members to %s.", structureFile.toString()));
305
306 MolecularAssembly[] molecularAssemblies = algorithmFunctions.openAll(saveName);
307 activeAssembly = molecularAssemblies[0];
308
309 SystemFilter systemFilter = algorithmFunctions.getFilter();
310 PDBFilter pdbFilter = null;
311 XYZFilter xyzFilter = null;
312 if (systemFilter instanceof PDBFilter) {
313 pdbFilter = new PDBFilter(structureFile, activeAssembly, activeAssembly.getForceField(),
314 activeAssembly.getProperties());
315 } else if (systemFilter instanceof XYZFilter) {
316 xyzFilter = new XYZFilter(structureFile, activeAssembly, activeAssembly.getForceField(),
317 activeAssembly.getProperties());
318 } else {
319 logger.info(" Unknown file type.");
320 return;
321 }
322
323 logger.info("\n Conformations: ");
324 int structNum = 0;
325 do {
326 if (repStructs.contains(structNum)) {
327 int clusterNum = repStructs.indexOf(structNum);
328
329 Crystal crystal = activeAssembly.getCrystal();
330 ForceFieldEnergy forceFieldEnergy = activeAssembly.getPotentialEnergy();
331 forceFieldEnergy.setCrystal(crystal);
332 if (crystal.aperiodic()) {
333 logger.info(format(" Conformation %d, Cluster %d)", structNum + 1, clusterNum + 1));
334 } else {
335 logger.info(format(" Conformation %d, Cluster %d (%s)", structNum + 1, clusterNum + 1,
336 crystal.getUnitCell().toShortString()));
337 }
338 if (systemFilter instanceof PDBFilter) {
339 pdbFilter.writeFile(structureFile, true, false, false);
340 } else if (systemFilter instanceof XYZFilter) {
341 String[] message = new String[1];
342 message[0] = format(" Conformation %d, Cluster %d ", structNum + 1, clusterNum + 1);
343 xyzFilter.writeFile(structureFile, true, message);
344 }
345 }
346 structNum++;
347 } while (systemFilter.readNext(false, false));
348
349 if (systemFilter instanceof PDBFilter) {
350 try (FileWriter fw = new FileWriter(structureFile, true)) {
351 fw.append("END\n");
352 } catch (IOException e) {
353 logger.warning("Error appending END to PDB file: " + e.getMessage());
354 }
355 }
356
357 systemFilter.closeReader();
358 }
359
360
361
362
363 @Override
364 public List<Potential> getPotentials() {
365 return Collections.emptyList();
366 }
367 }