1 // ******************************************************************************
2 //
3 // Title: Force Field X.
4 // Description: Force Field X - Software for Molecular Biophysics.
5 // Copyright: Copyright (c) Michael J. Schnieders 2001-2025.
6 //
7 // This file is part of Force Field X.
8 //
9 // Force Field X is free software; you can redistribute it and/or modify it
10 // under the terms of the GNU General Public License version 3 as published by
11 // the Free Software Foundation.
12 //
13 // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16 // details.
17 //
18 // You should have received a copy of the GNU General Public License along with
19 // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20 // Place, Suite 330, Boston, MA 02111-1307 USA
21 //
22 // Linking this library statically or dynamically with other modules is making a
23 // combined work based on this library. Thus, the terms and conditions of the
24 // GNU General Public License cover the whole combination.
25 //
26 // As a special exception, the copyright holders of this library give you
27 // permission to link this library with independent modules to produce an
28 // executable, regardless of the license terms of these independent modules, and
29 // to copy and distribute the resulting executable under terms of your choice,
30 // provided that you also meet, for each linked independent module, the terms
31 // and conditions of the license of that module. An independent module is a
32 // module which is not derived from or based on this library. If you modify this
33 // library, you may extend this exception to your version of the library, but
34 // you are not obligated to do so. If you do not wish to do so, delete this
35 // exception statement from your version.
36 //
37 // ******************************************************************************
38 package ffx.numerics.clustering;
39
40 import java.util.ArrayList;
41 import java.util.Arrays;
42 import java.util.HashSet;
43 import java.util.List;
44
45 /**
46 * Clustering algorithm that operates on a full N x N distance matrix to produce
47 * hierarchical agglomerative clusters (dendrogram), with optional support for
48 * per-element weights and flat clustering by threshold.
49 *
50 * @author Lars Behnke, 2013
51 * @author Michael J. Schnieders
52 * @since 1.0
53 */
54 public class DefaultClusteringAlgorithm implements ClusteringAlgorithm {
55
56 /**
57 * Performs hierarchical agglomerative clustering using a full N x N distance matrix.
58 *
59 * @param distances an N x N symmetric matrix of pairwise distances
60 * @param clusterNames names corresponding to rows/columns of the distance matrix
61 * @param linkageStrategy linkage criterion used to compute inter-cluster distances
62 * @return root Cluster of the resulting hierarchy
63 */
64 @Override
65 public Cluster performClustering(double[][] distances,
66 String[] clusterNames, LinkageStrategy linkageStrategy) {
67
68 checkArguments(distances, clusterNames, linkageStrategy);
69 /* Setup model */
70 List<Cluster> clusters = createClusters(clusterNames);
71 DistanceMap linkages = createLinkages(distances, clusters);
72
73 /* Process */
74 HierarchyBuilder builder = new HierarchyBuilder(clusters, linkages);
75 while (!builder.isTreeComplete()) {
76 builder.agglomerate(linkageStrategy);
77 }
78
79 return builder.getRootCluster();
80 }
81
82 /**
83 * Produces a flat clustering by agglomerating until the next merge would exceed the threshold.
84 *
85 * @param distances an N x N symmetric matrix of pairwise distances
86 * @param clusterNames names corresponding to the distance matrix
87 * @param linkageStrategy linkage criterion used during agglomeration
88 * @param threshold maximum allowed linkage distance for merging
89 * @return list of clusters at the chosen cut
90 */
91 @Override
92 public List<Cluster> performFlatClustering(double[][] distances,
93 String[] clusterNames, LinkageStrategy linkageStrategy, Double threshold) {
94
95 checkArguments(distances, clusterNames, linkageStrategy);
96 /* Setup model */
97 List<Cluster> clusters = createClusters(clusterNames);
98 DistanceMap linkages = createLinkages(distances, clusters);
99
100 /* Process */
101 HierarchyBuilder builder = new HierarchyBuilder(clusters, linkages);
102 return builder.flatAgg(linkageStrategy, threshold);
103 }
104
105 /**
106 * Validates input arrays and strategy for consistency.
107 *
108 * @param distances an N x N symmetric matrix of distances
109 * @param clusterNames array of N names
110 * @param linkageStrategy strategy to compute linkage distances
111 * @throws IllegalArgumentException if inputs are inconsistent
112 */
113 private void checkArguments(double[][] distances, String[] clusterNames,
114 LinkageStrategy linkageStrategy) {
115 if (distances == null || distances.length == 0
116 || distances[0].length != distances.length) {
117 throw new IllegalArgumentException("Invalid distance matrix");
118 }
119 if (distances.length != clusterNames.length) {
120 throw new IllegalArgumentException("Invalid cluster name array");
121 }
122 if (linkageStrategy == null) {
123 throw new IllegalArgumentException("Undefined linkage strategy");
124 }
125 int uniqueCount = new HashSet<>(Arrays.asList(clusterNames)).size();
126 if (uniqueCount != clusterNames.length) {
127 throw new IllegalArgumentException("Duplicate names");
128 }
129 }
130
131 /**
132 * Performs hierarchical clustering when each element has an associated weight.
133 *
134 * @param distances an N x N symmetric matrix of distances
135 * @param clusterNames names for the N input elements
136 * @param weights weights for the N input elements
137 * @param linkageStrategy linkage criterion to use
138 * @return root Cluster of the resulting hierarchy
139 */
140 @Override
141 public Cluster performWeightedClustering(double[][] distances, String[] clusterNames,
142 double[] weights, LinkageStrategy linkageStrategy) {
143
144 checkArguments(distances, clusterNames, linkageStrategy);
145
146 if (weights.length != clusterNames.length) {
147 throw new IllegalArgumentException("Invalid weights array");
148 }
149
150 /* Setup model */
151 List<Cluster> clusters = createClusters(clusterNames, weights);
152 DistanceMap linkages = createLinkages(distances, clusters);
153
154 /* Process */
155 HierarchyBuilder builder = new HierarchyBuilder(clusters, linkages);
156 while (!builder.isTreeComplete()) {
157 builder.agglomerate(linkageStrategy);
158 }
159
160 return builder.getRootCluster();
161 }
162
163 /**
164 * Builds initial pairwise linkages from a full distance matrix and initial clusters.
165 *
166 * @param distances N x N symmetric matrix of distances
167 * @param clusters list of initial singleton clusters
168 * @return a DistanceMap containing all inter-cluster linkages
169 */
170 private DistanceMap createLinkages(double[][] distances,
171 List<Cluster> clusters) {
172 DistanceMap linkages = new DistanceMap();
173 for (int col = 0; col < clusters.size(); col++) {
174 for (int row = col + 1; row < clusters.size(); row++) {
175 ClusterPair link = new ClusterPair();
176 Cluster lCluster = clusters.get(col);
177 Cluster rCluster = clusters.get(row);
178 link.setLinkageDistance(distances[col][row]);
179 link.setlCluster(lCluster);
180 link.setrCluster(rCluster);
181 linkages.add(link);
182 }
183 }
184 return linkages;
185 }
186
187 /**
188 * Creates initial singleton clusters, one per input name.
189 *
190 * @param clusterNames names for singleton clusters
191 * @return list of newly created singleton clusters
192 */
193 private List<Cluster> createClusters(String[] clusterNames) {
194 List<Cluster> clusters = new ArrayList<>();
195 for (String clusterName : clusterNames) {
196 Cluster cluster = new Cluster(clusterName);
197 cluster.addLeafName(clusterName);
198 clusters.add(cluster);
199 }
200 return clusters;
201 }
202
203 /**
204 * Creates initial singleton clusters with weights.
205 *
206 * @param clusterNames names for singleton clusters
207 * @param weights weights for each corresponding singleton
208 * @return list of newly created weighted singleton clusters
209 */
210 private List<Cluster> createClusters(String[] clusterNames, double[] weights) {
211 List<Cluster> clusters = new ArrayList<>();
212 for (int i = 0; i < weights.length; i++) {
213 Cluster cluster = new Cluster(clusterNames[i]);
214 cluster.setDistance(new Distance(0.0, weights[i]));
215 clusters.add(cluster);
216 }
217 return clusters;
218 }
219
220 }