1 // ****************************************************************************** 2 // 3 // Title: Force Field X. 4 // Description: Force Field X - Software for Molecular Biophysics. 5 // Copyright: Copyright (c) Michael J. Schnieders 2001-2025. 6 // 7 // This file is part of Force Field X. 8 // 9 // Force Field X is free software; you can redistribute it and/or modify it 10 // under the terms of the GNU General Public License version 3 as published by 11 // the Free Software Foundation. 12 // 13 // Force Field X is distributed in the hope that it will be useful, but WITHOUT 14 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 15 // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 16 // details. 17 // 18 // You should have received a copy of the GNU General Public License along with 19 // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple 20 // Place, Suite 330, Boston, MA 02111-1307 USA 21 // 22 // Linking this library statically or dynamically with other modules is making a 23 // combined work based on this library. Thus, the terms and conditions of the 24 // GNU General Public License cover the whole combination. 25 // 26 // As a special exception, the copyright holders of this library give you 27 // permission to link this library with independent modules to produce an 28 // executable, regardless of the license terms of these independent modules, and 29 // to copy and distribute the resulting executable under terms of your choice, 30 // provided that you also meet, for each linked independent module, the terms 31 // and conditions of the license of that module. An independent module is a 32 // module which is not derived from or based on this library. If you modify this 33 // library, you may extend this exception to your version of the library, but 34 // you are not obligated to do so. If you do not wish to do so, delete this 35 // exception statement from your version. 36 // 37 // ****************************************************************************** 38 package ffx.numerics.clustering; 39 40 import java.util.ArrayList; 41 import java.util.Arrays; 42 import java.util.HashSet; 43 import java.util.List; 44 45 /** 46 * Clustering algorithm that operates on a full N x N distance matrix to produce 47 * hierarchical agglomerative clusters (dendrogram), with optional support for 48 * per-element weights and flat clustering by threshold. 49 * 50 * @author Lars Behnke, 2013 51 * @author Michael J. Schnieders 52 * @since 1.0 53 */ 54 public class DefaultClusteringAlgorithm implements ClusteringAlgorithm { 55 56 /** 57 * Performs hierarchical agglomerative clustering using a full N x N distance matrix. 58 * 59 * @param distances an N x N symmetric matrix of pairwise distances 60 * @param clusterNames names corresponding to rows/columns of the distance matrix 61 * @param linkageStrategy linkage criterion used to compute inter-cluster distances 62 * @return root Cluster of the resulting hierarchy 63 */ 64 @Override 65 public Cluster performClustering(double[][] distances, 66 String[] clusterNames, LinkageStrategy linkageStrategy) { 67 68 checkArguments(distances, clusterNames, linkageStrategy); 69 /* Setup model */ 70 List<Cluster> clusters = createClusters(clusterNames); 71 DistanceMap linkages = createLinkages(distances, clusters); 72 73 /* Process */ 74 HierarchyBuilder builder = new HierarchyBuilder(clusters, linkages); 75 while (!builder.isTreeComplete()) { 76 builder.agglomerate(linkageStrategy); 77 } 78 79 return builder.getRootCluster(); 80 } 81 82 /** 83 * Produces a flat clustering by agglomerating until the next merge would exceed the threshold. 84 * 85 * @param distances an N x N symmetric matrix of pairwise distances 86 * @param clusterNames names corresponding to the distance matrix 87 * @param linkageStrategy linkage criterion used during agglomeration 88 * @param threshold maximum allowed linkage distance for merging 89 * @return list of clusters at the chosen cut 90 */ 91 @Override 92 public List<Cluster> performFlatClustering(double[][] distances, 93 String[] clusterNames, LinkageStrategy linkageStrategy, Double threshold) { 94 95 checkArguments(distances, clusterNames, linkageStrategy); 96 /* Setup model */ 97 List<Cluster> clusters = createClusters(clusterNames); 98 DistanceMap linkages = createLinkages(distances, clusters); 99 100 /* Process */ 101 HierarchyBuilder builder = new HierarchyBuilder(clusters, linkages); 102 return builder.flatAgg(linkageStrategy, threshold); 103 } 104 105 /** 106 * Validates input arrays and strategy for consistency. 107 * 108 * @param distances an N x N symmetric matrix of distances 109 * @param clusterNames array of N names 110 * @param linkageStrategy strategy to compute linkage distances 111 * @throws IllegalArgumentException if inputs are inconsistent 112 */ 113 private void checkArguments(double[][] distances, String[] clusterNames, 114 LinkageStrategy linkageStrategy) { 115 if (distances == null || distances.length == 0 116 || distances[0].length != distances.length) { 117 throw new IllegalArgumentException("Invalid distance matrix"); 118 } 119 if (distances.length != clusterNames.length) { 120 throw new IllegalArgumentException("Invalid cluster name array"); 121 } 122 if (linkageStrategy == null) { 123 throw new IllegalArgumentException("Undefined linkage strategy"); 124 } 125 int uniqueCount = new HashSet<>(Arrays.asList(clusterNames)).size(); 126 if (uniqueCount != clusterNames.length) { 127 throw new IllegalArgumentException("Duplicate names"); 128 } 129 } 130 131 /** 132 * Performs hierarchical clustering when each element has an associated weight. 133 * 134 * @param distances an N x N symmetric matrix of distances 135 * @param clusterNames names for the N input elements 136 * @param weights weights for the N input elements 137 * @param linkageStrategy linkage criterion to use 138 * @return root Cluster of the resulting hierarchy 139 */ 140 @Override 141 public Cluster performWeightedClustering(double[][] distances, String[] clusterNames, 142 double[] weights, LinkageStrategy linkageStrategy) { 143 144 checkArguments(distances, clusterNames, linkageStrategy); 145 146 if (weights.length != clusterNames.length) { 147 throw new IllegalArgumentException("Invalid weights array"); 148 } 149 150 /* Setup model */ 151 List<Cluster> clusters = createClusters(clusterNames, weights); 152 DistanceMap linkages = createLinkages(distances, clusters); 153 154 /* Process */ 155 HierarchyBuilder builder = new HierarchyBuilder(clusters, linkages); 156 while (!builder.isTreeComplete()) { 157 builder.agglomerate(linkageStrategy); 158 } 159 160 return builder.getRootCluster(); 161 } 162 163 /** 164 * Builds initial pairwise linkages from a full distance matrix and initial clusters. 165 * 166 * @param distances N x N symmetric matrix of distances 167 * @param clusters list of initial singleton clusters 168 * @return a DistanceMap containing all inter-cluster linkages 169 */ 170 private DistanceMap createLinkages(double[][] distances, 171 List<Cluster> clusters) { 172 DistanceMap linkages = new DistanceMap(); 173 for (int col = 0; col < clusters.size(); col++) { 174 for (int row = col + 1; row < clusters.size(); row++) { 175 ClusterPair link = new ClusterPair(); 176 Cluster lCluster = clusters.get(col); 177 Cluster rCluster = clusters.get(row); 178 link.setLinkageDistance(distances[col][row]); 179 link.setlCluster(lCluster); 180 link.setrCluster(rCluster); 181 linkages.add(link); 182 } 183 } 184 return linkages; 185 } 186 187 /** 188 * Creates initial singleton clusters, one per input name. 189 * 190 * @param clusterNames names for singleton clusters 191 * @return list of newly created singleton clusters 192 */ 193 private List<Cluster> createClusters(String[] clusterNames) { 194 List<Cluster> clusters = new ArrayList<>(); 195 for (String clusterName : clusterNames) { 196 Cluster cluster = new Cluster(clusterName); 197 cluster.addLeafName(clusterName); 198 clusters.add(cluster); 199 } 200 return clusters; 201 } 202 203 /** 204 * Creates initial singleton clusters with weights. 205 * 206 * @param clusterNames names for singleton clusters 207 * @param weights weights for each corresponding singleton 208 * @return list of newly created weighted singleton clusters 209 */ 210 private List<Cluster> createClusters(String[] clusterNames, double[] weights) { 211 List<Cluster> clusters = new ArrayList<>(); 212 for (int i = 0; i < weights.length; i++) { 213 Cluster cluster = new Cluster(clusterNames[i]); 214 cluster.setDistance(new Distance(0.0, weights[i])); 215 clusters.add(cluster); 216 } 217 return clusters; 218 } 219 220 }