1 // ****************************************************************************** 2 // 3 // Title: Force Field X. 4 // Description: Force Field X - Software for Molecular Biophysics. 5 // Copyright: Copyright (c) Michael J. Schnieders 2001-2025. 6 // 7 // This file is part of Force Field X. 8 // 9 // Force Field X is free software; you can redistribute it and/or modify it 10 // under the terms of the GNU General Public License version 3 as published by 11 // the Free Software Foundation. 12 // 13 // Force Field X is distributed in the hope that it will be useful, but WITHOUT 14 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 15 // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 16 // details. 17 // 18 // You should have received a copy of the GNU General Public License along with 19 // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple 20 // Place, Suite 330, Boston, MA 02111-1307 USA 21 // 22 // Linking this library statically or dynamically with other modules is making a 23 // combined work based on this library. Thus, the terms and conditions of the 24 // GNU General Public License cover the whole combination. 25 // 26 // As a special exception, the copyright holders of this library give you 27 // permission to link this library with independent modules to produce an 28 // executable, regardless of the license terms of these independent modules, and 29 // to copy and distribute the resulting executable under terms of your choice, 30 // provided that you also meet, for each linked independent module, the terms 31 // and conditions of the license of that module. An independent module is a 32 // module which is not derived from or based on this library. If you modify this 33 // library, you may extend this exception to your version of the library, but 34 // you are not obligated to do so. If you do not wish to do so, delete this 35 // exception statement from your version. 36 // 37 // ****************************************************************************** 38 package ffx.numerics.clustering; 39 40 import java.util.ArrayList; 41 import java.util.List; 42 43 /** 44 * Clustering algorithm that consumes a condensed (pdist-style) upper-triangular 45 * distance array to produce hierarchical agglomerative clusters. Supports flat 46 * clustering by threshold; weighted inputs delegate to unweighted behavior. 47 * 48 * @author Lars Behnke, 2013 49 * @author Michael J. Schnieders 50 * @since 1.0 51 */ 52 public class PDistClusteringAlgorithm implements ClusteringAlgorithm { 53 54 /** 55 * Performs hierarchical agglomerative clustering using a condensed pdist-like matrix. 56 * 57 * @param distances a 1 x M array holding the upper-triangular distances in row-major pdist order 58 * @param clusterNames names corresponding to the N items (M = N*(N-1)/2) 59 * @param linkageStrategy linkage criterion used during agglomeration 60 * @return root Cluster of the resulting hierarchy 61 */ 62 @Override 63 public Cluster performClustering(double[][] distances, 64 String[] clusterNames, LinkageStrategy linkageStrategy) { 65 66 /* Argument checks */ 67 if (distances == null || distances.length == 0) { 68 throw new IllegalArgumentException("Invalid distance matrix"); 69 } 70 if (distances[0].length != clusterNames.length 71 * (clusterNames.length - 1) / 2) { 72 throw new IllegalArgumentException("Invalid cluster name array"); 73 } 74 if (linkageStrategy == null) { 75 throw new IllegalArgumentException("Undefined linkage strategy"); 76 } 77 78 /* Setup model */ 79 List<Cluster> clusters = createClusters(clusterNames); 80 DistanceMap linkages = createLinkages(distances, clusters); 81 82 /* Process */ 83 HierarchyBuilder builder = new HierarchyBuilder(clusters, linkages); 84 while (!builder.isTreeComplete()) { 85 builder.agglomerate(linkageStrategy); 86 } 87 88 return builder.getRootCluster(); 89 } 90 91 /** 92 * Produces a flat clustering from a condensed distance matrix by agglomerating until the 93 * threshold is exceeded. 94 * 95 * @param distances a 1 x M condensed distance array (pdist order) 96 * @param clusterNames names of the N items 97 * @param linkageStrategy linkage criterion used during agglomeration 98 * @param threshold maximum allowed linkage distance for merging 99 * @return list of clusters at the chosen cut 100 */ 101 @Override 102 public List<Cluster> performFlatClustering(double[][] distances, 103 String[] clusterNames, LinkageStrategy linkageStrategy, Double threshold) { 104 105 /* Argument checks */ 106 if (distances == null || distances.length == 0) { 107 throw new IllegalArgumentException("Invalid distance matrix"); 108 } 109 if (distances[0].length != clusterNames.length 110 * (clusterNames.length - 1) / 2) { 111 throw new IllegalArgumentException("Invalid cluster name array"); 112 } 113 if (linkageStrategy == null) { 114 throw new IllegalArgumentException("Undefined linkage strategy"); 115 } 116 117 /* Setup model */ 118 List<Cluster> clusters = createClusters(clusterNames); 119 DistanceMap linkages = createLinkages(distances, clusters); 120 121 /* Process */ 122 HierarchyBuilder builder = new HierarchyBuilder(clusters, linkages); 123 return builder.flatAgg(linkageStrategy, threshold); 124 } 125 126 /** 127 * Weighted variant for condensed inputs; currently delegates to unweighted clustering as 128 * weights are not applied with condensed input in this implementation. 129 * 130 * @param distances a 1 x M condensed distance array (pdist order) 131 * @param clusterNames names of the N items 132 * @param weights weights for the N items (unused) 133 * @param linkageStrategy linkage criterion used during agglomeration 134 * @return root Cluster of the resulting hierarchy 135 */ 136 @Override 137 public Cluster performWeightedClustering(double[][] distances, String[] clusterNames, 138 double[] weights, LinkageStrategy linkageStrategy) { 139 return performClustering(distances, clusterNames, linkageStrategy); 140 } 141 142 /** 143 * Builds initial linkages from a condensed upper-triangular distance array. 144 * 145 * @param distances a 1 x M condensed distance array (pdist order) 146 * @param clusters list of initial singleton clusters 147 * @return a DistanceMap containing inter-cluster linkages 148 */ 149 private DistanceMap createLinkages(double[][] distances, 150 List<Cluster> clusters) { 151 DistanceMap linkages = new DistanceMap(); 152 for (int col = 0; col < clusters.size(); col++) { 153 Cluster cluster_col = clusters.get(col); 154 for (int row = col + 1; row < clusters.size(); row++) { 155 ClusterPair link = new ClusterPair(); 156 Double d = distances[0][accessFunction(row, col, 157 clusters.size())]; 158 link.setLinkageDistance(d); 159 link.setlCluster(cluster_col); 160 link.setrCluster(clusters.get(row)); 161 linkages.add(link); 162 } 163 } 164 return linkages; 165 } 166 167 /** 168 * Creates initial singleton clusters, one per input name. 169 * 170 * @param clusterNames names for singleton clusters 171 * @return list of newly created singleton clusters 172 */ 173 private List<Cluster> createClusters(String[] clusterNames) { 174 List<Cluster> clusters = new ArrayList<>(); 175 for (String clusterName : clusterNames) { 176 Cluster cluster = new Cluster(clusterName); 177 cluster.addLeafName(clusterName); 178 clusters.add(cluster); 179 } 180 return clusters; 181 } 182 183 // Credit to this function goes to 184 // http://stackoverflow.com/questions/13079563/how-does-condensed-distance-matrix-work-pdist 185 186 /** 187 * Indexing function mapping (i,j) with i>j to the position in the condensed pdist array. 188 * 189 * @param i row index (0-based) 190 * @param j column index (0-based), with i > j 191 * @param n the total number of items (matrix dimension) 192 * @return index into the condensed upper-triangular storage 193 */ 194 private static int accessFunction(int i, int j, int n) { 195 return n * j - j * (j + 1) / 2 + i - 1 - j; 196 } 197 198 }