1 // ******************************************************************************
2 //
3 // Title: Force Field X.
4 // Description: Force Field X - Software for Molecular Biophysics.
5 // Copyright: Copyright (c) Michael J. Schnieders 2001-2025.
6 //
7 // This file is part of Force Field X.
8 //
9 // Force Field X is free software; you can redistribute it and/or modify it
10 // under the terms of the GNU General Public License version 3 as published by
11 // the Free Software Foundation.
12 //
13 // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16 // details.
17 //
18 // You should have received a copy of the GNU General Public License along with
19 // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20 // Place, Suite 330, Boston, MA 02111-1307 USA
21 //
22 // Linking this library statically or dynamically with other modules is making a
23 // combined work based on this library. Thus, the terms and conditions of the
24 // GNU General Public License cover the whole combination.
25 //
26 // As a special exception, the copyright holders of this library give you
27 // permission to link this library with independent modules to produce an
28 // executable, regardless of the license terms of these independent modules, and
29 // to copy and distribute the resulting executable under terms of your choice,
30 // provided that you also meet, for each linked independent module, the terms
31 // and conditions of the license of that module. An independent module is a
32 // module which is not derived from or based on this library. If you modify this
33 // library, you may extend this exception to your version of the library, but
34 // you are not obligated to do so. If you do not wish to do so, delete this
35 // exception statement from your version.
36 //
37 // ******************************************************************************
38 package ffx.numerics.clustering;
39
40 import java.util.ArrayList;
41 import java.util.List;
42
43 /**
44 * Clustering algorithm that consumes a condensed (pdist-style) upper-triangular
45 * distance array to produce hierarchical agglomerative clusters. Supports flat
46 * clustering by threshold; weighted inputs delegate to unweighted behavior.
47 *
48 * @author Lars Behnke, 2013
49 * @author Michael J. Schnieders
50 * @since 1.0
51 */
52 public class PDistClusteringAlgorithm implements ClusteringAlgorithm {
53
54 /**
55 * Performs hierarchical agglomerative clustering using a condensed pdist-like matrix.
56 *
57 * @param distances a 1 x M array holding the upper-triangular distances in row-major pdist order
58 * @param clusterNames names corresponding to the N items (M = N*(N-1)/2)
59 * @param linkageStrategy linkage criterion used during agglomeration
60 * @return root Cluster of the resulting hierarchy
61 */
62 @Override
63 public Cluster performClustering(double[][] distances,
64 String[] clusterNames, LinkageStrategy linkageStrategy) {
65
66 /* Argument checks */
67 if (distances == null || distances.length == 0) {
68 throw new IllegalArgumentException("Invalid distance matrix");
69 }
70 if (distances[0].length != clusterNames.length
71 * (clusterNames.length - 1) / 2) {
72 throw new IllegalArgumentException("Invalid cluster name array");
73 }
74 if (linkageStrategy == null) {
75 throw new IllegalArgumentException("Undefined linkage strategy");
76 }
77
78 /* Setup model */
79 List<Cluster> clusters = createClusters(clusterNames);
80 DistanceMap linkages = createLinkages(distances, clusters);
81
82 /* Process */
83 HierarchyBuilder builder = new HierarchyBuilder(clusters, linkages);
84 while (!builder.isTreeComplete()) {
85 builder.agglomerate(linkageStrategy);
86 }
87
88 return builder.getRootCluster();
89 }
90
91 /**
92 * Produces a flat clustering from a condensed distance matrix by agglomerating until the
93 * threshold is exceeded.
94 *
95 * @param distances a 1 x M condensed distance array (pdist order)
96 * @param clusterNames names of the N items
97 * @param linkageStrategy linkage criterion used during agglomeration
98 * @param threshold maximum allowed linkage distance for merging
99 * @return list of clusters at the chosen cut
100 */
101 @Override
102 public List<Cluster> performFlatClustering(double[][] distances,
103 String[] clusterNames, LinkageStrategy linkageStrategy, Double threshold) {
104
105 /* Argument checks */
106 if (distances == null || distances.length == 0) {
107 throw new IllegalArgumentException("Invalid distance matrix");
108 }
109 if (distances[0].length != clusterNames.length
110 * (clusterNames.length - 1) / 2) {
111 throw new IllegalArgumentException("Invalid cluster name array");
112 }
113 if (linkageStrategy == null) {
114 throw new IllegalArgumentException("Undefined linkage strategy");
115 }
116
117 /* Setup model */
118 List<Cluster> clusters = createClusters(clusterNames);
119 DistanceMap linkages = createLinkages(distances, clusters);
120
121 /* Process */
122 HierarchyBuilder builder = new HierarchyBuilder(clusters, linkages);
123 return builder.flatAgg(linkageStrategy, threshold);
124 }
125
126 /**
127 * Weighted variant for condensed inputs; currently delegates to unweighted clustering as
128 * weights are not applied with condensed input in this implementation.
129 *
130 * @param distances a 1 x M condensed distance array (pdist order)
131 * @param clusterNames names of the N items
132 * @param weights weights for the N items (unused)
133 * @param linkageStrategy linkage criterion used during agglomeration
134 * @return root Cluster of the resulting hierarchy
135 */
136 @Override
137 public Cluster performWeightedClustering(double[][] distances, String[] clusterNames,
138 double[] weights, LinkageStrategy linkageStrategy) {
139 return performClustering(distances, clusterNames, linkageStrategy);
140 }
141
142 /**
143 * Builds initial linkages from a condensed upper-triangular distance array.
144 *
145 * @param distances a 1 x M condensed distance array (pdist order)
146 * @param clusters list of initial singleton clusters
147 * @return a DistanceMap containing inter-cluster linkages
148 */
149 private DistanceMap createLinkages(double[][] distances,
150 List<Cluster> clusters) {
151 DistanceMap linkages = new DistanceMap();
152 for (int col = 0; col < clusters.size(); col++) {
153 Cluster cluster_col = clusters.get(col);
154 for (int row = col + 1; row < clusters.size(); row++) {
155 ClusterPair link = new ClusterPair();
156 Double d = distances[0][accessFunction(row, col,
157 clusters.size())];
158 link.setLinkageDistance(d);
159 link.setlCluster(cluster_col);
160 link.setrCluster(clusters.get(row));
161 linkages.add(link);
162 }
163 }
164 return linkages;
165 }
166
167 /**
168 * Creates initial singleton clusters, one per input name.
169 *
170 * @param clusterNames names for singleton clusters
171 * @return list of newly created singleton clusters
172 */
173 private List<Cluster> createClusters(String[] clusterNames) {
174 List<Cluster> clusters = new ArrayList<>();
175 for (String clusterName : clusterNames) {
176 Cluster cluster = new Cluster(clusterName);
177 cluster.addLeafName(clusterName);
178 clusters.add(cluster);
179 }
180 return clusters;
181 }
182
183 // Credit to this function goes to
184 // http://stackoverflow.com/questions/13079563/how-does-condensed-distance-matrix-work-pdist
185
186 /**
187 * Indexing function mapping (i,j) with i>j to the position in the condensed pdist array.
188 *
189 * @param i row index (0-based)
190 * @param j column index (0-based), with i > j
191 * @param n the total number of items (matrix dimension)
192 * @return index into the condensed upper-triangular storage
193 */
194 private static int accessFunction(int i, int j, int n) {
195 return n * j - j * (j + 1) / 2 + i - 1 - j;
196 }
197
198 }