View Javadoc
1   // ******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2025.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  // ******************************************************************************
38  package ffx.numerics.clustering;
39  
40  import java.util.ArrayList;
41  import java.util.Arrays;
42  import java.util.HashSet;
43  import java.util.List;
44  
45  /**
46   * Clustering algorithm that operates on a full N x N distance matrix to produce
47   * hierarchical agglomerative clusters (dendrogram), with optional support for
48   * per-element weights and flat clustering by threshold.
49   *
50   * @author Lars Behnke, 2013
51   * @author Michael J. Schnieders
52   * @since 1.0
53   */
54  public class DefaultClusteringAlgorithm implements ClusteringAlgorithm {
55  
56    /**
57     * Performs hierarchical agglomerative clustering using a full N x N distance matrix.
58     *
59     * @param distances       an N x N symmetric matrix of pairwise distances
60     * @param clusterNames    names corresponding to rows/columns of the distance matrix
61     * @param linkageStrategy linkage criterion used to compute inter-cluster distances
62     * @return root Cluster of the resulting hierarchy
63     */
64    @Override
65    public Cluster performClustering(double[][] distances,
66                                     String[] clusterNames, LinkageStrategy linkageStrategy) {
67  
68      checkArguments(distances, clusterNames, linkageStrategy);
69      /* Setup model */
70      List<Cluster> clusters = createClusters(clusterNames);
71      DistanceMap linkages = createLinkages(distances, clusters);
72  
73      /* Process */
74      HierarchyBuilder builder = new HierarchyBuilder(clusters, linkages);
75      while (!builder.isTreeComplete()) {
76        builder.agglomerate(linkageStrategy);
77      }
78  
79      return builder.getRootCluster();
80    }
81  
82    /**
83     * Produces a flat clustering by agglomerating until the next merge would exceed the threshold.
84     *
85     * @param distances       an N x N symmetric matrix of pairwise distances
86     * @param clusterNames    names corresponding to the distance matrix
87     * @param linkageStrategy linkage criterion used during agglomeration
88     * @param threshold       maximum allowed linkage distance for merging
89     * @return list of clusters at the chosen cut
90     */
91    @Override
92    public List<Cluster> performFlatClustering(double[][] distances,
93                                               String[] clusterNames, LinkageStrategy linkageStrategy, Double threshold) {
94  
95      checkArguments(distances, clusterNames, linkageStrategy);
96      /* Setup model */
97      List<Cluster> clusters = createClusters(clusterNames);
98      DistanceMap linkages = createLinkages(distances, clusters);
99  
100     /* Process */
101     HierarchyBuilder builder = new HierarchyBuilder(clusters, linkages);
102     return builder.flatAgg(linkageStrategy, threshold);
103   }
104 
105   /**
106    * Validates input arrays and strategy for consistency.
107    *
108    * @param distances       an N x N symmetric matrix of distances
109    * @param clusterNames    array of N names
110    * @param linkageStrategy strategy to compute linkage distances
111    * @throws IllegalArgumentException if inputs are inconsistent
112    */
113   private void checkArguments(double[][] distances, String[] clusterNames,
114                               LinkageStrategy linkageStrategy) {
115     if (distances == null || distances.length == 0
116         || distances[0].length != distances.length) {
117       throw new IllegalArgumentException("Invalid distance matrix");
118     }
119     if (distances.length != clusterNames.length) {
120       throw new IllegalArgumentException("Invalid cluster name array");
121     }
122     if (linkageStrategy == null) {
123       throw new IllegalArgumentException("Undefined linkage strategy");
124     }
125     int uniqueCount = new HashSet<>(Arrays.asList(clusterNames)).size();
126     if (uniqueCount != clusterNames.length) {
127       throw new IllegalArgumentException("Duplicate names");
128     }
129   }
130 
131   /**
132    * Performs hierarchical clustering when each element has an associated weight.
133    *
134    * @param distances       an N x N symmetric matrix of distances
135    * @param clusterNames    names for the N input elements
136    * @param weights         weights for the N input elements
137    * @param linkageStrategy linkage criterion to use
138    * @return root Cluster of the resulting hierarchy
139    */
140   @Override
141   public Cluster performWeightedClustering(double[][] distances, String[] clusterNames,
142                                            double[] weights, LinkageStrategy linkageStrategy) {
143 
144     checkArguments(distances, clusterNames, linkageStrategy);
145 
146     if (weights.length != clusterNames.length) {
147       throw new IllegalArgumentException("Invalid weights array");
148     }
149 
150     /* Setup model */
151     List<Cluster> clusters = createClusters(clusterNames, weights);
152     DistanceMap linkages = createLinkages(distances, clusters);
153 
154     /* Process */
155     HierarchyBuilder builder = new HierarchyBuilder(clusters, linkages);
156     while (!builder.isTreeComplete()) {
157       builder.agglomerate(linkageStrategy);
158     }
159 
160     return builder.getRootCluster();
161   }
162 
163   /**
164    * Builds initial pairwise linkages from a full distance matrix and initial clusters.
165    *
166    * @param distances N x N symmetric matrix of distances
167    * @param clusters  list of initial singleton clusters
168    * @return a DistanceMap containing all inter-cluster linkages
169    */
170   private DistanceMap createLinkages(double[][] distances,
171                                      List<Cluster> clusters) {
172     DistanceMap linkages = new DistanceMap();
173     for (int col = 0; col < clusters.size(); col++) {
174       for (int row = col + 1; row < clusters.size(); row++) {
175         ClusterPair link = new ClusterPair();
176         Cluster lCluster = clusters.get(col);
177         Cluster rCluster = clusters.get(row);
178         link.setLinkageDistance(distances[col][row]);
179         link.setlCluster(lCluster);
180         link.setrCluster(rCluster);
181         linkages.add(link);
182       }
183     }
184     return linkages;
185   }
186 
187   /**
188    * Creates initial singleton clusters, one per input name.
189    *
190    * @param clusterNames names for singleton clusters
191    * @return list of newly created singleton clusters
192    */
193   private List<Cluster> createClusters(String[] clusterNames) {
194     List<Cluster> clusters = new ArrayList<>();
195     for (String clusterName : clusterNames) {
196       Cluster cluster = new Cluster(clusterName);
197       cluster.addLeafName(clusterName);
198       clusters.add(cluster);
199     }
200     return clusters;
201   }
202 
203   /**
204    * Creates initial singleton clusters with weights.
205    *
206    * @param clusterNames names for singleton clusters
207    * @param weights      weights for each corresponding singleton
208    * @return list of newly created weighted singleton clusters
209    */
210   private List<Cluster> createClusters(String[] clusterNames, double[] weights) {
211     List<Cluster> clusters = new ArrayList<>();
212     for (int i = 0; i < weights.length; i++) {
213       Cluster cluster = new Cluster(clusterNames[i]);
214       cluster.setDistance(new Distance(0.0, weights[i]));
215       clusters.add(cluster);
216     }
217     return clusters;
218   }
219 
220 }