View Javadoc
1   // ******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2025.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  // ******************************************************************************
38  package ffx.numerics.clustering;
39  
40  import java.util.ArrayList;
41  import java.util.List;
42  
43  /**
44   * Clustering algorithm that consumes a condensed (pdist-style) upper-triangular
45   * distance array to produce hierarchical agglomerative clusters. Supports flat
46   * clustering by threshold; weighted inputs delegate to unweighted behavior.
47   *
48   * @author Lars Behnke, 2013
49   * @author Michael J. Schnieders
50   * @since 1.0
51   */
52  public class PDistClusteringAlgorithm implements ClusteringAlgorithm {
53  
54    /**
55     * Performs hierarchical agglomerative clustering using a condensed pdist-like matrix.
56     *
57     * @param distances       a 1 x M array holding the upper-triangular distances in row-major pdist order
58     * @param clusterNames    names corresponding to the N items (M = N*(N-1)/2)
59     * @param linkageStrategy linkage criterion used during agglomeration
60     * @return root Cluster of the resulting hierarchy
61     */
62    @Override
63    public Cluster performClustering(double[][] distances,
64                                     String[] clusterNames, LinkageStrategy linkageStrategy) {
65  
66      /* Argument checks */
67      if (distances == null || distances.length == 0) {
68        throw new IllegalArgumentException("Invalid distance matrix");
69      }
70      if (distances[0].length != clusterNames.length
71          * (clusterNames.length - 1) / 2) {
72        throw new IllegalArgumentException("Invalid cluster name array");
73      }
74      if (linkageStrategy == null) {
75        throw new IllegalArgumentException("Undefined linkage strategy");
76      }
77  
78      /* Setup model */
79      List<Cluster> clusters = createClusters(clusterNames);
80      DistanceMap linkages = createLinkages(distances, clusters);
81  
82      /* Process */
83      HierarchyBuilder builder = new HierarchyBuilder(clusters, linkages);
84      while (!builder.isTreeComplete()) {
85        builder.agglomerate(linkageStrategy);
86      }
87  
88      return builder.getRootCluster();
89    }
90  
91    /**
92     * Produces a flat clustering from a condensed distance matrix by agglomerating until the
93     * threshold is exceeded.
94     *
95     * @param distances       a 1 x M condensed distance array (pdist order)
96     * @param clusterNames    names of the N items
97     * @param linkageStrategy linkage criterion used during agglomeration
98     * @param threshold       maximum allowed linkage distance for merging
99     * @return list of clusters at the chosen cut
100    */
101   @Override
102   public List<Cluster> performFlatClustering(double[][] distances,
103                                              String[] clusterNames, LinkageStrategy linkageStrategy, Double threshold) {
104 
105     /* Argument checks */
106     if (distances == null || distances.length == 0) {
107       throw new IllegalArgumentException("Invalid distance matrix");
108     }
109     if (distances[0].length != clusterNames.length
110         * (clusterNames.length - 1) / 2) {
111       throw new IllegalArgumentException("Invalid cluster name array");
112     }
113     if (linkageStrategy == null) {
114       throw new IllegalArgumentException("Undefined linkage strategy");
115     }
116 
117     /* Setup model */
118     List<Cluster> clusters = createClusters(clusterNames);
119     DistanceMap linkages = createLinkages(distances, clusters);
120 
121     /* Process */
122     HierarchyBuilder builder = new HierarchyBuilder(clusters, linkages);
123     return builder.flatAgg(linkageStrategy, threshold);
124   }
125 
126   /**
127    * Weighted variant for condensed inputs; currently delegates to unweighted clustering as
128    * weights are not applied with condensed input in this implementation.
129    *
130    * @param distances       a 1 x M condensed distance array (pdist order)
131    * @param clusterNames    names of the N items
132    * @param weights         weights for the N items (unused)
133    * @param linkageStrategy linkage criterion used during agglomeration
134    * @return root Cluster of the resulting hierarchy
135    */
136   @Override
137   public Cluster performWeightedClustering(double[][] distances, String[] clusterNames,
138                                            double[] weights, LinkageStrategy linkageStrategy) {
139     return performClustering(distances, clusterNames, linkageStrategy);
140   }
141 
142   /**
143    * Builds initial linkages from a condensed upper-triangular distance array.
144    *
145    * @param distances a 1 x M condensed distance array (pdist order)
146    * @param clusters  list of initial singleton clusters
147    * @return a DistanceMap containing inter-cluster linkages
148    */
149   private DistanceMap createLinkages(double[][] distances,
150                                      List<Cluster> clusters) {
151     DistanceMap linkages = new DistanceMap();
152     for (int col = 0; col < clusters.size(); col++) {
153       Cluster cluster_col = clusters.get(col);
154       for (int row = col + 1; row < clusters.size(); row++) {
155         ClusterPair link = new ClusterPair();
156         Double d = distances[0][accessFunction(row, col,
157             clusters.size())];
158         link.setLinkageDistance(d);
159         link.setlCluster(cluster_col);
160         link.setrCluster(clusters.get(row));
161         linkages.add(link);
162       }
163     }
164     return linkages;
165   }
166 
167   /**
168    * Creates initial singleton clusters, one per input name.
169    *
170    * @param clusterNames names for singleton clusters
171    * @return list of newly created singleton clusters
172    */
173   private List<Cluster> createClusters(String[] clusterNames) {
174     List<Cluster> clusters = new ArrayList<>();
175     for (String clusterName : clusterNames) {
176       Cluster cluster = new Cluster(clusterName);
177       cluster.addLeafName(clusterName);
178       clusters.add(cluster);
179     }
180     return clusters;
181   }
182 
183   // Credit to this function goes to
184   // http://stackoverflow.com/questions/13079563/how-does-condensed-distance-matrix-work-pdist
185 
186   /**
187    * Indexing function mapping (i,j) with i>j to the position in the condensed pdist array.
188    *
189    * @param i row index (0-based)
190    * @param j column index (0-based), with i > j
191    * @param n the total number of items (matrix dimension)
192    * @return index into the condensed upper-triangular storage
193    */
194   private static int accessFunction(int i, int j, int n) {
195     return n * j - j * (j + 1) / 2 + i - 1 - j;
196   }
197 
198 }