1 // ****************************************************************************** 2 // 3 // Title: Force Field X. 4 // Description: Force Field X - Software for Molecular Biophysics. 5 // Copyright: Copyright (c) Michael J. Schnieders 2001-2025. 6 // 7 // This file is part of Force Field X. 8 // 9 // Force Field X is free software; you can redistribute it and/or modify it 10 // under the terms of the GNU General Public License version 3 as published by 11 // the Free Software Foundation. 12 // 13 // Force Field X is distributed in the hope that it will be useful, but WITHOUT 14 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 15 // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 16 // details. 17 // 18 // You should have received a copy of the GNU General Public License along with 19 // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple 20 // Place, Suite 330, Boston, MA 02111-1307 USA 21 // 22 // Linking this library statically or dynamically with other modules is making a 23 // combined work based on this library. Thus, the terms and conditions of the 24 // GNU General Public License cover the whole combination. 25 // 26 // As a special exception, the copyright holders of this library give you 27 // permission to link this library with independent modules to produce an 28 // executable, regardless of the license terms of these independent modules, and 29 // to copy and distribute the resulting executable under terms of your choice, 30 // provided that you also meet, for each linked independent module, the terms 31 // and conditions of the license of that module. An independent module is a 32 // module which is not derived from or based on this library. If you modify this 33 // library, you may extend this exception to your version of the library, but 34 // you are not obligated to do so. If you do not wish to do so, delete this 35 // exception statement from your version. 36 // 37 // ****************************************************************************** 38 package ffx.numerics.clustering; 39 40 import java.util.ArrayList; 41 import java.util.List; 42 43 /** 44 * Represents a node in a hierarchical clustering tree (dendrogram). 45 * A Cluster may be a leaf (no children) or an internal node with two children. 46 * It tracks its name, parent, children, the list of leaf names contained beneath it, 47 * and an associated Distance used to store linkage distance and aggregate weight. 48 * 49 * @author Lars Behnke, 2013 50 * @author Michael J. Schnieders 51 * @since 1.0 52 */ 53 public class Cluster { 54 55 private String name; 56 57 private Cluster parent; 58 59 private List<Cluster> children; 60 61 private final List<String> leafNames; 62 63 private Distance distance = new Distance(); 64 65 66 /** 67 * Creates a new Cluster with the provided name. 68 * 69 * @param name the cluster name 70 */ 71 public Cluster(String name) { 72 this.name = name; 73 leafNames = new ArrayList<>(); 74 } 75 76 /** 77 * Gets the Distance metadata for this cluster (linkage distance and weight). 78 * 79 * @return the Distance object 80 */ 81 public Distance getDistance() { 82 return distance; 83 } 84 85 /** 86 * Convenience accessor for this cluster's aggregate weight. 87 * 88 * @return the weight value stored in the Distance 89 */ 90 public Double getWeightValue() { 91 return distance.getWeight(); 92 } 93 94 /** 95 * Convenience accessor for this cluster's linkage distance value. 96 * 97 * @return the linkage distance, or null if unset 98 */ 99 public Double getDistanceValue() { 100 return distance.getDistance(); 101 } 102 103 /** 104 * Sets the Distance metadata for this cluster. 105 * 106 * @param distance the Distance to set 107 */ 108 public void setDistance(Distance distance) { 109 this.distance = distance; 110 } 111 112 /** 113 * Returns the list of child clusters, creating it lazily if needed. 114 * 115 * @return mutable list of child clusters (possibly empty) 116 */ 117 public List<Cluster> getChildren() { 118 if (children == null) { 119 children = new ArrayList<>(); 120 } 121 122 return children; 123 } 124 125 /** 126 * Adds a single leaf name contained within this cluster's subtree. 127 * 128 * @param lname the leaf name to add 129 */ 130 public void addLeafName(String lname) { 131 leafNames.add(lname); 132 } 133 134 /** 135 * Appends a list of leaf names to this cluster's leaf name list. 136 * 137 * @param lnames list of leaf names to append 138 */ 139 public void appendLeafNames(List<String> lnames) { 140 leafNames.addAll(lnames); 141 } 142 143 /** 144 * Returns the list of leaf names beneath this cluster. 145 * 146 * @return list of leaf names 147 */ 148 public List<String> getLeafNames() { 149 return leafNames; 150 } 151 152 /** 153 * Sets the list of child clusters for this node. 154 * 155 * @param children the children to set 156 */ 157 public void setChildren(List<Cluster> children) { 158 this.children = children; 159 } 160 161 /** 162 * Gets the parent cluster of this node, or null if this is the root. 163 * 164 * @return the parent cluster, or null 165 */ 166 public Cluster getParent() { 167 return parent; 168 } 169 170 /** 171 * Sets the parent cluster of this node. 172 * 173 * @param parent the parent cluster to set 174 */ 175 public void setParent(Cluster parent) { 176 this.parent = parent; 177 } 178 179 180 /** 181 * Gets the name of this cluster. 182 * 183 * @return the cluster name 184 */ 185 public String getName() { 186 return name; 187 } 188 189 /** 190 * Sets the name of this cluster. 191 * 192 * @param name the name to set 193 */ 194 public void setName(String name) { 195 this.name = name; 196 } 197 198 /** 199 * Adds a child cluster to this node. 200 * 201 * @param cluster the child to add 202 */ 203 public void addChild(Cluster cluster) { 204 getChildren().add(cluster); 205 206 } 207 208 /** 209 * Tests whether this cluster has the specified child. 210 * 211 * @param cluster the child to look for 212 * @return true if present; false otherwise 213 */ 214 public boolean contains(Cluster cluster) { 215 return getChildren().contains(cluster); 216 } 217 218 @Override 219 public String toString() { 220 return "Cluster " + name; 221 } 222 223 @Override 224 public boolean equals(Object obj) { 225 if (this == obj) { 226 return true; 227 } 228 if (obj == null) { 229 return false; 230 } 231 if (getClass() != obj.getClass()) { 232 return false; 233 } 234 Cluster other = (Cluster) obj; 235 if (name == null) { 236 return other.name == null; 237 } else return name.equals(other.name); 238 } 239 240 @Override 241 public int hashCode() { 242 return (name == null) ? 0 : name.hashCode(); 243 } 244 245 /** 246 * Returns true if this cluster is a leaf (has no children). 247 * 248 * @return true if leaf; false otherwise 249 */ 250 public boolean isLeaf() { 251 return getChildren().isEmpty(); 252 } 253 254 /** 255 * Counts the number of leaf descendants beneath this cluster. 256 * 257 * @return number of leaf nodes 258 */ 259 public int countLeafs() { 260 return countLeafs(this, 0); 261 } 262 263 /** 264 * Recursive helper to count leaves under the specified node. 265 * 266 * @param node the node to inspect 267 * @param count running count of leaves 268 * @return updated count including leaves beneath node 269 */ 270 public int countLeafs(Cluster node, int count) { 271 if (node.isLeaf()) count++; 272 for (Cluster child : node.getChildren()) { 273 count += child.countLeafs(); 274 } 275 return count; 276 } 277 278 /** 279 * Prints this cluster and its subtree to the console with indentation. 280 * 281 * @param indent number of indentation levels for this node 282 */ 283 public void toConsole(int indent) { 284 for (int i = 0; i < indent; i++) { 285 System.out.print(" "); 286 287 } 288 String name = getName() + (isLeaf() ? " (leaf)" : "") + (distance != null ? " distance: " + distance : ""); 289 System.out.println(name); 290 for (Cluster child : getChildren()) { 291 child.toConsole(indent + 1); 292 } 293 } 294 295 /** 296 * Serializes this cluster subtree into a simple Newick-like string. 297 * The first child is annotated with distance, the second with weight. 298 * 299 * @param indent indentation spaces to include (for readability) 300 * @return a Newick-like representation of this subtree 301 */ 302 public String toNewickString(int indent) { 303 StringBuilder cdtString = new StringBuilder(); 304 if (!isLeaf()) cdtString.append("("); 305 306 cdtString.append(" ".repeat(Math.max(0, indent))); 307 308 309 if (isLeaf()) { 310 cdtString.append(getName()); 311 } 312 313 List<Cluster> children = getChildren(); 314 315 boolean firstChild = true; 316 for (Cluster child : children) { 317 cdtString.append(child.toNewickString(indent)); 318 String distanceString = distance.getDistance().toString().replace(",", "."); 319 String weightString = distance.getWeight().toString().replace(",", "."); 320 if (firstChild) cdtString.append(":").append(distanceString).append(","); 321 else cdtString.append(":").append(weightString); 322 323 firstChild = false; 324 } 325 326 cdtString.append(" ".repeat(Math.max(0, indent))); 327 328 if (!isLeaf()) cdtString.append(")"); 329 330 return cdtString.toString(); 331 } 332 333 /** 334 * Computes the cumulative distance down the leftmost branch of this cluster. 335 * 336 * @return total distance along the first-child path 337 */ 338 public double getTotalDistance() { 339 double dist = getDistance() == null ? 0 : getDistance().getDistance(); 340 if (!getChildren().isEmpty()) { 341 dist += children.getFirst().getTotalDistance(); 342 } 343 return dist; 344 345 } 346 347 }