1 // ****************************************************************************** 2 // 3 // Title: Force Field X. 4 // Description: Force Field X - Software for Molecular Biophysics. 5 // Copyright: Copyright (c) Michael J. Schnieders 2001-2025. 6 // 7 // This file is part of Force Field X. 8 // 9 // Force Field X is free software; you can redistribute it and/or modify it 10 // under the terms of the GNU General Public License version 3 as published by 11 // the Free Software Foundation. 12 // 13 // Force Field X is distributed in the hope that it will be useful, but WITHOUT 14 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 15 // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 16 // details. 17 // 18 // You should have received a copy of the GNU General Public License along with 19 // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple 20 // Place, Suite 330, Boston, MA 02111-1307 USA 21 // 22 // Linking this library statically or dynamically with other modules is making a 23 // combined work based on this library. Thus, the terms and conditions of the 24 // GNU General Public License cover the whole combination. 25 // 26 // As a special exception, the copyright holders of this library give you 27 // permission to link this library with independent modules to produce an 28 // executable, regardless of the license terms of these independent modules, and 29 // to copy and distribute the resulting executable under terms of your choice, 30 // provided that you also meet, for each linked independent module, the terms 31 // and conditions of the license of that module. An independent module is a 32 // module which is not derived from or based on this library. If you modify this 33 // library, you may extend this exception to your version of the library, but 34 // you are not obligated to do so. If you do not wish to do so, delete this 35 // exception statement from your version. 36 // 37 // ****************************************************************************** 38 package ffx.numerics.estimator; 39 40 import java.util.ArrayList; 41 42 import static java.lang.System.arraycopy; 43 import static java.util.Arrays.copyOf; 44 import static java.util.Arrays.fill; 45 import static java.util.Arrays.stream; 46 47 /** 48 * The SequentialEstimator abstract class defines a statistical estimator based on perturbative 49 * potential energy differences between adjacent windows (e.g. exponential free energy perturbation, 50 * Bennett Acceptance Ratio, etc). 51 * 52 * @author Michael J. Schnieders 53 * @author Jacob M. Litman 54 * @author Matthew J. Speranza 55 * @since 1.0 56 */ 57 public abstract class SequentialEstimator implements StatisticalEstimator { 58 59 /** 60 * The lambda values at which the samples were collected. 61 */ 62 protected final double[] lamValues; 63 /** 64 * The number of states from which samples were collected. 65 */ 66 protected final int nStates; 67 /** 68 * The potential energy of each snapshot at lambda - dL. 69 */ 70 protected final double[][] eLambdaMinusdL; 71 /** 72 * The potential energy of each snapshot at lambda. 73 */ 74 protected final double[][] eLambda; 75 /** 76 * The potential energy of each snapshot at lambda + dL. 77 */ 78 protected final double[][] eLambdaPlusdL; 79 /** 80 * The potential energies of the snapshots at all other lambda values. 81 * <p> 82 * eAll[lambdaWindow][perturbations][energies] 83 */ 84 protected final double[][][] eAll; 85 /** 86 * The potential energies of the snapshots at all other lambda values. 87 * <p> 88 * eAllFlat[lambda][evaluationsAtThisLambdaFromAllOtherLambda] 89 */ 90 protected double[][] eAllFlat; 91 /** 92 * The number of samples for each lambda state. 93 */ 94 protected int[] nSamples; 95 /** 96 * The temperatures at which the samples were collected. 97 */ 98 protected final double[] temperatures; 99 100 /** 101 * The SequentialEstimator constructor largely just copies its parameters into local variables. 102 * Most arrays are duplicated (rather a just copying their reference). 103 * The temperature array can be of length 1 if all elements are meant to be the same temperature. 104 * 105 * <p>The first dimension of the energies arrays corresponds to the lambda values. The 106 * second dimension (can be of uneven length) corresponds to potential energies of snapshots 107 * sampled at that lambda value, calculated either at that lambda value, the lambda value below, 108 * or the lambda value above. The arrays eLambdaMindL[0] and eLambdaPlusdL[n-1] is expected to be all 109 * NaN. 110 * 111 * @param lambdaValues Values of lambda for sampled states. 112 * @param eLambdaMinusdL Potential energies of state L at L-dL. 113 * @param eLambda Potential energies of state L at L. 114 * @param eLambdaPlusdL Potential energies of state L at L+dL. 115 * @param temperature Temperature each state (single-element for a constant temperature). 116 */ 117 public SequentialEstimator(double[] lambdaValues, double[][] eLambdaMinusdL, double[][] eLambda, 118 double[][] eLambdaPlusdL, double[] temperature) { 119 nStates = lambdaValues.length; 120 eAll = null; 121 eAllFlat = null; 122 nSamples = null; 123 124 assert stream(eLambdaMinusdL[0]).allMatch(Double::isNaN) 125 && stream(eLambdaPlusdL[nStates - 1]).allMatch(Double::isNaN); 126 127 assert nStates == eLambda.length 128 && nStates == eLambdaMinusdL.length 129 && nStates == eLambdaPlusdL.length 130 : "One of the energy arrays is of the incorrect length in the first dimension!"; 131 132 this.lamValues = copyOf(lambdaValues, nStates); 133 temperatures = new double[nStates]; 134 if (temperature.length == 1) { 135 fill(temperatures, temperature[0]); 136 } else { 137 arraycopy(temperature, 0, temperatures, 0, nStates); 138 } 139 140 // Just in case, copy the arrays rather than storing them as provided. 141 this.eLambdaMinusdL = new double[nStates][]; 142 this.eLambda = new double[nStates][]; 143 this.eLambdaPlusdL = new double[nStates][]; 144 for (int i = 0; i < nStates; i++) { 145 if (i != 0) { 146 // There is no perturbation below first state (usually L = 0). 147 this.eLambdaMinusdL[i] = copyOf(eLambdaMinusdL[i], eLambdaMinusdL[i].length); 148 } 149 this.eLambda[i] = copyOf(eLambda[i], eLambda[i].length); 150 if (i != nStates - 1) { 151 // There is no perturbation above the final state (usually L = 1). 152 this.eLambdaPlusdL[i] = copyOf(eLambdaPlusdL[i], eLambdaPlusdL[i].length); 153 } 154 } 155 } 156 157 158 /** 159 * The SequentialEstimator constructor largely just copies its parameters into local variables. 160 * Most arrays are duplicated (rather a just copying their reference). 161 * The temperature array can be of length 1 if all elements are meant to be the same temperature. 162 * <p> 163 * This constructor is meant for lower variance estimators such as MBAR and WHAM. These methods require energy 164 * evaluations from all lambda windows at all lambda values. The energiesAll array is expected to be 165 * of the form energiesAll[lambdaWindow][windowPerspective][lambdaWindowSnapshotPerspectiveEnergy]. 166 * As an example, at the 3rd lambda window, the energiesAll[2] array should contain the energies 167 * of all the snapshots from the 3rd lambda window evaluated at all lambda values. energiesAll[2][3] is a 168 * list of all snapshots from lambda 3 evaluated with the potential of lambda 4. energiesAll[2][3][4] is 169 * the 5th snapshot from lambda 3 evaluated with the potential of lambda 4. 170 * <p> 171 * This constructor also breaks energiesAll into a flattened array (across the second dimension) such that 172 * the first dimension is the lambda window where the energy was evaluated and the second dimension is the 173 * snaps. energiesAll is also broken down into eAt, eLow, and eHigh arrays for convenience and so that BAR 174 * calculations can be performed and compared. 175 * 176 * @param lambdaValues Values of lambda dynamics was run at. 177 * @param energiesAll Potential energy of each sample at all other lambdas. (Missing states are NaN) 178 * @param temperature Temperature each state (single-element for a constant temperature). 179 */ 180 public SequentialEstimator(double[] lambdaValues, double[][][] energiesAll, double[] temperature) { 181 nStates = lambdaValues.length; 182 assert nStates == energiesAll.length 183 : "The energy arrays is of the incorrect length in the first lambda dimension!"; 184 assert nStates == energiesAll[0].length 185 : "The energy arrays is of the incorrect length in the second lambda dimension!"; 186 187 this.lamValues = copyOf(lambdaValues, nStates); 188 temperatures = new double[nStates]; 189 if (temperature.length == 1) { 190 fill(temperatures, temperature[0]); 191 } else { 192 arraycopy(temperature, 0, temperatures, 0, nStates); 193 } 194 195 // Just in case, deep copy the array rather than storing them as provided. 196 eAll = new double[nStates][][]; 197 int maxSnaps = 0; 198 for (int i = 0; i < nStates; i++) { 199 eAll[i] = new double[energiesAll[i].length][]; 200 for (int j = 0; j < energiesAll[i].length; j++) { 201 eAll[i][j] = copyOf(energiesAll[i][j], energiesAll[i][j].length); 202 maxSnaps = Math.max(maxSnaps, eAll[i][j].length); 203 } 204 } 205 206 // Remove jagged edges from eAll with NaN values in case it hasn't been done for you 207 for (int i = 0; i < nStates; i++) { 208 for (int j = 0; j < nStates; j++) { 209 if (eAll[i][j].length < maxSnaps) { 210 double[] temp = new double[maxSnaps]; 211 System.arraycopy(eAll[i][j], 0, temp, 0, eAll[i][j].length); 212 for (int k = eAll[i][j].length; k < maxSnaps; k++) { 213 temp[k] = Double.NaN; 214 } 215 eAll[i][j] = temp; 216 } 217 } 218 } 219 220 // Flatten the eAll array into a 2D array of [lambda][allEvaluationsAtThisLambda] 221 nSamples = new int[nStates]; 222 int[] nanCount = new int[nStates]; 223 eAllFlat = new double[nStates][]; 224 for (int i = 0; i < nStates; i++) { 225 ArrayList<Double> temp = new ArrayList<>(); 226 for (int j = 0; j < nStates; j++) { 227 int count = 0; 228 int countNaN = 0; 229 for (int k = 0; k < eAll[j][i].length; k++) { 230 // Don't include NaN values 231 if (!Double.isNaN(eAll[j][i][k])) { 232 temp.add(eAll[j][i][k]); 233 count++; 234 } else { 235 countNaN++; 236 } 237 } 238 nSamples[j] = count; 239 nanCount[j] = countNaN; 240 } 241 eAllFlat[i] = temp.stream().mapToDouble(Double::doubleValue).toArray(); 242 } 243 244 for (int i = 0; i < nStates; i++) { 245 if (nSamples[i] + nanCount[i] != maxSnaps) { 246 throw new IllegalArgumentException("Lambda window " + i 247 + " is not set properly. You need to fill in the missing states with NaN."); 248 } 249 } 250 251 // Assert that lengths of the energiesAll arrays are correct. 252 for (int i = 0; i < nStates; i++) { 253 assert eAll[i].length == nStates : 254 "The energy arrays is of the incorrect length in the second lambda dimension at lambda " + i + "!"; 255 int nSnapshots = eAll[i][0].length; 256 for (int j = 0; j < nStates; j++) { 257 assert eAll[i][j].length == nSnapshots : 258 "The energy arrays is of the incorrect length in numSnaps dimension at lambda " + 259 i + " for evaluation at lambda " + j + "!"; 260 } 261 } 262 263 // Initialize the eLow, eAt, and eHigh arrays to their expected values from eAll. Don't include NaN values. 264 // Handle zero sample cases for BAR 265 ArrayList<Integer> nonZeroSampleStates = new ArrayList<>(); 266 for (int i = 0; i < nStates; i++) { 267 if (nSamples[i] != 0) { 268 nonZeroSampleStates.add(i); 269 } 270 } 271 eLambdaMinusdL = new double[nonZeroSampleStates.size()][eAll[0][0].length]; 272 fill(eLambdaMinusdL[0], Double.NaN); 273 eLambda = new double[nonZeroSampleStates.size()][]; 274 eLambdaPlusdL = new double[nonZeroSampleStates.size()][eAll[0][0].length]; 275 fill(eLambdaPlusdL[nonZeroSampleStates.size() - 1], Double.NaN); 276 for (int i = 0; i < nonZeroSampleStates.size(); i++) { 277 int index = nonZeroSampleStates.get(i); // Contains out of bounds index for e; 278 if (i != 0) { 279 int indexLow = nonZeroSampleStates.get(i - 1); 280 eLambdaMinusdL[i] = copyOf(eAll[index][indexLow], nSamples[index]); 281 } 282 eLambda[i] = copyOf(eAll[index][index], nSamples[index]); 283 if (i != nonZeroSampleStates.size() - 1) { 284 int indexHigh = nonZeroSampleStates.get(i + 1); 285 eLambdaPlusdL[i] = copyOf(eAll[index][indexHigh], nSamples[index]); 286 } 287 } 288 } 289 290 /** 291 * Simpler constructor for when data provided is already flattened (although it adds uncertainty about 292 * snap counts, they are all set to the same number). 293 * 294 * @param lambdaValues Lambda values. 295 * @param nSamples Number of samples for each state. 296 * @param eAllFlat Flattened energy evaluations at all lambda values. 297 * @param temperature Temperature each state (single-element for a constant temperature). 298 */ 299 public SequentialEstimator(double[] lambdaValues, int[] nSamples, double[][] eAllFlat, double[] temperature) { 300 nStates = lambdaValues.length; 301 assert nStates == eAllFlat.length 302 : "The energy arrays is of the incorrect length in the first lambda dimension!"; 303 304 this.lamValues = copyOf(lambdaValues, nStates); 305 temperatures = new double[nStates]; 306 if (temperature.length == 1) { 307 fill(temperatures, temperature[0]); 308 } else { 309 arraycopy(temperature, 0, temperatures, 0, nStates); 310 } 311 312 this.eAllFlat = eAllFlat; 313 this.nSamples = nSamples; 314 // No way of knowing the snap counts for each lambda window & therefore no way to break data into these matrices, 315 // so just set these all to null. 316 eLambdaMinusdL = null; 317 eLambda = null; 318 eLambdaPlusdL = null; 319 eAll = null; 320 } 321 }