View Javadoc
1   // ******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2025.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  // ******************************************************************************
38  package ffx.numerics.estimator;
39  
40  import java.util.ArrayList;
41  
42  import static java.lang.System.arraycopy;
43  import static java.util.Arrays.copyOf;
44  import static java.util.Arrays.fill;
45  import static java.util.Arrays.stream;
46  
47  /**
48   * The SequentialEstimator abstract class defines a statistical estimator based on perturbative
49   * potential energy differences between adjacent windows (e.g. exponential free energy perturbation,
50   * Bennett Acceptance Ratio, etc).
51   *
52   * @author Michael J. Schnieders
53   * @author Jacob M. Litman
54   * @author Matthew J. Speranza
55   * @since 1.0
56   */
57  public abstract class SequentialEstimator implements StatisticalEstimator {
58  
59    /**
60     * The lambda values at which the samples were collected.
61     */
62    protected final double[] lamValues;
63    /**
64     * The number of states from which samples were collected.
65     */
66    protected final int nStates;
67    /**
68     * The potential energy of each snapshot at lambda - dL.
69     */
70    protected final double[][] eLambdaMinusdL;
71    /**
72     * The potential energy of each snapshot at lambda.
73     */
74    protected final double[][] eLambda;
75    /**
76     * The potential energy of each snapshot at lambda + dL.
77     */
78    protected final double[][] eLambdaPlusdL;
79    /**
80     * The potential energies of the snapshots at all other lambda values.
81     * <p>
82     * eAll[lambdaWindow][perturbations][energies]
83     */
84    protected final double[][][] eAll;
85    /**
86     * The potential energies of the snapshots at all other lambda values.
87     * <p>
88     * eAllFlat[lambda][evaluationsAtThisLambdaFromAllOtherLambda]
89     */
90    protected double[][] eAllFlat;
91    /**
92     * The number of samples for each lambda state.
93     */
94    protected int[] nSamples;
95    /**
96     * The temperatures at which the samples were collected.
97     */
98    protected final double[] temperatures;
99  
100   /**
101    * The SequentialEstimator constructor largely just copies its parameters into local variables.
102    * Most arrays are duplicated (rather a just copying their reference).
103    * The temperature array can be of length 1 if all elements are meant to be the same temperature.
104    *
105    * <p>The first dimension of the energies arrays corresponds to the lambda values. The
106    * second dimension (can be of uneven length) corresponds to potential energies of snapshots
107    * sampled at that lambda value, calculated either at that lambda value, the lambda value below,
108    * or the lambda value above. The arrays eLambdaMindL[0] and eLambdaPlusdL[n-1] is expected to be all
109    * NaN.
110    *
111    * @param lambdaValues   Values of lambda for sampled states.
112    * @param eLambdaMinusdL Potential energies of state L at L-dL.
113    * @param eLambda        Potential energies of state L at L.
114    * @param eLambdaPlusdL  Potential energies of state L at L+dL.
115    * @param temperature    Temperature each state (single-element for a constant temperature).
116    */
117   public SequentialEstimator(double[] lambdaValues, double[][] eLambdaMinusdL, double[][] eLambda,
118                              double[][] eLambdaPlusdL, double[] temperature) {
119     nStates = lambdaValues.length;
120     eAll = null;
121     eAllFlat = null;
122     nSamples = null;
123 
124     assert stream(eLambdaMinusdL[0]).allMatch(Double::isNaN)
125         && stream(eLambdaPlusdL[nStates - 1]).allMatch(Double::isNaN);
126 
127     assert nStates == eLambda.length
128         && nStates == eLambdaMinusdL.length
129         && nStates == eLambdaPlusdL.length
130         : "One of the energy arrays is of the incorrect length in the first dimension!";
131 
132     this.lamValues = copyOf(lambdaValues, nStates);
133     temperatures = new double[nStates];
134     if (temperature.length == 1) {
135       fill(temperatures, temperature[0]);
136     } else {
137       arraycopy(temperature, 0, temperatures, 0, nStates);
138     }
139 
140     // Just in case, copy the arrays rather than storing them as provided.
141     this.eLambdaMinusdL = new double[nStates][];
142     this.eLambda = new double[nStates][];
143     this.eLambdaPlusdL = new double[nStates][];
144     for (int i = 0; i < nStates; i++) {
145       if (i != 0) {
146         // There is no perturbation below first state (usually L = 0).
147         this.eLambdaMinusdL[i] = copyOf(eLambdaMinusdL[i], eLambdaMinusdL[i].length);
148       }
149       this.eLambda[i] = copyOf(eLambda[i], eLambda[i].length);
150       if (i != nStates - 1) {
151         // There is no perturbation above the final state (usually L = 1).
152         this.eLambdaPlusdL[i] = copyOf(eLambdaPlusdL[i], eLambdaPlusdL[i].length);
153       }
154     }
155   }
156 
157 
158   /**
159    * The SequentialEstimator constructor largely just copies its parameters into local variables.
160    * Most arrays are duplicated (rather a just copying their reference).
161    * The temperature array can be of length 1 if all elements are meant to be the same temperature.
162    * <p>
163    * This constructor is meant for lower variance estimators such as MBAR and WHAM. These methods require energy
164    * evaluations from all lambda windows at all lambda values. The energiesAll array is expected to be
165    * of the form energiesAll[lambdaWindow][windowPerspective][lambdaWindowSnapshotPerspectiveEnergy].
166    * As an example, at the 3rd lambda window, the energiesAll[2] array should contain the energies
167    * of all the snapshots from the 3rd lambda window evaluated at all lambda values. energiesAll[2][3] is a
168    * list of all snapshots from lambda 3 evaluated with the potential of lambda 4. energiesAll[2][3][4] is
169    * the 5th snapshot from lambda 3 evaluated with the potential of lambda 4.
170    * <p>
171    * This constructor also breaks energiesAll into a flattened array (across the second dimension) such that
172    * the first dimension is the lambda window where the energy was evaluated and the second dimension is the
173    * snaps. energiesAll is also broken down into eAt, eLow, and eHigh arrays for convenience and so that BAR
174    * calculations can be performed and compared.
175    *
176    * @param lambdaValues Values of lambda dynamics was run at.
177    * @param energiesAll  Potential energy of each sample at all other lambdas. (Missing states are NaN)
178    * @param temperature  Temperature each state (single-element for a constant temperature).
179    */
180   public SequentialEstimator(double[] lambdaValues, double[][][] energiesAll, double[] temperature) {
181     nStates = lambdaValues.length;
182     assert nStates == energiesAll.length
183         : "The energy arrays is of the incorrect length in the first lambda dimension!";
184     assert nStates == energiesAll[0].length
185         : "The energy arrays is of the incorrect length in the second lambda dimension!";
186 
187     this.lamValues = copyOf(lambdaValues, nStates);
188     temperatures = new double[nStates];
189     if (temperature.length == 1) {
190       fill(temperatures, temperature[0]);
191     } else {
192       arraycopy(temperature, 0, temperatures, 0, nStates);
193     }
194 
195     // Just in case, deep copy the array rather than storing them as provided.
196     eAll = new double[nStates][][];
197     int maxSnaps = 0;
198     for (int i = 0; i < nStates; i++) {
199       eAll[i] = new double[energiesAll[i].length][];
200       for (int j = 0; j < energiesAll[i].length; j++) {
201         eAll[i][j] = copyOf(energiesAll[i][j], energiesAll[i][j].length);
202         maxSnaps = Math.max(maxSnaps, eAll[i][j].length);
203       }
204     }
205 
206     // Remove jagged edges from eAll with NaN values in case it hasn't been done for you
207     for (int i = 0; i < nStates; i++) {
208       for (int j = 0; j < nStates; j++) {
209         if (eAll[i][j].length < maxSnaps) {
210           double[] temp = new double[maxSnaps];
211           System.arraycopy(eAll[i][j], 0, temp, 0, eAll[i][j].length);
212           for (int k = eAll[i][j].length; k < maxSnaps; k++) {
213             temp[k] = Double.NaN;
214           }
215           eAll[i][j] = temp;
216         }
217       }
218     }
219 
220     // Flatten the eAll array into a 2D array of [lambda][allEvaluationsAtThisLambda]
221     nSamples = new int[nStates];
222     int[] nanCount = new int[nStates];
223     eAllFlat = new double[nStates][];
224     for (int i = 0; i < nStates; i++) {
225       ArrayList<Double> temp = new ArrayList<>();
226       for (int j = 0; j < nStates; j++) {
227         int count = 0;
228         int countNaN = 0;
229         for (int k = 0; k < eAll[j][i].length; k++) {
230           // Don't include NaN values
231           if (!Double.isNaN(eAll[j][i][k])) {
232             temp.add(eAll[j][i][k]);
233             count++;
234           } else {
235             countNaN++;
236           }
237         }
238         nSamples[j] = count;
239         nanCount[j] = countNaN;
240       }
241       eAllFlat[i] = temp.stream().mapToDouble(Double::doubleValue).toArray();
242     }
243 
244     for (int i = 0; i < nStates; i++) {
245       if (nSamples[i] + nanCount[i] != maxSnaps) {
246         throw new IllegalArgumentException("Lambda window " + i
247             + " is not set properly. You need to fill in the missing states with NaN.");
248       }
249     }
250 
251     // Assert that lengths of the energiesAll arrays are correct.
252     for (int i = 0; i < nStates; i++) {
253       assert eAll[i].length == nStates :
254           "The energy arrays is of the incorrect length in the second lambda dimension at lambda " + i + "!";
255       int nSnapshots = eAll[i][0].length;
256       for (int j = 0; j < nStates; j++) {
257         assert eAll[i][j].length == nSnapshots :
258             "The energy arrays is of the incorrect length in numSnaps dimension at lambda " +
259                 i + " for evaluation at lambda " + j + "!";
260       }
261     }
262 
263     // Initialize the eLow, eAt, and eHigh arrays to their expected values from eAll. Don't include NaN values.
264     // Handle zero sample cases for BAR
265     ArrayList<Integer> nonZeroSampleStates = new ArrayList<>();
266     for (int i = 0; i < nStates; i++) {
267       if (nSamples[i] != 0) {
268         nonZeroSampleStates.add(i);
269       }
270     }
271     eLambdaMinusdL = new double[nonZeroSampleStates.size()][eAll[0][0].length];
272     fill(eLambdaMinusdL[0], Double.NaN);
273     eLambda = new double[nonZeroSampleStates.size()][];
274     eLambdaPlusdL = new double[nonZeroSampleStates.size()][eAll[0][0].length];
275     fill(eLambdaPlusdL[nonZeroSampleStates.size() - 1], Double.NaN);
276     for (int i = 0; i < nonZeroSampleStates.size(); i++) {
277       int index = nonZeroSampleStates.get(i); // Contains out of bounds index for e;
278       if (i != 0) {
279         int indexLow = nonZeroSampleStates.get(i - 1);
280         eLambdaMinusdL[i] = copyOf(eAll[index][indexLow], nSamples[index]);
281       }
282       eLambda[i] = copyOf(eAll[index][index], nSamples[index]);
283       if (i != nonZeroSampleStates.size() - 1) {
284         int indexHigh = nonZeroSampleStates.get(i + 1);
285         eLambdaPlusdL[i] = copyOf(eAll[index][indexHigh], nSamples[index]);
286       }
287     }
288   }
289 
290   /**
291    * Simpler constructor for when data provided is already flattened (although it adds uncertainty about
292    * snap counts, they are all set to the same number).
293    *
294    * @param lambdaValues Lambda values.
295    * @param nSamples     Number of samples for each state.
296    * @param eAllFlat     Flattened energy evaluations at all lambda values.
297    * @param temperature  Temperature each state (single-element for a constant temperature).
298    */
299   public SequentialEstimator(double[] lambdaValues, int[] nSamples, double[][] eAllFlat, double[] temperature) {
300     nStates = lambdaValues.length;
301     assert nStates == eAllFlat.length
302         : "The energy arrays is of the incorrect length in the first lambda dimension!";
303 
304     this.lamValues = copyOf(lambdaValues, nStates);
305     temperatures = new double[nStates];
306     if (temperature.length == 1) {
307       fill(temperatures, temperature[0]);
308     } else {
309       arraycopy(temperature, 0, temperatures, 0, nStates);
310     }
311 
312     this.eAllFlat = eAllFlat;
313     this.nSamples = nSamples;
314     // No way of knowing the snap counts for each lambda window & therefore no way to break data into these matrices,
315     // so just set these all to null.
316     eLambdaMinusdL = null;
317     eLambda = null;
318     eLambdaPlusdL = null;
319     eAll = null;
320   }
321 }