1 // ******************************************************************************
2 //
3 // Title: Force Field X.
4 // Description: Force Field X - Software for Molecular Biophysics.
5 // Copyright: Copyright (c) Michael J. Schnieders 2001-2025.
6 //
7 // This file is part of Force Field X.
8 //
9 // Force Field X is free software; you can redistribute it and/or modify it
10 // under the terms of the GNU General Public License version 3 as published by
11 // the Free Software Foundation.
12 //
13 // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16 // details.
17 //
18 // You should have received a copy of the GNU General Public License along with
19 // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20 // Place, Suite 330, Boston, MA 02111-1307 USA
21 //
22 // Linking this library statically or dynamically with other modules is making a
23 // combined work based on this library. Thus, the terms and conditions of the
24 // GNU General Public License cover the whole combination.
25 //
26 // As a special exception, the copyright holders of this library give you
27 // permission to link this library with independent modules to produce an
28 // executable, regardless of the license terms of these independent modules, and
29 // to copy and distribute the resulting executable under terms of your choice,
30 // provided that you also meet, for each linked independent module, the terms
31 // and conditions of the license of that module. An independent module is a
32 // module which is not derived from or based on this library. If you modify this
33 // library, you may extend this exception to your version of the library, but
34 // you are not obligated to do so. If you do not wish to do so, delete this
35 // exception statement from your version.
36 //
37 // ******************************************************************************
38 package ffx.numerics.estimator;
39
40 import java.util.ArrayList;
41
42 import static java.lang.System.arraycopy;
43 import static java.util.Arrays.copyOf;
44 import static java.util.Arrays.fill;
45 import static java.util.Arrays.stream;
46
47 /**
48 * The SequentialEstimator abstract class defines a statistical estimator based on perturbative
49 * potential energy differences between adjacent windows (e.g. exponential free energy perturbation,
50 * Bennett Acceptance Ratio, etc).
51 *
52 * @author Michael J. Schnieders
53 * @author Jacob M. Litman
54 * @author Matthew J. Speranza
55 * @since 1.0
56 */
57 public abstract class SequentialEstimator implements StatisticalEstimator {
58
59 /**
60 * The lambda values at which the samples were collected.
61 */
62 protected final double[] lamValues;
63 /**
64 * The number of states from which samples were collected.
65 */
66 protected final int nStates;
67 /**
68 * The potential energy of each snapshot at lambda - dL.
69 */
70 protected final double[][] eLambdaMinusdL;
71 /**
72 * The potential energy of each snapshot at lambda.
73 */
74 protected final double[][] eLambda;
75 /**
76 * The potential energy of each snapshot at lambda + dL.
77 */
78 protected final double[][] eLambdaPlusdL;
79 /**
80 * The potential energies of the snapshots at all other lambda values.
81 * <p>
82 * eAll[lambdaWindow][perturbations][energies]
83 */
84 protected final double[][][] eAll;
85 /**
86 * The potential energies of the snapshots at all other lambda values.
87 * <p>
88 * eAllFlat[lambda][evaluationsAtThisLambdaFromAllOtherLambda]
89 */
90 protected double[][] eAllFlat;
91 /**
92 * The number of samples for each lambda state.
93 */
94 protected int[] nSamples;
95 /**
96 * The temperatures at which the samples were collected.
97 */
98 protected final double[] temperatures;
99
100 /**
101 * The SequentialEstimator constructor largely just copies its parameters into local variables.
102 * Most arrays are duplicated (rather a just copying their reference).
103 * The temperature array can be of length 1 if all elements are meant to be the same temperature.
104 *
105 * <p>The first dimension of the energies arrays corresponds to the lambda values. The
106 * second dimension (can be of uneven length) corresponds to potential energies of snapshots
107 * sampled at that lambda value, calculated either at that lambda value, the lambda value below,
108 * or the lambda value above. The arrays eLambdaMindL[0] and eLambdaPlusdL[n-1] is expected to be all
109 * NaN.
110 *
111 * @param lambdaValues Values of lambda for sampled states.
112 * @param eLambdaMinusdL Potential energies of state L at L-dL.
113 * @param eLambda Potential energies of state L at L.
114 * @param eLambdaPlusdL Potential energies of state L at L+dL.
115 * @param temperature Temperature each state (single-element for a constant temperature).
116 */
117 public SequentialEstimator(double[] lambdaValues, double[][] eLambdaMinusdL, double[][] eLambda,
118 double[][] eLambdaPlusdL, double[] temperature) {
119 nStates = lambdaValues.length;
120 eAll = null;
121 eAllFlat = null;
122 nSamples = null;
123
124 assert stream(eLambdaMinusdL[0]).allMatch(Double::isNaN)
125 && stream(eLambdaPlusdL[nStates - 1]).allMatch(Double::isNaN);
126
127 assert nStates == eLambda.length
128 && nStates == eLambdaMinusdL.length
129 && nStates == eLambdaPlusdL.length
130 : "One of the energy arrays is of the incorrect length in the first dimension!";
131
132 this.lamValues = copyOf(lambdaValues, nStates);
133 temperatures = new double[nStates];
134 if (temperature.length == 1) {
135 fill(temperatures, temperature[0]);
136 } else {
137 arraycopy(temperature, 0, temperatures, 0, nStates);
138 }
139
140 // Just in case, copy the arrays rather than storing them as provided.
141 this.eLambdaMinusdL = new double[nStates][];
142 this.eLambda = new double[nStates][];
143 this.eLambdaPlusdL = new double[nStates][];
144 for (int i = 0; i < nStates; i++) {
145 if (i != 0) {
146 // There is no perturbation below first state (usually L = 0).
147 this.eLambdaMinusdL[i] = copyOf(eLambdaMinusdL[i], eLambdaMinusdL[i].length);
148 }
149 this.eLambda[i] = copyOf(eLambda[i], eLambda[i].length);
150 if (i != nStates - 1) {
151 // There is no perturbation above the final state (usually L = 1).
152 this.eLambdaPlusdL[i] = copyOf(eLambdaPlusdL[i], eLambdaPlusdL[i].length);
153 }
154 }
155 }
156
157
158 /**
159 * The SequentialEstimator constructor largely just copies its parameters into local variables.
160 * Most arrays are duplicated (rather a just copying their reference).
161 * The temperature array can be of length 1 if all elements are meant to be the same temperature.
162 * <p>
163 * This constructor is meant for lower variance estimators such as MBAR and WHAM. These methods require energy
164 * evaluations from all lambda windows at all lambda values. The energiesAll array is expected to be
165 * of the form energiesAll[lambdaWindow][windowPerspective][lambdaWindowSnapshotPerspectiveEnergy].
166 * As an example, at the 3rd lambda window, the energiesAll[2] array should contain the energies
167 * of all the snapshots from the 3rd lambda window evaluated at all lambda values. energiesAll[2][3] is a
168 * list of all snapshots from lambda 3 evaluated with the potential of lambda 4. energiesAll[2][3][4] is
169 * the 5th snapshot from lambda 3 evaluated with the potential of lambda 4.
170 * <p>
171 * This constructor also breaks energiesAll into a flattened array (across the second dimension) such that
172 * the first dimension is the lambda window where the energy was evaluated and the second dimension is the
173 * snaps. energiesAll is also broken down into eAt, eLow, and eHigh arrays for convenience and so that BAR
174 * calculations can be performed and compared.
175 *
176 * @param lambdaValues Values of lambda dynamics was run at.
177 * @param energiesAll Potential energy of each sample at all other lambdas. (Missing states are NaN)
178 * @param temperature Temperature each state (single-element for a constant temperature).
179 */
180 public SequentialEstimator(double[] lambdaValues, double[][][] energiesAll, double[] temperature) {
181 nStates = lambdaValues.length;
182 assert nStates == energiesAll.length
183 : "The energy arrays is of the incorrect length in the first lambda dimension!";
184 assert nStates == energiesAll[0].length
185 : "The energy arrays is of the incorrect length in the second lambda dimension!";
186
187 this.lamValues = copyOf(lambdaValues, nStates);
188 temperatures = new double[nStates];
189 if (temperature.length == 1) {
190 fill(temperatures, temperature[0]);
191 } else {
192 arraycopy(temperature, 0, temperatures, 0, nStates);
193 }
194
195 // Just in case, deep copy the array rather than storing them as provided.
196 eAll = new double[nStates][][];
197 int maxSnaps = 0;
198 for (int i = 0; i < nStates; i++) {
199 eAll[i] = new double[energiesAll[i].length][];
200 for (int j = 0; j < energiesAll[i].length; j++) {
201 eAll[i][j] = copyOf(energiesAll[i][j], energiesAll[i][j].length);
202 maxSnaps = Math.max(maxSnaps, eAll[i][j].length);
203 }
204 }
205
206 // Remove jagged edges from eAll with NaN values in case it hasn't been done for you
207 for (int i = 0; i < nStates; i++) {
208 for (int j = 0; j < nStates; j++) {
209 if (eAll[i][j].length < maxSnaps) {
210 double[] temp = new double[maxSnaps];
211 System.arraycopy(eAll[i][j], 0, temp, 0, eAll[i][j].length);
212 for (int k = eAll[i][j].length; k < maxSnaps; k++) {
213 temp[k] = Double.NaN;
214 }
215 eAll[i][j] = temp;
216 }
217 }
218 }
219
220 // Flatten the eAll array into a 2D array of [lambda][allEvaluationsAtThisLambda]
221 nSamples = new int[nStates];
222 int[] nanCount = new int[nStates];
223 eAllFlat = new double[nStates][];
224 for (int i = 0; i < nStates; i++) {
225 ArrayList<Double> temp = new ArrayList<>();
226 for (int j = 0; j < nStates; j++) {
227 int count = 0;
228 int countNaN = 0;
229 for (int k = 0; k < eAll[j][i].length; k++) {
230 // Don't include NaN values
231 if (!Double.isNaN(eAll[j][i][k])) {
232 temp.add(eAll[j][i][k]);
233 count++;
234 } else {
235 countNaN++;
236 }
237 }
238 nSamples[j] = count;
239 nanCount[j] = countNaN;
240 }
241 eAllFlat[i] = temp.stream().mapToDouble(Double::doubleValue).toArray();
242 }
243
244 for (int i = 0; i < nStates; i++) {
245 if (nSamples[i] + nanCount[i] != maxSnaps) {
246 throw new IllegalArgumentException("Lambda window " + i
247 + " is not set properly. You need to fill in the missing states with NaN.");
248 }
249 }
250
251 // Assert that lengths of the energiesAll arrays are correct.
252 for (int i = 0; i < nStates; i++) {
253 assert eAll[i].length == nStates :
254 "The energy arrays is of the incorrect length in the second lambda dimension at lambda " + i + "!";
255 int nSnapshots = eAll[i][0].length;
256 for (int j = 0; j < nStates; j++) {
257 assert eAll[i][j].length == nSnapshots :
258 "The energy arrays is of the incorrect length in numSnaps dimension at lambda " +
259 i + " for evaluation at lambda " + j + "!";
260 }
261 }
262
263 // Initialize the eLow, eAt, and eHigh arrays to their expected values from eAll. Don't include NaN values.
264 // Handle zero sample cases for BAR
265 ArrayList<Integer> nonZeroSampleStates = new ArrayList<>();
266 for (int i = 0; i < nStates; i++) {
267 if (nSamples[i] != 0) {
268 nonZeroSampleStates.add(i);
269 }
270 }
271 eLambdaMinusdL = new double[nonZeroSampleStates.size()][eAll[0][0].length];
272 fill(eLambdaMinusdL[0], Double.NaN);
273 eLambda = new double[nonZeroSampleStates.size()][];
274 eLambdaPlusdL = new double[nonZeroSampleStates.size()][eAll[0][0].length];
275 fill(eLambdaPlusdL[nonZeroSampleStates.size() - 1], Double.NaN);
276 for (int i = 0; i < nonZeroSampleStates.size(); i++) {
277 int index = nonZeroSampleStates.get(i); // Contains out of bounds index for e;
278 if (i != 0) {
279 int indexLow = nonZeroSampleStates.get(i - 1);
280 eLambdaMinusdL[i] = copyOf(eAll[index][indexLow], nSamples[index]);
281 }
282 eLambda[i] = copyOf(eAll[index][index], nSamples[index]);
283 if (i != nonZeroSampleStates.size() - 1) {
284 int indexHigh = nonZeroSampleStates.get(i + 1);
285 eLambdaPlusdL[i] = copyOf(eAll[index][indexHigh], nSamples[index]);
286 }
287 }
288 }
289
290 /**
291 * Simpler constructor for when data provided is already flattened (although it adds uncertainty about
292 * snap counts, they are all set to the same number).
293 *
294 * @param lambdaValues Lambda values.
295 * @param nSamples Number of samples for each state.
296 * @param eAllFlat Flattened energy evaluations at all lambda values.
297 * @param temperature Temperature each state (single-element for a constant temperature).
298 */
299 public SequentialEstimator(double[] lambdaValues, int[] nSamples, double[][] eAllFlat, double[] temperature) {
300 nStates = lambdaValues.length;
301 assert nStates == eAllFlat.length
302 : "The energy arrays is of the incorrect length in the first lambda dimension!";
303
304 this.lamValues = copyOf(lambdaValues, nStates);
305 temperatures = new double[nStates];
306 if (temperature.length == 1) {
307 fill(temperatures, temperature[0]);
308 } else {
309 arraycopy(temperature, 0, temperatures, 0, nStates);
310 }
311
312 this.eAllFlat = eAllFlat;
313 this.nSamples = nSamples;
314 // No way of knowing the snap counts for each lambda window & therefore no way to break data into these matrices,
315 // so just set these all to null.
316 eLambdaMinusdL = null;
317 eLambda = null;
318 eLambdaPlusdL = null;
319 eAll = null;
320 }
321 }