View Javadoc
1   
2   // ******************************************************************************
3   //
4   // Title:       Force Field X.
5   // Description: Force Field X - Software for Molecular Biophysics.
6   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2025.
7   //
8   // This file is part of Force Field X.
9   //
10  // Force Field X is free software; you can redistribute it and/or modify it
11  // under the terms of the GNU General Public License version 3 as published by
12  // the Free Software Foundation.
13  //
14  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
15  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
17  // details.
18  //
19  // You should have received a copy of the GNU General Public License along with
20  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
21  // Place, Suite 330, Boston, MA 02111-1307 USA
22  //
23  // Linking this library statically or dynamically with other modules is making a
24  // combined work based on this library. Thus, the terms and conditions of the
25  // GNU General Public License cover the whole combination.
26  //
27  // As a special exception, the copyright holders of this library give you
28  // permission to link this library with independent modules to produce an
29  // executable, regardless of the license terms of these independent modules, and
30  // to copy and distribute the resulting executable under terms of your choice,
31  // provided that you also meet, for each linked independent module, the terms
32  // and conditions of the license of that module. An independent module is a
33  // module which is not derived from or based on this library. If you modify this
34  // library, you may extend this exception to your version of the library, but
35  // you are not obligated to do so. If you do not wish to do so, delete this
36  // exception statement from your version.
37  //
38  // ******************************************************************************
39  package ffx.numerics.math;
40  
41  
42  import org.apache.commons.math3.distribution.TDistribution;
43  
44  import javax.annotation.Nullable;
45  import java.util.Random;
46  
47  import static java.lang.String.format;
48  import static java.util.Arrays.fill;
49  import static org.apache.commons.math3.util.FastMath.sqrt;
50  
51  /**
52   * The BootStrapStatistics class uses bootstrapping to estimate statistics from a
53   * given population.
54   *
55   * @author Michael J. Schnieders
56   * @author Rose Gogal
57   * @since 1.0
58   */
59  public class BootStrapStatistics {
60  
61    // Weight-sensitive values.
62    /**
63     * The mean value.
64     */
65    public final double mean;
66    /**
67     * The variance.
68     */
69    public final double var;
70    /**
71     * The population variance.
72     */
73    public final double varPopulation;
74    /**
75     * The standard deviation.
76     */
77    public final double sd;
78    /**
79     * The population standard deviation.
80     */
81    public final double sdPopulation;
82    /**
83     * The sum of all weights.
84     */
85    public final double sumWeights;
86  
87    // Weight-insensitive values.
88    /**
89     * The minimum value.
90     */
91    public final double min;
92    /**
93     * The maximum value.
94     */
95    public final double max;
96    /**
97     * The number of entries.
98     */
99    public final long count;
100   /**
101    * The sum of all values.
102    */
103   public final double sum;
104   /**
105    * The number of degrees of freedom.
106    */
107   public final long dof;
108   private final TDistribution tDist;
109   private final String descString;
110 
111   /**
112    * Constructs a static summary of a statistic from provided values. Assumes weights are all
113    * constant (1.0). Assumes all values will be used.
114    *
115    * @param values Values to summarize.
116    */
117   public BootStrapStatistics(double[] values) {
118     this(values, null, 0, values.length, 1);
119   }
120 
121   /**
122    * Constructs a static summary of a statistic from provided values. Assumes weights are all
123    * constant (1.0). Assumes all values from first to end will be used.
124    *
125    * @param values Values to summarize.
126    * @param first  First value to use.
127    */
128   public BootStrapStatistics(double[] values, int first) {
129     this(values, null, first, values.length, 1);
130   }
131 
132   /**
133    * Constructs a static summary of a statistic from provided values. Assumes weights are all
134    * constant (1.0). Assumes a stride of 1.
135    *
136    * @param values Values to summarize.
137    * @param first  First value to use.
138    * @param last   Last value to use.
139    */
140   public BootStrapStatistics(double[] values, int first, int last) {
141     this(values, null, first, last, 1);
142   }
143 
144   /**
145    * Constructs a static summary of a statistic from provided values. Assumes weights are all
146    * constant (1.0).
147    *
148    * @param values Values to summarize.
149    * @param first  First value to use.
150    * @param last   Last value to use.
151    * @param stride Stride between values used.
152    */
153   public BootStrapStatistics(double[] values, int first, int last, int stride) {
154     this(values, null, first, last, stride);
155   }
156 
157   /**
158    * Constructs a static summary of a statistic from provided values.
159    *
160    * @param values  Values to summarize.
161    * @param weights Weights for each value.
162    * @param first   First value to use.
163    * @param last    Last value to use.
164    * @param stride  Stride between values used.
165    */
166   public BootStrapStatistics(double[] values, @Nullable double[] weights, int first, int last, int stride) {
167     if (values == null) {
168       throw new IllegalArgumentException(" Cannot have null values!");
169     }
170     int nValues = getNumberOfValues(values, first, last);
171 
172     if (weights == null) {
173       weights = new double[nValues];
174       fill(weights, 1.0);
175     }
176 
177     int tempCount = (last - first);
178     if (tempCount % stride == 0) {
179       count = tempCount / stride;
180     } else {
181       count = (tempCount / stride) + 1;
182     }
183     assert count > 0;
184 
185     if (count == 1) {
186       mean = values[first];
187       var = Double.NaN;
188       varPopulation = 0;
189       sd = Double.NaN;
190       sdPopulation = 0;
191       min = mean;
192       max = mean;
193       sum = mean;
194       sumWeights = weights[first];
195       dof = 0;
196       tDist = null;
197       descString = format(" Summary of single observation: value is %17.14g", mean);
198     } else {
199       // Collect Bootstrap Results
200       RunningStatistics runningStatistics = getRunningStatistics(values, weights);
201       min = runningStatistics.getMin();
202       max = runningStatistics.getMax();
203       mean = runningStatistics.getMean();
204       sum = runningStatistics.getSum();
205       sumWeights = runningStatistics.getWeight();
206       varPopulation = runningStatistics.getPopulationVariance();
207       sdPopulation = runningStatistics.getPopulationStandardDeviation();
208       dof = runningStatistics.getDOF();
209       var = runningStatistics.getVariance();
210       sd = runningStatistics.getStandardDeviation();
211       tDist = new TDistribution(dof);
212       descString = format(
213           " Summary of %d observations: sum is %17.14g, mean is %17.14g, min is %17.14g, "
214               + "max is %17.14g, and the sum of weights is %17.14g"
215               + "\nSample standard deviation: %17.14g (dof = %d)"
216               + "\nPopulation standard deviation: %17.14g (dof = %d)",
217           count, sum, mean, min, max, sumWeights, sd, dof, sdPopulation, count);
218     }
219   }
220 
221   private RunningStatistics getRunningStatistics(double[] values, double[] weights) {
222     RunningStatistics bootstrapRunningStatistics = new RunningStatistics();
223     Random random = new Random();
224 
225     for (int bs = 0; bs < count; bs++) {
226       // Collect the mean for one Bootstrap round.
227       RunningStatistics bootstrapRound = new RunningStatistics();
228       for (int i = 0; i < count; i++) {
229         int j = random.nextInt((int) count);
230         bootstrapRound.addValue(values[j], weights[j]);
231       }
232       // Add the mean from this round.
233       bootstrapRunningStatistics.addValue(bootstrapRound.getMean());
234     }
235     return bootstrapRunningStatistics;
236   }
237 
238   private static int getNumberOfValues(double[] values, int first, int last) {
239     int nValues = values.length;
240 
241     if (first < 0 || first > (nValues - 1)) {
242       throw new IllegalArgumentException(
243           format(" First entry %d was not in valid range 0-%d (0 to length of values - 1)",
244               first, nValues - 1));
245     }
246     if (last <= first || last > nValues) {
247       throw new IllegalArgumentException(
248           format(" Last entry %d was not in valid range %d-%d (first+1 to length of values",
249               last, (first + 1), nValues));
250     }
251     return nValues;
252   }
253 
254   /**
255    * Computes a 95% confidence interval based on a Student's T-distribution.
256    *
257    * @return 95% confidence interval.
258    */
259   public double confidenceInterval() {
260     return confidenceInterval(0.05);
261   }
262 
263   /**
264    * Computes a confidence interval based on a Student's T-distribution.
265    *
266    * @param alpha Alpha (e.g. 0.05 for a 95% CI).
267    * @return Confidence interval.
268    */
269   public double confidenceInterval(double alpha) {
270     if (dof == 0) {
271       throw new IllegalArgumentException(
272           " Cannot calculate confidence intervals when there are no degrees of freedom!");
273     }
274     double critVal = tDist.inverseCumulativeProbability(0.5 * (1.0 - alpha));
275     return critVal * sd / sqrt(count);
276   }
277 
278   /**
279    * The mean.
280    *
281    * @return Return the mean.
282    */
283   public double getMean() {
284     return mean;
285   }
286 
287   /**
288    * The standard deviation.
289    *
290    * @return Return the standard deviation.
291    */
292   public double getSd() {
293     return sd;
294   }
295 
296   /**
297    * The variance.
298    *
299    * @return Return the variance.
300    */
301   public double getVar() {
302     return var;
303   }
304 
305   /**
306    * ${@inheritDoc}
307    */
308   @Override
309   public String toString() {
310     return descString;
311   }
312 
313   /**
314    * Describe the Summary Statistics.
315    *
316    * @return Return the description.
317    */
318   public String describe() {
319     return format(" Mean: %12.6f +/-%12.6f, Min/Max: %12.6f/%12.6f", mean, sd, min, max);
320   }
321 
322 }