View Javadoc
1   
2   // ******************************************************************************
3   //
4   // Title:       Force Field X.
5   // Description: Force Field X - Software for Molecular Biophysics.
6   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2024.
7   //
8   // This file is part of Force Field X.
9   //
10  // Force Field X is free software; you can redistribute it and/or modify it
11  // under the terms of the GNU General Public License version 3 as published by
12  // the Free Software Foundation.
13  //
14  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
15  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
17  // details.
18  //
19  // You should have received a copy of the GNU General Public License along with
20  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
21  // Place, Suite 330, Boston, MA 02111-1307 USA
22  //
23  // Linking this library statically or dynamically with other modules is making a
24  // combined work based on this library. Thus, the terms and conditions of the
25  // GNU General Public License cover the whole combination.
26  //
27  // As a special exception, the copyright holders of this library give you
28  // permission to link this library with independent modules to produce an
29  // executable, regardless of the license terms of these independent modules, and
30  // to copy and distribute the resulting executable under terms of your choice,
31  // provided that you also meet, for each linked independent module, the terms
32  // and conditions of the license of that module. An independent module is a
33  // module which is not derived from or based on this library. If you modify this
34  // library, you may extend this exception to your version of the library, but
35  // you are not obligated to do so. If you do not wish to do so, delete this
36  // exception statement from your version.
37  //
38  // ******************************************************************************
39  package ffx.numerics.math;
40  
41  
42  import static java.lang.String.format;
43  import static java.util.Arrays.fill;
44  import static org.apache.commons.math3.util.FastMath.sqrt;
45  
46  import java.util.Random;
47  
48  import org.apache.commons.math3.distribution.TDistribution;
49  
50  import javax.annotation.Nullable;
51  
52  /**
53   * The BootStrapStatistics class uses bootstrapping to estimate statistics from a
54   * given population.
55   *
56   * @author Michael J. Schnieders
57   * @author Rose Gogal
58   * @since 1.0
59   */
60  public class BootStrapStatistics {
61  
62    // Weight-sensitive values.
63    public final double mean;
64    public final double var;
65    public final double varPopulation;
66    public final double sd;
67    public final double sdPopulation;
68    public final double sumWeights;
69    // Weight-insensitive values.
70    public final double min;
71    public final double max;
72    public final long count;
73    public final double sum;
74    public final long dof;
75    private final TDistribution tDist;
76    private final String descString;
77  
78    /**
79     * Constructs a static summary of a statistic from provided values. Assumes weights are all
80     * constant (1.0). Assumes all values will be used.
81     *
82     * @param values Values to summarize.
83     */
84    public BootStrapStatistics(double[] values) {
85      this(values, null, 0, values.length, 1);
86    }
87  
88    /**
89     * Constructs a static summary of a statistic from provided values. Assumes weights are all
90     * constant (1.0). Assumes all values from first to end will be used.
91     *
92     * @param values Values to summarize.
93     * @param first  First value to use.
94     */
95    public BootStrapStatistics(double[] values, int first) {
96      this(values, null, first, values.length, 1);
97    }
98  
99    /**
100    * Constructs a static summary of a statistic from provided values. Assumes weights are all
101    * constant (1.0). Assumes a stride of 1.
102    *
103    * @param values Values to summarize.
104    * @param first  First value to use.
105    * @param last   Last value to use.
106    */
107   public BootStrapStatistics(double[] values, int first, int last) {
108     this(values, null, first, last, 1);
109   }
110 
111   /**
112    * Constructs a static summary of a statistic from provided values. Assumes weights are all
113    * constant (1.0).
114    *
115    * @param values Values to summarize.
116    * @param first  First value to use.
117    * @param last   Last value to use.
118    * @param stride Stride between values used.
119    */
120   public BootStrapStatistics(double[] values, int first, int last, int stride) {
121     this(values, null, first, last, stride);
122   }
123 
124   /**
125    * Constructs a static summary of a statistic from provided values.
126    *
127    * @param values  Values to summarize.
128    * @param weights Weights for each value.
129    * @param first   First value to use.
130    * @param last    Last value to use.
131    * @param stride  Stride between values used.
132    */
133   public BootStrapStatistics(double[] values, @Nullable double[] weights, int first, int last, int stride) {
134     if (values == null) {
135       throw new IllegalArgumentException(" Cannot have null values!");
136     }
137     int nValues = getNumberOfValues(values, first, last);
138 
139     if (weights == null) {
140       weights = new double[nValues];
141       fill(weights, 1.0);
142     }
143 
144     int tempCount = (last - first);
145     if (tempCount % stride == 0) {
146       count = tempCount / stride;
147     } else {
148       count = (tempCount / stride) + 1;
149     }
150     assert count > 0;
151 
152     if (count == 1) {
153       mean = values[first];
154       var = Double.NaN;
155       varPopulation = 0;
156       sd = Double.NaN;
157       sdPopulation = 0;
158       min = mean;
159       max = mean;
160       sum = mean;
161       sumWeights = weights[first];
162       dof = 0;
163       tDist = null;
164       descString = format(" Summary of single observation: value is %17.14g", mean);
165     } else {
166       // Collect Bootstrap Results
167       RunningStatistics runningStatistics = getRunningStatistics(values, weights);
168       min = runningStatistics.getMin();
169       max = runningStatistics.getMax();
170       mean = runningStatistics.getMean();
171       sum = runningStatistics.getSum();
172       sumWeights = runningStatistics.getWeight();
173       varPopulation = runningStatistics.getPopulationVariance();
174       sdPopulation = runningStatistics.getPopulationStandardDeviation();
175       dof = runningStatistics.getDOF();
176       var = runningStatistics.getVariance();
177       sd = runningStatistics.getStandardDeviation();
178       tDist = new TDistribution(dof);
179       descString = format(
180           " Summary of %d observations: sum is %17.14g, mean is %17.14g, min is %17.14g, "
181               + "max is %17.14g, and the sum of weights is %17.14g"
182               + "\nSample standard deviation: %17.14g (dof = %d)"
183               + "\nPopulation standard deviation: %17.14g (dof = %d)",
184           count, sum, mean, min, max, sumWeights, sd, dof, sdPopulation, count);
185     }
186   }
187 
188   private RunningStatistics getRunningStatistics(double[] values, double[] weights) {
189     RunningStatistics bootstrapRunningStatistics = new RunningStatistics();
190     Random random = new Random();
191 
192     for (int bs = 0; bs < count; bs++) {
193       // Collect the mean for one Bootstrap round.
194       RunningStatistics bootstrapRound = new RunningStatistics();
195       for (int i = 0; i < count; i++) {
196         int j = random.nextInt((int) count);
197         bootstrapRound.addValue(values[j], weights[j]);
198       }
199       // Add the mean from this round.
200       bootstrapRunningStatistics.addValue(bootstrapRound.getMean());
201     }
202     return bootstrapRunningStatistics;
203   }
204 
205   private static int getNumberOfValues(double[] values, int first, int last) {
206     int nValues = values.length;
207 
208     if (first < 0 || first > (nValues - 1)) {
209       throw new IllegalArgumentException(
210           format(" First entry %d was not in valid range 0-%d (0 to length of values - 1)",
211               first, nValues - 1));
212     }
213     if (last <= first || last > nValues) {
214       throw new IllegalArgumentException(
215           format(" Last entry %d was not in valid range %d-%d (first+1 to length of values",
216               last, (first + 1), nValues));
217     }
218     return nValues;
219   }
220 
221   /**
222    * Computes a 95% confidence interval based on a Student's T-distribution.
223    *
224    * @return 95% confidence interval.
225    */
226   public double confidenceInterval() {
227     return confidenceInterval(0.05);
228   }
229 
230   /**
231    * Computes a confidence interval based on a Student's T-distribution.
232    *
233    * @param alpha Alpha (e.g. 0.05 for a 95% CI).
234    * @return Confidence interval.
235    */
236   public double confidenceInterval(double alpha) {
237     if (dof == 0) {
238       throw new IllegalArgumentException(
239           " Cannot calculate confidence intervals when there are no degrees of freedom!");
240     }
241     double critVal = tDist.inverseCumulativeProbability(0.5 * (1.0 - alpha));
242     return critVal * sd / sqrt(count);
243   }
244 
245   /**
246    * The mean.
247    *
248    * @return Return the mean.
249    */
250   public double getMean() {
251     return mean;
252   }
253 
254   /**
255    * The standard deviation.
256    *
257    * @return Return the standard deviation.
258    */
259   public double getSd() {
260     return sd;
261   }
262 
263   /**
264    * The variance.
265    *
266    * @return Return the variance.
267    */
268   public double getVar() {
269     return var;
270   }
271 
272   /**
273    * ${@inheritDoc}
274    */
275   @Override
276   public String toString() {
277     return descString;
278   }
279 
280   /**
281    * Describe the Summary Statistics.
282    *
283    * @return Return the description.
284    */
285   public String describe() {
286     return format(" Mean: %12.6f +/-%12.6f, Min/Max: %12.6f/%12.6f", mean, sd, min, max);
287   }
288 
289 }