View Javadoc
1   // ******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2025.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  // ******************************************************************************
38  package ffx.numerics.math;
39  
40  import static java.lang.String.format;
41  import static java.util.Arrays.fill;
42  import static org.apache.commons.math3.util.FastMath.abs;
43  import static org.apache.commons.math3.util.FastMath.max;
44  import static org.apache.commons.math3.util.FastMath.min;
45  import static org.apache.commons.math3.util.FastMath.sqrt;
46  
47  import org.apache.commons.math3.distribution.TDistribution;
48  
49  import javax.annotation.Nullable;
50  
51  /**
52   * The SummaryStatistics class uses online, stable algorithms to calculate summary statistics from
53   * double arrays/lists, including mean, variance, standard deviation, max, min, sum, and count.
54   *
55   * <p>This is intended for accuracy and numerical stability, not necessarily for performance (e.g.
56   * using Kahan summation).
57   *
58   * @author Michael J. Schnieders
59   * @author Jacob M. Litman
60   * @since 1.0
61   */
62  public class SummaryStatistics {
63  
64    // Weight-sensitive values.
65    /**
66     * Mean value.
67     */
68    public final double mean;
69    /**
70     * Sample variance.
71     */
72    public final double var;
73    /**
74     * Population variance.
75     */
76    public final double varPopulation;
77    /**
78     * Sample standard deviation.
79     */
80    public final double sd;
81    /**
82     * Population standard deviation.
83     */
84    public final double sdPopulation;
85    /**
86     * Sum of weights.
87     */
88    public final double sumWeights;
89  
90    // Weight-insensitive values.
91    /**
92     * Minimum value.
93     */
94    public final double min;
95    /**
96     * Maximum value.
97     */
98    public final double max;
99    /**
100    * Count of observations.
101    */
102   public final long count;
103   /**
104    * Sum of observations.
105    */
106   public final double sum;
107   /**
108    * Degrees of freedom.
109    */
110   public final long dof;
111   /**
112    * T-distribution for confidence intervals.
113    */
114   private final TDistribution tDist;
115   /**
116    * Description of the summary statistics.
117    */
118   private final String descString;
119 
120   /**
121    * Builds a static view of a running statistic.
122    *
123    * @param rs Running statistic.
124    */
125   public SummaryStatistics(RunningStatistics rs) {
126     mean = rs.getMean();
127     var = rs.getVariance();
128     varPopulation = rs.getPopulationVariance();
129     sumWeights = rs.getWeight();
130     min = rs.getMin();
131     max = rs.getMax();
132     count = rs.getCount();
133     sum = rs.getSum();
134     dof = rs.getDOF();
135     tDist = (dof > 0) ? new TDistribution(dof) : null;
136     sd = rs.getStandardDeviation();
137     sdPopulation = rs.getPopulationStandardDeviation();
138     descString =
139         format(
140             " Summary of %d observations: sum is %17.14g, mean is %17.14g, min is %17.14g, "
141                 + "max is %17.14g, and the sum of weights is %17.14g"
142                 + "\nSample standard deviation: %17.14g (dof = %d)"
143                 + "\nPopulation standard deviation: %17.14g (dof = %d)",
144             count, sum, mean, min, max, sumWeights, sd, dof, sdPopulation, count);
145   }
146 
147   /**
148    * Constructs a static summary of a statistic from provided values. Assumes weights are all
149    * constant (1.0). Assumes all values will be used.
150    *
151    * @param values Values to summarize.
152    */
153   public SummaryStatistics(double[] values) {
154     this(values, null, 0, values.length, 1);
155   }
156 
157   /**
158    * Constructs a static summary of a statistic from provided values. Assumes weights are all
159    * constant (1.0). Assumes all values from first to end will be used.
160    *
161    * @param values Values to summarize.
162    * @param first  First value to use.
163    */
164   public SummaryStatistics(double[] values, int first) {
165     this(values, null, first, values.length, 1);
166   }
167 
168   /**
169    * Constructs a static summary of a statistic from provided values. Assumes weights are all
170    * constant (1.0). Assumes a stride of 1.
171    *
172    * @param values Values to summarize.
173    * @param first  First value to use.
174    * @param last   Last value to use.
175    */
176   public SummaryStatistics(double[] values, int first, int last) {
177     this(values, null, first, last, 1);
178   }
179 
180   /**
181    * Constructs a static summary of a statistic from provided values. Assumes weights are all
182    * constant (1.0).
183    *
184    * @param values Values to summarize.
185    * @param first  First value to use.
186    * @param last   Last value to use.
187    * @param stride Stride between values used.
188    */
189   public SummaryStatistics(double[] values, int first, int last, int stride) {
190     this(values, null, first, last, stride);
191   }
192 
193   /**
194    * Constructs a static summary of a statistic from provided values.
195    *
196    * @param values  Values to summarize.
197    * @param weights Weights for each value.
198    * @param first   First value to use.
199    * @param last    Last value to use.
200    * @param stride  Stride between values used.
201    */
202   public SummaryStatistics(@Nullable double[] values, @Nullable double[] weights, int first, int last, int stride) {
203     if (values == null) {
204       throw new IllegalArgumentException(" Cannot have null values!");
205     }
206     int nVals = values.length;
207 
208     if (first < 0 || first > (nVals - 1)) {
209       throw new IllegalArgumentException(
210           format(" First entry %d was not in valid range 0-%d (0 to length of values - 1)",
211               first, nVals - 1));
212     }
213     if (last <= first || last > nVals) {
214       throw new IllegalArgumentException(
215           format(" Last entry %d was not in valid range %d-%d (first+1 to length of values",
216               last, (first + 1), nVals));
217     }
218 
219     if (weights == null) {
220       weights = new double[nVals];
221       fill(weights, 1.0);
222     }
223 
224     int tempCount = (last - first);
225     if (tempCount % stride == 0) {
226       count = tempCount / stride;
227     } else {
228       count = (tempCount / stride) + 1;
229     }
230     assert count > 0;
231 
232     if (count == 1) {
233       mean = values[first];
234       var = Double.NaN;
235       varPopulation = 0;
236       sd = Double.NaN;
237       sdPopulation = 0;
238       min = mean;
239       max = mean;
240       sum = mean;
241       sumWeights = weights[first];
242       dof = 0;
243       tDist = null;
244 
245       descString = format(" Summary of single observation: value is %17.14g", mean);
246     } else {
247       double meanAcc = 0;
248       double varAcc = 0;
249       double minAcc = Double.MAX_VALUE;
250       double maxAcc = Double.MIN_VALUE;
251       double sumAcc = 0;
252       double comp = 0;
253       double weightAcc = 0;
254 
255       for (int i = 0; i < count; i++) {
256         int ii = first + (i * stride);
257         assert ii < last;
258         double val = values[ii];
259         double priorMean = meanAcc;
260         double weight = weights[ii];
261         weightAcc += weight;
262 
263         double y = val - comp;
264         double t = sumAcc + y;
265         comp = (t - sumAcc) - y;
266         sumAcc = t;
267 
268         minAcc = min(minAcc, val);
269         maxAcc = max(maxAcc, val);
270 
271         double invCount = weight / weightAcc;
272         meanAcc += ((val - meanAcc) * invCount);
273         // TODO: Check correctness of variance accumulation when weight != 1.0.
274         varAcc += ((val - priorMean) * (val - meanAcc)) * weight;
275       }
276 
277       min = minAcc;
278       max = maxAcc;
279       mean = meanAcc;
280       sum = sumAcc;
281       sumWeights = weightAcc;
282       // Mean via Kahan summation and online mean estimation should be pretty close to each other.
283       assert abs(mean - (sum / count)) < 1.0E-11;
284       varPopulation = varAcc / count;
285       sdPopulation = sqrt(varPopulation);
286       dof = count - 1;
287       var = varAcc / dof;
288       sd = sqrt(var);
289       tDist = new TDistribution(dof);
290 
291       descString =
292           format(
293               " Summary of %d observations: sum is %17.14g, mean is %17.14g, min is %17.14g, "
294                   + "max is %17.14g, and the sum of weights is %17.14g"
295                   + "\nSample standard deviation: %17.14g (dof = %d)"
296                   + "\nPopulation standard deviation: %17.14g (dof = %d)",
297               count, sum, mean, min, max, sumWeights, sd, dof, sdPopulation, count);
298     }
299   }
300 
301   /**
302    * Computes a 95% confidence interval based on a Student's T-distribution.
303    *
304    * @return 95% confidence interval.
305    */
306   public double confidenceInterval() {
307     return confidenceInterval(0.05);
308   }
309 
310   /**
311    * Computes a confidence interval based on a Student's T-distribution.
312    *
313    * @param alpha Alpha (e.g. 0.05 for a 95% CI).
314    * @return Confidence interval.
315    */
316   public double confidenceInterval(double alpha) {
317     if (dof == 0) {
318       throw new IllegalArgumentException(
319           " Cannot calculate confidence intervals when there are no degrees of freedom!");
320     }
321     double critVal = tDist.inverseCumulativeProbability(0.5 * (1.0 - alpha));
322     return critVal * sd / sqrt(count);
323   }
324 
325   /**
326    * The mean.
327    *
328    * @return Return the mean.
329    */
330   public double getMean() {
331     return mean;
332   }
333 
334   /**
335    * The standard deviation.
336    *
337    * @return Return the standard deviation.
338    */
339   public double getSd() {
340     return sd;
341   }
342 
343   /**
344    * The variance.
345    *
346    * @return Return the variance.
347    */
348   public double getVar() {
349     return var;
350   }
351 
352   /**
353    * ${@inheritDoc}
354    */
355   @Override
356   public String toString() {
357     return descString;
358   }
359 
360   /**
361    * Describe the Summary Statistics.
362    *
363    * @return Return the description.
364    */
365   public String describe() {
366     return format(" Mean: %12.6f +/-%12.6f, Min/Max: %12.6f/%12.6f", mean, sd, min, max);
367   }
368 }