1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38 package ffx.numerics.math;
39
40 import static java.lang.String.format;
41 import static java.util.Arrays.fill;
42 import static org.apache.commons.math3.util.FastMath.abs;
43 import static org.apache.commons.math3.util.FastMath.max;
44 import static org.apache.commons.math3.util.FastMath.min;
45 import static org.apache.commons.math3.util.FastMath.sqrt;
46
47 import org.apache.commons.math3.distribution.TDistribution;
48
49 import javax.annotation.Nullable;
50
51
52
53
54
55
56
57
58
59
60
61
62 public class SummaryStatistics {
63
64
65 public final double mean;
66 public final double var;
67 public final double varPopulation;
68 public final double sd;
69 public final double sdPopulation;
70 public final double sumWeights;
71
72 public final double min;
73 public final double max;
74 public final long count;
75 public final double sum;
76 public final long dof;
77 private final TDistribution tDist;
78 private final String descString;
79
80
81
82
83
84
85 public SummaryStatistics(RunningStatistics rs) {
86 mean = rs.getMean();
87 var = rs.getVariance();
88 varPopulation = rs.getPopulationVariance();
89 sumWeights = rs.getWeight();
90 min = rs.getMin();
91 max = rs.getMax();
92 count = rs.getCount();
93 sum = rs.getSum();
94 dof = rs.getDOF();
95 tDist = (dof > 0) ? new TDistribution(dof) : null;
96 sd = rs.getStandardDeviation();
97 sdPopulation = rs.getPopulationStandardDeviation();
98 descString =
99 format(
100 " Summary of %d observations: sum is %17.14g, mean is %17.14g, min is %17.14g, "
101 + "max is %17.14g, and the sum of weights is %17.14g"
102 + "\nSample standard deviation: %17.14g (dof = %d)"
103 + "\nPopulation standard deviation: %17.14g (dof = %d)",
104 count, sum, mean, min, max, sumWeights, sd, dof, sdPopulation, count);
105 }
106
107
108
109
110
111
112
113 public SummaryStatistics(double[] values) {
114 this(values, null, 0, values.length, 1);
115 }
116
117
118
119
120
121
122
123
124 public SummaryStatistics(double[] values, int first) {
125 this(values, null, first, values.length, 1);
126 }
127
128
129
130
131
132
133
134
135
136 public SummaryStatistics(double[] values, int first, int last) {
137 this(values, null, first, last, 1);
138 }
139
140
141
142
143
144
145
146
147
148
149 public SummaryStatistics(double[] values, int first, int last, int stride) {
150 this(values, null, first, last, stride);
151 }
152
153
154
155
156
157
158
159
160
161
162 public SummaryStatistics(@Nullable double[] values, @Nullable double[] weights, int first, int last, int stride) {
163 if (values == null) {
164 throw new IllegalArgumentException(" Cannot have null values!");
165 }
166 int nVals = values.length;
167
168 if (first < 0 || first > (nVals - 1)) {
169 throw new IllegalArgumentException(
170 format(" First entry %d was not in valid range 0-%d (0 to length of values - 1)",
171 first, nVals - 1));
172 }
173 if (last <= first || last > nVals) {
174 throw new IllegalArgumentException(
175 format(" Last entry %d was not in valid range %d-%d (first+1 to length of values",
176 last, (first + 1), nVals));
177 }
178
179 if (weights == null) {
180 weights = new double[nVals];
181 fill(weights, 1.0);
182 }
183
184 int tempCount = (last - first);
185 if (tempCount % stride == 0) {
186 count = tempCount / stride;
187 } else {
188 count = (tempCount / stride) + 1;
189 }
190 assert count > 0;
191
192 if (count == 1) {
193 mean = values[first];
194 var = Double.NaN;
195 varPopulation = 0;
196 sd = Double.NaN;
197 sdPopulation = 0;
198 min = mean;
199 max = mean;
200 sum = mean;
201 sumWeights = weights[first];
202 dof = 0;
203 tDist = null;
204
205 descString = format(" Summary of single observation: value is %17.14g", mean);
206 } else {
207 double meanAcc = 0;
208 double varAcc = 0;
209 double minAcc = Double.MAX_VALUE;
210 double maxAcc = Double.MIN_VALUE;
211 double sumAcc = 0;
212 double comp = 0;
213 double weightAcc = 0;
214
215 for (int i = 0; i < count; i++) {
216 int ii = first + (i * stride);
217 assert ii < last;
218 double val = values[ii];
219 double priorMean = meanAcc;
220 double weight = weights[ii];
221 weightAcc += weight;
222
223 double y = val - comp;
224 double t = sumAcc + y;
225 comp = (t - sumAcc) - y;
226 sumAcc = t;
227
228 minAcc = min(minAcc, val);
229 maxAcc = max(maxAcc, val);
230
231 double invCount = weight / weightAcc;
232 meanAcc += ((val - meanAcc) * invCount);
233
234 varAcc += ((val - priorMean) * (val - meanAcc)) * weight;
235 }
236
237 min = minAcc;
238 max = maxAcc;
239 mean = meanAcc;
240 sum = sumAcc;
241 sumWeights = weightAcc;
242
243 assert abs(mean - (sum / count)) < 1.0E-11;
244 varPopulation = varAcc / count;
245 sdPopulation = sqrt(varPopulation);
246 dof = count - 1;
247 var = varAcc / dof;
248 sd = sqrt(var);
249 tDist = new TDistribution(dof);
250
251 descString =
252 format(
253 " Summary of %d observations: sum is %17.14g, mean is %17.14g, min is %17.14g, "
254 + "max is %17.14g, and the sum of weights is %17.14g"
255 + "\nSample standard deviation: %17.14g (dof = %d)"
256 + "\nPopulation standard deviation: %17.14g (dof = %d)",
257 count, sum, mean, min, max, sumWeights, sd, dof, sdPopulation, count);
258 }
259 }
260
261
262
263
264
265
266 public double confidenceInterval() {
267 return confidenceInterval(0.05);
268 }
269
270
271
272
273
274
275
276 public double confidenceInterval(double alpha) {
277 if (dof == 0) {
278 throw new IllegalArgumentException(
279 " Cannot calculate confidence intervals when there are no degrees of freedom!");
280 }
281 double critVal = tDist.inverseCumulativeProbability(0.5 * (1.0 - alpha));
282 return critVal * sd / sqrt(count);
283 }
284
285
286
287
288
289
290 public double getMean() {
291 return mean;
292 }
293
294
295
296
297
298
299 public double getSd() {
300 return sd;
301 }
302
303
304
305
306
307
308 public double getVar() {
309 return var;
310 }
311
312
313
314
315 @Override
316 public String toString() {
317 return descString;
318 }
319
320
321
322
323
324
325 public String describe() {
326 return format(" Mean: %12.6f +/-%12.6f, Min/Max: %12.6f/%12.6f", mean, sd, min, max);
327 }
328 }