1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38 package ffx.numerics.math;
39
40 import static java.lang.String.format;
41 import static java.util.Arrays.fill;
42 import static org.apache.commons.math3.util.FastMath.abs;
43 import static org.apache.commons.math3.util.FastMath.max;
44 import static org.apache.commons.math3.util.FastMath.min;
45 import static org.apache.commons.math3.util.FastMath.sqrt;
46
47 import org.apache.commons.math3.distribution.TDistribution;
48
49 import javax.annotation.Nullable;
50
51
52
53
54
55
56
57
58
59
60
61
62 public class SummaryStatistics {
63
64
65
66
67
68 public final double mean;
69
70
71
72 public final double var;
73
74
75
76 public final double varPopulation;
77
78
79
80 public final double sd;
81
82
83
84 public final double sdPopulation;
85
86
87
88 public final double sumWeights;
89
90
91
92
93
94 public final double min;
95
96
97
98 public final double max;
99
100
101
102 public final long count;
103
104
105
106 public final double sum;
107
108
109
110 public final long dof;
111
112
113
114 private final TDistribution tDist;
115
116
117
118 private final String descString;
119
120
121
122
123
124
125 public SummaryStatistics(RunningStatistics rs) {
126 mean = rs.getMean();
127 var = rs.getVariance();
128 varPopulation = rs.getPopulationVariance();
129 sumWeights = rs.getWeight();
130 min = rs.getMin();
131 max = rs.getMax();
132 count = rs.getCount();
133 sum = rs.getSum();
134 dof = rs.getDOF();
135 tDist = (dof > 0) ? new TDistribution(dof) : null;
136 sd = rs.getStandardDeviation();
137 sdPopulation = rs.getPopulationStandardDeviation();
138 descString =
139 format(
140 " Summary of %d observations: sum is %17.14g, mean is %17.14g, min is %17.14g, "
141 + "max is %17.14g, and the sum of weights is %17.14g"
142 + "\nSample standard deviation: %17.14g (dof = %d)"
143 + "\nPopulation standard deviation: %17.14g (dof = %d)",
144 count, sum, mean, min, max, sumWeights, sd, dof, sdPopulation, count);
145 }
146
147
148
149
150
151
152
153 public SummaryStatistics(double[] values) {
154 this(values, null, 0, values.length, 1);
155 }
156
157
158
159
160
161
162
163
164 public SummaryStatistics(double[] values, int first) {
165 this(values, null, first, values.length, 1);
166 }
167
168
169
170
171
172
173
174
175
176 public SummaryStatistics(double[] values, int first, int last) {
177 this(values, null, first, last, 1);
178 }
179
180
181
182
183
184
185
186
187
188
189 public SummaryStatistics(double[] values, int first, int last, int stride) {
190 this(values, null, first, last, stride);
191 }
192
193
194
195
196
197
198
199
200
201
202 public SummaryStatistics(@Nullable double[] values, @Nullable double[] weights, int first, int last, int stride) {
203 if (values == null) {
204 throw new IllegalArgumentException(" Cannot have null values!");
205 }
206 int nVals = values.length;
207
208 if (first < 0 || first > (nVals - 1)) {
209 throw new IllegalArgumentException(
210 format(" First entry %d was not in valid range 0-%d (0 to length of values - 1)",
211 first, nVals - 1));
212 }
213 if (last <= first || last > nVals) {
214 throw new IllegalArgumentException(
215 format(" Last entry %d was not in valid range %d-%d (first+1 to length of values",
216 last, (first + 1), nVals));
217 }
218
219 if (weights == null) {
220 weights = new double[nVals];
221 fill(weights, 1.0);
222 }
223
224 int tempCount = (last - first);
225 if (tempCount % stride == 0) {
226 count = tempCount / stride;
227 } else {
228 count = (tempCount / stride) + 1;
229 }
230 assert count > 0;
231
232 if (count == 1) {
233 mean = values[first];
234 var = Double.NaN;
235 varPopulation = 0;
236 sd = Double.NaN;
237 sdPopulation = 0;
238 min = mean;
239 max = mean;
240 sum = mean;
241 sumWeights = weights[first];
242 dof = 0;
243 tDist = null;
244
245 descString = format(" Summary of single observation: value is %17.14g", mean);
246 } else {
247 double meanAcc = 0;
248 double varAcc = 0;
249 double minAcc = Double.MAX_VALUE;
250 double maxAcc = Double.MIN_VALUE;
251 double sumAcc = 0;
252 double comp = 0;
253 double weightAcc = 0;
254
255 for (int i = 0; i < count; i++) {
256 int ii = first + (i * stride);
257 assert ii < last;
258 double val = values[ii];
259 double priorMean = meanAcc;
260 double weight = weights[ii];
261 weightAcc += weight;
262
263 double y = val - comp;
264 double t = sumAcc + y;
265 comp = (t - sumAcc) - y;
266 sumAcc = t;
267
268 minAcc = min(minAcc, val);
269 maxAcc = max(maxAcc, val);
270
271 double invCount = weight / weightAcc;
272 meanAcc += ((val - meanAcc) * invCount);
273
274 varAcc += ((val - priorMean) * (val - meanAcc)) * weight;
275 }
276
277 min = minAcc;
278 max = maxAcc;
279 mean = meanAcc;
280 sum = sumAcc;
281 sumWeights = weightAcc;
282
283 assert abs(mean - (sum / count)) < 1.0E-11;
284 varPopulation = varAcc / count;
285 sdPopulation = sqrt(varPopulation);
286 dof = count - 1;
287 var = varAcc / dof;
288 sd = sqrt(var);
289 tDist = new TDistribution(dof);
290
291 descString =
292 format(
293 " Summary of %d observations: sum is %17.14g, mean is %17.14g, min is %17.14g, "
294 + "max is %17.14g, and the sum of weights is %17.14g"
295 + "\nSample standard deviation: %17.14g (dof = %d)"
296 + "\nPopulation standard deviation: %17.14g (dof = %d)",
297 count, sum, mean, min, max, sumWeights, sd, dof, sdPopulation, count);
298 }
299 }
300
301
302
303
304
305
306 public double confidenceInterval() {
307 return confidenceInterval(0.05);
308 }
309
310
311
312
313
314
315
316 public double confidenceInterval(double alpha) {
317 if (dof == 0) {
318 throw new IllegalArgumentException(
319 " Cannot calculate confidence intervals when there are no degrees of freedom!");
320 }
321 double critVal = tDist.inverseCumulativeProbability(0.5 * (1.0 - alpha));
322 return critVal * sd / sqrt(count);
323 }
324
325
326
327
328
329
330 public double getMean() {
331 return mean;
332 }
333
334
335
336
337
338
339 public double getSd() {
340 return sd;
341 }
342
343
344
345
346
347
348 public double getVar() {
349 return var;
350 }
351
352
353
354
355 @Override
356 public String toString() {
357 return descString;
358 }
359
360
361
362
363
364
365 public String describe() {
366 return format(" Mean: %12.6f +/-%12.6f, Min/Max: %12.6f/%12.6f", mean, sd, min, max);
367 }
368 }