1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38 package ffx.numerics.multipole;
39
40 import jdk.incubator.vector.DoubleVector;
41
42 import static ffx.numerics.math.ScalarMath.doubleFactorial;
43 import static ffx.numerics.multipole.GKTensorMode.POTENTIAL;
44 import static java.lang.System.arraycopy;
45 import static java.util.Arrays.fill;
46 import static jdk.incubator.vector.DoubleVector.SPECIES_PREFERRED;
47 import static jdk.incubator.vector.VectorOperators.EXP;
48 import static org.apache.commons.math3.util.FastMath.pow;
49
50
51
52
53 public class GKSourceSIMD {
54
55 private GKTensorMode mode = POTENTIAL;
56
57
58
59
60 private DoubleVector rb2;
61
62
63
64
65 private DoubleVector expTerm;
66
67
68
69
70 private DoubleVector f;
71
72
73
74
75 private DoubleVector f1;
76
77
78
79
80 private DoubleVector f2;
81
82
83
84
85 private final double gc;
86
87
88
89
90 private final double igc;
91
92
93
94
95 private DoubleVector gcAiAj;
96
97
98
99
100 private DoubleVector ratio;
101
102
103
104
105 private DoubleVector fr;
106
107
108
109
110 private DoubleVector r2;
111
112
113
114
115 private final int order;
116
117
118
119
120 private final double[] kirkwoodSource;
121
122
123
124
125 private final double[][] anmc;
126
127
128
129
130
131 private final DoubleVector[] fn;
132
133
134
135
136
137 protected final DoubleVector[] bn;
138
139
140
141
142 private final DoubleVector[][] anm;
143
144
145
146
147
148 private final DoubleVector[][] bnm;
149
150 private final DoubleVector zero = DoubleVector.zero(SPECIES_PREFERRED);
151 private final DoubleVector one = zero.add(1.0);
152
153
154
155
156
157
158
159 public GKSourceSIMD(int order, double gc) {
160 this.order = order;
161 this.gc = gc;
162 this.igc = 1.0 / gc;
163
164
165 kirkwoodSource = new double[order + 1];
166 for (short n = 0; n <= order; n++) {
167 kirkwoodSource[n] = pow(-1, n) * doubleFactorial(2 * n - 1);
168 }
169
170 anmc = new double[order + 1][];
171 for (int n = 0; n <= order; n++) {
172 anmc[n] = GKSource.anmc(n);
173 }
174
175 fn = new DoubleVector[order + 1];
176 anm = new DoubleVector[order + 1][order + 1];
177 bn = new DoubleVector[order + 1];
178 bnm = new DoubleVector[order + 1][order + 1];
179 }
180
181
182
183
184
185
186
187 protected void source(DoubleVector[] work, GKMultipoleOrder multipoleOrder) {
188 int mpoleOrder = multipoleOrder.getOrder();
189 fill(work, 0, mpoleOrder, zero);
190 if (mode == POTENTIAL) {
191
192 int derivatives = order - mpoleOrder;
193 arraycopy(anm[mpoleOrder], 0, work, mpoleOrder, derivatives + 1);
194 } else {
195
196 int derivatives = (order - 1) - mpoleOrder;
197 arraycopy(bnm[mpoleOrder], 0, work, mpoleOrder, derivatives + 1);
198 }
199 }
200
201
202
203
204
205
206
207
208
209
210 public void generateSource(GKTensorMode mode, GKMultipoleOrder multipole,
211 DoubleVector r2, DoubleVector ai, DoubleVector aj) {
212 int multipoleOrder = multipole.getOrder();
213 this.mode = mode;
214
215 this.r2 = r2;
216 rb2 = ai.mul(aj);
217 gcAiAj = rb2.mul(gc);
218 ratio = r2.neg().div(gcAiAj);
219 expTerm = ratio.lanewise(EXP);
220 fr = zero.sub(2.0).div(gcAiAj);
221 f = r2.add(rb2.mul(expTerm)).sqrt();
222 f1 = one.sub(expTerm.mul(igc));
223 f2 = zero.add(2.0).mul(expTerm).div(gcAiAj.mul(gc));
224 if (mode == POTENTIAL) {
225
226 anm(order, order - multipoleOrder);
227 } else {
228
229 bnm(order - 1, (order - 1) - multipoleOrder);
230 }
231 }
232
233
234
235
236
237
238
239
240
241
242
243 private void anm(int n, int derivatives) {
244
245 fn(n);
246
247
248 an0(n);
249
250
251 for (int d = 1; d <= derivatives; d++) {
252
253 var coef = anmc[d];
254
255 int limit = n - d;
256
257 for (int order = 0; order <= limit; order++) {
258
259 var terms = anm[order + 1];
260 var sum = zero;
261 for (int i = 1; i <= d; i++) {
262 sum = sum.add(fn[i].mul(terms[d - i].mul(coef[i - 1])));
263 }
264 anm[order][d] = sum;
265 }
266 }
267 }
268
269
270
271
272
273
274
275
276
277
278 private void bnm(int n, int derivatives) {
279
280 bn(n);
281
282
283 bn0(n);
284
285
286 for (int d = 1; d <= derivatives; d++) {
287
288 var coef = anmc[d];
289
290 int limit = n - d;
291
292 for (int order = 0; order <= limit; order++) {
293
294 var terma = anm[order + 1];
295 var termb = bnm[order + 1];
296 var sum = zero;
297 for (int i = 1; i <= d; i++) {
298 sum = sum.add(bn[i].mul(terma[d - i].mul(coef[i - 1])));
299 sum = sum.add(fn[i].mul(termb[d - i].mul(coef[i - 1])));
300 }
301 bnm[order][d] = sum;
302 }
303 }
304 }
305
306
307
308
309
310
311
312 private void fn(int n) {
313 switch (n) {
314 case 0 -> fn[0] = f;
315 case 1 -> {
316 fn[0] = f;
317 fn[1] = f1;
318 }
319 case 2 -> {
320 fn[0] = f;
321 fn[1] = f1;
322 fn[2] = f2;
323 }
324 case 3 -> {
325 fn[0] = f;
326 fn[1] = f1;
327 fn[2] = f2;
328 fn[3] = fr.mul(f2);
329 }
330 case 4 -> {
331 fn[0] = f;
332 fn[1] = f1;
333 fn[2] = f2;
334 fn[3] = fr.mul(f2);
335 fn[4] = fr.mul(fn[3]);
336 }
337 case 5 -> {
338 fn[0] = f;
339 fn[1] = f1;
340 fn[2] = f2;
341 fn[3] = fr.mul(f2);
342 fn[4] = fr.mul(fn[3]);
343 fn[5] = fr.mul(fn[4]);
344 }
345 case 6 -> {
346 fn[0] = f;
347 fn[1] = f1;
348 fn[2] = f2;
349 fn[3] = fr.mul(f2);
350 fn[4] = fr.mul(fn[3]);
351 fn[5] = fr.mul(fn[4]);
352 fn[6] = fr.mul(fn[5]);
353 }
354 default -> {
355 fn[0] = f;
356 fn[1] = f1;
357 fn[2] = f2;
358 for (int i = 3; i <= n; i++) {
359 fn[i] = fr.mul(fn[i - 1]);
360 }
361 }
362 }
363 }
364
365
366
367
368
369
370
371 protected void bn(int n) {
372 var b2 = expTerm.mul(2.0).div(gcAiAj.mul(gcAiAj)).mul(ratio.neg().sub(1.0));
373 switch (n) {
374 case 0 -> bn[0] = expTerm.mul(0.5).mul(ratio.neg().add(1.0));
375 case 1 -> {
376 bn[0] = expTerm.mul(0.5).mul(ratio.neg().add(1.0));
377 bn[1] = r2.mul(expTerm).div(gcAiAj.mul(gcAiAj)).neg();
378 }
379 case 2 -> {
380 bn[0] = expTerm.mul(0.5).mul(ratio.neg().add(1.0));
381 bn[1] = r2.mul(expTerm).div(gcAiAj.mul(gcAiAj)).neg();
382 bn[2] = b2;
383 }
384 default -> {
385 bn[0] = expTerm.mul(0.5).mul(ratio.neg().add(1.0));
386 bn[1] = r2.mul(expTerm).div(gcAiAj.mul(gcAiAj)).neg();
387 bn[2] = b2;
388 var br = zero.add(2.0).div(gcAiAj.mul(rb2));
389 var f2 = zero.add(2.0).div(gcAiAj.mul(gc)).mul(expTerm);
390 var frA = one;
391 var frB = fr;
392 for (int i = 3; i < n; i++) {
393
394 bn[i] = frA.mul(i - 2).mul(br).mul(f2).add(frB.mul(b2));
395 frA = frA.mul(fr);
396 frB = frB.mul(fr);
397 }
398 }
399 }
400 }
401
402
403
404
405
406
407 private void an0(int n) {
408 DoubleVector inverseF = one.div(f);
409 DoubleVector inverseF2 = inverseF.mul(inverseF);
410 for (int i = 0; i <= n; i++) {
411 anm[i][0] = inverseF.mul(kirkwoodSource[i]);
412 inverseF = inverseF.mul(inverseF2);
413 }
414 }
415
416
417
418
419
420
421 private void bn0(int n) {
422 for (int i = 0; i <= n; i++) {
423 bnm[i][0] = bn[0].mul(anm[i + 1][0]);
424 }
425 }
426
427 }