1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38 package ffx.potential.nonbonded.pme;
39
40 import static ffx.numerics.special.Erf.erfc;
41 import static java.lang.String.format;
42 import static org.apache.commons.math3.util.FastMath.exp;
43 import static org.apache.commons.math3.util.FastMath.max;
44 import static org.apache.commons.math3.util.FastMath.min;
45 import static org.apache.commons.math3.util.FastMath.sqrt;
46
47 import edu.rit.pj.IntegerForLoop;
48 import edu.rit.pj.IntegerSchedule;
49 import edu.rit.pj.ParallelRegion;
50 import edu.rit.pj.ParallelTeam;
51 import edu.rit.pj.reduction.SharedDouble;
52 import ffx.crystal.Crystal;
53 import ffx.crystal.SymOp;
54 import ffx.numerics.atomic.AtomicDoubleArray3D;
55 import ffx.potential.bonded.Atom;
56 import ffx.potential.nonbonded.ParticleMeshEwald;
57 import ffx.potential.nonbonded.ParticleMeshEwald.EwaldParameters;
58 import ffx.potential.parameters.ForceField;
59 import ffx.potential.utils.EnergyException;
60 import ffx.utilities.Constants;
61 import java.util.List;
62 import java.util.logging.Level;
63 import java.util.logging.Logger;
64
65
66
67
68
69
70
71 public class PCGSolver {
72
73 private static final Logger logger = Logger.getLogger(PCGSolver.class.getName());
74
75 private final InducedDipolePreconditionerRegion inducedDipolePreconditionerRegion;
76 private final PCGInitRegion1 pcgInitRegion1;
77 private final PCGInitRegion2 pcgInitRegion2;
78 private final PCGIterRegion1 pcgIterRegion1;
79 private final PCGIterRegion2 pcgIterRegion2;
80 private final double poleps;
81 private final int preconditionerListSize = 50;
82 public double preconditionerCutoff;
83 public double preconditionerEwald = 0.0;
84
85
86
87
88 int[][][] preconditionerLists;
89
90 int[][] preconditionerCounts;
91
92 private double[][] rsd;
93
94 private double[][] rsdCR;
95
96 private double[][] rsdPre;
97
98 private double[][] rsdPreCR;
99
100 private double[][] conj;
101
102 private double[][] conjCR;
103
104 private double[][] vec;
105
106 private double[][] vecCR;
107
108 private Atom[] atoms;
109
110 private double[][][] coordinates;
111
112 private double[] polarizability;
113 private double[] ipdamp;
114 private double[] thole;
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133 private boolean[] use;
134
135 private Crystal crystal;
136
137 private double[][][] inducedDipole;
138 private double[][][] inducedDipoleCR;
139
140 private double[][] directDipole;
141 private double[][] directDipoleCR;
142
143
144 private AtomicDoubleArray3D field;
145
146 private AtomicDoubleArray3D fieldCR;
147
148 private EwaldParameters ewaldParameters;
149
150
151
152
153 private ParallelTeam parallelTeam;
154
155 private IntegerSchedule realSpaceSchedule;
156
157 private long[] realSpaceSCFTime;
158
159
160
161
162
163
164
165
166
167 public PCGSolver(int maxThreads, double poleps, ForceField forceField, int nAtoms) {
168 this.poleps = poleps;
169 inducedDipolePreconditionerRegion = new InducedDipolePreconditionerRegion(maxThreads);
170 pcgInitRegion1 = new PCGInitRegion1(maxThreads);
171 pcgInitRegion2 = new PCGInitRegion2(maxThreads);
172 pcgIterRegion1 = new PCGIterRegion1(maxThreads);
173 pcgIterRegion2 = new PCGIterRegion2(maxThreads);
174
175
176
177 boolean preconditioner = forceField.getBoolean("USE_SCF_PRECONDITIONER", true);
178 if (preconditioner) {
179 preconditionerCutoff = forceField.getDouble("CG_PRECONDITIONER_CUTOFF", 4.5);
180 preconditionerEwald = forceField.getDouble("CG_PRECONDITIONER_EWALD", 0.0);
181 } else {
182 preconditionerCutoff = 0.0;
183 }
184
185 allocateVectors(nAtoms);
186 }
187
188
189
190
191
192
193
194 public void allocateLists(int nSymm, int nAtoms) {
195 preconditionerLists = new int[nSymm][nAtoms][preconditionerListSize];
196 preconditionerCounts = new int[nSymm][nAtoms];
197 }
198
199
200
201
202
203
204 public void allocateVectors(int nAtoms) {
205 if (rsd == null || rsd[0].length != nAtoms) {
206 rsd = new double[3][nAtoms];
207 rsdCR = new double[3][nAtoms];
208 rsdPre = new double[3][nAtoms];
209 rsdPreCR = new double[3][nAtoms];
210 conj = new double[3][nAtoms];
211 conjCR = new double[3][nAtoms];
212 vec = new double[3][nAtoms];
213 vecCR = new double[3][nAtoms];
214 }
215 }
216
217 public void init(
218 Atom[] atoms,
219 double[][][] coordinates,
220 double[] polarizability,
221 double[] ipdamp,
222 double[] thole,
223 boolean[] use,
224 Crystal crystal,
225 double[][][] inducedDipole,
226 double[][][] inducedDipoleCR,
227 double[][] directDipole,
228 double[][] directDipoleCR,
229 AtomicDoubleArray3D field,
230 AtomicDoubleArray3D fieldCR,
231 EwaldParameters ewaldParameters,
232 ParallelTeam parallelTeam,
233 IntegerSchedule realSpaceSchedule,
234 long[] realSpaceSCFTime) {
235 this.atoms = atoms;
236 this.coordinates = coordinates;
237 this.polarizability = polarizability;
238 this.ipdamp = ipdamp;
239 this.thole = thole;
240 this.use = use;
241 this.crystal = crystal;
242 this.inducedDipole = inducedDipole;
243 this.inducedDipoleCR = inducedDipoleCR;
244 this.directDipole = directDipole;
245 this.directDipoleCR = directDipoleCR;
246 this.field = field;
247 this.fieldCR = fieldCR;
248 this.ewaldParameters = ewaldParameters;
249 this.parallelTeam = parallelTeam;
250 this.realSpaceSchedule = realSpaceSchedule;
251 this.realSpaceSCFTime = realSpaceSCFTime;
252 }
253
254 public int scfByPCG(boolean print, long startTime, ParticleMeshEwald pme) {
255 long directTime = System.nanoTime() - startTime;
256
257 StringBuilder sb = null;
258 if (print) {
259 sb = new StringBuilder("\n Self-Consistent Field\n Iter RMS Change (Debye) Time\n");
260 }
261
262
263
264 pme.computeInduceDipoleField();
265
266 try {
267
268
269 parallelTeam.execute(pcgInitRegion1);
270
271
272 pme.expandInducedDipoles();
273
274 double aewaldTemp = ewaldParameters.aewald;
275 ewaldParameters.setEwaldParameters(ewaldParameters.off, preconditionerEwald);
276 int nAtoms = atoms.length;
277 field.reset(parallelTeam, 0, nAtoms - 1);
278 fieldCR.reset(parallelTeam, 0, nAtoms - 1);
279 parallelTeam.execute(inducedDipolePreconditionerRegion);
280 field.reduce(parallelTeam, 0, nAtoms - 1);
281 fieldCR.reduce(parallelTeam, 0, nAtoms - 1);
282 ewaldParameters.setEwaldParameters(ewaldParameters.off, aewaldTemp);
283
284
285 parallelTeam.execute(pcgInitRegion2);
286 } catch (Exception e) {
287 String message = "Exception initializing preconditioned CG.";
288 logger.log(Level.SEVERE, message, e);
289 }
290
291
292 int completedSCFCycles = 0;
293 int maxSCFCycles = 1000;
294 double eps = 100.0;
295 double previousEps;
296 boolean done = false;
297 while (!done) {
298 long cycleTime = -System.nanoTime();
299
300
301
302 int nAtoms = atoms.length;
303 for (int i = 0; i < nAtoms; i++) {
304 vec[0][i] = inducedDipole[0][i][0];
305 vec[1][i] = inducedDipole[0][i][1];
306 vec[2][i] = inducedDipole[0][i][2];
307 inducedDipole[0][i][0] = conj[0][i];
308 inducedDipole[0][i][1] = conj[1][i];
309 inducedDipole[0][i][2] = conj[2][i];
310 vecCR[0][i] = inducedDipoleCR[0][i][0];
311 vecCR[1][i] = inducedDipoleCR[0][i][1];
312 vecCR[2][i] = inducedDipoleCR[0][i][2];
313 inducedDipoleCR[0][i][0] = conjCR[0][i];
314 inducedDipoleCR[0][i][1] = conjCR[1][i];
315 inducedDipoleCR[0][i][2] = conjCR[2][i];
316 }
317
318
319 pme.computeInduceDipoleField();
320
321 try {
322
323
324
325
326
327
328
329 parallelTeam.execute(pcgIterRegion1);
330
331
332 pme.expandInducedDipoles();
333
334 double aewaldTemp = ewaldParameters.aewald;
335 ewaldParameters.setEwaldParameters(ewaldParameters.off, preconditionerEwald);
336 field.reset(parallelTeam, 0, nAtoms - 1);
337 fieldCR.reset(parallelTeam, 0, nAtoms - 1);
338 parallelTeam.execute(inducedDipolePreconditionerRegion);
339 field.reduce(parallelTeam, 0, nAtoms - 1);
340 fieldCR.reduce(parallelTeam, 0, nAtoms - 1);
341 ewaldParameters.setEwaldParameters(ewaldParameters.off, aewaldTemp);
342
343
344
345
346
347
348 pcgIterRegion2.sum = pcgIterRegion1.getSum();
349 pcgIterRegion2.sumCR = pcgIterRegion1.getSumCR();
350 parallelTeam.execute(pcgIterRegion2);
351 } catch (Exception e) {
352 String message = "Exception in first CG iteration region.";
353 logger.log(Level.SEVERE, message, e);
354 }
355
356 previousEps = eps;
357 eps = max(pcgIterRegion2.getEps(), pcgIterRegion2.getEpsCR());
358 completedSCFCycles++;
359 eps = Constants.ELEC_ANG_TO_DEBYE * sqrt(eps / (double) nAtoms);
360 cycleTime += System.nanoTime();
361 if (print) {
362 sb.append(
363 format(
364 " %4d %15.10f %7.4f\n", completedSCFCycles, eps, cycleTime * Constants.NS2SEC));
365 }
366
367
368 if (eps > previousEps) {
369 if (sb != null) {
370 logger.warning(sb.toString());
371 }
372 String message =
373 format("Fatal SCF convergence failure: (%10.5f > %10.5f)\n", eps, previousEps);
374 throw new EnergyException(message);
375 }
376
377
378
379 if (completedSCFCycles >= maxSCFCycles) {
380 if (sb != null) {
381 logger.warning(sb.toString());
382 }
383 String message = format("Maximum SCF iterations reached: (%d)\n", completedSCFCycles);
384 throw new EnergyException(message);
385 }
386
387
388 if (eps < poleps) {
389 done = true;
390 }
391 }
392 if (print) {
393 sb.append(format(" Direct: %7.4f\n", Constants.NS2SEC * directTime));
394 startTime = System.nanoTime() - startTime;
395 sb.append(format(" Total: %7.4f", startTime * Constants.NS2SEC));
396 logger.info(sb.toString());
397 }
398
399
400 pme.computeInduceDipoleField();
401
402 return completedSCFCycles;
403 }
404
405 private class PCGInitRegion1 extends ParallelRegion {
406
407 private final PCGInitLoop[] pcgLoop;
408
409 public PCGInitRegion1(int nt) {
410 pcgLoop = new PCGInitLoop[nt];
411 }
412
413 @Override
414 public void run() throws Exception {
415 try {
416 int ti = getThreadIndex();
417 if (pcgLoop[ti] == null) {
418 pcgLoop[ti] = new PCGInitLoop();
419 }
420 int nAtoms = atoms.length;
421 execute(0, nAtoms - 1, pcgLoop[ti]);
422 } catch (Exception e) {
423 String message =
424 "Fatal exception computing the mutual induced dipoles in thread "
425 + getThreadIndex()
426 + "\n";
427 logger.log(Level.SEVERE, message, e);
428 }
429 }
430
431 private class PCGInitLoop extends IntegerForLoop {
432
433 @Override
434 public void run(int lb, int ub) throws Exception {
435 for (int i = lb; i <= ub; i++) {
436
437 double ipolar;
438 if (polarizability[i] > 0) {
439 ipolar = 1.0 / polarizability[i];
440 rsd[0][i] = (directDipole[i][0] - inducedDipole[0][i][0]) * ipolar + field.getX(i);
441 rsd[1][i] = (directDipole[i][1] - inducedDipole[0][i][1]) * ipolar + field.getY(i);
442 rsd[2][i] = (directDipole[i][2] - inducedDipole[0][i][2]) * ipolar + field.getZ(i);
443 rsdCR[0][i] =
444 (directDipoleCR[i][0] - inducedDipoleCR[0][i][0]) * ipolar + fieldCR.getX(i);
445 rsdCR[1][i] =
446 (directDipoleCR[i][1] - inducedDipoleCR[0][i][1]) * ipolar + fieldCR.getY(i);
447 rsdCR[2][i] =
448 (directDipoleCR[i][2] - inducedDipoleCR[0][i][2]) * ipolar + fieldCR.getZ(i);
449 } else {
450 rsd[0][i] = 0.0;
451 rsd[1][i] = 0.0;
452 rsd[2][i] = 0.0;
453 rsdCR[0][i] = 0.0;
454 rsdCR[1][i] = 0.0;
455 rsdCR[2][i] = 0.0;
456 }
457
458 double polar = polarizability[i];
459 vec[0][i] = inducedDipole[0][i][0];
460 vec[1][i] = inducedDipole[0][i][1];
461 vec[2][i] = inducedDipole[0][i][2];
462 vecCR[0][i] = inducedDipoleCR[0][i][0];
463 vecCR[1][i] = inducedDipoleCR[0][i][1];
464 vecCR[2][i] = inducedDipoleCR[0][i][2];
465 inducedDipole[0][i][0] = polar * rsd[0][i];
466 inducedDipole[0][i][1] = polar * rsd[1][i];
467 inducedDipole[0][i][2] = polar * rsd[2][i];
468 inducedDipoleCR[0][i][0] = polar * rsdCR[0][i];
469 inducedDipoleCR[0][i][1] = polar * rsdCR[1][i];
470 inducedDipoleCR[0][i][2] = polar * rsdCR[2][i];
471 }
472 }
473
474 @Override
475 public IntegerSchedule schedule() {
476 return IntegerSchedule.fixed();
477 }
478 }
479 }
480
481 private class PCGInitRegion2 extends ParallelRegion {
482
483 private final PCGInitLoop[] pcgLoop;
484
485 public PCGInitRegion2(int nt) {
486 pcgLoop = new PCGInitLoop[nt];
487 }
488
489 @Override
490 public void run() throws Exception {
491 try {
492 int ti = getThreadIndex();
493 if (pcgLoop[ti] == null) {
494 pcgLoop[ti] = new PCGInitLoop();
495 }
496 int nAtoms = atoms.length;
497 execute(0, nAtoms - 1, pcgLoop[ti]);
498 } catch (Exception e) {
499 String message =
500 "Fatal exception computing the mutual induced dipoles in thread "
501 + getThreadIndex()
502 + "\n";
503 logger.log(Level.SEVERE, message, e);
504 }
505 }
506
507 private class PCGInitLoop extends IntegerForLoop {
508
509 @Override
510 public void run(int lb, int ub) throws Exception {
511
512 for (int i = lb; i <= ub; i++) {
513
514
515 inducedDipole[0][i][0] = vec[0][i];
516 inducedDipole[0][i][1] = vec[1][i];
517 inducedDipole[0][i][2] = vec[2][i];
518 inducedDipoleCR[0][i][0] = vecCR[0][i];
519 inducedDipoleCR[0][i][1] = vecCR[1][i];
520 inducedDipoleCR[0][i][2] = vecCR[2][i];
521
522
523 double udiag = 2.0;
524 double polar = polarizability[i];
525 rsdPre[0][i] = polar * (field.getX(i) + udiag * rsd[0][i]);
526 rsdPre[1][i] = polar * (field.getY(i) + udiag * rsd[1][i]);
527 rsdPre[2][i] = polar * (field.getZ(i) + udiag * rsd[2][i]);
528 rsdPreCR[0][i] = polar * (fieldCR.getX(i) + udiag * rsdCR[0][i]);
529 rsdPreCR[1][i] = polar * (fieldCR.getY(i) + udiag * rsdCR[1][i]);
530 rsdPreCR[2][i] = polar * (fieldCR.getZ(i) + udiag * rsdCR[2][i]);
531 conj[0][i] = rsdPre[0][i];
532 conj[1][i] = rsdPre[1][i];
533 conj[2][i] = rsdPre[2][i];
534 conjCR[0][i] = rsdPreCR[0][i];
535 conjCR[1][i] = rsdPreCR[1][i];
536 conjCR[2][i] = rsdPreCR[2][i];
537 }
538 }
539
540 @Override
541 public IntegerSchedule schedule() {
542 return IntegerSchedule.fixed();
543 }
544 }
545 }
546
547 private class PCGIterRegion1 extends ParallelRegion {
548
549 private final PCGIterLoop1[] iterLoop1;
550 private final PCGIterLoop2[] iterLoop2;
551 private final SharedDouble dotShared;
552 private final SharedDouble dotCRShared;
553 private final SharedDouble sumShared;
554 private final SharedDouble sumCRShared;
555
556 public PCGIterRegion1(int nt) {
557 iterLoop1 = new PCGIterLoop1[nt];
558 iterLoop2 = new PCGIterLoop2[nt];
559 dotShared = new SharedDouble();
560 dotCRShared = new SharedDouble();
561 sumShared = new SharedDouble();
562 sumCRShared = new SharedDouble();
563 }
564
565 public double getSum() {
566 return sumShared.get();
567 }
568
569 public double getSumCR() {
570 return sumCRShared.get();
571 }
572
573 @Override
574 public void run() throws Exception {
575 try {
576 int ti = getThreadIndex();
577 int nAtoms = atoms.length;
578 if (iterLoop1[ti] == null) {
579 iterLoop1[ti] = new PCGIterLoop1();
580 iterLoop2[ti] = new PCGIterLoop2();
581 }
582 execute(0, nAtoms - 1, iterLoop1[ti]);
583 if (ti == 0) {
584 if (dotShared.get() != 0.0) {
585 dotShared.set(sumShared.get() / dotShared.get());
586 }
587 if (dotCRShared.get() != 0.0) {
588 dotCRShared.set(sumCRShared.get() / dotCRShared.get());
589 }
590 }
591 barrier();
592 execute(0, nAtoms - 1, iterLoop2[ti]);
593 } catch (Exception e) {
594 String message =
595 "Fatal exception computing the mutual induced dipoles in thread "
596 + getThreadIndex()
597 + "\n";
598 logger.log(Level.SEVERE, message, e);
599 }
600 }
601
602 @Override
603 public void start() {
604 dotShared.set(0.0);
605 dotCRShared.set(0.0);
606 sumShared.set(0.0);
607 sumCRShared.set(0.0);
608 }
609
610 private class PCGIterLoop1 extends IntegerForLoop {
611
612 public double dot;
613 public double dotCR;
614 public double sum;
615 public double sumCR;
616
617 @Override
618 public void finish() {
619 dotShared.addAndGet(dot);
620 dotCRShared.addAndGet(dotCR);
621 sumShared.addAndGet(sum);
622 sumCRShared.addAndGet(sumCR);
623 }
624
625 @Override
626 public void run(int lb, int ub) throws Exception {
627 for (int i = lb; i <= ub; i++) {
628 if (polarizability[i] > 0) {
629 double ipolar = 1.0 / polarizability[i];
630 inducedDipole[0][i][0] = vec[0][i];
631 inducedDipole[0][i][1] = vec[1][i];
632 inducedDipole[0][i][2] = vec[2][i];
633 vec[0][i] = conj[0][i] * ipolar - field.getX(i);
634 vec[1][i] = conj[1][i] * ipolar - field.getY(i);
635 vec[2][i] = conj[2][i] * ipolar - field.getZ(i);
636 inducedDipoleCR[0][i][0] = vecCR[0][i];
637 inducedDipoleCR[0][i][1] = vecCR[1][i];
638 inducedDipoleCR[0][i][2] = vecCR[2][i];
639 vecCR[0][i] = conjCR[0][i] * ipolar - fieldCR.getX(i);
640 vecCR[1][i] = conjCR[1][i] * ipolar - fieldCR.getY(i);
641 vecCR[2][i] = conjCR[2][i] * ipolar - fieldCR.getZ(i);
642 } else {
643 inducedDipole[0][i][0] = 0.0;
644 inducedDipole[0][i][1] = 0.0;
645 inducedDipole[0][i][2] = 0.0;
646 vec[0][i] = 0.0;
647 vec[1][i] = 0.0;
648 vec[2][i] = 0.0;
649 inducedDipoleCR[0][i][0] = 0.0;
650 inducedDipoleCR[0][i][1] = 0.0;
651 inducedDipoleCR[0][i][2] = 0.0;
652 vecCR[0][i] = 0.0;
653 vecCR[1][i] = 0.0;
654 vecCR[2][i] = 0.0;
655 }
656
657
658 dot += conj[0][i] * vec[0][i] + conj[1][i] * vec[1][i] + conj[2][i] * vec[2][i];
659 dotCR +=
660 conjCR[0][i] * vecCR[0][i] + conjCR[1][i] * vecCR[1][i] + conjCR[2][i] * vecCR[2][i];
661
662 sum += rsd[0][i] * rsdPre[0][i] + rsd[1][i] * rsdPre[1][i] + rsd[2][i] * rsdPre[2][i];
663 sumCR +=
664 rsdCR[0][i] * rsdPreCR[0][i]
665 + rsdCR[1][i] * rsdPreCR[1][i]
666 + rsdCR[2][i] * rsdPreCR[2][i];
667 }
668 }
669
670 @Override
671 public IntegerSchedule schedule() {
672 return IntegerSchedule.fixed();
673 }
674
675 @Override
676 public void start() {
677 dot = 0.0;
678 dotCR = 0.0;
679 sum = 0.0;
680 sumCR = 0.0;
681 }
682 }
683
684 private class PCGIterLoop2 extends IntegerForLoop {
685
686 @Override
687 public void run(int lb, int ub) throws Exception {
688 double dot = dotShared.get();
689 double dotCR = dotCRShared.get();
690 for (int i = lb; i <= ub; i++) {
691
692
693
694
695
696
697 rsd[0][i] -= dot * vec[0][i];
698 rsd[1][i] -= dot * vec[1][i];
699 rsd[2][i] -= dot * vec[2][i];
700 rsdCR[0][i] -= dotCR * vecCR[0][i];
701 rsdCR[1][i] -= dotCR * vecCR[1][i];
702 rsdCR[2][i] -= dotCR * vecCR[2][i];
703 vec[0][i] = inducedDipole[0][i][0] + dot * conj[0][i];
704 vec[1][i] = inducedDipole[0][i][1] + dot * conj[1][i];
705 vec[2][i] = inducedDipole[0][i][2] + dot * conj[2][i];
706 vecCR[0][i] = inducedDipoleCR[0][i][0] + dotCR * conjCR[0][i];
707 vecCR[1][i] = inducedDipoleCR[0][i][1] + dotCR * conjCR[1][i];
708 vecCR[2][i] = inducedDipoleCR[0][i][2] + dotCR * conjCR[2][i];
709 double polar = polarizability[i];
710 inducedDipole[0][i][0] = polar * rsd[0][i];
711 inducedDipole[0][i][1] = polar * rsd[1][i];
712 inducedDipole[0][i][2] = polar * rsd[2][i];
713 inducedDipoleCR[0][i][0] = polar * rsdCR[0][i];
714 inducedDipoleCR[0][i][1] = polar * rsdCR[1][i];
715 inducedDipoleCR[0][i][2] = polar * rsdCR[2][i];
716 }
717 }
718
719 @Override
720 public IntegerSchedule schedule() {
721 return IntegerSchedule.fixed();
722 }
723 }
724 }
725
726 private class PCGIterRegion2 extends ParallelRegion {
727
728 private final PCGIterLoop1[] iterLoop1;
729 private final PCGIterLoop2[] iterLoop2;
730 private final SharedDouble dotShared;
731 private final SharedDouble dotCRShared;
732 private final SharedDouble epsShared;
733 private final SharedDouble epsCRShared;
734 public double sum;
735 public double sumCR;
736
737 public PCGIterRegion2(int nt) {
738 iterLoop1 = new PCGIterLoop1[nt];
739 iterLoop2 = new PCGIterLoop2[nt];
740 dotShared = new SharedDouble();
741 dotCRShared = new SharedDouble();
742 epsShared = new SharedDouble();
743 epsCRShared = new SharedDouble();
744 }
745
746 public double getEps() {
747 return epsShared.get();
748 }
749
750 public double getEpsCR() {
751 return epsCRShared.get();
752 }
753
754 @Override
755 public void run() throws Exception {
756 try {
757 int ti = getThreadIndex();
758 if (iterLoop1[ti] == null) {
759 iterLoop1[ti] = new PCGIterLoop1();
760 iterLoop2[ti] = new PCGIterLoop2();
761 }
762 int nAtoms = atoms.length;
763 execute(0, nAtoms - 1, iterLoop1[ti]);
764 execute(0, nAtoms - 1, iterLoop2[ti]);
765 } catch (Exception e) {
766 String message =
767 "Fatal exception computing the mutual induced dipoles in thread "
768 + getThreadIndex()
769 + "\n";
770 logger.log(Level.SEVERE, message, e);
771 }
772 }
773
774 @Override
775 public void start() {
776 dotShared.set(0.0);
777 dotCRShared.set(0.0);
778 epsShared.set(0.0);
779 epsCRShared.set(0.0);
780 if (sum == 0.0) {
781 sum = 1.0;
782 }
783 if (sumCR == 0.0) {
784 sumCR = 1.0;
785 }
786 }
787
788 private class PCGIterLoop1 extends IntegerForLoop {
789
790 public double dot;
791 public double dotCR;
792
793 @Override
794 public void finish() {
795 dotShared.addAndGet(dot / sum);
796 dotCRShared.addAndGet(dotCR / sumCR);
797 }
798
799 @Override
800 public void run(int lb, int ub) throws Exception {
801 double udiag = 2.0;
802 for (int i = lb; i <= ub; i++) {
803
804
805 inducedDipole[0][i][0] = vec[0][i];
806 inducedDipole[0][i][1] = vec[1][i];
807 inducedDipole[0][i][2] = vec[2][i];
808 inducedDipoleCR[0][i][0] = vecCR[0][i];
809 inducedDipoleCR[0][i][1] = vecCR[1][i];
810 inducedDipoleCR[0][i][2] = vecCR[2][i];
811
812
813 double polar = polarizability[i];
814 rsdPre[0][i] = polar * (field.getX(i) + udiag * rsd[0][i]);
815 rsdPre[1][i] = polar * (field.getY(i) + udiag * rsd[1][i]);
816 rsdPre[2][i] = polar * (field.getZ(i) + udiag * rsd[2][i]);
817 rsdPreCR[0][i] = polar * (fieldCR.getX(i) + udiag * rsdCR[0][i]);
818 rsdPreCR[1][i] = polar * (fieldCR.getY(i) + udiag * rsdCR[1][i]);
819 rsdPreCR[2][i] = polar * (fieldCR.getZ(i) + udiag * rsdCR[2][i]);
820 dot += rsd[0][i] * rsdPre[0][i] + rsd[1][i] * rsdPre[1][i] + rsd[2][i] * rsdPre[2][i];
821 dotCR +=
822 rsdCR[0][i] * rsdPreCR[0][i]
823 + rsdCR[1][i] * rsdPreCR[1][i]
824 + rsdCR[2][i] * rsdPreCR[2][i];
825 }
826 }
827
828 @Override
829 public IntegerSchedule schedule() {
830 return IntegerSchedule.fixed();
831 }
832
833 @Override
834 public void start() {
835 dot = 0.0;
836 dotCR = 0.0;
837 }
838 }
839
840 private class PCGIterLoop2 extends IntegerForLoop {
841
842 public double eps;
843 public double epsCR;
844
845 @Override
846 public void finish() {
847 epsShared.addAndGet(eps);
848 epsCRShared.addAndGet(epsCR);
849 }
850
851 @Override
852 public void run(int lb, int ub) throws Exception {
853 double dot = dotShared.get();
854 double dotCR = dotCRShared.get();
855 for (int i = lb; i <= ub; i++) {
856
857 conj[0][i] = rsdPre[0][i] + dot * conj[0][i];
858 conj[1][i] = rsdPre[1][i] + dot * conj[1][i];
859 conj[2][i] = rsdPre[2][i] + dot * conj[2][i];
860 conjCR[0][i] = rsdPreCR[0][i] + dotCR * conjCR[0][i];
861 conjCR[1][i] = rsdPreCR[1][i] + dotCR * conjCR[1][i];
862 conjCR[2][i] = rsdPreCR[2][i] + dotCR * conjCR[2][i];
863 eps += rsd[0][i] * rsd[0][i] + rsd[1][i] * rsd[1][i] + rsd[2][i] * rsd[2][i];
864 epsCR +=
865 rsdCR[0][i] * rsdCR[0][i] + rsdCR[1][i] * rsdCR[1][i] + rsdCR[2][i] * rsdCR[2][i];
866 }
867 }
868
869 @Override
870 public IntegerSchedule schedule() {
871 return IntegerSchedule.fixed();
872 }
873
874 @Override
875 public void start() {
876 eps = 0.0;
877 epsCR = 0.0;
878 }
879 }
880 }
881
882
883 private class InducedDipolePreconditionerRegion extends ParallelRegion {
884 private final InducedPreconditionerFieldLoop[] inducedPreconditionerFieldLoop;
885
886 InducedDipolePreconditionerRegion(int threadCount) {
887 inducedPreconditionerFieldLoop = new InducedPreconditionerFieldLoop[threadCount];
888 }
889
890 @Override
891 public void run() {
892 int threadIndex = getThreadIndex();
893 if (inducedPreconditionerFieldLoop[threadIndex] == null) {
894 inducedPreconditionerFieldLoop[threadIndex] = new InducedPreconditionerFieldLoop();
895 }
896 try {
897 int nAtoms = atoms.length;
898 execute(0, nAtoms - 1, inducedPreconditionerFieldLoop[threadIndex]);
899 } catch (Exception e) {
900 String message =
901 "Fatal exception computing the induced real space field in thread "
902 + getThreadIndex()
903 + "\n";
904 logger.log(Level.SEVERE, message, e);
905 }
906 }
907
908 private class InducedPreconditionerFieldLoop extends IntegerForLoop {
909
910 private int threadID;
911 private double[] x, y, z;
912 private double[][] ind, indCR;
913
914 InducedPreconditionerFieldLoop() {}
915
916 @Override
917 public void finish() {
918 realSpaceSCFTime[threadID] += System.nanoTime();
919 }
920
921 @Override
922 public void run(int lb, int ub) {
923 final double[] dx = new double[3];
924 final double[][] transOp = new double[3][3];
925
926
927 int[][] lists = preconditionerLists[0];
928 int[] counts = preconditionerCounts[0];
929 for (int i = lb; i <= ub; i++) {
930 if (!use[i]) {
931 continue;
932 }
933 double fx = 0.0;
934 double fy = 0.0;
935 double fz = 0.0;
936 double px = 0.0;
937 double py = 0.0;
938 double pz = 0.0;
939 final double xi = x[i];
940 final double yi = y[i];
941 final double zi = z[i];
942 final double[] dipolei = ind[i];
943 final double uix = dipolei[0];
944 final double uiy = dipolei[1];
945 final double uiz = dipolei[2];
946 final double[] dipoleCRi = indCR[i];
947 final double pix = dipoleCRi[0];
948 final double piy = dipoleCRi[1];
949 final double piz = dipoleCRi[2];
950 final double pdi = ipdamp[i];
951 final double pti = thole[i];
952
953
954 final int[] list = lists[i];
955 final int npair = counts[i];
956 for (int j = 0; j < npair; j++) {
957 final int k = list[j];
958 if (!use[k]) {
959 continue;
960 }
961 final double pdk = ipdamp[k];
962 final double ptk = thole[k];
963 dx[0] = x[k] - xi;
964 dx[1] = y[k] - yi;
965 dx[2] = z[k] - zi;
966 final double r2 = crystal.image(dx);
967
968
969 final double r = sqrt(r2);
970 final double rr1 = 1.0 / r;
971 final double rr2 = rr1 * rr1;
972 final double ralpha = ewaldParameters.aewald * r;
973 final double exp2a = exp(-ralpha * ralpha);
974 final double bn0 = erfc(ralpha) * rr1;
975
976
977 final double bn1 = (bn0 + ewaldParameters.an0 * exp2a) * rr2;
978 final double bn2 = (3.0 * bn1 + ewaldParameters.an1 * exp2a) * rr2;
979 double scale3 = 1.0;
980 double scale5 = 1.0;
981 double damp = pdi * pdk;
982 final double pgamma = min(pti, ptk);
983 final double rdamp = r * damp;
984 damp = -pgamma * rdamp * rdamp * rdamp;
985 if (damp > -50.0) {
986 final double expdamp = exp(damp);
987 scale3 = 1.0 - expdamp;
988 scale5 = 1.0 - expdamp * (1.0 - damp);
989 }
990 double rr3 = rr1 * rr2;
991 double rr5 = 3.0 * rr3 * rr2;
992 rr3 *= (1.0 - scale3);
993 rr5 *= (1.0 - scale5);
994 final double xr = dx[0];
995 final double yr = dx[1];
996 final double zr = dx[2];
997 final double[] dipolek = ind[k];
998 final double ukx = dipolek[0];
999 final double uky = dipolek[1];
1000 final double ukz = dipolek[2];
1001 final double ukr = ukx * xr + uky * yr + ukz * zr;
1002 final double bn2ukr = bn2 * ukr;
1003 final double fimx = -bn1 * ukx + bn2ukr * xr;
1004 final double fimy = -bn1 * uky + bn2ukr * yr;
1005 final double fimz = -bn1 * ukz + bn2ukr * zr;
1006 final double rr5ukr = rr5 * ukr;
1007 final double fidx = -rr3 * ukx + rr5ukr * xr;
1008 final double fidy = -rr3 * uky + rr5ukr * yr;
1009 final double fidz = -rr3 * ukz + rr5ukr * zr;
1010 fx += (fimx - fidx);
1011 fy += (fimy - fidy);
1012 fz += (fimz - fidz);
1013 final double[] dipolepk = indCR[k];
1014 final double pkx = dipolepk[0];
1015 final double pky = dipolepk[1];
1016 final double pkz = dipolepk[2];
1017 final double pkr = pkx * xr + pky * yr + pkz * zr;
1018 final double bn2pkr = bn2 * pkr;
1019 final double pimx = -bn1 * pkx + bn2pkr * xr;
1020 final double pimy = -bn1 * pky + bn2pkr * yr;
1021 final double pimz = -bn1 * pkz + bn2pkr * zr;
1022 final double rr5pkr = rr5 * pkr;
1023 final double pidx = -rr3 * pkx + rr5pkr * xr;
1024 final double pidy = -rr3 * pky + rr5pkr * yr;
1025 final double pidz = -rr3 * pkz + rr5pkr * zr;
1026 px += (pimx - pidx);
1027 py += (pimy - pidy);
1028 pz += (pimz - pidz);
1029 final double uir = uix * xr + uiy * yr + uiz * zr;
1030 final double bn2uir = bn2 * uir;
1031 final double fkmx = -bn1 * uix + bn2uir * xr;
1032 final double fkmy = -bn1 * uiy + bn2uir * yr;
1033 final double fkmz = -bn1 * uiz + bn2uir * zr;
1034 final double rr5uir = rr5 * uir;
1035 final double fkdx = -rr3 * uix + rr5uir * xr;
1036 final double fkdy = -rr3 * uiy + rr5uir * yr;
1037 final double fkdz = -rr3 * uiz + rr5uir * zr;
1038 field.add(threadID, k, fkmx - fkdx, fkmy - fkdy, fkmz - fkdz);
1039 final double pir = pix * xr + piy * yr + piz * zr;
1040 final double bn2pir = bn2 * pir;
1041 final double pkmx = -bn1 * pix + bn2pir * xr;
1042 final double pkmy = -bn1 * piy + bn2pir * yr;
1043 final double pkmz = -bn1 * piz + bn2pir * zr;
1044 final double rr5pir = rr5 * pir;
1045 final double pkdx = -rr3 * pix + rr5pir * xr;
1046 final double pkdy = -rr3 * piy + rr5pir * yr;
1047 final double pkdz = -rr3 * piz + rr5pir * zr;
1048 fieldCR.add(threadID, k, pkmx - pkdx, pkmy - pkdy, pkmz - pkdz);
1049 }
1050 field.add(threadID, i, fx, fy, fz);
1051 fieldCR.add(threadID, i, px, py, pz);
1052 }
1053
1054
1055 List<SymOp> symOps = crystal.spaceGroup.symOps;
1056 int nSymm = symOps.size();
1057 for (int iSymm = 1; iSymm < nSymm; iSymm++) {
1058 SymOp symOp = crystal.spaceGroup.getSymOp(iSymm);
1059 crystal.getTransformationOperator(symOp, transOp);
1060 lists = preconditionerLists[iSymm];
1061 counts = preconditionerCounts[iSymm];
1062 final double[] xs = coordinates[iSymm][0];
1063 final double[] ys = coordinates[iSymm][1];
1064 final double[] zs = coordinates[iSymm][2];
1065 final double[][] inds = inducedDipole[iSymm];
1066 final double[][] indCRs = inducedDipoleCR[iSymm];
1067
1068
1069 for (int i = lb; i <= ub; i++) {
1070 if (!use[i]) {
1071 continue;
1072 }
1073 double fx = 0.0;
1074 double fy = 0.0;
1075 double fz = 0.0;
1076 double px = 0.0;
1077 double py = 0.0;
1078 double pz = 0.0;
1079 final double xi = x[i];
1080 final double yi = y[i];
1081 final double zi = z[i];
1082 final double[] dipolei = ind[i];
1083 final double uix = dipolei[0];
1084 final double uiy = dipolei[1];
1085 final double uiz = dipolei[2];
1086 final double[] dipoleCRi = indCR[i];
1087 final double pix = dipoleCRi[0];
1088 final double piy = dipoleCRi[1];
1089 final double piz = dipoleCRi[2];
1090 final double pdi = ipdamp[i];
1091 final double pti = thole[i];
1092
1093
1094 final int[] list = lists[i];
1095 final int npair = counts[i];
1096 for (int j = 0; j < npair; j++) {
1097 final int k = list[j];
1098 if (!use[k]) {
1099 continue;
1100 }
1101 double selfScale = 1.0;
1102 if (i == k) {
1103 selfScale = 0.5;
1104 }
1105 final double pdk = ipdamp[k];
1106 final double ptk = thole[k];
1107 dx[0] = xs[k] - xi;
1108 dx[1] = ys[k] - yi;
1109 dx[2] = zs[k] - zi;
1110 final double r2 = crystal.image(dx);
1111
1112
1113 final double r = sqrt(r2);
1114 final double rr1 = 1.0 / r;
1115 final double rr2 = rr1 * rr1;
1116 final double ralpha = ewaldParameters.aewald * r;
1117 final double exp2a = exp(-ralpha * ralpha);
1118 final double bn0 = erfc(ralpha) * rr1;
1119
1120
1121 final double bn1 = (bn0 + ewaldParameters.an0 * exp2a) * rr2;
1122 final double bn2 = (3.0 * bn1 + ewaldParameters.an1 * exp2a) * rr2;
1123 double scale3 = 1.0;
1124 double scale5 = 1.0;
1125 double damp = pdi * pdk;
1126 final double pgamma = min(pti, ptk);
1127 final double rdamp = r * damp;
1128 damp = -pgamma * rdamp * rdamp * rdamp;
1129 if (damp > -50.0) {
1130 final double expdamp = exp(damp);
1131 scale3 = 1.0 - expdamp;
1132 scale5 = 1.0 - expdamp * (1.0 - damp);
1133 }
1134 double rr3 = rr1 * rr2;
1135 double rr5 = 3.0 * rr3 * rr2;
1136 rr3 *= (1.0 - scale3);
1137 rr5 *= (1.0 - scale5);
1138 final double xr = dx[0];
1139 final double yr = dx[1];
1140 final double zr = dx[2];
1141 final double[] dipolek = inds[k];
1142 final double ukx = dipolek[0];
1143 final double uky = dipolek[1];
1144 final double ukz = dipolek[2];
1145 final double[] dipolepk = indCRs[k];
1146 final double pkx = dipolepk[0];
1147 final double pky = dipolepk[1];
1148 final double pkz = dipolepk[2];
1149 final double ukr = ukx * xr + uky * yr + ukz * zr;
1150 final double bn2ukr = bn2 * ukr;
1151 final double fimx = -bn1 * ukx + bn2ukr * xr;
1152 final double fimy = -bn1 * uky + bn2ukr * yr;
1153 final double fimz = -bn1 * ukz + bn2ukr * zr;
1154 final double rr5ukr = rr5 * ukr;
1155 final double fidx = -rr3 * ukx + rr5ukr * xr;
1156 final double fidy = -rr3 * uky + rr5ukr * yr;
1157 final double fidz = -rr3 * ukz + rr5ukr * zr;
1158 fx += selfScale * (fimx - fidx);
1159 fy += selfScale * (fimy - fidy);
1160 fz += selfScale * (fimz - fidz);
1161 final double pkr = pkx * xr + pky * yr + pkz * zr;
1162 final double bn2pkr = bn2 * pkr;
1163 final double pimx = -bn1 * pkx + bn2pkr * xr;
1164 final double pimy = -bn1 * pky + bn2pkr * yr;
1165 final double pimz = -bn1 * pkz + bn2pkr * zr;
1166 final double rr5pkr = rr5 * pkr;
1167 final double pidx = -rr3 * pkx + rr5pkr * xr;
1168 final double pidy = -rr3 * pky + rr5pkr * yr;
1169 final double pidz = -rr3 * pkz + rr5pkr * zr;
1170 px += selfScale * (pimx - pidx);
1171 py += selfScale * (pimy - pidy);
1172 pz += selfScale * (pimz - pidz);
1173 final double uir = uix * xr + uiy * yr + uiz * zr;
1174 final double bn2uir = bn2 * uir;
1175 final double fkmx = -bn1 * uix + bn2uir * xr;
1176 final double fkmy = -bn1 * uiy + bn2uir * yr;
1177 final double fkmz = -bn1 * uiz + bn2uir * zr;
1178 final double rr5uir = rr5 * uir;
1179 final double fkdx = -rr3 * uix + rr5uir * xr;
1180 final double fkdy = -rr3 * uiy + rr5uir * yr;
1181 final double fkdz = -rr3 * uiz + rr5uir * zr;
1182 double xc = selfScale * (fkmx - fkdx);
1183 double yc = selfScale * (fkmy - fkdy);
1184 double zc = selfScale * (fkmz - fkdz);
1185 double fkx = (xc * transOp[0][0] + yc * transOp[1][0] + zc * transOp[2][0]);
1186 double fky = (xc * transOp[0][1] + yc * transOp[1][1] + zc * transOp[2][1]);
1187 double fkz = (xc * transOp[0][2] + yc * transOp[1][2] + zc * transOp[2][2]);
1188 field.add(threadID, k, fkx, fky, fkz);
1189 final double pir = pix * xr + piy * yr + piz * zr;
1190 final double bn2pir = bn2 * pir;
1191 final double pkmx = -bn1 * pix + bn2pir * xr;
1192 final double pkmy = -bn1 * piy + bn2pir * yr;
1193 final double pkmz = -bn1 * piz + bn2pir * zr;
1194 final double rr5pir = rr5 * pir;
1195 final double pkdx = -rr3 * pix + rr5pir * xr;
1196 final double pkdy = -rr3 * piy + rr5pir * yr;
1197 final double pkdz = -rr3 * piz + rr5pir * zr;
1198 xc = selfScale * (pkmx - pkdx);
1199 yc = selfScale * (pkmy - pkdy);
1200 zc = selfScale * (pkmz - pkdz);
1201 fkx = (xc * transOp[0][0] + yc * transOp[1][0] + zc * transOp[2][0]);
1202 fky = (xc * transOp[0][1] + yc * transOp[1][1] + zc * transOp[2][1]);
1203 fkz = (xc * transOp[0][2] + yc * transOp[1][2] + zc * transOp[2][2]);
1204 fieldCR.add(threadID, k, fkx, fky, fkz);
1205 }
1206 field.add(threadID, i, fx, fy, fz);
1207 fieldCR.add(threadID, i, px, py, pz);
1208 }
1209 }
1210 }
1211
1212 @Override
1213 public IntegerSchedule schedule() {
1214 return realSpaceSchedule;
1215 }
1216
1217 @Override
1218 public void start() {
1219 threadID = getThreadIndex();
1220 realSpaceSCFTime[threadID] -= System.nanoTime();
1221 x = coordinates[0][0];
1222 y = coordinates[0][1];
1223 z = coordinates[0][2];
1224 ind = inducedDipole[0];
1225 indCR = inducedDipoleCR[0];
1226 }
1227 }
1228 }
1229 }