View Javadoc
1   // ******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2024.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  // ******************************************************************************
38  package ffx.potential.nonbonded.pme;
39  
40  import static ffx.numerics.special.Erf.erfc;
41  import static java.lang.String.format;
42  import static org.apache.commons.math3.util.FastMath.exp;
43  import static org.apache.commons.math3.util.FastMath.max;
44  import static org.apache.commons.math3.util.FastMath.min;
45  import static org.apache.commons.math3.util.FastMath.sqrt;
46  
47  import edu.rit.pj.IntegerForLoop;
48  import edu.rit.pj.IntegerSchedule;
49  import edu.rit.pj.ParallelRegion;
50  import edu.rit.pj.ParallelTeam;
51  import edu.rit.pj.reduction.SharedDouble;
52  import ffx.crystal.Crystal;
53  import ffx.crystal.SymOp;
54  import ffx.numerics.atomic.AtomicDoubleArray3D;
55  import ffx.potential.bonded.Atom;
56  import ffx.potential.nonbonded.ParticleMeshEwald;
57  import ffx.potential.parameters.ForceField;
58  import ffx.potential.utils.EnergyException;
59  import ffx.utilities.Constants;
60  
61  import java.util.List;
62  import java.util.logging.Level;
63  import java.util.logging.Logger;
64  
65  /**
66   * Parallel pre-conditioned conjugate gradient solver for the self-consistent field.
67   * <p>
68   * This solves the linear system A x = b using an iterative approach where
69   * <p>
70   * A: is an N x N symmetric, positive definite matrix
71   * <p>
72   * B: is a known vector of length N.
73   * <p>
74   * x: is the unknown vector of length N.
75   * <p>
76   * For the AMOEBA SCF, the linear system Ax = b is usually denoted: C u = E_multipoles
77   * <br>
78   * where C = [alpha^-1 - T]
79   * <br>
80   * u are the induced dipoles.
81   * <br>
82   * E_multipoles is the direct field from permanent multipoles.
83   * <p>
84   * The matrix alpha^-1 is the inverse of the N x N diagonal polarizability matrix. The matrix T is
85   * the N x N matrix that produces the field due to induced dipoles.
86   * <p>
87   * Initialization:
88   * <br>
89   * 1) Compute the residual:        r_0 = E_mutipoles - C u_direct = E_direct_induced
90   * <br>
91   * 2) Compute the preconditioner:  z_0 = M^1 r_0
92   * <br>
93   * 3) Compute the conjugate:       p_0 = z_0
94   * <br>
95   * 4) Initial loop index: k = 0
96   * <p>
97   * Then loop over:
98   * <br>
99   * 1) Update the step size:        alpha = r_k dot z_k / (p_k A p_k)
100  * <br>
101  * 2) Update the induced dipoles:  u_k+1 = u_k + alpha * p_k
102  * <br>
103  * 3) Update the residual:         r_k+1 = r_k - alpha * A p_k
104  * <br>
105  * 4) Check for convergence of r_k+1.
106  * <br>
107  * 5) Update the preconditioner:   z_k+1 = M^-1 r_k+1
108  * <br>
109  * 6) Update the step size:        beta = r_k+1 dot z_k+1 / (r_k dot z_k)
110  * <br>
111  * 7) Update the conjugate:        p_k+1 = z_k+1 + beta * p_k
112  * <br>
113  * 8) Update the loop index:       k = k + 1
114  *
115  * @author Michael J. Schnieders
116  * @since 1.0
117  */
118 public class PCGSolver {
119 
120   private static final Logger logger = Logger.getLogger(PCGSolver.class.getName());
121 
122   /**
123    * Compute the initial residual.
124    */
125   private final InitResidualRegion initResidualRegion;
126   /**
127    * Compute the initial conjugate search direction.
128    */
129   private final InitConjugateRegion initConjugateRegion;
130   /**
131    * Update the residual.
132    */
133   private final UpdateResidualRegion updateResidualRegion;
134   /**
135    * Update teh conjugate search direction.
136    */
137   private final UpdateConjugateRegion updateConjugateRegion;
138   /**
139    * Apply the preconditioner.
140    */
141   private final PreconditionerRegion preconditionerRegion;
142 
143   /**
144    * The default Beta step size used to update the conjugate direction is based on the Fletcher-Reeves formula.
145    * An alternative Beta step size uses the Polak-Ribiere formula for the flexible preconditioned conjugate gradient method.
146    * For a symmetric positive definite preconditioner, the FR and PR steps sizes are equivalent.
147    * The steepest decent method sets beta to zero.
148    */
149   private enum PRECONDITION_MODE {FLETCHER_REEVES, FLEXIBLE, STEEPEST_DECENT}
150 
151   /**
152    * The SCF convergence criteria in Debye.
153    */
154   private final double poleps;
155   /**
156    * The cutoff used by the CG preconditioner in Angstroms.
157    */
158   private final double preconditionerCutoff;
159   /**
160    * The Ewald coefficient used by the CG preconditioner.
161    */
162   private final double preconditionerEwald;
163   /**
164    * Acceleration factor for induced dipole SCF iterations. This parameter weights the diagonol
165    * elements of the preconditioning matrix.
166    */
167   private final double preconditionerScale;
168   /**
169    * The default Beta step size used to update the conjugate direction is based on the Fletcher-Reeves formula.
170    * An alternative Beta step size uses the Polak-Ribiere formula for the flexible preconditioned conjugate gradient method.
171    * For a symmetric positive definite preconditioner, the FR and PR steps sizes are equivalent.
172    * The steepest decent method sets beta to zero.
173    */
174   private final PRECONDITION_MODE preconditionMode;
175   /**
176    * Neighbor lists, without atoms beyond the preconditioner cutoff.
177    * [nSymm][nAtoms][nIncludedNeighbors]
178    */
179   private int[][][] preconditionerLists;
180   /**
181    * Number of neighboring atoms within the preconditioner cutoff. [nSymm][nAtoms]
182    */
183   private int[][] preconditionerCounts;
184   /**
185    * Residual vector (an electric field).
186    */
187   private double[][] r;
188   /**
189    * Residual vector for the chain-rule dipoles (an electric field).
190    */
191   private double[][] rCR;
192   /**
193    * Preconditioner dipoles (z = M^-1 r).
194    */
195   private double[][] zpre;
196   /**
197    * Preconditioner dipoles for the chain-rule dipoles (zCR = M^-1 rCR).
198    */
199   private double[][] zpreCR;
200   /**
201    * Conjugate search direction (induced dipoles).
202    */
203   private double[][] p;
204   /**
205    * Conjugate search direction for the chain-rule dipoles (induced dipoles).
206    */
207   private double[][] pCR;
208   /**
209    * Work vector.
210    */
211   private double[][] vec;
212   /**
213    * Work vector for the chain-rule dipoles.
214    */
215   private double[][] vecCR;
216   /**
217    * An ordered array of atoms in the system.
218    */
219   private Atom[] atoms;
220   /**
221    * Dimensions of [nsymm][xyz][nAtoms].
222    */
223   private double[][][] coordinates;
224   /**
225    * Polarizability of each atom.
226    */
227   private double[] polarizability;
228   /**
229    * Inverse of pdamp for each atom.
230    */
231   private double[] ipdamp;
232   /**
233    * Thole parameter for each atom.
234    */
235   private double[] thole;
236   /**
237    * When computing the polarization energy at Lambda there are 3 pieces.
238    *
239    * <p>1.) Upol(1) = The polarization energy computed normally (i.e. system with ligand).
240    *
241    * <p>2.) Uenv = The polarization energy of the system without the ligand.
242    *
243    * <p>3.) Uligand = The polarization energy of the ligand by itself.
244    *
245    * <p>Upol(L) = L*Upol(1) + (1-L)*(Uenv + Uligand)
246    *
247    * <p>Set the "use" array to true for all atoms for part 1.
248    * <p>Set the "use" array to true for all atoms except the ligand for part 2.
249    * <p>Set the "use" array to true only for the ligand atoms for part 3.
250    *
251    * <p>The "use" array can also be employed to turn off atoms for computing the electrostatic
252    * energy of sub-structures.
253    */
254   private boolean[] use;
255   /**
256    * Unit cell and spacegroup information.
257    */
258   private Crystal crystal;
259   /**
260    * Induced dipoles with dimensions of [nsymm][nAtoms][3].
261    */
262   private double[][][] inducedDipole;
263   private double[][][] inducedDipoleCR;
264   /**
265    * Direct induced dipoles with dimensions of [nAtoms][3].
266    */
267   private double[][] directDipole;
268   private double[][] directDipoleCR;
269   /**
270    * Field array.
271    */
272   private AtomicDoubleArray3D field;
273   /**
274    * Chain rule field array.
275    */
276   private AtomicDoubleArray3D fieldCR;
277   private EwaldParameters ewaldParameters;
278   /**
279    * The default ParallelTeam encapsulates the maximum number of threads used to parallelize the
280    * electrostatics calculation.
281    */
282   private ParallelTeam parallelTeam;
283   /**
284    * Pairwise schedule for load balancing.
285    */
286   private IntegerSchedule realSpaceSchedule;
287   private long[] realSpaceSCFTime;
288 
289   /**
290    * A preconditioner cut-off of 3 to 4 Angstroms generally works well.
291    * <br>
292    * z = M^(-1) r = polarizability * (E_r + scale * r)
293    * <br>
294    * The cutoff is applied when computing E_r (the field due to polarizability * residual).
295    */
296   public static final double DEFAULT_CG_PRECONDITIONER_CUTOFF = 4.5;
297 
298   /**
299    * An Ewald coefficient of 0 indicates use of Coulomb's law with no damping.
300    * <br>
301    * z = M^(-1) r = polarizability * (E_r + scale * r)
302    * <br>
303    * Ewald or Coulomb is used when computing E_r (the field due to polarizability * residual).
304    */
305   public static final double DEFAULT_CG_PRECONDITIONER_EWALD = 0.0;
306 
307   /**
308    * The scale factor is applied to the diagonal terms of the preconditioner.
309    * <br>
310    * z = M^(-1) r = polar * (E_r + scale * r)
311    * <br>
312    * The "scale * polar * r" term is the diagonal part of M^(-1)
313    */
314   public static final double DEFAULT_CG_PRECONDITIONER_SCALE = 2.0;
315 
316   private double dieletric;
317 
318   /**
319    * Constructor the PCG solver.
320    *
321    * @param maxThreads Number of threads.
322    * @param poleps     Convergence criteria (RMS Debye).
323    * @param forceField Force field in use.
324    * @param nAtoms     Initial number of atoms.
325    */
326   public PCGSolver(int maxThreads, double poleps, ForceField forceField, int nAtoms) {
327     this.poleps = poleps;
328     preconditionerRegion = new PreconditionerRegion(maxThreads);
329     initResidualRegion = new InitResidualRegion(maxThreads);
330     initConjugateRegion = new InitConjugateRegion(maxThreads);
331     updateResidualRegion = new UpdateResidualRegion(maxThreads);
332     updateConjugateRegion = new UpdateConjugateRegion(maxThreads);
333 
334     // The size of the preconditioner neighbor list depends on the size of the preconditioner
335     // cutoff.
336     boolean preconditioner = forceField.getBoolean("USE_SCF_PRECONDITIONER", true);
337     if (preconditioner) {
338       preconditionerCutoff = forceField.getDouble("CG_PRECONDITIONER_CUTOFF", DEFAULT_CG_PRECONDITIONER_CUTOFF);
339       preconditionerEwald = forceField.getDouble("CG_PRECONDITIONER_EWALD", DEFAULT_CG_PRECONDITIONER_EWALD);
340       preconditionerScale = forceField.getDouble("CG_PRECONDITIONER_SCALE", DEFAULT_CG_PRECONDITIONER_SCALE);
341       PRECONDITION_MODE mode = PRECONDITION_MODE.FLETCHER_REEVES;
342       try {
343         String m = forceField.getString("CG_PRECONDITIONER_MODE", PRECONDITION_MODE.FLETCHER_REEVES.toString());
344         m = ForceField.toEnumForm(m);
345         mode = PRECONDITION_MODE.valueOf(m);
346       } catch (Exception e) {
347         // Do nothing.
348       }
349       preconditionMode = mode;
350     } else {
351       preconditionerCutoff = 0.0;
352       preconditionerEwald = 0.0;
353       preconditionerScale = 0.0;
354       preconditionMode = PRECONDITION_MODE.FLETCHER_REEVES;
355     }
356 
357     allocateVectors(nAtoms);
358   }
359 
360   /**
361    * Allocate storage for pre-conditioner neighbor list.
362    *
363    * @param nSymm  Number of symmetry operators.
364    * @param nAtoms Number of atoms.
365    */
366   public void allocateLists(int nSymm, int nAtoms) {
367     int preconditionerListSize = 50;
368     preconditionerLists = new int[nSymm][nAtoms][preconditionerListSize];
369     preconditionerCounts = new int[nSymm][nAtoms];
370   }
371 
372   /**
373    * Allocate PCG vectors.
374    *
375    * @param nAtoms The number of atoms.
376    */
377   public void allocateVectors(int nAtoms) {
378     if (r == null || r[0].length != nAtoms) {
379       r = new double[3][nAtoms];
380       rCR = new double[3][nAtoms];
381       zpre = new double[3][nAtoms];
382       zpreCR = new double[3][nAtoms];
383       p = new double[3][nAtoms];
384       pCR = new double[3][nAtoms];
385       vec = new double[3][nAtoms];
386       vecCR = new double[3][nAtoms];
387     }
388   }
389 
390   /**
391    * Get the preconditioner cutoff.
392    *
393    * @return The cutoff in Angstroms.
394    */
395   public double getPreconditionerCutoff() {
396     return preconditionerCutoff;
397   }
398 
399   /**
400    * Get the preconditioner Ewald coefficient.
401    *
402    * @return The Ewald coefficient.
403    */
404   public double getPreconditionerEwald() {
405     return preconditionerEwald;
406   }
407 
408   /**
409    * Get the preconditioner matrix diagonal scale factor.
410    *
411    * @return The unitless scale factor.
412    */
413   public double getPreconditionerScale() {
414     return preconditionerScale;
415   }
416 
417   /**
418    * Get the preconditioner mode.
419    *
420    * @return The mode.
421    */
422   public String getPreconditionerMode() {
423     return preconditionMode.toString();
424   }
425 
426   /**
427    * Neighbor lists when applying the preconditioner.
428    *
429    * @return Storage for the preconditioner neighbor lists.
430    */
431   public int[][][] getPreconditionerLists() {
432     return preconditionerLists;
433   }
434 
435   /**
436    * Number of neighbors when applying the preconditioner.
437    *
438    * @return Storage for the preconditioner counts.
439    */
440   public int[][] getPreconditionerCounts() {
441     return preconditionerCounts;
442   }
443 
444   public void init(
445       Atom[] atoms,
446       double[][][] coordinates,
447       double[] polarizability,
448       double[] ipdamp,
449       double[] thole,
450       boolean[] use,
451       Crystal crystal,
452       double[][][] inducedDipole,
453       double[][][] inducedDipoleCR,
454       double[][] directDipole,
455       double[][] directDipoleCR,
456       AtomicDoubleArray3D field,
457       AtomicDoubleArray3D fieldCR,
458       EwaldParameters ewaldParameters,
459       double dieletric,
460       ParallelTeam parallelTeam,
461       IntegerSchedule realSpaceSchedule,
462       long[] realSpaceSCFTime) {
463     this.atoms = atoms;
464     this.coordinates = coordinates;
465     this.polarizability = polarizability;
466     this.ipdamp = ipdamp;
467     this.thole = thole;
468     this.use = use;
469     this.crystal = crystal;
470     this.inducedDipole = inducedDipole;
471     this.inducedDipoleCR = inducedDipoleCR;
472     this.directDipole = directDipole;
473     this.directDipoleCR = directDipoleCR;
474     this.field = field;
475     this.fieldCR = fieldCR;
476     this.ewaldParameters = ewaldParameters;
477     this.dieletric = dieletric;
478     this.parallelTeam = parallelTeam;
479     this.realSpaceSchedule = realSpaceSchedule;
480     this.realSpaceSCFTime = realSpaceSCFTime;
481   }
482 
483   public int scfByPCG(boolean print, long startTime, ParticleMeshEwald particleMeshEwald) {
484     long directTime = System.nanoTime() - startTime;
485     // A request of 0 SCF cycles simplifies mutual polarization to direct polarization.
486     StringBuilder sb = null;
487     if (print) {
488       sb = new StringBuilder("\n Self-Consistent Field\n Iter  RMS Change (Debye)  Time\n");
489     }
490 
491     // Find the induced dipole field due to direct dipoles
492     // (or predicted induced dipoles from previous steps).
493     particleMeshEwald.computeInduceDipoleField();
494 
495     try {
496       // Set the initial residual.
497       parallelTeam.execute(initResidualRegion);
498 
499       // Compute the induced field due to the residual using short cut-offs.
500       computePreconditioner();
501 
502       // Set initial conjugate vector.
503       parallelTeam.execute(initConjugateRegion);
504     } catch (Exception e) {
505       String message = "Exception initializing preconditioned conjugate-gradient SCF solver.";
506       logger.log(Level.SEVERE, message, e);
507     }
508 
509     // Conjugate gradient iteration of the mutual induced dipoles.
510     int completedSCFCycles = 0;
511     int maxSCFCycles = 1000;
512     double eps = 100.0;
513     double previousEps;
514     boolean done = false;
515     while (!done) {
516       long cycleTime = -System.nanoTime();
517 
518       // Find the induced dipole field due to the conjugate search direction p.
519       // Note that induced dipole vectors are loaded with p by:
520       //  A) The InitConjugateRegion instance prior to the first iteration.
521       //  B) The UpdateConjugateRegion instance for subsequent iterations.
522       particleMeshEwald.computeInduceDipoleField();
523 
524       try {
525         /*
526          * 1) Using the induced dipole field, compute A p_k = [1/alpha - T] p_k
527          *   p / alpha  the field that induced the conjugate induced dipoles.
528          *   T p        the field due to the conjugate induced dipoles.
529          * 2) Compute the step size alpha.
530          * 3) Update the residual and induced dipole solution.
531          */
532         parallelTeam.execute(updateResidualRegion);
533       } catch (Exception e) {
534         String message = "Exception updating residual during CG iteration.";
535         logger.log(Level.SEVERE, message, e);
536       }
537 
538       // Check for convergence.
539       previousEps = eps;
540       eps = max(updateResidualRegion.getEps(), updateResidualRegion.getEpsCR());
541       completedSCFCycles++;
542       int nAtoms = atoms.length;
543       eps = Constants.ELEC_ANG_TO_DEBYE * sqrt(eps / (double) nAtoms);
544       cycleTime += System.nanoTime();
545       if (print) {
546         sb.append(format(" %4d     %15.10f %7.4f\n", completedSCFCycles, eps, cycleTime * Constants.NS2SEC));
547       }
548 
549       // If the RMS Debye change increases, fail the SCF process.
550       if (eps > previousEps) {
551         if (sb != null) {
552           logger.warning(sb.toString());
553         }
554         String message = format("SCF convergence failure: (%10.5f > %10.5f)\n", eps, previousEps);
555         throw new EnergyException(message);
556       }
557 
558       // The SCF should converge before the max iteration check.
559       if (completedSCFCycles >= maxSCFCycles) {
560         if (sb != null) {
561           logger.warning(sb.toString());
562         }
563         String message = format("Maximum SCF iterations reached: (%d)\n", completedSCFCycles);
564         throw new EnergyException(message);
565       }
566 
567       // Check if the convergence criteria has been achieved.
568       if (eps < poleps) {
569         done = true;
570       } else {
571         // Compute the induced field due to the residual using short cut-offs.
572         computePreconditioner();
573         try {
574           /*
575            * 1) Compute the dot product of the residual (r) and preconditioner field (z).
576            * 2) Compute the step size beta.
577            * 3) Update the conjugate search direction (p).
578            */
579           updateConjugateRegion.previousRDotZ = updateResidualRegion.getRDotZ();
580           updateConjugateRegion.previousRDotZCR = updateResidualRegion.getRDotZCR();
581           parallelTeam.execute(updateConjugateRegion);
582         } catch (Exception e) {
583           String message = "Exception updating conjugate search direction during CG iteration.";
584           logger.log(Level.SEVERE, message, e);
585         }
586       }
587     }
588 
589     if (print) {
590       sb.append(format(" Direct:                  %7.4f\n", Constants.NS2SEC * directTime));
591       startTime = System.nanoTime() - startTime;
592       sb.append(format(" Total:                   %7.4f", startTime * Constants.NS2SEC));
593       logger.info(sb.toString());
594     }
595 
596     // Find the final induced dipole field.
597     particleMeshEwald.computeInduceDipoleField();
598 
599     return completedSCFCycles;
600   }
601 
602   /**
603    * Set the initial Residual r_0 = E_perm - u_0 / alpha + E_u_0.
604    */
605   private class InitResidualRegion extends ParallelRegion {
606 
607     private final InitResidualLoop[] initResidualLoops;
608 
609     public InitResidualRegion(int nt) {
610       initResidualLoops = new InitResidualLoop[nt];
611     }
612 
613     @Override
614     public void run() throws Exception {
615       try {
616         int ti = getThreadIndex();
617         if (initResidualLoops[ti] == null) {
618           initResidualLoops[ti] = new InitResidualLoop();
619         }
620         int nAtoms = atoms.length;
621         execute(0, nAtoms - 1, initResidualLoops[ti]);
622       } catch (Exception e) {
623         String message =
624             "Fatal exception computing the mutual induced dipoles in thread "
625                 + getThreadIndex()
626                 + "\n";
627         logger.log(Level.SEVERE, message, e);
628       }
629     }
630 
631     private class InitResidualLoop extends IntegerForLoop {
632 
633       @Override
634       public void run(int lb, int ub) throws Exception {
635         for (int i = lb; i <= ub; i++) {
636           // Set initial residual to the direct field.
637           if (use[i] && polarizability[i] > 0.0) {
638             double ipolar = 1.0 / polarizability[i];
639             r[0][i] = (directDipole[i][0] - inducedDipole[0][i][0]) * ipolar + field.getX(i);
640             r[1][i] = (directDipole[i][1] - inducedDipole[0][i][1]) * ipolar + field.getY(i);
641             r[2][i] = (directDipole[i][2] - inducedDipole[0][i][2]) * ipolar + field.getZ(i);
642             rCR[0][i] = (directDipoleCR[i][0] - inducedDipoleCR[0][i][0]) * ipolar + fieldCR.getX(i);
643             rCR[1][i] = (directDipoleCR[i][1] - inducedDipoleCR[0][i][1]) * ipolar + fieldCR.getY(i);
644             rCR[2][i] = (directDipoleCR[i][2] - inducedDipoleCR[0][i][2]) * ipolar + fieldCR.getZ(i);
645           } else {
646             r[0][i] = 0.0;
647             r[1][i] = 0.0;
648             r[2][i] = 0.0;
649             rCR[0][i] = 0.0;
650             rCR[1][i] = 0.0;
651             rCR[2][i] = 0.0;
652           }
653         }
654       }
655 
656       @Override
657       public IntegerSchedule schedule() {
658         return IntegerSchedule.fixed();
659       }
660     }
661   }
662 
663   /**
664    * Set the initial conjugate direction p_0 = z_0 = M^-1 * r_0.
665    * <p>
666    * Where M^-1 is the inverse of the preconditioner matrix.
667    */
668   private class InitConjugateRegion extends ParallelRegion {
669 
670     private final InitConjugateLoop[] initConjugateLoops;
671 
672     public InitConjugateRegion(int nt) {
673       initConjugateLoops = new InitConjugateLoop[nt];
674     }
675 
676     @Override
677     public void run() throws Exception {
678       try {
679         int ti = getThreadIndex();
680         if (initConjugateLoops[ti] == null) {
681           initConjugateLoops[ti] = new InitConjugateLoop();
682         }
683         int nAtoms = atoms.length;
684         execute(0, nAtoms - 1, initConjugateLoops[ti]);
685       } catch (Exception e) {
686         String message =
687             "Fatal exception computing the mutual induced dipoles in thread "
688                 + getThreadIndex()
689                 + "\n";
690         logger.log(Level.SEVERE, message, e);
691       }
692     }
693 
694     private class InitConjugateLoop extends IntegerForLoop {
695 
696       @Override
697       public void run(int lb, int ub) throws Exception {
698 
699         for (int i = lb; i <= ub; i++) {
700           if (use[i]) {
701             // Set initial conjugate vector p (induced dipoles).
702             double polar = polarizability[i];
703             zpre[0][i] = polar * (field.getX(i) + preconditionerScale * r[0][i]);
704             zpre[1][i] = polar * (field.getY(i) + preconditionerScale * r[1][i]);
705             zpre[2][i] = polar * (field.getZ(i) + preconditionerScale * r[2][i]);
706             zpreCR[0][i] = polar * (fieldCR.getX(i) + preconditionerScale * rCR[0][i]);
707             zpreCR[1][i] = polar * (fieldCR.getY(i) + preconditionerScale * rCR[1][i]);
708             zpreCR[2][i] = polar * (fieldCR.getZ(i) + preconditionerScale * rCR[2][i]);
709             p[0][i] = zpre[0][i];
710             p[1][i] = zpre[1][i];
711             p[2][i] = zpre[2][i];
712             pCR[0][i] = zpreCR[0][i];
713             pCR[1][i] = zpreCR[1][i];
714             pCR[2][i] = zpreCR[2][i];
715 
716             // Store the current induced dipoles.
717             vec[0][i] = inducedDipole[0][i][0];
718             vec[1][i] = inducedDipole[0][i][1];
719             vec[2][i] = inducedDipole[0][i][2];
720             vecCR[0][i] = inducedDipoleCR[0][i][0];
721             vecCR[1][i] = inducedDipoleCR[0][i][1];
722             vecCR[2][i] = inducedDipoleCR[0][i][2];
723 
724             // Load conjugate vector whose field will be computed next.
725             inducedDipole[0][i][0] = p[0][i];
726             inducedDipole[0][i][1] = p[1][i];
727             inducedDipole[0][i][2] = p[2][i];
728             inducedDipoleCR[0][i][0] = pCR[0][i];
729             inducedDipoleCR[0][i][1] = pCR[1][i];
730             inducedDipoleCR[0][i][2] = pCR[2][i];
731 
732           } else {
733             zpre[0][i] = 0.0;
734             zpre[1][i] = 0.0;
735             zpre[2][i] = 0.0;
736             zpreCR[0][i] = 0.0;
737             zpreCR[1][i] = 0.0;
738             zpreCR[2][i] = 0.0;
739             p[0][i] = 0.0;
740             p[1][i] = 0.0;
741             p[2][i] = 0.0;
742             pCR[0][i] = 0.0;
743             pCR[1][i] = 0.0;
744             pCR[2][i] = 0.0;
745             vec[0][i] = 0.0;
746             vec[1][i] = 0.0;
747             vec[2][i] = 0.0;
748             vecCR[0][i] = 0.0;
749             vecCR[1][i] = 0.0;
750             vecCR[2][i] = 0.0;
751             inducedDipole[0][i][0] = 0.0;
752             inducedDipole[0][i][1] = 0.0;
753             inducedDipole[0][i][2] = 0.0;
754             inducedDipoleCR[0][i][0] = 0.0;
755             inducedDipoleCR[0][i][1] = 0.0;
756             inducedDipoleCR[0][i][2] = 0.0;
757           }
758         }
759       }
760 
761       @Override
762       public IntegerSchedule schedule() {
763         return IntegerSchedule.fixed();
764       }
765     }
766   }
767 
768   /**
769    * Update the induced dipoles and residuals.
770    */
771   private class UpdateResidualRegion extends ParallelRegion {
772 
773     private final UpdateApLoop[] updateApLoops;
774     private final UpdateResidualAndInducedLoop[] updateResidualAndInducedLoops;
775     private final SharedDouble pDotApShared;
776     private final SharedDouble pDotApCRShared;
777     private final SharedDouble rDotZShared;
778     private final SharedDouble rDotZCRShared;
779     private final SharedDouble epsShared;
780     private final SharedDouble epsCRShared;
781 
782     public UpdateResidualRegion(int nt) {
783       updateApLoops = new UpdateApLoop[nt];
784       updateResidualAndInducedLoops = new UpdateResidualAndInducedLoop[nt];
785       pDotApShared = new SharedDouble();
786       pDotApCRShared = new SharedDouble();
787       rDotZShared = new SharedDouble();
788       rDotZCRShared = new SharedDouble();
789       epsShared = new SharedDouble();
790       epsCRShared = new SharedDouble();
791     }
792 
793     public double getRDotZ() {
794       return rDotZShared.get();
795     }
796 
797     public double getRDotZCR() {
798       return rDotZCRShared.get();
799     }
800 
801     public double getEps() {
802       return epsShared.get();
803     }
804 
805     public double getEpsCR() {
806       return epsCRShared.get();
807     }
808 
809     @Override
810     public void run() throws Exception {
811       try {
812         int ti = getThreadIndex();
813         int nAtoms = atoms.length;
814         if (updateApLoops[ti] == null) {
815           updateApLoops[ti] = new UpdateApLoop();
816           updateResidualAndInducedLoops[ti] = new UpdateResidualAndInducedLoop();
817         }
818         execute(0, nAtoms - 1, updateApLoops[ti]);
819         execute(0, nAtoms - 1, updateResidualAndInducedLoops[ti]);
820       } catch (Exception e) {
821         String message =
822             "Fatal exception computing the mutual induced dipoles in thread "
823                 + getThreadIndex()
824                 + "\n";
825         logger.log(Level.SEVERE, message, e);
826       }
827     }
828 
829     @Override
830     public void start() {
831       pDotApShared.set(0.0);
832       pDotApCRShared.set(0.0);
833       rDotZShared.set(0.0);
834       rDotZCRShared.set(0.0);
835       epsShared.set(0.0);
836       epsCRShared.set(0.0);
837     }
838 
839     private class UpdateApLoop extends IntegerForLoop {
840 
841       public double pDotAp;
842       public double pDotApCR;
843       public double rDotZ;
844       public double rDotZCR;
845 
846       @Override
847       public void finish() {
848         pDotApShared.addAndGet(pDotAp);
849         pDotApCRShared.addAndGet(pDotApCR);
850         rDotZShared.addAndGet(rDotZ);
851         rDotZCRShared.addAndGet(rDotZCR);
852       }
853 
854       @Override
855       public void run(int lb, int ub) throws Exception {
856         for (int i = lb; i <= ub; i++) {
857           if (use[i] && polarizability[i] > 0) {
858             double ipolar = 1.0 / polarizability[i];
859             // Load induced dipoles from the work arrays.
860             inducedDipole[0][i][0] = vec[0][i];
861             inducedDipole[0][i][1] = vec[1][i];
862             inducedDipole[0][i][2] = vec[2][i];
863             inducedDipoleCR[0][i][0] = vecCR[0][i];
864             inducedDipoleCR[0][i][1] = vecCR[1][i];
865             inducedDipoleCR[0][i][2] = vecCR[2][i];
866             // Compute Ap = [1/alpha - T] p
867             // p / alpha  the field that induced the conjugate induced dipoles.
868             // T p        the field due to the conjugate induced dipoles.
869             vec[0][i] = p[0][i] * ipolar - field.getX(i);
870             vec[1][i] = p[1][i] * ipolar - field.getY(i);
871             vec[2][i] = p[2][i] * ipolar - field.getZ(i);
872             vecCR[0][i] = pCR[0][i] * ipolar - fieldCR.getX(i);
873             vecCR[1][i] = pCR[1][i] * ipolar - fieldCR.getY(i);
874             vecCR[2][i] = pCR[2][i] * ipolar - fieldCR.getZ(i);
875           } else {
876             inducedDipole[0][i][0] = 0.0;
877             inducedDipole[0][i][1] = 0.0;
878             inducedDipole[0][i][2] = 0.0;
879             inducedDipoleCR[0][i][0] = 0.0;
880             inducedDipoleCR[0][i][1] = 0.0;
881             inducedDipoleCR[0][i][2] = 0.0;
882             vec[0][i] = 0.0;
883             vec[1][i] = 0.0;
884             vec[2][i] = 0.0;
885             vecCR[0][i] = 0.0;
886             vecCR[1][i] = 0.0;
887             vecCR[2][i] = 0.0;
888           }
889 
890           // Compute dot product of the conjugate vector (p) with Ap (stored in vec).
891           pDotAp += p[0][i] * vec[0][i] + p[1][i] * vec[1][i] + p[2][i] * vec[2][i];
892           pDotApCR += pCR[0][i] * vecCR[0][i] + pCR[1][i] * vecCR[1][i] + pCR[2][i] * vecCR[2][i];
893 
894           // Compute dot product of the residual (r) and preconditioner (z).
895           rDotZ += r[0][i] * zpre[0][i] + r[1][i] * zpre[1][i] + r[2][i] * zpre[2][i];
896           rDotZCR += rCR[0][i] * zpreCR[0][i] + rCR[1][i] * zpreCR[1][i] + rCR[2][i] * zpreCR[2][i];
897         }
898       }
899 
900       @Override
901       public IntegerSchedule schedule() {
902         return IntegerSchedule.fixed();
903       }
904 
905       @Override
906       public void start() {
907         pDotAp = 0.0;
908         pDotApCR = 0.0;
909         rDotZ = 0.0;
910         rDotZCR = 0.0;
911       }
912     }
913 
914     private class UpdateResidualAndInducedLoop extends IntegerForLoop {
915 
916       double eps;
917       double epsCR;
918 
919       @Override
920       public void start() {
921         eps = 0.0;
922         epsCR = 0.0;
923       }
924 
925       @Override
926       public void finish() {
927         epsShared.addAndGet(eps);
928         epsCRShared.addAndGet(epsCR);
929       }
930 
931       @Override
932       public void run(int lb, int ub) throws Exception {
933         double alpha = rDotZShared.get();
934         if (pDotApShared.get() != 0) {
935           alpha /= pDotApShared.get();
936         }
937         double alphaCR = rDotZCRShared.get();
938         if (pDotApCRShared.get() != 0) {
939           alphaCR /= pDotApCRShared.get();
940         }
941         for (int i = lb; i <= ub; i++) {
942           if (use[i]) {
943             // Update the residual r_k+1 using Ap (stored in vec)
944             r[0][i] -= alpha * vec[0][i];
945             r[1][i] -= alpha * vec[1][i];
946             r[2][i] -= alpha * vec[2][i];
947             rCR[0][i] -= alphaCR * vecCR[0][i];
948             rCR[1][i] -= alphaCR * vecCR[1][i];
949             rCR[2][i] -= alphaCR * vecCR[2][i];
950 
951             // Update the induced dipoles based on the scaled conjugate vector.
952             inducedDipole[0][i][0] += alpha * p[0][i];
953             inducedDipole[0][i][1] += alpha * p[1][i];
954             inducedDipole[0][i][2] += alpha * p[2][i];
955             inducedDipoleCR[0][i][0] += alphaCR * pCR[0][i];
956             inducedDipoleCR[0][i][1] += alphaCR * pCR[1][i];
957             inducedDipoleCR[0][i][2] += alphaCR * pCR[2][i];
958 
959             // Sum the square of the residual field.
960             eps += r[0][i] * r[0][i] + r[1][i] * r[1][i] + r[2][i] * r[2][i];
961             epsCR += rCR[0][i] * rCR[0][i] + rCR[1][i] * rCR[1][i] + rCR[2][i] * rCR[2][i];
962           } else {
963             r[0][i] = 0.0;
964             r[1][i] = 0.0;
965             r[2][i] = 0.0;
966             rCR[0][i] = 0.0;
967             rCR[1][i] = 0.0;
968             rCR[2][i] = 0.0;
969             inducedDipole[0][i][0] = 0.0;
970             inducedDipole[0][i][1] = 0.0;
971             inducedDipole[0][i][2] = 0.0;
972             inducedDipoleCR[0][i][0] = 0.0;
973             inducedDipoleCR[0][i][1] = 0.0;
974             inducedDipoleCR[0][i][2] = 0.0;
975           }
976         }
977       }
978 
979       @Override
980       public IntegerSchedule schedule() {
981         return IntegerSchedule.fixed();
982       }
983     }
984   }
985 
986   /**
987    * Update the preconditioner and conjugate search direction.
988    */
989   private class UpdateConjugateRegion extends ParallelRegion {
990 
991     private final UpdatePreconditionerLoop[] updatePreconditionerLoops;
992     private final UpdateConjugateLoop[] updateConjugateLoops;
993     private final SharedDouble betaShared;
994     private final SharedDouble betaCRShared;
995     public double previousRDotZ;
996     public double previousRDotZCR;
997 
998     public UpdateConjugateRegion(int nt) {
999       updatePreconditionerLoops = new UpdatePreconditionerLoop[nt];
1000       updateConjugateLoops = new UpdateConjugateLoop[nt];
1001       betaShared = new SharedDouble();
1002       betaCRShared = new SharedDouble();
1003     }
1004 
1005     @Override
1006     public void run() throws Exception {
1007       try {
1008         int ti = getThreadIndex();
1009         if (updatePreconditionerLoops[ti] == null) {
1010           updatePreconditionerLoops[ti] = new UpdatePreconditionerLoop();
1011           updateConjugateLoops[ti] = new UpdateConjugateLoop();
1012         }
1013         int nAtoms = atoms.length;
1014         execute(0, nAtoms - 1, updatePreconditionerLoops[ti]);
1015         execute(0, nAtoms - 1, updateConjugateLoops[ti]);
1016       } catch (Exception e) {
1017         String message =
1018             "Fatal exception computing the mutual induced dipoles in thread "
1019                 + getThreadIndex()
1020                 + "\n";
1021         logger.log(Level.SEVERE, message, e);
1022       }
1023     }
1024 
1025     @Override
1026     public void start() {
1027       betaShared.set(0.0);
1028       betaCRShared.set(0.0);
1029       if (previousRDotZ == 0.0) {
1030         previousRDotZ = 1.0;
1031       }
1032       if (previousRDotZCR == 0.0) {
1033         previousRDotZCR = 1.0;
1034       }
1035     }
1036 
1037     private class UpdatePreconditionerLoop extends IntegerForLoop {
1038 
1039       public double rDotZ;
1040       public double rDotZCR;
1041 
1042       private final double[] deltaZ = new double[3];
1043       private final double[] deltaZCR = new double[3];
1044 
1045       @Override
1046       public void finish() {
1047         betaShared.addAndGet(rDotZ / previousRDotZ);
1048         betaCRShared.addAndGet(rDotZCR / previousRDotZCR);
1049       }
1050 
1051       @Override
1052       public void run(int lb, int ub) throws Exception {
1053         for (int i = lb; i <= ub; i++) {
1054           if (use[i]) {
1055             // Save the negative of the current preconditioner z_k.
1056             deltaZ[0] = -zpre[0][i];
1057             deltaZ[1] = -zpre[1][i];
1058             deltaZ[2] = -zpre[2][i];
1059             deltaZCR[0] = -zpreCR[0][i];
1060             deltaZCR[1] = -zpreCR[1][i];
1061             deltaZCR[2] = -zpreCR[2][i];
1062 
1063             // Update the preconditioner
1064             // z_k+1 = M^(-1) r_k+1
1065             //       = polar * (E_r_k+1 + diagonal scale * r_k+1)
1066             double polar = polarizability[i];
1067             zpre[0][i] = polar * (field.getX(i) + preconditionerScale * r[0][i]);
1068             zpre[1][i] = polar * (field.getY(i) + preconditionerScale * r[1][i]);
1069             zpre[2][i] = polar * (field.getZ(i) + preconditionerScale * r[2][i]);
1070             zpreCR[0][i] = polar * (fieldCR.getX(i) + preconditionerScale * rCR[0][i]);
1071             zpreCR[1][i] = polar * (fieldCR.getY(i) + preconditionerScale * rCR[1][i]);
1072             zpreCR[2][i] = polar * (fieldCR.getZ(i) + preconditionerScale * rCR[2][i]);
1073 
1074             switch (preconditionMode) {
1075               case FLEXIBLE:
1076                 // Set work to z_k+1 - z_k
1077                 deltaZ[0] += zpre[0][i];
1078                 deltaZ[1] += zpre[1][i];
1079                 deltaZ[2] += zpre[2][i];
1080                 deltaZCR[0] += zpreCR[0][i];
1081                 deltaZCR[1] += zpreCR[1][i];
1082                 deltaZCR[2] += zpreCR[2][i];
1083                 // For Flexible, compute the dot product of the residual with the change in preconditioner (z_k+1 - z_k).
1084                 rDotZ += r[0][i] * deltaZ[0] + r[1][i] * deltaZ[1] + r[2][i] * deltaZ[2];
1085                 rDotZCR += rCR[0][i] * deltaZCR[0] + rCR[1][i] * deltaZCR[1] + rCR[2][i] * deltaZCR[2];
1086                 break;
1087               case STEEPEST_DECENT:
1088                 // For Steepest-Decent, rDotZ is zero (i.e. Beta is zero).
1089                 continue;
1090               default:
1091               case FLETCHER_REEVES:
1092                 // For Fletcher-Reeves, compute the dot product of the residual and preconditioner (z_k+1).
1093                 rDotZ += r[0][i] * zpre[0][i] + r[1][i] * zpre[1][i] + r[2][i] * zpre[2][i];
1094                 rDotZCR += rCR[0][i] * zpreCR[0][i] + rCR[1][i] * zpreCR[1][i] + rCR[2][i] * zpreCR[2][i];
1095             }
1096           } else {
1097             zpre[0][i] = 0.0;
1098             zpre[1][i] = 0.0;
1099             zpre[2][i] = 0.0;
1100             zpreCR[0][i] = 0.0;
1101             zpreCR[1][i] = 0.0;
1102             zpreCR[2][i] = 0.0;
1103           }
1104         }
1105       }
1106 
1107       @Override
1108       public IntegerSchedule schedule() {
1109         return IntegerSchedule.fixed();
1110       }
1111 
1112       @Override
1113       public void start() {
1114         rDotZ = 0.0;
1115         rDotZCR = 0.0;
1116       }
1117     }
1118 
1119     private class UpdateConjugateLoop extends IntegerForLoop {
1120 
1121       @Override
1122       public void run(int lb, int ub) throws Exception {
1123         double beta = betaShared.get();
1124         double betaCR = betaCRShared.get();
1125         for (int i = lb; i <= ub; i++) {
1126           if (use[i]) {
1127             // Update the conjugate vector.
1128             p[0][i] = zpre[0][i] + beta * p[0][i];
1129             p[1][i] = zpre[1][i] + beta * p[1][i];
1130             p[2][i] = zpre[2][i] + beta * p[2][i];
1131             pCR[0][i] = zpreCR[0][i] + betaCR * pCR[0][i];
1132             pCR[1][i] = zpreCR[1][i] + betaCR * pCR[1][i];
1133             pCR[2][i] = zpreCR[2][i] + betaCR * pCR[2][i];
1134             // Store induced dipoles.
1135             vec[0][i] = inducedDipole[0][i][0];
1136             vec[1][i] = inducedDipole[0][i][1];
1137             vec[2][i] = inducedDipole[0][i][2];
1138             vecCR[0][i] = inducedDipoleCR[0][i][0];
1139             vecCR[1][i] = inducedDipoleCR[0][i][1];
1140             vecCR[2][i] = inducedDipoleCR[0][i][2];
1141             // Load the conjugate vector whose field will be computed next.
1142             inducedDipole[0][i][0] = p[0][i];
1143             inducedDipole[0][i][1] = p[1][i];
1144             inducedDipole[0][i][2] = p[2][i];
1145             inducedDipoleCR[0][i][0] = pCR[0][i];
1146             inducedDipoleCR[0][i][1] = pCR[1][i];
1147             inducedDipoleCR[0][i][2] = pCR[2][i];
1148           } else {
1149             p[0][i] = 0.0;
1150             p[1][i] = 0.0;
1151             p[2][i] = 0.0;
1152             pCR[0][i] = 0.0;
1153             pCR[1][i] = 0.0;
1154             pCR[2][i] = 0.0;
1155             vec[0][i] = 0.0;
1156             vec[1][i] = 0.0;
1157             vec[2][i] = 0.0;
1158             vecCR[0][i] = 0.0;
1159             vecCR[1][i] = 0.0;
1160             vecCR[2][i] = 0.0;
1161             inducedDipole[0][i][0] = 0.0;
1162             inducedDipole[0][i][1] = 0.0;
1163             inducedDipole[0][i][2] = 0.0;
1164             inducedDipoleCR[0][i][0] = 0.0;
1165             inducedDipoleCR[0][i][1] = 0.0;
1166             inducedDipoleCR[0][i][2] = 0.0;
1167           }
1168         }
1169       }
1170 
1171       @Override
1172       public IntegerSchedule schedule() {
1173         return IntegerSchedule.fixed();
1174       }
1175     }
1176   }
1177 
1178   /**
1179    * Compute the induced field due to the Uind = polarizability * Eresidual.
1180    */
1181   private void computePreconditioner() {
1182     try {
1183       // Reset the preconditioner field.
1184       field.reset(parallelTeam);
1185       fieldCR.reset(parallelTeam);
1186 
1187       // Use a special Ewald coefficient for the pre-conditioner.
1188       double aewaldTemp = ewaldParameters.aewald;
1189       ewaldParameters.setEwaldParameters(ewaldParameters.off, preconditionerEwald);
1190       parallelTeam.execute(preconditionerRegion);
1191       ewaldParameters.setEwaldParameters(ewaldParameters.off, aewaldTemp);
1192 
1193       // Reduce the preconditioner field.
1194       field.reduce(parallelTeam);
1195       fieldCR.reduce(parallelTeam);
1196 
1197       // Scale the pre-conditioner field by the inverse of the dielectric.
1198       if (dieletric > 1.0) {
1199         int nAtoms = atoms.length;
1200         double invDielectric = 1.0 / dieletric;
1201         for (int i = 0; i < nAtoms; i++) {
1202           field.scale(0, i, invDielectric);
1203           fieldCR.scale(0, i, invDielectric);
1204         }
1205       }
1206 
1207     } catch (Exception e) {
1208       String message = "Exception computing the induced field for the preconditioner.";
1209       logger.log(Level.SEVERE, message, e);
1210     }
1211   }
1212 
1213   /**
1214    * Evaluate the real space field due to induced dipoles using a short cutoff (~3-4 A).
1215    */
1216   private class PreconditionerRegion extends ParallelRegion {
1217 
1218     private final InducedPreconditionerFieldLoop[] inducedPreconditionerFieldLoop;
1219 
1220     PreconditionerRegion(int threadCount) {
1221       inducedPreconditionerFieldLoop = new InducedPreconditionerFieldLoop[threadCount];
1222     }
1223 
1224     @Override
1225     public void run() {
1226       int threadIndex = getThreadIndex();
1227       if (inducedPreconditionerFieldLoop[threadIndex] == null) {
1228         inducedPreconditionerFieldLoop[threadIndex] = new InducedPreconditionerFieldLoop();
1229       }
1230       try {
1231         int nAtoms = atoms.length;
1232         execute(0, nAtoms - 1, inducedPreconditionerFieldLoop[threadIndex]);
1233       } catch (Exception e) {
1234         String message =
1235             "Fatal exception computing the induced real space field in thread "
1236                 + getThreadIndex()
1237                 + "\n";
1238         logger.log(Level.SEVERE, message, e);
1239       }
1240     }
1241 
1242     private class InducedPreconditionerFieldLoop extends IntegerForLoop {
1243 
1244       private int threadID;
1245       private double[] x, y, z;
1246 
1247       InducedPreconditionerFieldLoop() {
1248       }
1249 
1250       @Override
1251       public void finish() {
1252         realSpaceSCFTime[threadID] += System.nanoTime();
1253       }
1254 
1255       @Override
1256       public void run(int lb, int ub) {
1257         final double[] dx = new double[3];
1258         final double[] rk = new double[3];
1259         final double[] rCRk = new double[3];
1260         final double[][] transOp = new double[3][3];
1261         // Loop over a chunk of atoms.
1262         int[][] lists = preconditionerLists[0];
1263         int[] counts = preconditionerCounts[0];
1264         for (int i = lb; i <= ub; i++) {
1265           if (!use[i]) {
1266             continue;
1267           }
1268           double fx = 0.0;
1269           double fy = 0.0;
1270           double fz = 0.0;
1271           double px = 0.0;
1272           double py = 0.0;
1273           double pz = 0.0;
1274           final double xi = x[i];
1275           final double yi = y[i];
1276           final double zi = z[i];
1277           // Multiply the residue field rCR by the polarizability.
1278           final double polari = polarizability[i];
1279           final double uix = polari * r[0][i];
1280           final double uiy = polari * r[1][i];
1281           final double uiz = polari * r[2][i];
1282           final double pix = polari * rCR[0][i];
1283           final double piy = polari * rCR[1][i];
1284           final double piz = polari * rCR[2][i];
1285           final double pdi = ipdamp[i];
1286           final double pti = thole[i];
1287           // Loop over the neighbor list.
1288           final int[] list = lists[i];
1289           final int npair = counts[i];
1290           for (int j = 0; j < npair; j++) {
1291             final int k = list[j];
1292             if (!use[k]) {
1293               continue;
1294             }
1295             final double pdk = ipdamp[k];
1296             final double ptk = thole[k];
1297             dx[0] = x[k] - xi;
1298             dx[1] = y[k] - yi;
1299             dx[2] = z[k] - zi;
1300             final double r2 = crystal.image(dx);
1301             // Calculate the error function damping terms.
1302             final double r = sqrt(r2);
1303             final double rr1 = 1.0 / r;
1304             final double rr2 = rr1 * rr1;
1305             final double ralpha = ewaldParameters.aewald * r;
1306             final double exp2a = exp(-ralpha * ralpha);
1307             final double bn0 = erfc(ralpha) * rr1;
1308             final double bn1 = (bn0 + ewaldParameters.an0 * exp2a) * rr2;
1309             final double bn2 = (3.0 * bn1 + ewaldParameters.an1 * exp2a) * rr2;
1310             double scale3 = 1.0;
1311             double scale5 = 1.0;
1312             double damp = pdi * pdk;
1313             final double pgamma = min(pti, ptk);
1314             final double rdamp = r * damp;
1315             damp = -pgamma * rdamp * rdamp * rdamp;
1316             if (damp > -50.0) {
1317               final double expdamp = exp(damp);
1318               scale3 = 1.0 - expdamp;
1319               scale5 = 1.0 - expdamp * (1.0 - damp);
1320             }
1321             double rr3 = rr1 * rr2;
1322             double rr5 = 3.0 * rr3 * rr2;
1323             rr3 *= (1.0 - scale3);
1324             rr5 *= (1.0 - scale5);
1325             final double xr = dx[0];
1326             final double yr = dx[1];
1327             final double zr = dx[2];
1328             // Multiply the residue field rCR by the polarizability.
1329             final double polarK = polarizability[k];
1330             final double ukx = polarK * PCGSolver.this.r[0][k];
1331             final double uky = polarK * PCGSolver.this.r[1][k];
1332             final double ukz = polarK * PCGSolver.this.r[2][k];
1333             final double ukr = ukx * xr + uky * yr + ukz * zr;
1334             final double bn2ukr = bn2 * ukr;
1335             final double fimx = -bn1 * ukx + bn2ukr * xr;
1336             final double fimy = -bn1 * uky + bn2ukr * yr;
1337             final double fimz = -bn1 * ukz + bn2ukr * zr;
1338             final double rr5ukr = rr5 * ukr;
1339             final double fidx = -rr3 * ukx + rr5ukr * xr;
1340             final double fidy = -rr3 * uky + rr5ukr * yr;
1341             final double fidz = -rr3 * ukz + rr5ukr * zr;
1342             fx += (fimx - fidx);
1343             fy += (fimy - fidy);
1344             fz += (fimz - fidz);
1345             // Multiply the residue field rCR by the polarizability.
1346             final double pkx = polarK * PCGSolver.this.rCR[0][k];
1347             final double pky = polarK * PCGSolver.this.rCR[1][k];
1348             final double pkz = polarK * PCGSolver.this.rCR[2][k];
1349             final double pkr = pkx * xr + pky * yr + pkz * zr;
1350             final double bn2pkr = bn2 * pkr;
1351             final double pimx = -bn1 * pkx + bn2pkr * xr;
1352             final double pimy = -bn1 * pky + bn2pkr * yr;
1353             final double pimz = -bn1 * pkz + bn2pkr * zr;
1354             final double rr5pkr = rr5 * pkr;
1355             final double pidx = -rr3 * pkx + rr5pkr * xr;
1356             final double pidy = -rr3 * pky + rr5pkr * yr;
1357             final double pidz = -rr3 * pkz + rr5pkr * zr;
1358             px += (pimx - pidx);
1359             py += (pimy - pidy);
1360             pz += (pimz - pidz);
1361             final double uir = uix * xr + uiy * yr + uiz * zr;
1362             final double bn2uir = bn2 * uir;
1363             final double fkmx = -bn1 * uix + bn2uir * xr;
1364             final double fkmy = -bn1 * uiy + bn2uir * yr;
1365             final double fkmz = -bn1 * uiz + bn2uir * zr;
1366             final double rr5uir = rr5 * uir;
1367             final double fkdx = -rr3 * uix + rr5uir * xr;
1368             final double fkdy = -rr3 * uiy + rr5uir * yr;
1369             final double fkdz = -rr3 * uiz + rr5uir * zr;
1370             field.add(threadID, k, fkmx - fkdx, fkmy - fkdy, fkmz - fkdz);
1371             final double pir = pix * xr + piy * yr + piz * zr;
1372             final double bn2pir = bn2 * pir;
1373             final double pkmx = -bn1 * pix + bn2pir * xr;
1374             final double pkmy = -bn1 * piy + bn2pir * yr;
1375             final double pkmz = -bn1 * piz + bn2pir * zr;
1376             final double rr5pir = rr5 * pir;
1377             final double pkdx = -rr3 * pix + rr5pir * xr;
1378             final double pkdy = -rr3 * piy + rr5pir * yr;
1379             final double pkdz = -rr3 * piz + rr5pir * zr;
1380             fieldCR.add(threadID, k, pkmx - pkdx, pkmy - pkdy, pkmz - pkdz);
1381           }
1382           field.add(threadID, i, fx, fy, fz);
1383           fieldCR.add(threadID, i, px, py, pz);
1384         }
1385 
1386         // Loop over symmetry mates.
1387         List<SymOp> symOps = crystal.spaceGroup.symOps;
1388         int nSymm = symOps.size();
1389         for (int iSymm = 1; iSymm < nSymm; iSymm++) {
1390           SymOp symOp = crystal.spaceGroup.getSymOp(iSymm);
1391           crystal.getTransformationOperator(symOp, transOp);
1392           lists = preconditionerLists[iSymm];
1393           counts = preconditionerCounts[iSymm];
1394           final double[] xs = coordinates[iSymm][0];
1395           final double[] ys = coordinates[iSymm][1];
1396           final double[] zs = coordinates[iSymm][2];
1397           // Loop over a chunk of atoms.
1398           for (int i = lb; i <= ub; i++) {
1399             if (!use[i]) {
1400               continue;
1401             }
1402             double fx = 0.0;
1403             double fy = 0.0;
1404             double fz = 0.0;
1405             double px = 0.0;
1406             double py = 0.0;
1407             double pz = 0.0;
1408             final double xi = x[i];
1409             final double yi = y[i];
1410             final double zi = z[i];
1411             final double polar = polarizability[i];
1412             final double uix = polar * PCGSolver.this.r[0][i];
1413             final double uiy = polar * PCGSolver.this.r[1][i];
1414             final double uiz = polar * PCGSolver.this.r[2][i];
1415             final double pix = polar * PCGSolver.this.rCR[0][i];
1416             final double piy = polar * PCGSolver.this.rCR[1][i];
1417             final double piz = polar * PCGSolver.this.rCR[2][i];
1418             final double pdi = ipdamp[i];
1419             final double pti = thole[i];
1420             // Loop over the neighbor list.
1421             final int[] list = lists[i];
1422             final int npair = counts[i];
1423             for (int j = 0; j < npair; j++) {
1424               final int k = list[j];
1425               if (!use[k]) {
1426                 continue;
1427               }
1428               double selfScale = 1.0;
1429               if (i == k) {
1430                 selfScale = 0.5;
1431               }
1432               final double pdk = ipdamp[k];
1433               final double ptk = thole[k];
1434               dx[0] = xs[k] - xi;
1435               dx[1] = ys[k] - yi;
1436               dx[2] = zs[k] - zi;
1437               final double r2 = crystal.image(dx);
1438               // Calculate the error function damping terms.
1439               final double r = sqrt(r2);
1440               final double rr1 = 1.0 / r;
1441               final double rr2 = rr1 * rr1;
1442               final double ralpha = ewaldParameters.aewald * r;
1443               final double exp2a = exp(-ralpha * ralpha);
1444               final double bn0 = erfc(ralpha) * rr1;
1445               final double bn1 = (bn0 + ewaldParameters.an0 * exp2a) * rr2;
1446               final double bn2 = (3.0 * bn1 + ewaldParameters.an1 * exp2a) * rr2;
1447               double scale3 = 1.0;
1448               double scale5 = 1.0;
1449               double damp = pdi * pdk;
1450               final double pgamma = min(pti, ptk);
1451               final double rdamp = r * damp;
1452               damp = -pgamma * rdamp * rdamp * rdamp;
1453               if (damp > -50.0) {
1454                 final double expdamp = exp(damp);
1455                 scale3 = 1.0 - expdamp;
1456                 scale5 = 1.0 - expdamp * (1.0 - damp);
1457               }
1458               double rr3 = rr1 * rr2;
1459               double rr5 = 3.0 * rr3 * rr2;
1460               rr3 *= (1.0 - scale3);
1461               rr5 *= (1.0 - scale5);
1462               final double xr = dx[0];
1463               final double yr = dx[1];
1464               final double zr = dx[2];
1465               // Rotate the residual field.
1466               rk[0] = PCGSolver.this.r[0][k];
1467               rk[1] = PCGSolver.this.r[1][k];
1468               rk[2] = PCGSolver.this.r[2][k];
1469               crystal.applySymRot(rk, rk, symOp);
1470               rCRk[0] = PCGSolver.this.rCR[0][k];
1471               rCRk[1] = PCGSolver.this.rCR[1][k];
1472               rCRk[2] = PCGSolver.this.rCR[2][k];
1473               crystal.applySymRot(rCRk, rCRk, symOp);
1474               // Multiply the residue field by the polarizability.
1475               final double polarK = polarizability[k];
1476               final double ukx = polarK * rk[0];
1477               final double uky = polarK * rk[1];
1478               final double ukz = polarK * rk[2];
1479               final double pkx = polarK * rCRk[0];
1480               final double pky = polarK * rCRk[1];
1481               final double pkz = polarK * rCRk[2];
1482               final double ukr = ukx * xr + uky * yr + ukz * zr;
1483               final double bn2ukr = bn2 * ukr;
1484               final double fimx = -bn1 * ukx + bn2ukr * xr;
1485               final double fimy = -bn1 * uky + bn2ukr * yr;
1486               final double fimz = -bn1 * ukz + bn2ukr * zr;
1487               final double rr5ukr = rr5 * ukr;
1488               final double fidx = -rr3 * ukx + rr5ukr * xr;
1489               final double fidy = -rr3 * uky + rr5ukr * yr;
1490               final double fidz = -rr3 * ukz + rr5ukr * zr;
1491               fx += selfScale * (fimx - fidx);
1492               fy += selfScale * (fimy - fidy);
1493               fz += selfScale * (fimz - fidz);
1494               final double pkr = pkx * xr + pky * yr + pkz * zr;
1495               final double bn2pkr = bn2 * pkr;
1496               final double pimx = -bn1 * pkx + bn2pkr * xr;
1497               final double pimy = -bn1 * pky + bn2pkr * yr;
1498               final double pimz = -bn1 * pkz + bn2pkr * zr;
1499               final double rr5pkr = rr5 * pkr;
1500               final double pidx = -rr3 * pkx + rr5pkr * xr;
1501               final double pidy = -rr3 * pky + rr5pkr * yr;
1502               final double pidz = -rr3 * pkz + rr5pkr * zr;
1503               px += selfScale * (pimx - pidx);
1504               py += selfScale * (pimy - pidy);
1505               pz += selfScale * (pimz - pidz);
1506               final double uir = uix * xr + uiy * yr + uiz * zr;
1507               final double bn2uir = bn2 * uir;
1508               final double fkmx = -bn1 * uix + bn2uir * xr;
1509               final double fkmy = -bn1 * uiy + bn2uir * yr;
1510               final double fkmz = -bn1 * uiz + bn2uir * zr;
1511               final double rr5uir = rr5 * uir;
1512               final double fkdx = -rr3 * uix + rr5uir * xr;
1513               final double fkdy = -rr3 * uiy + rr5uir * yr;
1514               final double fkdz = -rr3 * uiz + rr5uir * zr;
1515               double xc = selfScale * (fkmx - fkdx);
1516               double yc = selfScale * (fkmy - fkdy);
1517               double zc = selfScale * (fkmz - fkdz);
1518               double fkx = (xc * transOp[0][0] + yc * transOp[1][0] + zc * transOp[2][0]);
1519               double fky = (xc * transOp[0][1] + yc * transOp[1][1] + zc * transOp[2][1]);
1520               double fkz = (xc * transOp[0][2] + yc * transOp[1][2] + zc * transOp[2][2]);
1521               field.add(threadID, k, fkx, fky, fkz);
1522               final double pir = pix * xr + piy * yr + piz * zr;
1523               final double bn2pir = bn2 * pir;
1524               final double pkmx = -bn1 * pix + bn2pir * xr;
1525               final double pkmy = -bn1 * piy + bn2pir * yr;
1526               final double pkmz = -bn1 * piz + bn2pir * zr;
1527               final double rr5pir = rr5 * pir;
1528               final double pkdx = -rr3 * pix + rr5pir * xr;
1529               final double pkdy = -rr3 * piy + rr5pir * yr;
1530               final double pkdz = -rr3 * piz + rr5pir * zr;
1531               xc = selfScale * (pkmx - pkdx);
1532               yc = selfScale * (pkmy - pkdy);
1533               zc = selfScale * (pkmz - pkdz);
1534               fkx = (xc * transOp[0][0] + yc * transOp[1][0] + zc * transOp[2][0]);
1535               fky = (xc * transOp[0][1] + yc * transOp[1][1] + zc * transOp[2][1]);
1536               fkz = (xc * transOp[0][2] + yc * transOp[1][2] + zc * transOp[2][2]);
1537               fieldCR.add(threadID, k, fkx, fky, fkz);
1538             }
1539             field.add(threadID, i, fx, fy, fz);
1540             fieldCR.add(threadID, i, px, py, pz);
1541           }
1542         }
1543       }
1544 
1545       @Override
1546       public IntegerSchedule schedule() {
1547         return realSpaceSchedule;
1548       }
1549 
1550       @Override
1551       public void start() {
1552         threadID = getThreadIndex();
1553         realSpaceSCFTime[threadID] -= System.nanoTime();
1554         x = coordinates[0][0];
1555         y = coordinates[0][1];
1556         z = coordinates[0][2];
1557       }
1558     }
1559   }
1560 }