View Javadoc
1   // ******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2025.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  // ******************************************************************************
38  package ffx.potential.nonbonded.pme;
39  
40  import edu.rit.pj.IntegerForLoop;
41  import edu.rit.pj.IntegerSchedule;
42  import edu.rit.pj.ParallelRegion;
43  import edu.rit.pj.ParallelTeam;
44  import edu.rit.pj.reduction.SharedDouble;
45  import ffx.crystal.Crystal;
46  import ffx.crystal.SymOp;
47  import ffx.numerics.atomic.AtomicDoubleArray3D;
48  import ffx.potential.bonded.Atom;
49  import ffx.potential.nonbonded.ParticleMeshEwald;
50  import ffx.potential.parameters.ForceField;
51  import ffx.potential.utils.EnergyException;
52  import ffx.utilities.Constants;
53  
54  import java.util.List;
55  import java.util.logging.Level;
56  import java.util.logging.Logger;
57  
58  import static ffx.numerics.special.Erf.erfc;
59  import static java.lang.String.format;
60  import static org.apache.commons.math3.util.FastMath.exp;
61  import static org.apache.commons.math3.util.FastMath.max;
62  import static org.apache.commons.math3.util.FastMath.min;
63  import static org.apache.commons.math3.util.FastMath.sqrt;
64  
65  /**
66   * Parallel pre-conditioned conjugate gradient solver for the self-consistent field.
67   * <p>
68   * This solves the linear system A x = b using an iterative approach where
69   * <p>
70   * A: is an N x N symmetric, positive definite matrix
71   * <p>
72   * B: is a known vector of length N.
73   * <p>
74   * x: is the unknown vector of length N.
75   * <p>
76   * For the AMOEBA SCF, the linear system Ax = b is usually denoted: C u = E_direct
77   * <br>
78   * where C = [alpha^-1 - T]
79   * <br>
80   * u are the induced dipoles.
81   * <br>
82   * E_direct is the direct field from permanent multipoles.
83   * <p>
84   * The matrix alpha^-1 is the inverse of the N x N diagonal polarizability matrix. The matrix T is
85   * the N x N matrix that produces the field due to induced dipoles.
86   * <p>
87   * Initialization:
88   * <br>
89   * 1) Compute the residual where x_0 is either u_direct or u_mutual (a pcg guess):<br>
90   * r_0 = b - A x_0<br>
91   * r_0 = E_direct - C u_0<br>
92   * r_0 = E_direct - [alpha^-1 - T] u_0<br>
93   * r_0 = (u_direct - u_0) * alpha^-1 - E_u_0
94   * <br>
95   * 2) Compute the preconditioner:  z_0 = M^1 r_0
96   * <br>
97   * 3) Compute the conjugate:       p_0 = z_0
98   * <br>
99   * 4) Initial loop index: k = 0
100  * <p>
101  * Then loop over:
102  * <br>
103  * 1) Update the step size:        alpha = r_k dot z_k / (p_k A p_k)
104  * <br>
105  * 2) Update the induced dipoles:  u_k+1 = u_k + alpha * p_k
106  * <br>
107  * 3) Update the residual:         r_k+1 = r_k - alpha * A p_k
108  * <br>
109  * 4) Check for convergence of r_k+1.
110  * <br>
111  * 5) Update the preconditioner:   z_k+1 = M^-1 r_k+1
112  * <br>
113  * 6) Update the step size:        beta = r_k+1 dot z_k+1 / (r_k dot z_k)
114  * <br>
115  * 7) Update the conjugate:        p_k+1 = z_k+1 + beta * p_k
116  * <br>
117  * 8) Update the loop index:       k = k + 1
118  *
119  * @author Michael J. Schnieders
120  * @since 1.0
121  */
122 public class PCGSolver {
123 
124   private static final Logger logger = Logger.getLogger(PCGSolver.class.getName());
125 
126   /**
127    * Compute the initial residual.
128    */
129   private final InitResidualRegion initResidualRegion;
130   /**
131    * Compute the initial conjugate search direction.
132    */
133   private final InitConjugateRegion initConjugateRegion;
134   /**
135    * Update the residual.
136    */
137   private final UpdateResidualRegion updateResidualRegion;
138   /**
139    * Update teh conjugate search direction.
140    */
141   private final UpdateConjugateRegion updateConjugateRegion;
142   /**
143    * Apply the preconditioner.
144    */
145   private final PreconditionerRegion preconditionerRegion;
146 
147   /**
148    * The default Beta step size used to update the conjugate direction is based on the Fletcher-Reeves formula.
149    * An alternative Beta step size uses the Polak-Ribiere formula for the flexible preconditioned conjugate gradient method.
150    * For a symmetric positive definite preconditioner, the FR and PR steps sizes are equivalent.
151    * The steepest decent method sets beta to zero.
152    */
153   private enum PRECONDITION_MODE {FLETCHER_REEVES, FLEXIBLE, STEEPEST_DECENT}
154 
155   /**
156    * The SCF convergence criteria in Debye.
157    */
158   private final double poleps;
159   /**
160    * The cutoff used by the CG preconditioner in Angstroms.
161    */
162   private final double preconditionerCutoff;
163   /**
164    * The Ewald coefficient used by the CG preconditioner.
165    */
166   private final double preconditionerEwald;
167   /**
168    * Acceleration factor for induced dipole SCF iterations. This parameter weights the diagonol
169    * elements of the preconditioning matrix.
170    */
171   private final double preconditionerScale;
172   /**
173    * The default Beta step size used to update the conjugate direction is based on the Fletcher-Reeves formula.
174    * An alternative Beta step size uses the Polak-Ribiere formula for the flexible preconditioned conjugate gradient method.
175    * For a symmetric positive definite preconditioner, the FR and PR steps sizes are equivalent.
176    * The steepest decent method sets beta to zero.
177    */
178   private final PRECONDITION_MODE preconditionMode;
179   /**
180    * Neighbor lists, without atoms beyond the preconditioner cutoff.
181    * [nSymm][nAtoms][nIncludedNeighbors]
182    */
183   private int[][][] preconditionerLists;
184   /**
185    * Number of neighboring atoms within the preconditioner cutoff. [nSymm][nAtoms]
186    */
187   private int[][] preconditionerCounts;
188   /**
189    * Residual vector (an electric field).
190    */
191   private double[][] r;
192   /**
193    * Residual vector for the chain-rule dipoles (an electric field).
194    */
195   private double[][] rCR;
196   /**
197    * Preconditioner dipoles (z = M^-1 r).
198    */
199   private double[][] zpre;
200   /**
201    * Preconditioner dipoles for the chain-rule dipoles (zCR = M^-1 rCR).
202    */
203   private double[][] zpreCR;
204   /**
205    * Conjugate search direction (induced dipoles).
206    */
207   private double[][] p;
208   /**
209    * Conjugate search direction for the chain-rule dipoles (induced dipoles).
210    */
211   private double[][] pCR;
212   /**
213    * Work vector.
214    */
215   private double[][] vec;
216   /**
217    * Work vector for the chain-rule dipoles.
218    */
219   private double[][] vecCR;
220   /**
221    * An ordered array of atoms in the system.
222    */
223   private Atom[] atoms;
224   /**
225    * Dimensions of [nsymm][xyz][nAtoms].
226    */
227   private double[][][] coordinates;
228   /**
229    * Polarizability of each atom.
230    */
231   private double[] polarizability;
232   /**
233    * Inverse of pdamp for each atom.
234    */
235   private double[] ipdamp;
236   /**
237    * Thole parameter for each atom.
238    */
239   private double[] thole;
240   /**
241    * When computing the polarization energy at Lambda there are 3 pieces.
242    *
243    * <p>1.) Upol(1) = The polarization energy computed normally (i.e. system with ligand).
244    *
245    * <p>2.) Uenv = The polarization energy of the system without the ligand.
246    *
247    * <p>3.) Uligand = The polarization energy of the ligand by itself.
248    *
249    * <p>Upol(L) = L*Upol(1) + (1-L)*(Uenv + Uligand)
250    *
251    * <p>Set the "use" array to true for all atoms for part 1.
252    * <p>Set the "use" array to true for all atoms except the ligand for part 2.
253    * <p>Set the "use" array to true only for the ligand atoms for part 3.
254    *
255    * <p>The "use" array can also be employed to turn off atoms for computing the electrostatic
256    * energy of sub-structures.
257    */
258   private boolean[] use;
259   /**
260    * Unit cell and spacegroup information.
261    */
262   private Crystal crystal;
263   /**
264    * Induced dipoles with dimensions of [nsymm][nAtoms][3].
265    */
266   private double[][][] inducedDipole;
267   private double[][][] inducedDipoleCR;
268   /**
269    * Direct induced dipoles with dimensions of [nAtoms][3].
270    */
271   private double[][] directDipole;
272   private double[][] directDipoleCR;
273   /**
274    * Field array.
275    */
276   private AtomicDoubleArray3D field;
277   /**
278    * Chain rule field array.
279    */
280   private AtomicDoubleArray3D fieldCR;
281   private EwaldParameters ewaldParameters;
282   /**
283    * The default ParallelTeam encapsulates the maximum number of threads used to parallelize the
284    * electrostatics calculation.
285    */
286   private ParallelTeam parallelTeam;
287   /**
288    * Pairwise schedule for load balancing.
289    */
290   private IntegerSchedule realSpaceSchedule;
291   private PMETimings pmeTimings;
292 
293   /**
294    * A preconditioner cut-off of 3 to 4 Angstroms generally works well.
295    * <br>
296    * z = M^(-1) r = polarizability * (E_r + scale * r)
297    * <br>
298    * The cutoff is applied when computing E_r (the field due to polarizability * residual).
299    */
300   public static final double DEFAULT_CG_PRECONDITIONER_CUTOFF = 4.5;
301 
302   /**
303    * An Ewald coefficient of 0 indicates use of Coulomb's law with no damping.
304    * <br>
305    * z = M^(-1) r = polarizability * (E_r + scale * r)
306    * <br>
307    * Ewald or Coulomb is used when computing E_r (the field due to polarizability * residual).
308    */
309   public static final double DEFAULT_CG_PRECONDITIONER_EWALD = 0.0;
310 
311   /**
312    * The scale factor is applied to the diagonal terms of the preconditioner.
313    * <br>
314    * z = M^(-1) r = polar * (E_r + scale * r)
315    * <br>
316    * The "scale * polar * r" term is the diagonal part of M^(-1)
317    */
318   public static final double DEFAULT_CG_PRECONDITIONER_SCALE = 2.0;
319 
320   private double dieletric;
321 
322   /**
323    * Constructor the PCG solver.
324    *
325    * @param maxThreads Number of threads.
326    * @param poleps     Convergence criteria (RMS Debye).
327    * @param forceField Force field in use.
328    * @param nAtoms     Initial number of atoms.
329    */
330   public PCGSolver(int maxThreads, double poleps, ForceField forceField, int nAtoms) {
331     this.poleps = poleps;
332     preconditionerRegion = new PreconditionerRegion(maxThreads);
333     initResidualRegion = new InitResidualRegion(maxThreads);
334     initConjugateRegion = new InitConjugateRegion(maxThreads);
335     updateResidualRegion = new UpdateResidualRegion(maxThreads);
336     updateConjugateRegion = new UpdateConjugateRegion(maxThreads);
337 
338     // The size of the preconditioner neighbor list depends on the size of the preconditioner
339     // cutoff.
340     boolean preconditioner = forceField.getBoolean("USE_SCF_PRECONDITIONER", true);
341     if (preconditioner) {
342       preconditionerCutoff = forceField.getDouble("CG_PRECONDITIONER_CUTOFF", DEFAULT_CG_PRECONDITIONER_CUTOFF);
343       preconditionerEwald = forceField.getDouble("CG_PRECONDITIONER_EWALD", DEFAULT_CG_PRECONDITIONER_EWALD);
344       preconditionerScale = forceField.getDouble("CG_PRECONDITIONER_SCALE", DEFAULT_CG_PRECONDITIONER_SCALE);
345       PRECONDITION_MODE mode = PRECONDITION_MODE.FLETCHER_REEVES;
346       try {
347         String m = forceField.getString("CG_PRECONDITIONER_MODE", PRECONDITION_MODE.FLETCHER_REEVES.toString());
348         m = ForceField.toEnumForm(m);
349         mode = PRECONDITION_MODE.valueOf(m);
350       } catch (Exception e) {
351         // Do nothing.
352       }
353       preconditionMode = mode;
354     } else {
355       preconditionerCutoff = 0.0;
356       preconditionerEwald = 0.0;
357       preconditionerScale = 0.0;
358       preconditionMode = PRECONDITION_MODE.FLETCHER_REEVES;
359     }
360 
361     allocateVectors(nAtoms);
362   }
363 
364   /**
365    * Allocate storage for pre-conditioner neighbor list.
366    *
367    * @param nSymm  Number of symmetry operators.
368    * @param nAtoms Number of atoms.
369    */
370   public void allocateLists(int nSymm, int nAtoms) {
371     int preconditionerListSize = 50;
372     preconditionerLists = new int[nSymm][nAtoms][preconditionerListSize];
373     preconditionerCounts = new int[nSymm][nAtoms];
374   }
375 
376   /**
377    * Allocate PCG vectors.
378    *
379    * @param nAtoms The number of atoms.
380    */
381   public void allocateVectors(int nAtoms) {
382     if (r == null || r[0].length != nAtoms) {
383       r = new double[3][nAtoms];
384       rCR = new double[3][nAtoms];
385       zpre = new double[3][nAtoms];
386       zpreCR = new double[3][nAtoms];
387       p = new double[3][nAtoms];
388       pCR = new double[3][nAtoms];
389       vec = new double[3][nAtoms];
390       vecCR = new double[3][nAtoms];
391     }
392   }
393 
394   /**
395    * Get the preconditioner cutoff.
396    *
397    * @return The cutoff in Angstroms.
398    */
399   public double getPreconditionerCutoff() {
400     return preconditionerCutoff;
401   }
402 
403   /**
404    * Get the preconditioner Ewald coefficient.
405    *
406    * @return The Ewald coefficient.
407    */
408   public double getPreconditionerEwald() {
409     return preconditionerEwald;
410   }
411 
412   /**
413    * Get the preconditioner matrix diagonal scale factor.
414    *
415    * @return The unitless scale factor.
416    */
417   public double getPreconditionerScale() {
418     return preconditionerScale;
419   }
420 
421   /**
422    * Get the preconditioner mode.
423    *
424    * @return The mode.
425    */
426   public String getPreconditionerMode() {
427     return preconditionMode.toString();
428   }
429 
430   /**
431    * Neighbor lists when applying the preconditioner.
432    *
433    * @return Storage for the preconditioner neighbor lists.
434    */
435   public int[][][] getPreconditionerLists() {
436     return preconditionerLists;
437   }
438 
439   /**
440    * Number of neighbors when applying the preconditioner.
441    *
442    * @return Storage for the preconditioner counts.
443    */
444   public int[][] getPreconditionerCounts() {
445     return preconditionerCounts;
446   }
447 
448   public void init(
449       Atom[] atoms,
450       double[][][] coordinates,
451       double[] polarizability,
452       double[] ipdamp,
453       double[] thole,
454       boolean[] use,
455       Crystal crystal,
456       double[][][] inducedDipole,
457       double[][][] inducedDipoleCR,
458       double[][] directDipole,
459       double[][] directDipoleCR,
460       AtomicDoubleArray3D field,
461       AtomicDoubleArray3D fieldCR,
462       EwaldParameters ewaldParameters,
463       double dieletric,
464       ParallelTeam parallelTeam,
465       IntegerSchedule realSpaceSchedule,
466       PMETimings pmeTimings) {
467     this.atoms = atoms;
468     this.coordinates = coordinates;
469     this.polarizability = polarizability;
470     this.ipdamp = ipdamp;
471     this.thole = thole;
472     this.use = use;
473     this.crystal = crystal;
474     this.inducedDipole = inducedDipole;
475     this.inducedDipoleCR = inducedDipoleCR;
476     this.directDipole = directDipole;
477     this.directDipoleCR = directDipoleCR;
478     this.field = field;
479     this.fieldCR = fieldCR;
480     this.ewaldParameters = ewaldParameters;
481     this.dieletric = dieletric;
482     this.parallelTeam = parallelTeam;
483     this.realSpaceSchedule = realSpaceSchedule;
484     this.pmeTimings = pmeTimings;
485   }
486 
487   public int scfByPCG(boolean print, long startTime, ParticleMeshEwald particleMeshEwald) {
488     long directTime = System.nanoTime() - startTime;
489     // A request of 0 SCF cycles simplifies mutual polarization to direct polarization.
490     StringBuilder sb = null;
491     if (print) {
492       sb = new StringBuilder("\n Self-Consistent Field\n Iter  RMS Change (Debye)  Time\n");
493     }
494 
495     // Find the induced dipole field due to direct dipoles
496     // (or predicted induced dipoles from previous steps).
497     particleMeshEwald.computeInduceDipoleField();
498 
499     try {
500       // Set the initial residual.
501       parallelTeam.execute(initResidualRegion);
502 
503       // Compute the induced field due to the residual using short cut-offs.
504       computePreconditioner();
505 
506       // Set initial conjugate vector.
507       parallelTeam.execute(initConjugateRegion);
508     } catch (Exception e) {
509       String message = "Exception initializing preconditioned conjugate-gradient SCF solver.";
510       logger.log(Level.SEVERE, message, e);
511     }
512 
513     // Conjugate gradient iteration of the mutual induced dipoles.
514     int completedSCFCycles = 0;
515     int maxSCFCycles = 1000;
516     double eps = 100.0;
517     double previousEps;
518     boolean done = false;
519     while (!done) {
520       long cycleTime = -System.nanoTime();
521 
522       // Find the induced dipole field due to the conjugate search direction p.
523       // Note that induced dipole vectors are loaded with p by:
524       //  A) The InitConjugateRegion instance prior to the first iteration.
525       //  B) The UpdateConjugateRegion instance for subsequent iterations.
526       particleMeshEwald.computeInduceDipoleField();
527 
528       try {
529         /*
530          * 1) Using the induced dipole field, compute A p_k = [1/alpha - T] p_k
531          *   p / alpha  the field that induced the conjugate induced dipoles.
532          *   T p        the field due to the conjugate induced dipoles.
533          * 2) Compute the step size alpha.
534          * 3) Update the residual and induced dipole solution.
535          */
536         parallelTeam.execute(updateResidualRegion);
537       } catch (Exception e) {
538         String message = "Exception updating residual during CG iteration.";
539         logger.log(Level.SEVERE, message, e);
540       }
541 
542       // Check for convergence.
543       previousEps = eps;
544       eps = max(updateResidualRegion.getEps(), updateResidualRegion.getEpsCR());
545       completedSCFCycles++;
546       int nAtoms = atoms.length;
547       eps = Constants.ELEC_ANG_TO_DEBYE * sqrt(eps / (double) nAtoms);
548       cycleTime += System.nanoTime();
549       if (print) {
550         sb.append(format(" %4d     %15.10f %7.4f\n", completedSCFCycles, eps, cycleTime * Constants.NS2SEC));
551       }
552 
553       // If the RMS Debye change increases, fail the SCF process.
554       if (eps > previousEps) {
555         if (sb != null) {
556           logger.warning(sb.toString());
557         }
558         String message = format("SCF convergence failure: (%10.5f > %10.5f)\n", eps, previousEps);
559         throw new EnergyException(message);
560       }
561 
562       // The SCF should converge before the max iteration check.
563       if (completedSCFCycles >= maxSCFCycles) {
564         if (sb != null) {
565           logger.warning(sb.toString());
566         }
567         String message = format("Maximum SCF iterations reached: (%d)\n", completedSCFCycles);
568         throw new EnergyException(message);
569       }
570 
571       // Check if the convergence criteria has been achieved.
572       if (eps < poleps) {
573         done = true;
574       } else {
575         // Compute the induced field due to the residual using short cut-offs.
576         computePreconditioner();
577         try {
578           /*
579            * 1) Compute the dot product of the residual (r) and preconditioner field (z).
580            * 2) Compute the step size beta.
581            * 3) Update the conjugate search direction (p).
582            */
583           updateConjugateRegion.previousRDotZ = updateResidualRegion.getRDotZ();
584           updateConjugateRegion.previousRDotZCR = updateResidualRegion.getRDotZCR();
585           parallelTeam.execute(updateConjugateRegion);
586         } catch (Exception e) {
587           String message = "Exception updating conjugate search direction during CG iteration.";
588           logger.log(Level.SEVERE, message, e);
589         }
590       }
591     }
592 
593     if (print) {
594       sb.append(format(" Direct:                  %7.4f\n", Constants.NS2SEC * directTime));
595       startTime = System.nanoTime() - startTime;
596       sb.append(format(" Total:                   %7.4f", startTime * Constants.NS2SEC));
597       logger.info(sb.toString());
598     }
599 
600     // Find the final induced dipole field.
601     particleMeshEwald.computeInduceDipoleField();
602 
603     return completedSCFCycles;
604   }
605 
606   /**
607    * Set the initial Residual r_0 = E_perm - u_0 / alpha + E_u_0.
608    */
609   private class InitResidualRegion extends ParallelRegion {
610 
611     private final InitResidualLoop[] initResidualLoops;
612 
613     public InitResidualRegion(int nt) {
614       initResidualLoops = new InitResidualLoop[nt];
615     }
616 
617     @Override
618     public void run() throws Exception {
619       try {
620         int ti = getThreadIndex();
621         if (initResidualLoops[ti] == null) {
622           initResidualLoops[ti] = new InitResidualLoop();
623         }
624         int nAtoms = atoms.length;
625         execute(0, nAtoms - 1, initResidualLoops[ti]);
626       } catch (Exception e) {
627         String message = "Fatal exception computing the mutual induced dipoles in thread "
628                 + getThreadIndex() + "\n";
629         logger.log(Level.SEVERE, message, e);
630       }
631     }
632 
633     private class InitResidualLoop extends IntegerForLoop {
634 
635       @Override
636       public void run(int lb, int ub) throws Exception {
637         for (int i = lb; i <= ub; i++) {
638           // Set initial residual to the direct field.
639           if (use[i] && polarizability[i] > 0.0) {
640             double ipolar = 1.0 / polarizability[i];
641             r[0][i] = (directDipole[i][0] - inducedDipole[0][i][0]) * ipolar + field.getX(i);
642             r[1][i] = (directDipole[i][1] - inducedDipole[0][i][1]) * ipolar + field.getY(i);
643             r[2][i] = (directDipole[i][2] - inducedDipole[0][i][2]) * ipolar + field.getZ(i);
644             rCR[0][i] = (directDipoleCR[i][0] - inducedDipoleCR[0][i][0]) * ipolar + fieldCR.getX(i);
645             rCR[1][i] = (directDipoleCR[i][1] - inducedDipoleCR[0][i][1]) * ipolar + fieldCR.getY(i);
646             rCR[2][i] = (directDipoleCR[i][2] - inducedDipoleCR[0][i][2]) * ipolar + fieldCR.getZ(i);
647           } else {
648             r[0][i] = 0.0;
649             r[1][i] = 0.0;
650             r[2][i] = 0.0;
651             rCR[0][i] = 0.0;
652             rCR[1][i] = 0.0;
653             rCR[2][i] = 0.0;
654           }
655         }
656       }
657 
658       @Override
659       public IntegerSchedule schedule() {
660         return IntegerSchedule.fixed();
661       }
662     }
663   }
664 
665   /**
666    * Set the initial conjugate direction p_0 = z_0 = M^-1 * r_0.
667    * <p>
668    * Where M^-1 is the inverse of the preconditioner matrix.
669    */
670   private class InitConjugateRegion extends ParallelRegion {
671 
672     private final InitConjugateLoop[] initConjugateLoops;
673 
674     public InitConjugateRegion(int nt) {
675       initConjugateLoops = new InitConjugateLoop[nt];
676     }
677 
678     @Override
679     public void run() throws Exception {
680       try {
681         int ti = getThreadIndex();
682         if (initConjugateLoops[ti] == null) {
683           initConjugateLoops[ti] = new InitConjugateLoop();
684         }
685         int nAtoms = atoms.length;
686         execute(0, nAtoms - 1, initConjugateLoops[ti]);
687       } catch (Exception e) {
688         String message =
689             "Fatal exception computing the mutual induced dipoles in thread "
690                 + getThreadIndex()
691                 + "\n";
692         logger.log(Level.SEVERE, message, e);
693       }
694     }
695 
696     private class InitConjugateLoop extends IntegerForLoop {
697 
698       @Override
699       public void run(int lb, int ub) throws Exception {
700 
701         for (int i = lb; i <= ub; i++) {
702           if (use[i]) {
703             // Set initial conjugate vector p (induced dipoles).
704             double polar = polarizability[i];
705             zpre[0][i] = polar * (field.getX(i) + preconditionerScale * r[0][i]);
706             zpre[1][i] = polar * (field.getY(i) + preconditionerScale * r[1][i]);
707             zpre[2][i] = polar * (field.getZ(i) + preconditionerScale * r[2][i]);
708             zpreCR[0][i] = polar * (fieldCR.getX(i) + preconditionerScale * rCR[0][i]);
709             zpreCR[1][i] = polar * (fieldCR.getY(i) + preconditionerScale * rCR[1][i]);
710             zpreCR[2][i] = polar * (fieldCR.getZ(i) + preconditionerScale * rCR[2][i]);
711             p[0][i] = zpre[0][i];
712             p[1][i] = zpre[1][i];
713             p[2][i] = zpre[2][i];
714             pCR[0][i] = zpreCR[0][i];
715             pCR[1][i] = zpreCR[1][i];
716             pCR[2][i] = zpreCR[2][i];
717 
718             // Store the current induced dipoles.
719             vec[0][i] = inducedDipole[0][i][0];
720             vec[1][i] = inducedDipole[0][i][1];
721             vec[2][i] = inducedDipole[0][i][2];
722             vecCR[0][i] = inducedDipoleCR[0][i][0];
723             vecCR[1][i] = inducedDipoleCR[0][i][1];
724             vecCR[2][i] = inducedDipoleCR[0][i][2];
725 
726             // Load conjugate vector whose field will be computed next.
727             inducedDipole[0][i][0] = p[0][i];
728             inducedDipole[0][i][1] = p[1][i];
729             inducedDipole[0][i][2] = p[2][i];
730             inducedDipoleCR[0][i][0] = pCR[0][i];
731             inducedDipoleCR[0][i][1] = pCR[1][i];
732             inducedDipoleCR[0][i][2] = pCR[2][i];
733 
734           } else {
735             zpre[0][i] = 0.0;
736             zpre[1][i] = 0.0;
737             zpre[2][i] = 0.0;
738             zpreCR[0][i] = 0.0;
739             zpreCR[1][i] = 0.0;
740             zpreCR[2][i] = 0.0;
741             p[0][i] = 0.0;
742             p[1][i] = 0.0;
743             p[2][i] = 0.0;
744             pCR[0][i] = 0.0;
745             pCR[1][i] = 0.0;
746             pCR[2][i] = 0.0;
747             vec[0][i] = 0.0;
748             vec[1][i] = 0.0;
749             vec[2][i] = 0.0;
750             vecCR[0][i] = 0.0;
751             vecCR[1][i] = 0.0;
752             vecCR[2][i] = 0.0;
753             inducedDipole[0][i][0] = 0.0;
754             inducedDipole[0][i][1] = 0.0;
755             inducedDipole[0][i][2] = 0.0;
756             inducedDipoleCR[0][i][0] = 0.0;
757             inducedDipoleCR[0][i][1] = 0.0;
758             inducedDipoleCR[0][i][2] = 0.0;
759           }
760         }
761       }
762 
763       @Override
764       public IntegerSchedule schedule() {
765         return IntegerSchedule.fixed();
766       }
767     }
768   }
769 
770   /**
771    * Update the induced dipoles and residuals.
772    */
773   private class UpdateResidualRegion extends ParallelRegion {
774 
775     private final UpdateApLoop[] updateApLoops;
776     private final UpdateResidualAndInducedLoop[] updateResidualAndInducedLoops;
777     private final SharedDouble pDotApShared;
778     private final SharedDouble pDotApCRShared;
779     private final SharedDouble rDotZShared;
780     private final SharedDouble rDotZCRShared;
781     private final SharedDouble epsShared;
782     private final SharedDouble epsCRShared;
783 
784     public UpdateResidualRegion(int nt) {
785       updateApLoops = new UpdateApLoop[nt];
786       updateResidualAndInducedLoops = new UpdateResidualAndInducedLoop[nt];
787       pDotApShared = new SharedDouble();
788       pDotApCRShared = new SharedDouble();
789       rDotZShared = new SharedDouble();
790       rDotZCRShared = new SharedDouble();
791       epsShared = new SharedDouble();
792       epsCRShared = new SharedDouble();
793     }
794 
795     public double getRDotZ() {
796       return rDotZShared.get();
797     }
798 
799     public double getRDotZCR() {
800       return rDotZCRShared.get();
801     }
802 
803     public double getEps() {
804       return epsShared.get();
805     }
806 
807     public double getEpsCR() {
808       return epsCRShared.get();
809     }
810 
811     @Override
812     public void run() throws Exception {
813       try {
814         int ti = getThreadIndex();
815         int nAtoms = atoms.length;
816         if (updateApLoops[ti] == null) {
817           updateApLoops[ti] = new UpdateApLoop();
818           updateResidualAndInducedLoops[ti] = new UpdateResidualAndInducedLoop();
819         }
820         execute(0, nAtoms - 1, updateApLoops[ti]);
821         execute(0, nAtoms - 1, updateResidualAndInducedLoops[ti]);
822       } catch (Exception e) {
823         String message =
824             "Fatal exception computing the mutual induced dipoles in thread "
825                 + getThreadIndex()
826                 + "\n";
827         logger.log(Level.SEVERE, message, e);
828       }
829     }
830 
831     @Override
832     public void start() {
833       pDotApShared.set(0.0);
834       pDotApCRShared.set(0.0);
835       rDotZShared.set(0.0);
836       rDotZCRShared.set(0.0);
837       epsShared.set(0.0);
838       epsCRShared.set(0.0);
839     }
840 
841     private class UpdateApLoop extends IntegerForLoop {
842 
843       public double pDotAp;
844       public double pDotApCR;
845       public double rDotZ;
846       public double rDotZCR;
847 
848       @Override
849       public void finish() {
850         pDotApShared.addAndGet(pDotAp);
851         pDotApCRShared.addAndGet(pDotApCR);
852         rDotZShared.addAndGet(rDotZ);
853         rDotZCRShared.addAndGet(rDotZCR);
854       }
855 
856       @Override
857       public void run(int lb, int ub) throws Exception {
858         for (int i = lb; i <= ub; i++) {
859           if (use[i] && polarizability[i] > 0) {
860             double ipolar = 1.0 / polarizability[i];
861             // Load induced dipoles from the work arrays.
862             inducedDipole[0][i][0] = vec[0][i];
863             inducedDipole[0][i][1] = vec[1][i];
864             inducedDipole[0][i][2] = vec[2][i];
865             inducedDipoleCR[0][i][0] = vecCR[0][i];
866             inducedDipoleCR[0][i][1] = vecCR[1][i];
867             inducedDipoleCR[0][i][2] = vecCR[2][i];
868             // Compute Ap = [1/alpha - T] p
869             // p / alpha  the field that induced the conjugate induced dipoles.
870             // T p        the field due to the conjugate induced dipoles.
871             vec[0][i] = p[0][i] * ipolar - field.getX(i);
872             vec[1][i] = p[1][i] * ipolar - field.getY(i);
873             vec[2][i] = p[2][i] * ipolar - field.getZ(i);
874             vecCR[0][i] = pCR[0][i] * ipolar - fieldCR.getX(i);
875             vecCR[1][i] = pCR[1][i] * ipolar - fieldCR.getY(i);
876             vecCR[2][i] = pCR[2][i] * ipolar - fieldCR.getZ(i);
877           } else {
878             inducedDipole[0][i][0] = 0.0;
879             inducedDipole[0][i][1] = 0.0;
880             inducedDipole[0][i][2] = 0.0;
881             inducedDipoleCR[0][i][0] = 0.0;
882             inducedDipoleCR[0][i][1] = 0.0;
883             inducedDipoleCR[0][i][2] = 0.0;
884             vec[0][i] = 0.0;
885             vec[1][i] = 0.0;
886             vec[2][i] = 0.0;
887             vecCR[0][i] = 0.0;
888             vecCR[1][i] = 0.0;
889             vecCR[2][i] = 0.0;
890           }
891 
892           // Compute dot product of the conjugate vector (p) with Ap (stored in vec).
893           pDotAp += p[0][i] * vec[0][i] + p[1][i] * vec[1][i] + p[2][i] * vec[2][i];
894           pDotApCR += pCR[0][i] * vecCR[0][i] + pCR[1][i] * vecCR[1][i] + pCR[2][i] * vecCR[2][i];
895 
896           // Compute dot product of the residual (r) and preconditioner (z).
897           rDotZ += r[0][i] * zpre[0][i] + r[1][i] * zpre[1][i] + r[2][i] * zpre[2][i];
898           rDotZCR += rCR[0][i] * zpreCR[0][i] + rCR[1][i] * zpreCR[1][i] + rCR[2][i] * zpreCR[2][i];
899         }
900       }
901 
902       @Override
903       public IntegerSchedule schedule() {
904         return IntegerSchedule.fixed();
905       }
906 
907       @Override
908       public void start() {
909         pDotAp = 0.0;
910         pDotApCR = 0.0;
911         rDotZ = 0.0;
912         rDotZCR = 0.0;
913       }
914     }
915 
916     private class UpdateResidualAndInducedLoop extends IntegerForLoop {
917 
918       double eps;
919       double epsCR;
920 
921       @Override
922       public void start() {
923         eps = 0.0;
924         epsCR = 0.0;
925       }
926 
927       @Override
928       public void finish() {
929         epsShared.addAndGet(eps);
930         epsCRShared.addAndGet(epsCR);
931       }
932 
933       @Override
934       public void run(int lb, int ub) throws Exception {
935         double alpha = rDotZShared.get();
936         if (pDotApShared.get() != 0) {
937           alpha /= pDotApShared.get();
938         }
939         double alphaCR = rDotZCRShared.get();
940         if (pDotApCRShared.get() != 0) {
941           alphaCR /= pDotApCRShared.get();
942         }
943         for (int i = lb; i <= ub; i++) {
944           if (use[i]) {
945             // Update the residual r_k+1 using Ap (stored in vec)
946             r[0][i] -= alpha * vec[0][i];
947             r[1][i] -= alpha * vec[1][i];
948             r[2][i] -= alpha * vec[2][i];
949             rCR[0][i] -= alphaCR * vecCR[0][i];
950             rCR[1][i] -= alphaCR * vecCR[1][i];
951             rCR[2][i] -= alphaCR * vecCR[2][i];
952 
953             // Update the induced dipoles based on the scaled conjugate vector.
954             inducedDipole[0][i][0] += alpha * p[0][i];
955             inducedDipole[0][i][1] += alpha * p[1][i];
956             inducedDipole[0][i][2] += alpha * p[2][i];
957             inducedDipoleCR[0][i][0] += alphaCR * pCR[0][i];
958             inducedDipoleCR[0][i][1] += alphaCR * pCR[1][i];
959             inducedDipoleCR[0][i][2] += alphaCR * pCR[2][i];
960 
961             // Sum the square of the residual field.
962             eps += r[0][i] * r[0][i] + r[1][i] * r[1][i] + r[2][i] * r[2][i];
963             epsCR += rCR[0][i] * rCR[0][i] + rCR[1][i] * rCR[1][i] + rCR[2][i] * rCR[2][i];
964           } else {
965             r[0][i] = 0.0;
966             r[1][i] = 0.0;
967             r[2][i] = 0.0;
968             rCR[0][i] = 0.0;
969             rCR[1][i] = 0.0;
970             rCR[2][i] = 0.0;
971             inducedDipole[0][i][0] = 0.0;
972             inducedDipole[0][i][1] = 0.0;
973             inducedDipole[0][i][2] = 0.0;
974             inducedDipoleCR[0][i][0] = 0.0;
975             inducedDipoleCR[0][i][1] = 0.0;
976             inducedDipoleCR[0][i][2] = 0.0;
977           }
978         }
979       }
980 
981       @Override
982       public IntegerSchedule schedule() {
983         return IntegerSchedule.fixed();
984       }
985     }
986   }
987 
988   /**
989    * Update the preconditioner and conjugate search direction.
990    */
991   private class UpdateConjugateRegion extends ParallelRegion {
992 
993     private final UpdatePreconditionerLoop[] updatePreconditionerLoops;
994     private final UpdateConjugateLoop[] updateConjugateLoops;
995     private final SharedDouble betaShared;
996     private final SharedDouble betaCRShared;
997     public double previousRDotZ;
998     public double previousRDotZCR;
999 
1000     public UpdateConjugateRegion(int nt) {
1001       updatePreconditionerLoops = new UpdatePreconditionerLoop[nt];
1002       updateConjugateLoops = new UpdateConjugateLoop[nt];
1003       betaShared = new SharedDouble();
1004       betaCRShared = new SharedDouble();
1005     }
1006 
1007     @Override
1008     public void run() throws Exception {
1009       try {
1010         int ti = getThreadIndex();
1011         if (updatePreconditionerLoops[ti] == null) {
1012           updatePreconditionerLoops[ti] = new UpdatePreconditionerLoop();
1013           updateConjugateLoops[ti] = new UpdateConjugateLoop();
1014         }
1015         int nAtoms = atoms.length;
1016         execute(0, nAtoms - 1, updatePreconditionerLoops[ti]);
1017         execute(0, nAtoms - 1, updateConjugateLoops[ti]);
1018       } catch (Exception e) {
1019         String message =
1020             "Fatal exception computing the mutual induced dipoles in thread "
1021                 + getThreadIndex()
1022                 + "\n";
1023         logger.log(Level.SEVERE, message, e);
1024       }
1025     }
1026 
1027     @Override
1028     public void start() {
1029       betaShared.set(0.0);
1030       betaCRShared.set(0.0);
1031       if (previousRDotZ == 0.0) {
1032         previousRDotZ = 1.0;
1033       }
1034       if (previousRDotZCR == 0.0) {
1035         previousRDotZCR = 1.0;
1036       }
1037     }
1038 
1039     private class UpdatePreconditionerLoop extends IntegerForLoop {
1040 
1041       public double rDotZ;
1042       public double rDotZCR;
1043 
1044       private final double[] deltaZ = new double[3];
1045       private final double[] deltaZCR = new double[3];
1046 
1047       @Override
1048       public void finish() {
1049         betaShared.addAndGet(rDotZ / previousRDotZ);
1050         betaCRShared.addAndGet(rDotZCR / previousRDotZCR);
1051       }
1052 
1053       @Override
1054       public void run(int lb, int ub) throws Exception {
1055         for (int i = lb; i <= ub; i++) {
1056           if (use[i]) {
1057             // Save the negative of the current preconditioner z_k.
1058             deltaZ[0] = -zpre[0][i];
1059             deltaZ[1] = -zpre[1][i];
1060             deltaZ[2] = -zpre[2][i];
1061             deltaZCR[0] = -zpreCR[0][i];
1062             deltaZCR[1] = -zpreCR[1][i];
1063             deltaZCR[2] = -zpreCR[2][i];
1064 
1065             // Update the preconditioner
1066             // z_k+1 = M^(-1) r_k+1
1067             //       = polar * (E_r_k+1 + diagonal scale * r_k+1)
1068             double polar = polarizability[i];
1069             zpre[0][i] = polar * (field.getX(i) + preconditionerScale * r[0][i]);
1070             zpre[1][i] = polar * (field.getY(i) + preconditionerScale * r[1][i]);
1071             zpre[2][i] = polar * (field.getZ(i) + preconditionerScale * r[2][i]);
1072             zpreCR[0][i] = polar * (fieldCR.getX(i) + preconditionerScale * rCR[0][i]);
1073             zpreCR[1][i] = polar * (fieldCR.getY(i) + preconditionerScale * rCR[1][i]);
1074             zpreCR[2][i] = polar * (fieldCR.getZ(i) + preconditionerScale * rCR[2][i]);
1075 
1076             switch (preconditionMode) {
1077               case FLEXIBLE:
1078                 // Set work to z_k+1 - z_k
1079                 deltaZ[0] += zpre[0][i];
1080                 deltaZ[1] += zpre[1][i];
1081                 deltaZ[2] += zpre[2][i];
1082                 deltaZCR[0] += zpreCR[0][i];
1083                 deltaZCR[1] += zpreCR[1][i];
1084                 deltaZCR[2] += zpreCR[2][i];
1085                 // For Flexible, compute the dot product of the residual with the change in preconditioner (z_k+1 - z_k).
1086                 rDotZ += r[0][i] * deltaZ[0] + r[1][i] * deltaZ[1] + r[2][i] * deltaZ[2];
1087                 rDotZCR += rCR[0][i] * deltaZCR[0] + rCR[1][i] * deltaZCR[1] + rCR[2][i] * deltaZCR[2];
1088                 break;
1089               case STEEPEST_DECENT:
1090                 // For Steepest-Decent, rDotZ is zero (i.e. Beta is zero).
1091                 continue;
1092               default:
1093               case FLETCHER_REEVES:
1094                 // For Fletcher-Reeves, compute the dot product of the residual and preconditioner (z_k+1).
1095                 rDotZ += r[0][i] * zpre[0][i] + r[1][i] * zpre[1][i] + r[2][i] * zpre[2][i];
1096                 rDotZCR += rCR[0][i] * zpreCR[0][i] + rCR[1][i] * zpreCR[1][i] + rCR[2][i] * zpreCR[2][i];
1097             }
1098           } else {
1099             zpre[0][i] = 0.0;
1100             zpre[1][i] = 0.0;
1101             zpre[2][i] = 0.0;
1102             zpreCR[0][i] = 0.0;
1103             zpreCR[1][i] = 0.0;
1104             zpreCR[2][i] = 0.0;
1105           }
1106         }
1107       }
1108 
1109       @Override
1110       public IntegerSchedule schedule() {
1111         return IntegerSchedule.fixed();
1112       }
1113 
1114       @Override
1115       public void start() {
1116         rDotZ = 0.0;
1117         rDotZCR = 0.0;
1118       }
1119     }
1120 
1121     private class UpdateConjugateLoop extends IntegerForLoop {
1122 
1123       @Override
1124       public void run(int lb, int ub) throws Exception {
1125         double beta = betaShared.get();
1126         double betaCR = betaCRShared.get();
1127         for (int i = lb; i <= ub; i++) {
1128           if (use[i]) {
1129             // Update the conjugate vector.
1130             p[0][i] = zpre[0][i] + beta * p[0][i];
1131             p[1][i] = zpre[1][i] + beta * p[1][i];
1132             p[2][i] = zpre[2][i] + beta * p[2][i];
1133             pCR[0][i] = zpreCR[0][i] + betaCR * pCR[0][i];
1134             pCR[1][i] = zpreCR[1][i] + betaCR * pCR[1][i];
1135             pCR[2][i] = zpreCR[2][i] + betaCR * pCR[2][i];
1136             // Store induced dipoles.
1137             vec[0][i] = inducedDipole[0][i][0];
1138             vec[1][i] = inducedDipole[0][i][1];
1139             vec[2][i] = inducedDipole[0][i][2];
1140             vecCR[0][i] = inducedDipoleCR[0][i][0];
1141             vecCR[1][i] = inducedDipoleCR[0][i][1];
1142             vecCR[2][i] = inducedDipoleCR[0][i][2];
1143             // Load the conjugate vector whose field will be computed next.
1144             inducedDipole[0][i][0] = p[0][i];
1145             inducedDipole[0][i][1] = p[1][i];
1146             inducedDipole[0][i][2] = p[2][i];
1147             inducedDipoleCR[0][i][0] = pCR[0][i];
1148             inducedDipoleCR[0][i][1] = pCR[1][i];
1149             inducedDipoleCR[0][i][2] = pCR[2][i];
1150           } else {
1151             p[0][i] = 0.0;
1152             p[1][i] = 0.0;
1153             p[2][i] = 0.0;
1154             pCR[0][i] = 0.0;
1155             pCR[1][i] = 0.0;
1156             pCR[2][i] = 0.0;
1157             vec[0][i] = 0.0;
1158             vec[1][i] = 0.0;
1159             vec[2][i] = 0.0;
1160             vecCR[0][i] = 0.0;
1161             vecCR[1][i] = 0.0;
1162             vecCR[2][i] = 0.0;
1163             inducedDipole[0][i][0] = 0.0;
1164             inducedDipole[0][i][1] = 0.0;
1165             inducedDipole[0][i][2] = 0.0;
1166             inducedDipoleCR[0][i][0] = 0.0;
1167             inducedDipoleCR[0][i][1] = 0.0;
1168             inducedDipoleCR[0][i][2] = 0.0;
1169           }
1170         }
1171       }
1172 
1173       @Override
1174       public IntegerSchedule schedule() {
1175         return IntegerSchedule.fixed();
1176       }
1177     }
1178   }
1179 
1180   /**
1181    * Compute the induced field due to the Uind = polarizability * Eresidual.
1182    */
1183   private void computePreconditioner() {
1184     try {
1185       // Reset the preconditioner field.
1186       field.reset(parallelTeam);
1187       fieldCR.reset(parallelTeam);
1188 
1189       // Use a special Ewald coefficient for the pre-conditioner.
1190       double aewaldTemp = ewaldParameters.aewald;
1191       ewaldParameters.setEwaldParameters(ewaldParameters.off, preconditionerEwald);
1192       parallelTeam.execute(preconditionerRegion);
1193       ewaldParameters.setEwaldParameters(ewaldParameters.off, aewaldTemp);
1194 
1195       // Reduce the preconditioner field.
1196       field.reduce(parallelTeam);
1197       fieldCR.reduce(parallelTeam);
1198 
1199       // Scale the pre-conditioner field by the inverse of the dielectric.
1200       if (dieletric > 1.0) {
1201         int nAtoms = atoms.length;
1202         double invDielectric = 1.0 / dieletric;
1203         for (int i = 0; i < nAtoms; i++) {
1204           field.scale(0, i, invDielectric);
1205           fieldCR.scale(0, i, invDielectric);
1206         }
1207       }
1208 
1209     } catch (Exception e) {
1210       String message = "Exception computing the induced field for the preconditioner.";
1211       logger.log(Level.SEVERE, message, e);
1212     }
1213   }
1214 
1215   /**
1216    * Evaluate the real space field due to induced dipoles using a short cutoff (~3-4 A).
1217    */
1218   private class PreconditionerRegion extends ParallelRegion {
1219 
1220     private final InducedPreconditionerFieldLoop[] inducedPreconditionerFieldLoop;
1221 
1222     PreconditionerRegion(int threadCount) {
1223       inducedPreconditionerFieldLoop = new InducedPreconditionerFieldLoop[threadCount];
1224     }
1225 
1226     @Override
1227     public void start() {
1228       pmeTimings.realSpaceSCFTotalTime -= System.nanoTime();
1229     }
1230 
1231     @Override
1232     public void finish() {
1233       pmeTimings.realSpaceSCFTotalTime += System.nanoTime();
1234     }
1235 
1236     @Override
1237     public void run() {
1238       int threadIndex = getThreadIndex();
1239       if (inducedPreconditionerFieldLoop[threadIndex] == null) {
1240         inducedPreconditionerFieldLoop[threadIndex] = new InducedPreconditionerFieldLoop();
1241       }
1242       try {
1243         int nAtoms = atoms.length;
1244         execute(0, nAtoms - 1, inducedPreconditionerFieldLoop[threadIndex]);
1245       } catch (Exception e) {
1246         String message =
1247             "Fatal exception computing the induced real space field in thread "
1248                 + getThreadIndex()
1249                 + "\n";
1250         logger.log(Level.SEVERE, message, e);
1251       }
1252     }
1253 
1254     private class InducedPreconditionerFieldLoop extends IntegerForLoop {
1255 
1256       private int threadID;
1257       private double[] x, y, z;
1258 
1259       InducedPreconditionerFieldLoop() {
1260       }
1261 
1262       @Override
1263       public void finish() {
1264         pmeTimings.realSpaceSCFTime[threadID] += System.nanoTime();
1265       }
1266 
1267       @Override
1268       public void run(int lb, int ub) {
1269         final double[] dx = new double[3];
1270         final double[] rk = new double[3];
1271         final double[] rCRk = new double[3];
1272         final double[][] transOp = new double[3][3];
1273         // Loop over a chunk of atoms.
1274         int[][] lists = preconditionerLists[0];
1275         int[] counts = preconditionerCounts[0];
1276         for (int i = lb; i <= ub; i++) {
1277           if (!use[i]) {
1278             continue;
1279           }
1280           double fx = 0.0;
1281           double fy = 0.0;
1282           double fz = 0.0;
1283           double px = 0.0;
1284           double py = 0.0;
1285           double pz = 0.0;
1286           final double xi = x[i];
1287           final double yi = y[i];
1288           final double zi = z[i];
1289           // Multiply the residue field rCR by the polarizability.
1290           final double polari = polarizability[i];
1291           final double uix = polari * r[0][i];
1292           final double uiy = polari * r[1][i];
1293           final double uiz = polari * r[2][i];
1294           final double pix = polari * rCR[0][i];
1295           final double piy = polari * rCR[1][i];
1296           final double piz = polari * rCR[2][i];
1297           final double pdi = ipdamp[i];
1298           final double pti = thole[i];
1299           // Loop over the neighbor list.
1300           final int[] list = lists[i];
1301           final int npair = counts[i];
1302           for (int j = 0; j < npair; j++) {
1303             final int k = list[j];
1304             if (!use[k]) {
1305               continue;
1306             }
1307             final double pdk = ipdamp[k];
1308             final double ptk = thole[k];
1309             dx[0] = x[k] - xi;
1310             dx[1] = y[k] - yi;
1311             dx[2] = z[k] - zi;
1312             final double r2 = crystal.image(dx);
1313             // Calculate the error function damping terms.
1314             final double r = sqrt(r2);
1315             final double rr1 = 1.0 / r;
1316             final double rr2 = rr1 * rr1;
1317             final double ralpha = ewaldParameters.aewald * r;
1318             final double exp2a = exp(-ralpha * ralpha);
1319             final double bn0 = erfc(ralpha) * rr1;
1320             final double bn1 = (bn0 + ewaldParameters.an0 * exp2a) * rr2;
1321             final double bn2 = (3.0 * bn1 + ewaldParameters.an1 * exp2a) * rr2;
1322             double scale3 = 1.0;
1323             double scale5 = 1.0;
1324             double damp = pdi * pdk;
1325             final double pgamma = min(pti, ptk);
1326             final double rdamp = r * damp;
1327             damp = -pgamma * rdamp * rdamp * rdamp;
1328             if (damp > -50.0) {
1329               final double expdamp = exp(damp);
1330               scale3 = 1.0 - expdamp;
1331               scale5 = 1.0 - expdamp * (1.0 - damp);
1332             }
1333             double rr3 = rr1 * rr2;
1334             double rr5 = 3.0 * rr3 * rr2;
1335             rr3 *= (1.0 - scale3);
1336             rr5 *= (1.0 - scale5);
1337             final double xr = dx[0];
1338             final double yr = dx[1];
1339             final double zr = dx[2];
1340             // Multiply the residue field rCR by the polarizability.
1341             final double polarK = polarizability[k];
1342             final double ukx = polarK * PCGSolver.this.r[0][k];
1343             final double uky = polarK * PCGSolver.this.r[1][k];
1344             final double ukz = polarK * PCGSolver.this.r[2][k];
1345             final double ukr = ukx * xr + uky * yr + ukz * zr;
1346             final double bn2ukr = bn2 * ukr;
1347             final double fimx = -bn1 * ukx + bn2ukr * xr;
1348             final double fimy = -bn1 * uky + bn2ukr * yr;
1349             final double fimz = -bn1 * ukz + bn2ukr * zr;
1350             final double rr5ukr = rr5 * ukr;
1351             final double fidx = -rr3 * ukx + rr5ukr * xr;
1352             final double fidy = -rr3 * uky + rr5ukr * yr;
1353             final double fidz = -rr3 * ukz + rr5ukr * zr;
1354             fx += (fimx - fidx);
1355             fy += (fimy - fidy);
1356             fz += (fimz - fidz);
1357             // Multiply the residue field rCR by the polarizability.
1358             final double pkx = polarK * PCGSolver.this.rCR[0][k];
1359             final double pky = polarK * PCGSolver.this.rCR[1][k];
1360             final double pkz = polarK * PCGSolver.this.rCR[2][k];
1361             final double pkr = pkx * xr + pky * yr + pkz * zr;
1362             final double bn2pkr = bn2 * pkr;
1363             final double pimx = -bn1 * pkx + bn2pkr * xr;
1364             final double pimy = -bn1 * pky + bn2pkr * yr;
1365             final double pimz = -bn1 * pkz + bn2pkr * zr;
1366             final double rr5pkr = rr5 * pkr;
1367             final double pidx = -rr3 * pkx + rr5pkr * xr;
1368             final double pidy = -rr3 * pky + rr5pkr * yr;
1369             final double pidz = -rr3 * pkz + rr5pkr * zr;
1370             px += (pimx - pidx);
1371             py += (pimy - pidy);
1372             pz += (pimz - pidz);
1373             final double uir = uix * xr + uiy * yr + uiz * zr;
1374             final double bn2uir = bn2 * uir;
1375             final double fkmx = -bn1 * uix + bn2uir * xr;
1376             final double fkmy = -bn1 * uiy + bn2uir * yr;
1377             final double fkmz = -bn1 * uiz + bn2uir * zr;
1378             final double rr5uir = rr5 * uir;
1379             final double fkdx = -rr3 * uix + rr5uir * xr;
1380             final double fkdy = -rr3 * uiy + rr5uir * yr;
1381             final double fkdz = -rr3 * uiz + rr5uir * zr;
1382             field.add(threadID, k, fkmx - fkdx, fkmy - fkdy, fkmz - fkdz);
1383             final double pir = pix * xr + piy * yr + piz * zr;
1384             final double bn2pir = bn2 * pir;
1385             final double pkmx = -bn1 * pix + bn2pir * xr;
1386             final double pkmy = -bn1 * piy + bn2pir * yr;
1387             final double pkmz = -bn1 * piz + bn2pir * zr;
1388             final double rr5pir = rr5 * pir;
1389             final double pkdx = -rr3 * pix + rr5pir * xr;
1390             final double pkdy = -rr3 * piy + rr5pir * yr;
1391             final double pkdz = -rr3 * piz + rr5pir * zr;
1392             fieldCR.add(threadID, k, pkmx - pkdx, pkmy - pkdy, pkmz - pkdz);
1393           }
1394           field.add(threadID, i, fx, fy, fz);
1395           fieldCR.add(threadID, i, px, py, pz);
1396         }
1397 
1398         // Loop over symmetry mates.
1399         List<SymOp> symOps = crystal.spaceGroup.symOps;
1400         int nSymm = symOps.size();
1401         for (int iSymm = 1; iSymm < nSymm; iSymm++) {
1402           SymOp symOp = crystal.spaceGroup.getSymOp(iSymm);
1403           crystal.getTransformationOperator(symOp, transOp);
1404           lists = preconditionerLists[iSymm];
1405           counts = preconditionerCounts[iSymm];
1406           final double[] xs = coordinates[iSymm][0];
1407           final double[] ys = coordinates[iSymm][1];
1408           final double[] zs = coordinates[iSymm][2];
1409           // Loop over a chunk of atoms.
1410           for (int i = lb; i <= ub; i++) {
1411             if (!use[i]) {
1412               continue;
1413             }
1414             double fx = 0.0;
1415             double fy = 0.0;
1416             double fz = 0.0;
1417             double px = 0.0;
1418             double py = 0.0;
1419             double pz = 0.0;
1420             final double xi = x[i];
1421             final double yi = y[i];
1422             final double zi = z[i];
1423             final double polar = polarizability[i];
1424             final double uix = polar * PCGSolver.this.r[0][i];
1425             final double uiy = polar * PCGSolver.this.r[1][i];
1426             final double uiz = polar * PCGSolver.this.r[2][i];
1427             final double pix = polar * PCGSolver.this.rCR[0][i];
1428             final double piy = polar * PCGSolver.this.rCR[1][i];
1429             final double piz = polar * PCGSolver.this.rCR[2][i];
1430             final double pdi = ipdamp[i];
1431             final double pti = thole[i];
1432             // Loop over the neighbor list.
1433             final int[] list = lists[i];
1434             final int npair = counts[i];
1435             for (int j = 0; j < npair; j++) {
1436               final int k = list[j];
1437               if (!use[k]) {
1438                 continue;
1439               }
1440               double selfScale = 1.0;
1441               if (i == k) {
1442                 selfScale = 0.5;
1443               }
1444               final double pdk = ipdamp[k];
1445               final double ptk = thole[k];
1446               dx[0] = xs[k] - xi;
1447               dx[1] = ys[k] - yi;
1448               dx[2] = zs[k] - zi;
1449               final double r2 = crystal.image(dx);
1450               // Calculate the error function damping terms.
1451               final double r = sqrt(r2);
1452               final double rr1 = 1.0 / r;
1453               final double rr2 = rr1 * rr1;
1454               final double ralpha = ewaldParameters.aewald * r;
1455               final double exp2a = exp(-ralpha * ralpha);
1456               final double bn0 = erfc(ralpha) * rr1;
1457               final double bn1 = (bn0 + ewaldParameters.an0 * exp2a) * rr2;
1458               final double bn2 = (3.0 * bn1 + ewaldParameters.an1 * exp2a) * rr2;
1459               double scale3 = 1.0;
1460               double scale5 = 1.0;
1461               double damp = pdi * pdk;
1462               final double pgamma = min(pti, ptk);
1463               final double rdamp = r * damp;
1464               damp = -pgamma * rdamp * rdamp * rdamp;
1465               if (damp > -50.0) {
1466                 final double expdamp = exp(damp);
1467                 scale3 = 1.0 - expdamp;
1468                 scale5 = 1.0 - expdamp * (1.0 - damp);
1469               }
1470               double rr3 = rr1 * rr2;
1471               double rr5 = 3.0 * rr3 * rr2;
1472               rr3 *= (1.0 - scale3);
1473               rr5 *= (1.0 - scale5);
1474               final double xr = dx[0];
1475               final double yr = dx[1];
1476               final double zr = dx[2];
1477               // Rotate the residual field.
1478               rk[0] = PCGSolver.this.r[0][k];
1479               rk[1] = PCGSolver.this.r[1][k];
1480               rk[2] = PCGSolver.this.r[2][k];
1481               crystal.applySymRot(rk, rk, symOp);
1482               rCRk[0] = PCGSolver.this.rCR[0][k];
1483               rCRk[1] = PCGSolver.this.rCR[1][k];
1484               rCRk[2] = PCGSolver.this.rCR[2][k];
1485               crystal.applySymRot(rCRk, rCRk, symOp);
1486               // Multiply the residue field by the polarizability.
1487               final double polarK = polarizability[k];
1488               final double ukx = polarK * rk[0];
1489               final double uky = polarK * rk[1];
1490               final double ukz = polarK * rk[2];
1491               final double pkx = polarK * rCRk[0];
1492               final double pky = polarK * rCRk[1];
1493               final double pkz = polarK * rCRk[2];
1494               final double ukr = ukx * xr + uky * yr + ukz * zr;
1495               final double bn2ukr = bn2 * ukr;
1496               final double fimx = -bn1 * ukx + bn2ukr * xr;
1497               final double fimy = -bn1 * uky + bn2ukr * yr;
1498               final double fimz = -bn1 * ukz + bn2ukr * zr;
1499               final double rr5ukr = rr5 * ukr;
1500               final double fidx = -rr3 * ukx + rr5ukr * xr;
1501               final double fidy = -rr3 * uky + rr5ukr * yr;
1502               final double fidz = -rr3 * ukz + rr5ukr * zr;
1503               fx += selfScale * (fimx - fidx);
1504               fy += selfScale * (fimy - fidy);
1505               fz += selfScale * (fimz - fidz);
1506               final double pkr = pkx * xr + pky * yr + pkz * zr;
1507               final double bn2pkr = bn2 * pkr;
1508               final double pimx = -bn1 * pkx + bn2pkr * xr;
1509               final double pimy = -bn1 * pky + bn2pkr * yr;
1510               final double pimz = -bn1 * pkz + bn2pkr * zr;
1511               final double rr5pkr = rr5 * pkr;
1512               final double pidx = -rr3 * pkx + rr5pkr * xr;
1513               final double pidy = -rr3 * pky + rr5pkr * yr;
1514               final double pidz = -rr3 * pkz + rr5pkr * zr;
1515               px += selfScale * (pimx - pidx);
1516               py += selfScale * (pimy - pidy);
1517               pz += selfScale * (pimz - pidz);
1518               final double uir = uix * xr + uiy * yr + uiz * zr;
1519               final double bn2uir = bn2 * uir;
1520               final double fkmx = -bn1 * uix + bn2uir * xr;
1521               final double fkmy = -bn1 * uiy + bn2uir * yr;
1522               final double fkmz = -bn1 * uiz + bn2uir * zr;
1523               final double rr5uir = rr5 * uir;
1524               final double fkdx = -rr3 * uix + rr5uir * xr;
1525               final double fkdy = -rr3 * uiy + rr5uir * yr;
1526               final double fkdz = -rr3 * uiz + rr5uir * zr;
1527               double xc = selfScale * (fkmx - fkdx);
1528               double yc = selfScale * (fkmy - fkdy);
1529               double zc = selfScale * (fkmz - fkdz);
1530               double fkx = (xc * transOp[0][0] + yc * transOp[1][0] + zc * transOp[2][0]);
1531               double fky = (xc * transOp[0][1] + yc * transOp[1][1] + zc * transOp[2][1]);
1532               double fkz = (xc * transOp[0][2] + yc * transOp[1][2] + zc * transOp[2][2]);
1533               field.add(threadID, k, fkx, fky, fkz);
1534               final double pir = pix * xr + piy * yr + piz * zr;
1535               final double bn2pir = bn2 * pir;
1536               final double pkmx = -bn1 * pix + bn2pir * xr;
1537               final double pkmy = -bn1 * piy + bn2pir * yr;
1538               final double pkmz = -bn1 * piz + bn2pir * zr;
1539               final double rr5pir = rr5 * pir;
1540               final double pkdx = -rr3 * pix + rr5pir * xr;
1541               final double pkdy = -rr3 * piy + rr5pir * yr;
1542               final double pkdz = -rr3 * piz + rr5pir * zr;
1543               xc = selfScale * (pkmx - pkdx);
1544               yc = selfScale * (pkmy - pkdy);
1545               zc = selfScale * (pkmz - pkdz);
1546               fkx = (xc * transOp[0][0] + yc * transOp[1][0] + zc * transOp[2][0]);
1547               fky = (xc * transOp[0][1] + yc * transOp[1][1] + zc * transOp[2][1]);
1548               fkz = (xc * transOp[0][2] + yc * transOp[1][2] + zc * transOp[2][2]);
1549               fieldCR.add(threadID, k, fkx, fky, fkz);
1550             }
1551             field.add(threadID, i, fx, fy, fz);
1552             fieldCR.add(threadID, i, px, py, pz);
1553           }
1554         }
1555       }
1556 
1557       @Override
1558       public IntegerSchedule schedule() {
1559         return realSpaceSchedule;
1560       }
1561 
1562       @Override
1563       public void start() {
1564         threadID = getThreadIndex();
1565         pmeTimings.realSpaceSCFTime[threadID] -= System.nanoTime();
1566         x = coordinates[0][0];
1567         y = coordinates[0][1];
1568         z = coordinates[0][2];
1569       }
1570     }
1571   }
1572 }