View Javadoc
1   // ******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2024.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  // ******************************************************************************
38  package ffx.potential.nonbonded;
39  
40  import edu.rit.pj.IntegerForLoop;
41  import edu.rit.pj.IntegerSchedule;
42  import edu.rit.pj.ParallelRegion;
43  import edu.rit.pj.ParallelTeam;
44  import ffx.crystal.Crystal;
45  import ffx.numerics.fft.Complex;
46  import ffx.numerics.fft.Complex3DParallel;
47  import ffx.potential.bonded.Atom;
48  import ffx.potential.parameters.ForceField;
49  import ffx.utilities.FFXProperty;
50  import ffx.utilities.PropertyGroup;
51  import org.apache.commons.configuration2.CompositeConfiguration;
52  
53  import java.nio.DoubleBuffer;
54  import java.util.logging.Level;
55  import java.util.logging.Logger;
56  
57  import static ffx.numerics.fft.Complex3D.iComplex3D;
58  import static ffx.numerics.math.ScalarMath.mod;
59  import static ffx.numerics.spline.UniformBSpline.bSpline;
60  import static ffx.numerics.spline.UniformBSpline.bSplineDerivatives;
61  import static ffx.potential.parameters.MultipoleType.t000;
62  import static ffx.potential.parameters.MultipoleType.t001;
63  import static ffx.potential.parameters.MultipoleType.t002;
64  import static ffx.potential.parameters.MultipoleType.t003;
65  import static ffx.potential.parameters.MultipoleType.t010;
66  import static ffx.potential.parameters.MultipoleType.t011;
67  import static ffx.potential.parameters.MultipoleType.t012;
68  import static ffx.potential.parameters.MultipoleType.t020;
69  import static ffx.potential.parameters.MultipoleType.t021;
70  import static ffx.potential.parameters.MultipoleType.t030;
71  import static ffx.potential.parameters.MultipoleType.t100;
72  import static ffx.potential.parameters.MultipoleType.t101;
73  import static ffx.potential.parameters.MultipoleType.t102;
74  import static ffx.potential.parameters.MultipoleType.t110;
75  import static ffx.potential.parameters.MultipoleType.t111;
76  import static ffx.potential.parameters.MultipoleType.t120;
77  import static ffx.potential.parameters.MultipoleType.t200;
78  import static ffx.potential.parameters.MultipoleType.t201;
79  import static ffx.potential.parameters.MultipoleType.t210;
80  import static ffx.potential.parameters.MultipoleType.t300;
81  import static java.lang.Math.fma;
82  import static java.lang.String.format;
83  import static java.lang.System.arraycopy;
84  import static org.apache.commons.math3.util.FastMath.PI;
85  import static org.apache.commons.math3.util.FastMath.cos;
86  import static org.apache.commons.math3.util.FastMath.exp;
87  import static org.apache.commons.math3.util.FastMath.floor;
88  import static org.apache.commons.math3.util.FastMath.max;
89  import static org.apache.commons.math3.util.FastMath.min;
90  import static org.apache.commons.math3.util.FastMath.pow;
91  import static org.apache.commons.math3.util.FastMath.round;
92  import static org.apache.commons.math3.util.FastMath.sin;
93  import static org.apache.commons.math3.util.FastMath.sqrt;
94  
95  /**
96   * The Reciprocal Space class computes the reciprocal space contribution to {@link ParticleMeshEwald}
97   * for the AMOEBA force field.
98   *
99   * <ol>
100  *   <li>Assignment of polarizable multipole charge density to the 3D grid, via b-Splines, is
101  *       parallelized using a spatial decomposition.
102  *   <li>The convolution depends on methods of the {@link ffx.numerics.fft.Real3DParallel} and
103  *       {@link ffx.numerics.fft.Complex3DParallel} classes.
104  *   <li>Finally, the electric potential and its gradients are collected, in parallel, off the grid
105  *       using b-Splines.
106  * </ol>
107  *
108  * @author Michael J. Schnieders
109  * @since 1.0
110  */
111 public class ReciprocalSpace {
112 
113   private static final Logger logger = Logger.getLogger(ReciprocalSpace.class.getName());
114   /**
115    * First lookup index to pack a 2D tensor into a 1D array.
116    */
117   private static final int[] qi1 = {0, 1, 2, 0, 0, 1};
118   /**
119    * Second lookup index to pack a 2D tensor into a 1D array.
120    */
121   private static final int[] qi2 = {0, 1, 2, 1, 2, 2};
122 
123   private static final double toSeconds = 1.0e-9;
124   private final ParticleMeshEwald particleMeshEwald;
125 
126   private final ForceField.ELEC_FORM elecForm;
127 
128   private final ForceField forceField;
129   /**
130    * Number of unit cell symmetry operators.
131    */
132   private final int nSymm;
133   /**
134    * Number of threads.
135    */
136   private final int threadCount;
137 
138   /**
139    * The default b-Spline order to use for discretization to/from the reciprocal grid.
140    */
141   private static final int DEFAULT_PME_ORDER = 5;
142 
143   /**
144    * The b-Spline order to use for discretization to/from the reciprocal grid.
145    */
146   @FFXProperty(name = "pme-order", clazz = Integer.class, propertyGroup = PropertyGroup.ParticleMeshEwald,
147       defaultValue = "5", description = """
148       Sets the order of the B-spline interpolation used during particle mesh Ewald summation for partial charge
149       or atomic multipole electrostatics. A default value of 5 is used in the absence of the pme-order keyword.
150       """)
151   private final int bSplineOrder;
152   /**
153    * Three derivatives of the potential are needed for AMOEBA. Specifically, the field gradient is
154    * used to compute the energy of the quadrupole moment and the 2nd field gradient (3rd derivative
155    * of the reciprocal potential) the energy gradient.
156    */
157   private final int derivOrder = 3;
158   /**
159    * The Ewald convergence parameter.
160    */
161   private final double aEwald;
162   /**
163    * Parallel team instance.
164    */
165   private final ParallelTeam parallelTeam;
166   private final ParallelTeam fftTeam;
167   private final BSplineRegion bSplineRegion;
168   private final FractionalMultipoleRegion fractionalMultipoleRegion;
169   private final FractionalInducedRegion fractionalInducedRegion;
170   private final SpatialPermanentLoop[] spatialPermanentLoops;
171   private final SpatialInducedLoop[] spatialInducedLoops;
172   private final SlicePermanentLoop[] slicePermanentLoops;
173   private final SliceInducedLoop[] sliceInducedLoops;
174   private final RowPermanentLoop[] rowPermanentLoops;
175   private final RowInducedLoop[] rowInducedLoops;
176   /**
177    * Number of atoms for a given symmetry operator that a given thread is responsible for applying to
178    * the FFT grid. gridAtomCount[nSymm][nThread]
179    */
180   private final int[][] gridAtomCount;
181   /**
182    * Atom indices for a given symmetry operator that a given thread is responsible for applying to
183    * the FFT grid. gridAtomCount[nSymm][nThread][nAtoms]
184    */
185   private final int[][][] gridAtomList;
186   private final PermanentPhiRegion permanentPhiRegion;
187   private final InducedPhiRegion polarizationPhiRegion;
188   private final IntegerSchedule recipSchedule;
189   /**
190    * Convolution variables.
191    */
192   private final double[][] transformFieldMatrix = new double[10][10];
193   private final double[][] transformMultipoleMatrix = new double[10][10];
194   private final double[][] a = new double[3][3];
195   private double[][][] inducedDipoleFrac;
196   private double[][][] inducedDipoleFracCR;
197   /**
198    * The unit cell for the simulation. A ReplicatesCrystal will give equivalent results up to the
199    * limits of double precision, but is more expensive.
200    */
201   private Crystal crystal;
202   /**
203    * Array of atoms in the asymmetric unit.
204    */
205   private Atom[] atoms;
206   /**
207    * Number of atoms in the asymmetric unit.
208    */
209   private int nAtoms;
210 
211   private static final double DEFAULT_PME_MESH_DENSITY = 1.2;
212   private static final double oneThird = 1.0 / 3.0;
213 
214   @FFXProperty(name = "pme-mesh-density", clazz = Integer.class, propertyGroup = PropertyGroup.ParticleMeshEwald,
215       defaultValue = "1.2", description = """
216       The default in the absence of the pme-grid keyword is to set the grid size along each
217       axis to the smallest factor of 2, 3 and/or 5 that is at least as large as
218       pme-mesh-density times the axis length in Angstroms.
219       """)
220   private double density;
221 
222   /**
223    * The X-dimension of the FFT grid.
224    */
225   @FFXProperty(name = "pme-grid-x", clazz = Integer.class, propertyGroup = PropertyGroup.ParticleMeshEwald,
226       defaultValue = "NONE", description =
227       "Specifies the PME grid dimension along the x-axis and takes precedence over the pme-grid keyword.")
228   @FFXProperty(name = "pme-grid", clazz = Integer.class, propertyGroup = PropertyGroup.ParticleMeshEwald,
229       defaultValue = "NONE", description = """
230       [3 integers]
231       Sets the dimensions of the reciprocal space grid used during particle mesh Ewald
232       summation for electrostatics. The three modifiers give the size along the X-, Y- and Z- axes,
233       respectively. If either the Y- or Z-axis dimensions are omitted, then they are set equal
234       to the X-axis dimension. The default in the absence of the pme-grid keyword is to set the
235       grid size along each axis to the smallest value that can be factored by 2, 3, 4 and/or 5
236       and is at least as large as 1.2 times the axis length in Angstroms.
237       The value 1.2 can be changed using the pme-mesh-density property.
238       """)
239   private int fftX;
240 
241   /**
242    * The Y-dimension of the FFT grid.
243    */
244   @FFXProperty(name = "pme-grid-y", clazz = Integer.class, propertyGroup = PropertyGroup.ParticleMeshEwald,
245       defaultValue = "NONE", description =
246       "Specifies the PME grid dimension along the y-axis and takes precedence over the pme-grid keyword.")
247   private int fftY;
248 
249   /**
250    * The Z-dimension of the FFT grid.
251    */
252   @FFXProperty(name = "pme-grid-z", clazz = Integer.class, propertyGroup = PropertyGroup.ParticleMeshEwald,
253       defaultValue = "NONE", description =
254       "Specifies the PME grid dimension along the z-axis and takes precedence over the pme-grid keyword.")
255   private int fftZ;
256 
257   /**
258    * Number of doubles needed to compute a complex to complex 3D FFT (fftX * fftY * fftZ * 2).
259    */
260   private int fftSpace;
261   /**
262    * Reciprocal space grid. [fftSpace]
263    */
264   private double[] splineGrid;
265   /**
266    * Wraps the splineGrid.
267    */
268   private DoubleBuffer splineBuffer;
269   /**
270    * Reference to atomic coordinate array.
271    */
272   private double[][][] coordinates;
273   // Spline moments and perform the convolution.
274   private SpatialDensityRegion spatialDensityRegion;
275   private SliceRegion sliceRegion;
276   private RowRegion rowRegion;
277   private Complex3DParallel complex3DFFT;
278   private GridMethod gridMethod;
279   // Timing variables.
280   private long bSplineTotal, splinePermanentTotal, splineInducedTotal;
281   private long permanentPhiTotal, inducedPhiTotal, convTotal;
282   /**
283    * Timing variables.
284    */
285   private final long[] bSplineTime;
286   private final long[] splinePermanentTime;
287   private final long[] permanentPhiTime;
288   private final long[] splineInducedTime;
289   private final int[] splineCount;
290   private final long[] inducedPhiTime;
291 
292 
293   /**
294    * Reciprocal Space PME contribution.
295    *
296    * @param particleMeshEwald a {@link ParticleMeshEwald} object.
297    * @param crystal           a {@link ffx.crystal.Crystal} object.
298    * @param forceField        a {@link ffx.potential.parameters.ForceField} object.
299    * @param atoms             an array of {@link ffx.potential.bonded.Atom} objects.
300    * @param aewald            the Ewald parameter.
301    * @param fftTeam           a {@link edu.rit.pj.ParallelTeam} object.
302    * @param parallelTeam      a {@link edu.rit.pj.ParallelTeam} object.
303    */
304   public ReciprocalSpace(
305       ParticleMeshEwald particleMeshEwald,
306       Crystal crystal,
307       ForceField forceField,
308       Atom[] atoms,
309       double aewald,
310       ParallelTeam fftTeam,
311       ParallelTeam parallelTeam) {
312 
313     this.particleMeshEwald = particleMeshEwald;
314     this.crystal = crystal.getUnitCell();
315     this.forceField = forceField;
316     this.atoms = atoms;
317     this.nAtoms = atoms.length;
318     this.aEwald = aewald;
319     this.fftTeam = fftTeam;
320     this.parallelTeam = parallelTeam;
321     this.elecForm = particleMeshEwald.getElecForm();
322 
323     coordinates = particleMeshEwald.getCoordinates();
324     threadCount = parallelTeam.getThreadCount();
325     nSymm = this.crystal.spaceGroup.getNumberOfSymOps();
326 
327     // Construct the parallel convolution object.
328     boolean available;
329     String recipStrategy = null;
330     try {
331       recipStrategy = forceField.getString("RECIP_SCHEDULE");
332       IntegerSchedule.parse(recipStrategy);
333       available = true;
334     } catch (Exception e) {
335       available = false;
336     }
337     if (available) {
338       recipSchedule = IntegerSchedule.parse(recipStrategy);
339       logger.log(Level.INFO, "  Convolution schedule {0}", recipStrategy);
340     } else {
341       recipSchedule = IntegerSchedule.fixed();
342     }
343 
344     CompositeConfiguration properties = forceField.getProperties();
345     String gridString = properties.getString("grid-method", "SPATIAL").toUpperCase();
346     try {
347       gridMethod = GridMethod.valueOf(gridString);
348     } catch (Exception e) {
349       gridMethod = GridMethod.SPATIAL;
350     }
351 
352     bSplineOrder = forceField.getInteger("PME_ORDER", DEFAULT_PME_ORDER);
353 
354     // Initialize convolution objects that may be re-allocated during NPT simulations.
355     double density = initConvolution();
356 
357     // Log PME reciprocal space parameters.
358     if (logger.isLoggable(Level.INFO)) {
359       StringBuilder sb = new StringBuilder();
360       sb.append(format("    B-Spline Order:                    %8d\n", bSplineOrder));
361       sb.append(format("    Mesh Density:                      %8.3f\n", density));
362       sb.append(format("    Mesh Dimensions:              (%3d,%3d,%3d)\n", fftX, fftY, fftZ));
363       sb.append(format("    Grid Method:                       %8s\n", gridMethod.toString()));
364       logger.info(sb.toString());
365     }
366 
367     // Construct the parallel BSplineRegion, DensityLoops and Phi objects.
368     bSplineRegion = new BSplineRegion();
369     fractionalMultipoleRegion = new FractionalMultipoleRegion();
370     fractionalInducedRegion = new FractionalInducedRegion();
371     switch (gridMethod) {
372       default -> {
373         spatialPermanentLoops = new SpatialPermanentLoop[threadCount];
374         spatialInducedLoops = new SpatialInducedLoop[threadCount];
375         for (int i = 0; i < threadCount; i++) {
376           spatialPermanentLoops[i] = new SpatialPermanentLoop(spatialDensityRegion, bSplineRegion);
377           spatialInducedLoops[i] = new SpatialInducedLoop(spatialDensityRegion, bSplineRegion);
378         }
379         slicePermanentLoops = null;
380         sliceInducedLoops = null;
381         rowPermanentLoops = null;
382         rowInducedLoops = null;
383         gridAtomCount = null;
384         gridAtomList = null;
385       }
386       case ROW -> {
387         rowPermanentLoops = new RowPermanentLoop[threadCount];
388         rowInducedLoops = new RowInducedLoop[threadCount];
389         for (int i = 0; i < threadCount; i++) {
390           rowPermanentLoops[i] = new RowPermanentLoop(rowRegion, bSplineRegion);
391           rowInducedLoops[i] = new RowInducedLoop(rowRegion, bSplineRegion);
392         }
393         gridAtomCount = new int[nSymm][threadCount];
394         gridAtomList = new int[nSymm][threadCount][nAtoms];
395         spatialPermanentLoops = null;
396         spatialInducedLoops = null;
397         slicePermanentLoops = null;
398         sliceInducedLoops = null;
399       }
400       case SLICE -> {
401         slicePermanentLoops = new SlicePermanentLoop[threadCount];
402         sliceInducedLoops = new SliceInducedLoop[threadCount];
403         for (int i = 0; i < threadCount; i++) {
404           slicePermanentLoops[i] = new SlicePermanentLoop(sliceRegion, bSplineRegion);
405           sliceInducedLoops[i] = new SliceInducedLoop(sliceRegion, bSplineRegion);
406         }
407         gridAtomCount = new int[nSymm][threadCount];
408         gridAtomList = new int[nSymm][threadCount][nAtoms];
409         spatialPermanentLoops = null;
410         spatialInducedLoops = null;
411         rowPermanentLoops = null;
412         rowInducedLoops = null;
413       }
414     }
415     permanentPhiRegion = new PermanentPhiRegion(bSplineRegion);
416     polarizationPhiRegion = new InducedPhiRegion(bSplineRegion);
417 
418     // Initialize timing variables.
419     bSplineTime = new long[threadCount];
420     splinePermanentTime = new long[threadCount];
421     splineInducedTime = new long[threadCount];
422     splineCount = new int[threadCount];
423     permanentPhiTime = new long[threadCount];
424     inducedPhiTime = new long[threadCount];
425   }
426 
427   /**
428    * Computes the modulus of the discrete Fourier Transform of "bsarray" and stores it in "bsmod".
429    *
430    * @param bsmod   B-Spline modulus.
431    * @param bsarray B-Spline array.
432    * @param nfft    FFT dimension.
433    * @param order   B-Spline order.
434    */
435   private static void discreteFTMod(double[] bsmod, double[] bsarray, int nfft, int order) {
436     // Get the modulus of the discrete Fourier fft.
437     double factor = 2.0 * PI / nfft;
438     for (int i = 0; i < nfft; i++) {
439       double sum1 = 0.0;
440       double sum2 = 0.0;
441       for (int j = 0; j < nfft; j++) {
442         double arg = factor * (i * j);
443         sum1 = sum1 + bsarray[j] * cos(arg);
444         sum2 = sum2 + bsarray[j] * sin(arg);
445       }
446       bsmod[i] = sum1 * sum1 + sum2 * sum2;
447     }
448 
449     // Fix for exponential Euler spline interpolation failure.
450     double eps = 1.0e-7;
451     if (bsmod[0] < eps) {
452       bsmod[0] = 0.5 * bsmod[1];
453     }
454     for (int i = 1; i < nfft - 1; i++) {
455       if (bsmod[i] < eps) {
456         bsmod[i] = 0.5 * (bsmod[i - 1] + bsmod[i + 1]);
457       }
458     }
459     if (bsmod[nfft - 1] < eps) {
460       bsmod[nfft - 1] = 0.5 * bsmod[nfft - 2];
461     }
462 
463     // Compute and apply the optimal zeta coefficient.
464     int jcut = 50;
465     int order2 = 2 * order;
466     for (int i = 0; i < nfft; i++) {
467       int k = i;
468       double zeta;
469       if (i > nfft / 2) {
470         k = k - nfft;
471       }
472       if (k == 0) {
473         zeta = 1.0;
474       } else {
475         double sum1 = 1.0;
476         double sum2 = 1.0;
477         factor = PI * k / nfft;
478         for (int j = 0; j < jcut; j++) {
479           double arg = factor / (factor + PI * (j + 1));
480           sum1 = sum1 + pow(arg, order);
481           sum2 = sum2 + pow(arg, order2);
482         }
483         for (int j = 0; j < jcut; j++) {
484           double arg = factor / (factor - PI * (j + 1));
485           sum1 = sum1 + pow(arg, order);
486           sum2 = sum2 + pow(arg, order2);
487         }
488         zeta = sum2 / sum1;
489       }
490       bsmod[i] = bsmod[i] * zeta * zeta;
491     }
492   }
493 
494   /**
495    * cartToFracInducedDipoles
496    *
497    * @param inducedDipole       Input Cartesian induced dipole.
498    * @param inducedDipoleCR     Input Cartesian induced dipole CR.
499    * @param fracInducedDipole   Output fractional induced dipole.
500    * @param fracInducedDipoleCR Output fractional induced dipole CR.
501    */
502   public void cartToFracInducedDipole(
503       double[] inducedDipole,
504       double[] inducedDipoleCR,
505       double[] fracInducedDipole,
506       double[] fracInducedDipoleCR) {
507     double[] in = inducedDipole;
508     fracInducedDipole[0] = a[0][0] * in[0] + a[0][1] * in[1] + a[0][2] * in[2];
509     fracInducedDipole[1] = a[1][0] * in[0] + a[1][1] * in[1] + a[1][2] * in[2];
510     fracInducedDipole[2] = a[2][0] * in[0] + a[2][1] * in[1] + a[2][2] * in[2];
511     in = inducedDipoleCR;
512     fracInducedDipoleCR[0] = a[0][0] * in[0] + a[0][1] * in[1] + a[0][2] * in[2];
513     fracInducedDipoleCR[1] = a[1][0] * in[0] + a[1][1] * in[1] + a[1][2] * in[2];
514     fracInducedDipoleCR[2] = a[2][0] * in[0] + a[2][1] * in[1] + a[2][2] * in[2];
515   }
516 
517   /**
518    * computeBSplines
519    */
520   public void computeBSplines() {
521     try {
522       bSplineTotal -= System.nanoTime();
523       parallelTeam.execute(bSplineRegion);
524       bSplineTotal += System.nanoTime();
525     } catch (Exception e) {
526       String message = " Fatal exception evaluating b-Splines.";
527       logger.log(Level.SEVERE, message, e);
528     }
529   }
530 
531   /**
532    * computeInducedPhi
533    *
534    * @param cartInducedDipolePhi   an array of double.
535    * @param cartInducedDipoleCRPhi an array of double.
536    * @param fracInducedDipolePhi   an array of double.
537    * @param fracInducedDipoleCRPhi an array of double.
538    */
539   public void computeInducedPhi(
540       double[][] cartInducedDipolePhi, double[][] cartInducedDipoleCRPhi,
541       double[][] fracInducedDipolePhi, double[][] fracInducedDipoleCRPhi) {
542     inducedPhiTotal -= System.nanoTime();
543     try {
544       polarizationPhiRegion.setInducedDipolePhi(
545           cartInducedDipolePhi, cartInducedDipoleCRPhi,
546           fracInducedDipolePhi, fracInducedDipoleCRPhi);
547       parallelTeam.execute(polarizationPhiRegion);
548     } catch (Exception e) {
549       String message = "Fatal exception evaluating induced reciprocal space potential.";
550       logger.log(Level.SEVERE, message, e);
551     }
552     inducedPhiTotal += System.nanoTime();
553   }
554 
555   /**
556    * Compute the potential Phi and its derivatives for all atoms.
557    *
558    * @param cartPermanentPhi an array of double.
559    * @param fracPermanentPhi an array of double.
560    */
561   public void computePermanentPhi(double[][] cartPermanentPhi, double[][] fracPermanentPhi) {
562     permanentPhiTotal -= System.nanoTime();
563     try {
564       permanentPhiRegion.setPermanentPhi(cartPermanentPhi, fracPermanentPhi);
565       parallelTeam.execute(permanentPhiRegion);
566     } catch (Exception e) {
567       String message = " Fatal exception evaluating permanent reciprocal space potential.";
568       logger.log(Level.SEVERE, message, e);
569     }
570     permanentPhiTotal += System.nanoTime();
571   }
572 
573   /**
574    * getXDim
575    *
576    * @return a double.
577    */
578   public int getXDim() {
579     return fftX;
580   }
581 
582   /**
583    * getYDim
584    *
585    * @return a double.
586    */
587   public int getYDim() {
588     return fftY;
589   }
590 
591   /**
592    * getZDim
593    *
594    * @return a double.
595    */
596   public int getZDim() {
597     return fftZ;
598   }
599 
600   /**
601    * Compute the reciprocal space field using a convolution.
602    */
603   public void performConvolution() {
604     convTotal -= System.nanoTime();
605     try {
606       complex3DFFT.convolution(splineGrid);
607     } catch (Exception e) {
608       String message = "Fatal exception evaluating the convolution.";
609       logger.log(Level.SEVERE, message, e);
610     }
611     convTotal += System.nanoTime();
612   }
613 
614   /**
615    * initTimings.
616    */
617   public void initTimings() {
618     // Reset total timings.
619     bSplineTotal = 0;
620     splinePermanentTotal = 0;
621     splineInducedTotal = 0;
622     permanentPhiTotal = 0;
623     inducedPhiTotal = 0;
624     convTotal = 0;
625 
626     // Reset timing arrays.
627     for (int i = 0; i < threadCount; i++) {
628       bSplineTime[i] = 0;
629       splinePermanentTime[i] = 0;
630       splineInducedTime[i] = 0;
631       permanentPhiTime[i] = 0;
632       inducedPhiTime[i] = 0;
633       splineCount[i] = 0;
634     }
635 
636     // Reset 3D convolution timing.
637     if (complex3DFFT != null) {
638       complex3DFFT.initTiming();
639     }
640   }
641 
642   /**
643    * printTimings.
644    */
645   public void printTimings() {
646     if (logger.isLoggable(Level.FINE)) {
647       if (complex3DFFT != null) {
648         double total = (bSplineTotal + convTotal
649             + splinePermanentTotal + permanentPhiTotal
650             + splineInducedTotal + inducedPhiTotal) * toSeconds;
651 
652         logger.fine(format("\n Reciprocal Space: %7.4f (sec)", total));
653         long[] convTime = complex3DFFT.getTimings();
654         logger.fine("                           Direct Field    SCF Field");
655         logger.fine(" Thread  B-Spline  3DConv  Spline  Phi     Spline  Phi      Count");
656 
657         long minBSpline = Long.MAX_VALUE;
658         long maxBSpline = 0;
659         long minConv = Long.MAX_VALUE;
660         long maxConv = 0;
661         long minBSPerm = Long.MAX_VALUE;
662         long maxBSPerm = 0;
663         long minPhiPerm = Long.MAX_VALUE;
664         long maxPhiPerm = 0;
665         long minBSInduced = Long.MAX_VALUE;
666         long maxBSInduced = 0;
667         long minPhiInduced = Long.MAX_VALUE;
668         long maxPhiInduced = 0;
669         int minCount = Integer.MAX_VALUE;
670         int maxCount = 0;
671 
672         for (int i = 0; i < threadCount; i++) {
673           logger.fine(format("    %3d   %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f  %6d",
674               i, bSplineTime[i] * toSeconds, convTime[i] * toSeconds,
675               splinePermanentTime[i] * toSeconds, permanentPhiTime[i] * toSeconds,
676               splineInducedTime[i] * toSeconds, inducedPhiTime[i] * toSeconds,
677               splineCount[i]));
678           minBSpline = min(bSplineTime[i], minBSpline);
679           maxBSpline = max(bSplineTime[i], maxBSpline);
680           minConv = min(convTime[i], minConv);
681           maxConv = max(convTime[i], maxConv);
682           minBSPerm = min(splinePermanentTime[i], minBSPerm);
683           maxBSPerm = max(splinePermanentTime[i], maxBSPerm);
684           minPhiPerm = min(permanentPhiTime[i], minPhiPerm);
685           maxPhiPerm = max(permanentPhiTime[i], maxPhiPerm);
686           minBSInduced = min(splineInducedTime[i], minBSInduced);
687           maxBSInduced = max(splineInducedTime[i], maxBSInduced);
688           minPhiInduced = min(inducedPhiTime[i], minPhiInduced);
689           maxPhiInduced = max(inducedPhiTime[i], maxPhiInduced);
690           minCount = min(splineCount[i], minCount);
691           maxCount = max(splineCount[i], maxCount);
692         }
693         logger.fine(format(" Min      %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f  %6d",
694             minBSpline * toSeconds, minConv * toSeconds,
695             minBSPerm * toSeconds, minPhiPerm * toSeconds,
696             minBSInduced * toSeconds, minPhiInduced * toSeconds,
697             minCount));
698         logger.fine(format(" Max      %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f  %6d",
699             maxBSpline * toSeconds, maxConv * toSeconds,
700             maxBSPerm * toSeconds, maxPhiPerm * toSeconds,
701             maxBSInduced * toSeconds, maxPhiInduced * toSeconds,
702             maxCount));
703         logger.fine(format(" Delta    %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f  %6d",
704             (maxBSpline - minBSpline) * toSeconds,
705             (maxConv - minConv) * toSeconds,
706             (maxBSPerm - minBSPerm) * toSeconds,
707             (maxPhiPerm - minPhiPerm) * toSeconds,
708             (maxBSInduced - minBSInduced) * toSeconds,
709             (maxPhiInduced - minPhiInduced) * toSeconds,
710             (maxCount - minCount)));
711         logger.fine(format(" Actual   %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f  %6d",
712             bSplineTotal * toSeconds, convTotal * toSeconds,
713             splinePermanentTotal * toSeconds, permanentPhiTotal * toSeconds,
714             splineInducedTotal * toSeconds, inducedPhiTotal * toSeconds,
715             nAtoms * nSymm));
716       }
717     }
718   }
719 
720   /**
721    * Setter for the field <code>atoms</code>.
722    *
723    * @param atoms an array of {@link ffx.potential.bonded.Atom} objects.
724    */
725   public void setAtoms(Atom[] atoms) {
726     this.atoms = atoms;
727     nAtoms = atoms.length;
728     coordinates = particleMeshEwald.getCoordinates();
729     switch (gridMethod) {
730       case SPATIAL -> {
731         spatialDensityRegion.setAtoms(atoms);
732         spatialDensityRegion.coordinates = coordinates;
733       }
734       case ROW -> {
735         rowRegion.setAtoms(atoms);
736         rowRegion.coordinates = coordinates;
737       }
738       case SLICE -> {
739         sliceRegion.setAtoms(atoms);
740         sliceRegion.coordinates = coordinates;
741       }
742     }
743   }
744 
745   /**
746    * Setter for the field <code>crystal</code>.
747    *
748    * @param crystal a {@link ffx.crystal.Crystal} object.
749    */
750   public void setCrystal(Crystal crystal) {
751     // Check if the number of symmetry operators has changed.
752     this.crystal = crystal.getUnitCell();
753     if (nSymm != this.crystal.spaceGroup.getNumberOfSymOps()) {
754       logger.info(this.crystal.toString());
755       logger.info(crystal.toString());
756       logger.severe(
757           " The reciprocal space class does not currently allow changes in the number of symmetry operators.");
758     }
759     this.coordinates = particleMeshEwald.getCoordinates();
760     initConvolution();
761   }
762 
763   /**
764    * Place the induced dipoles onto the FFT grid for the atoms in use.
765    *
766    * @param inducedDipole   Induced dipoles.
767    * @param inducedDipoleCR Chain rule term for induced dipole gradient.
768    * @param use             The atoms in use.
769    */
770   public void splineInducedDipoles(
771       double[][][] inducedDipole, double[][][] inducedDipoleCR, boolean[] use) {
772     splineInducedTotal -= System.nanoTime();
773 
774     try {
775       // Allocate memory if needed.
776       if (inducedDipoleFrac == null ||
777           inducedDipoleFrac.length != inducedDipole.length ||
778           inducedDipoleFrac[0].length != inducedDipole[0].length) {
779         int nAtoms = inducedDipole.length;
780         int nSymm = inducedDipole[0].length;
781         inducedDipoleFrac = new double[nAtoms][nSymm][3];
782         inducedDipoleFracCR = new double[nAtoms][nSymm][3];
783       }
784       // Compute fractional induced dipoles.
785       fractionalInducedRegion.setUse(use);
786       fractionalInducedRegion.setInducedDipoles(inducedDipole, inducedDipoleCR,
787           inducedDipoleFrac, inducedDipoleFracCR);
788       parallelTeam.execute(fractionalInducedRegion);
789     } catch (Exception e) {
790       String message = " Fatal exception evaluating fractional induced dipoles.";
791       logger.log(Level.SEVERE, message, e);
792     }
793 
794     switch (gridMethod) {
795       case SPATIAL -> {
796         spatialDensityRegion.setDensityLoop(spatialInducedLoops);
797         for (int i = 0; i < threadCount; i++) {
798           spatialInducedLoops[i].setInducedDipoles(inducedDipoleFrac, inducedDipoleFracCR);
799           spatialInducedLoops[i].setUse(use);
800           spatialInducedLoops[i].setRegion(spatialDensityRegion);
801         }
802         try {
803           parallelTeam.execute(spatialDensityRegion);
804         } catch (Exception e) {
805           String message = " Fatal exception evaluating induced density.\n";
806           logger.log(Level.SEVERE, message, e);
807         }
808       }
809       case ROW -> {
810         rowRegion.setDensityLoop(rowInducedLoops);
811         for (int i = 0; i < threadCount; i++) {
812           rowInducedLoops[i].setInducedDipoles(inducedDipoleFrac, inducedDipoleFracCR);
813           rowInducedLoops[i].setUse(use);
814         }
815         try {
816           parallelTeam.execute(rowRegion);
817         } catch (Exception e) {
818           String message = " Fatal exception evaluating induced density.";
819           logger.log(Level.SEVERE, message, e);
820         }
821       }
822       default -> {
823         sliceRegion.setDensityLoop(sliceInducedLoops);
824         for (int i = 0; i < threadCount; i++) {
825           sliceInducedLoops[i].setInducedDipoles(inducedDipoleFrac, inducedDipoleFracCR);
826           sliceInducedLoops[i].setUse(use);
827         }
828         try {
829           parallelTeam.execute(sliceRegion);
830         } catch (Exception e) {
831           String message = " Fatal exception evaluating induced density.";
832           logger.log(Level.SEVERE, message, e);
833         }
834       }
835     }
836     splineInducedTotal += System.nanoTime();
837   }
838 
839   /**
840    * Use b-Splines to place the permanent multipoles onto the FFT grid for the atoms in use.
841    *
842    * @param globalMultipoles an array of double.
843    * @param fracMultipoles   an array of double.
844    * @param use              an array of boolean.
845    */
846   public void splinePermanentMultipoles(double[][][] globalMultipoles, double[][][] fracMultipoles,
847                                         boolean[] use) {
848     splinePermanentTotal -= System.nanoTime();
849 
850     try {
851       fractionalMultipoleRegion.setUse(use);
852       fractionalMultipoleRegion.setPermanent(globalMultipoles, fracMultipoles);
853       parallelTeam.execute(fractionalMultipoleRegion);
854     } catch (Exception e) {
855       String message = " Fatal exception evaluating fractional multipoles.";
856       logger.log(Level.SEVERE, message, e);
857     }
858 
859     switch (gridMethod) {
860       case SPATIAL -> {
861         spatialDensityRegion.setCrystal(crystal.getUnitCell(), fftX, fftY, fftZ);
862         spatialDensityRegion.assignAtomsToCells();
863         spatialDensityRegion.setDensityLoop(spatialPermanentLoops);
864         for (int i = 0; i < threadCount; i++) {
865           spatialPermanentLoops[i].setPermanent(fracMultipoles);
866           spatialPermanentLoops[i].setUse(use);
867           spatialPermanentLoops[i].setRegion(spatialDensityRegion);
868         }
869         try {
870           parallelTeam.execute(spatialDensityRegion);
871         } catch (Exception e) {
872           String message = " Fatal exception evaluating permanent multipole density.";
873           logger.log(Level.SEVERE, message, e);
874         }
875       }
876       case ROW -> {
877         rowRegion.setCrystal(crystal.getUnitCell(), fftX, fftY, fftZ);
878         rowRegion.setDensityLoop(rowPermanentLoops);
879         for (int i = 0; i < threadCount; i++) {
880           rowPermanentLoops[i].setPermanent(fracMultipoles);
881           rowPermanentLoops[i].setUse(use);
882         }
883         try {
884           parallelTeam.execute(rowRegion);
885         } catch (Exception e) {
886           String message = " Fatal exception evaluating permanent multipole density.";
887           logger.log(Level.SEVERE, message, e);
888         }
889       }
890       case SLICE -> {
891         sliceRegion.setCrystal(crystal.getUnitCell(), fftX, fftY, fftZ);
892         sliceRegion.setDensityLoop(slicePermanentLoops);
893         for (int i = 0; i < threadCount; i++) {
894           slicePermanentLoops[i].setPermanent(fracMultipoles);
895           slicePermanentLoops[i].setUse(use);
896         }
897         try {
898           parallelTeam.execute(sliceRegion);
899         } catch (Exception e) {
900           String message = " Fatal exception evaluating permanent multipole density.";
901           logger.log(Level.SEVERE, message, e);
902         }
903       }
904     }
905 
906     splinePermanentTotal += System.nanoTime();
907   }
908 
909   private double initConvolution() {
910 
911     // Store the current reciprocal space grid dimensions.
912     int fftXCurrent = fftX;
913     int fftYCurrent = fftY;
914     int fftZCurrent = fftZ;
915 
916     // Support the PME-GRID keyword.
917     int defaultX = -1;
918     int defaultY = -1;
919     int defaultZ = -1;
920     if (forceField.hasProperty("PME_GRID")) {
921       String grid = forceField.getString("PME_GRID", "-1 -1 -1");
922       String[] values = grid.trim().split(" +");
923       if (values != null) {
924         defaultX = Integer.parseInt(values[0]);
925         defaultY = defaultX;
926         defaultZ = defaultX;
927         if (values.length > 1) {
928           defaultY = Integer.parseInt(values[1]);
929           if (values.length > 2) {
930             defaultZ = Integer.parseInt(values[2]);
931           }
932         }
933       }
934     }
935 
936     density = forceField.getDouble("PME_MESH_DENSITY", DEFAULT_PME_MESH_DENSITY);
937 
938     int nX = forceField.getInteger("PME_GRID_X", defaultX);
939     if (nX < 2) {
940       nX = (int) floor(crystal.a * density) + 1;
941       if (nX % 2 != 0) {
942         nX += 1;
943       }
944       while (!Complex.preferredDimension(nX)) {
945         nX += 2;
946       }
947     }
948 
949     int nY = forceField.getInteger("PME_GRID_Y", defaultY);
950     if (nY < 2) {
951       nY = (int) floor(crystal.b * density) + 1;
952       if (nY % 2 != 0) {
953         nY += 1;
954       }
955       while (!Complex.preferredDimension(nY)) {
956         nY += 2;
957       }
958     }
959 
960     int nZ = forceField.getInteger("PME_GRID_Z", defaultZ);
961     if (nZ < 2) {
962       nZ = (int) floor(crystal.c * density) + 1;
963       if (nZ % 2 != 0) {
964         nZ += 1;
965       }
966       while (!Complex.preferredDimension(nZ)) {
967         nZ += 2;
968       }
969     }
970 
971     int minGrid = forceField.getInteger("PME_GRID_MIN", 16);
972     nX = max(nX, minGrid);
973     nY = max(nY, minGrid);
974     nZ = max(nZ, minGrid);
975 
976     fftX = nX;
977     fftY = nY;
978     fftZ = nZ;
979 
980     // Populate the matrix that fractionalizes multipoles.
981     transformMultipoleMatrix();
982 
983     // Populate the matrix that convert fractional potential components into orthogonal Cartesian
984     // coordinates.
985     transformFieldMatrix();
986 
987     // Compute the Cartesian to fractional matrix.
988     for (int i = 0; i < 3; i++) {
989       a[0][i] = fftX * crystal.A[i][0];
990       a[1][i] = fftY * crystal.A[i][1];
991       a[2][i] = fftZ * crystal.A[i][2];
992     }
993 
994     fftSpace = fftX * fftY * fftZ * 2;
995     boolean dimChanged = fftX != fftXCurrent || fftY != fftYCurrent || fftZ != fftZCurrent;
996 
997     if (complex3DFFT == null || dimChanged) {
998       complex3DFFT = new Complex3DParallel(fftX, fftY, fftZ, fftTeam, recipSchedule);
999       if (splineGrid == null || splineGrid.length < fftSpace) {
1000         splineGrid = new double[fftSpace];
1001       }
1002       splineBuffer = DoubleBuffer.wrap(splineGrid);
1003     }
1004     complex3DFFT.setRecip(generalizedInfluenceFunction());
1005 
1006     switch (gridMethod) {
1007       case SPATIAL -> {
1008         if (spatialDensityRegion == null || dimChanged) {
1009           spatialDensityRegion = new SpatialDensityRegion(fftX, fftY, fftZ, splineGrid, bSplineOrder,
1010               nSymm, 10, threadCount, crystal, atoms, coordinates);
1011         } else {
1012           spatialDensityRegion.setCrystal(crystal, fftX, fftY, fftZ);
1013           spatialDensityRegion.coordinates = coordinates;
1014         }
1015       }
1016       case ROW -> {
1017         if (rowRegion == null || dimChanged) {
1018           rowRegion = new RowRegion(fftX, fftY, fftZ, splineGrid, nSymm, threadCount, atoms, coordinates);
1019         } else {
1020           rowRegion.setCrystal(crystal, fftX, fftY, fftZ);
1021           rowRegion.coordinates = coordinates;
1022         }
1023       }
1024       case SLICE -> {
1025         if (sliceRegion == null || dimChanged) {
1026           sliceRegion = new SliceRegion(fftX, fftY, fftZ, splineGrid, nSymm, threadCount, atoms, coordinates);
1027         } else {
1028           sliceRegion.setCrystal(crystal, fftX, fftY, fftZ);
1029           sliceRegion.coordinates = coordinates;
1030         }
1031       }
1032     }
1033 
1034     return density;
1035   }
1036 
1037   private int RowIndexZ(int i) {
1038     return i / fftY;
1039   }
1040 
1041   private int RowIndexY(int i) {
1042     return i % fftY;
1043   }
1044 
1045   private double[] generalizedInfluenceFunction() {
1046 
1047     double[] influenceFunction = new double[fftSpace / 2];
1048 
1049     double[] bsModX = new double[fftX];
1050     double[] bsModY = new double[fftY];
1051     double[] bsModZ = new double[fftZ];
1052     int maxfft = max(max(max(fftX, fftY), fftZ), bSplineOrder + 1);
1053     double[] bsArray = new double[maxfft];
1054     double[] c = new double[bSplineOrder];
1055 
1056     bSpline(0.0, bSplineOrder, c);
1057     arraycopy(c, 0, bsArray, 1, bSplineOrder);
1058 
1059     discreteFTMod(bsModX, bsArray, fftX, bSplineOrder);
1060     discreteFTMod(bsModY, bsArray, fftY, bSplineOrder);
1061     discreteFTMod(bsModZ, bsArray, fftZ, bSplineOrder);
1062 
1063     double r00 = crystal.A[0][0];
1064     double r01 = crystal.A[0][1];
1065     double r02 = crystal.A[0][2];
1066     double r10 = crystal.A[1][0];
1067     double r11 = crystal.A[1][1];
1068     double r12 = crystal.A[1][2];
1069     double r20 = crystal.A[2][0];
1070     double r21 = crystal.A[2][1];
1071     double r22 = crystal.A[2][2];
1072     int ntot = fftX * fftY * fftZ;
1073     double piTerm = (PI / aEwald) * (PI / aEwald);
1074     double volTerm = PI * crystal.volume;
1075     int nfXY = fftX * fftY;
1076     int nX_2 = (fftX + 1) / 2;
1077     int nY_2 = (fftY + 1) / 2;
1078     int nZ_2 = (fftZ + 1) / 2;
1079 
1080     for (int i = 0; i < ntot - 1; i++) {
1081       int kZ = (i + 1) / nfXY;
1082       int j = i - kZ * nfXY + 1;
1083       int kY = j / fftX;
1084       int kX = j - kY * fftX;
1085       int h = kX;
1086       int k = kY;
1087       int l = kZ;
1088       if (kX >= nX_2) {
1089         h -= fftX;
1090       }
1091       if (kY >= nY_2) {
1092         k -= fftY;
1093       }
1094       if (kZ >= nZ_2) {
1095         l -= fftZ;
1096       }
1097       double sX = r00 * h + r01 * k + r02 * l;
1098       double sY = r10 * h + r11 * k + r12 * l;
1099       double sZ = r20 * h + r21 * k + r22 * l;
1100       double sSquared = sX * sX + sY * sY + sZ * sZ;
1101       double term = -piTerm * sSquared;
1102       double expterm = 0.0;
1103       if (term > -50.0) {
1104         double denom = sSquared * volTerm * bsModX[kX] * bsModY[kY] * bsModZ[kZ];
1105         expterm = exp(term) / denom;
1106         if (crystal.aperiodic()) {
1107           expterm *= (1.0 - cos(PI * crystal.a * sqrt(sSquared)));
1108         }
1109       }
1110       int ii = iComplex3D(kX, kY, kZ, fftX, fftY) / 2;
1111       influenceFunction[ii] = expterm;
1112     }
1113 
1114     // Account for the zeroth grid point for a periodic system.
1115     influenceFunction[0] = 0.0;
1116     if (crystal.aperiodic()) {
1117       influenceFunction[0] = 0.5 * PI / crystal.a;
1118     }
1119 
1120     return influenceFunction;
1121   }
1122 
1123   private void transformMultipoleMatrix() {
1124     double[][] a = new double[3][3];
1125     for (int i = 0; i < 3; i++) {
1126       a[0][i] = fftX * crystal.A[i][0];
1127       a[1][i] = fftY * crystal.A[i][1];
1128       a[2][i] = fftZ * crystal.A[i][2];
1129     }
1130 
1131     for (int i = 0; i < 10; i++) {
1132       for (int j = 0; j < 10; j++) {
1133         transformMultipoleMatrix[i][j] = 0.0;
1134       }
1135     }
1136 
1137     // Charge
1138     transformMultipoleMatrix[0][0] = 1.0;
1139     // Dipole
1140     for (int i = 1; i < 4; i++) {
1141       arraycopy(a[i - 1], 0, transformMultipoleMatrix[i], 1, 3);
1142     }
1143     // Quadrupole
1144     for (int i1 = 0; i1 < 3; i1++) {
1145       int k = qi1[i1];
1146       for (int i2 = 0; i2 < 6; i2++) {
1147         int i = qi1[i2];
1148         int j = qi2[i2];
1149         transformMultipoleMatrix[i1 + 4][i2 + 4] = a[k][i] * a[k][j];
1150       }
1151     }
1152     for (int i1 = 3; i1 < 6; i1++) {
1153       int k = qi1[i1];
1154       int l = qi2[i1];
1155       for (int i2 = 0; i2 < 6; i2++) {
1156         int i = qi1[i2];
1157         int j = qi2[i2];
1158         transformMultipoleMatrix[i1 + 4][i2 + 4] = a[k][i] * a[l][j] + a[k][j] * a[l][i];
1159       }
1160     }
1161   }
1162 
1163   private void transformFieldMatrix() {
1164     double[][] a = new double[3][3];
1165 
1166     for (int i = 0; i < 3; i++) {
1167       a[i][0] = fftX * crystal.A[i][0];
1168       a[i][1] = fftY * crystal.A[i][1];
1169       a[i][2] = fftZ * crystal.A[i][2];
1170     }
1171     for (int i = 0; i < 10; i++) {
1172       for (int j = 0; j < 10; j++) {
1173         transformFieldMatrix[i][j] = 0.0;
1174       }
1175     }
1176     transformFieldMatrix[0][0] = 1.0;
1177     for (int i = 1; i < 4; i++) {
1178       arraycopy(a[i - 1], 0, transformFieldMatrix[i], 1, 3);
1179     }
1180     for (int i1 = 0; i1 < 3; i1++) {
1181       int k = qi1[i1];
1182       for (int i2 = 0; i2 < 3; i2++) {
1183         int i = qi1[i2];
1184         transformFieldMatrix[i1 + 4][i2 + 4] = a[k][i] * a[k][i];
1185       }
1186       for (int i2 = 3; i2 < 6; i2++) {
1187         int i = qi1[i2];
1188         int j = qi2[i2];
1189         transformFieldMatrix[i1 + 4][i2 + 4] = 2.0 * a[k][i] * a[k][j];
1190       }
1191     }
1192     for (int i1 = 3; i1 < 6; i1++) {
1193       int k = qi1[i1];
1194       int n = qi2[i1];
1195       for (int i2 = 0; i2 < 3; i2++) {
1196         int i = qi1[i2];
1197         transformFieldMatrix[i1 + 4][i2 + 4] = a[k][i] * a[n][i];
1198       }
1199       for (int i2 = 3; i2 < 6; i2++) {
1200         int i = qi1[i2];
1201         int j = qi2[i2];
1202         transformFieldMatrix[i1 + 4][i2 + 4] = a[k][i] * a[n][j] + a[n][i] * a[k][j];
1203       }
1204     }
1205   }
1206 
1207   /**
1208    * Convert a Cartesian multipole in the global frame into a factional multipole.
1209    *
1210    * @param gm Multipole in the global frame.
1211    * @param fm Fractional multipole.
1212    */
1213   private void toFractionalMultipole(double[] gm, double[] fm) {
1214     // Charge.
1215     fm[0] = gm[0];
1216 
1217     // Dipole.
1218     for (int j = 1; j < 4; j++) {
1219       fm[j] = 0.0;
1220       for (int k = 1; k < 4; k++) {
1221         fm[j] = fma(transformMultipoleMatrix[j][k], gm[k], fm[j]);
1222       }
1223     }
1224 
1225     // Quadrupole.
1226     for (int j = 4; j < 10; j++) {
1227       fm[j] = 0.0;
1228       for (int k = 4; k < 7; k++) {
1229         fm[j] = fma(transformMultipoleMatrix[j][k], gm[k], fm[j]);
1230       }
1231       for (int k = 7; k < 10; k++) {
1232         fm[j] = fma(transformMultipoleMatrix[j][k] * 2.0, gm[k], fm[j]);
1233       }
1234       /*
1235         Fractional quadrupole components are pre-multiplied by a
1236         factor of 1/3 that arises in their potential.
1237        */
1238       fm[j] = fm[j] * oneThird;
1239     }
1240   }
1241 
1242   /**
1243    * Convert a dipole in the global frame into a factional dipole.
1244    *
1245    * @param gd an array of {@link double} objects.
1246    * @param fd an array of {@link double} objects.
1247    */
1248   public void toFractionalDipole(double[] gd, double[] fd) {
1249     for (int j = 0; j < 3; j++) {
1250       fd[j] = 0.0;
1251       for (int k = 0; k < 3; k++) {
1252         fd[j] = fma(transformMultipoleMatrix[j + 1][k + 1], gd[k], fd[j]);
1253       }
1254     }
1255   }
1256 
1257   public enum GridMethod {
1258     SPATIAL,
1259     SLICE,
1260     ROW
1261   }
1262 
1263   /**
1264    * The class computes b-Splines that are used to spline multipoles and induced dipoles onto the PME
1265    * grid. Following convolution, the b-Splines are then used to obtain the reciprocal space
1266    * potential and fields from the PME grid. This class automatically updates itself to be consistent
1267    * with the current Crystal boundary conditions.
1268    *
1269    * <p>An external ParallelRegion can be used as follows: <code>
1270    * start() { bSplineRegion.start(); } run() { execute(0, nAtoms - 1,
1271    * bSplineRegion.bSplineLoop[threadID]); }
1272    * </code>
1273    */
1274   public class BSplineRegion extends ParallelRegion {
1275 
1276     final BSplineLoop[] bSplineLoop;
1277     double[][][][] splineX;
1278     double[][][][] splineY;
1279     double[][][][] splineZ;
1280     int[][][] initGrid;
1281     private double r00;
1282     private double r01;
1283     private double r02;
1284     private double r10;
1285     private double r11;
1286     private double r12;
1287     private double r20;
1288     private double r21;
1289     private double r22;
1290 
1291     BSplineRegion() {
1292       bSplineLoop = new BSplineLoop[threadCount];
1293       for (int i = 0; i < threadCount; i++) {
1294         bSplineLoop[i] = new BSplineLoop();
1295       }
1296     }
1297 
1298     @Override
1299     public void run() {
1300       /*
1301        Currently this condition would indicate a programming bug, since
1302        the space group is not allowed to change and the ReciprocalSpace
1303        class should be operating on a unit cell with a fixed number of
1304        space group symmetry operators (and not a replicated unit cell).
1305       */
1306       if (splineX.length < nSymm) {
1307         logger.severe(
1308             " Programming Error: the number of reciprocal space symmetry operators changed.");
1309       }
1310       try {
1311         int threadID = getThreadIndex();
1312         execute(0, nAtoms - 1, bSplineLoop[threadID]);
1313       } catch (Exception e) {
1314         logger.severe(e.toString());
1315       }
1316     }
1317 
1318     @Override
1319     public void start() {
1320       r00 = crystal.A[0][0];
1321       r01 = crystal.A[0][1];
1322       r02 = crystal.A[0][2];
1323       r10 = crystal.A[1][0];
1324       r11 = crystal.A[1][1];
1325       r12 = crystal.A[1][2];
1326       r20 = crystal.A[2][0];
1327       r21 = crystal.A[2][1];
1328       r22 = crystal.A[2][2];
1329       if (splineX == null || splineX[0].length < nAtoms) {
1330         initGrid = new int[nSymm][nAtoms][];
1331         splineX = new double[nSymm][nAtoms][][];
1332         splineY = new double[nSymm][nAtoms][][];
1333         splineZ = new double[nSymm][nAtoms][][];
1334       }
1335     }
1336 
1337     public class BSplineLoop extends IntegerForLoop {
1338 
1339       private int threadID;
1340       private final double[][] bSplineWork;
1341       private final IntegerSchedule schedule = IntegerSchedule.fixed();
1342 
1343       BSplineLoop() {
1344         bSplineWork = new double[bSplineOrder][bSplineOrder];
1345       }
1346 
1347       @Override
1348       public void finish() {
1349         bSplineTime[threadID] += System.nanoTime();
1350       }
1351 
1352       @Override
1353       public void run(int lb, int ub) {
1354         for (int iSymOp = 0; iSymOp < nSymm; iSymOp++) {
1355           final double[] x = coordinates[iSymOp][0];
1356           final double[] y = coordinates[iSymOp][1];
1357           final double[] z = coordinates[iSymOp][2];
1358           final int[][] initgridi = initGrid[iSymOp];
1359           final double[][][] splineXi = splineX[iSymOp];
1360           final double[][][] splineYi = splineY[iSymOp];
1361           final double[][][] splineZi = splineZ[iSymOp];
1362           for (int i = lb; i <= ub; i++) {
1363             if (initgridi[i] == null) {
1364               initgridi[i] = new int[3];
1365               splineXi[i] = new double[bSplineOrder][derivOrder + 1];
1366               splineYi[i] = new double[bSplineOrder][derivOrder + 1];
1367               splineZi[i] = new double[bSplineOrder][derivOrder + 1];
1368             }
1369             final double xi = x[i];
1370             final double yi = y[i];
1371             final double zi = z[i];
1372             final int[] grd = initgridi[i];
1373             // X-dimension
1374             final double wx = xi * r00 + yi * r10 + zi * r20;
1375             final double ux = wx - round(wx) + 0.5;
1376             final double frx = fftX * ux;
1377             final int ifrx = (int) frx;
1378             final double bx = frx - ifrx;
1379             grd[0] = ifrx - bSplineOrder;
1380             bSplineDerivatives(bx, bSplineOrder, derivOrder, splineXi[i], bSplineWork);
1381             // Y-dimension
1382             final double wy = xi * r01 + yi * r11 + zi * r21;
1383             final double uy = wy - round(wy) + 0.5;
1384             final double fry = fftY * uy;
1385             final int ifry = (int) fry;
1386             final double by = fry - ifry;
1387             grd[1] = ifry - bSplineOrder;
1388             bSplineDerivatives(by, bSplineOrder, derivOrder, splineYi[i], bSplineWork);
1389             // Z-dimension
1390             final double wz = xi * r02 + yi * r12 + zi * r22;
1391             final double uz = wz - round(wz) + 0.5;
1392             final double frz = fftZ * uz;
1393             final int ifrz = (int) frz;
1394             final double bz = frz - ifrz;
1395             grd[2] = ifrz - bSplineOrder;
1396             bSplineDerivatives(bz, bSplineOrder, derivOrder, splineZi[i], bSplineWork);
1397           }
1398         }
1399       }
1400 
1401       @Override
1402       public IntegerSchedule schedule() {
1403         return schedule;
1404       }
1405 
1406       @Override
1407       public void start() {
1408         threadID = getThreadIndex();
1409         bSplineTime[threadID] -= System.nanoTime();
1410       }
1411     }
1412   }
1413 
1414   private class SpatialPermanentLoop extends SpatialDensityLoop {
1415 
1416     private final BSplineRegion bSplines;
1417     private double[][][] fracMultipoles = null;
1418     private boolean[] use = null;
1419     private int threadIndex;
1420 
1421     SpatialPermanentLoop(SpatialDensityRegion region, BSplineRegion splines) {
1422       super(region, region.nSymm, region.actualCount);
1423       this.bSplines = splines;
1424     }
1425 
1426     @Override
1427     public void finish() {
1428       splinePermanentTime[threadIndex] += System.nanoTime();
1429     }
1430 
1431     @Override
1432     public void gridDensity(int iSymm, int n) {
1433       // Some atoms are not used during Lambda dynamics.
1434       if (use != null && !use[n]) {
1435         return;
1436       }
1437       splineCount[threadIndex]++;
1438       switch (elecForm) {
1439         case PAM -> gridMultipoleDensity(iSymm, n);
1440         case FIXED_CHARGE -> gridFixedChargeDensity(iSymm, n);
1441       }
1442     }
1443 
1444     private void gridMultipoleDensity(int iSymm, int n) {
1445       final double[] fm = fracMultipoles[iSymm][n];
1446       final double[][] splx = bSplines.splineX[iSymm][n];
1447       final double[][] sply = bSplines.splineY[iSymm][n];
1448       final double[][] splz = bSplines.splineZ[iSymm][n];
1449       final int igrd0 = bSplines.initGrid[iSymm][n][0];
1450       final int jgrd0 = bSplines.initGrid[iSymm][n][1];
1451       int k0 = bSplines.initGrid[iSymm][n][2];
1452       final double c = fm[t000];
1453       final double dx = fm[t100];
1454       final double dy = fm[t010];
1455       final double dz = fm[t001];
1456       final double qxx = fm[t200];
1457       final double qyy = fm[t020];
1458       final double qzz = fm[t002];
1459       final double qxy = fm[t110];
1460       final double qxz = fm[t101];
1461       final double qyz = fm[t011];
1462       for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
1463         final double[] splzi = splz[ith3];
1464         final double v0 = splzi[0];
1465         final double v1 = splzi[1];
1466         final double v2 = splzi[2];
1467         final double c0 = c * v0;
1468         final double dx0 = dx * v0;
1469         final double dy0 = dy * v0;
1470         final double dz1 = dz * v1;
1471         final double qxx0 = qxx * v0;
1472         final double qyy0 = qyy * v0;
1473         final double qzz2 = qzz * v2;
1474         final double qxy0 = qxy * v0;
1475         final double qxz1 = qxz * v1;
1476         final double qyz1 = qyz * v1;
1477         final double c0dz1qzz2 = c0 + dz1 + qzz2;
1478         final double dy0qyz1 = dy0 + qyz1;
1479         final double dx0qxz1 = dx0 + qxz1;
1480         final int k = mod(++k0, fftZ);
1481         int j0 = jgrd0;
1482         for (int ith2 = 0; ith2 < bSplineOrder; ith2++) {
1483           final double[] splyi = sply[ith2];
1484           final double u0 = splyi[0];
1485           final double u1 = splyi[1];
1486           final double u2 = splyi[2];
1487           final double term0 = fma(c0dz1qzz2, u0, fma(dy0qyz1, u1, qyy0 * u2));
1488           final double term1 = fma(dx0qxz1, u0, qxy0 * u1);
1489           final double term2 = qxx0 * u0;
1490           final int j = mod(++j0, fftY);
1491           int i0 = igrd0;
1492           for (int ith1 = 0; ith1 < bSplineOrder; ith1++) {
1493             final int i = mod(++i0, fftX);
1494             final int ii = iComplex3D(i, j, k, fftX, fftY);
1495             final double[] splxi = splx[ith1];
1496             final double current = splineBuffer.get(ii);
1497             double updated = fma(splxi[0], term0, current);
1498             updated = fma(splxi[1], term1, updated);
1499             updated = fma(splxi[2], term2, updated);
1500             splineBuffer.put(ii, updated);
1501           }
1502         }
1503       }
1504     }
1505 
1506     private void gridFixedChargeDensity(int iSymm, int n) {
1507       final double[] fm = fracMultipoles[iSymm][n];
1508       final double[][] splx = bSplines.splineX[iSymm][n];
1509       final double[][] sply = bSplines.splineY[iSymm][n];
1510       final double[][] splz = bSplines.splineZ[iSymm][n];
1511       final int igrd0 = bSplines.initGrid[iSymm][n][0];
1512       final int jgrd0 = bSplines.initGrid[iSymm][n][1];
1513       int k0 = bSplines.initGrid[iSymm][n][2];
1514       final double c = fm[t000];
1515       for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
1516         final double[] splzi = splz[ith3];
1517         final double v0 = splzi[0];
1518         final double c0 = c * v0;
1519         final int k = mod(++k0, fftZ);
1520         int j0 = jgrd0;
1521         for (int ith2 = 0; ith2 < bSplineOrder; ith2++) {
1522           final double[] splyi = sply[ith2];
1523           final double u0 = splyi[0];
1524           final double term0 = c0 * u0;
1525           final int j = mod(++j0, fftY);
1526           int i0 = igrd0;
1527           for (int ith1 = 0; ith1 < bSplineOrder; ith1++) {
1528             final int i = mod(++i0, fftX);
1529             final int ii = iComplex3D(i, j, k, fftX, fftY);
1530             final double[] splxi = splx[ith1];
1531             final double current = splineBuffer.get(ii);
1532             double updated = fma(splxi[0], term0, current);
1533             splineBuffer.put(ii, updated);
1534           }
1535         }
1536       }
1537     }
1538 
1539     @Override
1540     public void start() {
1541       threadIndex = getThreadIndex();
1542       splinePermanentTime[threadIndex] -= System.nanoTime();
1543     }
1544 
1545     void setPermanent(double[][][] fracMultipoles) {
1546       this.fracMultipoles = fracMultipoles;
1547     }
1548 
1549     private void setUse(boolean[] use) {
1550       this.use = use;
1551     }
1552   }
1553 
1554   private class SpatialInducedLoop extends SpatialDensityLoop {
1555 
1556     private final BSplineRegion bSplineRegion;
1557     private double[][][] inducedDipoleFrac = null;
1558     private double[][][] inducedDipoleFracCR = null;
1559     private boolean[] use = null;
1560 
1561     SpatialInducedLoop(SpatialDensityRegion region, BSplineRegion splines) {
1562       super(region, region.nSymm, region.actualCount);
1563       this.bSplineRegion = splines;
1564     }
1565 
1566     @Override
1567     public void finish() {
1568       splineInducedTime[getThreadIndex()] += System.nanoTime();
1569     }
1570 
1571     @Override
1572     public void gridDensity(int iSymm, int n) {
1573       if (use != null && !use[n]) {
1574         return;
1575       }
1576 
1577       // Convert Cartesian induced dipole to fractional induced dipole.
1578       final double[] ind = inducedDipoleFrac[iSymm][n];
1579       final double ux = ind[0];
1580       final double uy = ind[1];
1581       final double uz = ind[2];
1582       final double[] indCR = inducedDipoleFracCR[iSymm][n];
1583       final double px = indCR[0];
1584       final double py = indCR[1];
1585       final double pz = indCR[2];
1586       final double[][] splx = bSplineRegion.splineX[iSymm][n];
1587       final double[][] sply = bSplineRegion.splineY[iSymm][n];
1588       final double[][] splz = bSplineRegion.splineZ[iSymm][n];
1589       final int igrd0 = bSplineRegion.initGrid[iSymm][n][0];
1590       final int jgrd0 = bSplineRegion.initGrid[iSymm][n][1];
1591       int k0 = bSplineRegion.initGrid[iSymm][n][2];
1592       for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
1593         final double[] splzi = splz[ith3];
1594         final double v0 = splzi[0];
1595         final double v1 = splzi[1];
1596         final double dx0 = ux * v0;
1597         final double dy0 = uy * v0;
1598         final double dz1 = uz * v1;
1599         final double px0 = px * v0;
1600         final double py0 = py * v0;
1601         final double pz1 = pz * v1;
1602         final int k = mod(++k0, fftZ);
1603         int j0 = jgrd0;
1604         for (int ith2 = 0; ith2 < bSplineOrder; ith2++) {
1605           final double[] splyi = sply[ith2];
1606           final double u0 = splyi[0];
1607           final double u1 = splyi[1];
1608           final double term0 = fma(dz1, u0, dy0 * u1);
1609           final double term1 = dx0 * u0;
1610           final double termp0 = fma(pz1, u0, py0 * u1);
1611           final double termp1 = px0 * u0;
1612           final int j = mod(++j0, fftY);
1613           int i0 = igrd0;
1614           for (int ith1 = 0; ith1 < bSplineOrder; ith1++) {
1615             final int i = mod(++i0, fftX);
1616             final int ii = iComplex3D(i, j, k, fftX, fftY);
1617             final double[] splxi = splx[ith1];
1618             final double current = splineBuffer.get(ii);
1619             final double currenti = splineBuffer.get(ii + 1);
1620             final double updated = fma(splxi[0], term0, fma(splxi[1], term1, current));
1621             final double udpatedi = fma(splxi[0], termp0, fma(splxi[1], termp1, currenti));
1622             splineBuffer.put(ii, updated);
1623             splineBuffer.put(ii + 1, udpatedi);
1624           }
1625         }
1626       }
1627     }
1628 
1629     public void setUse(boolean[] use) {
1630       this.use = use;
1631     }
1632 
1633     @Override
1634     public void start() {
1635       splineInducedTime[getThreadIndex()] -= System.nanoTime();
1636     }
1637 
1638     void setInducedDipoles(double[][][] inducedDipoleFrac, double[][][] inducedDipoleFracCR) {
1639       this.inducedDipoleFrac = inducedDipoleFrac;
1640       this.inducedDipoleFracCR = inducedDipoleFracCR;
1641     }
1642   }
1643 
1644   private class RowPermanentLoop extends RowLoop {
1645 
1646     private final BSplineRegion bSplines;
1647     private double[][][] fracMultipoles = null;
1648     private boolean[] use = null;
1649     private int threadIndex;
1650 
1651     RowPermanentLoop(RowRegion region, BSplineRegion splines) {
1652       super(region.nAtoms, region.nSymm, region);
1653       this.bSplines = splines;
1654     }
1655 
1656     @Override
1657     public void finish() {
1658       splinePermanentTime[threadIndex] += System.nanoTime();
1659     }
1660 
1661     @Override
1662     public void gridDensity(int iSymm, int iAtom, int lb, int ub) {
1663       // Some atoms are not used during Lambda dynamics.
1664       if (use != null && !use[iAtom]) {
1665         return;
1666       }
1667       switch (elecForm) {
1668         case PAM -> gridMultipoleDensity(iSymm, iAtom, lb, ub);
1669         case FIXED_CHARGE -> gridFixedChargeDensity(iSymm, iAtom, lb, ub);
1670       }
1671     }
1672 
1673     private void gridMultipoleDensity(int iSymm, int iAtom, int lb, int ub) {
1674       boolean atomContributes = false;
1675       int k0 = bSplines.initGrid[iSymm][iAtom][2];
1676       int lbZ = RowIndexZ(lb);
1677       int ubZ = RowIndexZ(ub);
1678       for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
1679         final int k = mod(++k0, fftZ);
1680         if (lbZ <= k && k <= ubZ) {
1681           atomContributes = true;
1682           break;
1683         }
1684       }
1685       if (!atomContributes) {
1686         return;
1687       }
1688 
1689       splineCount[threadIndex]++;
1690 
1691       // Add atom n to the list for the current symmOp and thread.
1692       int index = gridAtomCount[iSymm][threadIndex];
1693       gridAtomList[iSymm][threadIndex][index] = iAtom;
1694       gridAtomCount[iSymm][threadIndex]++;
1695       final double[] fm = fracMultipoles[iSymm][iAtom];
1696       final double[][] splx = bSplines.splineX[iSymm][iAtom];
1697       final double[][] sply = bSplines.splineY[iSymm][iAtom];
1698       final double[][] splz = bSplines.splineZ[iSymm][iAtom];
1699       final int igrd0 = bSplines.initGrid[iSymm][iAtom][0];
1700       final int jgrd0 = bSplines.initGrid[iSymm][iAtom][1];
1701       k0 = bSplines.initGrid[iSymm][iAtom][2];
1702       final double c = fm[t000];
1703       final double dx = fm[t100];
1704       final double dy = fm[t010];
1705       final double dz = fm[t001];
1706       final double qxx = fm[t200];
1707       final double qyy = fm[t020];
1708       final double qzz = fm[t002];
1709       final double qxy = fm[t110];
1710       final double qxz = fm[t101];
1711       final double qyz = fm[t011];
1712       for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
1713         final int k = mod(++k0, fftZ);
1714         if (k < lbZ || k > ubZ) {
1715           continue;
1716         }
1717         final double[] splzi = splz[ith3];
1718         final double v0 = splzi[0];
1719         final double v1 = splzi[1];
1720         final double v2 = splzi[2];
1721         final double c0 = c * v0;
1722         final double dx0 = dx * v0;
1723         final double dy0 = dy * v0;
1724         final double dz1 = dz * v1;
1725         final double qxx0 = qxx * v0;
1726         final double qyy0 = qyy * v0;
1727         final double qzz2 = qzz * v2;
1728         final double qxy0 = qxy * v0;
1729         final double qxz1 = qxz * v1;
1730         final double qyz1 = qyz * v1;
1731         final double c0dz1qzz2 = c0 + dz1 + qzz2;
1732         final double dy0qyz1 = dy0 + qyz1;
1733         final double dx0qxz1 = dx0 + qxz1;
1734         int j0 = jgrd0;
1735         for (int ith2 = 0; ith2 < bSplineOrder; ith2++) {
1736           final int j = mod(++j0, fftY);
1737           int rowIndex = rowRegion.rowIndexForYZ(j, k);
1738           if (lb > rowIndex || rowIndex > ub) {
1739             continue;
1740           }
1741           final double[] splyi = sply[ith2];
1742           final double u0 = splyi[0];
1743           final double u1 = splyi[1];
1744           final double u2 = splyi[2];
1745           // Pieces of a multipole
1746           final double term0 = fma(c0dz1qzz2, u0, fma(dy0qyz1, u1, qyy0 * u2));
1747           final double term1 = fma(dx0qxz1, u0, qxy0 * u1);
1748           final double term2 = qxx0 * u0;
1749           int i0 = igrd0;
1750           for (int ith1 = 0; ith1 < bSplineOrder; ith1++) {
1751             final int i = mod(++i0, fftX);
1752             final int ii = iComplex3D(i, j, k, fftX, fftY);
1753             final double[] splxi = splx[ith1];
1754             double current = splineBuffer.get(ii);
1755             double updated = fma(splxi[0], term0, current);
1756             updated = fma(splxi[1], term1, updated);
1757             updated = fma(splxi[2], term2, updated);
1758             splineBuffer.put(ii, updated);
1759           }
1760         }
1761       }
1762     }
1763 
1764     private void gridFixedChargeDensity(int iSymm, int iAtom, int lb, int ub) {
1765       boolean atomContributes = false;
1766       int k0 = bSplines.initGrid[iSymm][iAtom][2];
1767       int lbZ = RowIndexZ(lb);
1768       int ubZ = RowIndexZ(ub);
1769       for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
1770         final int k = mod(++k0, fftZ);
1771         if (lbZ <= k && k <= ubZ) {
1772           atomContributes = true;
1773           break;
1774         }
1775       }
1776       if (!atomContributes) {
1777         return;
1778       }
1779 
1780       splineCount[threadIndex]++;
1781 
1782       // Add atom n to the list for the current symmOp and thread.
1783       int index = gridAtomCount[iSymm][threadIndex];
1784       gridAtomList[iSymm][threadIndex][index] = iAtom;
1785       gridAtomCount[iSymm][threadIndex]++;
1786       final double[] fm = fracMultipoles[iSymm][iAtom];
1787       final double[][] splx = bSplines.splineX[iSymm][iAtom];
1788       final double[][] sply = bSplines.splineY[iSymm][iAtom];
1789       final double[][] splz = bSplines.splineZ[iSymm][iAtom];
1790       final int igrd0 = bSplines.initGrid[iSymm][iAtom][0];
1791       final int jgrd0 = bSplines.initGrid[iSymm][iAtom][1];
1792       k0 = bSplines.initGrid[iSymm][iAtom][2];
1793       final double c = fm[t000];
1794       for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
1795         final int k = mod(++k0, fftZ);
1796         if (k < lbZ || k > ubZ) {
1797           continue;
1798         }
1799         final double[] splzi = splz[ith3];
1800         final double v0 = splzi[0];
1801         final double c0 = c * v0;
1802         int j0 = jgrd0;
1803         for (int ith2 = 0; ith2 < bSplineOrder; ith2++) {
1804           final int j = mod(++j0, fftY);
1805           int rowIndex = rowRegion.rowIndexForYZ(j, k);
1806           if (lb > rowIndex || rowIndex > ub) {
1807             continue;
1808           }
1809           final double[] splyi = sply[ith2];
1810           final double u0 = splyi[0];
1811           // Pieces of a multipole
1812           final double term0 = c0 * u0;
1813           int i0 = igrd0;
1814           for (int ith1 = 0; ith1 < bSplineOrder; ith1++) {
1815             final int i = mod(++i0, fftX);
1816             final int ii = iComplex3D(i, j, k, fftX, fftY);
1817             final double[] splxi = splx[ith1];
1818             double current = splineBuffer.get(ii);
1819             double updated = fma(splxi[0], term0, current);
1820             splineBuffer.put(ii, updated);
1821           }
1822         }
1823       }
1824     }
1825 
1826     @Override
1827     public void start() {
1828       threadIndex = getThreadIndex();
1829       splinePermanentTime[threadIndex] -= System.nanoTime();
1830       for (int i = 0; i < nSymm; i++) {
1831         gridAtomCount[i][threadIndex] = 0;
1832       }
1833     }
1834 
1835     void setPermanent(double[][][] fracMultipoles) {
1836       this.fracMultipoles = fracMultipoles;
1837     }
1838 
1839     private void setUse(boolean[] use) {
1840       this.use = use;
1841     }
1842   }
1843 
1844   private class RowInducedLoop extends RowLoop {
1845 
1846     private final BSplineRegion bSplineRegion;
1847     private double[][][] inducedDipoleFrac = null;
1848     private double[][][] inducedDipoleFracCR = null;
1849     private boolean[] use = null;
1850 
1851     RowInducedLoop(RowRegion region, BSplineRegion splines) {
1852       super(region.nAtoms, region.nSymm, region);
1853       this.bSplineRegion = splines;
1854     }
1855 
1856     @Override
1857     public void finish() {
1858       int threadIndex = getThreadIndex();
1859       splineInducedTime[threadIndex] += System.nanoTime();
1860     }
1861 
1862     @Override
1863     public void gridDensity(int iSymm, int iAtom, int lb, int ub) {
1864       if (use != null && !use[iAtom]) {
1865         return;
1866       }
1867 
1868       final int lbZ = RowIndexZ(lb);
1869       final int ubZ = RowIndexZ(ub);
1870       final double[] ind = inducedDipoleFrac[iSymm][iAtom];
1871       final double ux = ind[0];
1872       final double uy = ind[1];
1873       final double uz = ind[2];
1874       double[] indCR = inducedDipoleFracCR[iSymm][iAtom];
1875       final double px = indCR[0];
1876       final double py = indCR[1];
1877       final double pz = indCR[2];
1878       final double[][] splx = bSplineRegion.splineX[iSymm][iAtom];
1879       final double[][] sply = bSplineRegion.splineY[iSymm][iAtom];
1880       final double[][] splz = bSplineRegion.splineZ[iSymm][iAtom];
1881       final int igrd0 = bSplineRegion.initGrid[iSymm][iAtom][0];
1882       final int jgrd0 = bSplineRegion.initGrid[iSymm][iAtom][1];
1883       int k0 = bSplineRegion.initGrid[iSymm][iAtom][2];
1884       for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
1885         final int k = mod(++k0, fftZ);
1886         if (k < lbZ || k > ubZ) {
1887           continue;
1888         }
1889         final double[] splzi = splz[ith3];
1890         final double v0 = splzi[0];
1891         final double v1 = splzi[1];
1892         final double dx0 = ux * v0;
1893         final double dy0 = uy * v0;
1894         final double dz1 = uz * v1;
1895         final double px0 = px * v0;
1896         final double py0 = py * v0;
1897         final double pz1 = pz * v1;
1898         int j0 = jgrd0;
1899         for (int ith2 = 0; ith2 < bSplineOrder; ith2++) {
1900           final int j = mod(++j0, fftY);
1901           int rowIndex = rowRegion.rowIndexForYZ(j, k);
1902           if (lb > rowIndex || rowIndex > ub) {
1903             continue;
1904           }
1905           final double[] splyi = sply[ith2];
1906           final double u0 = splyi[0];
1907           final double u1 = splyi[1];
1908           final double term0 = fma(dz1, u0, dy0 * u1);
1909           final double term1 = dx0 * u0;
1910           final double termp0 = fma(pz1, u0, py0 * u1);
1911           final double termp1 = px0 * u0;
1912           int i0 = igrd0;
1913           for (int ith1 = 0; ith1 < bSplineOrder; ith1++) {
1914             final int i = mod(++i0, fftX);
1915             final int ii = iComplex3D(i, j, k, fftX, fftY);
1916             final double[] splxi = splx[ith1];
1917             final double current = splineBuffer.get(ii);
1918             final double currenti = splineBuffer.get(ii + 1);
1919             final double updated = fma(splxi[0], term0, fma(splxi[1], term1, current));
1920             final double udpatedi = fma(splxi[0], termp0, fma(splxi[1], termp1, currenti));
1921             splineBuffer.put(ii, updated);
1922             splineBuffer.put(ii + 1, udpatedi);
1923           }
1924         }
1925       }
1926     }
1927 
1928     @Override
1929     public void run(int lb, int ub) {
1930       int ti = getThreadIndex();
1931       for (int iSymm = 0; iSymm < nSymm; iSymm++) {
1932         int[] list = gridAtomList[iSymm][ti];
1933         int n = gridAtomCount[iSymm][ti];
1934         for (int i = 0; i < n; i++) {
1935           int iAtom = list[i];
1936           gridDensity(iSymm, iAtom, lb, ub);
1937         }
1938       }
1939     }
1940 
1941     public void setUse(boolean[] use) {
1942       this.use = use;
1943     }
1944 
1945     @Override
1946     public void start() {
1947       int threadIndex = getThreadIndex();
1948       splineInducedTime[threadIndex] -= System.nanoTime();
1949     }
1950 
1951     void setInducedDipoles(double[][][] inducedDipoleFrac, double[][][] inducedDipoleFracCR) {
1952       this.inducedDipoleFrac = inducedDipoleFrac;
1953       this.inducedDipoleFracCR = inducedDipoleFracCR;
1954     }
1955   }
1956 
1957   private class SlicePermanentLoop extends SliceLoop {
1958 
1959     private final BSplineRegion bSplines;
1960     private double[][][] fracMultipoles = null;
1961     private boolean[] use = null;
1962     private int threadIndex;
1963 
1964     SlicePermanentLoop(SliceRegion region, BSplineRegion splines) {
1965       super(region.nAtoms, region.nSymm, region);
1966       this.bSplines = splines;
1967     }
1968 
1969     @Override
1970     public void finish() {
1971       splinePermanentTime[threadIndex] += System.nanoTime();
1972     }
1973 
1974     @Override
1975     public void gridDensity(int iSymm, int iAtom, int lb, int ub) {
1976       // Some atoms are not used during Lambda dynamics.
1977       if (use != null && !use[iAtom]) {
1978         return;
1979       }
1980 
1981       switch (elecForm) {
1982         case PAM -> gridMultipoleDensity(iSymm, iAtom, lb, ub);
1983         case FIXED_CHARGE -> gridFixedChargeDensity(iSymm, iAtom, lb, ub);
1984       }
1985     }
1986 
1987     private void gridMultipoleDensity(int iSymm, int iAtom, int lb, int ub) {
1988       boolean atomContributes = false;
1989       int k0 = bSplines.initGrid[iSymm][iAtom][2];
1990       for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
1991         final int k = mod(++k0, fftZ);
1992         if (lb <= k && k <= ub) {
1993           atomContributes = true;
1994           break;
1995         }
1996       }
1997       if (!atomContributes) {
1998         return;
1999       }
2000 
2001       splineCount[threadIndex]++;
2002 
2003       // Add atom n to the list for the current symmOp and thread.
2004       int index = gridAtomCount[iSymm][threadIndex];
2005       gridAtomList[iSymm][threadIndex][index] = iAtom;
2006       gridAtomCount[iSymm][threadIndex]++;
2007       final double[] fm = fracMultipoles[iSymm][iAtom];
2008       final double[][] splx = bSplines.splineX[iSymm][iAtom];
2009       final double[][] sply = bSplines.splineY[iSymm][iAtom];
2010       final double[][] splz = bSplines.splineZ[iSymm][iAtom];
2011       final int igrd0 = bSplines.initGrid[iSymm][iAtom][0];
2012       final int jgrd0 = bSplines.initGrid[iSymm][iAtom][1];
2013       k0 = bSplines.initGrid[iSymm][iAtom][2];
2014       final double c = fm[t000];
2015       final double dx = fm[t100];
2016       final double dy = fm[t010];
2017       final double dz = fm[t001];
2018       final double qxx = fm[t200];
2019       final double qyy = fm[t020];
2020       final double qzz = fm[t002];
2021       final double qxy = fm[t110];
2022       final double qxz = fm[t101];
2023       final double qyz = fm[t011];
2024       for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
2025         final int k = mod(++k0, fftZ);
2026         if (k < lb || k > ub) {
2027           continue;
2028         }
2029         final double[] splzi = splz[ith3];
2030         final double v0 = splzi[0];
2031         final double v1 = splzi[1];
2032         final double v2 = splzi[2];
2033         final double c0 = c * v0;
2034         final double dx0 = dx * v0;
2035         final double dy0 = dy * v0;
2036         final double dz1 = dz * v1;
2037         final double qxx0 = qxx * v0;
2038         final double qyy0 = qyy * v0;
2039         final double qzz2 = qzz * v2;
2040         final double qxy0 = qxy * v0;
2041         final double qxz1 = qxz * v1;
2042         final double qyz1 = qyz * v1;
2043         final double c0dz1qzz2 = c0 + dz1 + qzz2;
2044         final double dy0qyz1 = dy0 + qyz1;
2045         final double dx0qxz1 = dx0 + qxz1;
2046         int j0 = jgrd0;
2047         for (int ith2 = 0; ith2 < bSplineOrder; ith2++) {
2048           final double[] splyi = sply[ith2];
2049           final double u0 = splyi[0];
2050           final double u1 = splyi[1];
2051           final double u2 = splyi[2];
2052           // Pieces of a multipole
2053           final double term0 = fma(c0dz1qzz2, u0, fma(dy0qyz1, u1, qyy0 * u2));
2054           final double term1 = fma(dx0qxz1, u0, qxy0 * u1);
2055           final double term2 = qxx0 * u0;
2056           final int j = mod(++j0, fftY);
2057           int i0 = igrd0;
2058           for (int ith1 = 0; ith1 < bSplineOrder; ith1++) {
2059             final int i = mod(++i0, fftX);
2060             final int ii = iComplex3D(i, j, k, fftX, fftY);
2061             final double[] splxi = splx[ith1];
2062             double current = splineBuffer.get(ii);
2063             double updated = fma(splxi[0], term0, current);
2064             updated = fma(splxi[1], term1, updated);
2065             updated = fma(splxi[2], term2, updated);
2066             splineBuffer.put(ii, updated);
2067           }
2068         }
2069       }
2070     }
2071 
2072     private void gridFixedChargeDensity(int iSymm, int iAtom, int lb, int ub) {
2073       boolean atomContributes = false;
2074       int k0 = bSplines.initGrid[iSymm][iAtom][2];
2075       for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
2076         final int k = mod(++k0, fftZ);
2077         if (lb <= k && k <= ub) {
2078           atomContributes = true;
2079           break;
2080         }
2081       }
2082       if (!atomContributes) {
2083         return;
2084       }
2085 
2086       splineCount[threadIndex]++;
2087 
2088       // Add atom n to the list for the current symmOp and thread.
2089       int index = gridAtomCount[iSymm][threadIndex];
2090       gridAtomList[iSymm][threadIndex][index] = iAtom;
2091       gridAtomCount[iSymm][threadIndex]++;
2092       final double[] fm = fracMultipoles[iSymm][iAtom];
2093       final double[][] splx = bSplines.splineX[iSymm][iAtom];
2094       final double[][] sply = bSplines.splineY[iSymm][iAtom];
2095       final double[][] splz = bSplines.splineZ[iSymm][iAtom];
2096       final int igrd0 = bSplines.initGrid[iSymm][iAtom][0];
2097       final int jgrd0 = bSplines.initGrid[iSymm][iAtom][1];
2098       k0 = bSplines.initGrid[iSymm][iAtom][2];
2099       final double c = fm[t000];
2100       for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
2101         final int k = mod(++k0, fftZ);
2102         if (k < lb || k > ub) {
2103           continue;
2104         }
2105         final double[] splzi = splz[ith3];
2106         final double v0 = splzi[0];
2107         final double c0 = c * v0;
2108         int j0 = jgrd0;
2109         for (int ith2 = 0; ith2 < bSplineOrder; ith2++) {
2110           final double[] splyi = sply[ith2];
2111           final double u0 = splyi[0];
2112           final double term0 = c0 * u0;
2113           final int j = mod(++j0, fftY);
2114           int i0 = igrd0;
2115           for (int ith1 = 0; ith1 < bSplineOrder; ith1++) {
2116             final int i = mod(++i0, fftX);
2117             final int ii = iComplex3D(i, j, k, fftX, fftY);
2118             final double[] splxi = splx[ith1];
2119             double current = splineBuffer.get(ii);
2120             double updated = fma(splxi[0], term0, current);
2121             splineBuffer.put(ii, updated);
2122           }
2123         }
2124       }
2125     }
2126 
2127     @Override
2128     public void start() {
2129       threadIndex = getThreadIndex();
2130       splinePermanentTime[threadIndex] -= System.nanoTime();
2131       for (int i = 0; i < nSymm; i++) {
2132         gridAtomCount[i][threadIndex] = 0;
2133       }
2134     }
2135 
2136     void setPermanent(double[][][] fracMultipoles) {
2137       this.fracMultipoles = fracMultipoles;
2138     }
2139 
2140     private void setUse(boolean[] use) {
2141       this.use = use;
2142     }
2143   }
2144 
2145   private class SliceInducedLoop extends SliceLoop {
2146 
2147     private final BSplineRegion bSplineRegion;
2148     private double[][][] inducedDipoleFrac = null;
2149     private double[][][] inducedDipoleFracCR = null;
2150     private boolean[] use = null;
2151 
2152     SliceInducedLoop(SliceRegion region, BSplineRegion splines) {
2153       super(region.nAtoms, region.nSymm, region);
2154       this.bSplineRegion = splines;
2155     }
2156 
2157     @Override
2158     public void finish() {
2159       int threadIndex = getThreadIndex();
2160       splineInducedTime[threadIndex] += System.nanoTime();
2161     }
2162 
2163     @Override
2164     public void gridDensity(int iSymm, int iAtom, int lb, int ub) {
2165       if (use != null && !use[iAtom]) {
2166         return;
2167       }
2168 
2169       // Convert Cartesian induced dipole to fractional induced dipole.
2170       double[] ind = inducedDipoleFrac[iSymm][iAtom];
2171       double ux = ind[0];
2172       double uy = ind[1];
2173       double uz = ind[2];
2174       double[] indCR = inducedDipoleFracCR[iSymm][iAtom];
2175       double px = indCR[0];
2176       double py = indCR[1];
2177       double pz = indCR[2];
2178       final double[][] splx = bSplineRegion.splineX[iSymm][iAtom];
2179       final double[][] sply = bSplineRegion.splineY[iSymm][iAtom];
2180       final double[][] splz = bSplineRegion.splineZ[iSymm][iAtom];
2181       final int igrd0 = bSplineRegion.initGrid[iSymm][iAtom][0];
2182       final int jgrd0 = bSplineRegion.initGrid[iSymm][iAtom][1];
2183       int k0 = bSplineRegion.initGrid[iSymm][iAtom][2];
2184       for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
2185         final int k = mod(++k0, fftZ);
2186         if (k < lb || k > ub) {
2187           continue;
2188         }
2189         final double[] splzi = splz[ith3];
2190         final double v0 = splzi[0];
2191         final double v1 = splzi[1];
2192         final double dx0 = ux * v0;
2193         final double dy0 = uy * v0;
2194         final double dz1 = uz * v1;
2195         final double px0 = px * v0;
2196         final double py0 = py * v0;
2197         final double pz1 = pz * v1;
2198         int j0 = jgrd0;
2199         for (int ith2 = 0; ith2 < bSplineOrder; ith2++) {
2200           final double[] splyi = sply[ith2];
2201           final double u0 = splyi[0];
2202           final double u1 = splyi[1];
2203           final double term0 = fma(dz1, u0, dy0 * u1);
2204           final double term1 = dx0 * u0;
2205           final double termp0 = fma(pz1, u0, py0 * u1);
2206           final double termp1 = px0 * u0;
2207           final int j = mod(++j0, fftY);
2208           int i0 = igrd0;
2209           for (int ith1 = 0; ith1 < bSplineOrder; ith1++) {
2210             final int i = mod(++i0, fftX);
2211             final int ii = iComplex3D(i, j, k, fftX, fftY);
2212             final double[] splxi = splx[ith1];
2213             final double current = splineBuffer.get(ii);
2214             final double currenti = splineBuffer.get(ii + 1);
2215             final double updated = fma(splxi[0], term0, fma(splxi[1], term1, current));
2216             final double udpatedi = fma(splxi[0], termp0, fma(splxi[1], termp1, currenti));
2217             splineBuffer.put(ii, updated);
2218             splineBuffer.put(ii + 1, udpatedi);
2219           }
2220         }
2221       }
2222     }
2223 
2224     @Override
2225     public void run(int lb, int ub) {
2226       int ti = getThreadIndex();
2227       for (int iSymm = 0; iSymm < nSymm; iSymm++) {
2228         int[] list = gridAtomList[iSymm][ti];
2229         int n = gridAtomCount[iSymm][ti];
2230         for (int i = 0; i < n; i++) {
2231           int iAtom = list[i];
2232           gridDensity(iSymm, iAtom, lb, ub);
2233         }
2234       }
2235     }
2236 
2237     public void setUse(boolean[] use) {
2238       this.use = use;
2239     }
2240 
2241     @Override
2242     public void start() {
2243       int threadIndex = getThreadIndex();
2244       splineInducedTime[threadIndex] -= System.nanoTime();
2245     }
2246 
2247     void setInducedDipoles(double[][][] inducedDipoleFrac, double[][][] inducedDipoleFracCR) {
2248       this.inducedDipoleFrac = inducedDipoleFrac;
2249       this.inducedDipoleFracCR = inducedDipoleFracCR;
2250     }
2251   }
2252 
2253   /**
2254    * This class is used to obtain the reciprocal space potential and fields due to permanent
2255    * multipoles from the PME grid.
2256    *
2257    * <p>An external ParallelRegion can be used as follows: <code>
2258    * start() { permanentPhiRegion.setCartPermanentPhi(...); }
2259    * <p>
2260    * run() { execute(0, nAtoms - 1, permanentPhiLoops[threadID]); }
2261    * </code>
2262    */
2263   private class PermanentPhiRegion extends ParallelRegion {
2264 
2265     final PermanentPhiLoop[] permanentPhiLoop;
2266 
2267     private final BSplineRegion bSplineRegion;
2268     private double[][] cartPermPhi;
2269     private double[][] fracPermPhi;
2270 
2271     PermanentPhiRegion(BSplineRegion bSplineRegion) {
2272       this.bSplineRegion = bSplineRegion;
2273       permanentPhiLoop = new PermanentPhiLoop[threadCount];
2274       for (int i = 0; i < threadCount; i++) {
2275         permanentPhiLoop[i] = new PermanentPhiLoop();
2276       }
2277     }
2278 
2279     @Override
2280     public void run() {
2281       try {
2282         int threadID = getThreadIndex();
2283         execute(0, nAtoms - 1, permanentPhiLoop[threadID]);
2284       } catch (Exception e) {
2285         logger.severe(e.toString());
2286       }
2287     }
2288 
2289     void setPermanentPhi(double[][] cartPermanentPhi, double[][] fracPermanentPhi) {
2290       this.cartPermPhi = cartPermanentPhi;
2291       this.fracPermPhi = fracPermanentPhi;
2292     }
2293 
2294     public class PermanentPhiLoop extends IntegerForLoop {
2295 
2296       @Override
2297       public void finish() {
2298         int threadIndex = getThreadIndex();
2299         permanentPhiTime[threadIndex] += System.nanoTime();
2300       }
2301 
2302       @Override
2303       public void run(final int lb, final int ub) {
2304         for (int n = lb; n <= ub; n++) {
2305           final double[][] splx = bSplineRegion.splineX[0][n];
2306           final double[][] sply = bSplineRegion.splineY[0][n];
2307           final double[][] splz = bSplineRegion.splineZ[0][n];
2308           final int[] igrd = bSplineRegion.initGrid[0][n];
2309           final int igrd0 = igrd[0];
2310           final int jgrd0 = igrd[1];
2311           int k0 = igrd[2];
2312           double tuv000 = 0.0;
2313           double tuv100 = 0.0;
2314           double tuv010 = 0.0;
2315           double tuv001 = 0.0;
2316           double tuv200 = 0.0;
2317           double tuv020 = 0.0;
2318           double tuv002 = 0.0;
2319           double tuv110 = 0.0;
2320           double tuv101 = 0.0;
2321           double tuv011 = 0.0;
2322           double tuv300 = 0.0;
2323           double tuv030 = 0.0;
2324           double tuv003 = 0.0;
2325           double tuv210 = 0.0;
2326           double tuv201 = 0.0;
2327           double tuv120 = 0.0;
2328           double tuv021 = 0.0;
2329           double tuv102 = 0.0;
2330           double tuv012 = 0.0;
2331           double tuv111 = 0.0;
2332           for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
2333             final int k = mod(++k0, fftZ);
2334             int j0 = jgrd0;
2335             double tu00 = 0.0;
2336             double tu10 = 0.0;
2337             double tu01 = 0.0;
2338             double tu20 = 0.0;
2339             double tu11 = 0.0;
2340             double tu02 = 0.0;
2341             double tu30 = 0.0;
2342             double tu21 = 0.0;
2343             double tu12 = 0.0;
2344             double tu03 = 0.0;
2345             for (int ith2 = 0; ith2 < bSplineOrder; ith2++) {
2346               final int j = mod(++j0, fftY);
2347               int i0 = igrd0;
2348               double t0 = 0.0;
2349               double t1 = 0.0;
2350               double t2 = 0.0;
2351               double t3 = 0.0;
2352               for (int ith1 = 0; ith1 < bSplineOrder; ith1++) {
2353                 final int i = mod(++i0, fftX);
2354                 final int ii = iComplex3D(i, j, k, fftX, fftY);
2355                 final double tq = splineBuffer.get(ii);
2356                 final double[] splxi = splx[ith1];
2357                 t0 = fma(tq, splxi[0], t0);
2358                 t1 = fma(tq, splxi[1], t1);
2359                 t2 = fma(tq, splxi[2], t2);
2360                 t3 = fma(tq, splxi[3], t3);
2361               }
2362               final double[] splyi = sply[ith2];
2363               final double u0 = splyi[0];
2364               final double u1 = splyi[1];
2365               final double u2 = splyi[2];
2366               final double u3 = splyi[3];
2367               tu00 = fma(t0, u0, tu00);
2368               tu10 = fma(t1, u0, tu10);
2369               tu01 = fma(t0, u1, tu01);
2370               tu20 = fma(t2, u0, tu20);
2371               tu11 = fma(t1, u1, tu11);
2372               tu02 = fma(t0, u2, tu02);
2373               tu30 = fma(t3, u0, tu30);
2374               tu21 = fma(t2, u1, tu21);
2375               tu12 = fma(t1, u2, tu12);
2376               tu03 = fma(t0, u3, tu03);
2377             }
2378             final double[] splzi = splz[ith3];
2379             final double v0 = splzi[0];
2380             final double v1 = splzi[1];
2381             final double v2 = splzi[2];
2382             final double v3 = splzi[3];
2383             tuv000 = fma(tu00, v0, tuv000);
2384             tuv100 = fma(tu10, v0, tuv100);
2385             tuv010 = fma(tu01, v0, tuv010);
2386             tuv001 = fma(tu00, v1, tuv001);
2387             tuv200 = fma(tu20, v0, tuv200);
2388             tuv020 = fma(tu02, v0, tuv020);
2389             tuv002 = fma(tu00, v2, tuv002);
2390             tuv110 = fma(tu11, v0, tuv110);
2391             tuv101 = fma(tu10, v1, tuv101);
2392             tuv011 = fma(tu01, v1, tuv011);
2393             tuv300 = fma(tu30, v0, tuv300);
2394             tuv030 = fma(tu03, v0, tuv030);
2395             tuv003 = fma(tu00, v3, tuv003);
2396             tuv210 = fma(tu21, v0, tuv210);
2397             tuv201 = fma(tu20, v1, tuv201);
2398             tuv120 = fma(tu12, v0, tuv120);
2399             tuv021 = fma(tu02, v1, tuv021);
2400             tuv102 = fma(tu10, v2, tuv102);
2401             tuv012 = fma(tu01, v2, tuv012);
2402             tuv111 = fma(tu11, v1, tuv111);
2403           }
2404           double[] out = fracPermPhi[n];
2405           out[t000] = tuv000;
2406           out[t100] = tuv100;
2407           out[t010] = tuv010;
2408           out[t001] = tuv001;
2409           out[t200] = tuv200;
2410           out[t020] = tuv020;
2411           out[t002] = tuv002;
2412           out[t110] = tuv110;
2413           out[t101] = tuv101;
2414           out[t011] = tuv011;
2415           out[t300] = tuv300;
2416           out[t030] = tuv030;
2417           out[t003] = tuv003;
2418           out[t210] = tuv210;
2419           out[t201] = tuv201;
2420           out[t120] = tuv120;
2421           out[t021] = tuv021;
2422           out[t102] = tuv102;
2423           out[t012] = tuv012;
2424           out[t111] = tuv111;
2425           double[] in = out;
2426           out = cartPermPhi[n];
2427           out[0] = transformFieldMatrix[0][0] * in[0];
2428           for (int j = 1; j < 4; j++) {
2429             out[j] = 0.0;
2430             for (int k = 1; k < 4; k++) {
2431               out[j] = fma(transformFieldMatrix[j][k], in[k], out[j]);
2432             }
2433           }
2434           for (int j = 4; j < 10; j++) {
2435             out[j] = 0.0;
2436             for (int k = 4; k < 10; k++) {
2437               out[j] = fma(transformFieldMatrix[j][k], in[k], out[j]);
2438             }
2439           }
2440         }
2441       }
2442 
2443       @Override
2444       public IntegerSchedule schedule() {
2445         return IntegerSchedule.fixed();
2446       }
2447 
2448       @Override
2449       public void start() {
2450         int threadIndex = getThreadIndex();
2451         permanentPhiTime[threadIndex] -= System.nanoTime();
2452       }
2453     }
2454   }
2455 
2456   /**
2457    * This class is used to obtain the reciprocal space potential and fields due to induced dipoles
2458    * from the PME grid.
2459    *
2460    * <p>An external ParallelRegion can be used as follows: <code>
2461    * start() { inducedPhiRegion.setCartInducedDipolePhi(...); }
2462    * <p>
2463    * run() { execute(0, nAtoms - 1, inducedPhiLoops[threadID]); }
2464    * </code>
2465    */
2466   private class InducedPhiRegion extends ParallelRegion {
2467 
2468     final InducedPhiLoop[] inducedPhiLoops;
2469 
2470     private final BSplineRegion bSplineRegion;
2471     private double[][] cartInducedDipolePhi;
2472     private double[][] cartInducedDipolePhiCR;
2473     private double[][] fracInducedDipolePhi;
2474     private double[][] fracInducedDipolePhiCR;
2475 
2476     InducedPhiRegion(BSplineRegion bSplineRegion) {
2477       this.bSplineRegion = bSplineRegion;
2478       inducedPhiLoops = new InducedPhiLoop[threadCount];
2479       for (int i = 0; i < threadCount; i++) {
2480         inducedPhiLoops[i] = new InducedPhiLoop();
2481       }
2482     }
2483 
2484     @Override
2485     public void run() {
2486       try {
2487         int threadID = getThreadIndex();
2488         execute(0, nAtoms - 1, inducedPhiLoops[threadID]);
2489       } catch (Exception e) {
2490         logger.severe(e.toString());
2491       }
2492     }
2493 
2494     void setInducedDipolePhi(
2495         double[][] cartInducedDipolePhi,
2496         double[][] cartInducedDipolePhiCR,
2497         double[][] fracInducedDipolePhi,
2498         double[][] fracInducedDipolePhiCR) {
2499       this.cartInducedDipolePhi = cartInducedDipolePhi;
2500       this.cartInducedDipolePhiCR = cartInducedDipolePhiCR;
2501       this.fracInducedDipolePhi = fracInducedDipolePhi;
2502       this.fracInducedDipolePhiCR = fracInducedDipolePhiCR;
2503     }
2504 
2505     public class InducedPhiLoop extends IntegerForLoop {
2506 
2507       @Override
2508       public void finish() {
2509         int threadIndex = getThreadIndex();
2510         inducedPhiTime[threadIndex] += System.nanoTime();
2511       }
2512 
2513       @Override
2514       public void run(int lb, int ub) {
2515         for (int n = lb; n <= ub; n++) {
2516           final double[][] splx = bSplineRegion.splineX[0][n];
2517           final double[][] sply = bSplineRegion.splineY[0][n];
2518           final double[][] splz = bSplineRegion.splineZ[0][n];
2519           final int[] igrd = bSplineRegion.initGrid[0][n];
2520           final int igrd0 = igrd[0];
2521           final int jgrd0 = igrd[1];
2522           int k0 = igrd[2];
2523           double tuv000 = 0.0;
2524           double tuv100 = 0.0;
2525           double tuv010 = 0.0;
2526           double tuv001 = 0.0;
2527           double tuv200 = 0.0;
2528           double tuv020 = 0.0;
2529           double tuv002 = 0.0;
2530           double tuv110 = 0.0;
2531           double tuv101 = 0.0;
2532           double tuv011 = 0.0;
2533           double tuv300 = 0.0;
2534           double tuv030 = 0.0;
2535           double tuv003 = 0.0;
2536           double tuv210 = 0.0;
2537           double tuv201 = 0.0;
2538           double tuv120 = 0.0;
2539           double tuv021 = 0.0;
2540           double tuv102 = 0.0;
2541           double tuv012 = 0.0;
2542           double tuv111 = 0.0;
2543           double tuv000p = 0.0;
2544           double tuv100p = 0.0;
2545           double tuv010p = 0.0;
2546           double tuv001p = 0.0;
2547           double tuv200p = 0.0;
2548           double tuv020p = 0.0;
2549           double tuv002p = 0.0;
2550           double tuv110p = 0.0;
2551           double tuv101p = 0.0;
2552           double tuv011p = 0.0;
2553           double tuv300p = 0.0;
2554           double tuv030p = 0.0;
2555           double tuv003p = 0.0;
2556           double tuv210p = 0.0;
2557           double tuv201p = 0.0;
2558           double tuv120p = 0.0;
2559           double tuv021p = 0.0;
2560           double tuv102p = 0.0;
2561           double tuv012p = 0.0;
2562           double tuv111p = 0.0;
2563           for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
2564             final int k = mod(++k0, fftZ);
2565             int j0 = jgrd0;
2566             double tu00 = 0.0;
2567             double tu10 = 0.0;
2568             double tu01 = 0.0;
2569             double tu20 = 0.0;
2570             double tu11 = 0.0;
2571             double tu02 = 0.0;
2572             double tu30 = 0.0;
2573             double tu21 = 0.0;
2574             double tu12 = 0.0;
2575             double tu03 = 0.0;
2576             double tu00p = 0.0;
2577             double tu10p = 0.0;
2578             double tu01p = 0.0;
2579             double tu20p = 0.0;
2580             double tu11p = 0.0;
2581             double tu02p = 0.0;
2582             double tu30p = 0.0;
2583             double tu21p = 0.0;
2584             double tu12p = 0.0;
2585             double tu03p = 0.0;
2586             for (int ith2 = 0; ith2 < bSplineOrder; ith2++) {
2587               final int j = mod(++j0, fftY);
2588               int i0 = igrd0;
2589               double t0 = 0.0;
2590               double t1 = 0.0;
2591               double t2 = 0.0;
2592               double t3 = 0.0;
2593               double t0p = 0.0;
2594               double t1p = 0.0;
2595               double t2p = 0.0;
2596               double t3p = 0.0;
2597               for (int ith1 = 0; ith1 < bSplineOrder; ith1++) {
2598                 final int i = mod(++i0, fftX);
2599                 final int ii = iComplex3D(i, j, k, fftX, fftY);
2600                 final double tq = splineBuffer.get(ii);
2601                 final double tp = splineBuffer.get(ii + 1);
2602                 final double[] splxi = splx[ith1];
2603                 final double s0 = splxi[0];
2604                 final double s1 = splxi[1];
2605                 final double s2 = splxi[2];
2606                 final double s3 = splxi[3];
2607                 t0 = fma(tq, s0, t0);
2608                 t1 = fma(tq, s1, t1);
2609                 t2 = fma(tq, s2, t2);
2610                 t3 = fma(tq, s3, t3);
2611                 t0p = fma(tp, s0, t0p);
2612                 t1p = fma(tp, s1, t1p);
2613                 t2p = fma(tp, s2, t2p);
2614                 t3p = fma(tp, s3, t3p);
2615               }
2616               final double[] splyi = sply[ith2];
2617               final double u0 = splyi[0];
2618               final double u1 = splyi[1];
2619               final double u2 = splyi[2];
2620               final double u3 = splyi[3];
2621               tu00 = fma(t0, u0, tu00);
2622               tu10 = fma(t1, u0, tu10);
2623               tu01 = fma(t0, u1, tu01);
2624               tu20 = fma(t2, u0, tu20);
2625               tu11 = fma(t1, u1, tu11);
2626               tu02 = fma(t0, u2, tu02);
2627               tu30 = fma(t3, u0, tu30);
2628               tu21 = fma(t2, u1, tu21);
2629               tu12 = fma(t1, u2, tu12);
2630               tu03 = fma(t0, u3, tu03);
2631               tu00p = fma(t0p, u0, tu00p);
2632               tu10p = fma(t1p, u0, tu10p);
2633               tu01p = fma(t0p, u1, tu01p);
2634               tu20p = fma(t2p, u0, tu20p);
2635               tu11p = fma(t1p, u1, tu11p);
2636               tu02p = fma(t0p, u2, tu02p);
2637               tu30p = fma(t3p, u0, tu30p);
2638               tu21p = fma(t2p, u1, tu21p);
2639               tu12p = fma(t1p, u2, tu12p);
2640               tu03p = fma(t0p, u3, tu03p);
2641             }
2642             final double[] splzi = splz[ith3];
2643             final double v0 = splzi[0];
2644             final double v1 = splzi[1];
2645             final double v2 = splzi[2];
2646             final double v3 = splzi[3];
2647             tuv000 = fma(tu00, v0, tuv000);
2648             tuv100 = fma(tu10, v0, tuv100);
2649             tuv010 = fma(tu01, v0, tuv010);
2650             tuv001 = fma(tu00, v1, tuv001);
2651             tuv200 = fma(tu20, v0, tuv200);
2652             tuv020 = fma(tu02, v0, tuv020);
2653             tuv002 = fma(tu00, v2, tuv002);
2654             tuv110 = fma(tu11, v0, tuv110);
2655             tuv101 = fma(tu10, v1, tuv101);
2656             tuv011 = fma(tu01, v1, tuv011);
2657             tuv300 = fma(tu30, v0, tuv300);
2658             tuv030 = fma(tu03, v0, tuv030);
2659             tuv003 = fma(tu00, v3, tuv003);
2660             tuv210 = fma(tu21, v0, tuv210);
2661             tuv201 = fma(tu20, v1, tuv201);
2662             tuv120 = fma(tu12, v0, tuv120);
2663             tuv021 = fma(tu02, v1, tuv021);
2664             tuv102 = fma(tu10, v2, tuv102);
2665             tuv012 = fma(tu01, v2, tuv012);
2666             tuv111 = fma(tu11, v1, tuv111);
2667             tuv000p = fma(tu00p, v0, tuv000p);
2668             tuv100p = fma(tu10p, v0, tuv100p);
2669             tuv010p = fma(tu01p, v0, tuv010p);
2670             tuv001p = fma(tu00p, v1, tuv001p);
2671             tuv200p = fma(tu20p, v0, tuv200p);
2672             tuv020p = fma(tu02p, v0, tuv020p);
2673             tuv002p = fma(tu00p, v2, tuv002p);
2674             tuv110p = fma(tu11p, v0, tuv110p);
2675             tuv101p = fma(tu10p, v1, tuv101p);
2676             tuv011p = fma(tu01p, v1, tuv011p);
2677             tuv300p = fma(tu30p, v0, tuv300p);
2678             tuv030p = fma(tu03p, v0, tuv030p);
2679             tuv003p = fma(tu00p, v3, tuv003p);
2680             tuv210p = fma(tu21p, v0, tuv210p);
2681             tuv201p = fma(tu20p, v1, tuv201p);
2682             tuv120p = fma(tu12p, v0, tuv120p);
2683             tuv021p = fma(tu02p, v1, tuv021p);
2684             tuv102p = fma(tu10p, v2, tuv102p);
2685             tuv012p = fma(tu01p, v2, tuv012p);
2686             tuv111p = fma(tu11p, v1, tuv111p);
2687           }
2688           double[] out = fracInducedDipolePhi[n];
2689           out[t000] = tuv000;
2690           out[t100] = tuv100;
2691           out[t010] = tuv010;
2692           out[t001] = tuv001;
2693           out[t200] = tuv200;
2694           out[t020] = tuv020;
2695           out[t002] = tuv002;
2696           out[t110] = tuv110;
2697           out[t101] = tuv101;
2698           out[t011] = tuv011;
2699           out[t300] = tuv300;
2700           out[t030] = tuv030;
2701           out[t003] = tuv003;
2702           out[t210] = tuv210;
2703           out[t201] = tuv201;
2704           out[t120] = tuv120;
2705           out[t021] = tuv021;
2706           out[t102] = tuv102;
2707           out[t012] = tuv012;
2708           out[t111] = tuv111;
2709           double[] in = out;
2710           out = cartInducedDipolePhi[n];
2711           out[0] = transformFieldMatrix[0][0] * in[0];
2712           for (int j = 1; j < 4; j++) {
2713             out[j] = 0.0;
2714             for (int k = 1; k < 4; k++) {
2715               out[j] = fma(transformFieldMatrix[j][k], in[k], out[j]);
2716             }
2717           }
2718           for (int j = 4; j < 10; j++) {
2719             out[j] = 0.0;
2720             for (int k = 4; k < 10; k++) {
2721               out[j] = fma(transformFieldMatrix[j][k], in[k], out[j]);
2722             }
2723           }
2724 
2725           out = fracInducedDipolePhiCR[n];
2726           out[t000] = tuv000p;
2727           out[t100] = tuv100p;
2728           out[t010] = tuv010p;
2729           out[t001] = tuv001p;
2730           out[t200] = tuv200p;
2731           out[t020] = tuv020p;
2732           out[t002] = tuv002p;
2733           out[t110] = tuv110p;
2734           out[t101] = tuv101p;
2735           out[t011] = tuv011p;
2736           out[t300] = tuv300p;
2737           out[t030] = tuv030p;
2738           out[t003] = tuv003p;
2739           out[t210] = tuv210p;
2740           out[t201] = tuv201p;
2741           out[t120] = tuv120p;
2742           out[t021] = tuv021p;
2743           out[t102] = tuv102p;
2744           out[t012] = tuv012p;
2745           out[t111] = tuv111p;
2746           in = out;
2747           out = cartInducedDipolePhiCR[n];
2748           out[0] = transformFieldMatrix[0][0] * in[0];
2749           for (int j = 1; j < 4; j++) {
2750             out[j] = 0.0;
2751             for (int k = 1; k < 4; k++) {
2752               out[j] = fma(transformFieldMatrix[j][k], in[k], out[j]);
2753             }
2754           }
2755           for (int j = 4; j < 10; j++) {
2756             out[j] = 0.0;
2757             for (int k = 4; k < 10; k++) {
2758               out[j] = fma(transformFieldMatrix[j][k], in[k], out[j]);
2759             }
2760           }
2761         }
2762       }
2763 
2764       @Override
2765       public IntegerSchedule schedule() {
2766         return IntegerSchedule.fixed();
2767       }
2768 
2769       @Override
2770       public void start() {
2771         int threadIndex = getThreadIndex();
2772         inducedPhiTime[threadIndex] -= System.nanoTime();
2773       }
2774     }
2775   }
2776 
2777   private class FractionalMultipoleRegion extends ParallelRegion {
2778 
2779     private double[][][] globalMultipoles = null;
2780     private double[][][] fracMultipoles = null;
2781     private boolean[] use = null;
2782     private FractionalMultipoleLoop[] fractionalMultipoleLoops;
2783 
2784     @Override
2785     public void start() {
2786       if (fractionalMultipoleLoops == null) {
2787         int nThreads = getThreadCount();
2788         fractionalMultipoleLoops = new FractionalMultipoleLoop[nThreads];
2789         for (int i = 0; i < nThreads; i++) {
2790           fractionalMultipoleLoops[i] = new FractionalMultipoleLoop();
2791         }
2792       }
2793     }
2794 
2795     void setPermanent(double[][][] globalMultipoles, double[][][] fracMultipoles) {
2796       this.globalMultipoles = globalMultipoles;
2797       this.fracMultipoles = fracMultipoles;
2798     }
2799 
2800     private void setUse(boolean[] use) {
2801       this.use = use;
2802     }
2803 
2804     @Override
2805     public void run() {
2806       int threadIndex = getThreadIndex();
2807       try {
2808         execute(0, nAtoms - 1, fractionalMultipoleLoops[threadIndex]);
2809       } catch (Exception e) {
2810         throw new RuntimeException(e);
2811       }
2812     }
2813 
2814     private class FractionalMultipoleLoop extends IntegerForLoop {
2815 
2816       @Override
2817       public void run(int lb, int ub) {
2818         for (int s = 0; s < nSymm; s++) {
2819           for (int i = lb; i <= ub; i++) {
2820             if (use[i]) {
2821               toFractionalMultipole(globalMultipoles[s][i], fracMultipoles[s][i]);
2822             }
2823           }
2824         }
2825       }
2826 
2827       @Override
2828       public IntegerSchedule schedule() {
2829         return IntegerSchedule.fixed();
2830       }
2831     }
2832 
2833   }
2834 
2835   private class FractionalInducedRegion extends ParallelRegion {
2836 
2837     private double[][][] inducedDipole;
2838     private double[][][] inducedDipoleCR;
2839     private double[][][] inducedDipoleFrac;
2840     private double[][][] inducedDipoleFracCR;
2841     private boolean[] use = null;
2842     private FractionalInducedLoop[] fractionalInducedLoops;
2843 
2844     @Override
2845     public void start() {
2846       if (fractionalInducedLoops == null) {
2847         int nThreads = getThreadCount();
2848         fractionalInducedLoops = new FractionalInducedLoop[nThreads];
2849         for (int i = 0; i < nThreads; i++) {
2850           fractionalInducedLoops[i] = new FractionalInducedLoop();
2851         }
2852       }
2853     }
2854 
2855     void setInducedDipoles(double[][][] inducedDipole, double[][][] inducedDipoleCR,
2856                            double[][][] inducedDipoleFrac, double[][][] inducedDipoleFracCR) {
2857       this.inducedDipole = inducedDipole;
2858       this.inducedDipoleCR = inducedDipoleCR;
2859       this.inducedDipoleFrac = inducedDipoleFrac;
2860       this.inducedDipoleFracCR = inducedDipoleFracCR;
2861     }
2862 
2863     private void setUse(boolean[] use) {
2864       this.use = use;
2865     }
2866 
2867     @Override
2868     public void run() {
2869       int threadIndex = getThreadIndex();
2870       try {
2871         execute(0, nAtoms - 1, fractionalInducedLoops[threadIndex]);
2872       } catch (Exception e) {
2873         throw new RuntimeException(e);
2874       }
2875     }
2876 
2877     private class FractionalInducedLoop extends IntegerForLoop {
2878 
2879       @Override
2880       public void run(int lb, int ub) {
2881         for (int s = 0; s < nSymm; s++) {
2882           for (int i = lb; i <= ub; i++) {
2883             if (use[i]) {
2884               toFractionalDipole(inducedDipole[s][i], inducedDipoleFrac[s][i]);
2885               toFractionalDipole(inducedDipoleCR[s][i], inducedDipoleFracCR[s][i]);
2886             }
2887           }
2888         }
2889       }
2890 
2891       @Override
2892       public IntegerSchedule schedule() {
2893         return IntegerSchedule.fixed();
2894       }
2895     }
2896 
2897   }
2898 }