View Javadoc
1   // ******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2025.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  // ******************************************************************************
38  package ffx.potential.nonbonded;
39  
40  import edu.rit.pj.IntegerForLoop;
41  import edu.rit.pj.IntegerSchedule;
42  import edu.rit.pj.ParallelRegion;
43  import edu.rit.pj.ParallelTeam;
44  import ffx.crystal.Crystal;
45  import ffx.numerics.fft.Complex;
46  import ffx.numerics.fft.Complex3DParallel;
47  import ffx.potential.bonded.Atom;
48  import ffx.potential.parameters.ForceField;
49  import ffx.utilities.FFXProperty;
50  import ffx.utilities.PropertyGroup;
51  import org.apache.commons.configuration2.CompositeConfiguration;
52  
53  import java.nio.DoubleBuffer;
54  import java.util.logging.Level;
55  import java.util.logging.Logger;
56  
57  import static ffx.numerics.fft.Complex3D.interleavedIndex;
58  import static ffx.numerics.math.ScalarMath.mod;
59  import static ffx.numerics.spline.UniformBSpline.bSpline;
60  import static ffx.numerics.spline.UniformBSpline.bSplineDerivatives;
61  import static ffx.potential.parameters.MultipoleType.t000;
62  import static ffx.potential.parameters.MultipoleType.t001;
63  import static ffx.potential.parameters.MultipoleType.t002;
64  import static ffx.potential.parameters.MultipoleType.t003;
65  import static ffx.potential.parameters.MultipoleType.t010;
66  import static ffx.potential.parameters.MultipoleType.t011;
67  import static ffx.potential.parameters.MultipoleType.t012;
68  import static ffx.potential.parameters.MultipoleType.t020;
69  import static ffx.potential.parameters.MultipoleType.t021;
70  import static ffx.potential.parameters.MultipoleType.t030;
71  import static ffx.potential.parameters.MultipoleType.t100;
72  import static ffx.potential.parameters.MultipoleType.t101;
73  import static ffx.potential.parameters.MultipoleType.t102;
74  import static ffx.potential.parameters.MultipoleType.t110;
75  import static ffx.potential.parameters.MultipoleType.t111;
76  import static ffx.potential.parameters.MultipoleType.t120;
77  import static ffx.potential.parameters.MultipoleType.t200;
78  import static ffx.potential.parameters.MultipoleType.t201;
79  import static ffx.potential.parameters.MultipoleType.t210;
80  import static ffx.potential.parameters.MultipoleType.t300;
81  import static java.lang.Math.fma;
82  import static java.lang.String.format;
83  import static java.lang.System.arraycopy;
84  import static org.apache.commons.math3.util.FastMath.PI;
85  import static org.apache.commons.math3.util.FastMath.cos;
86  import static org.apache.commons.math3.util.FastMath.exp;
87  import static org.apache.commons.math3.util.FastMath.floor;
88  import static org.apache.commons.math3.util.FastMath.max;
89  import static org.apache.commons.math3.util.FastMath.min;
90  import static org.apache.commons.math3.util.FastMath.pow;
91  import static org.apache.commons.math3.util.FastMath.round;
92  import static org.apache.commons.math3.util.FastMath.sin;
93  import static org.apache.commons.math3.util.FastMath.sqrt;
94  
95  /**
96   * The Reciprocal Space class computes the reciprocal space contribution to {@link ParticleMeshEwald}
97   * for the AMOEBA force field.
98   *
99   * <ol>
100  *   <li>Assignment of polarizable multipole charge density to the 3D grid, via b-Splines, is
101  *       parallelized using a spatial decomposition.
102  *   <li>The convolution depends on methods of the {@link ffx.numerics.fft.Real3DParallel} and
103  *       {@link ffx.numerics.fft.Complex3DParallel} classes.
104  *   <li>Finally, the electric potential and its gradients are collected, in parallel, off the grid
105  *       using b-Splines.
106  * </ol>
107  *
108  * @author Michael J. Schnieders
109  * @since 1.0
110  */
111 public class ReciprocalSpace {
112 
113   private static final Logger logger = Logger.getLogger(ReciprocalSpace.class.getName());
114   /**
115    * First lookup index to pack a 2D tensor into a 1D array.
116    */
117   private static final int[] qi1 = {0, 1, 2, 0, 0, 1};
118   /**
119    * Second lookup index to pack a 2D tensor into a 1D array.
120    */
121   private static final int[] qi2 = {0, 1, 2, 1, 2, 2};
122 
123   private static final double toSeconds = 1.0e-9;
124   private final ParticleMeshEwald particleMeshEwald;
125 
126   private final ForceField.ELEC_FORM elecForm;
127 
128   private final ForceField forceField;
129   /**
130    * Number of unit cell symmetry operators.
131    */
132   private final int nSymm;
133   /**
134    * Number of threads.
135    */
136   private final int threadCount;
137 
138   /**
139    * The default b-Spline order to use for discretization to/from the reciprocal grid.
140    */
141   private static final int DEFAULT_PME_ORDER = 5;
142 
143   /**
144    * The b-Spline order to use for discretization to/from the reciprocal grid.
145    */
146   @FFXProperty(name = "pme-order", clazz = Integer.class, propertyGroup = PropertyGroup.ParticleMeshEwald,
147       defaultValue = "5", description = """
148       Sets the order of the B-spline interpolation used during particle mesh Ewald summation for partial charge
149       or atomic multipole electrostatics. A default value of 5 is used in the absence of the pme-order keyword.
150       """)
151   private final int bSplineOrder;
152   /**
153    * Three derivatives of the potential are needed for AMOEBA. Specifically, the field gradient is
154    * used to compute the energy of the quadrupole moment and the 2nd field gradient (3rd derivative
155    * of the reciprocal potential) the energy gradient.
156    */
157   private final int derivOrder = 3;
158   /**
159    * The Ewald convergence parameter.
160    */
161   private final double aEwald;
162   /**
163    * Parallel team instance.
164    */
165   private final ParallelTeam parallelTeam;
166   private final ParallelTeam fftTeam;
167   private final BSplineRegion bSplineRegion;
168   private final FractionalMultipoleRegion fractionalMultipoleRegion;
169   private final FractionalInducedRegion fractionalInducedRegion;
170   private final SpatialPermanentLoop[] spatialPermanentLoops;
171   private final SpatialInducedLoop[] spatialInducedLoops;
172   private final SlicePermanentLoop[] slicePermanentLoops;
173   private final SliceInducedLoop[] sliceInducedLoops;
174   private final RowPermanentLoop[] rowPermanentLoops;
175   private final RowInducedLoop[] rowInducedLoops;
176   /**
177    * Number of atoms for a given symmetry operator that a given thread is responsible for applying to
178    * the FFT grid. gridAtomCount[nSymm][nThread]
179    */
180   private final int[][] gridAtomCount;
181   /**
182    * Atom indices for a given symmetry operator that a given thread is responsible for applying to
183    * the FFT grid. gridAtomCount[nSymm][nThread][nAtoms]
184    */
185   private final int[][][] gridAtomList;
186   private final PermanentPhiRegion permanentPhiRegion;
187   private final InducedPhiRegion polarizationPhiRegion;
188   private final IntegerSchedule recipSchedule;
189   /**
190    * Convolution variables.
191    */
192   private final double[][] transformFieldMatrix = new double[10][10];
193   private final double[][] transformMultipoleMatrix = new double[10][10];
194   private final double[][] a = new double[3][3];
195   private double[][][] inducedDipoleFrac;
196   private double[][][] inducedDipoleFracCR;
197   /**
198    * The unit cell for the simulation. A ReplicatesCrystal will give equivalent results up to the
199    * limits of double precision, but is more expensive.
200    */
201   private Crystal crystal;
202   /**
203    * Array of atoms in the asymmetric unit.
204    */
205   private Atom[] atoms;
206   /**
207    * Number of atoms in the asymmetric unit.
208    */
209   private int nAtoms;
210 
211   private static final double DEFAULT_PME_MESH_DENSITY = 1.2;
212   private static final double oneThird = 1.0 / 3.0;
213 
214   @FFXProperty(name = "pme-mesh-density", clazz = Integer.class, propertyGroup = PropertyGroup.ParticleMeshEwald,
215       defaultValue = "1.2", description = """
216       The default in the absence of the pme-grid keyword is to set the grid size along each
217       axis to the smallest factor of 2, 3 and/or 5 that is at least as large as
218       pme-mesh-density times the axis length in Angstroms.
219       """)
220   private double density;
221 
222   /**
223    * The X-dimension of the FFT grid.
224    */
225   @FFXProperty(name = "pme-grid-x", clazz = Integer.class, propertyGroup = PropertyGroup.ParticleMeshEwald,
226       defaultValue = "NONE", description =
227       "Specifies the PME grid dimension along the x-axis and takes precedence over the pme-grid keyword.")
228   @FFXProperty(name = "pme-grid", clazz = Integer.class, propertyGroup = PropertyGroup.ParticleMeshEwald,
229       defaultValue = "NONE", description = """
230       [3 integers]
231       Sets the dimensions of the reciprocal space grid used during particle mesh Ewald
232       summation for electrostatics. The three modifiers give the size along the X-, Y- and Z- axes,
233       respectively. If either the Y- or Z-axis dimensions are omitted, then they are set equal
234       to the X-axis dimension. The default in the absence of the pme-grid keyword is to set the
235       grid size along each axis to the smallest value that can be factored by 2, 3, 4 and/or 5
236       and is at least as large as 1.2 times the axis length in Angstroms.
237       The value 1.2 can be changed using the pme-mesh-density property.
238       """)
239   private int fftX;
240 
241   /**
242    * The Y-dimension of the FFT grid.
243    */
244   @FFXProperty(name = "pme-grid-y", clazz = Integer.class, propertyGroup = PropertyGroup.ParticleMeshEwald,
245       defaultValue = "NONE", description =
246       "Specifies the PME grid dimension along the y-axis and takes precedence over the pme-grid keyword.")
247   private int fftY;
248 
249   /**
250    * The Z-dimension of the FFT grid.
251    */
252   @FFXProperty(name = "pme-grid-z", clazz = Integer.class, propertyGroup = PropertyGroup.ParticleMeshEwald,
253       defaultValue = "NONE", description =
254       "Specifies the PME grid dimension along the z-axis and takes precedence over the pme-grid keyword.")
255   private int fftZ;
256 
257   /**
258    * Number of doubles needed to compute a complex to complex 3D FFT (fftX * fftY * fftZ * 2).
259    */
260   private int fftSpace;
261   /**
262    * Reciprocal space grid. [fftSpace]
263    */
264   private double[] splineGrid;
265   /**
266    * Wraps the splineGrid.
267    */
268   private DoubleBuffer splineBuffer;
269   /**
270    * Reference to atomic coordinate array.
271    */
272   private double[][][] coordinates;
273   // Spline moments and perform the convolution.
274   private SpatialDensityRegion spatialDensityRegion;
275   private SliceRegion sliceRegion;
276   private RowRegion rowRegion;
277   private Complex3DParallel complex3DFFT;
278   private GridMethod gridMethod;
279   // Timing variables.
280   private long bSplineTotal, splinePermanentTotal, splineInducedTotal;
281   private long permanentPhiTotal, inducedPhiTotal, convTotal;
282   /**
283    * Timing variables.
284    */
285   private final long[] bSplineTime;
286   private final long[] splinePermanentTime;
287   private final long[] permanentPhiTime;
288   private final long[] splineInducedTime;
289   private final int[] splineCount;
290   private final long[] inducedPhiTime;
291 
292 
293   /**
294    * Reciprocal Space PME contribution.
295    *
296    * @param particleMeshEwald a {@link ParticleMeshEwald} object.
297    * @param crystal           a {@link ffx.crystal.Crystal} object.
298    * @param forceField        a {@link ffx.potential.parameters.ForceField} object.
299    * @param atoms             an array of {@link ffx.potential.bonded.Atom} objects.
300    * @param aewald            the Ewald parameter.
301    * @param fftTeam           a {@link edu.rit.pj.ParallelTeam} object.
302    * @param parallelTeam      a {@link edu.rit.pj.ParallelTeam} object.
303    */
304   public ReciprocalSpace(
305       ParticleMeshEwald particleMeshEwald,
306       Crystal crystal,
307       ForceField forceField,
308       Atom[] atoms,
309       double aewald,
310       ParallelTeam fftTeam,
311       ParallelTeam parallelTeam) {
312 
313     this.particleMeshEwald = particleMeshEwald;
314     this.crystal = crystal.getUnitCell();
315     this.forceField = forceField;
316     this.atoms = atoms;
317     this.nAtoms = atoms.length;
318     this.aEwald = aewald;
319     this.fftTeam = fftTeam;
320     this.parallelTeam = parallelTeam;
321     this.elecForm = particleMeshEwald.getElecForm();
322 
323     coordinates = particleMeshEwald.getCoordinates();
324     threadCount = parallelTeam.getThreadCount();
325     nSymm = this.crystal.spaceGroup.getNumberOfSymOps();
326 
327     // Construct the parallel convolution object.
328     boolean available;
329     String recipStrategy = null;
330     try {
331       recipStrategy = forceField.getString("RECIP_SCHEDULE");
332       IntegerSchedule.parse(recipStrategy);
333       available = true;
334     } catch (Exception e) {
335       available = false;
336     }
337     if (available) {
338       recipSchedule = IntegerSchedule.parse(recipStrategy);
339       logger.log(Level.INFO, "  Convolution schedule {0}", recipStrategy);
340     } else {
341       recipSchedule = IntegerSchedule.fixed();
342     }
343 
344     CompositeConfiguration properties = forceField.getProperties();
345     String gridString = properties.getString("grid-method", "SPATIAL").toUpperCase();
346     try {
347       gridMethod = GridMethod.valueOf(gridString);
348     } catch (Exception e) {
349       gridMethod = GridMethod.SPATIAL;
350     }
351 
352     bSplineOrder = forceField.getInteger("PME_ORDER", DEFAULT_PME_ORDER);
353 
354     // Initialize convolution objects that may be re-allocated during NPT simulations.
355     double density = initConvolution();
356 
357     // Log PME reciprocal space parameters.
358     if (logger.isLoggable(Level.INFO)) {
359       StringBuilder sb = new StringBuilder();
360       sb.append(format("    B-Spline Order:                    %8d\n", bSplineOrder));
361       sb.append(format("    Mesh Density:                      %8.3f\n", density));
362       sb.append(format("    Mesh Dimensions:              (%3d,%3d,%3d)\n", fftX, fftY, fftZ));
363       sb.append(format("    Grid Method:                       %8s\n", gridMethod.toString()));
364       logger.info(sb.toString());
365     }
366 
367     // Construct the parallel BSplineRegion, DensityLoops and Phi objects.
368     bSplineRegion = new BSplineRegion();
369     fractionalMultipoleRegion = new FractionalMultipoleRegion();
370     fractionalInducedRegion = new FractionalInducedRegion();
371     switch (gridMethod) {
372       default -> {
373         spatialPermanentLoops = new SpatialPermanentLoop[threadCount];
374         spatialInducedLoops = new SpatialInducedLoop[threadCount];
375         for (int i = 0; i < threadCount; i++) {
376           spatialPermanentLoops[i] = new SpatialPermanentLoop(spatialDensityRegion, bSplineRegion);
377           spatialInducedLoops[i] = new SpatialInducedLoop(spatialDensityRegion, bSplineRegion);
378         }
379         slicePermanentLoops = null;
380         sliceInducedLoops = null;
381         rowPermanentLoops = null;
382         rowInducedLoops = null;
383         gridAtomCount = null;
384         gridAtomList = null;
385       }
386       case ROW -> {
387         rowPermanentLoops = new RowPermanentLoop[threadCount];
388         rowInducedLoops = new RowInducedLoop[threadCount];
389         for (int i = 0; i < threadCount; i++) {
390           rowPermanentLoops[i] = new RowPermanentLoop(rowRegion, bSplineRegion);
391           rowInducedLoops[i] = new RowInducedLoop(rowRegion, bSplineRegion);
392         }
393         gridAtomCount = new int[nSymm][threadCount];
394         gridAtomList = new int[nSymm][threadCount][nAtoms];
395         spatialPermanentLoops = null;
396         spatialInducedLoops = null;
397         slicePermanentLoops = null;
398         sliceInducedLoops = null;
399       }
400       case SLICE -> {
401         slicePermanentLoops = new SlicePermanentLoop[threadCount];
402         sliceInducedLoops = new SliceInducedLoop[threadCount];
403         for (int i = 0; i < threadCount; i++) {
404           slicePermanentLoops[i] = new SlicePermanentLoop(sliceRegion, bSplineRegion);
405           sliceInducedLoops[i] = new SliceInducedLoop(sliceRegion, bSplineRegion);
406         }
407         gridAtomCount = new int[nSymm][threadCount];
408         gridAtomList = new int[nSymm][threadCount][nAtoms];
409         spatialPermanentLoops = null;
410         spatialInducedLoops = null;
411         rowPermanentLoops = null;
412         rowInducedLoops = null;
413       }
414     }
415     permanentPhiRegion = new PermanentPhiRegion(bSplineRegion);
416     polarizationPhiRegion = new InducedPhiRegion(bSplineRegion);
417 
418     // Initialize timing variables.
419     bSplineTime = new long[threadCount];
420     splinePermanentTime = new long[threadCount];
421     splineInducedTime = new long[threadCount];
422     splineCount = new int[threadCount];
423     permanentPhiTime = new long[threadCount];
424     inducedPhiTime = new long[threadCount];
425   }
426 
427   /**
428    * Computes the modulus of the discrete Fourier Transform of "bsarray" and stores it in "bsmod".
429    *
430    * @param bsmod   B-Spline modulus.
431    * @param bsarray B-Spline array.
432    * @param nfft    FFT dimension.
433    * @param order   B-Spline order.
434    */
435   private static void discreteFTMod(double[] bsmod, double[] bsarray, int nfft, int order) {
436     // Get the modulus of the discrete Fourier fft.
437     double factor = 2.0 * PI / nfft;
438     for (int i = 0; i < nfft; i++) {
439       double sum1 = 0.0;
440       double sum2 = 0.0;
441       for (int j = 0; j < nfft; j++) {
442         double arg = factor * (i * j);
443         sum1 = sum1 + bsarray[j] * cos(arg);
444         sum2 = sum2 + bsarray[j] * sin(arg);
445       }
446       bsmod[i] = sum1 * sum1 + sum2 * sum2;
447     }
448 
449     // Fix for exponential Euler spline interpolation failure.
450     double eps = 1.0e-7;
451     if (bsmod[0] < eps) {
452       bsmod[0] = 0.5 * bsmod[1];
453     }
454     for (int i = 1; i < nfft - 1; i++) {
455       if (bsmod[i] < eps) {
456         bsmod[i] = 0.5 * (bsmod[i - 1] + bsmod[i + 1]);
457       }
458     }
459     if (bsmod[nfft - 1] < eps) {
460       bsmod[nfft - 1] = 0.5 * bsmod[nfft - 2];
461     }
462 
463     // Compute and apply the optimal zeta coefficient.
464     int jcut = 50;
465     int order2 = 2 * order;
466     for (int i = 0; i < nfft; i++) {
467       int k = i;
468       double zeta;
469       if (i > nfft / 2) {
470         k = k - nfft;
471       }
472       if (k == 0) {
473         zeta = 1.0;
474       } else {
475         double sum1 = 1.0;
476         double sum2 = 1.0;
477         factor = PI * k / nfft;
478         for (int j = 0; j < jcut; j++) {
479           double arg = factor / (factor + PI * (j + 1));
480           sum1 = sum1 + pow(arg, order);
481           sum2 = sum2 + pow(arg, order2);
482         }
483         for (int j = 0; j < jcut; j++) {
484           double arg = factor / (factor - PI * (j + 1));
485           sum1 = sum1 + pow(arg, order);
486           sum2 = sum2 + pow(arg, order2);
487         }
488         zeta = sum2 / sum1;
489       }
490       bsmod[i] = bsmod[i] * zeta * zeta;
491     }
492   }
493 
494   /**
495    * cartToFracInducedDipoles
496    *
497    * @param inducedDipole       Input Cartesian induced dipole.
498    * @param inducedDipoleCR     Input Cartesian induced dipole CR.
499    * @param fracInducedDipole   Output fractional induced dipole.
500    * @param fracInducedDipoleCR Output fractional induced dipole CR.
501    */
502   public void cartToFracInducedDipole(
503       double[] inducedDipole,
504       double[] inducedDipoleCR,
505       double[] fracInducedDipole,
506       double[] fracInducedDipoleCR) {
507     double[] in = inducedDipole;
508     fracInducedDipole[0] = a[0][0] * in[0] + a[0][1] * in[1] + a[0][2] * in[2];
509     fracInducedDipole[1] = a[1][0] * in[0] + a[1][1] * in[1] + a[1][2] * in[2];
510     fracInducedDipole[2] = a[2][0] * in[0] + a[2][1] * in[1] + a[2][2] * in[2];
511     in = inducedDipoleCR;
512     fracInducedDipoleCR[0] = a[0][0] * in[0] + a[0][1] * in[1] + a[0][2] * in[2];
513     fracInducedDipoleCR[1] = a[1][0] * in[0] + a[1][1] * in[1] + a[1][2] * in[2];
514     fracInducedDipoleCR[2] = a[2][0] * in[0] + a[2][1] * in[1] + a[2][2] * in[2];
515   }
516 
517   /**
518    * computeBSplines
519    */
520   public void computeBSplines() {
521     try {
522       bSplineTotal -= System.nanoTime();
523       parallelTeam.execute(bSplineRegion);
524       bSplineTotal += System.nanoTime();
525     } catch (Exception e) {
526       String message = " Fatal exception evaluating b-Splines.";
527       logger.log(Level.SEVERE, message, e);
528     }
529   }
530 
531   /**
532    * computeInducedPhi
533    *
534    * @param cartInducedDipolePhi   an array of double.
535    * @param cartInducedDipoleCRPhi an array of double.
536    * @param fracInducedDipolePhi   an array of double.
537    * @param fracInducedDipoleCRPhi an array of double.
538    */
539   public void computeInducedPhi(
540       double[][] cartInducedDipolePhi, double[][] cartInducedDipoleCRPhi,
541       double[][] fracInducedDipolePhi, double[][] fracInducedDipoleCRPhi) {
542     inducedPhiTotal -= System.nanoTime();
543     try {
544       polarizationPhiRegion.setInducedDipolePhi(
545           cartInducedDipolePhi, cartInducedDipoleCRPhi,
546           fracInducedDipolePhi, fracInducedDipoleCRPhi);
547       parallelTeam.execute(polarizationPhiRegion);
548     } catch (Exception e) {
549       String message = "Fatal exception evaluating induced reciprocal space potential.";
550       logger.log(Level.SEVERE, message, e);
551     }
552     inducedPhiTotal += System.nanoTime();
553   }
554 
555   /**
556    * Compute the potential Phi and its derivatives for all atoms.
557    *
558    * @param cartPermanentPhi an array of double.
559    * @param fracPermanentPhi an array of double.
560    */
561   public void computePermanentPhi(double[][] cartPermanentPhi, double[][] fracPermanentPhi) {
562     permanentPhiTotal -= System.nanoTime();
563     try {
564       permanentPhiRegion.setPermanentPhi(cartPermanentPhi, fracPermanentPhi);
565       parallelTeam.execute(permanentPhiRegion);
566     } catch (Exception e) {
567       String message = " Fatal exception evaluating permanent reciprocal space potential.";
568       logger.log(Level.SEVERE, message, e);
569     }
570     permanentPhiTotal += System.nanoTime();
571   }
572 
573   /**
574    * getXDim
575    *
576    * @return the X dimension of the FFT grid.
577    */
578   public int getXDim() {
579     return fftX;
580   }
581 
582   /**
583    * getYDim
584    *
585    * @return the Y dimension of the FFT grid.
586    */
587   public int getYDim() {
588     return fftY;
589   }
590 
591   /**
592    * getZDim
593    *
594    * @return the Z dimension of the FFT grid.
595    */
596   public int getZDim() {
597     return fftZ;
598   }
599 
600   /**
601    * Compute the reciprocal space field using a convolution.
602    */
603   public void performConvolution() {
604     convTotal -= System.nanoTime();
605     try {
606       complex3DFFT.convolution(splineGrid);
607     } catch (Exception e) {
608       String message = "Fatal exception evaluating the convolution.";
609       logger.log(Level.SEVERE, message, e);
610     }
611     convTotal += System.nanoTime();
612   }
613 
614   /**
615    * initTimings.
616    */
617   public void initTimings() {
618     // Reset total timings.
619     bSplineTotal = 0;
620     splinePermanentTotal = 0;
621     splineInducedTotal = 0;
622     permanentPhiTotal = 0;
623     inducedPhiTotal = 0;
624     convTotal = 0;
625 
626     // Reset timing arrays.
627     for (int i = 0; i < threadCount; i++) {
628       bSplineTime[i] = 0;
629       splinePermanentTime[i] = 0;
630       splineInducedTime[i] = 0;
631       permanentPhiTime[i] = 0;
632       inducedPhiTime[i] = 0;
633       splineCount[i] = 0;
634     }
635 
636     // Reset 3D convolution timing.
637     if (complex3DFFT != null) {
638       complex3DFFT.initTiming();
639     }
640   }
641 
642   /**
643    * printTimings.
644    */
645   public void printTimings() {
646     if (logger.isLoggable(Level.FINE)) {
647       if (complex3DFFT != null) {
648         double total = (bSplineTotal + convTotal
649             + splinePermanentTotal + permanentPhiTotal
650             + splineInducedTotal + inducedPhiTotal) * toSeconds;
651 
652         logger.fine(format("\n Reciprocal Space: %7.4f (sec)", total));
653         long[] convTime = complex3DFFT.getTiming();
654         logger.fine("                           Direct Field    SCF Field");
655         logger.fine(" Thread  B-Spline  3DConv  Spline  Phi     Spline  Phi      Count");
656 
657         long minBSpline = Long.MAX_VALUE;
658         long maxBSpline = 0;
659         long minConv = Long.MAX_VALUE;
660         long maxConv = 0;
661         long minBSPerm = Long.MAX_VALUE;
662         long maxBSPerm = 0;
663         long minPhiPerm = Long.MAX_VALUE;
664         long maxPhiPerm = 0;
665         long minBSInduced = Long.MAX_VALUE;
666         long maxBSInduced = 0;
667         long minPhiInduced = Long.MAX_VALUE;
668         long maxPhiInduced = 0;
669         int minCount = Integer.MAX_VALUE;
670         int maxCount = 0;
671 
672         for (int i = 0; i < threadCount; i++) {
673           logger.fine(format("    %3d   %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f  %6d",
674               i, bSplineTime[i] * toSeconds, convTime[i] * toSeconds,
675               splinePermanentTime[i] * toSeconds, permanentPhiTime[i] * toSeconds,
676               splineInducedTime[i] * toSeconds, inducedPhiTime[i] * toSeconds,
677               splineCount[i]));
678           minBSpline = min(bSplineTime[i], minBSpline);
679           maxBSpline = max(bSplineTime[i], maxBSpline);
680           minConv = min(convTime[i], minConv);
681           maxConv = max(convTime[i], maxConv);
682           minBSPerm = min(splinePermanentTime[i], minBSPerm);
683           maxBSPerm = max(splinePermanentTime[i], maxBSPerm);
684           minPhiPerm = min(permanentPhiTime[i], minPhiPerm);
685           maxPhiPerm = max(permanentPhiTime[i], maxPhiPerm);
686           minBSInduced = min(splineInducedTime[i], minBSInduced);
687           maxBSInduced = max(splineInducedTime[i], maxBSInduced);
688           minPhiInduced = min(inducedPhiTime[i], minPhiInduced);
689           maxPhiInduced = max(inducedPhiTime[i], maxPhiInduced);
690           minCount = min(splineCount[i], minCount);
691           maxCount = max(splineCount[i], maxCount);
692         }
693         logger.fine(format(" Min      %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f  %6d",
694             minBSpline * toSeconds, minConv * toSeconds,
695             minBSPerm * toSeconds, minPhiPerm * toSeconds,
696             minBSInduced * toSeconds, minPhiInduced * toSeconds,
697             minCount));
698         logger.fine(format(" Max      %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f  %6d",
699             maxBSpline * toSeconds, maxConv * toSeconds,
700             maxBSPerm * toSeconds, maxPhiPerm * toSeconds,
701             maxBSInduced * toSeconds, maxPhiInduced * toSeconds,
702             maxCount));
703         logger.fine(format(" Delta    %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f  %6d",
704             (maxBSpline - minBSpline) * toSeconds,
705             (maxConv - minConv) * toSeconds,
706             (maxBSPerm - minBSPerm) * toSeconds,
707             (maxPhiPerm - minPhiPerm) * toSeconds,
708             (maxBSInduced - minBSInduced) * toSeconds,
709             (maxPhiInduced - minPhiInduced) * toSeconds,
710             (maxCount - minCount)));
711         logger.fine(format(" Actual   %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f  %6d",
712             bSplineTotal * toSeconds, convTotal * toSeconds,
713             splinePermanentTotal * toSeconds, permanentPhiTotal * toSeconds,
714             splineInducedTotal * toSeconds, inducedPhiTotal * toSeconds,
715             nAtoms * nSymm));
716       }
717     }
718   }
719 
720   /**
721    * Setter for the field <code>atoms</code>.
722    *
723    * @param atoms an array of {@link ffx.potential.bonded.Atom} objects.
724    */
725   public void setAtoms(Atom[] atoms) {
726     this.atoms = atoms;
727     nAtoms = atoms.length;
728     coordinates = particleMeshEwald.getCoordinates();
729     switch (gridMethod) {
730       case SPATIAL -> {
731         spatialDensityRegion.setAtoms(atoms);
732         spatialDensityRegion.coordinates = coordinates;
733       }
734       case ROW -> {
735         rowRegion.setAtoms(atoms);
736         rowRegion.coordinates = coordinates;
737       }
738       case SLICE -> {
739         sliceRegion.setAtoms(atoms);
740         sliceRegion.coordinates = coordinates;
741       }
742     }
743   }
744 
745   /**
746    * Setter for the field <code>crystal</code>.
747    *
748    * @param crystal a {@link ffx.crystal.Crystal} object.
749    */
750   public void setCrystal(Crystal crystal) {
751     // Check if the number of symmetry operators has changed.
752     this.crystal = crystal.getUnitCell();
753     if (nSymm != this.crystal.spaceGroup.getNumberOfSymOps()) {
754       logger.info(this.crystal.toString());
755       logger.info(crystal.toString());
756       logger.severe(
757           " The reciprocal space class does not currently allow changes in the number of symmetry operators unless it is mapped by a user supplied symmetry operator.");
758     }
759     this.coordinates = particleMeshEwald.getCoordinates();
760     initConvolution();
761   }
762 
763   /**
764    * Place the induced dipoles onto the FFT grid for the atoms in use.
765    *
766    * @param inducedDipole   Induced dipoles.
767    * @param inducedDipoleCR Chain rule term for induced dipole gradient.
768    * @param use             The atoms in use.
769    */
770   public void splineInducedDipoles(
771       double[][][] inducedDipole, double[][][] inducedDipoleCR, boolean[] use) {
772     splineInducedTotal -= System.nanoTime();
773 
774     try {
775       // Allocate memory if needed.
776       if (inducedDipoleFrac == null ||
777           inducedDipoleFrac.length != inducedDipole.length ||
778           inducedDipoleFrac[0].length != inducedDipole[0].length) {
779         int nAtoms = inducedDipole.length;
780         int nSymm = inducedDipole[0].length;
781         inducedDipoleFrac = new double[nAtoms][nSymm][3];
782         inducedDipoleFracCR = new double[nAtoms][nSymm][3];
783       }
784       // Compute fractional induced dipoles.
785       fractionalInducedRegion.setUse(use);
786       fractionalInducedRegion.setInducedDipoles(inducedDipole, inducedDipoleCR,
787           inducedDipoleFrac, inducedDipoleFracCR);
788       parallelTeam.execute(fractionalInducedRegion);
789     } catch (Exception e) {
790       String message = " Fatal exception evaluating fractional induced dipoles.";
791       logger.log(Level.SEVERE, message, e);
792     }
793 
794     switch (gridMethod) {
795       case SPATIAL -> {
796         spatialDensityRegion.setDensityLoop(spatialInducedLoops);
797         for (int i = 0; i < threadCount; i++) {
798           spatialInducedLoops[i].setInducedDipoles(inducedDipoleFrac, inducedDipoleFracCR);
799           spatialInducedLoops[i].setUse(use);
800           spatialInducedLoops[i].setRegion(spatialDensityRegion);
801         }
802         try {
803           parallelTeam.execute(spatialDensityRegion);
804         } catch (Exception e) {
805           String message = " Fatal exception evaluating induced density.\n";
806           logger.log(Level.SEVERE, message, e);
807         }
808       }
809       case ROW -> {
810         rowRegion.setDensityLoop(rowInducedLoops);
811         for (int i = 0; i < threadCount; i++) {
812           rowInducedLoops[i].setInducedDipoles(inducedDipoleFrac, inducedDipoleFracCR);
813           rowInducedLoops[i].setUse(use);
814         }
815         try {
816           parallelTeam.execute(rowRegion);
817         } catch (Exception e) {
818           String message = " Fatal exception evaluating induced density.";
819           logger.log(Level.SEVERE, message, e);
820         }
821       }
822       default -> {
823         sliceRegion.setDensityLoop(sliceInducedLoops);
824         for (int i = 0; i < threadCount; i++) {
825           sliceInducedLoops[i].setInducedDipoles(inducedDipoleFrac, inducedDipoleFracCR);
826           sliceInducedLoops[i].setUse(use);
827         }
828         try {
829           parallelTeam.execute(sliceRegion);
830         } catch (Exception e) {
831           String message = " Fatal exception evaluating induced density.";
832           logger.log(Level.SEVERE, message, e);
833         }
834       }
835     }
836     splineInducedTotal += System.nanoTime();
837   }
838 
839   /**
840    * Use b-Splines to place the permanent multipoles onto the FFT grid for the atoms in use.
841    *
842    * @param globalMultipoles an array of double.
843    * @param fracMultipoles   an array of double.
844    * @param use              an array of boolean.
845    */
846   public void splinePermanentMultipoles(double[][][] globalMultipoles, double[][][] fracMultipoles,
847                                         boolean[] use) {
848     splinePermanentTotal -= System.nanoTime();
849 
850     try {
851       fractionalMultipoleRegion.setUse(use);
852       fractionalMultipoleRegion.setPermanent(globalMultipoles, fracMultipoles);
853       parallelTeam.execute(fractionalMultipoleRegion);
854     } catch (Exception e) {
855       String message = " Fatal exception evaluating fractional multipoles.";
856       logger.log(Level.SEVERE, message, e);
857     }
858 
859     switch (gridMethod) {
860       case SPATIAL -> {
861         spatialDensityRegion.setCrystal(crystal.getUnitCell(), fftX, fftY, fftZ);
862         spatialDensityRegion.assignAtomsToCells();
863         spatialDensityRegion.setDensityLoop(spatialPermanentLoops);
864         for (int i = 0; i < threadCount; i++) {
865           spatialPermanentLoops[i].setPermanent(fracMultipoles);
866           spatialPermanentLoops[i].setUse(use);
867           spatialPermanentLoops[i].setRegion(spatialDensityRegion);
868         }
869         try {
870           parallelTeam.execute(spatialDensityRegion);
871         } catch (Exception e) {
872           String message = " Fatal exception evaluating permanent multipole density.";
873           logger.log(Level.SEVERE, message, e);
874         }
875       }
876       case ROW -> {
877         rowRegion.setCrystal(crystal.getUnitCell(), fftX, fftY, fftZ);
878         rowRegion.setDensityLoop(rowPermanentLoops);
879         for (int i = 0; i < threadCount; i++) {
880           rowPermanentLoops[i].setPermanent(fracMultipoles);
881           rowPermanentLoops[i].setUse(use);
882         }
883         try {
884           parallelTeam.execute(rowRegion);
885         } catch (Exception e) {
886           String message = " Fatal exception evaluating permanent multipole density.";
887           logger.log(Level.SEVERE, message, e);
888         }
889       }
890       case SLICE -> {
891         sliceRegion.setCrystal(crystal.getUnitCell(), fftX, fftY, fftZ);
892         sliceRegion.setDensityLoop(slicePermanentLoops);
893         for (int i = 0; i < threadCount; i++) {
894           slicePermanentLoops[i].setPermanent(fracMultipoles);
895           slicePermanentLoops[i].setUse(use);
896         }
897         try {
898           parallelTeam.execute(sliceRegion);
899         } catch (Exception e) {
900           String message = " Fatal exception evaluating permanent multipole density.";
901           logger.log(Level.SEVERE, message, e);
902         }
903       }
904     }
905 
906     splinePermanentTotal += System.nanoTime();
907   }
908 
909   private double initConvolution() {
910 
911     // Store the current reciprocal space grid dimensions.
912     int fftXCurrent = fftX;
913     int fftYCurrent = fftY;
914     int fftZCurrent = fftZ;
915 
916     // Support the PME-GRID keyword.
917     int defaultX = -1;
918     int defaultY = -1;
919     int defaultZ = -1;
920     if (forceField.hasProperty("PME_GRID")) {
921       String grid = forceField.getString("PME_GRID", "-1 -1 -1");
922       String[] values = grid.trim().split(" +");
923       if (values != null) {
924         defaultX = Integer.parseInt(values[0]);
925         defaultY = defaultX;
926         defaultZ = defaultX;
927         if (values.length > 1) {
928           defaultY = Integer.parseInt(values[1]);
929           if (values.length > 2) {
930             defaultZ = Integer.parseInt(values[2]);
931           }
932         }
933       }
934     }
935 
936     density = forceField.getDouble("PME_MESH_DENSITY", DEFAULT_PME_MESH_DENSITY);
937 
938     int nX = forceField.getInteger("PME_GRID_X", defaultX);
939     if (nX < 2) {
940       nX = (int) floor(crystal.a * density) + 1;
941       if (nX % 2 != 0) {
942         nX += 1;
943       }
944       while (!Complex.preferredDimension(nX)) {
945         nX += 2;
946       }
947     }
948 
949     int nY = forceField.getInteger("PME_GRID_Y", defaultY);
950     if (nY < 2) {
951       nY = (int) floor(crystal.b * density) + 1;
952       if (nY % 2 != 0) {
953         nY += 1;
954       }
955       while (!Complex.preferredDimension(nY)) {
956         nY += 2;
957       }
958     }
959 
960     int nZ = forceField.getInteger("PME_GRID_Z", defaultZ);
961     if (nZ < 2) {
962       nZ = (int) floor(crystal.c * density) + 1;
963       if (nZ % 2 != 0) {
964         nZ += 1;
965       }
966       while (!Complex.preferredDimension(nZ)) {
967         nZ += 2;
968       }
969     }
970 
971     int minGrid = forceField.getInteger("PME_GRID_MIN", 16);
972     nX = max(nX, minGrid);
973     nY = max(nY, minGrid);
974     nZ = max(nZ, minGrid);
975 
976     fftX = nX;
977     fftY = nY;
978     fftZ = nZ;
979 
980     // Populate the matrix that fractionalizes multipoles.
981     transformMultipoleMatrix();
982 
983     // Populate the matrix that convert fractional potential components into orthogonal Cartesian
984     // coordinates.
985     transformFieldMatrix();
986 
987     // Compute the Cartesian to fractional matrix.
988     for (int i = 0; i < 3; i++) {
989       a[0][i] = fftX * crystal.A[i][0];
990       a[1][i] = fftY * crystal.A[i][1];
991       a[2][i] = fftZ * crystal.A[i][2];
992     }
993 
994     fftSpace = fftX * fftY * fftZ * 2;
995     boolean dimChanged = fftX != fftXCurrent || fftY != fftYCurrent || fftZ != fftZCurrent;
996 
997     if (complex3DFFT == null || dimChanged) {
998       complex3DFFT = new Complex3DParallel(fftX, fftY, fftZ, fftTeam, recipSchedule);
999       if (splineGrid == null || splineGrid.length < fftSpace) {
1000         splineGrid = new double[fftSpace];
1001       }
1002       splineBuffer = DoubleBuffer.wrap(splineGrid);
1003     }
1004     complex3DFFT.setRecip(generalizedInfluenceFunction());
1005 
1006     switch (gridMethod) {
1007       case SPATIAL -> {
1008         if (spatialDensityRegion == null || dimChanged) {
1009           spatialDensityRegion = new SpatialDensityRegion(fftX, fftY, fftZ, splineGrid, bSplineOrder,
1010               nSymm, 10, threadCount, crystal, atoms, coordinates);
1011         } else {
1012           spatialDensityRegion.setCrystal(crystal, fftX, fftY, fftZ);
1013           spatialDensityRegion.coordinates = coordinates;
1014         }
1015       }
1016       case ROW -> {
1017         if (rowRegion == null || dimChanged) {
1018           rowRegion = new RowRegion(fftX, fftY, fftZ, splineGrid, nSymm, threadCount, atoms, coordinates);
1019         } else {
1020           rowRegion.setCrystal(crystal, fftX, fftY, fftZ);
1021           rowRegion.coordinates = coordinates;
1022         }
1023       }
1024       case SLICE -> {
1025         if (sliceRegion == null || dimChanged) {
1026           sliceRegion = new SliceRegion(fftX, fftY, fftZ, splineGrid, nSymm, threadCount, atoms, coordinates);
1027         } else {
1028           sliceRegion.setCrystal(crystal, fftX, fftY, fftZ);
1029           sliceRegion.coordinates = coordinates;
1030         }
1031       }
1032     }
1033 
1034     return density;
1035   }
1036 
1037   private int RowIndexZ(int i) {
1038     return i / fftY;
1039   }
1040 
1041   private int RowIndexY(int i) {
1042     return i % fftY;
1043   }
1044 
1045   private double[] generalizedInfluenceFunction() {
1046 
1047     double[] influenceFunction = new double[fftSpace / 2];
1048 
1049     double[] bsModX = new double[fftX];
1050     double[] bsModY = new double[fftY];
1051     double[] bsModZ = new double[fftZ];
1052     int maxfft = max(max(max(fftX, fftY), fftZ), bSplineOrder + 1);
1053     double[] bsArray = new double[maxfft];
1054     double[] c = new double[bSplineOrder];
1055 
1056     bSpline(0.0, bSplineOrder, c);
1057     arraycopy(c, 0, bsArray, 1, bSplineOrder);
1058 
1059     discreteFTMod(bsModX, bsArray, fftX, bSplineOrder);
1060     discreteFTMod(bsModY, bsArray, fftY, bSplineOrder);
1061     discreteFTMod(bsModZ, bsArray, fftZ, bSplineOrder);
1062 
1063     double r00 = crystal.A[0][0];
1064     double r01 = crystal.A[0][1];
1065     double r02 = crystal.A[0][2];
1066     double r10 = crystal.A[1][0];
1067     double r11 = crystal.A[1][1];
1068     double r12 = crystal.A[1][2];
1069     double r20 = crystal.A[2][0];
1070     double r21 = crystal.A[2][1];
1071     double r22 = crystal.A[2][2];
1072     int ntot = fftX * fftY * fftZ;
1073     double piTerm = (PI / aEwald) * (PI / aEwald);
1074     double volTerm = PI * crystal.volume;
1075     int nfXY = fftX * fftY;
1076     int nX_2 = (fftX + 1) / 2;
1077     int nY_2 = (fftY + 1) / 2;
1078     int nZ_2 = (fftZ + 1) / 2;
1079 
1080     for (int i = 0; i < ntot - 1; i++) {
1081       int kZ = (i + 1) / nfXY;
1082       int j = i - kZ * nfXY + 1;
1083       int kY = j / fftX;
1084       int kX = j - kY * fftX;
1085       int h = kX;
1086       int k = kY;
1087       int l = kZ;
1088       if (kX >= nX_2) {
1089         h -= fftX;
1090       }
1091       if (kY >= nY_2) {
1092         k -= fftY;
1093       }
1094       if (kZ >= nZ_2) {
1095         l -= fftZ;
1096       }
1097       double sX = r00 * h + r01 * k + r02 * l;
1098       double sY = r10 * h + r11 * k + r12 * l;
1099       double sZ = r20 * h + r21 * k + r22 * l;
1100       double sSquared = sX * sX + sY * sY + sZ * sZ;
1101       double term = -piTerm * sSquared;
1102       double expterm = 0.0;
1103       if (term > -50.0) {
1104         double denom = sSquared * volTerm * bsModX[kX] * bsModY[kY] * bsModZ[kZ];
1105         expterm = exp(term) / denom;
1106         if (crystal.aperiodic()) {
1107           expterm *= (1.0 - cos(PI * crystal.a * sqrt(sSquared)));
1108         }
1109       }
1110       int ii = interleavedIndex(kX, kY, kZ, fftX, fftY) / 2;
1111       influenceFunction[ii] = expterm;
1112     }
1113 
1114     // Account for the zeroth grid point for a periodic system.
1115     influenceFunction[0] = 0.0;
1116     if (crystal.aperiodic()) {
1117       influenceFunction[0] = 0.5 * PI / crystal.a;
1118     }
1119 
1120     return influenceFunction;
1121   }
1122 
1123   private void transformMultipoleMatrix() {
1124     double[][] a = new double[3][3];
1125     for (int i = 0; i < 3; i++) {
1126       a[0][i] = fftX * crystal.A[i][0];
1127       a[1][i] = fftY * crystal.A[i][1];
1128       a[2][i] = fftZ * crystal.A[i][2];
1129     }
1130 
1131     for (int i = 0; i < 10; i++) {
1132       for (int j = 0; j < 10; j++) {
1133         transformMultipoleMatrix[i][j] = 0.0;
1134       }
1135     }
1136 
1137     // Charge
1138     transformMultipoleMatrix[0][0] = 1.0;
1139     // Dipole
1140     for (int i = 1; i < 4; i++) {
1141       arraycopy(a[i - 1], 0, transformMultipoleMatrix[i], 1, 3);
1142     }
1143     // Quadrupole
1144     for (int i1 = 0; i1 < 3; i1++) {
1145       int k = qi1[i1];
1146       for (int i2 = 0; i2 < 6; i2++) {
1147         int i = qi1[i2];
1148         int j = qi2[i2];
1149         transformMultipoleMatrix[i1 + 4][i2 + 4] = a[k][i] * a[k][j];
1150       }
1151     }
1152     for (int i1 = 3; i1 < 6; i1++) {
1153       int k = qi1[i1];
1154       int l = qi2[i1];
1155       for (int i2 = 0; i2 < 6; i2++) {
1156         int i = qi1[i2];
1157         int j = qi2[i2];
1158         transformMultipoleMatrix[i1 + 4][i2 + 4] = a[k][i] * a[l][j] + a[k][j] * a[l][i];
1159       }
1160     }
1161   }
1162 
1163   private void transformFieldMatrix() {
1164     double[][] a = new double[3][3];
1165 
1166     for (int i = 0; i < 3; i++) {
1167       a[i][0] = fftX * crystal.A[i][0];
1168       a[i][1] = fftY * crystal.A[i][1];
1169       a[i][2] = fftZ * crystal.A[i][2];
1170     }
1171     for (int i = 0; i < 10; i++) {
1172       for (int j = 0; j < 10; j++) {
1173         transformFieldMatrix[i][j] = 0.0;
1174       }
1175     }
1176     transformFieldMatrix[0][0] = 1.0;
1177     for (int i = 1; i < 4; i++) {
1178       arraycopy(a[i - 1], 0, transformFieldMatrix[i], 1, 3);
1179     }
1180     for (int i1 = 0; i1 < 3; i1++) {
1181       int k = qi1[i1];
1182       for (int i2 = 0; i2 < 3; i2++) {
1183         int i = qi1[i2];
1184         transformFieldMatrix[i1 + 4][i2 + 4] = a[k][i] * a[k][i];
1185       }
1186       for (int i2 = 3; i2 < 6; i2++) {
1187         int i = qi1[i2];
1188         int j = qi2[i2];
1189         transformFieldMatrix[i1 + 4][i2 + 4] = 2.0 * a[k][i] * a[k][j];
1190       }
1191     }
1192     for (int i1 = 3; i1 < 6; i1++) {
1193       int k = qi1[i1];
1194       int n = qi2[i1];
1195       for (int i2 = 0; i2 < 3; i2++) {
1196         int i = qi1[i2];
1197         transformFieldMatrix[i1 + 4][i2 + 4] = a[k][i] * a[n][i];
1198       }
1199       for (int i2 = 3; i2 < 6; i2++) {
1200         int i = qi1[i2];
1201         int j = qi2[i2];
1202         transformFieldMatrix[i1 + 4][i2 + 4] = a[k][i] * a[n][j] + a[n][i] * a[k][j];
1203       }
1204     }
1205   }
1206 
1207   /**
1208    * Convert a Cartesian multipole in the global frame into a factional multipole.
1209    *
1210    * @param gm Multipole in the global frame.
1211    * @param fm Fractional multipole.
1212    */
1213   private void toFractionalMultipole(double[] gm, double[] fm) {
1214     // Charge.
1215     fm[0] = gm[0];
1216 
1217     // Dipole.
1218     for (int j = 1; j < 4; j++) {
1219       fm[j] = 0.0;
1220       for (int k = 1; k < 4; k++) {
1221         fm[j] = fma(transformMultipoleMatrix[j][k], gm[k], fm[j]);
1222       }
1223     }
1224 
1225     // Quadrupole.
1226     for (int j = 4; j < 10; j++) {
1227       fm[j] = 0.0;
1228       for (int k = 4; k < 7; k++) {
1229         fm[j] = fma(transformMultipoleMatrix[j][k], gm[k], fm[j]);
1230       }
1231       for (int k = 7; k < 10; k++) {
1232         fm[j] = fma(transformMultipoleMatrix[j][k] * 2.0, gm[k], fm[j]);
1233       }
1234       /*
1235         Fractional quadrupole components are pre-multiplied by a
1236         factor of 1/3 that arises in their potential.
1237        */
1238       fm[j] = fm[j] * oneThird;
1239     }
1240   }
1241 
1242   /**
1243    * Convert a dipole in the global frame into a factional dipole.
1244    *
1245    * @param globalDipole     dipole in the global frame.
1246    * @param fractionalDipole fractional dipole.
1247    */
1248   public void toFractionalDipole(double[] globalDipole, double[] fractionalDipole) {
1249     for (int j = 0; j < 3; j++) {
1250       fractionalDipole[j] = 0.0;
1251       for (int k = 0; k < 3; k++) {
1252         fractionalDipole[j] = fma(transformMultipoleMatrix[j + 1][k + 1],
1253             globalDipole[k], fractionalDipole[j]);
1254       }
1255     }
1256   }
1257 
1258   public enum GridMethod {
1259     SPATIAL,
1260     SLICE,
1261     ROW
1262   }
1263 
1264   /**
1265    * The class computes b-Splines that are used to spline multipoles and induced dipoles onto the PME
1266    * grid. Following convolution, the b-Splines are then used to obtain the reciprocal space
1267    * potential and fields from the PME grid. This class automatically updates itself to be consistent
1268    * with the current Crystal boundary conditions.
1269    *
1270    * <p>An external ParallelRegion can be used as follows: <code>
1271    * start() { bSplineRegion.start(); } run() { execute(0, nAtoms - 1,
1272    * bSplineRegion.bSplineLoop[threadID]); }
1273    * </code>
1274    */
1275   public class BSplineRegion extends ParallelRegion {
1276 
1277     final BSplineLoop[] bSplineLoop;
1278     double[][][][] splineX;
1279     double[][][][] splineY;
1280     double[][][][] splineZ;
1281     int[][][] initGrid;
1282     private double r00;
1283     private double r01;
1284     private double r02;
1285     private double r10;
1286     private double r11;
1287     private double r12;
1288     private double r20;
1289     private double r21;
1290     private double r22;
1291 
1292     BSplineRegion() {
1293       bSplineLoop = new BSplineLoop[threadCount];
1294       for (int i = 0; i < threadCount; i++) {
1295         bSplineLoop[i] = new BSplineLoop();
1296       }
1297     }
1298 
1299     @Override
1300     public void run() {
1301       /*
1302        Currently this condition would indicate a programming bug, since
1303        the space group is not allowed to change and the ReciprocalSpace
1304        class should be operating on a unit cell with a fixed number of
1305        space group symmetry operators (and not a replicated unit cell).
1306       */
1307       if (splineX.length < nSymm) {
1308         logger.severe(
1309             " Programming Error: the number of reciprocal space symmetry operators changed.");
1310       }
1311       try {
1312         int threadID = getThreadIndex();
1313         execute(0, nAtoms - 1, bSplineLoop[threadID]);
1314       } catch (Exception e) {
1315         logger.severe(e.toString());
1316       }
1317     }
1318 
1319     @Override
1320     public void start() {
1321       r00 = crystal.A[0][0];
1322       r01 = crystal.A[0][1];
1323       r02 = crystal.A[0][2];
1324       r10 = crystal.A[1][0];
1325       r11 = crystal.A[1][1];
1326       r12 = crystal.A[1][2];
1327       r20 = crystal.A[2][0];
1328       r21 = crystal.A[2][1];
1329       r22 = crystal.A[2][2];
1330       if (splineX == null || splineX[0].length < nAtoms) {
1331         initGrid = new int[nSymm][nAtoms][];
1332         splineX = new double[nSymm][nAtoms][][];
1333         splineY = new double[nSymm][nAtoms][][];
1334         splineZ = new double[nSymm][nAtoms][][];
1335       }
1336     }
1337 
1338     public class BSplineLoop extends IntegerForLoop {
1339 
1340       private int threadID;
1341       private final double[][] bSplineWork;
1342       private final IntegerSchedule schedule = IntegerSchedule.fixed();
1343 
1344       BSplineLoop() {
1345         bSplineWork = new double[bSplineOrder][bSplineOrder];
1346       }
1347 
1348       @Override
1349       public void finish() {
1350         bSplineTime[threadID] += System.nanoTime();
1351       }
1352 
1353       @Override
1354       public void run(int lb, int ub) {
1355         for (int iSymOp = 0; iSymOp < nSymm; iSymOp++) {
1356           final double[] x = coordinates[iSymOp][0];
1357           final double[] y = coordinates[iSymOp][1];
1358           final double[] z = coordinates[iSymOp][2];
1359           final int[][] initgridi = initGrid[iSymOp];
1360           final double[][][] splineXi = splineX[iSymOp];
1361           final double[][][] splineYi = splineY[iSymOp];
1362           final double[][][] splineZi = splineZ[iSymOp];
1363           for (int i = lb; i <= ub; i++) {
1364             if (initgridi[i] == null) {
1365               initgridi[i] = new int[3];
1366               splineXi[i] = new double[bSplineOrder][derivOrder + 1];
1367               splineYi[i] = new double[bSplineOrder][derivOrder + 1];
1368               splineZi[i] = new double[bSplineOrder][derivOrder + 1];
1369             }
1370             final double xi = x[i];
1371             final double yi = y[i];
1372             final double zi = z[i];
1373             final int[] grd = initgridi[i];
1374             // X-dimension
1375             final double wx = xi * r00 + yi * r10 + zi * r20;
1376             final double ux = wx - round(wx) + 0.5;
1377             final double frx = fftX * ux;
1378             final int ifrx = (int) frx;
1379             final double bx = frx - ifrx;
1380             grd[0] = ifrx - bSplineOrder;
1381             bSplineDerivatives(bx, bSplineOrder, derivOrder, splineXi[i], bSplineWork);
1382             // Y-dimension
1383             final double wy = xi * r01 + yi * r11 + zi * r21;
1384             final double uy = wy - round(wy) + 0.5;
1385             final double fry = fftY * uy;
1386             final int ifry = (int) fry;
1387             final double by = fry - ifry;
1388             grd[1] = ifry - bSplineOrder;
1389             bSplineDerivatives(by, bSplineOrder, derivOrder, splineYi[i], bSplineWork);
1390             // Z-dimension
1391             final double wz = xi * r02 + yi * r12 + zi * r22;
1392             final double uz = wz - round(wz) + 0.5;
1393             final double frz = fftZ * uz;
1394             final int ifrz = (int) frz;
1395             final double bz = frz - ifrz;
1396             grd[2] = ifrz - bSplineOrder;
1397             bSplineDerivatives(bz, bSplineOrder, derivOrder, splineZi[i], bSplineWork);
1398           }
1399         }
1400       }
1401 
1402       @Override
1403       public IntegerSchedule schedule() {
1404         return schedule;
1405       }
1406 
1407       @Override
1408       public void start() {
1409         threadID = getThreadIndex();
1410         bSplineTime[threadID] -= System.nanoTime();
1411       }
1412     }
1413   }
1414 
1415   private class SpatialPermanentLoop extends SpatialDensityLoop {
1416 
1417     private final BSplineRegion bSplines;
1418     private double[][][] fracMultipoles = null;
1419     private boolean[] use = null;
1420     private int threadIndex;
1421 
1422     SpatialPermanentLoop(SpatialDensityRegion region, BSplineRegion splines) {
1423       super(region, region.nSymm, region.actualCount);
1424       this.bSplines = splines;
1425     }
1426 
1427     @Override
1428     public void finish() {
1429       splinePermanentTime[threadIndex] += System.nanoTime();
1430     }
1431 
1432     @Override
1433     public void gridDensity(int iSymm, int n) {
1434       // Some atoms are not used during Lambda dynamics.
1435       if (use != null && !use[n]) {
1436         return;
1437       }
1438       splineCount[threadIndex]++;
1439       switch (elecForm) {
1440         case PAM -> gridMultipoleDensity(iSymm, n);
1441         case FIXED_CHARGE -> gridFixedChargeDensity(iSymm, n);
1442       }
1443     }
1444 
1445     private void gridMultipoleDensity(int iSymm, int n) {
1446       final double[] fm = fracMultipoles[iSymm][n];
1447       final double[][] splx = bSplines.splineX[iSymm][n];
1448       final double[][] sply = bSplines.splineY[iSymm][n];
1449       final double[][] splz = bSplines.splineZ[iSymm][n];
1450       final int igrd0 = bSplines.initGrid[iSymm][n][0];
1451       final int jgrd0 = bSplines.initGrid[iSymm][n][1];
1452       int k0 = bSplines.initGrid[iSymm][n][2];
1453       final double c = fm[t000];
1454       final double dx = fm[t100];
1455       final double dy = fm[t010];
1456       final double dz = fm[t001];
1457       final double qxx = fm[t200];
1458       final double qyy = fm[t020];
1459       final double qzz = fm[t002];
1460       final double qxy = fm[t110];
1461       final double qxz = fm[t101];
1462       final double qyz = fm[t011];
1463       for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
1464         final double[] splzi = splz[ith3];
1465         final double v0 = splzi[0];
1466         final double v1 = splzi[1];
1467         final double v2 = splzi[2];
1468         final double c0 = c * v0;
1469         final double dx0 = dx * v0;
1470         final double dy0 = dy * v0;
1471         final double dz1 = dz * v1;
1472         final double qxx0 = qxx * v0;
1473         final double qyy0 = qyy * v0;
1474         final double qzz2 = qzz * v2;
1475         final double qxy0 = qxy * v0;
1476         final double qxz1 = qxz * v1;
1477         final double qyz1 = qyz * v1;
1478         final double c0dz1qzz2 = c0 + dz1 + qzz2;
1479         final double dy0qyz1 = dy0 + qyz1;
1480         final double dx0qxz1 = dx0 + qxz1;
1481         final int k = mod(++k0, fftZ);
1482         int j0 = jgrd0;
1483         for (int ith2 = 0; ith2 < bSplineOrder; ith2++) {
1484           final double[] splyi = sply[ith2];
1485           final double u0 = splyi[0];
1486           final double u1 = splyi[1];
1487           final double u2 = splyi[2];
1488           final double term0 = fma(c0dz1qzz2, u0, fma(dy0qyz1, u1, qyy0 * u2));
1489           final double term1 = fma(dx0qxz1, u0, qxy0 * u1);
1490           final double term2 = qxx0 * u0;
1491           final int j = mod(++j0, fftY);
1492           int i0 = igrd0;
1493           for (int ith1 = 0; ith1 < bSplineOrder; ith1++) {
1494             final int i = mod(++i0, fftX);
1495             final int ii = interleavedIndex(i, j, k, fftX, fftY);
1496             final double[] splxi = splx[ith1];
1497             final double current = splineBuffer.get(ii);
1498             double updated = fma(splxi[0], term0, current);
1499             updated = fma(splxi[1], term1, updated);
1500             updated = fma(splxi[2], term2, updated);
1501             splineBuffer.put(ii, updated);
1502           }
1503         }
1504       }
1505     }
1506 
1507     private void gridFixedChargeDensity(int iSymm, int n) {
1508       final double[] fm = fracMultipoles[iSymm][n];
1509       final double[][] splx = bSplines.splineX[iSymm][n];
1510       final double[][] sply = bSplines.splineY[iSymm][n];
1511       final double[][] splz = bSplines.splineZ[iSymm][n];
1512       final int igrd0 = bSplines.initGrid[iSymm][n][0];
1513       final int jgrd0 = bSplines.initGrid[iSymm][n][1];
1514       int k0 = bSplines.initGrid[iSymm][n][2];
1515       final double c = fm[t000];
1516       for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
1517         final double[] splzi = splz[ith3];
1518         final double v0 = splzi[0];
1519         final double c0 = c * v0;
1520         final int k = mod(++k0, fftZ);
1521         int j0 = jgrd0;
1522         for (int ith2 = 0; ith2 < bSplineOrder; ith2++) {
1523           final double[] splyi = sply[ith2];
1524           final double u0 = splyi[0];
1525           final double term0 = c0 * u0;
1526           final int j = mod(++j0, fftY);
1527           int i0 = igrd0;
1528           for (int ith1 = 0; ith1 < bSplineOrder; ith1++) {
1529             final int i = mod(++i0, fftX);
1530             final int ii = interleavedIndex(i, j, k, fftX, fftY);
1531             final double[] splxi = splx[ith1];
1532             final double current = splineBuffer.get(ii);
1533             double updated = fma(splxi[0], term0, current);
1534             splineBuffer.put(ii, updated);
1535           }
1536         }
1537       }
1538     }
1539 
1540     @Override
1541     public void start() {
1542       threadIndex = getThreadIndex();
1543       splinePermanentTime[threadIndex] -= System.nanoTime();
1544     }
1545 
1546     void setPermanent(double[][][] fracMultipoles) {
1547       this.fracMultipoles = fracMultipoles;
1548     }
1549 
1550     private void setUse(boolean[] use) {
1551       this.use = use;
1552     }
1553   }
1554 
1555   private class SpatialInducedLoop extends SpatialDensityLoop {
1556 
1557     private final BSplineRegion bSplineRegion;
1558     private double[][][] inducedDipoleFrac = null;
1559     private double[][][] inducedDipoleFracCR = null;
1560     private boolean[] use = null;
1561 
1562     SpatialInducedLoop(SpatialDensityRegion region, BSplineRegion splines) {
1563       super(region, region.nSymm, region.actualCount);
1564       this.bSplineRegion = splines;
1565     }
1566 
1567     @Override
1568     public void finish() {
1569       splineInducedTime[getThreadIndex()] += System.nanoTime();
1570     }
1571 
1572     @Override
1573     public void gridDensity(int iSymm, int n) {
1574       if (use != null && !use[n]) {
1575         return;
1576       }
1577 
1578       // Convert Cartesian induced dipole to fractional induced dipole.
1579       final double[] ind = inducedDipoleFrac[iSymm][n];
1580       final double ux = ind[0];
1581       final double uy = ind[1];
1582       final double uz = ind[2];
1583       final double[] indCR = inducedDipoleFracCR[iSymm][n];
1584       final double px = indCR[0];
1585       final double py = indCR[1];
1586       final double pz = indCR[2];
1587       final double[][] splx = bSplineRegion.splineX[iSymm][n];
1588       final double[][] sply = bSplineRegion.splineY[iSymm][n];
1589       final double[][] splz = bSplineRegion.splineZ[iSymm][n];
1590       final int igrd0 = bSplineRegion.initGrid[iSymm][n][0];
1591       final int jgrd0 = bSplineRegion.initGrid[iSymm][n][1];
1592       int k0 = bSplineRegion.initGrid[iSymm][n][2];
1593       for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
1594         final double[] splzi = splz[ith3];
1595         final double v0 = splzi[0];
1596         final double v1 = splzi[1];
1597         final double dx0 = ux * v0;
1598         final double dy0 = uy * v0;
1599         final double dz1 = uz * v1;
1600         final double px0 = px * v0;
1601         final double py0 = py * v0;
1602         final double pz1 = pz * v1;
1603         final int k = mod(++k0, fftZ);
1604         int j0 = jgrd0;
1605         for (int ith2 = 0; ith2 < bSplineOrder; ith2++) {
1606           final double[] splyi = sply[ith2];
1607           final double u0 = splyi[0];
1608           final double u1 = splyi[1];
1609           final double term0 = fma(dz1, u0, dy0 * u1);
1610           final double term1 = dx0 * u0;
1611           final double termp0 = fma(pz1, u0, py0 * u1);
1612           final double termp1 = px0 * u0;
1613           final int j = mod(++j0, fftY);
1614           int i0 = igrd0;
1615           for (int ith1 = 0; ith1 < bSplineOrder; ith1++) {
1616             final int i = mod(++i0, fftX);
1617             final int ii = interleavedIndex(i, j, k, fftX, fftY);
1618             final double[] splxi = splx[ith1];
1619             final double current = splineBuffer.get(ii);
1620             final double currenti = splineBuffer.get(ii + 1);
1621             final double updated = fma(splxi[0], term0, fma(splxi[1], term1, current));
1622             final double udpatedi = fma(splxi[0], termp0, fma(splxi[1], termp1, currenti));
1623             splineBuffer.put(ii, updated);
1624             splineBuffer.put(ii + 1, udpatedi);
1625           }
1626         }
1627       }
1628     }
1629 
1630     public void setUse(boolean[] use) {
1631       this.use = use;
1632     }
1633 
1634     @Override
1635     public void start() {
1636       splineInducedTime[getThreadIndex()] -= System.nanoTime();
1637     }
1638 
1639     void setInducedDipoles(double[][][] inducedDipoleFrac, double[][][] inducedDipoleFracCR) {
1640       this.inducedDipoleFrac = inducedDipoleFrac;
1641       this.inducedDipoleFracCR = inducedDipoleFracCR;
1642     }
1643   }
1644 
1645   private class RowPermanentLoop extends RowLoop {
1646 
1647     private final BSplineRegion bSplines;
1648     private double[][][] fracMultipoles = null;
1649     private boolean[] use = null;
1650     private int threadIndex;
1651 
1652     RowPermanentLoop(RowRegion region, BSplineRegion splines) {
1653       super(region.nAtoms, region.nSymm, region);
1654       this.bSplines = splines;
1655     }
1656 
1657     @Override
1658     public void finish() {
1659       splinePermanentTime[threadIndex] += System.nanoTime();
1660     }
1661 
1662     @Override
1663     public void gridDensity(int iSymm, int iAtom, int lb, int ub) {
1664       // Some atoms are not used during Lambda dynamics.
1665       if (use != null && !use[iAtom]) {
1666         return;
1667       }
1668       switch (elecForm) {
1669         case PAM -> gridMultipoleDensity(iSymm, iAtom, lb, ub);
1670         case FIXED_CHARGE -> gridFixedChargeDensity(iSymm, iAtom, lb, ub);
1671       }
1672     }
1673 
1674     private void gridMultipoleDensity(int iSymm, int iAtom, int lb, int ub) {
1675       boolean atomContributes = false;
1676       int k0 = bSplines.initGrid[iSymm][iAtom][2];
1677       int lbZ = RowIndexZ(lb);
1678       int ubZ = RowIndexZ(ub);
1679       for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
1680         final int k = mod(++k0, fftZ);
1681         if (lbZ <= k && k <= ubZ) {
1682           atomContributes = true;
1683           break;
1684         }
1685       }
1686       if (!atomContributes) {
1687         return;
1688       }
1689 
1690       splineCount[threadIndex]++;
1691 
1692       // Add atom n to the list for the current symmOp and thread.
1693       int index = gridAtomCount[iSymm][threadIndex];
1694       gridAtomList[iSymm][threadIndex][index] = iAtom;
1695       gridAtomCount[iSymm][threadIndex]++;
1696       final double[] fm = fracMultipoles[iSymm][iAtom];
1697       final double[][] splx = bSplines.splineX[iSymm][iAtom];
1698       final double[][] sply = bSplines.splineY[iSymm][iAtom];
1699       final double[][] splz = bSplines.splineZ[iSymm][iAtom];
1700       final int igrd0 = bSplines.initGrid[iSymm][iAtom][0];
1701       final int jgrd0 = bSplines.initGrid[iSymm][iAtom][1];
1702       k0 = bSplines.initGrid[iSymm][iAtom][2];
1703       final double c = fm[t000];
1704       final double dx = fm[t100];
1705       final double dy = fm[t010];
1706       final double dz = fm[t001];
1707       final double qxx = fm[t200];
1708       final double qyy = fm[t020];
1709       final double qzz = fm[t002];
1710       final double qxy = fm[t110];
1711       final double qxz = fm[t101];
1712       final double qyz = fm[t011];
1713       for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
1714         final int k = mod(++k0, fftZ);
1715         if (k < lbZ || k > ubZ) {
1716           continue;
1717         }
1718         final double[] splzi = splz[ith3];
1719         final double v0 = splzi[0];
1720         final double v1 = splzi[1];
1721         final double v2 = splzi[2];
1722         final double c0 = c * v0;
1723         final double dx0 = dx * v0;
1724         final double dy0 = dy * v0;
1725         final double dz1 = dz * v1;
1726         final double qxx0 = qxx * v0;
1727         final double qyy0 = qyy * v0;
1728         final double qzz2 = qzz * v2;
1729         final double qxy0 = qxy * v0;
1730         final double qxz1 = qxz * v1;
1731         final double qyz1 = qyz * v1;
1732         final double c0dz1qzz2 = c0 + dz1 + qzz2;
1733         final double dy0qyz1 = dy0 + qyz1;
1734         final double dx0qxz1 = dx0 + qxz1;
1735         int j0 = jgrd0;
1736         for (int ith2 = 0; ith2 < bSplineOrder; ith2++) {
1737           final int j = mod(++j0, fftY);
1738           int rowIndex = rowRegion.rowIndexForYZ(j, k);
1739           if (lb > rowIndex || rowIndex > ub) {
1740             continue;
1741           }
1742           final double[] splyi = sply[ith2];
1743           final double u0 = splyi[0];
1744           final double u1 = splyi[1];
1745           final double u2 = splyi[2];
1746           // Pieces of a multipole
1747           final double term0 = fma(c0dz1qzz2, u0, fma(dy0qyz1, u1, qyy0 * u2));
1748           final double term1 = fma(dx0qxz1, u0, qxy0 * u1);
1749           final double term2 = qxx0 * u0;
1750           int i0 = igrd0;
1751           for (int ith1 = 0; ith1 < bSplineOrder; ith1++) {
1752             final int i = mod(++i0, fftX);
1753             final int ii = interleavedIndex(i, j, k, fftX, fftY);
1754             final double[] splxi = splx[ith1];
1755             double current = splineBuffer.get(ii);
1756             double updated = fma(splxi[0], term0, current);
1757             updated = fma(splxi[1], term1, updated);
1758             updated = fma(splxi[2], term2, updated);
1759             splineBuffer.put(ii, updated);
1760           }
1761         }
1762       }
1763     }
1764 
1765     private void gridFixedChargeDensity(int iSymm, int iAtom, int lb, int ub) {
1766       boolean atomContributes = false;
1767       int k0 = bSplines.initGrid[iSymm][iAtom][2];
1768       int lbZ = RowIndexZ(lb);
1769       int ubZ = RowIndexZ(ub);
1770       for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
1771         final int k = mod(++k0, fftZ);
1772         if (lbZ <= k && k <= ubZ) {
1773           atomContributes = true;
1774           break;
1775         }
1776       }
1777       if (!atomContributes) {
1778         return;
1779       }
1780 
1781       splineCount[threadIndex]++;
1782 
1783       // Add atom n to the list for the current symmOp and thread.
1784       int index = gridAtomCount[iSymm][threadIndex];
1785       gridAtomList[iSymm][threadIndex][index] = iAtom;
1786       gridAtomCount[iSymm][threadIndex]++;
1787       final double[] fm = fracMultipoles[iSymm][iAtom];
1788       final double[][] splx = bSplines.splineX[iSymm][iAtom];
1789       final double[][] sply = bSplines.splineY[iSymm][iAtom];
1790       final double[][] splz = bSplines.splineZ[iSymm][iAtom];
1791       final int igrd0 = bSplines.initGrid[iSymm][iAtom][0];
1792       final int jgrd0 = bSplines.initGrid[iSymm][iAtom][1];
1793       k0 = bSplines.initGrid[iSymm][iAtom][2];
1794       final double c = fm[t000];
1795       for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
1796         final int k = mod(++k0, fftZ);
1797         if (k < lbZ || k > ubZ) {
1798           continue;
1799         }
1800         final double[] splzi = splz[ith3];
1801         final double v0 = splzi[0];
1802         final double c0 = c * v0;
1803         int j0 = jgrd0;
1804         for (int ith2 = 0; ith2 < bSplineOrder; ith2++) {
1805           final int j = mod(++j0, fftY);
1806           int rowIndex = rowRegion.rowIndexForYZ(j, k);
1807           if (lb > rowIndex || rowIndex > ub) {
1808             continue;
1809           }
1810           final double[] splyi = sply[ith2];
1811           final double u0 = splyi[0];
1812           // Pieces of a multipole
1813           final double term0 = c0 * u0;
1814           int i0 = igrd0;
1815           for (int ith1 = 0; ith1 < bSplineOrder; ith1++) {
1816             final int i = mod(++i0, fftX);
1817             final int ii = interleavedIndex(i, j, k, fftX, fftY);
1818             final double[] splxi = splx[ith1];
1819             double current = splineBuffer.get(ii);
1820             double updated = fma(splxi[0], term0, current);
1821             splineBuffer.put(ii, updated);
1822           }
1823         }
1824       }
1825     }
1826 
1827     @Override
1828     public void start() {
1829       threadIndex = getThreadIndex();
1830       splinePermanentTime[threadIndex] -= System.nanoTime();
1831       for (int i = 0; i < nSymm; i++) {
1832         gridAtomCount[i][threadIndex] = 0;
1833       }
1834     }
1835 
1836     void setPermanent(double[][][] fracMultipoles) {
1837       this.fracMultipoles = fracMultipoles;
1838     }
1839 
1840     private void setUse(boolean[] use) {
1841       this.use = use;
1842     }
1843   }
1844 
1845   private class RowInducedLoop extends RowLoop {
1846 
1847     private final BSplineRegion bSplineRegion;
1848     private double[][][] inducedDipoleFrac = null;
1849     private double[][][] inducedDipoleFracCR = null;
1850     private boolean[] use = null;
1851 
1852     RowInducedLoop(RowRegion region, BSplineRegion splines) {
1853       super(region.nAtoms, region.nSymm, region);
1854       this.bSplineRegion = splines;
1855     }
1856 
1857     @Override
1858     public void finish() {
1859       int threadIndex = getThreadIndex();
1860       splineInducedTime[threadIndex] += System.nanoTime();
1861     }
1862 
1863     @Override
1864     public void gridDensity(int iSymm, int iAtom, int lb, int ub) {
1865       if (use != null && !use[iAtom]) {
1866         return;
1867       }
1868 
1869       final int lbZ = RowIndexZ(lb);
1870       final int ubZ = RowIndexZ(ub);
1871       final double[] ind = inducedDipoleFrac[iSymm][iAtom];
1872       final double ux = ind[0];
1873       final double uy = ind[1];
1874       final double uz = ind[2];
1875       double[] indCR = inducedDipoleFracCR[iSymm][iAtom];
1876       final double px = indCR[0];
1877       final double py = indCR[1];
1878       final double pz = indCR[2];
1879       final double[][] splx = bSplineRegion.splineX[iSymm][iAtom];
1880       final double[][] sply = bSplineRegion.splineY[iSymm][iAtom];
1881       final double[][] splz = bSplineRegion.splineZ[iSymm][iAtom];
1882       final int igrd0 = bSplineRegion.initGrid[iSymm][iAtom][0];
1883       final int jgrd0 = bSplineRegion.initGrid[iSymm][iAtom][1];
1884       int k0 = bSplineRegion.initGrid[iSymm][iAtom][2];
1885       for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
1886         final int k = mod(++k0, fftZ);
1887         if (k < lbZ || k > ubZ) {
1888           continue;
1889         }
1890         final double[] splzi = splz[ith3];
1891         final double v0 = splzi[0];
1892         final double v1 = splzi[1];
1893         final double dx0 = ux * v0;
1894         final double dy0 = uy * v0;
1895         final double dz1 = uz * v1;
1896         final double px0 = px * v0;
1897         final double py0 = py * v0;
1898         final double pz1 = pz * v1;
1899         int j0 = jgrd0;
1900         for (int ith2 = 0; ith2 < bSplineOrder; ith2++) {
1901           final int j = mod(++j0, fftY);
1902           int rowIndex = rowRegion.rowIndexForYZ(j, k);
1903           if (lb > rowIndex || rowIndex > ub) {
1904             continue;
1905           }
1906           final double[] splyi = sply[ith2];
1907           final double u0 = splyi[0];
1908           final double u1 = splyi[1];
1909           final double term0 = fma(dz1, u0, dy0 * u1);
1910           final double term1 = dx0 * u0;
1911           final double termp0 = fma(pz1, u0, py0 * u1);
1912           final double termp1 = px0 * u0;
1913           int i0 = igrd0;
1914           for (int ith1 = 0; ith1 < bSplineOrder; ith1++) {
1915             final int i = mod(++i0, fftX);
1916             final int ii = interleavedIndex(i, j, k, fftX, fftY);
1917             final double[] splxi = splx[ith1];
1918             final double current = splineBuffer.get(ii);
1919             final double currenti = splineBuffer.get(ii + 1);
1920             final double updated = fma(splxi[0], term0, fma(splxi[1], term1, current));
1921             final double udpatedi = fma(splxi[0], termp0, fma(splxi[1], termp1, currenti));
1922             splineBuffer.put(ii, updated);
1923             splineBuffer.put(ii + 1, udpatedi);
1924           }
1925         }
1926       }
1927     }
1928 
1929     @Override
1930     public void run(int lb, int ub) {
1931       int ti = getThreadIndex();
1932       for (int iSymm = 0; iSymm < nSymm; iSymm++) {
1933         int[] list = gridAtomList[iSymm][ti];
1934         int n = gridAtomCount[iSymm][ti];
1935         for (int i = 0; i < n; i++) {
1936           int iAtom = list[i];
1937           gridDensity(iSymm, iAtom, lb, ub);
1938         }
1939       }
1940     }
1941 
1942     public void setUse(boolean[] use) {
1943       this.use = use;
1944     }
1945 
1946     @Override
1947     public void start() {
1948       int threadIndex = getThreadIndex();
1949       splineInducedTime[threadIndex] -= System.nanoTime();
1950     }
1951 
1952     void setInducedDipoles(double[][][] inducedDipoleFrac, double[][][] inducedDipoleFracCR) {
1953       this.inducedDipoleFrac = inducedDipoleFrac;
1954       this.inducedDipoleFracCR = inducedDipoleFracCR;
1955     }
1956   }
1957 
1958   private class SlicePermanentLoop extends SliceLoop {
1959 
1960     private final BSplineRegion bSplines;
1961     private double[][][] fracMultipoles = null;
1962     private boolean[] use = null;
1963     private int threadIndex;
1964 
1965     SlicePermanentLoop(SliceRegion region, BSplineRegion splines) {
1966       super(region.nAtoms, region.nSymm, region);
1967       this.bSplines = splines;
1968     }
1969 
1970     @Override
1971     public void finish() {
1972       splinePermanentTime[threadIndex] += System.nanoTime();
1973     }
1974 
1975     @Override
1976     public void gridDensity(int iSymm, int iAtom, int lb, int ub) {
1977       // Some atoms are not used during Lambda dynamics.
1978       if (use != null && !use[iAtom]) {
1979         return;
1980       }
1981 
1982       switch (elecForm) {
1983         case PAM -> gridMultipoleDensity(iSymm, iAtom, lb, ub);
1984         case FIXED_CHARGE -> gridFixedChargeDensity(iSymm, iAtom, lb, ub);
1985       }
1986     }
1987 
1988     private void gridMultipoleDensity(int iSymm, int iAtom, int lb, int ub) {
1989       boolean atomContributes = false;
1990       int k0 = bSplines.initGrid[iSymm][iAtom][2];
1991       for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
1992         final int k = mod(++k0, fftZ);
1993         if (lb <= k && k <= ub) {
1994           atomContributes = true;
1995           break;
1996         }
1997       }
1998       if (!atomContributes) {
1999         return;
2000       }
2001 
2002       splineCount[threadIndex]++;
2003 
2004       // Add atom n to the list for the current symmOp and thread.
2005       int index = gridAtomCount[iSymm][threadIndex];
2006       gridAtomList[iSymm][threadIndex][index] = iAtom;
2007       gridAtomCount[iSymm][threadIndex]++;
2008       final double[] fm = fracMultipoles[iSymm][iAtom];
2009       final double[][] splx = bSplines.splineX[iSymm][iAtom];
2010       final double[][] sply = bSplines.splineY[iSymm][iAtom];
2011       final double[][] splz = bSplines.splineZ[iSymm][iAtom];
2012       final int igrd0 = bSplines.initGrid[iSymm][iAtom][0];
2013       final int jgrd0 = bSplines.initGrid[iSymm][iAtom][1];
2014       k0 = bSplines.initGrid[iSymm][iAtom][2];
2015       final double c = fm[t000];
2016       final double dx = fm[t100];
2017       final double dy = fm[t010];
2018       final double dz = fm[t001];
2019       final double qxx = fm[t200];
2020       final double qyy = fm[t020];
2021       final double qzz = fm[t002];
2022       final double qxy = fm[t110];
2023       final double qxz = fm[t101];
2024       final double qyz = fm[t011];
2025       for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
2026         final int k = mod(++k0, fftZ);
2027         if (k < lb || k > ub) {
2028           continue;
2029         }
2030         final double[] splzi = splz[ith3];
2031         final double v0 = splzi[0];
2032         final double v1 = splzi[1];
2033         final double v2 = splzi[2];
2034         final double c0 = c * v0;
2035         final double dx0 = dx * v0;
2036         final double dy0 = dy * v0;
2037         final double dz1 = dz * v1;
2038         final double qxx0 = qxx * v0;
2039         final double qyy0 = qyy * v0;
2040         final double qzz2 = qzz * v2;
2041         final double qxy0 = qxy * v0;
2042         final double qxz1 = qxz * v1;
2043         final double qyz1 = qyz * v1;
2044         final double c0dz1qzz2 = c0 + dz1 + qzz2;
2045         final double dy0qyz1 = dy0 + qyz1;
2046         final double dx0qxz1 = dx0 + qxz1;
2047         int j0 = jgrd0;
2048         for (int ith2 = 0; ith2 < bSplineOrder; ith2++) {
2049           final double[] splyi = sply[ith2];
2050           final double u0 = splyi[0];
2051           final double u1 = splyi[1];
2052           final double u2 = splyi[2];
2053           // Pieces of a multipole
2054           final double term0 = fma(c0dz1qzz2, u0, fma(dy0qyz1, u1, qyy0 * u2));
2055           final double term1 = fma(dx0qxz1, u0, qxy0 * u1);
2056           final double term2 = qxx0 * u0;
2057           final int j = mod(++j0, fftY);
2058           int i0 = igrd0;
2059           for (int ith1 = 0; ith1 < bSplineOrder; ith1++) {
2060             final int i = mod(++i0, fftX);
2061             final int ii = interleavedIndex(i, j, k, fftX, fftY);
2062             final double[] splxi = splx[ith1];
2063             double current = splineBuffer.get(ii);
2064             double updated = fma(splxi[0], term0, current);
2065             updated = fma(splxi[1], term1, updated);
2066             updated = fma(splxi[2], term2, updated);
2067             splineBuffer.put(ii, updated);
2068           }
2069         }
2070       }
2071     }
2072 
2073     private void gridFixedChargeDensity(int iSymm, int iAtom, int lb, int ub) {
2074       boolean atomContributes = false;
2075       int k0 = bSplines.initGrid[iSymm][iAtom][2];
2076       for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
2077         final int k = mod(++k0, fftZ);
2078         if (lb <= k && k <= ub) {
2079           atomContributes = true;
2080           break;
2081         }
2082       }
2083       if (!atomContributes) {
2084         return;
2085       }
2086 
2087       splineCount[threadIndex]++;
2088 
2089       // Add atom n to the list for the current symmOp and thread.
2090       int index = gridAtomCount[iSymm][threadIndex];
2091       gridAtomList[iSymm][threadIndex][index] = iAtom;
2092       gridAtomCount[iSymm][threadIndex]++;
2093       final double[] fm = fracMultipoles[iSymm][iAtom];
2094       final double[][] splx = bSplines.splineX[iSymm][iAtom];
2095       final double[][] sply = bSplines.splineY[iSymm][iAtom];
2096       final double[][] splz = bSplines.splineZ[iSymm][iAtom];
2097       final int igrd0 = bSplines.initGrid[iSymm][iAtom][0];
2098       final int jgrd0 = bSplines.initGrid[iSymm][iAtom][1];
2099       k0 = bSplines.initGrid[iSymm][iAtom][2];
2100       final double c = fm[t000];
2101       for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
2102         final int k = mod(++k0, fftZ);
2103         if (k < lb || k > ub) {
2104           continue;
2105         }
2106         final double[] splzi = splz[ith3];
2107         final double v0 = splzi[0];
2108         final double c0 = c * v0;
2109         int j0 = jgrd0;
2110         for (int ith2 = 0; ith2 < bSplineOrder; ith2++) {
2111           final double[] splyi = sply[ith2];
2112           final double u0 = splyi[0];
2113           final double term0 = c0 * u0;
2114           final int j = mod(++j0, fftY);
2115           int i0 = igrd0;
2116           for (int ith1 = 0; ith1 < bSplineOrder; ith1++) {
2117             final int i = mod(++i0, fftX);
2118             final int ii = interleavedIndex(i, j, k, fftX, fftY);
2119             final double[] splxi = splx[ith1];
2120             double current = splineBuffer.get(ii);
2121             double updated = fma(splxi[0], term0, current);
2122             splineBuffer.put(ii, updated);
2123           }
2124         }
2125       }
2126     }
2127 
2128     @Override
2129     public void start() {
2130       threadIndex = getThreadIndex();
2131       splinePermanentTime[threadIndex] -= System.nanoTime();
2132       for (int i = 0; i < nSymm; i++) {
2133         gridAtomCount[i][threadIndex] = 0;
2134       }
2135     }
2136 
2137     void setPermanent(double[][][] fracMultipoles) {
2138       this.fracMultipoles = fracMultipoles;
2139     }
2140 
2141     private void setUse(boolean[] use) {
2142       this.use = use;
2143     }
2144   }
2145 
2146   private class SliceInducedLoop extends SliceLoop {
2147 
2148     private final BSplineRegion bSplineRegion;
2149     private double[][][] inducedDipoleFrac = null;
2150     private double[][][] inducedDipoleFracCR = null;
2151     private boolean[] use = null;
2152 
2153     SliceInducedLoop(SliceRegion region, BSplineRegion splines) {
2154       super(region.nAtoms, region.nSymm, region);
2155       this.bSplineRegion = splines;
2156     }
2157 
2158     @Override
2159     public void finish() {
2160       int threadIndex = getThreadIndex();
2161       splineInducedTime[threadIndex] += System.nanoTime();
2162     }
2163 
2164     @Override
2165     public void gridDensity(int iSymm, int iAtom, int lb, int ub) {
2166       if (use != null && !use[iAtom]) {
2167         return;
2168       }
2169 
2170       // Convert Cartesian induced dipole to fractional induced dipole.
2171       double[] ind = inducedDipoleFrac[iSymm][iAtom];
2172       double ux = ind[0];
2173       double uy = ind[1];
2174       double uz = ind[2];
2175       double[] indCR = inducedDipoleFracCR[iSymm][iAtom];
2176       double px = indCR[0];
2177       double py = indCR[1];
2178       double pz = indCR[2];
2179       final double[][] splx = bSplineRegion.splineX[iSymm][iAtom];
2180       final double[][] sply = bSplineRegion.splineY[iSymm][iAtom];
2181       final double[][] splz = bSplineRegion.splineZ[iSymm][iAtom];
2182       final int igrd0 = bSplineRegion.initGrid[iSymm][iAtom][0];
2183       final int jgrd0 = bSplineRegion.initGrid[iSymm][iAtom][1];
2184       int k0 = bSplineRegion.initGrid[iSymm][iAtom][2];
2185       for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
2186         final int k = mod(++k0, fftZ);
2187         if (k < lb || k > ub) {
2188           continue;
2189         }
2190         final double[] splzi = splz[ith3];
2191         final double v0 = splzi[0];
2192         final double v1 = splzi[1];
2193         final double dx0 = ux * v0;
2194         final double dy0 = uy * v0;
2195         final double dz1 = uz * v1;
2196         final double px0 = px * v0;
2197         final double py0 = py * v0;
2198         final double pz1 = pz * v1;
2199         int j0 = jgrd0;
2200         for (int ith2 = 0; ith2 < bSplineOrder; ith2++) {
2201           final double[] splyi = sply[ith2];
2202           final double u0 = splyi[0];
2203           final double u1 = splyi[1];
2204           final double term0 = fma(dz1, u0, dy0 * u1);
2205           final double term1 = dx0 * u0;
2206           final double termp0 = fma(pz1, u0, py0 * u1);
2207           final double termp1 = px0 * u0;
2208           final int j = mod(++j0, fftY);
2209           int i0 = igrd0;
2210           for (int ith1 = 0; ith1 < bSplineOrder; ith1++) {
2211             final int i = mod(++i0, fftX);
2212             final int ii = interleavedIndex(i, j, k, fftX, fftY);
2213             final double[] splxi = splx[ith1];
2214             final double current = splineBuffer.get(ii);
2215             final double currenti = splineBuffer.get(ii + 1);
2216             final double updated = fma(splxi[0], term0, fma(splxi[1], term1, current));
2217             final double udpatedi = fma(splxi[0], termp0, fma(splxi[1], termp1, currenti));
2218             splineBuffer.put(ii, updated);
2219             splineBuffer.put(ii + 1, udpatedi);
2220           }
2221         }
2222       }
2223     }
2224 
2225     @Override
2226     public void run(int lb, int ub) {
2227       int ti = getThreadIndex();
2228       for (int iSymm = 0; iSymm < nSymm; iSymm++) {
2229         int[] list = gridAtomList[iSymm][ti];
2230         int n = gridAtomCount[iSymm][ti];
2231         for (int i = 0; i < n; i++) {
2232           int iAtom = list[i];
2233           gridDensity(iSymm, iAtom, lb, ub);
2234         }
2235       }
2236     }
2237 
2238     public void setUse(boolean[] use) {
2239       this.use = use;
2240     }
2241 
2242     @Override
2243     public void start() {
2244       int threadIndex = getThreadIndex();
2245       splineInducedTime[threadIndex] -= System.nanoTime();
2246     }
2247 
2248     void setInducedDipoles(double[][][] inducedDipoleFrac, double[][][] inducedDipoleFracCR) {
2249       this.inducedDipoleFrac = inducedDipoleFrac;
2250       this.inducedDipoleFracCR = inducedDipoleFracCR;
2251     }
2252   }
2253 
2254   /**
2255    * This class is used to obtain the reciprocal space potential and fields due to permanent
2256    * multipoles from the PME grid.
2257    *
2258    * <p>An external ParallelRegion can be used as follows: <code>
2259    * start() { permanentPhiRegion.setCartPermanentPhi(...); }
2260    * <p>
2261    * run() { execute(0, nAtoms - 1, permanentPhiLoops[threadID]); }
2262    * </code>
2263    */
2264   private class PermanentPhiRegion extends ParallelRegion {
2265 
2266     final PermanentPhiLoop[] permanentPhiLoop;
2267 
2268     private final BSplineRegion bSplineRegion;
2269     private double[][] cartPermPhi;
2270     private double[][] fracPermPhi;
2271 
2272     PermanentPhiRegion(BSplineRegion bSplineRegion) {
2273       this.bSplineRegion = bSplineRegion;
2274       permanentPhiLoop = new PermanentPhiLoop[threadCount];
2275       for (int i = 0; i < threadCount; i++) {
2276         permanentPhiLoop[i] = new PermanentPhiLoop();
2277       }
2278     }
2279 
2280     @Override
2281     public void run() {
2282       try {
2283         int threadID = getThreadIndex();
2284         execute(0, nAtoms - 1, permanentPhiLoop[threadID]);
2285       } catch (Exception e) {
2286         logger.severe(e.toString());
2287       }
2288     }
2289 
2290     void setPermanentPhi(double[][] cartPermanentPhi, double[][] fracPermanentPhi) {
2291       this.cartPermPhi = cartPermanentPhi;
2292       this.fracPermPhi = fracPermanentPhi;
2293     }
2294 
2295     public class PermanentPhiLoop extends IntegerForLoop {
2296 
2297       @Override
2298       public void finish() {
2299         int threadIndex = getThreadIndex();
2300         permanentPhiTime[threadIndex] += System.nanoTime();
2301       }
2302 
2303       @Override
2304       public void run(final int lb, final int ub) {
2305         for (int n = lb; n <= ub; n++) {
2306           final double[][] splx = bSplineRegion.splineX[0][n];
2307           final double[][] sply = bSplineRegion.splineY[0][n];
2308           final double[][] splz = bSplineRegion.splineZ[0][n];
2309           final int[] igrd = bSplineRegion.initGrid[0][n];
2310           final int igrd0 = igrd[0];
2311           final int jgrd0 = igrd[1];
2312           int k0 = igrd[2];
2313           double tuv000 = 0.0;
2314           double tuv100 = 0.0;
2315           double tuv010 = 0.0;
2316           double tuv001 = 0.0;
2317           double tuv200 = 0.0;
2318           double tuv020 = 0.0;
2319           double tuv002 = 0.0;
2320           double tuv110 = 0.0;
2321           double tuv101 = 0.0;
2322           double tuv011 = 0.0;
2323           double tuv300 = 0.0;
2324           double tuv030 = 0.0;
2325           double tuv003 = 0.0;
2326           double tuv210 = 0.0;
2327           double tuv201 = 0.0;
2328           double tuv120 = 0.0;
2329           double tuv021 = 0.0;
2330           double tuv102 = 0.0;
2331           double tuv012 = 0.0;
2332           double tuv111 = 0.0;
2333           for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
2334             final int k = mod(++k0, fftZ);
2335             int j0 = jgrd0;
2336             double tu00 = 0.0;
2337             double tu10 = 0.0;
2338             double tu01 = 0.0;
2339             double tu20 = 0.0;
2340             double tu11 = 0.0;
2341             double tu02 = 0.0;
2342             double tu30 = 0.0;
2343             double tu21 = 0.0;
2344             double tu12 = 0.0;
2345             double tu03 = 0.0;
2346             for (int ith2 = 0; ith2 < bSplineOrder; ith2++) {
2347               final int j = mod(++j0, fftY);
2348               int i0 = igrd0;
2349               double t0 = 0.0;
2350               double t1 = 0.0;
2351               double t2 = 0.0;
2352               double t3 = 0.0;
2353               for (int ith1 = 0; ith1 < bSplineOrder; ith1++) {
2354                 final int i = mod(++i0, fftX);
2355                 final int ii = interleavedIndex(i, j, k, fftX, fftY);
2356                 final double tq = splineBuffer.get(ii);
2357                 final double[] splxi = splx[ith1];
2358                 t0 = fma(tq, splxi[0], t0);
2359                 t1 = fma(tq, splxi[1], t1);
2360                 t2 = fma(tq, splxi[2], t2);
2361                 t3 = fma(tq, splxi[3], t3);
2362               }
2363               final double[] splyi = sply[ith2];
2364               final double u0 = splyi[0];
2365               final double u1 = splyi[1];
2366               final double u2 = splyi[2];
2367               final double u3 = splyi[3];
2368               tu00 = fma(t0, u0, tu00);
2369               tu10 = fma(t1, u0, tu10);
2370               tu01 = fma(t0, u1, tu01);
2371               tu20 = fma(t2, u0, tu20);
2372               tu11 = fma(t1, u1, tu11);
2373               tu02 = fma(t0, u2, tu02);
2374               tu30 = fma(t3, u0, tu30);
2375               tu21 = fma(t2, u1, tu21);
2376               tu12 = fma(t1, u2, tu12);
2377               tu03 = fma(t0, u3, tu03);
2378             }
2379             final double[] splzi = splz[ith3];
2380             final double v0 = splzi[0];
2381             final double v1 = splzi[1];
2382             final double v2 = splzi[2];
2383             final double v3 = splzi[3];
2384             tuv000 = fma(tu00, v0, tuv000);
2385             tuv100 = fma(tu10, v0, tuv100);
2386             tuv010 = fma(tu01, v0, tuv010);
2387             tuv001 = fma(tu00, v1, tuv001);
2388             tuv200 = fma(tu20, v0, tuv200);
2389             tuv020 = fma(tu02, v0, tuv020);
2390             tuv002 = fma(tu00, v2, tuv002);
2391             tuv110 = fma(tu11, v0, tuv110);
2392             tuv101 = fma(tu10, v1, tuv101);
2393             tuv011 = fma(tu01, v1, tuv011);
2394             tuv300 = fma(tu30, v0, tuv300);
2395             tuv030 = fma(tu03, v0, tuv030);
2396             tuv003 = fma(tu00, v3, tuv003);
2397             tuv210 = fma(tu21, v0, tuv210);
2398             tuv201 = fma(tu20, v1, tuv201);
2399             tuv120 = fma(tu12, v0, tuv120);
2400             tuv021 = fma(tu02, v1, tuv021);
2401             tuv102 = fma(tu10, v2, tuv102);
2402             tuv012 = fma(tu01, v2, tuv012);
2403             tuv111 = fma(tu11, v1, tuv111);
2404           }
2405           double[] out = fracPermPhi[n];
2406           out[t000] = tuv000;
2407           out[t100] = tuv100;
2408           out[t010] = tuv010;
2409           out[t001] = tuv001;
2410           out[t200] = tuv200;
2411           out[t020] = tuv020;
2412           out[t002] = tuv002;
2413           out[t110] = tuv110;
2414           out[t101] = tuv101;
2415           out[t011] = tuv011;
2416           out[t300] = tuv300;
2417           out[t030] = tuv030;
2418           out[t003] = tuv003;
2419           out[t210] = tuv210;
2420           out[t201] = tuv201;
2421           out[t120] = tuv120;
2422           out[t021] = tuv021;
2423           out[t102] = tuv102;
2424           out[t012] = tuv012;
2425           out[t111] = tuv111;
2426           double[] in = out;
2427           out = cartPermPhi[n];
2428           out[0] = transformFieldMatrix[0][0] * in[0];
2429           for (int j = 1; j < 4; j++) {
2430             out[j] = 0.0;
2431             for (int k = 1; k < 4; k++) {
2432               out[j] = fma(transformFieldMatrix[j][k], in[k], out[j]);
2433             }
2434           }
2435           for (int j = 4; j < 10; j++) {
2436             out[j] = 0.0;
2437             for (int k = 4; k < 10; k++) {
2438               out[j] = fma(transformFieldMatrix[j][k], in[k], out[j]);
2439             }
2440           }
2441         }
2442       }
2443 
2444       @Override
2445       public IntegerSchedule schedule() {
2446         return IntegerSchedule.fixed();
2447       }
2448 
2449       @Override
2450       public void start() {
2451         int threadIndex = getThreadIndex();
2452         permanentPhiTime[threadIndex] -= System.nanoTime();
2453       }
2454     }
2455   }
2456 
2457   /**
2458    * This class is used to obtain the reciprocal space potential and fields due to induced dipoles
2459    * from the PME grid.
2460    *
2461    * <p>An external ParallelRegion can be used as follows: <code>
2462    * start() { inducedPhiRegion.setCartInducedDipolePhi(...); }
2463    * <p>
2464    * run() { execute(0, nAtoms - 1, inducedPhiLoops[threadID]); }
2465    * </code>
2466    */
2467   private class InducedPhiRegion extends ParallelRegion {
2468 
2469     final InducedPhiLoop[] inducedPhiLoops;
2470 
2471     private final BSplineRegion bSplineRegion;
2472     private double[][] cartInducedDipolePhi;
2473     private double[][] cartInducedDipolePhiCR;
2474     private double[][] fracInducedDipolePhi;
2475     private double[][] fracInducedDipolePhiCR;
2476 
2477     InducedPhiRegion(BSplineRegion bSplineRegion) {
2478       this.bSplineRegion = bSplineRegion;
2479       inducedPhiLoops = new InducedPhiLoop[threadCount];
2480       for (int i = 0; i < threadCount; i++) {
2481         inducedPhiLoops[i] = new InducedPhiLoop();
2482       }
2483     }
2484 
2485     @Override
2486     public void run() {
2487       try {
2488         int threadID = getThreadIndex();
2489         execute(0, nAtoms - 1, inducedPhiLoops[threadID]);
2490       } catch (Exception e) {
2491         logger.severe(e.toString());
2492       }
2493     }
2494 
2495     void setInducedDipolePhi(
2496         double[][] cartInducedDipolePhi,
2497         double[][] cartInducedDipolePhiCR,
2498         double[][] fracInducedDipolePhi,
2499         double[][] fracInducedDipolePhiCR) {
2500       this.cartInducedDipolePhi = cartInducedDipolePhi;
2501       this.cartInducedDipolePhiCR = cartInducedDipolePhiCR;
2502       this.fracInducedDipolePhi = fracInducedDipolePhi;
2503       this.fracInducedDipolePhiCR = fracInducedDipolePhiCR;
2504     }
2505 
2506     public class InducedPhiLoop extends IntegerForLoop {
2507 
2508       @Override
2509       public void finish() {
2510         int threadIndex = getThreadIndex();
2511         inducedPhiTime[threadIndex] += System.nanoTime();
2512       }
2513 
2514       @Override
2515       public void run(int lb, int ub) {
2516         for (int n = lb; n <= ub; n++) {
2517           final double[][] splx = bSplineRegion.splineX[0][n];
2518           final double[][] sply = bSplineRegion.splineY[0][n];
2519           final double[][] splz = bSplineRegion.splineZ[0][n];
2520           final int[] igrd = bSplineRegion.initGrid[0][n];
2521           final int igrd0 = igrd[0];
2522           final int jgrd0 = igrd[1];
2523           int k0 = igrd[2];
2524           double tuv000 = 0.0;
2525           double tuv100 = 0.0;
2526           double tuv010 = 0.0;
2527           double tuv001 = 0.0;
2528           double tuv200 = 0.0;
2529           double tuv020 = 0.0;
2530           double tuv002 = 0.0;
2531           double tuv110 = 0.0;
2532           double tuv101 = 0.0;
2533           double tuv011 = 0.0;
2534           double tuv300 = 0.0;
2535           double tuv030 = 0.0;
2536           double tuv003 = 0.0;
2537           double tuv210 = 0.0;
2538           double tuv201 = 0.0;
2539           double tuv120 = 0.0;
2540           double tuv021 = 0.0;
2541           double tuv102 = 0.0;
2542           double tuv012 = 0.0;
2543           double tuv111 = 0.0;
2544           double tuv000p = 0.0;
2545           double tuv100p = 0.0;
2546           double tuv010p = 0.0;
2547           double tuv001p = 0.0;
2548           double tuv200p = 0.0;
2549           double tuv020p = 0.0;
2550           double tuv002p = 0.0;
2551           double tuv110p = 0.0;
2552           double tuv101p = 0.0;
2553           double tuv011p = 0.0;
2554           double tuv300p = 0.0;
2555           double tuv030p = 0.0;
2556           double tuv003p = 0.0;
2557           double tuv210p = 0.0;
2558           double tuv201p = 0.0;
2559           double tuv120p = 0.0;
2560           double tuv021p = 0.0;
2561           double tuv102p = 0.0;
2562           double tuv012p = 0.0;
2563           double tuv111p = 0.0;
2564           for (int ith3 = 0; ith3 < bSplineOrder; ith3++) {
2565             final int k = mod(++k0, fftZ);
2566             int j0 = jgrd0;
2567             double tu00 = 0.0;
2568             double tu10 = 0.0;
2569             double tu01 = 0.0;
2570             double tu20 = 0.0;
2571             double tu11 = 0.0;
2572             double tu02 = 0.0;
2573             double tu30 = 0.0;
2574             double tu21 = 0.0;
2575             double tu12 = 0.0;
2576             double tu03 = 0.0;
2577             double tu00p = 0.0;
2578             double tu10p = 0.0;
2579             double tu01p = 0.0;
2580             double tu20p = 0.0;
2581             double tu11p = 0.0;
2582             double tu02p = 0.0;
2583             double tu30p = 0.0;
2584             double tu21p = 0.0;
2585             double tu12p = 0.0;
2586             double tu03p = 0.0;
2587             for (int ith2 = 0; ith2 < bSplineOrder; ith2++) {
2588               final int j = mod(++j0, fftY);
2589               int i0 = igrd0;
2590               double t0 = 0.0;
2591               double t1 = 0.0;
2592               double t2 = 0.0;
2593               double t3 = 0.0;
2594               double t0p = 0.0;
2595               double t1p = 0.0;
2596               double t2p = 0.0;
2597               double t3p = 0.0;
2598               for (int ith1 = 0; ith1 < bSplineOrder; ith1++) {
2599                 final int i = mod(++i0, fftX);
2600                 final int ii = interleavedIndex(i, j, k, fftX, fftY);
2601                 final double tq = splineBuffer.get(ii);
2602                 final double tp = splineBuffer.get(ii + 1);
2603                 final double[] splxi = splx[ith1];
2604                 final double s0 = splxi[0];
2605                 final double s1 = splxi[1];
2606                 final double s2 = splxi[2];
2607                 final double s3 = splxi[3];
2608                 t0 = fma(tq, s0, t0);
2609                 t1 = fma(tq, s1, t1);
2610                 t2 = fma(tq, s2, t2);
2611                 t3 = fma(tq, s3, t3);
2612                 t0p = fma(tp, s0, t0p);
2613                 t1p = fma(tp, s1, t1p);
2614                 t2p = fma(tp, s2, t2p);
2615                 t3p = fma(tp, s3, t3p);
2616               }
2617               final double[] splyi = sply[ith2];
2618               final double u0 = splyi[0];
2619               final double u1 = splyi[1];
2620               final double u2 = splyi[2];
2621               final double u3 = splyi[3];
2622               tu00 = fma(t0, u0, tu00);
2623               tu10 = fma(t1, u0, tu10);
2624               tu01 = fma(t0, u1, tu01);
2625               tu20 = fma(t2, u0, tu20);
2626               tu11 = fma(t1, u1, tu11);
2627               tu02 = fma(t0, u2, tu02);
2628               tu30 = fma(t3, u0, tu30);
2629               tu21 = fma(t2, u1, tu21);
2630               tu12 = fma(t1, u2, tu12);
2631               tu03 = fma(t0, u3, tu03);
2632               tu00p = fma(t0p, u0, tu00p);
2633               tu10p = fma(t1p, u0, tu10p);
2634               tu01p = fma(t0p, u1, tu01p);
2635               tu20p = fma(t2p, u0, tu20p);
2636               tu11p = fma(t1p, u1, tu11p);
2637               tu02p = fma(t0p, u2, tu02p);
2638               tu30p = fma(t3p, u0, tu30p);
2639               tu21p = fma(t2p, u1, tu21p);
2640               tu12p = fma(t1p, u2, tu12p);
2641               tu03p = fma(t0p, u3, tu03p);
2642             }
2643             final double[] splzi = splz[ith3];
2644             final double v0 = splzi[0];
2645             final double v1 = splzi[1];
2646             final double v2 = splzi[2];
2647             final double v3 = splzi[3];
2648             tuv000 = fma(tu00, v0, tuv000);
2649             tuv100 = fma(tu10, v0, tuv100);
2650             tuv010 = fma(tu01, v0, tuv010);
2651             tuv001 = fma(tu00, v1, tuv001);
2652             tuv200 = fma(tu20, v0, tuv200);
2653             tuv020 = fma(tu02, v0, tuv020);
2654             tuv002 = fma(tu00, v2, tuv002);
2655             tuv110 = fma(tu11, v0, tuv110);
2656             tuv101 = fma(tu10, v1, tuv101);
2657             tuv011 = fma(tu01, v1, tuv011);
2658             tuv300 = fma(tu30, v0, tuv300);
2659             tuv030 = fma(tu03, v0, tuv030);
2660             tuv003 = fma(tu00, v3, tuv003);
2661             tuv210 = fma(tu21, v0, tuv210);
2662             tuv201 = fma(tu20, v1, tuv201);
2663             tuv120 = fma(tu12, v0, tuv120);
2664             tuv021 = fma(tu02, v1, tuv021);
2665             tuv102 = fma(tu10, v2, tuv102);
2666             tuv012 = fma(tu01, v2, tuv012);
2667             tuv111 = fma(tu11, v1, tuv111);
2668             tuv000p = fma(tu00p, v0, tuv000p);
2669             tuv100p = fma(tu10p, v0, tuv100p);
2670             tuv010p = fma(tu01p, v0, tuv010p);
2671             tuv001p = fma(tu00p, v1, tuv001p);
2672             tuv200p = fma(tu20p, v0, tuv200p);
2673             tuv020p = fma(tu02p, v0, tuv020p);
2674             tuv002p = fma(tu00p, v2, tuv002p);
2675             tuv110p = fma(tu11p, v0, tuv110p);
2676             tuv101p = fma(tu10p, v1, tuv101p);
2677             tuv011p = fma(tu01p, v1, tuv011p);
2678             tuv300p = fma(tu30p, v0, tuv300p);
2679             tuv030p = fma(tu03p, v0, tuv030p);
2680             tuv003p = fma(tu00p, v3, tuv003p);
2681             tuv210p = fma(tu21p, v0, tuv210p);
2682             tuv201p = fma(tu20p, v1, tuv201p);
2683             tuv120p = fma(tu12p, v0, tuv120p);
2684             tuv021p = fma(tu02p, v1, tuv021p);
2685             tuv102p = fma(tu10p, v2, tuv102p);
2686             tuv012p = fma(tu01p, v2, tuv012p);
2687             tuv111p = fma(tu11p, v1, tuv111p);
2688           }
2689           double[] out = fracInducedDipolePhi[n];
2690           out[t000] = tuv000;
2691           out[t100] = tuv100;
2692           out[t010] = tuv010;
2693           out[t001] = tuv001;
2694           out[t200] = tuv200;
2695           out[t020] = tuv020;
2696           out[t002] = tuv002;
2697           out[t110] = tuv110;
2698           out[t101] = tuv101;
2699           out[t011] = tuv011;
2700           out[t300] = tuv300;
2701           out[t030] = tuv030;
2702           out[t003] = tuv003;
2703           out[t210] = tuv210;
2704           out[t201] = tuv201;
2705           out[t120] = tuv120;
2706           out[t021] = tuv021;
2707           out[t102] = tuv102;
2708           out[t012] = tuv012;
2709           out[t111] = tuv111;
2710           double[] in = out;
2711           out = cartInducedDipolePhi[n];
2712           out[0] = transformFieldMatrix[0][0] * in[0];
2713           for (int j = 1; j < 4; j++) {
2714             out[j] = 0.0;
2715             for (int k = 1; k < 4; k++) {
2716               out[j] = fma(transformFieldMatrix[j][k], in[k], out[j]);
2717             }
2718           }
2719           for (int j = 4; j < 10; j++) {
2720             out[j] = 0.0;
2721             for (int k = 4; k < 10; k++) {
2722               out[j] = fma(transformFieldMatrix[j][k], in[k], out[j]);
2723             }
2724           }
2725 
2726           out = fracInducedDipolePhiCR[n];
2727           out[t000] = tuv000p;
2728           out[t100] = tuv100p;
2729           out[t010] = tuv010p;
2730           out[t001] = tuv001p;
2731           out[t200] = tuv200p;
2732           out[t020] = tuv020p;
2733           out[t002] = tuv002p;
2734           out[t110] = tuv110p;
2735           out[t101] = tuv101p;
2736           out[t011] = tuv011p;
2737           out[t300] = tuv300p;
2738           out[t030] = tuv030p;
2739           out[t003] = tuv003p;
2740           out[t210] = tuv210p;
2741           out[t201] = tuv201p;
2742           out[t120] = tuv120p;
2743           out[t021] = tuv021p;
2744           out[t102] = tuv102p;
2745           out[t012] = tuv012p;
2746           out[t111] = tuv111p;
2747           in = out;
2748           out = cartInducedDipolePhiCR[n];
2749           out[0] = transformFieldMatrix[0][0] * in[0];
2750           for (int j = 1; j < 4; j++) {
2751             out[j] = 0.0;
2752             for (int k = 1; k < 4; k++) {
2753               out[j] = fma(transformFieldMatrix[j][k], in[k], out[j]);
2754             }
2755           }
2756           for (int j = 4; j < 10; j++) {
2757             out[j] = 0.0;
2758             for (int k = 4; k < 10; k++) {
2759               out[j] = fma(transformFieldMatrix[j][k], in[k], out[j]);
2760             }
2761           }
2762         }
2763       }
2764 
2765       @Override
2766       public IntegerSchedule schedule() {
2767         return IntegerSchedule.fixed();
2768       }
2769 
2770       @Override
2771       public void start() {
2772         int threadIndex = getThreadIndex();
2773         inducedPhiTime[threadIndex] -= System.nanoTime();
2774       }
2775     }
2776   }
2777 
2778   private class FractionalMultipoleRegion extends ParallelRegion {
2779 
2780     private double[][][] globalMultipoles = null;
2781     private double[][][] fracMultipoles = null;
2782     private boolean[] use = null;
2783     private FractionalMultipoleLoop[] fractionalMultipoleLoops;
2784 
2785     @Override
2786     public void start() {
2787       if (fractionalMultipoleLoops == null) {
2788         int nThreads = getThreadCount();
2789         fractionalMultipoleLoops = new FractionalMultipoleLoop[nThreads];
2790         for (int i = 0; i < nThreads; i++) {
2791           fractionalMultipoleLoops[i] = new FractionalMultipoleLoop();
2792         }
2793       }
2794     }
2795 
2796     void setPermanent(double[][][] globalMultipoles, double[][][] fracMultipoles) {
2797       this.globalMultipoles = globalMultipoles;
2798       this.fracMultipoles = fracMultipoles;
2799     }
2800 
2801     private void setUse(boolean[] use) {
2802       this.use = use;
2803     }
2804 
2805     @Override
2806     public void run() {
2807       int threadIndex = getThreadIndex();
2808       try {
2809         execute(0, nAtoms - 1, fractionalMultipoleLoops[threadIndex]);
2810       } catch (Exception e) {
2811         throw new RuntimeException(e);
2812       }
2813     }
2814 
2815     private class FractionalMultipoleLoop extends IntegerForLoop {
2816 
2817       @Override
2818       public void run(int lb, int ub) {
2819         for (int s = 0; s < nSymm; s++) {
2820           for (int i = lb; i <= ub; i++) {
2821             if (use[i]) {
2822               toFractionalMultipole(globalMultipoles[s][i], fracMultipoles[s][i]);
2823             }
2824           }
2825         }
2826       }
2827 
2828       @Override
2829       public IntegerSchedule schedule() {
2830         return IntegerSchedule.fixed();
2831       }
2832     }
2833 
2834   }
2835 
2836   private class FractionalInducedRegion extends ParallelRegion {
2837 
2838     private double[][][] inducedDipole;
2839     private double[][][] inducedDipoleCR;
2840     private double[][][] inducedDipoleFrac;
2841     private double[][][] inducedDipoleFracCR;
2842     private boolean[] use = null;
2843     private FractionalInducedLoop[] fractionalInducedLoops;
2844 
2845     @Override
2846     public void start() {
2847       if (fractionalInducedLoops == null) {
2848         int nThreads = getThreadCount();
2849         fractionalInducedLoops = new FractionalInducedLoop[nThreads];
2850         for (int i = 0; i < nThreads; i++) {
2851           fractionalInducedLoops[i] = new FractionalInducedLoop();
2852         }
2853       }
2854     }
2855 
2856     void setInducedDipoles(double[][][] inducedDipole, double[][][] inducedDipoleCR,
2857                            double[][][] inducedDipoleFrac, double[][][] inducedDipoleFracCR) {
2858       this.inducedDipole = inducedDipole;
2859       this.inducedDipoleCR = inducedDipoleCR;
2860       this.inducedDipoleFrac = inducedDipoleFrac;
2861       this.inducedDipoleFracCR = inducedDipoleFracCR;
2862     }
2863 
2864     private void setUse(boolean[] use) {
2865       this.use = use;
2866     }
2867 
2868     @Override
2869     public void run() {
2870       int threadIndex = getThreadIndex();
2871       try {
2872         execute(0, nAtoms - 1, fractionalInducedLoops[threadIndex]);
2873       } catch (Exception e) {
2874         throw new RuntimeException(e);
2875       }
2876     }
2877 
2878     private class FractionalInducedLoop extends IntegerForLoop {
2879 
2880       @Override
2881       public void run(int lb, int ub) {
2882         for (int s = 0; s < nSymm; s++) {
2883           for (int i = lb; i <= ub; i++) {
2884             if (use[i]) {
2885               toFractionalDipole(inducedDipole[s][i], inducedDipoleFrac[s][i]);
2886               toFractionalDipole(inducedDipoleCR[s][i], inducedDipoleFracCR[s][i]);
2887             }
2888           }
2889         }
2890       }
2891 
2892       @Override
2893       public IntegerSchedule schedule() {
2894         return IntegerSchedule.fixed();
2895       }
2896     }
2897 
2898   }
2899 }