View Javadoc
1   // ******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2024.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  // ******************************************************************************
38  package ffx.numerics.fft;
39  
40  import static java.lang.Math.fma;
41  import static java.lang.System.arraycopy;
42  import static org.apache.commons.math3.util.FastMath.PI;
43  import static org.apache.commons.math3.util.FastMath.cos;
44  import static org.apache.commons.math3.util.FastMath.sin;
45  import static org.apache.commons.math3.util.FastMath.sqrt;
46  
47  import java.util.Vector;
48  import java.util.logging.Level;
49  import java.util.logging.Logger;
50  
51  /**
52   * Compute the FFT of complex, double precision data of arbitrary length n. This class uses a mixed
53   * radix method and has special methods to handle factors [2, 3, 4, 5, 6, 7] and a general method for
54   * larger prime factors.
55   *
56   * @author Michal J. Schnieders<br> Derived from: <br> Bruce R. Miller (bruce.miller@nist.gov) <br>
57   *     Contribution of the National Institute of Standards and Technology, not subject to copyright.
58   *     <br>
59   *     Derived from:<br> GSL (Gnu Scientific Library) FFT Code by Brian Gough
60   *     (bjg@network-theory.co.uk)
61   * @see <ul>
62   *     <li><a href="http://dx.doi.org/10.1016/0021-9991(83)90013-X" target="_blank"> Clive
63   *     Temperton. Self-sorting mixed-radix fast fourier transforms. Journal of Computational
64   *     Physics, 52(1):1-23, 1983. </a>
65   *     <li><a href="http://www.jstor.org/stable/2003354" target="_blank"> J. W. Cooley and J. W.
66   *     Tukey, Mathematics of Computation 19 (90), 297 (1965) </a>
67   *     <li><a href="http://en.wikipedia.org/wiki/Fast_Fourier_transform" target="_blank">FFT at
68   *     Wikipedia </a>
69   *     </ul>
70   * @since 1.0
71   */
72  public class Complex {
73  
74    private static final Logger logger = Logger.getLogger(Complex.class.getName());
75    // TINKER v. 5.0 factors to achieve exact numerical agreement.
76    // private static final int[] availableFactors = {5, 4, 3, 2};
77    // private static final int firstUnavailablePrime = 7;
78    private static final int[] availableFactors = {7, 6, 5, 4, 3, 2};
79    private static final int firstUnavailablePrime = 11;
80    private static final double oneThird = 1.0 / 3.0;
81    private static final double sqrt3_2 = sqrt(3.0) / 2.0;
82    private static final double sqrt5_4 = sqrt(5.0) / 4.0;
83    private static final double sinPI_5 = sin(PI / 5.0);
84    private static final double sin2PI_5 = sin(2.0 * PI / 5.0);
85    private static final double sin2PI_7 = sin(2.0 * PI / 7.0);
86    private static final double sin4PI_7 = sin(4.0 * PI / 7.0);
87    private static final double sin6PI_7 = sin(6.0 * PI / 7.0);
88    private static final double cos2PI_7 = cos(2.0 * PI / 7.0);
89    private static final double cos4PI_7 = cos(4.0 * PI / 7.0);
90    private static final double cos6PI_7 = cos(6.0 * PI / 7.0);
91    /**
92     * Number of complex numbers in the transform.
93     */
94    private final int n;
95    /**
96     * Factorization of n.
97     */
98    private final int[] factors;
99    /**
100    * Twiddle factors.
101    */
102   private final double[][][] twiddle;
103   /**
104    * Packing of non-contiguous data.
105    */
106   private final double[] packedData;
107   /**
108    * Scratch space for the transform.
109    */
110   private final double[] scratch;
111 
112   /**
113    * Sign of negative -1 is for forward transform. Sign of 1 is for inverse transform.
114    */
115   private int sign = -1;
116 
117   /**
118    * Construct a Complex instance for data of length n. Factorization of n is designed to use special
119    * methods for small factors, and a general routine for large odd prime factors. Scratch memory is
120    * created of length 2*n, which is reused each time a transform is computed.
121    *
122    * @param n Number of complex numbers (n .GT. 1).
123    */
124   public Complex(int n) {
125     assert (n > 1);
126 
127     this.n = n;
128     factors = factor();
129     twiddle = wavetable();
130     packedData = new double[2 * n];
131     scratch = new double[2 * n];
132   }
133 
134   /**
135    * Check if a dimension is a preferred dimension.
136    *
137    * @param dim the dimension to check.
138    * @return true if the dimension is a preferred dimension.
139    */
140   public static boolean preferredDimension(int dim) {
141     if (dim < 2) {
142       return false;
143     }
144 
145     // Apply preferred factors.
146     for (int factor : availableFactors) {
147       while ((dim % factor) == 0) {
148         dim /= factor;
149       }
150     }
151     return dim <= 1;
152   }
153 
154   /**
155    * Getter for the field <code>factors</code>.
156    *
157    * @return an array of int.
158    */
159   public int[] getFactors() {
160     return factors;
161   }
162 
163   /**
164    * Compute the Fast Fourier Transform of data leaving the result in data. The array data must
165    * contain the data points in the following locations:
166    *
167    * <PRE>
168    * Re(d[i]) = data[offset + stride*i] Im(d[i]) = data[offset + stride*i+1]
169    * </PRE>
170    *
171    * @param data an array of double.
172    * @param offset the offset to the beginning of the data.
173    * @param stride the stride between data points.
174    */
175   public void fft(double[] data, int offset, int stride) {
176     transformInternal(data, offset, stride, -1);
177   }
178 
179   /**
180    * Compute the (un-normalized) inverse FFT of data, leaving it in place. The frequency domain data
181    * must be in wrap-around order, and be stored in the following locations:
182    *
183    * <PRE>
184    * Re(D[i]) = data[offset + stride*i] Im(D[i]) = data[offset + stride*i+1]
185    * </PRE>
186    *
187    * @param data an array of double.
188    * @param offset the offset to the beginning of the data.
189    * @param stride the stride between data points.
190    */
191   public void ifft(double[] data, int offset, int stride) {
192     transformInternal(data, offset, stride, +1);
193   }
194 
195   /**
196    * Compute the normalized inverse FFT of data, leaving it in place. The frequency domain data must
197    * be stored in the following locations:
198    *
199    * <PRE>
200    * Re(D[i]) = data[offset + stride*i] Im(D[i]) = data[offset + stride*i+1]
201    * </PRE>
202    *
203    * @param data an array of double.
204    * @param offset the offset to the beginning of the data.
205    * @param stride the stride between data points.
206    */
207   public void inverse(double[] data, int offset, int stride) {
208     ifft(data, offset, stride);
209 
210     // Normalize inverse FFT with 1/n.
211     double norm = normalization();
212     for (int i = 0; i < n; i++) {
213       final int index = offset + stride * i;
214       data[index] *= norm;
215       data[index + 1] *= norm;
216     }
217   }
218 
219   /**
220    * Factor the data length into preferred factors (those with special methods), falling back to odd
221    * primes that the general routine must handle.
222    *
223    * @return integer factors
224    */
225   private int[] factor() {
226     if (n < 2) {
227       return null;
228     }
229     Vector<Integer> v = new Vector<>();
230     int nTest = n;
231 
232     // Use the preferred factors first
233     for (int factor : availableFactors) {
234       while ((nTest % factor) == 0) {
235         nTest /= factor;
236         v.add(factor);
237       }
238     }
239 
240     // Unavailable odd prime factors.
241     int factor = firstUnavailablePrime;
242     while (nTest > 1) {
243       while ((nTest % factor) != 0) {
244         factor += 2;
245       }
246       nTest /= factor;
247       v.add(factor);
248     }
249     int product = 1;
250     int nf = v.size();
251     int[] ret = new int[nf];
252     for (int i = 0; i < nf; i++) {
253       ret[i] = v.get(i);
254       product *= ret[i];
255     }
256 
257     // Report a failed factorization.
258     if (product != n) {
259       StringBuilder sb = new StringBuilder(" FFT factorization failed for " + n + "\n");
260       for (int i = 0; i < nf; i++) {
261         sb.append(" ");
262         sb.append(ret[i]);
263       }
264       sb.append("\n");
265       sb.append(" Factor product = ");
266       sb.append(product);
267       sb.append("\n");
268       logger.severe(sb.toString());
269       System.exit(-1);
270     } else {
271       if (logger.isLoggable(Level.FINEST)) {
272         StringBuilder sb = new StringBuilder(" FFT factorization for " + n + " = ");
273         for (int i = 0; i < nf - 1; i++) {
274           sb.append(ret[i]);
275           sb.append(" * ");
276         }
277         sb.append(ret[nf - 1]);
278         logger.fine(sb.toString());
279       }
280     }
281     return ret;
282   }
283 
284   /**
285    * References to the input and output data arrays.
286    *
287    * @param in Input data for the current pass.
288    * @param inOffset Offset into the input data.
289    * @param out Output data for the current pass.
290    * @param outOffset Offset into output array.
291    */
292   private record PassData(double[] in, int inOffset, double[] out, int outOffset) {
293     // Empty.
294   }
295 
296   /**
297    * Compute the Fast Fourier Transform of data leaving the result in data.
298    *
299    * @param data data an array of double.
300    * @param offset the offset to the beginning of the data.
301    * @param stride the stride between data points.
302    * @param sign the sign to apply.
303    */
304   private void transformInternal(final double[] data,
305       final int offset, final int stride, final int sign) {
306 
307     PassData[] passData;
308     boolean packed = false;
309     if (stride != 2) {
310       // Pack non-contiguous (stride > 2) data into a contiguous array.
311       packed = true;
312       for (int i = 0, i2 = 0, index = offset; i < n; i++, i2 += 2, index += stride) {
313         packedData[i2] = data[index];
314         packedData[i2 + 1] = data[index + 1];
315       }
316       PassData evenPass = new PassData(packedData, 0, scratch, 0);
317       PassData oddPass = new PassData(scratch, 0, packedData, 0);
318       passData = new PassData[] {evenPass, oddPass};
319     } else {
320       PassData evenPass = new PassData(data, offset, scratch, 0);
321       PassData oddPass = new PassData(scratch, 0, data, offset);
322       passData = new PassData[] {evenPass, oddPass};
323     }
324 
325     this.sign = sign;
326     int product = 1;
327 
328     final int nfactors = factors.length;
329     for (int i = 0; i < nfactors; i++) {
330       final int pass = i % 2;
331       final int factor = factors[i];
332       product *= factor;
333       switch (factor) {
334         case 2 -> pass2(product, passData[pass], twiddle[i]);
335         case 3 -> pass3(product, passData[pass], twiddle[i]);
336         case 4 -> pass4(product, passData[pass], twiddle[i]);
337         case 5 -> pass5(product, passData[pass], twiddle[i]);
338         case 6 -> pass6(product, passData[pass], twiddle[i]);
339         case 7 -> pass7(product, passData[pass], twiddle[i]);
340         default -> passOdd(factor, product, passData[pass], twiddle[i]);
341       }
342     }
343 
344     // If the number of factors is odd, the final result is in the scratch array.
345     if (nfactors % 2 == 1) {
346       // Copy the scratch array to the data array.
347       if (stride == 2) {
348         arraycopy(scratch, 0, data, offset, 2 * n);
349       } else {
350         for (int i = 0, i2 = 0, index = offset; i < n; i++, i2 += 2, index += stride) {
351           data[index] = scratch[i2];
352           data[index + 1] = scratch[i2 + 1];
353         }
354       }
355       // If the number of factors is even, the data may need to be unpacked.
356     } else if (packed) {
357       for (int i = 0, i2 = 0, index = offset; i < n; i++, i2 += 2, index += stride) {
358         data[index] = packedData[i2];
359         data[index + 1] = packedData[i2 + 1];
360       }
361     }
362   }
363 
364   /**
365    * Return the normalization factor. Multiply the elements of the back-transformed data to get the
366    * normalized inverse.
367    *
368    * @return a double.
369    */
370   private double normalization() {
371     return 1.0 / n;
372   }
373 
374   /**
375    * Handle factors of 2.
376    *
377    * @param product Product to apply.
378    * @param passData the data.
379    * @param twiddles the twiddle factors.
380    */
381   private void pass2(int product, PassData passData, double[][] twiddles) {
382     final double[] data = passData.in;
383     final double[] ret = passData.out;
384     final int factor = 2;
385     final int m = n / factor;
386     final int q = n / product;
387     final int product_1 = product / factor;
388     final int di = 2 * m;
389     final int dj = 2 * product_1;
390     int i = passData.inOffset;
391     int j = passData.outOffset;
392     for (int k = 0; k < q; k++, j += dj) {
393       final double[] twids = twiddles[k];
394       final double w_r = twids[0];
395       final double w_i = -sign * twids[1];
396       for (int k1 = 0; k1 < product_1; k1++, i += 2, j += 2) {
397         final double z0_r = data[i];
398         final double z0_i = data[i + 1];
399         final int idi = i + di;
400         final double z1_r = data[idi];
401         final double z1_i = data[idi + 1];
402         ret[j] = z0_r + z1_r;
403         ret[j + 1] = z0_i + z1_i;
404         final double x_r = z0_r - z1_r;
405         final double x_i = z0_i - z1_i;
406         final int jdj = j + dj;
407         ret[jdj] = fma(w_r, x_r, -w_i * x_i);
408         ret[jdj + 1] = fma(w_r, x_i, w_i * x_r);
409       }
410     }
411   }
412 
413   /**
414    * Handle factors of 3.
415    *
416    * @param product Product to apply.
417    * @param passData the data.
418    * @param twiddles the twiddle factors.
419    */
420   private void pass3(int product, PassData passData, double[][] twiddles) {
421     final double[] data = passData.in;
422     final double[] ret = passData.out;
423     final int factor = 3;
424     final int m = n / factor;
425     final int q = n / product;
426     final int product_1 = product / factor;
427     final double tau = sign * sqrt3_2;
428     final int di = 2 * m;
429     final int dj = 2 * product_1;
430     final int jstep = (factor - 1) * dj;
431     int i = passData.inOffset;
432     int j = passData.outOffset;
433     for (int k = 0; k < q; k++, j += jstep) {
434       final double[] twids = twiddles[k];
435       final double w1_r = twids[0];
436       final double w1_i = -sign * twids[1];
437       final double w2_r = twids[2];
438       final double w2_i = -sign * twids[3];
439       for (int k1 = 0; k1 < product_1; k1++, i += 2, j += 2) {
440         final double z0_r = data[i];
441         final double z0_i = data[i + 1];
442         int idi = i + di;
443         final double z1_r = data[idi];
444         final double z1_i = data[idi + 1];
445         idi += di;
446         final double z2_r = data[idi];
447         final double z2_i = data[idi + 1];
448         final double t1_r = z1_r + z2_r;
449         final double t1_i = z1_i + z2_i;
450         final double t2_r = fma(-0.5, t1_r, z0_r);
451         final double t2_i = fma(-0.5, t1_i, z0_i);
452         final double t3_r = tau * (z1_r - z2_r);
453         final double t3_i = tau * (z1_i - z2_i);
454         ret[j] = z0_r + t1_r;
455         ret[j + 1] = z0_i + t1_i;
456         double x_r = t2_r - t3_i;
457         double x_i = t2_i + t3_r;
458         int jdj = j + dj;
459         ret[jdj] = fma(w1_r, x_r, -w1_i * x_i);
460         ret[jdj + 1] = fma(w1_r, x_i, w1_i * x_r);
461         x_r = t2_r + t3_i;
462         x_i = t2_i - t3_r;
463         jdj += dj;
464         ret[jdj] = fma(w2_r, x_r, -w2_i * x_i);
465         ret[jdj + 1] = fma(w2_r, x_i, w2_i * x_r);
466       }
467     }
468   }
469 
470   /**
471    * Handle factors of 4.
472    *
473    * @param product Product to apply.
474    * @param passData the data.
475    * @param twiddles the twiddle factors.
476    */
477   private void pass4(int product, PassData passData, double[][] twiddles) {
478     final double[] data = passData.in;
479     final double[] ret = passData.out;
480     final int factor = 4;
481     final int m = n / factor;
482     final int q = n / product;
483     final int p_1 = product / factor;
484     final int di = 2 * m;
485     final int dj = 2 * p_1;
486     final int jstep = (factor - 1) * dj;
487     int i = passData.inOffset;
488     int j = passData.outOffset;
489     for (int k = 0; k < q; k++, j += jstep) {
490       final double[] twids = twiddles[k];
491       final double w1_r = twids[0];
492       final double w1_i = -sign * twids[1];
493       final double w2_r = twids[2];
494       final double w2_i = -sign * twids[3];
495       final double w3_r = twids[4];
496       final double w3_i = -sign * twids[5];
497       for (int k1 = 0; k1 < p_1; k1++, i += 2, j += 2) {
498         final double z0_r = data[i];
499         final double z0_i = data[i + 1];
500         int idi = i + di;
501         final double z1_r = data[idi];
502         final double z1_i = data[idi + 1];
503         idi += di;
504         final double z2_r = data[idi];
505         final double z2_i = data[idi + 1];
506         idi += di;
507         final double z3_r = data[idi];
508         final double z3_i = data[idi + 1];
509         final double t1_r = z0_r + z2_r;
510         final double t1_i = z0_i + z2_i;
511         final double t2_r = z1_r + z3_r;
512         final double t2_i = z1_i + z3_i;
513         final double t3_r = z0_r - z2_r;
514         final double t3_i = z0_i - z2_i;
515         final double t4_r = sign * (z1_r - z3_r);
516         final double t4_i = sign * (z1_i - z3_i);
517         ret[j] = t1_r + t2_r;
518         ret[j + 1] = t1_i + t2_i;
519         double x_r = t3_r - t4_i;
520         double x_i = t3_i + t4_r;
521         int jdj = j + dj;
522         ret[jdj] = fma(w1_r, x_r, -w1_i * x_i);
523         ret[jdj + 1] = fma(w1_r, x_i, w1_i * x_r);
524         x_r = t1_r - t2_r;
525         x_i = t1_i - t2_i;
526         jdj += dj;
527         ret[jdj] = fma(w2_r, x_r, -w2_i * x_i);
528         ret[jdj + 1] = fma(w2_r, x_i, w2_i * x_r);
529         x_r = t3_r + t4_i;
530         x_i = t3_i - t4_r;
531         jdj += dj;
532         ret[jdj] = fma(w3_r, x_r, -w3_i * x_i);
533         ret[jdj + 1] = fma(w3_r, x_i, w3_i * x_r);
534       }
535     }
536   }
537 
538   /**
539    * Handle factors of 5.
540    *
541    * @param product Product to apply.
542    * @param passData the data.
543    * @param twiddles the twiddle factors.
544    */
545   private void pass5(int product, PassData passData, double[][] twiddles) {
546     final double[] data = passData.in;
547     final double[] ret = passData.out;
548     final int factor = 5;
549     final int m = n / factor;
550     final int q = n / product;
551     final int p_1 = product / factor;
552     final double tau = sqrt5_4;
553     final double sin2PI_5s = sign * sin2PI_5;
554     final double sinPI_5s = sign * sinPI_5;
555     final int di = 2 * m;
556     final int dj = 2 * p_1;
557     final int jstep = (factor - 1) * dj;
558     int i = passData.inOffset;
559     int j = passData.outOffset;
560     for (int k = 0; k < q; k++, j += jstep) {
561       final double[] twids = twiddles[k];
562       final double w1r = twids[0];
563       final double w1i = -sign * twids[1];
564       final double w2r = twids[2];
565       final double w2i = -sign * twids[3];
566       final double w3r = twids[4];
567       final double w3i = -sign * twids[5];
568       final double w4r = twids[6];
569       final double w4i = -sign * twids[7];
570       for (int k1 = 0; k1 < p_1; k1++, i += 2, j += 2) {
571         final double z0r = data[i];
572         final double z0i = data[i + 1];
573         int idi = i + di;
574         final double z1r = data[idi];
575         final double z1i = data[idi + 1];
576         idi += di;
577         final double z2r = data[idi];
578         final double z2i = data[idi + 1];
579         idi += di;
580         final double z3r = data[idi];
581         final double z3i = data[idi + 1];
582         idi += di;
583         final double z4r = data[idi];
584         final double z4i = data[idi + 1];
585         final double t1r = z1r + z4r;
586         final double t1i = z1i + z4i;
587         final double t2r = z2r + z3r;
588         final double t2i = z2i + z3i;
589         final double t3r = z1r - z4r;
590         final double t3i = z1i - z4i;
591         final double t4r = z2r - z3r;
592         final double t4i = z2i - z3i;
593         final double t5r = t1r + t2r;
594         final double t5i = t1i + t2i;
595         final double t6r = tau * (t1r - t2r);
596         final double t6i = tau * (t1i - t2i);
597         final double t7r = fma(-0.25, t5r, z0r);
598         final double t7i = fma(-0.25, t5i, z0i);
599         final double t8r = t7r + t6r;
600         final double t8i = t7i + t6i;
601         final double t9r = t7r - t6r;
602         final double t9i = t7i - t6i;
603         final double t10r = fma(sin2PI_5s, t3r, sinPI_5s * t4r);
604         final double t10i = fma(sin2PI_5s, t3i, sinPI_5s * t4i);
605         final double t11r = fma(-sin2PI_5s, t4r, sinPI_5s * t3r);
606         final double t11i = fma(-sin2PI_5s, t4i, sinPI_5s * t3i);
607         ret[j] = z0r + t5r;
608         ret[j + 1] = z0i + t5i;
609         double xr = t8r - t10i;
610         double xi = t8i + t10r;
611         int jdj = j + dj;
612         ret[jdj] = fma(w1r, xr, -w1i * xi);
613         ret[jdj + 1] = fma(w1r, xi, w1i * xr);
614         xr = t9r - t11i;
615         xi = t9i + t11r;
616         jdj += dj;
617         ret[jdj] = fma(w2r, xr, -w2i * xi);
618         ret[jdj + 1] = fma(w2r, xi, w2i * xr);
619         xr = t9r + t11i;
620         xi = t9i - t11r;
621         jdj += dj;
622         ret[jdj] = fma(w3r, xr, -w3i * xi);
623         ret[jdj + 1] = fma(w3r, xi, w3i * xr);
624         xr = t8r + t10i;
625         xi = t8i - t10r;
626         jdj += dj;
627         ret[jdj] = fma(w4r, xr, -w4i * xi);
628         ret[jdj + 1] = fma(w4r, xi, w4i * xr);
629       }
630     }
631   }
632 
633   /**
634    * Handle factors of 6.
635    *
636    * @param product Product to apply.
637    * @param passData the data.
638    * @param twiddles the twiddle factors.
639    */
640   private void pass6(int product, PassData passData, double[][] twiddles) {
641     final double[] data = passData.in;
642     final double[] ret = passData.out;
643     final int factor = 6;
644     final int m = n / factor;
645     final int q = n / product;
646     final int p_1 = product / factor;
647     final double tau = sign * sqrt3_2;
648     final int di = 2 * m;
649     final int dj = 2 * p_1;
650     final int jstep = (factor - 1) * dj;
651     int i = passData.inOffset;
652     int j = passData.outOffset;
653     for (int k = 0; k < q; k++, j += jstep) {
654       final double[] twids = twiddles[k];
655       final double w1r = twids[0];
656       final double w1i = -sign * twids[1];
657       final double w2r = twids[2];
658       final double w2i = -sign * twids[3];
659       final double w3r = twids[4];
660       final double w3i = -sign * twids[5];
661       final double w4r = twids[6];
662       final double w4i = -sign * twids[7];
663       final double w5r = twids[8];
664       final double w5i = -sign * twids[9];
665       for (int k1 = 0; k1 < p_1; k1++, i += 2, j += 2) {
666         final double z0r = data[i];
667         final double z0i = data[i + 1];
668         int idi = i + di;
669         final double z1r = data[idi];
670         final double z1i = data[idi + 1];
671         idi += di;
672         final double z2r = data[idi];
673         final double z2i = data[idi + 1];
674         idi += di;
675         final double z3r = data[idi];
676         final double z3i = data[idi + 1];
677         idi += di;
678         final double z4r = data[idi];
679         final double z4i = data[idi + 1];
680         idi += di;
681         final double z5r = data[idi];
682         final double z5i = data[idi + 1];
683         final double ta1r = z2r + z4r;
684         final double ta1i = z2i + z4i;
685         final double ta2r = fma(-0.5, ta1r, z0r);
686         final double ta2i = fma(-0.5, ta1i, z0i);
687         final double ta3r = tau * (z2r - z4r);
688         final double ta3i = tau * (z2i - z4i);
689         final double a0r = z0r + ta1r;
690         final double a0i = z0i + ta1i;
691         final double a1r = ta2r - ta3i;
692         final double a1i = ta2i + ta3r;
693         final double a2r = ta2r + ta3i;
694         final double a2i = ta2i - ta3r;
695         final double tb1r = z5r + z1r;
696         final double tb1i = z5i + z1i;
697         final double tb2r = fma(-0.5, tb1r, z3r);
698         final double tb2i = fma(-0.5, tb1i, z3i);
699         final double tb3r = tau * (z5r - z1r);
700         final double tb3i = tau * (z5i - z1i);
701         final double b0r = z3r + tb1r;
702         final double b0i = z3i + tb1i;
703         final double b1r = tb2r - tb3i;
704         final double b1i = tb2i + tb3r;
705         final double b2r = tb2r + tb3i;
706         final double b2i = tb2i - tb3r;
707         ret[j] = a0r + b0r;
708         ret[j + 1] = a0i + b0i;
709         double xr = a1r - b1r;
710         double xi = a1i - b1i;
711         int jdj = j + dj;
712         ret[jdj] = fma(w1r, xr, -w1i * xi);
713         ret[jdj + 1] = fma(w1r, xi, w1i * xr);
714         xr = a2r + b2r;
715         xi = a2i + b2i;
716         jdj += dj;
717         ret[jdj] = fma(w2r, xr, -w2i * xi);
718         ret[jdj + 1] = fma(w2r, xi, w2i * xr);
719         xr = a0r - b0r;
720         xi = a0i - b0i;
721         jdj += dj;
722         ret[jdj] = fma(w3r, xr, -w3i * xi);
723         ret[jdj + 1] = fma(w3r, xi, w3i * xr);
724         xr = a1r + b1r;
725         xi = a1i + b1i;
726         jdj += dj;
727         ret[jdj] = fma(w4r, xr, -w4i * xi);
728         ret[jdj + 1] = fma(w4r, xi, w4i * xr);
729         xr = a2r - b2r;
730         xi = a2i - b2i;
731         jdj += dj;
732         ret[jdj] = fma(w5r, xr, -w5i * xi);
733         ret[jdj + 1] = fma(w5r, xi, w5i * xr);
734       }
735     }
736   }
737 
738   /**
739    * Handle factors of 7.
740    *
741    * @param product Product to apply.
742    * @param passData the data.
743    * @param twiddles the twiddle factors.
744    */
745   private void pass7(int product, PassData passData, double[][] twiddles) {
746     final double[] data = passData.in;
747     final double[] ret = passData.out;
748     final int factor = 7;
749     final int m = n / factor;
750     final int q = n / product;
751     final int p_1 = product / factor;
752     final double c1 = cos2PI_7;
753     final double c2 = cos4PI_7;
754     final double c3 = cos6PI_7;
755     final double s1 = (-sign) * sin2PI_7;
756     final double s2 = (-sign) * sin4PI_7;
757     final double s3 = (-sign) * sin6PI_7;
758     final int di = 2 * m;
759     final int dj = 2 * p_1;
760     final int jstep = (factor - 1) * dj;
761     int i = passData.inOffset;
762     int j = passData.outOffset;
763     for (int k = 0; k < q; k++, j += jstep) {
764       final double[] twids = twiddles[k];
765       final double w1r = twids[0];
766       final double w1i = -sign * twids[1];
767       final double w2r = twids[2];
768       final double w2i = -sign * twids[3];
769       final double w3r = twids[4];
770       final double w3i = -sign * twids[5];
771       final double w4r = twids[6];
772       final double w4i = -sign * twids[7];
773       final double w5r = twids[8];
774       final double w5i = -sign * twids[9];
775       final double w6r = twids[10];
776       final double w6i = -sign * twids[11];
777       for (int k1 = 0; k1 < p_1; k1++, i += 2, j += 2) {
778         final double z0r = data[i];
779         final double z0i = data[i + 1];
780         int idi = i + di;
781         final double z1r = data[idi];
782         final double z1i = data[idi + 1];
783         idi += di;
784         final double z2r = data[idi];
785         final double z2i = data[idi + 1];
786         idi += di;
787         final double z3r = data[idi];
788         final double z3i = data[idi + 1];
789         idi += di;
790         final double z4r = data[idi];
791         final double z4i = data[idi + 1];
792         idi += di;
793         final double z5r = data[idi];
794         final double z5i = data[idi + 1];
795         idi += di;
796         final double z6r = data[idi];
797         final double z6i = data[idi + 1];
798         final double t0r = z1r + z6r;
799         final double t0i = z1i + z6i;
800         final double t1r = z1r - z6r;
801         final double t1i = z1i - z6i;
802         final double t2r = z2r + z5r;
803         final double t2i = z2i + z5i;
804         final double t3r = z2r - z5r;
805         final double t3i = z2i - z5i;
806         final double t4r = z4r + z3r;
807         final double t4i = z4i + z3i;
808         final double t5r = z4r - z3r;
809         final double t5i = z4i - z3i;
810         final double t6r = t2r + t0r;
811         final double t6i = t2i + t0i;
812         final double t7r = t5r + t3r;
813         final double t7i = t5i + t3i;
814         final double b0r = z0r + t6r + t4r;
815         final double b0i = z0i + t6i + t4i;
816         final double v1 = (c1 + c2 + c3) * oneThird - 1.0;
817         final double v2 = (2.0 * c1 - c2 - c3) * oneThird;
818         final double v3 = (c1 - 2.0 * c2 + c3) * oneThird;
819         final double v4 = (c1 + c2 - 2.0 * c3) * oneThird;
820         final double v5 = (s1 + s2 - s3) * oneThird;
821         final double v6 = (2.0 * s1 - s2 + s3) * oneThird;
822         final double v7 = (s1 - 2.0 * s2 - s3) * oneThird;
823         final double v8 = (s1 + s2 + 2.0 * s3) * oneThird;
824         final double b1r = v1 * (t6r + t4r);
825         final double b1i = v1 * (t6i + t4i);
826         final double b2r = v2 * (t0r - t4r);
827         final double b2i = v2 * (t0i - t4i);
828         final double b3r = v3 * (t4r - t2r);
829         final double b3i = v3 * (t4i - t2i);
830         final double b4r = v4 * (t2r - t0r);
831         final double b4i = v4 * (t2i - t0i);
832         final double b5r = v5 * (t7r + t1r);
833         final double b5i = v5 * (t7i + t1i);
834         final double b6r = v6 * (t1r - t5r);
835         final double b6i = v6 * (t1i - t5i);
836         final double b7r = v7 * (t5r - t3r);
837         final double b7i = v7 * (t5i - t3i);
838         final double b8r = v8 * (t3r - t1r);
839         final double b8i = v8 * (t3i - t1i);
840         final double u0r = b0r + b1r;
841         final double u0i = b0i + b1i;
842         final double u1r = b2r + b3r;
843         final double u1i = b2i + b3i;
844         final double u2r = b4r - b3r;
845         final double u2i = b4i - b3i;
846         final double u3r = -b2r - b4r;
847         final double u3i = -b2i - b4i;
848         final double u4r = b6r + b7r;
849         final double u4i = b6i + b7i;
850         final double u5r = b8r - b7r;
851         final double u5i = b8i - b7i;
852         final double u6r = -b8r - b6r;
853         final double u6i = -b8i - b6i;
854         final double u7r = u0r + u1r;
855         final double u7i = u0i + u1i;
856         final double u8r = u0r + u2r;
857         final double u8i = u0i + u2i;
858         final double u9r = u0r + u3r;
859         final double u9i = u0i + u3i;
860         final double u10r = u4r + b5r;
861         final double u10i = u4i + b5i;
862         final double u11r = u5r + b5r;
863         final double u11i = u5i + b5i;
864         final double u12r = u6r + b5r;
865         final double u12i = u6i + b5i;
866         ret[j] = b0r;
867         ret[j + 1] = b0i;
868         double xr = u7r + u10i;
869         double xi = u7i - u10r;
870         int jdj = j + dj;
871         ret[jdj] = fma(w1r, xr, -w1i * xi);
872         ret[jdj + 1] = fma(w1r, xi, w1i * xr);
873         xr = u9r + u12i;
874         xi = u9i - u12r;
875         jdj += dj;
876         ret[jdj] = fma(w2r, xr, -w2i * xi);
877         ret[jdj + 1] = fma(w2r, xi, w2i * xr);
878         xr = u8r - u11i;
879         xi = u8i + u11r;
880         jdj += dj;
881         ret[jdj] = fma(w3r, xr, -w3i * xi);
882         ret[jdj + 1] = fma(w3r, xi, w3i * xr);
883         xr = u8r + u11i;
884         xi = u8i - u11r;
885         jdj += dj;
886         ret[jdj] = fma(w4r, xr, -w4i * xi);
887         ret[jdj + 1] = fma(w4r, xi, w4i * xr);
888         xr = u9r - u12i;
889         xi = u9i + u12r;
890         jdj += dj;
891         ret[jdj] = fma(w5r, xr, -w5i * xi);
892         ret[jdj + 1] = fma(w5r, xi, w5i * xr);
893         xr = u7r - u10i;
894         xi = u7i + u10r;
895         jdj += dj;
896         ret[jdj] = fma(w6r, xr, -w6i * xi);
897         ret[jdj + 1] = fma(w6r, xi, w6i * xr);
898       }
899     }
900   }
901 
902   /**
903    * Note that passOdd is only intended for odd factors (and fails for even factors).
904    *
905    * @param factor Factor to apply.
906    * @param product Product to apply.
907    * @param passData the data.
908    * @param twiddles the twiddle factors.
909    */
910   private void passOdd(int factor, int product, PassData passData, double[][] twiddles) {
911     final double[] data = passData.in;
912     final int dataOffset = passData.inOffset;
913     final double[] ret = passData.out;
914     final int retOffset = passData.outOffset;
915 
916     final int m = n / factor;
917     final int q = n / product;
918     final int p_1 = product / factor;
919     final int jump = (factor - 1) * p_1;
920     for (int i = 0; i < m; i++) {
921       ret[retOffset + 2 * i] = data[dataOffset + 2 * i];
922       ret[retOffset + 2 * i + 1] = data[dataOffset + 2 * i + 1];
923     }
924     for (int e = 1; e < (factor - 1) / 2 + 1; e++) {
925       for (int i = 0; i < m; i++) {
926         int idx = i + e * m;
927         int idxc = i + (factor - e) * m;
928         ret[retOffset + 2 * idx] = data[dataOffset + 2 * idx] + data[dataOffset + 2 * idxc];
929         ret[retOffset + 2 * idx + 1] =
930             data[dataOffset + 2 * idx + 1] + data[dataOffset + 2 * idxc + 1];
931         ret[retOffset + 2 * idxc] = data[dataOffset + 2 * idx] - data[dataOffset + 2 * idxc];
932         ret[retOffset + 2 * idxc + 1] =
933             data[dataOffset + 2 * idx + 1] - data[dataOffset + 2 * idxc + 1];
934       }
935     }
936     for (int i = 0; i < m; i++) {
937       data[dataOffset + 2 * i] = ret[retOffset + 2 * i];
938       data[dataOffset + 2 * i + 1] = ret[retOffset + 2 * i + 1];
939     }
940     for (int e1 = 1; e1 < (factor - 1) / 2 + 1; e1++) {
941       for (int i = 0; i < m; i++) {
942         int i1 = retOffset + 2 * (i + e1 * m);
943         data[dataOffset + 2 * i] += ret[i1];
944         data[dataOffset + 2 * i + 1] += ret[i1 + 1];
945       }
946     }
947     double[] twiddl = twiddles[q];
948     for (int e = 1; e < (factor - 1) / 2 + 1; e++) {
949       int idx = e;
950       double wr, wi;
951       int em = e * m;
952       int ecm = (factor - e) * m;
953       for (int i = 0; i < m; i++) {
954         data[dataOffset + 2 * (i + em)] = ret[retOffset + 2 * i];
955         data[dataOffset + 2 * (i + em) + 1] = ret[retOffset + 2 * i + 1];
956         data[dataOffset + 2 * (i + ecm)] = ret[retOffset + 2 * i];
957         data[dataOffset + 2 * (i + ecm) + 1] = ret[retOffset + 2 * i + 1];
958       }
959       for (int e1 = 1; e1 < (factor - 1) / 2 + 1; e1++) {
960         if (idx == 0) {
961           wr = 1;
962           wi = 0;
963         } else {
964           wr = twiddl[2 * (idx - 1)];
965           wi = -sign * twiddl[2 * (idx - 1) + 1];
966         }
967         for (int i = 0; i < m; i++) {
968           int i1 = retOffset + 2 * (i + e1 * m);
969           int i2 = retOffset + 2 * (i + (factor - e1) * m);
970           double ap = wr * ret[i1];
971           double am = wi * ret[i2 + 1];
972           double bp = wr * ret[i1 + 1];
973           double bm = wi * ret[i2];
974           data[dataOffset + 2 * (i + em)] += (ap - am);
975           data[dataOffset + 2 * (i + em) + 1] += (bp + bm);
976           data[dataOffset + 2 * (i + ecm)] += (ap + am);
977           data[dataOffset + 2 * (i + ecm) + 1] += (bp - bm);
978         }
979         idx += e;
980         idx %= factor;
981       }
982     }
983     /* k = 0 */
984     for (int k1 = 0; k1 < p_1; k1++) {
985       ret[retOffset + 2 * k1] = data[dataOffset + 2 * k1];
986       ret[retOffset + 2 * k1 + 1] = data[dataOffset + 2 * k1 + 1];
987     }
988     for (int e1 = 1; e1 < factor; e1++) {
989       for (int k1 = 0; k1 < p_1; k1++) {
990         int i = retOffset + 2 * (k1 + e1 * p_1);
991         int i1 = dataOffset + 2 * (k1 + e1 * m);
992         ret[i] = data[i1];
993         ret[i + 1] = data[i1 + 1];
994       }
995     }
996     int i = p_1;
997     int j = product;
998     for (int k = 1; k < q; k++) {
999       for (int k1 = 0; k1 < p_1; k1++) {
1000         ret[retOffset + 2 * j] = data[dataOffset + 2 * i];
1001         ret[retOffset + 2 * j + 1] = data[dataOffset + 2 * i + 1];
1002         i++;
1003         j++;
1004       }
1005       j += jump;
1006     }
1007     i = p_1;
1008     j = product;
1009     for (int k = 1; k < q; k++) {
1010       twiddl = twiddles[k];
1011       for (int k1 = 0; k1 < p_1; k1++) {
1012         for (int e1 = 1; e1 < factor; e1++) {
1013           int i1 = dataOffset + 2 * (i + e1 * m);
1014           double xr = data[i1];
1015           double xi = data[i1 + 1];
1016           double wr = twiddl[2 * (e1 - 1)];
1017           double wi = -sign * twiddl[2 * (e1 - 1) + 1];
1018           int i2 = retOffset + 2 * (j + e1 * p_1);
1019           ret[i2] = fma(wr, xr, -wi * xi);
1020           ret[i2 + 1] = fma(wr, xi, wi * xr);
1021         }
1022         i++;
1023         j++;
1024       }
1025       j += jump;
1026     }
1027   }
1028 
1029   /**
1030    * Compute twiddle factors. These are trigonometric constants that depend on the factoring of n.
1031    *
1032    * @return twiddle factors.
1033    */
1034   private double[][][] wavetable() {
1035     if (n < 2) {
1036       return null;
1037     }
1038     final double d_theta = -2.0 * PI / n;
1039     final double[][][] ret = new double[factors.length][][];
1040     int product = 1;
1041     for (int i = 0; i < factors.length; i++) {
1042       int factor = factors[i];
1043       int product_1 = product;
1044       product *= factor;
1045       final int q = n / product;
1046       ret[i] = new double[q + 1][2 * (factor - 1)];
1047       final double[][] twid = ret[i];
1048       for (int j = 0; j < factor - 1; j++) {
1049         twid[0][2 * j] = 1.0;
1050         twid[0][2 * j + 1] = 0.0;
1051       }
1052       for (int k = 1; k <= q; k++) {
1053         int m = 0;
1054         for (int j = 0; j < factor - 1; j++) {
1055           m += k * product_1;
1056           m %= n;
1057           final double theta = d_theta * m;
1058           twid[k][2 * j] = cos(theta);
1059           twid[k][2 * j + 1] = sin(theta);
1060         }
1061       }
1062     }
1063     return ret;
1064   }
1065 
1066   /**
1067    * Static DFT method used to test the FFT.
1068    *
1069    * @param in input array.
1070    * @param out output array.
1071    */
1072   public static void dft(double[] in, double[] out) {
1073     int n = in.length / 2;
1074     for (int k = 0; k < n; k++) { // For each output element
1075       double sumReal = 0;
1076       double simImag = 0;
1077       for (int t = 0; t < n; t++) { // For each input element
1078         double angle = (2 * PI * t * k) / n;
1079         int re = 2 * t;
1080         int im = 2 * t + 1;
1081         sumReal = fma(in[re], cos(angle), sumReal);
1082         sumReal = fma(in[im], sin(angle), sumReal);
1083         simImag = fma(-in[re], sin(angle), simImag);
1084         simImag = fma(in[im], cos(angle), simImag);
1085       }
1086       int re = 2 * k;
1087       int im = 2 * k + 1;
1088       out[re] = sumReal;
1089       out[im] = simImag;
1090     }
1091   }
1092 }