View Javadoc
1   // ******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2025.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  // ******************************************************************************
38  package ffx.numerics.fft;
39  
40  import java.util.Arrays;
41  import java.util.Random;
42  import java.util.Vector;
43  import java.util.logging.Level;
44  import java.util.logging.Logger;
45  
46  import static java.lang.Integer.max;
47  import static java.lang.Math.fma;
48  import static java.lang.System.arraycopy;
49  import static org.apache.commons.math3.util.FastMath.PI;
50  import static org.apache.commons.math3.util.FastMath.cos;
51  import static org.apache.commons.math3.util.FastMath.sin;
52  
53  /**
54   * Compute the FFT of complex, double precision data of arbitrary length n. This class uses a mixed
55   * radix method and has special methods to handle factors [2, 3, 4, 5, 6, 7] and a general method for
56   * larger prime factors.
57   *
58   * @author Michal J. Schnieders<br>
59   * Derived from:
60   * <br>
61   * Bruce R. Miller (bruce.miller@nist.gov)
62   * <br>
63   * Contribution of the National Institute of Standards and Technology, not subject to copyright.
64   * <br>
65   * Derived from:
66   * <br>
67   * GSL (Gnu Scientific Library) FFT Code by Brian Gough (bjg@network-theory.co.uk)
68   * @see <ul>
69   * <li><a href="http://dx.doi.org/10.1016/0021-9991(83)90013-X" target="_blank"> Clive
70   * Temperton. Self-sorting mixed-radix fast fourier transforms. Journal of Computational
71   * Physics, 52(1):1-23, 1983. </a>
72   * <li><a href="http://www.jstor.org/stable/2003354" target="_blank"> J. W. Cooley and J. W.
73   * Tukey, Mathematics of Computation 19 (90), 297 (1965) </a>
74   * <li><a href="http://en.wikipedia.org/wiki/Fast_Fourier_transform" target="_blank">FFT at Wikipedia </a>
75   * </ul>
76   * @since 1.0
77   */
78  public class Complex {
79  
80    private static final Logger logger = Logger.getLogger(Complex.class.getName());
81    private static final int[] availableFactors = {7, 6, 5, 4, 3, 2};
82    private static final int firstUnavailablePrime = 11;
83    /**
84     * Number of complex numbers in the transform.
85     */
86    private final int n;
87    /**
88     * The number of FFTs to process (default = 1).
89     */
90    private final int nFFTs;
91    /**
92     * Offset to the imaginary part of the input
93     * This is 1 for interleaved data.
94     * For blocked data, a typical offset is n in 1-dimension (or nX*nY + n in 2D).
95     */
96    private final int externalIm;
97    /**
98     * Offset to the imaginary part of the input.
99     * Internally this is 1 (for interleaved data) or n (for blocked data).
100    * <p>
101    * The internal imaginary offset cannot be larger than n due to use of a single scratch array of length 2n.
102    */
103   private final int im;
104   /**
105    * Internal increment for packing data (1 for blocked and 2 for interleaved data).
106    */
107   private final int ii;
108   /**
109    * Factorization of n.
110    */
111   private final int[] factors;
112   /**
113    * Packing of non-contiguous data.
114    */
115   private final double[] packedData;
116   /**
117    * Scratch space for the transform.
118    */
119   private final double[] scratch;
120   /**
121    * Constants for each pass.
122    */
123   private final MixedRadixFactor[] mixedRadixFactors;
124   /**
125    * Pass specific data for the transform with
126    * references to the input and output data arrays.
127    */
128   private final PassData[] passData;
129   /**
130    * Use SIMD operators.
131    */
132   private boolean useSIMD;
133   /**
134    * Minimum SIMD loop length set to the preferred SIMD vector length.
135    */
136   private int minSIMDLoopLength;
137 
138   /**
139    * Cache the last input dimension.
140    */
141   private static int lastN = -1;
142   /**
143    * Cache the last imaginary offset.
144    */
145   private static int lastIm = -1;
146   /**
147    * Cache the last nFFT size.
148    */
149   private static int lastNFFTs = -1;
150   /**
151    * Cache the last set of radix factors.
152    */
153   private static int[] factorsCache = null;
154   /**
155    * Cache the last set of radix factors. These classes are static and thread-safe.
156    */
157   private static MixedRadixFactor[] mixedRadixFactorsCache = null;
158 
159   /**
160    * Construct a Complex instance for interleaved data of length n. Factorization of n is designed to use special
161    * methods for small factors, and a general routine for large odd prime factors. Scratch memory is
162    * created of length 2*n, which is reused each time a transform is computed.
163    *
164    * @param n Number of complex numbers (n .GT. 1).
165    */
166   public Complex(int n) {
167     this(n, DataLayout1D.INTERLEAVED, 1);
168   }
169 
170   /**
171    * Construct a Complex instance for data of length n.
172    * The offset to each imaginary part relative to the real part is given by im.
173    * Factorization of n is designed to use special methods for small factors.
174    * Scratch memory is created of length 2*n, which is reused each time a transform is computed.
175    *
176    * @param n          Number of complex numbers (n .GT. 1).
177    * @param dataLayout Data layout (interleaved or blocked).
178    * @param imOffset   Offset to the imaginary part of each complex number relative to its real part.
179    */
180   public Complex(int n, DataLayout1D dataLayout, int imOffset) {
181     this(n, dataLayout, imOffset, 1);
182   }
183 
184   /**
185    * Construct a Complex instance for data of length n.
186    * The offset to each imaginary part relative to the real part is given by im.
187    * Factorization of n is designed to use special methods for small factors.
188    * Scratch memory is created of length 2*n, which is reused each time a transform is computed.
189    *
190    * @param n          Number of complex numbers (n .GT. 1).
191    * @param dataLayout Data layout (interleaved or blocked).
192    * @param imOffset   Offset to the imaginary part of each complex number relative to its real part.
193    * @param nFFTs      Number of FFTs to process (default = 1).
194    */
195   public Complex(int n, DataLayout1D dataLayout, int imOffset, int nFFTs) {
196     assert (n > 1);
197     this.n = n;
198     this.nFFTs = nFFTs;
199     this.externalIm = imOffset;
200 
201     /*
202      * The internal imaginary offset is always 1 or n.
203      *
204      * If the external imaginary offset is greater than n,
205      * the data will be packed into a contiguous array.
206      */
207     if (dataLayout == DataLayout1D.INTERLEAVED) {
208       im = 1;
209       ii = 2;
210     } else {
211       im = n * nFFTs;
212       ii = 1;
213     }
214     packedData = new double[2 * n * nFFTs];
215     scratch = new double[2 * n * nFFTs];
216     passData = new PassData[2];
217     passData[0] = new PassData(1, packedData, 0, scratch, 0);
218     passData[1] = new PassData(1, packedData, 0, scratch, 0);
219 
220     // For a 3D transform with 64 threads, there will be 3 * 64 = 192 transforms created.
221     // To conserve memory, the most recent set of twiddles and mixed radix factors are cached.
222     // Successive transforms with the same dimension and imaginary offset will reuse the cached values.
223     // Synchronize the creation of the twiddle factors.
224     synchronized (Complex.class) {
225       // The last set of factors, twiddles and mixed radix factors will be reused.
226       if (this.n == lastN && this.im == lastIm && this.nFFTs == lastNFFTs) {
227         factors = factorsCache;
228         mixedRadixFactors = mixedRadixFactorsCache;
229       } else {
230         // The cache cannot be reused and will be updated.
231         factors = factor(n);
232         double[][][] twiddle = wavetable(n, factors);
233         mixedRadixFactors = new MixedRadixFactor[factors.length];
234         lastN = this.n;
235         lastIm = this.im;
236         lastNFFTs = this.nFFTs;
237         factorsCache = factors;
238         mixedRadixFactorsCache = mixedRadixFactors;
239         // Allocate space for each pass and radix instances for each factor.
240         int product = 1;
241         for (int i = 0; i < factors.length; i++) {
242           final int factor = factors[i];
243           product *= factor;
244           PassConstants passConstants = new PassConstants(n, im, nFFTs, factor, product, twiddle[i]);
245           switch (factor) {
246             case 2 -> mixedRadixFactors[i] = new MixedRadixFactor2(passConstants);
247             case 3 -> mixedRadixFactors[i] = new MixedRadixFactor3(passConstants);
248             case 4 -> mixedRadixFactors[i] = new MixedRadixFactor4(passConstants);
249             case 5 -> mixedRadixFactors[i] = new MixedRadixFactor5(passConstants);
250             case 6 -> mixedRadixFactors[i] = new MixedRadixFactor6(passConstants);
251             case 7 -> mixedRadixFactors[i] = new MixedRadixFactor7(passConstants);
252             default -> {
253               if (dataLayout == DataLayout1D.BLOCKED) {
254                 throw new IllegalArgumentException(
255                     " Prime factors greater than 7 are only supported for interleaved data: " + factor);
256               }
257               mixedRadixFactors[i] = new MixedRadixFactorPrime(passConstants);
258             }
259           }
260           // logger.info(" Mixed Radix Factor: " + mixedRadixFactors[i]);
261         }
262       }
263 
264       // Use SIMD by default.
265       useSIMD = true;
266       String simd = System.getProperty("fft.simd", Boolean.toString(useSIMD));
267       try {
268         useSIMD = Boolean.parseBoolean(simd);
269       } catch (Exception e) {
270         logger.info(" Invalid value for fft.simd: " + simd);
271         useSIMD = false;
272       }
273 
274       // Minimum SIMD inner loop length.
275       // For interleaved data, the default minimum SIMD loop length is 1 using AVX-128 (1 Real and 1 Imaginary per load).
276       // For blocked data, the default minimum SIMD loop length is 2 using AVX-128 (2 Real or 2 Imaginary per load).
277       // Setting this value higher than one reverts to scalar operations for short inner loop lengths.
278       if (im == 1) {
279         // Interleaved data.
280         minSIMDLoopLength = MixedRadixFactor.LENGTH / 2;
281       } else {
282         // Blocked data.
283         minSIMDLoopLength = MixedRadixFactor.LENGTH;
284       }
285       String loop = System.getProperty("fft.minLoop", Integer.toString(minSIMDLoopLength));
286       try {
287         minSIMDLoopLength = max(minSIMDLoopLength, Integer.parseInt(loop));
288       } catch (Exception e) {
289         logger.info(" Invalid value for fft.minLoop: " + loop);
290         if (im == 1) {
291           // Interleaved data.
292           minSIMDLoopLength = MixedRadixFactor.LENGTH / 2;
293         } else {
294           // Blocked data.
295           minSIMDLoopLength = MixedRadixFactor.LENGTH;
296         }
297       }
298     }
299   }
300 
301   /**
302    * String representation of the Complex FFT.
303    *
304    * @return a String.
305    */
306   @Override
307   public String toString() {
308     StringBuilder sb = new StringBuilder(" Complex FFT: n = " + n + ", nFFTs = " + nFFTs + ", im = " + externalIm);
309     sb.append("\n  Factors: ").append(Arrays.toString(factors));
310     return sb.toString();
311   }
312 
313   /**
314    * Configure use of SIMD operators.
315    *
316    * @param useSIMD True to use SIMD operators.
317    */
318   public void setUseSIMD(boolean useSIMD) {
319     this.useSIMD = useSIMD;
320   }
321 
322   /**
323    * Configure the minimum SIMD inner loop length.
324    * For interleaved data, the default minimum SIMD loop length is 1 using AVX-128 (1 Real and 1 Imaginary per load).
325    * For blocked data, the default minimum SIMD loop length is 1 using AVX-128 (2 Real or 2 Imaginary per load).
326    * Setting this value higher than one reverts to scalar operations for short inner loop lengths.
327    *
328    * @param minSIMDLoopLength Minimum SIMD inner loop length.
329    */
330   public void setMinSIMDLoopLength(int minSIMDLoopLength) {
331     if (im == 1 && minSIMDLoopLength < 1) {
332       throw new IllegalArgumentException(" Minimum SIMD loop length for interleaved data is 1 or greater.");
333     }
334     if (im > 2 && minSIMDLoopLength < 2) {
335       throw new IllegalArgumentException(" Minimum SIMD loop length for blocked data is 2 or greater.");
336     }
337     this.minSIMDLoopLength = minSIMDLoopLength;
338   }
339 
340   /**
341    * Check if a dimension is a preferred dimension.
342    *
343    * @param dim the dimension to check.
344    * @return true if the dimension is a preferred dimension.
345    */
346   public static boolean preferredDimension(int dim) {
347     if (dim < 2) {
348       return false;
349     }
350 
351     // Apply preferred factors.
352     for (int factor : availableFactors) {
353       while ((dim % factor) == 0) {
354         dim /= factor;
355       }
356     }
357     return dim <= 1;
358   }
359 
360   /**
361    * Getter for the field <code>factors</code>.
362    *
363    * @return an array of int.
364    */
365   public int[] getFactors() {
366     return factors;
367   }
368 
369   /**
370    * Compute the Fast Fourier Transform of data leaving the result in data. The array data must
371    * contain the data points in the following locations:
372    *
373    * <PRE>
374    * Re(d[i]) = data[offset + stride*i]
375    * Im(d[i]) = data[offset + stride*i + im]
376    * </PRE>
377    * <p>
378    * where im is 1 for interleaved data or a constant set when the class was constructed.
379    *
380    * @param data   an array of double.
381    * @param offset the offset to the beginning of the data.
382    * @param stride the stride between data points.
383    */
384   public void fft(double[] data, int offset, int stride) {
385     transformInternal(data, offset, stride, -1, 2 * n);
386   }
387 
388   /**
389    * Compute the Fast Fourier Transform of data leaving the result in data.
390    * The array data must contain the data points in the following locations:
391    *
392    * <PRE>
393    * Re(d[i]) = data[offset + stride*i] + k * nextFFT
394    * Im(d[i]) = data[offset + stride*i + im] + k * nextFFT
395    * </PRE>
396    * <p>
397    * where im is 1 for interleaved data or a constant set when the class was constructed.
398    * The value of k is the FFT number (0 to nFFTs-1).
399    * The value of nextFFT is the stride between FFT data sets. The nextFFT value is ignored if the number of FFTs is 1.
400    *
401    * @param data    an array of double.
402    * @param offset  the offset to the beginning of the data.
403    * @param stride  the stride between data points.
404    * @param nextFFT the offset to the beginning of the next FFT when nFFTs > 1.
405    */
406   public void fft(double[] data, int offset, int stride, int nextFFT) {
407     transformInternal(data, offset, stride, -1, nextFFT);
408   }
409 
410   /**
411    * Compute the (un-normalized) inverse FFT of data, leaving it in place. The frequency domain data
412    * must be in wrap-around order, and be stored in the following locations:
413    *
414    * <PRE>
415    * Re(D[i]) = data[offset + stride*i]
416    * Im(D[i]) = data[offset + stride*i + im]
417    * </PRE>
418    *
419    * @param data   an array of double.
420    * @param offset the offset to the beginning of the data.
421    * @param stride the stride between data points.
422    */
423   public void ifft(double[] data, int offset, int stride) {
424     transformInternal(data, offset, stride, +1, 2 * n);
425   }
426 
427   /**
428    * Compute the (un-normalized) inverse FFT of data, leaving it in place. The frequency domain data
429    * must be in wrap-around order, and be stored in the following locations:
430    *
431    * <PRE>
432    * Re(d[i]) = data[offset + stride*i] + k * nextFFT
433    * Im(d[i]) = data[offset + stride*i + im] + k * nextFFT
434    * </PRE>
435    * <p>
436    * where im is 1 for interleaved data or a constant set when the class was constructed.
437    * The value of k is the FFT number (0 to nFFTs-1).
438    * The value of nextFFT is the stride between FFT data sets. The nextFFT value is ignored if the number of FFTs is 1.
439    *
440    * @param data    an array of double.
441    * @param offset  the offset to the beginning of the data.
442    * @param stride  the stride between data points.
443    * @param nextFFT the offset to the beginning of the next FFT when nFFTs > 1.
444    */
445   public void ifft(double[] data, int offset, int stride, int nextFFT) {
446     transformInternal(data, offset, stride, +1, nextFFT);
447   }
448 
449   /**
450    * Compute the normalized inverse FFT of data, leaving it in place. The frequency domain data must
451    * be stored in the following locations:
452    *
453    * <PRE>
454    * Re(d[i]) = data[offset + stride*i]
455    * Im(d[i]) = data[offset + stride*i + im]
456    * </PRE>
457    * <p>
458    * where im is 1 for interleaved data or a constant set when the class was constructed.
459    *
460    * @param data   an array of double.
461    * @param offset the offset to the beginning of the data.
462    * @param stride the stride between data points.
463    */
464   public void inverse(double[] data, int offset, int stride) {
465     inverse(data, offset, stride, 2 * n);
466   }
467 
468   /**
469    * Compute the normalized inverse FFT of data, leaving it in place. The frequency domain data must
470    * be stored in the following locations:
471    *
472    * <PRE>
473    * Re(d[i]) = data[offset + stride*i] + k * nextFFT
474    * Im(d[i]) = data[offset + stride*i + im] + k * nextFFT
475    * </PRE>
476    * <p>
477    * where im is 1 for interleaved data or a constant set when the class was constructed.
478    * The value of k is the FFT number (0 to nFFTs-1).
479    * The value of nextFFT is the stride between FFT data sets. The nextFFT value is ignored if the number of FFTs is 1.
480    *
481    * @param data    an array of double.
482    * @param offset  the offset to the beginning of the data.
483    * @param stride  the stride between data points.
484    * @param nextFFT the offset to the beginning of the next FFT when nFFTs > 1.
485    */
486   public void inverse(double[] data, int offset, int stride, int nextFFT) {
487     ifft(data, offset, stride, nextFFT);
488 
489     // Normalize inverse FFT with 1/n.
490     double norm = normalization();
491     int index = 0;
492     for (int f = 0; f < nFFTs; f++) {
493       for (int i = 0; i < 2 * n; i++) {
494         data[index++] *= norm;
495       }
496     }
497   }
498 
499   /**
500    * Compute the Fast Fourier Transform of data leaving the result in data.
501    *
502    * @param data    data an array of double.
503    * @param offset  the offset to the beginning of the data.
504    * @param stride  the stride between data points.
505    * @param sign    the sign to apply (forward -1 and inverse 1).
506    * @param nextFFT the offset to the beginning of the next FFT when nFFTs > 1.
507    */
508   private void transformInternal(
509       final double[] data, final int offset, final int stride, final int sign, final int nextFFT) {
510 
511     // Even pass data.
512     passData[0].sign = sign;
513     passData[0].in = data;
514     passData[0].inOffset = offset;
515     passData[0].out = scratch;
516     passData[0].outOffset = 0;
517     // Odd pass data.
518     passData[1].sign = sign;
519     passData[1].in = scratch;
520     passData[1].inOffset = 0;
521     passData[1].out = data;
522     passData[1].outOffset = offset;
523 
524     // Configure the pass data for the transform.
525     boolean packed = false;
526     if (stride > 2 || externalIm > n * nFFTs) {
527       // System.out.println(" Packing data: " + n + " offset " + offset + " stride " + stride + " nextFFT " + nextFFT + " n * nFFTs " + (n * nFFTs) + " externalIm " + externalIm);
528       // Pack non-contiguous (stride > 2) data into a contiguous array.
529       packed = true;
530       pack(data, offset, stride, nextFFT);
531       // The even pass input data is in the packedData array with no offset.
532       passData[0].in = packedData;
533       passData[0].inOffset = 0;
534       // The odd pass input data is in the scratch array with no offset.
535       passData[1].out = packedData;
536       passData[1].outOffset = 0;
537     }
538 
539     // Perform the FFT by looping over the factors.
540     final int nfactors = factors.length;
541     for (int i = 0; i < nfactors; i++) {
542       final int pass = i % 2;
543       MixedRadixFactor mixedRadixFactor = mixedRadixFactors[i];
544       if (useSIMD && mixedRadixFactor.innerLoopLimit >= minSIMDLoopLength) {
545         mixedRadixFactor.passSIMD(passData[pass]);
546       } else {
547         mixedRadixFactor.passScalar(passData[pass]);
548       }
549     }
550 
551     // If the number of factors is odd, the final result is in the scratch array.
552     if (nfactors % 2 == 1) {
553       // Copy the scratch array to the data array.
554       if (stride <= 2 && (im == externalIm) && nextFFT == 2 * n) {
555         arraycopy(scratch, 0, data, offset, 2 * n * nFFTs);
556       } else {
557         unpack(scratch, data, offset, stride, nextFFT);
558       }
559       // If the number of factors is even, the data may need to be unpacked.
560     } else if (packed) {
561       unpack(packedData, data, offset, stride, nextFFT);
562     }
563   }
564 
565   /**
566    * Pack the data into a contiguous array.
567    *
568    * @param data    the data to pack.
569    * @param offset  the offset to the first point data.
570    * @param stride  the stride between data points.
571    * @param nextFFT the stride between FFTs.
572    */
573   private void pack(double[] data, int offset, int stride, int nextFFT) {
574     int i = 0;
575     for (int f = 0; f < nFFTs; f++) {
576       int inputOffset = offset + f * nextFFT;
577       for (int index = inputOffset, k = 0; k < n; k++, i += ii, index += stride) {
578         packedData[i] = data[index];
579         packedData[i + im] = data[index + externalIm];
580       }
581     }
582   }
583 
584   /**
585    * Pack the data into a contiguous array.
586    *
587    * @param data    the location to unpack the data to.
588    * @param offset  the offset to the first point data.
589    * @param stride  the stride between data points.
590    * @param nextFFT the stride between FFTs.
591    */
592   private void unpack(double[] source, double[] data, int offset, int stride, int nextFFT) {
593     int i = 0;
594     for (int f = 0; f < nFFTs; f++) {
595       int outputOffset = offset + f * nextFFT;
596       for (int index = outputOffset, k = 0; k < n; k++, i += ii, index += stride) {
597         data[index] = source[i];
598         data[index + externalIm] = source[i + im];
599       }
600     }
601   }
602 
603   /**
604    * Return the normalization factor. Multiply the elements of the back-transformed data to get the
605    * normalized inverse.
606    *
607    * @return a double.
608    */
609   private double normalization() {
610     return 1.0 / n;
611   }
612 
613   /**
614    * Factor the data length into preferred factors (those with special methods), falling back to odd
615    * primes that the general routine must handle.
616    *
617    * @param n the length of the data.
618    * @return integer factors
619    */
620   private static int[] factor(int n) {
621     if (n < 2) {
622       return null;
623     }
624     Vector<Integer> v = new Vector<>();
625     int nTest = n;
626 
627     // Use the preferred factors first
628     for (int factor : availableFactors) {
629       while ((nTest % factor) == 0) {
630         nTest /= factor;
631         v.add(factor);
632       }
633     }
634 
635     // Unavailable odd prime factors.
636     int factor = firstUnavailablePrime;
637     while (nTest > 1) {
638       while ((nTest % factor) != 0) {
639         factor += 2;
640       }
641       nTest /= factor;
642       v.add(factor);
643     }
644     int product = 1;
645     int nf = v.size();
646     int[] ret = new int[nf];
647     for (int i = 0; i < nf; i++) {
648       ret[i] = v.get(i);
649       product *= ret[i];
650     }
651 
652     // Report a failed factorization.
653     if (product != n) {
654       StringBuilder sb = new StringBuilder(" FFT factorization failed for " + n + "\n");
655       for (int i = 0; i < nf; i++) {
656         sb.append(" ");
657         sb.append(ret[i]);
658       }
659       sb.append("\n");
660       sb.append(" Factor product = ");
661       sb.append(product);
662       sb.append("\n");
663       logger.severe(sb.toString());
664       System.exit(-1);
665     } else {
666       if (logger.isLoggable(Level.FINEST)) {
667         StringBuilder sb = new StringBuilder(" FFT factorization for " + n + " = ");
668         for (int i = 0; i < nf - 1; i++) {
669           sb.append(ret[i]);
670           sb.append(" * ");
671         }
672         sb.append(ret[nf - 1]);
673         logger.finest(sb.toString());
674       }
675     }
676     return ret;
677   }
678 
679   /**
680    * Compute twiddle factors. These are trigonometric constants that depend on the factoring of n.
681    *
682    * @param n       the length of the data.
683    * @param factors the factors of n.
684    * @return twiddle factors.
685    */
686   private static double[][][] wavetable(int n, int[] factors) {
687     if (n < 2) {
688       return null;
689     }
690 
691     // System.out.println(" Computing twiddle factors for length :" + n);
692     // System.out.println(" Number of factors: " + factors.length);
693 
694     final double TwoPI_N = -2.0 * PI / n;
695     final double[][][] ret = new double[factors.length][][];
696     int product = 1;
697     for (int i = 0; i < factors.length; i++) {
698       int factor = factors[i];
699       int product_1 = product;
700       product *= factor;
701       // The number of twiddle factors for the current factor.
702       int outLoopLimit = n / product;
703       // For the general odd pass, we need to add 1.
704       if (factor >= firstUnavailablePrime) {
705         outLoopLimit += 1;
706       }
707       final int nTwiddle = factor - 1;
708       // System.out.println("\n  Factor: " + factor);
709       // System.out.println("   Product: " + product);
710       ret[i] = new double[outLoopLimit][2 * nTwiddle];
711       // System.out.printf("   Size: T(%d,%d)\n", outLoopLimit, nTwiddle);
712       final double[][] twid = ret[i];
713       for (int j = 0; j < factor - 1; j++) {
714         twid[0][2 * j] = 1.0;
715         twid[0][2 * j + 1] = 0.0;
716         // System.out.printf("    T(%d,%d) = %10.6f %10.6f\n", 0, j, twid[0][2 * j], twid[0][2 * j + 1]);
717       }
718       for (int k = 1; k < outLoopLimit; k++) {
719         int m = 0;
720         for (int j = 0; j < nTwiddle; j++) {
721           m += k * product_1;
722           m %= n;
723           final double theta = TwoPI_N * m;
724           twid[k][2 * j] = cos(theta);
725           twid[k][2 * j + 1] = sin(theta);
726           // System.out.printf("    T(%d,%d) = %10.6f %10.6f\n", k, j, twid[k][2 * j], twid[k][2 * j + 1]);
727         }
728       }
729     }
730     return ret;
731   }
732 
733   /**
734    * Static DFT method used to test the FFT.
735    *
736    * @param in  input array.
737    * @param out output array.
738    */
739   public static void dft(double[] in, double[] out) {
740     int n = in.length / 2;
741     for (int k = 0; k < n; k++) { // For each output element
742       double sumReal = 0;
743       double simImag = 0;
744       for (int t = 0; t < n; t++) { // For each input element
745         double angle = (2 * PI * t * k) / n;
746         int re = 2 * t;
747         int im = 2 * t + 1;
748         sumReal = fma(in[re], cos(angle), sumReal);
749         sumReal = fma(in[im], sin(angle), sumReal);
750         simImag = fma(-in[re], sin(angle), simImag);
751         simImag = fma(in[im], cos(angle), simImag);
752       }
753       int re = 2 * k;
754       int im = 2 * k + 1;
755       out[re] = sumReal;
756       out[im] = simImag;
757     }
758   }
759 
760   /**
761    * Static DFT method used to test the FFT.
762    *
763    * @param in  input array.
764    * @param out output array.
765    */
766   public static void dftBlocked(double[] in, double[] out) {
767     int n = in.length / 2;
768     for (int k = 0; k < n; k++) { // For each output element
769       double sumReal = 0;
770       double simImag = 0;
771       for (int t = 0; t < n; t++) { // For each input element
772         double angle = (2 * PI * t * k) / n;
773         int re = t;
774         int im = t + n;
775         sumReal = fma(in[re], cos(angle), sumReal);
776         sumReal = fma(in[im], sin(angle), sumReal);
777         simImag = fma(-in[re], sin(angle), simImag);
778         simImag = fma(in[im], cos(angle), simImag);
779       }
780       int re = k;
781       int im = k + n;
782       out[re] = sumReal;
783       out[im] = simImag;
784     }
785   }
786 
787   /**
788    * Test the Complex FFT.
789    *
790    * @param args an array of {@link java.lang.String} objects.
791    * @throws java.lang.Exception if any.
792    * @since 1.0
793    */
794   public static void main(String[] args) throws Exception {
795     int dimNotFinal = 128;
796     int reps = 5;
797     try {
798       dimNotFinal = Integer.parseInt(args[0]);
799       if (dimNotFinal < 1) {
800         dimNotFinal = 128;
801       }
802       reps = Integer.parseInt(args[1]);
803       if (reps < 1) {
804         reps = 5;
805       }
806     } catch (Exception e) {
807       //
808     }
809     final int dim = dimNotFinal;
810     System.out.printf("Initializing a 1D array of length %d.\n"
811         + "The best timing out of %d repetitions will be used.%n", dim, reps);
812     Complex complex = new Complex(dim);
813     final double[] data = new double[dim * 2];
814     Random random = new Random(1);
815     for (int i = 0; i < dim; i++) {
816       data[2 * i] = random.nextDouble();
817     }
818     double toSeconds = 0.000000001;
819     long seqTime = Long.MAX_VALUE;
820     for (int i = 0; i < reps; i++) {
821       System.out.printf("Iteration %d%n", i + 1);
822       long time = System.nanoTime();
823       complex.fft(data, 0, 2);
824       complex.ifft(data, 0, 2);
825       time = (System.nanoTime() - time);
826       System.out.printf("Sequential: %12.9f%n", toSeconds * time);
827       if (time < seqTime) {
828         seqTime = time;
829       }
830     }
831     System.out.printf("Best Sequential Time:  %12.9f%n", toSeconds * seqTime);
832   }
833 
834 }