View Javadoc
1   // ******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2026.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  // ******************************************************************************
38  package ffx.numerics.fft;
39  
40  import java.util.Arrays;
41  import java.util.Random;
42  import java.util.Vector;
43  import java.util.logging.Level;
44  import java.util.logging.Logger;
45  
46  import static java.lang.Integer.max;
47  import static java.lang.Math.fma;
48  import static java.lang.System.arraycopy;
49  import static org.apache.commons.math3.util.FastMath.PI;
50  import static org.apache.commons.math3.util.FastMath.cos;
51  import static org.apache.commons.math3.util.FastMath.sin;
52  
53  /**
54   * Compute the FFT of complex, double precision data of arbitrary length n. This class uses a mixed
55   * radix method and has special methods to handle factors [2, 3, 4, 5, 6, 7] and a general method for
56   * larger prime factors.
57   *
58   * @author Michal J. Schnieders<br>
59   * Derived from:
60   * <br>
61   * Bruce R. Miller (bruce.miller@nist.gov)
62   * <br>
63   * Contribution of the National Institute of Standards and Technology, not subject to copyright.
64   * <br>
65   * Derived from:
66   * <br>
67   * GSL (Gnu Scientific Library) FFT Code by Brian Gough (bjg@network-theory.co.uk)
68   * @see <ul>
69   * <li><a href="http://dx.doi.org/10.1016/0021-9991(83)90013-X" target="_blank"> Clive
70   * Temperton. Self-sorting mixed-radix fast fourier transforms. Journal of Computational
71   * Physics, 52(1):1-23, 1983. </a>
72   * <li><a href="http://www.jstor.org/stable/2003354" target="_blank"> J. W. Cooley and J. W.
73   * Tukey, Mathematics of Computation 19 (90), 297 (1965) </a>
74   * <li><a href="http://en.wikipedia.org/wiki/Fast_Fourier_transform" target="_blank">FFT at Wikipedia </a>
75   * </ul>
76   * @since 1.0
77   */
78  public class Complex {
79  
80    private static final Logger logger = Logger.getLogger(Complex.class.getName());
81    private static final int[] availableFactors = {7, 6, 5, 4, 3, 2};
82    // private static final int[] availableFactors = {4, 3, 2};
83    private static final int firstUnavailablePrime = 11;
84    // private static final int firstUnavailablePrime = 5;
85    /**
86     * Number of complex numbers in the transform.
87     */
88    private final int n;
89    /**
90     * The number of FFTs to process (default = 1).
91     */
92    private final int nFFTs;
93    /**
94     * Offset to the imaginary part of the input
95     * This is 1 for interleaved data.
96     * For blocked data, a typical offset is n in 1-dimension (or nX*nY + n in 2D).
97     */
98    private final int externalIm;
99    /**
100    * Offset to the imaginary part of the input.
101    * Internally this is 1 (for interleaved data) or n (for blocked data).
102    * <p>
103    * The internal imaginary offset cannot be larger than n due to use of a single scratch array of length 2n.
104    */
105   private final int im;
106   /**
107    * Internal increment for packing data (1 for blocked and 2 for interleaved data).
108    */
109   private final int ii;
110   /**
111    * Factorization of n.
112    */
113   private final int[] factors;
114   /**
115    * Packing of non-contiguous data.
116    */
117   private final double[] packedData;
118   /**
119    * Scratch space for the transform.
120    */
121   private final double[] scratch;
122   /**
123    * Constants for each pass.
124    */
125   private final MixedRadixFactor[] mixedRadixFactors;
126   /**
127    * Pass specific data for the transform with
128    * references to the input and output data arrays.
129    */
130   private final PassData[] passData;
131   /**
132    * Use SIMD operators.
133    */
134   private boolean useSIMD;
135   /**
136    * The SIMD width to prefer. The default is the preferred width for the platform, but
137    * can be set to less than this by the flag fft.width.
138    */
139   private int simdWidth;
140   /**
141    * Minimum SIMD loop length set to the preferred SIMD vector length.
142    */
143   private int minSIMDLoopLength;
144 
145   /**
146    * Cache the last input dimension.
147    */
148   private static int lastN = -1;
149   /**
150    * Cache the last imaginary offset.
151    */
152   private static int lastIm = -1;
153   /**
154    * Cache the last nFFT size.
155    */
156   private static int lastNFFTs = -1;
157   /**
158    * Cache the last set of radix factors.
159    */
160   private static int[] factorsCache = null;
161   /**
162    * Cache the last set of radix factors. These classes are static and thread-safe.
163    */
164   private static MixedRadixFactor[] mixedRadixFactorsCache = null;
165 
166   /**
167    * Construct a Complex instance for interleaved data of length n. Factorization of n is designed to use special
168    * methods for small factors, and a general routine for large odd prime factors. Scratch memory is
169    * created of length 2*n, which is reused each time a transform is computed.
170    *
171    * @param n Number of complex numbers (n .GT. 1).
172    */
173   public Complex(int n) {
174     this(n, DataLayout1D.INTERLEAVED, 1);
175   }
176 
177   /**
178    * Construct a Complex instance for data of length n.
179    * The offset to each imaginary part relative to the real part is given by im.
180    * Factorization of n is designed to use special methods for small factors.
181    * Scratch memory is created of length 2*n, which is reused each time a transform is computed.
182    *
183    * @param n          Number of complex numbers (n .GT. 1).
184    * @param dataLayout Data layout (interleaved or blocked).
185    * @param imOffset   Offset to the imaginary part of each complex number relative to its real part.
186    */
187   public Complex(int n, DataLayout1D dataLayout, int imOffset) {
188     this(n, dataLayout, imOffset, 1);
189   }
190 
191   /**
192    * Construct a Complex instance for data of length n.
193    * The offset to each imaginary part relative to the real part is given by im.
194    * Factorization of n is designed to use special methods for small factors.
195    * Scratch memory is created of length 2*n, which is reused each time a transform is computed.
196    *
197    * @param n          Number of complex numbers (n .GT. 1).
198    * @param dataLayout Data layout (interleaved or blocked).
199    * @param imOffset   Offset to the imaginary part of each complex number relative to its real part.
200    * @param nFFTs      Number of FFTs to process (default = 1).
201    */
202   public Complex(int n, DataLayout1D dataLayout, int imOffset, int nFFTs) {
203     assert (n > 1);
204     this.n = n;
205     this.nFFTs = nFFTs;
206     this.externalIm = imOffset;
207 
208     /*
209      * The internal imaginary offset is always 1 or n.
210      *
211      * If the external imaginary offset is greater than n,
212      * the data will be packed into a contiguous array.
213      */
214     if (dataLayout == DataLayout1D.INTERLEAVED) {
215       im = 1;
216       ii = 2;
217     } else {
218       im = n * nFFTs;
219       ii = 1;
220     }
221     packedData = new double[2 * n * nFFTs];
222     scratch = new double[2 * n * nFFTs];
223     passData = new PassData[2];
224     passData[0] = new PassData(1, packedData, 0, scratch, 0);
225     passData[1] = new PassData(1, packedData, 0, scratch, 0);
226 
227     // For a 3D transform with 64 threads, there will be 3 * 64 = 192 transforms created.
228     // To conserve memory, the most recent set of twiddles and mixed radix factors are cached.
229     // Successive transforms with the same dimension and imaginary offset will reuse the cached values.
230     // Synchronize the creation of the twiddle factors.
231     synchronized (Complex.class) {
232       // The last set of factors, twiddles and mixed radix factors will be reused.
233       if (this.n == lastN && this.im == lastIm && this.nFFTs == lastNFFTs) {
234         factors = factorsCache;
235         mixedRadixFactors = mixedRadixFactorsCache;
236       } else {
237         // The cache cannot be reused and will be updated.
238         factors = factor(n);
239         double[][][] twiddle = wavetable(n, factors);
240         mixedRadixFactors = new MixedRadixFactor[factors.length];
241         lastN = this.n;
242         lastIm = this.im;
243         lastNFFTs = this.nFFTs;
244         factorsCache = factors;
245         mixedRadixFactorsCache = mixedRadixFactors;
246         // Allocate space for each pass and radix instances for each factor.
247         int product = 1;
248         for (int i = 0; i < factors.length; i++) {
249           final int factor = factors[i];
250           product *= factor;
251           PassConstants passConstants = new PassConstants(n, im, nFFTs, factor, product, twiddle[i]);
252           switch (factor) {
253             case 2 -> mixedRadixFactors[i] = new MixedRadixFactor2(passConstants);
254             case 3 -> mixedRadixFactors[i] = new MixedRadixFactor3(passConstants);
255             case 4 -> mixedRadixFactors[i] = new MixedRadixFactor4(passConstants);
256             case 5 -> mixedRadixFactors[i] = new MixedRadixFactor5(passConstants);
257             case 6 -> mixedRadixFactors[i] = new MixedRadixFactor6(passConstants);
258             case 7 -> mixedRadixFactors[i] = new MixedRadixFactor7(passConstants);
259             default -> {
260               if (dataLayout == DataLayout1D.BLOCKED) {
261                 throw new IllegalArgumentException(
262                     " Prime factors greater than 7 are only supported for interleaved data: " + factor);
263               }
264               mixedRadixFactors[i] = new MixedRadixFactorPrime(passConstants);
265             }
266           }
267           // logger.info(" Mixed Radix Factor: " + mixedRadixFactors[i]);
268         }
269       }
270 
271       // Use SIMD by default.
272       useSIMD = true;
273       String simd = System.getProperty("fft.simd", Boolean.toString(useSIMD));
274       try {
275         useSIMD = Boolean.parseBoolean(simd);
276       } catch (Exception e) {
277         logger.info(" Invalid value for fft.simd: " + simd);
278         useSIMD = false;
279       }
280 
281       simdWidth = MixedRadixFactor.LENGTH;
282       String width = System.getProperty("fft.simd.width", Integer.toString(simdWidth));
283       try {
284         simdWidth = Integer.parseInt(width);
285         if (simdWidth < 2 || simdWidth > MixedRadixFactor.LENGTH || simdWidth % 2 != 0) {
286           logger.info(" Invalid value for fft.simd.width: " + width);
287           simdWidth = MixedRadixFactor.LENGTH;
288         }
289       } catch (Exception e) {
290         logger.info(" Invalid value for fft.simd.width: " + width);
291         simdWidth = MixedRadixFactor.LENGTH;
292       }
293 
294       for (MixedRadixFactor mixedRadixFactor : mixedRadixFactors) {
295         mixedRadixFactor.setSIMDWidth(simdWidth);
296         // logger.info(" " + mixedRadixFactor);
297       }
298 
299       // Minimum SIMD inner loop length.
300       // For interleaved data, the default minimum SIMD loop length is 1 using AVX-128 (1 Real and 1 Imaginary per load).
301       // For blocked data, the default minimum SIMD loop length is 2 using AVX-128 (2 Real or 2 Imaginary per load).
302       // Setting this value higher than one reverts to scalar operations for short inner loop lengths.
303       if (im == 1) {
304         // Interleaved data.
305         minSIMDLoopLength = MixedRadixFactor.LENGTH / 2;
306       } else {
307         // Blocked data.
308         minSIMDLoopLength = MixedRadixFactor.LENGTH;
309       }
310       String loop = System.getProperty("fft.minLoop", Integer.toString(minSIMDLoopLength));
311       try {
312         minSIMDLoopLength = max(minSIMDLoopLength, Integer.parseInt(loop));
313       } catch (Exception e) {
314         logger.info(" Invalid value for fft.minLoop: " + loop);
315         if (im == 1) {
316           // Interleaved data.
317           minSIMDLoopLength = MixedRadixFactor.LENGTH / 2;
318         } else {
319           // Blocked data.
320           minSIMDLoopLength = MixedRadixFactor.LENGTH;
321         }
322       }
323     }
324   }
325 
326   /**
327    * String representation of the Complex FFT.
328    *
329    * @return a String.
330    */
331   @Override
332   public String toString() {
333     StringBuilder sb = new StringBuilder(" Complex FFT: n = " + n + ", nFFTs = " + nFFTs + ", im = " + externalIm);
334     sb.append("\n  Factors: ").append(Arrays.toString(factors));
335     return sb.toString();
336   }
337 
338   /**
339    * Configure use of SIMD operators.
340    *
341    * @param useSIMD True to use SIMD operators.
342    */
343   public void setUseSIMD(boolean useSIMD) {
344     this.useSIMD = useSIMD;
345   }
346 
347   /**
348    * Configure the minimum SIMD inner loop length.
349    * For interleaved data, the default minimum SIMD loop length is 1 using AVX-128 (1 Real and 1 Imaginary per load).
350    * For blocked data, the default minimum SIMD loop length is 1 using AVX-128 (2 Real or 2 Imaginary per load).
351    * Setting this value higher than one reverts to scalar operations for short inner loop lengths.
352    *
353    * @param minSIMDLoopLength Minimum SIMD inner loop length.
354    */
355   public void setMinSIMDLoopLength(int minSIMDLoopLength) {
356     if (im == 1 && minSIMDLoopLength < 1) {
357       throw new IllegalArgumentException(" Minimum SIMD loop length for interleaved data is 1 or greater.");
358     }
359     if (im > 2 && minSIMDLoopLength < 2) {
360       throw new IllegalArgumentException(" Minimum SIMD loop length for blocked data is 2 or greater.");
361     }
362     this.minSIMDLoopLength = minSIMDLoopLength;
363   }
364 
365   /**
366    * Check if a dimension is a preferred dimension.
367    *
368    * @param dim the dimension to check.
369    * @return true if the dimension is a preferred dimension.
370    */
371   public static boolean preferredDimension(int dim) {
372     if (dim < 2) {
373       return false;
374     }
375 
376     // Apply preferred factors.
377     for (int factor : availableFactors) {
378       while ((dim % factor) == 0) {
379         dim /= factor;
380       }
381     }
382     return dim <= 1;
383   }
384 
385   /**
386    * Getter for the field <code>factors</code>.
387    *
388    * @return an array of int.
389    */
390   public int[] getFactors() {
391     return factors;
392   }
393 
394   /**
395    * Compute the Fast Fourier Transform of data leaving the result in data. The array data must
396    * contain the data points in the following locations:
397    *
398    * <PRE>
399    * Re(d[i]) = data[offset + stride*i]
400    * Im(d[i]) = data[offset + stride*i + im]
401    * </PRE>
402    * <p>
403    * where im is 1 for interleaved data or a constant set when the class was constructed.
404    *
405    * @param data   an array of double.
406    * @param offset the offset to the beginning of the data.
407    * @param stride the stride between data points.
408    */
409   public void fft(double[] data, int offset, int stride) {
410     transformInternal(data, offset, stride, -1, 2 * n);
411   }
412 
413   /**
414    * Compute the Fast Fourier Transform of data leaving the result in data.
415    * The array data must contain the data points in the following locations:
416    *
417    * <PRE>
418    * Re(d[i]) = data[offset + stride*i] + k * nextFFT
419    * Im(d[i]) = data[offset + stride*i + im] + k * nextFFT
420    * </PRE>
421    * <p>
422    * where im is 1 for interleaved data or a constant set when the class was constructed.
423    * The value of k is the FFT number (0 to nFFTs-1).
424    * The value of nextFFT is the stride between FFT data sets. The nextFFT value is ignored if the number of FFTs is 1.
425    *
426    * @param data    an array of double.
427    * @param offset  the offset to the beginning of the data.
428    * @param stride  the stride between data points.
429    * @param nextFFT the offset to the beginning of the next FFT when nFFTs > 1.
430    */
431   public void fft(double[] data, int offset, int stride, int nextFFT) {
432     transformInternal(data, offset, stride, -1, nextFFT);
433   }
434 
435   /**
436    * Compute the (un-normalized) inverse FFT of data, leaving it in place. The frequency domain data
437    * must be in wrap-around order, and be stored in the following locations:
438    *
439    * <PRE>
440    * Re(D[i]) = data[offset + stride*i]
441    * Im(D[i]) = data[offset + stride*i + im]
442    * </PRE>
443    *
444    * @param data   an array of double.
445    * @param offset the offset to the beginning of the data.
446    * @param stride the stride between data points.
447    */
448   public void ifft(double[] data, int offset, int stride) {
449     transformInternal(data, offset, stride, +1, 2 * n);
450   }
451 
452   /**
453    * Compute the (un-normalized) inverse FFT of data, leaving it in place. The frequency domain data
454    * must be in wrap-around order, and be stored in the following locations:
455    *
456    * <PRE>
457    * Re(d[i]) = data[offset + stride*i] + k * nextFFT
458    * Im(d[i]) = data[offset + stride*i + im] + k * nextFFT
459    * </PRE>
460    * <p>
461    * where im is 1 for interleaved data or a constant set when the class was constructed.
462    * The value of k is the FFT number (0 to nFFTs-1).
463    * The value of nextFFT is the stride between FFT data sets. The nextFFT value is ignored if the number of FFTs is 1.
464    *
465    * @param data    an array of double.
466    * @param offset  the offset to the beginning of the data.
467    * @param stride  the stride between data points.
468    * @param nextFFT the offset to the beginning of the next FFT when nFFTs > 1.
469    */
470   public void ifft(double[] data, int offset, int stride, int nextFFT) {
471     transformInternal(data, offset, stride, +1, nextFFT);
472   }
473 
474   /**
475    * Compute the normalized inverse FFT of data, leaving it in place. The frequency domain data must
476    * be stored in the following locations:
477    *
478    * <PRE>
479    * Re(d[i]) = data[offset + stride*i]
480    * Im(d[i]) = data[offset + stride*i + im]
481    * </PRE>
482    * <p>
483    * where im is 1 for interleaved data or a constant set when the class was constructed.
484    *
485    * @param data   an array of double.
486    * @param offset the offset to the beginning of the data.
487    * @param stride the stride between data points.
488    */
489   public void inverse(double[] data, int offset, int stride) {
490     inverse(data, offset, stride, 2 * n);
491   }
492 
493   /**
494    * Compute the normalized inverse FFT of data, leaving it in place. The frequency domain data must
495    * be stored in the following locations:
496    *
497    * <PRE>
498    * Re(d[i]) = data[offset + stride*i] + k * nextFFT
499    * Im(d[i]) = data[offset + stride*i + im] + k * nextFFT
500    * </PRE>
501    * <p>
502    * where im is 1 for interleaved data or a constant set when the class was constructed.
503    * The value of k is the FFT number (0 to nFFTs-1).
504    * The value of nextFFT is the stride between FFT data sets. The nextFFT value is ignored if the number of FFTs is 1.
505    *
506    * @param data    an array of double.
507    * @param offset  the offset to the beginning of the data.
508    * @param stride  the stride between data points.
509    * @param nextFFT the offset to the beginning of the next FFT when nFFTs > 1.
510    */
511   public void inverse(double[] data, int offset, int stride, int nextFFT) {
512     ifft(data, offset, stride, nextFFT);
513 
514     // Normalize inverse FFT with 1/n.
515     double norm = normalization();
516     int index = 0;
517     for (int f = 0; f < nFFTs; f++) {
518       for (int i = 0; i < 2 * n; i++) {
519         data[index++] *= norm;
520       }
521     }
522   }
523 
524   /**
525    * Compute the Fast Fourier Transform of data leaving the result in data.
526    *
527    * @param data    data an array of double.
528    * @param offset  the offset to the beginning of the data.
529    * @param stride  the stride between data points.
530    * @param sign    the sign to apply (forward -1 and inverse 1).
531    * @param nextFFT the offset to the beginning of the next FFT when nFFTs > 1.
532    */
533   private void transformInternal(
534       final double[] data, final int offset, final int stride, final int sign, final int nextFFT) {
535 
536     // Even pass data.
537     passData[0].sign = sign;
538     passData[0].in = data;
539     passData[0].inOffset = offset;
540     passData[0].out = scratch;
541     passData[0].outOffset = 0;
542     // Odd pass data.
543     passData[1].sign = sign;
544     passData[1].in = scratch;
545     passData[1].inOffset = 0;
546     passData[1].out = data;
547     passData[1].outOffset = offset;
548 
549     // Configure the pass data for the transform.
550     boolean packed = false;
551     if (stride > 2 || externalIm > n * nFFTs) {
552       // System.out.println(" Packing data: " + n + " offset " + offset + " stride " + stride + " nextFFT " + nextFFT + " n * nFFTs " + (n * nFFTs) + " externalIm " + externalIm);
553       // Pack non-contiguous (stride > 2) data into a contiguous array.
554       packed = true;
555       pack(data, offset, stride, nextFFT);
556       // The even pass input data is in the packedData array with no offset.
557       passData[0].in = packedData;
558       passData[0].inOffset = 0;
559       // The odd pass input data is in the scratch array with no offset.
560       passData[1].out = packedData;
561       passData[1].outOffset = 0;
562     }
563 
564     // Perform the FFT by looping over the factors.
565     final int nfactors = factors.length;
566     for (int i = 0; i < nfactors; i++) {
567       final int pass = i % 2;
568       MixedRadixFactor mixedRadixFactor = mixedRadixFactors[i];
569       boolean applySIMD = false;
570       // If the useSIMD flag is true, evaluate this pass for optimal SIMD width.
571       if (useSIMD) {
572         // Check the requested SIMD species.
573 
574       }
575 
576       if (useSIMD && mixedRadixFactor.innerLoopLimit >= minSIMDLoopLength) {
577         mixedRadixFactor.passSIMD(passData[pass]);
578       } else {
579         mixedRadixFactor.passScalar(passData[pass]);
580       }
581     }
582 
583     // If the number of factors is odd, the final result is in the scratch array.
584     if (nfactors % 2 == 1) {
585       // Copy the scratch array to the data array.
586       if (stride <= 2 && (im == externalIm) && nextFFT == 2 * n) {
587         arraycopy(scratch, 0, data, offset, 2 * n * nFFTs);
588       } else {
589         unpack(scratch, data, offset, stride, nextFFT);
590       }
591       // If the number of factors is even, the data may need to be unpacked.
592     } else if (packed) {
593       unpack(packedData, data, offset, stride, nextFFT);
594     }
595   }
596 
597   /**
598    * Pack the data into a contiguous array.
599    *
600    * @param data    the data to pack.
601    * @param offset  the offset to the first point data.
602    * @param stride  the stride between data points.
603    * @param nextFFT the stride between FFTs.
604    */
605   private void pack(double[] data, int offset, int stride, int nextFFT) {
606     int i = 0;
607     for (int f = 0; f < nFFTs; f++) {
608       int inputOffset = offset + f * nextFFT;
609       for (int index = inputOffset, k = 0; k < n; k++, i += ii, index += stride) {
610         packedData[i] = data[index];
611         packedData[i + im] = data[index + externalIm];
612       }
613     }
614   }
615 
616   /**
617    * Pack the data into a contiguous array.
618    *
619    * @param data    the location to unpack the data to.
620    * @param offset  the offset to the first point data.
621    * @param stride  the stride between data points.
622    * @param nextFFT the stride between FFTs.
623    */
624   private void unpack(double[] source, double[] data, int offset, int stride, int nextFFT) {
625     int i = 0;
626     for (int f = 0; f < nFFTs; f++) {
627       int outputOffset = offset + f * nextFFT;
628       for (int index = outputOffset, k = 0; k < n; k++, i += ii, index += stride) {
629         data[index] = source[i];
630         data[index + externalIm] = source[i + im];
631       }
632     }
633   }
634 
635   /**
636    * Return the normalization factor. Multiply the elements of the back-transformed data to get the
637    * normalized inverse.
638    *
639    * @return a double.
640    */
641   private double normalization() {
642     return 1.0 / n;
643   }
644 
645   /**
646    * Factor the data length into preferred factors (those with special methods), falling back to odd
647    * primes that the general routine must handle.
648    *
649    * @param n the length of the data.
650    * @return integer factors
651    */
652   protected static int[] factor(int n) {
653     if (n < 2) {
654       return null;
655     }
656     Vector<Integer> v = new Vector<>();
657     int nTest = n;
658 
659     // Use the preferred factors first
660     for (int factor : availableFactors) {
661       while ((nTest % factor) == 0) {
662         nTest /= factor;
663         v.add(factor);
664       }
665     }
666 
667     // Unavailable odd prime factors.
668     int factor = firstUnavailablePrime;
669     while (nTest > 1) {
670       while ((nTest % factor) != 0) {
671         factor += 2;
672       }
673       nTest /= factor;
674       v.add(factor);
675     }
676     int product = 1;
677     int nf = v.size();
678     int[] ret = new int[nf];
679     for (int i = 0; i < nf; i++) {
680       ret[i] = v.get(i);
681       product *= ret[i];
682     }
683 
684     // Report a failed factorization.
685     if (product != n) {
686       StringBuilder sb = new StringBuilder(" FFT factorization failed for " + n + "\n");
687       for (int i = 0; i < nf; i++) {
688         sb.append(" ");
689         sb.append(ret[i]);
690       }
691       sb.append("\n");
692       sb.append(" Factor product = ");
693       sb.append(product);
694       sb.append("\n");
695       logger.severe(sb.toString());
696       System.exit(-1);
697     } else {
698       if (logger.isLoggable(Level.FINEST)) {
699         StringBuilder sb = new StringBuilder(" FFT factorization for " + n + " = ");
700         for (int i = 0; i < nf - 1; i++) {
701           sb.append(ret[i]);
702           sb.append(" * ");
703         }
704         sb.append(ret[nf - 1]);
705         logger.finest(sb.toString());
706       }
707     }
708     return ret;
709   }
710 
711   /**
712    * Compute twiddle factors. These are trigonometric constants that depend on the factoring of n.
713    *
714    * @param n       the length of the data.
715    * @param factors the factors of n.
716    * @return twiddle factors.
717    */
718   private static double[][][] wavetable(int n, int[] factors) {
719     if (n < 2) {
720       return null;
721     }
722 
723     // System.out.println(" Computing twiddle factors for length :" + n);
724     // System.out.println(" Number of factors: " + factors.length);
725 
726     final double TwoPI_N = -2.0 * PI / n;
727     final double[][][] ret = new double[factors.length][][];
728     int product = 1;
729     for (int i = 0; i < factors.length; i++) {
730       int factor = factors[i];
731       int product_1 = product;
732       product *= factor;
733       // The number of twiddle factors for the current factor.
734       int outLoopLimit = n / product;
735       // For the general odd pass, we need to add 1.
736       if (factor >= firstUnavailablePrime) {
737         outLoopLimit += 1;
738       }
739       final int nTwiddle = factor - 1;
740       // System.out.println("\n  Factor: " + factor);
741       // System.out.println("   Product: " + product);
742       ret[i] = new double[outLoopLimit][2 * nTwiddle];
743 
744       // System.out.printf("   Size: T(%d,%d)\n", outLoopLimit, nTwiddle);
745       final double[][] twid = ret[i];
746       for (int j = 0; j < factor - 1; j++) {
747         twid[0][2 * j] = 1.0;
748         twid[0][2 * j + 1] = 0.0;
749         // System.out.printf("  T(%d,%d) = %10.6f %10.6f\n", 0, j, twid[0][2 * j], twid[0][2 * j + 1]);
750       }
751       for (int k = 1; k < outLoopLimit; k++) {
752         int m = 0;
753         for (int j = 0; j < nTwiddle; j++) {
754           m += k * product_1;
755           m %= n;
756           final double theta = TwoPI_N * m;
757           twid[k][2 * j] = cos(theta);
758           twid[k][2 * j + 1] = sin(theta);
759           // System.out.printf("  T(%d,%d) = %10.6f %10.6f\n", k, j, twid[k][2 * j], twid[k][2 * j + 1]);
760         }
761       }
762     }
763     return ret;
764   }
765 
766   /**
767    * Static DFT method used to test the FFT.
768    *
769    * @param in  input array.
770    * @param out output array.
771    */
772   public static void dft(double[] in, double[] out) {
773     int n = in.length / 2;
774     for (int k = 0; k < n; k++) { // For each output element
775       double sumReal = 0;
776       double simImag = 0;
777       for (int t = 0; t < n; t++) { // For each input element
778         double angle = (2 * PI * t * k) / n;
779         int re = 2 * t;
780         int im = 2 * t + 1;
781         sumReal = fma(in[re], cos(angle), sumReal);
782         sumReal = fma(in[im], sin(angle), sumReal);
783         simImag = fma(-in[re], sin(angle), simImag);
784         simImag = fma(in[im], cos(angle), simImag);
785       }
786       int re = 2 * k;
787       int im = 2 * k + 1;
788       out[re] = sumReal;
789       out[im] = simImag;
790     }
791   }
792 
793   /**
794    * Static DFT method used to test the FFT.
795    *
796    * @param in  input array.
797    * @param out output array.
798    */
799   public static void dftBlocked(double[] in, double[] out) {
800     int n = in.length / 2;
801     for (int k = 0; k < n; k++) { // For each output element
802       double sumReal = 0;
803       double simImag = 0;
804       for (int t = 0; t < n; t++) { // For each input element
805         double angle = (2 * PI * t * k) / n;
806         int re = t;
807         int im = t + n;
808         sumReal = fma(in[re], cos(angle), sumReal);
809         sumReal = fma(in[im], sin(angle), sumReal);
810         simImag = fma(-in[re], sin(angle), simImag);
811         simImag = fma(in[im], cos(angle), simImag);
812       }
813       int re = k;
814       int im = k + n;
815       out[re] = sumReal;
816       out[im] = simImag;
817     }
818   }
819 
820   /**
821    * Test the Complex FFT.
822    *
823    * @param args an array of {@link java.lang.String} objects.
824    * @throws java.lang.Exception if any.
825    * @since 1.0
826    */
827   public static void main(String[] args) throws Exception {
828     int dimNotFinal = 128;
829     int reps = 5;
830     boolean blocked = false;
831     try {
832       dimNotFinal = Integer.parseInt(args[0]);
833       if (dimNotFinal < 1) {
834         dimNotFinal = 128;
835       }
836       reps = Integer.parseInt(args[1]);
837       if (reps < 1) {
838         reps = 5;
839       }
840       blocked = Boolean.parseBoolean(args[2]);
841     } catch (Exception e) {
842       //
843     }
844     final int dim = dimNotFinal;
845     System.out.printf("Initializing a 1D array of length %d.\n"
846         + "The best timing out of %d repetitions will be used.%n", dim, reps);
847     DataLayout1D dataLayout1D = DataLayout1D.INTERLEAVED;
848     int im = 1;
849     if (blocked) {
850       dataLayout1D = DataLayout1D.BLOCKED;
851       im = dim;
852     }
853     Complex complex = new Complex(dim, dataLayout1D, im);
854     final double[] data = new double[dim * 2];
855     Random random = new Random(1);
856     for (int i = 0; i < dim; i++) {
857       data[2 * i] = random.nextDouble();
858     }
859     double toSeconds = 0.000000001;
860     long seqTime = Long.MAX_VALUE;
861     for (int i = 0; i < reps; i++) {
862       System.out.printf("Iteration %d%n", i + 1);
863       long time = System.nanoTime();
864       complex.fft(data, 0, 2);
865       complex.ifft(data, 0, 2);
866       time = (System.nanoTime() - time);
867       System.out.printf("Sequential: %12.9f%n", toSeconds * time);
868       if (time < seqTime) {
869         seqTime = time;
870       }
871     }
872     System.out.printf("Best Sequential Time:  %12.9f%n", toSeconds * seqTime);
873   }
874 
875 }