View Javadoc
1   // ******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2026.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  // ******************************************************************************
38  package ffx.numerics.fft;
39  
40  import edu.rit.pj.IntegerForLoop;
41  import edu.rit.pj.ParallelRegion;
42  import edu.rit.pj.ParallelTeam;
43  
44  import java.util.Random;
45  
46  import static java.lang.Math.max;
47  import static java.lang.Math.min;
48  
49  /**
50   * Compute the 2D FFT of complex, double precision input of arbitrary dimensions
51   * via 1D Mixed Radix FFTs.
52   * <p>
53   * For interleaved data, the location of the input point [x, y] within the input array must be: <br>
54   * double real = input[x*nextX + y*nextY]<br>
55   * double imag = input[x*nextX + y*nextY + im]<br>
56   * where <br>
57   * nextX = 2
58   * nextY = 2*nX
59   * im = 1
60   * <p>
61   * For blocked data along x, the location of the input point [x, y] within the input array must be: <br>
62   * double real = input[x*nextX + y*nextY]<br>
63   * double imag = input[x*nextX + y*nextY + im]<br>
64   * where for BLOCKED_X <br>
65   * nextX = 1<br>
66   * nextY = 2*nX<br>
67   * im = nX<br>
68   * and for BLOCKED_XY <br>
69   * nextX = 1<br>
70   * nextY = nX<br>
71   * im = nX*nY<br>
72   *
73   * @author Michal J. Schnieders
74   * @see Complex
75   * @since 1.0
76   */
77  public class Complex2D {
78  
79    /**
80     * The 2D data layout.
81     */
82    private final DataLayout2D layout;
83    /**
84     * The external imaginary offset.
85     */
86    private final int externalIm;
87    /**
88     * The X-dimension.
89     */
90    private final int nX;
91    /**
92     * The Y-dimension.
93     */
94    private final int nY;
95    /**
96     * The next real value along the X dimension.
97     */
98    private final int nextX;
99    /**
100    * The next real value along the Y dimension.
101    */
102   private final int nextY;
103   /**
104    * Compute FFTs along X one at a time.
105    */
106   private final Complex fftX;
107   /**
108    * Compute FFTs along Y one at a time.
109    */
110   private final Complex fftY;
111   /**
112    * If true, pack FFTs along X (or Y) into a contiguous array to compute all FFTs along X (or Y) at once.
113    */
114   private boolean packFFTs;
115   /**
116    * If true, use SIMD instructions.
117    */
118   private boolean useSIMD;
119   /**
120    * Number of FFTs to compute along X in one batch. The optimal size depends on cache layout
121    * and SIMD register width. The default is to compute 8 at a time (tileSizeX = 8).
122    */
123   private int nFFTX;
124   /**
125    * Number of FFTs to compute along Y in one batch. The optimal size depends on cache layout
126    * and SIMD register width. The default is to compute 8 at a time (tileSizeY = 8).
127    */
128   private int nFFTY;
129   /**
130    * Compute tileSizeX FFTs along the X dimension all at once.
131    */
132   private final Complex fftXTile;
133   /**
134    * Compute tileSizeY FFTs along the Y dimension all at once.
135    */
136   private final Complex fftYTile;
137   /**
138    * Working array for packed FFTs.
139    */
140   private final double[] tile;
141   /**
142    * The offset between real values in the packed data.
143    */
144   private final int ii;
145   /**
146    * The offset between any real value and its corresponding imaginary value for the packed data.
147    */
148   private final int im;
149   /**
150    * For FFTs along X, the offset between any real value and its corresponding
151    * imaginary value for tiled data.
152    * This will be 1 for interleaved or nX for blocked.
153    */
154   private final int imX;
155   /**
156    * For FFTs along Y, the offset between any real value and its corresponding
157    * imaginary value for tiled data.
158    * This will be 1 for interleaved or nY for blocked.
159    */
160   private final int imY;
161   /**
162    * The offset between real values along the X-dimension in the transposed packed data.
163    */
164   private final int trNextX;
165   /**
166    * The offset between real values along the Y-dimension in the transposed packed data.
167    */
168   private final int trNextY;
169 
170   /**
171    * Create a new 2D Complex FFT for interleaved data.
172    *
173    * @param nX The number of points in the X dimension.
174    * @param nY The number of points in the Y dimension.
175    */
176   public Complex2D(int nX, int nY) {
177     this(nX, nY, DataLayout2D.INTERLEAVED, 1);
178   }
179 
180   /**
181    * Create a new 2D Complex FFT.
182    *
183    * @param nX       The number of points in the X dimension.
184    * @param nY       The number of points in the Y dimension.
185    * @param layout   The data layout.
186    * @param imOffset The offset between real and imaginary values.
187    */
188   public Complex2D(int nX, int nY, DataLayout2D layout, int imOffset) {
189     this.nX = nX;
190     this.nY = nY;
191     this.externalIm = imOffset;
192     this.layout = layout;
193 
194     int dX = nY;
195     String pX = System.getProperty("fft.tileX", Integer.toString(dX));
196     try {
197       nFFTX = Integer.parseInt(pX);
198       if (nFFTX < 1 || nFFTX > nY) {
199         nFFTX = dX;
200       }
201     } catch (Exception e) {
202       nFFTX = dX;
203     }
204 
205     int dY = nX;
206     String pY = System.getProperty("fft.tileY", Integer.toString(dY));
207     try {
208       nFFTY = Integer.parseInt(pY);
209       if (nFFTY < 1 || nFFTY > nX) {
210         nFFTY = dY;
211       }
212     } catch (Exception e) {
213       nFFTY = dY;
214     }
215 
216     if (layout == DataLayout2D.INTERLEAVED) {
217       im = 1;
218       imX = 1;
219       imY = 1;
220       ii = 2;
221       nextX = 2;
222       nextY = 2 * nX;
223       trNextY = 2;
224       trNextX = 2 * nY;
225     } else if (layout == DataLayout2D.BLOCKED_X) {
226       im = nX;
227       if (nFFTX == nX) {
228         // All at once.
229         imX = nX;
230       } else {
231         // Internal tiles using BLOCKED_XY
232         imX = nX * nFFTX;
233       }
234       if (nFFTY == nY) {
235         // All at once.
236         imY = nX;
237       } else {
238         // Internal Tiles using BLOCKED_XY
239         imY = nY * nFFTY;
240       }
241       ii = 1;
242       nextX = 1;
243       nextY = 2 * nX;
244       trNextY = 1;
245       trNextX = 2 * nY;
246     } else if (layout == DataLayout2D.BLOCKED_XY) {
247       im = nX * nY;
248       imX = nX * nFFTX;   // Packed, FFT along X
249       imY = nY * nFFTY;   // Packed, FFT along Y
250       ii = 1;
251       nextX = 1;
252       nextY = nX;
253       trNextY = 1;
254       trNextX = nY;
255     } else {
256       throw new IllegalArgumentException(" Unsupported data layout: " + layout);
257     }
258 
259     if (this.externalIm != im) {
260       throw new IllegalArgumentException(" Unsupported im offset: " + imOffset);
261     }
262 
263     // Use SIMD by default.
264     useSIMD = true;
265     String simd = System.getProperty("fft.simd", Boolean.toString(useSIMD));
266     try {
267       useSIMD = Boolean.parseBoolean(simd);
268     } catch (Exception e) {
269       useSIMD = false;
270     }
271 
272     packFFTs = true;
273     String pack = System.getProperty("fft.pack", Boolean.toString(packFFTs));
274     try {
275       packFFTs = Boolean.parseBoolean(pack);
276     } catch (Exception e) {
277       packFFTs = false;
278     }
279 
280     DataLayout1D layout1D;
281     if (layout == DataLayout2D.INTERLEAVED) {
282       layout1D = DataLayout1D.INTERLEAVED;
283     } else {
284       layout1D = DataLayout1D.BLOCKED;
285     }
286 
287     // Create 1D FFTs that will be used for computing 1 FFT at a time.
288     fftX = new Complex(nX, layout1D, im);
289     fftX.setUseSIMD(useSIMD);
290     fftY = new Complex(nY, layout1D, im);
291     fftY.setUseSIMD(useSIMD);
292 
293     // Create 1D FFTs that will be used for computing more than 1 FFT at a time.
294     fftXTile = new Complex(nX, layout1D, imX, nFFTX);
295     fftXTile.setUseSIMD(useSIMD);
296     fftYTile = new Complex(nY, layout1D, imY, nFFTY);
297     fftYTile.setUseSIMD(useSIMD);
298 
299     int arraySize = max(nX * nFFTX, nFFTY * nY);
300     tile = new double[2 * arraySize];
301   }
302 
303   /**
304    * Get the Data Layout.
305    *
306    * @return The layout.
307    */
308   public DataLayout2D getLayout() {
309     return layout;
310   }
311 
312   /**
313    * Set the 2D transform to use SIMD instructions.
314    *
315    * @param useSIMD True to use SIMD instructions.
316    */
317   public void setUseSIMD(boolean useSIMD) {
318     this.useSIMD = useSIMD;
319     fftX.setUseSIMD(useSIMD);
320     fftY.setUseSIMD(useSIMD);
321     fftXTile.setUseSIMD(useSIMD);
322     fftYTile.setUseSIMD(useSIMD);
323   }
324 
325   /**
326    * Set the 2D transform to pack FFTs into a contiguous array to compute all FFTs at once.
327    *
328    * @param packFFTs True to pack FFTs.
329    */
330   public void setPackFFTs(boolean packFFTs) {
331     this.packFFTs = packFFTs;
332   }
333 
334   /**
335    * Compute the 2D FFT.
336    *
337    * @param input The input array must be of size 2 * nX * nY.
338    * @param index The offset into the input array of the first element.
339    */
340   public void fft(final double[] input, int index) {
341     if (!packFFTs) {
342       for (int offset = index, y = 0; y < nY; y++, offset += nextY) {
343         fftX.fft(input, offset, nextX);
344       }
345       for (int offset = index, x = 0; x < nX; x++, offset += nextX) {
346         fftY.fft(input, offset, nextY);
347       }
348     } else {
349       if (nFFTY == nX) {
350         fftYTile.fft(input, index, ii);
351       } else {
352         for (int x = 0; x < nX; x += nFFTY) {
353           getFFTYTile(input, index, tile, x, nFFTY);
354           fftYTile.fft(tile, 0, ii);
355           setFFTYTile(input, index, tile, x, nFFTY);
356         }
357       }
358       if (nFFTX == nY) {
359         transpose(input, index);
360         fftXTile.fft(tile, 0, ii);
361         unTranspose(input, index);
362       } else {
363         for (int y = 0; y < nY; y += nFFTX) {
364           getFFTXTile(input, index, tile, y, nFFTX);
365           fftXTile.fft(tile, 0, ii);
366           setFFTXTile(input, index, tile, y, nFFTX);
367         }
368       }
369     }
370   }
371 
372   /**
373    * Compute the 2D IFFT.
374    *
375    * @param input The input array must be of size 2 * nX * nY.
376    * @param index The offset into the input array of the first element.
377    */
378   public void ifft(final double[] input, int index) {
379     if (!packFFTs) {
380       for (int offset = index, y = 0; y < nY; y++, offset += nextY) {
381         fftX.ifft(input, offset, nextX);
382       }
383       for (int offset = index, x = 0; x < nX; x++, offset += nextX) {
384         fftY.ifft(input, offset, nextY);
385       }
386     } else {
387       if (nFFTY == nX) {
388         fftYTile.ifft(input, index, ii);
389       } else {
390         for (int x = 0; x < nX; x += nFFTY) {
391           getFFTYTile(input, index, tile, x, nFFTY);
392           fftYTile.ifft(tile, 0, ii);
393           setFFTYTile(input, index, tile, x, nFFTY);
394         }
395       }
396       if (nFFTX == nY) {
397         transpose(input, index);
398         fftXTile.ifft(tile, 0, ii);
399         unTranspose(input, index);
400       } else {
401         for (int y = 0; y < nY; y += nFFTX) {
402           getFFTXTile(input, index, tile, y, nFFTX);
403           fftXTile.ifft(tile, 0, ii);
404           setFFTXTile(input, index, tile, y, nFFTX);
405         }
406       }
407     }
408   }
409 
410   /**
411    * The input is of size nX x nY.
412    * Extract a tile of size tileSize x nY.
413    * <p>
414    * Input order:
415    * real(x,y) = input[offset + x*nextX + y*nextY]
416    * imag(x,y) = input[offset + x*nextX + y*nextY + externalIm]
417    * Output::
418    * For each Y, all X-values for the tile are contiguous in memory.
419    *
420    * @param input    The input data.
421    * @param offset   The offset into the input data.
422    * @param tile     The output tile.
423    * @param firstX   The first x-index of the tile.
424    * @param tileSize The size of the tile along X.
425    */
426   private void getFFTYTile(final double[] input, int offset, final double[] tile, int firstX, int tileSize) {
427     int index = 0;
428     // Handle padding
429     int padX = firstX + tileSize;
430     int lastX = min(nX, padX);
431     // Outer loop over the Y dimension.
432     for (int y = 0; y < nY; y++) {
433       int dy = offset + y * nextY;
434       // Inner loop over the X dimension.
435       for (int x = firstX; x < lastX; x++) {
436         int dx = x * nextX;
437         double real = input[dx + dy];
438         double imag = input[dx + dy + externalIm];
439         // Contiguous storage into a packed array.
440         tile[index] = real;
441         tile[index + imY] = imag;
442         index += ii;
443       }
444       for (int x = lastX; x < padX; x++) {
445         index += ii;
446       }
447     }
448   }
449 
450   /**
451    * Set a tile of size tileSize x nY.
452    * Put it into the input array of size nX x nY.
453    * <p>
454    * Input array order:
455    * real(x,y) = input[offset + x*nextX + y*nextY]
456    * imag(x,y) = input[offset + x*nextX + y*nextY + externalIm]
457    *
458    * @param input    The input data.
459    * @param offset   The offset into the input data.
460    * @param tile     The output tile.
461    * @param firstX   The first x-index of the tile.
462    * @param tileSize The size of the tile along X.
463    */
464   private void setFFTYTile(final double[] input, int offset, final double[] tile, int firstX, int tileSize) {
465     int index = 0;
466     // Handle padding
467     int padX = firstX + tileSize;
468     int lastX = min(nX, padX);
469     // Outer loop over the Y dimension.
470     for (int y = 0; y < nY; y++) {
471       int dy = offset + y * nextY;
472       // Inner loop over the X dimension.
473       for (int x = firstX; x < lastX; x++) {
474         double real = tile[index];
475         double imag = tile[index + imY];
476         index += ii;
477         int dx = x * nextX;
478         input[dx + dy] = real;
479         input[dx + dy + externalIm] = imag;
480       }
481       for (int x = lastX; x < padX; x++) {
482         index += ii;
483       }
484     }
485   }
486 
487   /**
488    * The input is of size nX x nY.
489    * Extract a tile of size nX * tileSize.
490    * <p>
491    * Input order:
492    * real(x,y) = input[offset + x*nextX + y*nextY]
493    * imag(x,y) = input[offset + x*nextX + y*nextY + externalIm]
494    * Tile order:
495    * For each X, all Y-values for the tile a contiguous in memory.
496    *
497    * @param input    The input data.
498    * @param offset   The offset into the input data.
499    * @param tile     The output tile.
500    * @param firstY   The first y-index of the tile.
501    * @param tileSize The size of the tile along Y.
502    */
503   private void getFFTXTile(final double[] input, int offset, final double[] tile, int firstY, int tileSize) {
504     int index = 0;
505     // Handle padding
506     int padY = firstY + tileSize;
507     int lastY = min(nY, padY);
508     // Outer loop over the X dimension.
509     for (int x = 0; x < nX; x++) {
510       int dx = offset + x * nextX;
511       // Inner loop over the Y dimension.
512       for (int y = firstY; y < lastY; y++) {
513         int dy = y * nextY;
514         double real = input[dx + dy];
515         double imag = input[dx + dy + externalIm];
516         // Contiguous storage into a packed array.
517         tile[index] = real;
518         tile[index + imX] = imag;
519         index += ii;
520       }
521       for (int y = lastY; y < padY; y++) {
522         index += ii;
523       }
524     }
525   }
526 
527   /**
528    * Set a tile of size nX * tileSize.
529    * Results are written back into the "input" array of size nX x nY.
530    * <p>
531    * Tile order:
532    * For each X, all Y-values for the tile a contiguous in memory.
533    * <p>
534    * Input order:
535    * real(x,y) = input[offset + x*nextX + y*nextY]
536    * imag(x,y) = input[offset + x*nextX + y*nextY + externalIm]
537    *
538    * @param input    The input data.
539    * @param offset   The offset into the input data.
540    * @param tile     The output tile.
541    * @param firstY   The first y-index of the tile.
542    * @param tileSize The size of the tile along Y.
543    */
544   private void setFFTXTile(final double[] input, int offset, final double[] tile, int firstY, int tileSize) {
545     int index = 0;
546     // Handle padding
547     int padY = firstY + tileSize;
548     int lastY = min(nY, padY);
549     // Outer loop over the X dimension.
550     for (int x = 0; x < nX; x++) {
551       int dx = offset + x * nextX;
552       // Inner loop over the Y dimension.
553       for (int y = firstY; y < lastY; y++) {
554         double real = tile[index];
555         double imag = tile[index + imX];
556         index += ii;
557         int dy = y * nextY;
558         input[dx + dy] = real;
559         input[dx + dy + externalIm] = imag;
560       }
561       for (int y = lastY; y < padY; y++) {
562         index += ii;
563       }
564     }
565   }
566 
567   /**
568    * Pack the input array for Fourier transforms along the X dimension.
569    * <p>
570    * Input order:
571    * real(x,y) = input[offset + x*nextX + y*nextY]
572    * imag(x,y) = input[offset + x*nextX + y*nextY + im]
573    * Output order:
574    * real(x,y) = packedData[y*trNextY + x*trNextX]
575    * imag(x,y) = packedData[y*trNextY + x*trXextX + im]
576    *
577    * @param input  The input data.
578    * @param offset The offset into the input data.
579    */
580   private void transpose(final double[] input, int offset) {
581     int index = 0;
582     // Outer loop over the X dimension.
583     for (int x = 0; x < nX; x++) {
584       int dx = offset + x * nextX;
585       // Inner loop over the Y dimension (the number of FFTs).
586       int y = 0;
587 //      for (; y < nY - 1; y += 2, index += 2 * ii) {
588 //        int i1 = dx + y * nextY;
589 //        int i2 = i1 + nextY;
590 //        // Non-contiguous reads.
591 //        double real1 = input[i1];
592 //        double imag1 = input[i1 + externalIm];
593 //        double real2 = input[i2];
594 //        double imag2 = input[i2 + externalIm];
595 //        // Contiguous storage into the packed array.
596 //        tile[index] = real1;
597 //        tile[index + im] = imag1;
598 //        tile[index + ii] = real2;
599 //        tile[index + ii + im] = imag2;
600 //      }
601       for (; y < nY - 3; y += 4, index += 4*ii) {
602         int i1 = dx + y * nextY;
603         int i2 = i1 + nextY;
604         int i3 = i2 + nextY;
605         int i4 = i3 + nextY;
606         // Non-contiguous reads.
607         double real1 = input[i1];
608         double imag1 = input[i1 + externalIm];
609         double real2 = input[i2];
610         double imag2 = input[i2 + externalIm];
611         double real3 = input[i3];
612         double imag3 = input[i3 + externalIm];
613         double real4 = input[i4];
614         double imag4 = input[i4 + externalIm];
615         // Contiguous storage into the packed array.
616         int ii2 = ii + ii;
617         int ii3 = ii2 + ii;
618         tile[index] = real1;
619         tile[index + im] = imag1;
620         tile[index + ii] = real2;
621         tile[index + ii + im] = imag2;
622         tile[index + ii2] = real3;
623         tile[index + ii2 + im] = imag3;
624         tile[index + ii3] = real4;
625         tile[index + ii3 + im] = imag4;
626       }
627       for (; y < nY; y++, index += ii) {
628         double real = input[dx + y * nextY];
629         double imag = input[dx + y * nextY + externalIm];
630         // Contiguous storage into the packed array.
631         tile[index] = real;
632         tile[index + im] = imag;
633       }
634     }
635   }
636 
637   /**
638    * Unpack the output array after Fourier transforms.
639    * <p>
640    * Input order:
641    * real_xy = packedData[y*trNextY + x*trNextX]
642    * imag_xy = packedData[y*trNextY + x*trXextX + im]
643    * Output order:
644    * real_xy = output[offset + x*nextX + y*nextY]
645    * imag_xy = output[offset + x*nextX + y*nextY + im]
646    *
647    * @param output The output data.
648    * @param offset The offset into the output data.
649    */
650   private void unTranspose(final double[] output, int offset) {
651     int index = offset;
652     // Outer loop over the Y dimension.
653     for (int y = 0; y < nY; y++) {
654       int dy = y * trNextY;
655       // Inner loop over the X dimension.
656       int x = 0;
657 //      for (; x < nX - 1; x += 2, index += 2 * ii) {
658 //        int i1 = dy + x * trNextX;
659 //        int i2 = i1 + trNextX;
660 //        double real1 = tile[i1];
661 //        double imag1 = tile[i1 + im];
662 //        double real2 = tile[i2];
663 //        double imag2 = tile[i2 + im];
664 //        // Contiguous storage into the output array.
665 //        output[index] = real1;
666 //        output[index + externalIm] = imag1;
667 //        output[index + ii] = real2;
668 //        output[index + ii + externalIm] = imag2;
669 //      }
670       for (; x < nX - 3; x+=4, index += 4*ii) {
671         int i1 = dy + x * trNextX;
672         int i2 = i1 + trNextX;
673         int i3 = i2 + trNextX;
674         int i4 = i3 + trNextX;
675         double real1 = tile[i1];
676         double imag1 = tile[i1 + im];
677         double real2 = tile[i2];
678         double imag2 = tile[i2 + im];
679         double real3 = tile[i3];
680         double imag3 = tile[i3 + im];
681         double real4 = tile[i4];
682         double imag4 = tile[i4 + im];
683         // Contiguous storage into the output array.
684         int ii2 = ii + ii;
685         int ii3 = ii2 + ii;
686         output[index] = real1;
687         output[index + externalIm] = imag1;
688         output[index + ii] = real2;
689         output[index + ii + externalIm] = imag2;
690         output[index + ii2] = real3;
691         output[index + ii2 + externalIm] = imag3;
692         output[index + ii3] = real4;
693         output[index + ii3 + externalIm] = imag4;
694       }
695       for (; x < nX; x++, index += ii) {
696         double real = tile[dy + x * trNextX];
697         double imag = tile[dy + x * trNextX + im];
698         // Contiguous storage into the output array.
699         output[index] = real;
700         output[index + externalIm] = imag;
701       }
702     }
703   }
704 
705   /**
706    * Test the Complex2D FFT.
707    *
708    * @param args an array of {@link java.lang.String} objects.
709    * @throws java.lang.Exception if any.
710    * @since 1.0
711    */
712   public static void main(String[] args) throws Exception {
713     int dimNotFinal = 128;
714     int reps = 5;
715     boolean blocked = false;
716     try {
717       dimNotFinal = Integer.parseInt(args[0]);
718       if (dimNotFinal < 1) {
719         dimNotFinal = 128;
720       }
721       reps = Integer.parseInt(args[1]);
722       if (reps < 1) {
723         reps = 5;
724       }
725       blocked = Boolean.parseBoolean(args[2]);
726     } catch (Exception e) {
727       //
728     }
729     final int dim = dimNotFinal;
730     System.out.printf("Initializing a %d cubed grid.\n"
731             + "The best timing out of %d repetitions will be used.%n",
732         dim, reps);
733     // One dimension of the serial array divided by the number of threads.
734     Complex2D complex2D;
735     if (blocked) {
736       complex2D = new Complex2D(dim, dim, DataLayout2D.BLOCKED_X, dim);
737     } else {
738       complex2D = new Complex2D(dim, dim);
739     }
740     ParallelTeam parallelTeam = new ParallelTeam();
741     final double[] data = initRandomData(dim, parallelTeam);
742 
743     double toSeconds = 0.000000001;
744     long forwardTime = Long.MAX_VALUE;
745     long inverseTime = Long.MAX_VALUE;
746 
747     // Warm-up
748     System.out.println(" Warm Up FFT");
749     complex2D.fft(data, 0);
750     System.out.println(" Warm Up IFFT");
751     complex2D.ifft(data, 0);
752 
753     for (int i = 0; i < reps; i++) {
754       System.out.printf(" Iteration %d%n", i + 1);
755       long time = System.nanoTime();
756       complex2D.fft(data, 0);
757       time = (System.nanoTime() - time);
758       System.out.printf("  FFT:   %9.6f (sec)%n", toSeconds * time);
759       if (time < forwardTime) {
760         forwardTime = time;
761       }
762       time = System.nanoTime();
763       complex2D.ifft(data, 0);
764       time = (System.nanoTime() - time);
765       System.out.printf("  IFFT:  %9.6f (sec)%n", toSeconds * time);
766       if (time < inverseTime) {
767         inverseTime = time;
768       }
769     }
770 
771     System.out.printf(" Best FFT Time:    %9.6f (sec)%n", toSeconds * forwardTime);
772     System.out.printf(" Best IFFT Time:   %9.6f (sec)%n", toSeconds * inverseTime);
773     parallelTeam.shutdown();
774   }
775 
776   /**
777    * Initialize a 2D data for testing purposes.
778    *
779    * @param dim The dimension of the 2D grid.
780    * @return The 3D data.
781    * @since 1.0
782    */
783   public static double[] initRandomData(int dim, ParallelTeam parallelTeam) {
784     int n = dim * dim;
785     double[] data = new double[2 * n];
786     try {
787       parallelTeam.execute(
788           new ParallelRegion() {
789             @Override
790             public void run() {
791               try {
792                 execute(
793                     0,
794                     dim - 1,
795                     new IntegerForLoop() {
796                       @Override
797                       public void run(final int lb, final int ub) {
798                         Random randomNumberGenerator = new Random(1);
799                         int index = dim * lb * 2;
800                         for (int i = lb; i <= ub; i++) {
801                           for (int j = 0; j < dim; j++) {
802                             double randomNumber = randomNumberGenerator.nextDouble();
803                             data[index] = randomNumber;
804                             index += 2;
805                           }
806                         }
807                       }
808                     });
809               } catch (Exception e) {
810                 System.out.println(e.getMessage());
811                 System.exit(-1);
812               }
813             }
814           });
815     } catch (Exception e) {
816       System.out.println(e.getMessage());
817       System.exit(-1);
818     }
819     return data;
820   }
821 }