1 // ******************************************************************************
2 //
3 // Title: Force Field X.
4 // Description: Force Field X - Software for Molecular Biophysics.
5 // Copyright: Copyright (c) Michael J. Schnieders 2001-2026.
6 //
7 // This file is part of Force Field X.
8 //
9 // Force Field X is free software; you can redistribute it and/or modify it
10 // under the terms of the GNU General Public License version 3 as published by
11 // the Free Software Foundation.
12 //
13 // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16 // details.
17 //
18 // You should have received a copy of the GNU General Public License along with
19 // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20 // Place, Suite 330, Boston, MA 02111-1307 USA
21 //
22 // Linking this library statically or dynamically with other modules is making a
23 // combined work based on this library. Thus, the terms and conditions of the
24 // GNU General Public License cover the whole combination.
25 //
26 // As a special exception, the copyright holders of this library give you
27 // permission to link this library with independent modules to produce an
28 // executable, regardless of the license terms of these independent modules, and
29 // to copy and distribute the resulting executable under terms of your choice,
30 // provided that you also meet, for each linked independent module, the terms
31 // and conditions of the license of that module. An independent module is a
32 // module which is not derived from or based on this library. If you modify this
33 // library, you may extend this exception to your version of the library, but
34 // you are not obligated to do so. If you do not wish to do so, delete this
35 // exception statement from your version.
36 //
37 // ******************************************************************************
38 package ffx.numerics.fft;
39
40 import edu.rit.pj.IntegerForLoop;
41 import edu.rit.pj.ParallelRegion;
42 import edu.rit.pj.ParallelTeam;
43
44 import java.util.Random;
45
46 import static java.lang.Math.max;
47 import static java.lang.Math.min;
48
49 /**
50 * Compute the 2D FFT of complex, double precision input of arbitrary dimensions
51 * via 1D Mixed Radix FFTs.
52 * <p>
53 * For interleaved data, the location of the input point [x, y] within the input array must be: <br>
54 * double real = input[x*nextX + y*nextY]<br>
55 * double imag = input[x*nextX + y*nextY + im]<br>
56 * where <br>
57 * nextX = 2
58 * nextY = 2*nX
59 * im = 1
60 * <p>
61 * For blocked data along x, the location of the input point [x, y] within the input array must be: <br>
62 * double real = input[x*nextX + y*nextY]<br>
63 * double imag = input[x*nextX + y*nextY + im]<br>
64 * where for BLOCKED_X <br>
65 * nextX = 1<br>
66 * nextY = 2*nX<br>
67 * im = nX<br>
68 * and for BLOCKED_XY <br>
69 * nextX = 1<br>
70 * nextY = nX<br>
71 * im = nX*nY<br>
72 *
73 * @author Michal J. Schnieders
74 * @see Complex
75 * @since 1.0
76 */
77 public class Complex2D {
78
79 /**
80 * The 2D data layout.
81 */
82 private final DataLayout2D layout;
83 /**
84 * The external imaginary offset.
85 */
86 private final int externalIm;
87 /**
88 * The X-dimension.
89 */
90 private final int nX;
91 /**
92 * The Y-dimension.
93 */
94 private final int nY;
95 /**
96 * The next real value along the X dimension.
97 */
98 private final int nextX;
99 /**
100 * The next real value along the Y dimension.
101 */
102 private final int nextY;
103 /**
104 * Compute FFTs along X one at a time.
105 */
106 private final Complex fftX;
107 /**
108 * Compute FFTs along Y one at a time.
109 */
110 private final Complex fftY;
111 /**
112 * If true, pack FFTs along X (or Y) into a contiguous array to compute all FFTs along X (or Y) at once.
113 */
114 private boolean packFFTs;
115 /**
116 * If true, use SIMD instructions.
117 */
118 private boolean useSIMD;
119 /**
120 * Number of FFTs to compute along X in one batch. The optimal size depends on cache layout
121 * and SIMD register width. The default is to compute 8 at a time (tileSizeX = 8).
122 */
123 private int nFFTX;
124 /**
125 * Number of FFTs to compute along Y in one batch. The optimal size depends on cache layout
126 * and SIMD register width. The default is to compute 8 at a time (tileSizeY = 8).
127 */
128 private int nFFTY;
129 /**
130 * Compute tileSizeX FFTs along the X dimension all at once.
131 */
132 private final Complex fftXTile;
133 /**
134 * Compute tileSizeY FFTs along the Y dimension all at once.
135 */
136 private final Complex fftYTile;
137 /**
138 * Working array for packed FFTs.
139 */
140 private final double[] tile;
141 /**
142 * The offset between real values in the packed data.
143 */
144 private final int ii;
145 /**
146 * The offset between any real value and its corresponding imaginary value for the packed data.
147 */
148 private final int im;
149 /**
150 * For FFTs along X, the offset between any real value and its corresponding
151 * imaginary value for tiled data.
152 * This will be 1 for interleaved or nX for blocked.
153 */
154 private final int imX;
155 /**
156 * For FFTs along Y, the offset between any real value and its corresponding
157 * imaginary value for tiled data.
158 * This will be 1 for interleaved or nY for blocked.
159 */
160 private final int imY;
161 /**
162 * The offset between real values along the X-dimension in the transposed packed data.
163 */
164 private final int trNextX;
165 /**
166 * The offset between real values along the Y-dimension in the transposed packed data.
167 */
168 private final int trNextY;
169
170 /**
171 * Create a new 2D Complex FFT for interleaved data.
172 *
173 * @param nX The number of points in the X dimension.
174 * @param nY The number of points in the Y dimension.
175 */
176 public Complex2D(int nX, int nY) {
177 this(nX, nY, DataLayout2D.INTERLEAVED, 1);
178 }
179
180 /**
181 * Create a new 2D Complex FFT.
182 *
183 * @param nX The number of points in the X dimension.
184 * @param nY The number of points in the Y dimension.
185 * @param layout The data layout.
186 * @param imOffset The offset between real and imaginary values.
187 */
188 public Complex2D(int nX, int nY, DataLayout2D layout, int imOffset) {
189 this.nX = nX;
190 this.nY = nY;
191 this.externalIm = imOffset;
192 this.layout = layout;
193
194 int dX = nY;
195 String pX = System.getProperty("fft.tileX", Integer.toString(dX));
196 try {
197 nFFTX = Integer.parseInt(pX);
198 if (nFFTX < 1 || nFFTX > nY) {
199 nFFTX = dX;
200 }
201 } catch (Exception e) {
202 nFFTX = dX;
203 }
204
205 int dY = nX;
206 String pY = System.getProperty("fft.tileY", Integer.toString(dY));
207 try {
208 nFFTY = Integer.parseInt(pY);
209 if (nFFTY < 1 || nFFTY > nX) {
210 nFFTY = dY;
211 }
212 } catch (Exception e) {
213 nFFTY = dY;
214 }
215
216 if (layout == DataLayout2D.INTERLEAVED) {
217 im = 1;
218 imX = 1;
219 imY = 1;
220 ii = 2;
221 nextX = 2;
222 nextY = 2 * nX;
223 trNextY = 2;
224 trNextX = 2 * nY;
225 } else if (layout == DataLayout2D.BLOCKED_X) {
226 im = nX;
227 if (nFFTX == nX) {
228 // All at once.
229 imX = nX;
230 } else {
231 // Internal tiles using BLOCKED_XY
232 imX = nX * nFFTX;
233 }
234 if (nFFTY == nY) {
235 // All at once.
236 imY = nX;
237 } else {
238 // Internal Tiles using BLOCKED_XY
239 imY = nY * nFFTY;
240 }
241 ii = 1;
242 nextX = 1;
243 nextY = 2 * nX;
244 trNextY = 1;
245 trNextX = 2 * nY;
246 } else if (layout == DataLayout2D.BLOCKED_XY) {
247 im = nX * nY;
248 imX = nX * nFFTX; // Packed, FFT along X
249 imY = nY * nFFTY; // Packed, FFT along Y
250 ii = 1;
251 nextX = 1;
252 nextY = nX;
253 trNextY = 1;
254 trNextX = nY;
255 } else {
256 throw new IllegalArgumentException(" Unsupported data layout: " + layout);
257 }
258
259 if (this.externalIm != im) {
260 throw new IllegalArgumentException(" Unsupported im offset: " + imOffset);
261 }
262
263 // Use SIMD by default.
264 useSIMD = true;
265 String simd = System.getProperty("fft.simd", Boolean.toString(useSIMD));
266 try {
267 useSIMD = Boolean.parseBoolean(simd);
268 } catch (Exception e) {
269 useSIMD = false;
270 }
271
272 packFFTs = true;
273 String pack = System.getProperty("fft.pack", Boolean.toString(packFFTs));
274 try {
275 packFFTs = Boolean.parseBoolean(pack);
276 } catch (Exception e) {
277 packFFTs = false;
278 }
279
280 DataLayout1D layout1D;
281 if (layout == DataLayout2D.INTERLEAVED) {
282 layout1D = DataLayout1D.INTERLEAVED;
283 } else {
284 layout1D = DataLayout1D.BLOCKED;
285 }
286
287 // Create 1D FFTs that will be used for computing 1 FFT at a time.
288 fftX = new Complex(nX, layout1D, im);
289 fftX.setUseSIMD(useSIMD);
290 fftY = new Complex(nY, layout1D, im);
291 fftY.setUseSIMD(useSIMD);
292
293 // Create 1D FFTs that will be used for computing more than 1 FFT at a time.
294 fftXTile = new Complex(nX, layout1D, imX, nFFTX);
295 fftXTile.setUseSIMD(useSIMD);
296 fftYTile = new Complex(nY, layout1D, imY, nFFTY);
297 fftYTile.setUseSIMD(useSIMD);
298
299 int arraySize = max(nX * nFFTX, nFFTY * nY);
300 tile = new double[2 * arraySize];
301 }
302
303 /**
304 * Get the Data Layout.
305 *
306 * @return The layout.
307 */
308 public DataLayout2D getLayout() {
309 return layout;
310 }
311
312 /**
313 * Set the 2D transform to use SIMD instructions.
314 *
315 * @param useSIMD True to use SIMD instructions.
316 */
317 public void setUseSIMD(boolean useSIMD) {
318 this.useSIMD = useSIMD;
319 fftX.setUseSIMD(useSIMD);
320 fftY.setUseSIMD(useSIMD);
321 fftXTile.setUseSIMD(useSIMD);
322 fftYTile.setUseSIMD(useSIMD);
323 }
324
325 /**
326 * Set the 2D transform to pack FFTs into a contiguous array to compute all FFTs at once.
327 *
328 * @param packFFTs True to pack FFTs.
329 */
330 public void setPackFFTs(boolean packFFTs) {
331 this.packFFTs = packFFTs;
332 }
333
334 /**
335 * Compute the 2D FFT.
336 *
337 * @param input The input array must be of size 2 * nX * nY.
338 * @param index The offset into the input array of the first element.
339 */
340 public void fft(final double[] input, int index) {
341 if (!packFFTs) {
342 for (int offset = index, y = 0; y < nY; y++, offset += nextY) {
343 fftX.fft(input, offset, nextX);
344 }
345 for (int offset = index, x = 0; x < nX; x++, offset += nextX) {
346 fftY.fft(input, offset, nextY);
347 }
348 } else {
349 if (nFFTY == nX) {
350 fftYTile.fft(input, index, ii);
351 } else {
352 for (int x = 0; x < nX; x += nFFTY) {
353 getFFTYTile(input, index, tile, x, nFFTY);
354 fftYTile.fft(tile, 0, ii);
355 setFFTYTile(input, index, tile, x, nFFTY);
356 }
357 }
358 if (nFFTX == nY) {
359 transpose(input, index);
360 fftXTile.fft(tile, 0, ii);
361 unTranspose(input, index);
362 } else {
363 for (int y = 0; y < nY; y += nFFTX) {
364 getFFTXTile(input, index, tile, y, nFFTX);
365 fftXTile.fft(tile, 0, ii);
366 setFFTXTile(input, index, tile, y, nFFTX);
367 }
368 }
369 }
370 }
371
372 /**
373 * Compute the 2D IFFT.
374 *
375 * @param input The input array must be of size 2 * nX * nY.
376 * @param index The offset into the input array of the first element.
377 */
378 public void ifft(final double[] input, int index) {
379 if (!packFFTs) {
380 for (int offset = index, y = 0; y < nY; y++, offset += nextY) {
381 fftX.ifft(input, offset, nextX);
382 }
383 for (int offset = index, x = 0; x < nX; x++, offset += nextX) {
384 fftY.ifft(input, offset, nextY);
385 }
386 } else {
387 if (nFFTY == nX) {
388 fftYTile.ifft(input, index, ii);
389 } else {
390 for (int x = 0; x < nX; x += nFFTY) {
391 getFFTYTile(input, index, tile, x, nFFTY);
392 fftYTile.ifft(tile, 0, ii);
393 setFFTYTile(input, index, tile, x, nFFTY);
394 }
395 }
396 if (nFFTX == nY) {
397 transpose(input, index);
398 fftXTile.ifft(tile, 0, ii);
399 unTranspose(input, index);
400 } else {
401 for (int y = 0; y < nY; y += nFFTX) {
402 getFFTXTile(input, index, tile, y, nFFTX);
403 fftXTile.ifft(tile, 0, ii);
404 setFFTXTile(input, index, tile, y, nFFTX);
405 }
406 }
407 }
408 }
409
410 /**
411 * The input is of size nX x nY.
412 * Extract a tile of size tileSize x nY.
413 * <p>
414 * Input order:
415 * real(x,y) = input[offset + x*nextX + y*nextY]
416 * imag(x,y) = input[offset + x*nextX + y*nextY + externalIm]
417 * Output::
418 * For each Y, all X-values for the tile are contiguous in memory.
419 *
420 * @param input The input data.
421 * @param offset The offset into the input data.
422 * @param tile The output tile.
423 * @param firstX The first x-index of the tile.
424 * @param tileSize The size of the tile along X.
425 */
426 private void getFFTYTile(final double[] input, int offset, final double[] tile, int firstX, int tileSize) {
427 int index = 0;
428 // Handle padding
429 int padX = firstX + tileSize;
430 int lastX = min(nX, padX);
431 // Outer loop over the Y dimension.
432 for (int y = 0; y < nY; y++) {
433 int dy = offset + y * nextY;
434 // Inner loop over the X dimension.
435 for (int x = firstX; x < lastX; x++) {
436 int dx = x * nextX;
437 double real = input[dx + dy];
438 double imag = input[dx + dy + externalIm];
439 // Contiguous storage into a packed array.
440 tile[index] = real;
441 tile[index + imY] = imag;
442 index += ii;
443 }
444 for (int x = lastX; x < padX; x++) {
445 index += ii;
446 }
447 }
448 }
449
450 /**
451 * Set a tile of size tileSize x nY.
452 * Put it into the input array of size nX x nY.
453 * <p>
454 * Input array order:
455 * real(x,y) = input[offset + x*nextX + y*nextY]
456 * imag(x,y) = input[offset + x*nextX + y*nextY + externalIm]
457 *
458 * @param input The input data.
459 * @param offset The offset into the input data.
460 * @param tile The output tile.
461 * @param firstX The first x-index of the tile.
462 * @param tileSize The size of the tile along X.
463 */
464 private void setFFTYTile(final double[] input, int offset, final double[] tile, int firstX, int tileSize) {
465 int index = 0;
466 // Handle padding
467 int padX = firstX + tileSize;
468 int lastX = min(nX, padX);
469 // Outer loop over the Y dimension.
470 for (int y = 0; y < nY; y++) {
471 int dy = offset + y * nextY;
472 // Inner loop over the X dimension.
473 for (int x = firstX; x < lastX; x++) {
474 double real = tile[index];
475 double imag = tile[index + imY];
476 index += ii;
477 int dx = x * nextX;
478 input[dx + dy] = real;
479 input[dx + dy + externalIm] = imag;
480 }
481 for (int x = lastX; x < padX; x++) {
482 index += ii;
483 }
484 }
485 }
486
487 /**
488 * The input is of size nX x nY.
489 * Extract a tile of size nX * tileSize.
490 * <p>
491 * Input order:
492 * real(x,y) = input[offset + x*nextX + y*nextY]
493 * imag(x,y) = input[offset + x*nextX + y*nextY + externalIm]
494 * Tile order:
495 * For each X, all Y-values for the tile a contiguous in memory.
496 *
497 * @param input The input data.
498 * @param offset The offset into the input data.
499 * @param tile The output tile.
500 * @param firstY The first y-index of the tile.
501 * @param tileSize The size of the tile along Y.
502 */
503 private void getFFTXTile(final double[] input, int offset, final double[] tile, int firstY, int tileSize) {
504 int index = 0;
505 // Handle padding
506 int padY = firstY + tileSize;
507 int lastY = min(nY, padY);
508 // Outer loop over the X dimension.
509 for (int x = 0; x < nX; x++) {
510 int dx = offset + x * nextX;
511 // Inner loop over the Y dimension.
512 for (int y = firstY; y < lastY; y++) {
513 int dy = y * nextY;
514 double real = input[dx + dy];
515 double imag = input[dx + dy + externalIm];
516 // Contiguous storage into a packed array.
517 tile[index] = real;
518 tile[index + imX] = imag;
519 index += ii;
520 }
521 for (int y = lastY; y < padY; y++) {
522 index += ii;
523 }
524 }
525 }
526
527 /**
528 * Set a tile of size nX * tileSize.
529 * Results are written back into the "input" array of size nX x nY.
530 * <p>
531 * Tile order:
532 * For each X, all Y-values for the tile a contiguous in memory.
533 * <p>
534 * Input order:
535 * real(x,y) = input[offset + x*nextX + y*nextY]
536 * imag(x,y) = input[offset + x*nextX + y*nextY + externalIm]
537 *
538 * @param input The input data.
539 * @param offset The offset into the input data.
540 * @param tile The output tile.
541 * @param firstY The first y-index of the tile.
542 * @param tileSize The size of the tile along Y.
543 */
544 private void setFFTXTile(final double[] input, int offset, final double[] tile, int firstY, int tileSize) {
545 int index = 0;
546 // Handle padding
547 int padY = firstY + tileSize;
548 int lastY = min(nY, padY);
549 // Outer loop over the X dimension.
550 for (int x = 0; x < nX; x++) {
551 int dx = offset + x * nextX;
552 // Inner loop over the Y dimension.
553 for (int y = firstY; y < lastY; y++) {
554 double real = tile[index];
555 double imag = tile[index + imX];
556 index += ii;
557 int dy = y * nextY;
558 input[dx + dy] = real;
559 input[dx + dy + externalIm] = imag;
560 }
561 for (int y = lastY; y < padY; y++) {
562 index += ii;
563 }
564 }
565 }
566
567 /**
568 * Pack the input array for Fourier transforms along the X dimension.
569 * <p>
570 * Input order:
571 * real(x,y) = input[offset + x*nextX + y*nextY]
572 * imag(x,y) = input[offset + x*nextX + y*nextY + im]
573 * Output order:
574 * real(x,y) = packedData[y*trNextY + x*trNextX]
575 * imag(x,y) = packedData[y*trNextY + x*trXextX + im]
576 *
577 * @param input The input data.
578 * @param offset The offset into the input data.
579 */
580 private void transpose(final double[] input, int offset) {
581 int index = 0;
582 // Outer loop over the X dimension.
583 for (int x = 0; x < nX; x++) {
584 int dx = offset + x * nextX;
585 // Inner loop over the Y dimension (the number of FFTs).
586 int y = 0;
587 // for (; y < nY - 1; y += 2, index += 2 * ii) {
588 // int i1 = dx + y * nextY;
589 // int i2 = i1 + nextY;
590 // // Non-contiguous reads.
591 // double real1 = input[i1];
592 // double imag1 = input[i1 + externalIm];
593 // double real2 = input[i2];
594 // double imag2 = input[i2 + externalIm];
595 // // Contiguous storage into the packed array.
596 // tile[index] = real1;
597 // tile[index + im] = imag1;
598 // tile[index + ii] = real2;
599 // tile[index + ii + im] = imag2;
600 // }
601 for (; y < nY - 3; y += 4, index += 4*ii) {
602 int i1 = dx + y * nextY;
603 int i2 = i1 + nextY;
604 int i3 = i2 + nextY;
605 int i4 = i3 + nextY;
606 // Non-contiguous reads.
607 double real1 = input[i1];
608 double imag1 = input[i1 + externalIm];
609 double real2 = input[i2];
610 double imag2 = input[i2 + externalIm];
611 double real3 = input[i3];
612 double imag3 = input[i3 + externalIm];
613 double real4 = input[i4];
614 double imag4 = input[i4 + externalIm];
615 // Contiguous storage into the packed array.
616 int ii2 = ii + ii;
617 int ii3 = ii2 + ii;
618 tile[index] = real1;
619 tile[index + im] = imag1;
620 tile[index + ii] = real2;
621 tile[index + ii + im] = imag2;
622 tile[index + ii2] = real3;
623 tile[index + ii2 + im] = imag3;
624 tile[index + ii3] = real4;
625 tile[index + ii3 + im] = imag4;
626 }
627 for (; y < nY; y++, index += ii) {
628 double real = input[dx + y * nextY];
629 double imag = input[dx + y * nextY + externalIm];
630 // Contiguous storage into the packed array.
631 tile[index] = real;
632 tile[index + im] = imag;
633 }
634 }
635 }
636
637 /**
638 * Unpack the output array after Fourier transforms.
639 * <p>
640 * Input order:
641 * real_xy = packedData[y*trNextY + x*trNextX]
642 * imag_xy = packedData[y*trNextY + x*trXextX + im]
643 * Output order:
644 * real_xy = output[offset + x*nextX + y*nextY]
645 * imag_xy = output[offset + x*nextX + y*nextY + im]
646 *
647 * @param output The output data.
648 * @param offset The offset into the output data.
649 */
650 private void unTranspose(final double[] output, int offset) {
651 int index = offset;
652 // Outer loop over the Y dimension.
653 for (int y = 0; y < nY; y++) {
654 int dy = y * trNextY;
655 // Inner loop over the X dimension.
656 int x = 0;
657 // for (; x < nX - 1; x += 2, index += 2 * ii) {
658 // int i1 = dy + x * trNextX;
659 // int i2 = i1 + trNextX;
660 // double real1 = tile[i1];
661 // double imag1 = tile[i1 + im];
662 // double real2 = tile[i2];
663 // double imag2 = tile[i2 + im];
664 // // Contiguous storage into the output array.
665 // output[index] = real1;
666 // output[index + externalIm] = imag1;
667 // output[index + ii] = real2;
668 // output[index + ii + externalIm] = imag2;
669 // }
670 for (; x < nX - 3; x+=4, index += 4*ii) {
671 int i1 = dy + x * trNextX;
672 int i2 = i1 + trNextX;
673 int i3 = i2 + trNextX;
674 int i4 = i3 + trNextX;
675 double real1 = tile[i1];
676 double imag1 = tile[i1 + im];
677 double real2 = tile[i2];
678 double imag2 = tile[i2 + im];
679 double real3 = tile[i3];
680 double imag3 = tile[i3 + im];
681 double real4 = tile[i4];
682 double imag4 = tile[i4 + im];
683 // Contiguous storage into the output array.
684 int ii2 = ii + ii;
685 int ii3 = ii2 + ii;
686 output[index] = real1;
687 output[index + externalIm] = imag1;
688 output[index + ii] = real2;
689 output[index + ii + externalIm] = imag2;
690 output[index + ii2] = real3;
691 output[index + ii2 + externalIm] = imag3;
692 output[index + ii3] = real4;
693 output[index + ii3 + externalIm] = imag4;
694 }
695 for (; x < nX; x++, index += ii) {
696 double real = tile[dy + x * trNextX];
697 double imag = tile[dy + x * trNextX + im];
698 // Contiguous storage into the output array.
699 output[index] = real;
700 output[index + externalIm] = imag;
701 }
702 }
703 }
704
705 /**
706 * Test the Complex2D FFT.
707 *
708 * @param args an array of {@link java.lang.String} objects.
709 * @throws java.lang.Exception if any.
710 * @since 1.0
711 */
712 public static void main(String[] args) throws Exception {
713 int dimNotFinal = 128;
714 int reps = 5;
715 boolean blocked = false;
716 try {
717 dimNotFinal = Integer.parseInt(args[0]);
718 if (dimNotFinal < 1) {
719 dimNotFinal = 128;
720 }
721 reps = Integer.parseInt(args[1]);
722 if (reps < 1) {
723 reps = 5;
724 }
725 blocked = Boolean.parseBoolean(args[2]);
726 } catch (Exception e) {
727 //
728 }
729 final int dim = dimNotFinal;
730 System.out.printf("Initializing a %d cubed grid.\n"
731 + "The best timing out of %d repetitions will be used.%n",
732 dim, reps);
733 // One dimension of the serial array divided by the number of threads.
734 Complex2D complex2D;
735 if (blocked) {
736 complex2D = new Complex2D(dim, dim, DataLayout2D.BLOCKED_X, dim);
737 } else {
738 complex2D = new Complex2D(dim, dim);
739 }
740 ParallelTeam parallelTeam = new ParallelTeam();
741 final double[] data = initRandomData(dim, parallelTeam);
742
743 double toSeconds = 0.000000001;
744 long forwardTime = Long.MAX_VALUE;
745 long inverseTime = Long.MAX_VALUE;
746
747 // Warm-up
748 System.out.println(" Warm Up FFT");
749 complex2D.fft(data, 0);
750 System.out.println(" Warm Up IFFT");
751 complex2D.ifft(data, 0);
752
753 for (int i = 0; i < reps; i++) {
754 System.out.printf(" Iteration %d%n", i + 1);
755 long time = System.nanoTime();
756 complex2D.fft(data, 0);
757 time = (System.nanoTime() - time);
758 System.out.printf(" FFT: %9.6f (sec)%n", toSeconds * time);
759 if (time < forwardTime) {
760 forwardTime = time;
761 }
762 time = System.nanoTime();
763 complex2D.ifft(data, 0);
764 time = (System.nanoTime() - time);
765 System.out.printf(" IFFT: %9.6f (sec)%n", toSeconds * time);
766 if (time < inverseTime) {
767 inverseTime = time;
768 }
769 }
770
771 System.out.printf(" Best FFT Time: %9.6f (sec)%n", toSeconds * forwardTime);
772 System.out.printf(" Best IFFT Time: %9.6f (sec)%n", toSeconds * inverseTime);
773 parallelTeam.shutdown();
774 }
775
776 /**
777 * Initialize a 2D data for testing purposes.
778 *
779 * @param dim The dimension of the 2D grid.
780 * @return The 3D data.
781 * @since 1.0
782 */
783 public static double[] initRandomData(int dim, ParallelTeam parallelTeam) {
784 int n = dim * dim;
785 double[] data = new double[2 * n];
786 try {
787 parallelTeam.execute(
788 new ParallelRegion() {
789 @Override
790 public void run() {
791 try {
792 execute(
793 0,
794 dim - 1,
795 new IntegerForLoop() {
796 @Override
797 public void run(final int lb, final int ub) {
798 Random randomNumberGenerator = new Random(1);
799 int index = dim * lb * 2;
800 for (int i = lb; i <= ub; i++) {
801 for (int j = 0; j < dim; j++) {
802 double randomNumber = randomNumberGenerator.nextDouble();
803 data[index] = randomNumber;
804 index += 2;
805 }
806 }
807 }
808 });
809 } catch (Exception e) {
810 System.out.println(e.getMessage());
811 System.exit(-1);
812 }
813 }
814 });
815 } catch (Exception e) {
816 System.out.println(e.getMessage());
817 System.exit(-1);
818 }
819 return data;
820 }
821 }