1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38 package ffx.numerics.fft;
39
40 import static java.lang.Math.fma;
41
42
43
44
45 public class MixedRadixFactorPrime extends MixedRadixFactor {
46
47
48
49
50
51
52 public MixedRadixFactorPrime(PassConstants passConstants) {
53 super(passConstants);
54 }
55
56
57
58
59 protected void passScalar(PassData passData) {
60 final double[] data = passData.in;
61 final double[] ret = passData.out;
62 int sign = passData.sign;
63 if (im != 1) {
64 throw new IllegalArgumentException(" Support for large prime factors requires interleaved data.");
65 }
66 final int dataOffset = passData.inOffset;
67 final int retOffset = passData.outOffset;
68 final int jump = (factor - 1) * innerLoopLimit;
69 for (int i = 0; i < nextInput; i++) {
70 ret[retOffset + 2 * i] = data[dataOffset + 2 * i];
71 ret[retOffset + 2 * i + 1] = data[dataOffset + 2 * i + 1];
72 }
73 for (int e = 1; e < (factor - 1) / 2 + 1; e++) {
74 for (int i = 0; i < nextInput; i++) {
75 int idx = i + e * nextInput;
76 int idxc = i + (factor - e) * nextInput;
77 ret[retOffset + 2 * idx] = data[dataOffset + 2 * idx] + data[dataOffset + 2 * idxc];
78 ret[retOffset + 2 * idx + 1] = data[dataOffset + 2 * idx + 1] + data[dataOffset + 2 * idxc + 1];
79 ret[retOffset + 2 * idxc] = data[dataOffset + 2 * idx] - data[dataOffset + 2 * idxc];
80 ret[retOffset + 2 * idxc + 1] = data[dataOffset + 2 * idx + 1] - data[dataOffset + 2 * idxc + 1];
81 }
82 }
83 for (int i = 0; i < nextInput; i++) {
84 data[dataOffset + 2 * i] = ret[retOffset + 2 * i];
85 data[dataOffset + 2 * i + 1] = ret[retOffset + 2 * i + 1];
86 }
87 for (int e1 = 1; e1 < (factor - 1) / 2 + 1; e1++) {
88 for (int i = 0; i < nextInput; i++) {
89 int i1 = retOffset + 2 * (i + e1 * nextInput);
90 data[dataOffset + 2 * i] += ret[i1];
91 data[dataOffset + 2 * i + 1] += ret[i1 + 1];
92 }
93 }
94 double[] twiddl = twiddles[outerLoopLimit];
95 for (int e = 1; e < (factor - 1) / 2 + 1; e++) {
96 int idx = e;
97 double wr, wi;
98 int em = e * nextInput;
99 int ecm = (factor - e) * nextInput;
100 for (int i = 0; i < nextInput; i++) {
101 data[dataOffset + 2 * (i + em)] = ret[retOffset + 2 * i];
102 data[dataOffset + 2 * (i + em) + 1] = ret[retOffset + 2 * i + 1];
103 data[dataOffset + 2 * (i + ecm)] = ret[retOffset + 2 * i];
104 data[dataOffset + 2 * (i + ecm) + 1] = ret[retOffset + 2 * i + 1];
105 }
106 for (int e1 = 1; e1 < (factor - 1) / 2 + 1; e1++) {
107 if (idx == 0) {
108 wr = 1;
109 wi = 0;
110 } else {
111 wr = twiddl[2 * (idx - 1)];
112 wi = -sign * twiddl[2 * (idx - 1) + 1];
113 }
114 for (int i = 0; i < nextInput; i++) {
115 int i1 = retOffset + 2 * (i + e1 * nextInput);
116 int i2 = retOffset + 2 * (i + (factor - e1) * nextInput);
117 double ap = wr * ret[i1];
118 double am = wi * ret[i2 + 1];
119 double bp = wr * ret[i1 + 1];
120 double bm = wi * ret[i2];
121 data[dataOffset + 2 * (i + em)] += (ap - am);
122 data[dataOffset + 2 * (i + em) + 1] += (bp + bm);
123 data[dataOffset + 2 * (i + ecm)] += (ap + am);
124 data[dataOffset + 2 * (i + ecm) + 1] += (bp - bm);
125 }
126 idx += e;
127 idx %= factor;
128 }
129 }
130
131 for (int k1 = 0; k1 < innerLoopLimit; k1++) {
132 ret[retOffset + 2 * k1] = data[dataOffset + 2 * k1];
133 ret[retOffset + 2 * k1 + 1] = data[dataOffset + 2 * k1 + 1];
134 }
135 for (int e1 = 1; e1 < factor; e1++) {
136 for (int k1 = 0; k1 < innerLoopLimit; k1++) {
137 int i = retOffset + 2 * (k1 + e1 * innerLoopLimit);
138 int i1 = dataOffset + 2 * (k1 + e1 * nextInput);
139 ret[i] = data[i1];
140 ret[i + 1] = data[i1 + 1];
141 }
142 }
143 int i = innerLoopLimit;
144 int j = product;
145 for (int k = 1; k < outerLoopLimit; k++) {
146 for (int k1 = 0; k1 < innerLoopLimit; k1++) {
147 ret[retOffset + 2 * j] = data[dataOffset + 2 * i];
148 ret[retOffset + 2 * j + 1] = data[dataOffset + 2 * i + 1];
149 i++;
150 j++;
151 }
152 j += jump;
153 }
154 i = innerLoopLimit;
155 j = product;
156 for (int k = 1; k < outerLoopLimit; k++) {
157 twiddl = twiddles[k];
158 for (int k1 = 0; k1 < innerLoopLimit; k1++) {
159 for (int e1 = 1; e1 < factor; e1++) {
160 int i1 = dataOffset + 2 * (i + e1 * nextInput);
161 double xr = data[i1];
162 double xi = data[i1 + 1];
163 double wr = twiddl[2 * (e1 - 1)];
164 double wi = -sign * twiddl[2 * (e1 - 1) + 1];
165 int i2 = retOffset + 2 * (j + e1 * innerLoopLimit);
166 ret[i2] = fma(wr, xr, -wi * xi);
167 ret[i2 + 1] = fma(wr, xi, wi * xr);
168 }
169 i++;
170 j++;
171 }
172 j += jump;
173 }
174 }
175
176
177
178
179 protected void passSIMD(PassData passData) {
180 passScalar(passData);
181 }
182 }