/*
 * Decompiled with CFR 0.152.
 */
package ffx.numerics.fft;

import ffx.numerics.fft.MixedRadixFactor;
import ffx.numerics.fft.PassConstants;
import ffx.numerics.fft.PassData;
import jdk.incubator.vector.DoubleVector;
import jdk.incubator.vector.Vector;
import jdk.incubator.vector.VectorSpecies;
import org.apache.commons.math3.util.FastMath;

public class MixedRadixFactor7
extends MixedRadixFactor {
    /** One third; the same value appears as an inlined literal in the v1..v4 initializers below. */
    private static final double oneThird = 0.3333333333333333;
    // Sines of 2*pi/7, 4*pi/7 and 6*pi/7 (the literal arguments are those angles in radians).
    private static final double sin2PI_7 = FastMath.sin((double)0.8975979010256552);
    private static final double sin4PI_7 = FastMath.sin((double)1.7951958020513104);
    private static final double sin6PI_7 = FastMath.sin((double)2.6927937030769655);
    // Cosines of the same three angles.
    private static final double cos2PI_7 = FastMath.cos((double)0.8975979010256552);
    private static final double cos4PI_7 = FastMath.cos((double)1.7951958020513104);
    private static final double cos6PI_7 = FastMath.cos((double)2.6927937030769655);
    // Short aliases for the cosines, used to build the coefficients below.
    private static final double c1 = cos2PI_7;
    private static final double c2 = cos4PI_7;
    private static final double c3 = cos6PI_7;
    // Cosine-based multipliers of the butterfly terms b1..b4 (see passScalar). They do not depend
    // on the transform sign, unlike the sine-based v5..v8 that each pass method computes locally.
    private static final double v1 = (c1 + c2 + c3) * 0.3333333333333333 - 1.0;
    private static final double v2 = (2.0 * c1 - c2 - c3) * 0.3333333333333333;
    private static final double v3 = (c1 - 2.0 * c2 + c3) * 0.3333333333333333;
    private static final double v4 = (c1 + c2 - 2.0 * c3) * 0.3333333333333333;
    // Multiples (2x..6x) of the inherited di value; added to the input index to reach inputs z2..z6.
    private final int di2;
    private final int di3;
    private final int di4;
    private final int di5;
    private final int di6;
    // Multiples (2x..6x) of the inherited dj value; added to the output index to reach outputs 2..6.
    private final int dj2;
    private final int dj3;
    private final int dj4;
    private final int dj5;
    private final int dj6;

    /**
     * Creates the radix-7 pass and caches the 2x..6x multiples of the inherited
     * {@code di} and {@code dj} offsets so the pass loops do not recompute them.
     *
     * @param passConstants constants forwarded unchanged to the superclass constructor.
     */
    public MixedRadixFactor7(PassConstants passConstants) {
        super(passConstants);

        // Offsets of inputs z2..z6 relative to z0.
        di2 = 2 * di;
        di3 = 3 * di;
        di4 = 4 * di;
        di5 = 5 * di;
        di6 = 6 * di;

        // Offsets of outputs 2..6 relative to output 0.
        dj2 = 2 * dj;
        dj3 = 3 * dj;
        dj4 = 4 * dj;
        dj5 = 5 * dj;
        dj6 = 6 * dj;
    }

    /**
     * Applies the radix-7 butterfly pass using scalar arithmetic.
     * <p>
     * Seven complex inputs are read from {@code passData.in} at offsets that are multiples of
     * {@code di} (imaginary parts at a further offset of {@code im}) and combined into seven
     * complex outputs written to {@code passData.out} at multiples of {@code dj}. The first
     * group of inner iterations (k = 0) stores the butterfly results directly; every later outer
     * iteration passes outputs 1..6 to {@code multiplyAndStore} together with the k-th group of
     * six twiddle values taken from {@code wr} / {@code wi}.
     *
     * @param passData input and output arrays, their starting offsets and the transform sign.
     */
    @Override
    protected void passScalar(PassData passData) {
        final double[] data = passData.in;
        final double[] ret = passData.out;
        final int sign = passData.sign;
        int i = passData.inOffset;
        int j = passData.outOffset;

        // The sine-based coefficients depend on the transform direction through -sign.
        final double s1 = -sign * sin2PI_7;
        final double s2 = -sign * sin4PI_7;
        final double s3 = -sign * sin6PI_7;
        final double v5 = (s1 + s2 - s3) * oneThird;
        final double v6 = (2.0 * s1 - s2 + s3) * oneThird;
        final double v7 = (s1 - 2.0 * s2 - s3) * oneThird;
        final double v8 = (s1 + s2 + 2.0 * s3) * oneThird;

        // k = 0: results are stored without any twiddle multiplication.
        for (int k1 = 0; k1 < innerLoopLimit; k1++, i += ii, j += ii) {
            // Gather the seven complex inputs.
            final double z0r = data[i];
            final double z1r = data[i + di];
            final double z2r = data[i + di2];
            final double z3r = data[i + di3];
            final double z4r = data[i + di4];
            final double z5r = data[i + di5];
            final double z6r = data[i + di6];
            final double z0i = data[i + im];
            final double z1i = data[i + di + im];
            final double z2i = data[i + di2 + im];
            final double z3i = data[i + di3 + im];
            final double z4i = data[i + di4 + im];
            final double z5i = data[i + di5 + im];
            final double z6i = data[i + di6 + im];

            // Sums and differences of the paired inputs (1,6), (2,5) and (4,3).
            final double t0r = z1r + z6r;
            final double t0i = z1i + z6i;
            final double t1r = z1r - z6r;
            final double t1i = z1i - z6i;
            final double t2r = z2r + z5r;
            final double t2i = z2i + z5i;
            final double t3r = z2r - z5r;
            final double t3i = z2i - z5i;
            final double t4r = z4r + z3r;
            final double t4i = z4i + z3i;
            final double t5r = z4r - z3r;
            final double t5i = z4i - z3i;
            final double t6r = t2r + t0r;
            final double t6i = t2i + t0i;
            final double t7r = t5r + t3r;
            final double t7i = t5i + t3i;

            // Scaled terms: b1..b4 use the cosine coefficients, b5..b8 the sine coefficients.
            final double b0r = z0r + t6r + t4r;
            final double b0i = z0i + t6i + t4i;
            final double b1r = v1 * (t6r + t4r);
            final double b1i = v1 * (t6i + t4i);
            final double b2r = v2 * (t0r - t4r);
            final double b2i = v2 * (t0i - t4i);
            final double b3r = v3 * (t4r - t2r);
            final double b3i = v3 * (t4i - t2i);
            final double b4r = v4 * (t2r - t0r);
            final double b4i = v4 * (t2i - t0i);
            final double b5r = v5 * (t7r + t1r);
            final double b5i = v5 * (t7i + t1i);
            final double b6r = v6 * (t1r - t5r);
            final double b6i = v6 * (t1i - t5i);
            final double b7r = v7 * (t5r - t3r);
            final double b7i = v7 * (t5i - t3i);
            final double b8r = v8 * (t3r - t1r);
            final double b8i = v8 * (t3i - t1i);

            // Recombine the scaled terms.
            final double u0r = b0r + b1r;
            final double u0i = b0i + b1i;
            final double u1r = b2r + b3r;
            final double u1i = b2i + b3i;
            final double u2r = b4r - b3r;
            final double u2i = b4i - b3i;
            final double u3r = -b2r - b4r;
            final double u3i = -b2i - b4i;
            final double u4r = b6r + b7r;
            final double u4i = b6i + b7i;
            final double u5r = b8r - b7r;
            final double u5i = b8i - b7i;
            final double u6r = -b8r - b6r;
            final double u6i = -b8i - b6i;
            final double u7r = u0r + u1r;
            final double u7i = u0i + u1i;
            final double u8r = u0r + u2r;
            final double u8i = u0i + u2i;
            final double u9r = u0r + u3r;
            final double u9i = u0i + u3i;
            final double u10r = u4r + b5r;
            final double u10i = u4i + b5i;
            final double u11r = u5r + b5r;
            final double u11i = u5i + b5i;
            final double u12r = u6r + b5r;
            final double u12i = u6i + b5i;

            // Outputs 1..6 are u7/u9/u8 combined with -/+ i times u10/u12/u11.
            ret[j] = b0r;
            ret[j + im] = b0i;
            ret[j + dj] = u7r + u10i;
            ret[j + dj + im] = u7i - u10r;
            ret[j + dj2] = u9r + u12i;
            ret[j + dj2 + im] = u9i - u12r;
            ret[j + dj3] = u8r - u11i;
            ret[j + dj3 + im] = u8i + u11r;
            ret[j + dj4] = u8r + u11i;
            ret[j + dj4 + im] = u8i - u11r;
            ret[j + dj5] = u9r - u12i;
            ret[j + dj5 + im] = u9i + u12r;
            ret[j + dj6] = u7r - u10i;
            ret[j + dj6 + im] = u7i + u10r;
        }
        j += jstep;

        // k >= 1: same butterfly, but outputs 1..6 go through multiplyAndStore with twiddles.
        for (int k = 1; k < outerLoopLimit; k++, j += jstep) {
            // Six twiddle values per k; the imaginary parts follow the transform direction.
            final int index = k * 6;
            final double w1r = wr[index];
            final double w2r = wr[index + 1];
            final double w3r = wr[index + 2];
            final double w4r = wr[index + 3];
            final double w5r = wr[index + 4];
            final double w6r = wr[index + 5];
            final double w1i = -sign * wi[index];
            final double w2i = -sign * wi[index + 1];
            final double w3i = -sign * wi[index + 2];
            final double w4i = -sign * wi[index + 3];
            final double w5i = -sign * wi[index + 4];
            final double w6i = -sign * wi[index + 5];

            for (int k1 = 0; k1 < innerLoopLimit; k1++, i += ii, j += ii) {
                // Gather the seven complex inputs.
                final double z0r = data[i];
                final double z1r = data[i + di];
                final double z2r = data[i + di2];
                final double z3r = data[i + di3];
                final double z4r = data[i + di4];
                final double z5r = data[i + di5];
                final double z6r = data[i + di6];
                final double z0i = data[i + im];
                final double z1i = data[i + di + im];
                final double z2i = data[i + di2 + im];
                final double z3i = data[i + di3 + im];
                final double z4i = data[i + di4 + im];
                final double z5i = data[i + di5 + im];
                final double z6i = data[i + di6 + im];

                // Sums and differences of the paired inputs (1,6), (2,5) and (4,3).
                final double t0r = z1r + z6r;
                final double t0i = z1i + z6i;
                final double t1r = z1r - z6r;
                final double t1i = z1i - z6i;
                final double t2r = z2r + z5r;
                final double t2i = z2i + z5i;
                final double t3r = z2r - z5r;
                final double t3i = z2i - z5i;
                final double t4r = z4r + z3r;
                final double t4i = z4i + z3i;
                final double t5r = z4r - z3r;
                final double t5i = z4i - z3i;
                final double t6r = t2r + t0r;
                final double t6i = t2i + t0i;
                final double t7r = t5r + t3r;
                final double t7i = t5i + t3i;

                // Scaled terms: b1..b4 use the cosine coefficients, b5..b8 the sine coefficients.
                final double b0r = z0r + t6r + t4r;
                final double b0i = z0i + t6i + t4i;
                final double b1r = v1 * (t6r + t4r);
                final double b1i = v1 * (t6i + t4i);
                final double b2r = v2 * (t0r - t4r);
                final double b2i = v2 * (t0i - t4i);
                final double b3r = v3 * (t4r - t2r);
                final double b3i = v3 * (t4i - t2i);
                final double b4r = v4 * (t2r - t0r);
                final double b4i = v4 * (t2i - t0i);
                final double b5r = v5 * (t7r + t1r);
                final double b5i = v5 * (t7i + t1i);
                final double b6r = v6 * (t1r - t5r);
                final double b6i = v6 * (t1i - t5i);
                final double b7r = v7 * (t5r - t3r);
                final double b7i = v7 * (t5i - t3i);
                final double b8r = v8 * (t3r - t1r);
                final double b8i = v8 * (t3i - t1i);

                // Recombine the scaled terms.
                final double u0r = b0r + b1r;
                final double u0i = b0i + b1i;
                final double u1r = b2r + b3r;
                final double u1i = b2i + b3i;
                final double u2r = b4r - b3r;
                final double u2i = b4i - b3i;
                final double u3r = -b2r - b4r;
                final double u3i = -b2i - b4i;
                final double u4r = b6r + b7r;
                final double u4i = b6i + b7i;
                final double u5r = b8r - b7r;
                final double u5i = b8i - b7i;
                final double u6r = -b8r - b6r;
                final double u6i = -b8i - b6i;
                final double u7r = u0r + u1r;
                final double u7i = u0i + u1i;
                final double u8r = u0r + u2r;
                final double u8i = u0i + u2i;
                final double u9r = u0r + u3r;
                final double u9i = u0i + u3i;
                final double u10r = u4r + b5r;
                final double u10i = u4i + b5i;
                final double u11r = u5r + b5r;
                final double u11i = u5i + b5i;
                final double u12r = u6r + b5r;
                final double u12i = u6i + b5i;

                // Output 0 needs no twiddle; outputs 1..6 are handed to multiplyAndStore
                // (inherited helper; presumably a complex multiply followed by the store).
                ret[j] = b0r;
                ret[j + im] = b0i;
                multiplyAndStore(u7r + u10i, u7i - u10r, w1r, w1i, ret, j + dj, j + dj + im);
                multiplyAndStore(u9r + u12i, u9i - u12r, w2r, w2i, ret, j + dj2, j + dj2 + im);
                multiplyAndStore(u8r - u11i, u8i + u11r, w3r, w3i, ret, j + dj3, j + dj3 + im);
                multiplyAndStore(u8r + u11i, u8i - u11r, w4r, w4i, ret, j + dj4, j + dj4 + im);
                multiplyAndStore(u9r - u12i, u9i + u12r, w5r, w5i, ret, j + dj5, j + dj5 + im);
                multiplyAndStore(u7r - u10i, u7i + u10r, w6r, w6i, ret, j + dj6, j + dj6 + im);
            }
        }
    }

    /**
     * Dispatches the radix-7 pass to the implementation that fits the current configuration.
     * <p>
     * When {@code isValidSIMDWidth(simdWidth)} is false the scalar pass is used. Otherwise the
     * interleaved kernel runs when the imaginary offset {@code im} equals 1, and the blocked
     * kernel runs for any other value of {@code im}.
     *
     * @param passData input and output arrays, their starting offsets and the transform sign.
     */
    @Override
    protected void passSIMD(PassData passData) {
        // Fall back to scalar code if this SIMD width is not accepted.
        if (!isValidSIMDWidth(simdWidth)) {
            passScalar(passData);
            return;
        }
        // im == 1: each imaginary value sits directly after its real value.
        if (im == 1) {
            interleaved(passData);
            return;
        }
        blocked(passData);
    }

    /**
     * Vectorized radix-7 pass selected by {@code passSIMD} when {@code im == 1}.
     * <p>
     * Each {@code DoubleVector} is loaded from {@code LENGTH} consecutive array elements, so a
     * vector carries real and imaginary values side by side. The butterfly arithmetic mirrors
     * {@code passScalar}; the "multiply by -/+ i" step is done with a lane rearrangement
     * ({@code SHUFFLE_RE_IM}) followed by a fused multiply-add with {@code NEGATE_IM} or
     * {@code NEGATE_RE}.
     * NOTE(review): SHUFFLE_RE_IM presumably swaps the real/imaginary lanes and NEGATE_RE /
     * NEGATE_IM presumably hold +/-1 per lane; both are defined outside this file, so confirm
     * against the superclass.
     *
     * @param passData input and output arrays, their starting offsets and the transform sign.
     */
    private void interleaved(PassData passData) {
        final double[] data = passData.in;
        final double[] ret = passData.out;
        final int sign = passData.sign;
        int i = passData.inOffset;
        int j = passData.outOffset;

        // The sine-based coefficients depend on the transform direction through -sign.
        final double s1 = -sign * sin2PI_7;
        final double s2 = -sign * sin4PI_7;
        final double s3 = -sign * sin6PI_7;
        final double v5 = (s1 + s2 - s3) * oneThird;
        final double v6 = (2.0 * s1 - s2 + s3) * oneThird;
        final double v7 = (s1 - 2.0 * s2 - s3) * oneThird;
        final double v8 = (s1 + s2 + 2.0 * s3) * oneThird;

        // k = 0: results are stored without any twiddle multiplication.
        for (int k1 = 0; k1 < innerLoopLimit; k1 += INTERLEAVED_LOOP, i += LENGTH, j += LENGTH) {
            // Load the seven inputs.
            final DoubleVector z0 = DoubleVector.fromArray(DOUBLE_SPECIES, data, i);
            final DoubleVector z1 = DoubleVector.fromArray(DOUBLE_SPECIES, data, i + di);
            final DoubleVector z2 = DoubleVector.fromArray(DOUBLE_SPECIES, data, i + di2);
            final DoubleVector z3 = DoubleVector.fromArray(DOUBLE_SPECIES, data, i + di3);
            final DoubleVector z4 = DoubleVector.fromArray(DOUBLE_SPECIES, data, i + di4);
            final DoubleVector z5 = DoubleVector.fromArray(DOUBLE_SPECIES, data, i + di5);
            final DoubleVector z6 = DoubleVector.fromArray(DOUBLE_SPECIES, data, i + di6);

            // Sums and differences of the paired inputs (1,6), (2,5) and (4,3).
            final DoubleVector t0 = z1.add(z6);
            final DoubleVector t1 = z1.sub(z6);
            final DoubleVector t2 = z2.add(z5);
            final DoubleVector t3 = z2.sub(z5);
            final DoubleVector t4 = z4.add(z3);
            final DoubleVector t5 = z4.sub(z3);
            final DoubleVector t6 = t2.add(t0);
            final DoubleVector t7 = t5.add(t3);

            // Scaled terms: b1..b4 use the cosine coefficients, b5..b8 the sine coefficients.
            final DoubleVector b0 = z0.add(t6).add(t4);
            final DoubleVector b1 = t6.add(t4).mul(v1);
            final DoubleVector b2 = t0.sub(t4).mul(v2);
            final DoubleVector b3 = t4.sub(t2).mul(v3);
            final DoubleVector b4 = t2.sub(t0).mul(v4);
            final DoubleVector b5 = t7.add(t1).mul(v5);
            final DoubleVector b6 = t1.sub(t5).mul(v6);
            final DoubleVector b7 = t5.sub(t3).mul(v7);
            final DoubleVector b8 = t3.sub(t1).mul(v8);

            // Recombine; u10..u12 are rearranged so they can be combined with u7..u9 below.
            final DoubleVector u0 = b0.add(b1);
            final DoubleVector u1 = b2.add(b3);
            final DoubleVector u2 = b4.sub(b3);
            final DoubleVector u3 = b2.add(b4).neg();
            final DoubleVector u4 = b6.add(b7);
            final DoubleVector u5 = b8.sub(b7);
            final DoubleVector u6 = b8.add(b6).neg();
            final DoubleVector u7 = u0.add(u1);
            final DoubleVector u8 = u0.add(u2);
            final DoubleVector u9 = u0.add(u3);
            final DoubleVector u10 = u4.add(b5).rearrange(SHUFFLE_RE_IM);
            final DoubleVector u11 = u5.add(b5).rearrange(SHUFFLE_RE_IM);
            final DoubleVector u12 = u6.add(b5).rearrange(SHUFFLE_RE_IM);

            // Each output is (rearranged term * sign mask) + base term, stored at its dj multiple.
            b0.intoArray(ret, j);
            u10.fma(NEGATE_IM, u7).intoArray(ret, j + dj);
            u12.fma(NEGATE_IM, u9).intoArray(ret, j + dj2);
            u11.fma(NEGATE_RE, u8).intoArray(ret, j + dj3);
            u11.fma(NEGATE_IM, u8).intoArray(ret, j + dj4);
            u12.fma(NEGATE_RE, u9).intoArray(ret, j + dj5);
            u10.fma(NEGATE_RE, u7).intoArray(ret, j + dj6);
        }
        j += jstep;

        // k >= 1: same butterfly, then outputs 1..6 are multiplied by the k-th twiddle group.
        for (int k = 1; k < outerLoopLimit; k++, j += jstep) {
            final int index = k * 6;
            // Real twiddle parts broadcast to every lane.
            final DoubleVector w1r = DoubleVector.broadcast(DOUBLE_SPECIES, wr[index]);
            final DoubleVector w2r = DoubleVector.broadcast(DOUBLE_SPECIES, wr[index + 1]);
            final DoubleVector w3r = DoubleVector.broadcast(DOUBLE_SPECIES, wr[index + 2]);
            final DoubleVector w4r = DoubleVector.broadcast(DOUBLE_SPECIES, wr[index + 3]);
            final DoubleVector w5r = DoubleVector.broadcast(DOUBLE_SPECIES, wr[index + 4]);
            final DoubleVector w6r = DoubleVector.broadcast(DOUBLE_SPECIES, wr[index + 5]);
            // Imaginary twiddle parts, direction-adjusted via -sign and pre-multiplied by NEGATE_IM.
            final DoubleVector w1i = DoubleVector.broadcast(DOUBLE_SPECIES, -sign * wi[index]).mul(NEGATE_IM);
            final DoubleVector w2i = DoubleVector.broadcast(DOUBLE_SPECIES, -sign * wi[index + 1]).mul(NEGATE_IM);
            final DoubleVector w3i = DoubleVector.broadcast(DOUBLE_SPECIES, -sign * wi[index + 2]).mul(NEGATE_IM);
            final DoubleVector w4i = DoubleVector.broadcast(DOUBLE_SPECIES, -sign * wi[index + 3]).mul(NEGATE_IM);
            final DoubleVector w5i = DoubleVector.broadcast(DOUBLE_SPECIES, -sign * wi[index + 4]).mul(NEGATE_IM);
            final DoubleVector w6i = DoubleVector.broadcast(DOUBLE_SPECIES, -sign * wi[index + 5]).mul(NEGATE_IM);

            for (int k1 = 0; k1 < innerLoopLimit; k1 += INTERLEAVED_LOOP, i += LENGTH, j += LENGTH) {
                // Load the seven inputs.
                final DoubleVector z0 = DoubleVector.fromArray(DOUBLE_SPECIES, data, i);
                final DoubleVector z1 = DoubleVector.fromArray(DOUBLE_SPECIES, data, i + di);
                final DoubleVector z2 = DoubleVector.fromArray(DOUBLE_SPECIES, data, i + di2);
                final DoubleVector z3 = DoubleVector.fromArray(DOUBLE_SPECIES, data, i + di3);
                final DoubleVector z4 = DoubleVector.fromArray(DOUBLE_SPECIES, data, i + di4);
                final DoubleVector z5 = DoubleVector.fromArray(DOUBLE_SPECIES, data, i + di5);
                final DoubleVector z6 = DoubleVector.fromArray(DOUBLE_SPECIES, data, i + di6);

                // Sums and differences of the paired inputs (1,6), (2,5) and (4,3).
                final DoubleVector t0 = z1.add(z6);
                final DoubleVector t1 = z1.sub(z6);
                final DoubleVector t2 = z2.add(z5);
                final DoubleVector t3 = z2.sub(z5);
                final DoubleVector t4 = z4.add(z3);
                final DoubleVector t5 = z4.sub(z3);
                final DoubleVector t6 = t2.add(t0);
                final DoubleVector t7 = t5.add(t3);

                // Scaled terms: b1..b4 use the cosine coefficients, b5..b8 the sine coefficients.
                final DoubleVector b0 = z0.add(t6).add(t4);
                final DoubleVector b1 = t6.add(t4).mul(v1);
                final DoubleVector b2 = t0.sub(t4).mul(v2);
                final DoubleVector b3 = t4.sub(t2).mul(v3);
                final DoubleVector b4 = t2.sub(t0).mul(v4);
                final DoubleVector b5 = t7.add(t1).mul(v5);
                final DoubleVector b6 = t1.sub(t5).mul(v6);
                final DoubleVector b7 = t5.sub(t3).mul(v7);
                final DoubleVector b8 = t3.sub(t1).mul(v8);

                // Recombine; u10..u12 are rearranged so they can be combined with u7..u9 below.
                final DoubleVector u0 = b0.add(b1);
                final DoubleVector u1 = b2.add(b3);
                final DoubleVector u2 = b4.sub(b3);
                final DoubleVector u3 = b2.add(b4).neg();
                final DoubleVector u4 = b6.add(b7);
                final DoubleVector u5 = b8.sub(b7);
                final DoubleVector u6 = b8.add(b6).neg();
                final DoubleVector u7 = u0.add(u1);
                final DoubleVector u8 = u0.add(u2);
                final DoubleVector u9 = u0.add(u3);
                final DoubleVector u10 = u4.add(b5).rearrange(SHUFFLE_RE_IM);
                final DoubleVector u11 = u5.add(b5).rearrange(SHUFFLE_RE_IM);
                final DoubleVector u12 = u6.add(b5).rearrange(SHUFFLE_RE_IM);

                // Output 0 needs no twiddle.
                b0.intoArray(ret, j);

                // Untwiddled outputs 1..6.
                final DoubleVector x1 = u10.fma(NEGATE_IM, u7);
                final DoubleVector x2 = u12.fma(NEGATE_IM, u9);
                final DoubleVector x3 = u11.fma(NEGATE_RE, u8);
                final DoubleVector x4 = u11.fma(NEGATE_IM, u8);
                final DoubleVector x5 = u12.fma(NEGATE_RE, u9);
                final DoubleVector x6 = u10.fma(NEGATE_RE, u7);

                // Twiddle: wr * x + rearrange(wi * x), then store at the matching dj multiple.
                w1r.fma(x1, w1i.mul(x1).rearrange(SHUFFLE_RE_IM)).intoArray(ret, j + dj);
                w2r.fma(x2, w2i.mul(x2).rearrange(SHUFFLE_RE_IM)).intoArray(ret, j + dj2);
                w3r.fma(x3, w3i.mul(x3).rearrange(SHUFFLE_RE_IM)).intoArray(ret, j + dj3);
                w4r.fma(x4, w4i.mul(x4).rearrange(SHUFFLE_RE_IM)).intoArray(ret, j + dj4);
                w5r.fma(x5, w5i.mul(x5).rearrange(SHUFFLE_RE_IM)).intoArray(ret, j + dj5);
                w6r.fma(x6, w6i.mul(x6).rearrange(SHUFFLE_RE_IM)).intoArray(ret, j + dj6);
            }
        }
    }

    private void blocked(PassData passData) {
        double[] data = passData.in;
        double[] ret = passData.out;
        int sign = passData.sign;
        int i = passData.inOffset;
        int j = passData.outOffset;
        double s1 = (double)(-sign) * sin2PI_7;
        double s2 = (double)(-sign) * sin4PI_7;
        double s3 = (double)(-sign) * sin6PI_7;
        double v5 = (s1 + s2 - s3) * 0.3333333333333333;
        double v6 = (2.0 * s1 - s2 + s3) * 0.3333333333333333;
        double v7 = (s1 - 2.0 * s2 - s3) * 0.3333333333333333;
        double v8 = (s1 + s2 + 2.0 * s3) * 0.3333333333333333;
        int k1 = 0;
        while (k1 < this.innerLoopLimit) {
            DoubleVector z0r = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)i);
            DoubleVector z1r = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)(i + this.di));
            DoubleVector z2r = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)(i + this.di2));
            DoubleVector z3r = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)(i + this.di3));
            DoubleVector z4r = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)(i + this.di4));
            DoubleVector z5r = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)(i + this.di5));
            DoubleVector z6r = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)(i + this.di6));
            DoubleVector z0i = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)(i + this.im));
            DoubleVector z1i = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)(i + this.di + this.im));
            DoubleVector z2i = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)(i + this.di2 + this.im));
            DoubleVector z3i = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)(i + this.di3 + this.im));
            DoubleVector z4i = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)(i + this.di4 + this.im));
            DoubleVector z5i = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)(i + this.di5 + this.im));
            DoubleVector z6i = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)(i + this.di6 + this.im));
            DoubleVector t0r = z1r.add((Vector)z6r);
            DoubleVector t0i = z1i.add((Vector)z6i);
            DoubleVector t1r = z1r.sub((Vector)z6r);
            DoubleVector t1i = z1i.sub((Vector)z6i);
            DoubleVector t2r = z2r.add((Vector)z5r);
            DoubleVector t2i = z2i.add((Vector)z5i);
            DoubleVector t3r = z2r.sub((Vector)z5r);
            DoubleVector t3i = z2i.sub((Vector)z5i);
            DoubleVector t4r = z4r.add((Vector)z3r);
            DoubleVector t4i = z4i.add((Vector)z3i);
            DoubleVector t5r = z4r.sub((Vector)z3r);
            DoubleVector t5i = z4i.sub((Vector)z3i);
            DoubleVector t6r = t2r.add((Vector)t0r);
            DoubleVector t6i = t2i.add((Vector)t0i);
            DoubleVector t7r = t5r.add((Vector)t3r);
            DoubleVector t7i = t5i.add((Vector)t3i);
            DoubleVector b0r = z0r.add((Vector)t6r).add((Vector)t4r);
            DoubleVector b0i = z0i.add((Vector)t6i).add((Vector)t4i);
            DoubleVector b1r = t6r.add((Vector)t4r).mul(v1);
            DoubleVector b1i = t6i.add((Vector)t4i).mul(v1);
            DoubleVector b2r = t0r.sub((Vector)t4r).mul(v2);
            DoubleVector b2i = t0i.sub((Vector)t4i).mul(v2);
            DoubleVector b3r = t4r.sub((Vector)t2r).mul(v3);
            DoubleVector b3i = t4i.sub((Vector)t2i).mul(v3);
            DoubleVector b4r = t2r.sub((Vector)t0r).mul(v4);
            DoubleVector b4i = t2i.sub((Vector)t0i).mul(v4);
            DoubleVector b5r = t7r.add((Vector)t1r).mul(v5);
            DoubleVector b5i = t7i.add((Vector)t1i).mul(v5);
            DoubleVector b6r = t1r.sub((Vector)t5r).mul(v6);
            DoubleVector b6i = t1i.sub((Vector)t5i).mul(v6);
            DoubleVector b7r = t5r.sub((Vector)t3r).mul(v7);
            DoubleVector b7i = t5i.sub((Vector)t3i).mul(v7);
            DoubleVector b8r = t3r.sub((Vector)t1r).mul(v8);
            DoubleVector b8i = t3i.sub((Vector)t1i).mul(v8);
            DoubleVector u0r = b0r.add((Vector)b1r);
            DoubleVector u0i = b0i.add((Vector)b1i);
            DoubleVector u1r = b2r.add((Vector)b3r);
            DoubleVector u1i = b2i.add((Vector)b3i);
            DoubleVector u2r = b4r.sub((Vector)b3r);
            DoubleVector u2i = b4i.sub((Vector)b3i);
            DoubleVector u3r = b2r.add((Vector)b4r).neg();
            DoubleVector u3i = b2i.add((Vector)b4i).neg();
            DoubleVector u4r = b6r.add((Vector)b7r);
            DoubleVector u4i = b6i.add((Vector)b7i);
            DoubleVector u5r = b8r.sub((Vector)b7r);
            DoubleVector u5i = b8i.sub((Vector)b7i);
            DoubleVector u6r = b8r.add((Vector)b6r).neg();
            DoubleVector u6i = b8i.add((Vector)b6i).neg();
            DoubleVector u7r = u0r.add((Vector)u1r);
            DoubleVector u7i = u0i.add((Vector)u1i);
            DoubleVector u8r = u0r.add((Vector)u2r);
            DoubleVector u8i = u0i.add((Vector)u2i);
            DoubleVector u9r = u0r.add((Vector)u3r);
            DoubleVector u9i = u0i.add((Vector)u3i);
            DoubleVector u10r = u4r.add((Vector)b5r);
            DoubleVector u10i = u4i.add((Vector)b5i);
            DoubleVector u11r = u5r.add((Vector)b5r);
            DoubleVector u11i = u5i.add((Vector)b5i);
            DoubleVector u12r = u6r.add((Vector)b5r);
            DoubleVector u12i = u6i.add((Vector)b5i);
            b0r.intoArray(ret, j);
            b0i.intoArray(ret, j + this.im);
            u7r.add((Vector)u10i).intoArray(ret, j + this.dj);
            u7i.sub((Vector)u10r).intoArray(ret, j + this.dj + this.im);
            u9r.add((Vector)u12i).intoArray(ret, j + this.dj2);
            u9i.sub((Vector)u12r).intoArray(ret, j + this.dj2 + this.im);
            u8r.sub((Vector)u11i).intoArray(ret, j + this.dj3);
            u8i.add((Vector)u11r).intoArray(ret, j + this.dj3 + this.im);
            u8r.add((Vector)u11i).intoArray(ret, j + this.dj4);
            u8i.sub((Vector)u11r).intoArray(ret, j + this.dj4 + this.im);
            u9r.sub((Vector)u12i).intoArray(ret, j + this.dj5);
            u9i.add((Vector)u12r).intoArray(ret, j + this.dj5 + this.im);
            u7r.sub((Vector)u10i).intoArray(ret, j + this.dj6);
            u7i.add((Vector)u10r).intoArray(ret, j + this.dj6 + this.im);
            k1 += BLOCK_LOOP;
            i += LENGTH;
            j += LENGTH;
        }
        j += this.jstep;
        int k = 1;
        while (k < this.outerLoopLimit) {
            int index = k * 6;
            double w1r = this.wr[index];
            double w2r = this.wr[index + 1];
            double w3r = this.wr[index + 2];
            double w4r = this.wr[index + 3];
            double w5r = this.wr[index + 4];
            double w6r = this.wr[index + 5];
            double w1i = (double)(-sign) * this.wi[index];
            double w2i = (double)(-sign) * this.wi[index + 1];
            double w3i = (double)(-sign) * this.wi[index + 2];
            double w4i = (double)(-sign) * this.wi[index + 3];
            double w5i = (double)(-sign) * this.wi[index + 4];
            double w6i = (double)(-sign) * this.wi[index + 5];
            int k12 = 0;
            while (k12 < this.innerLoopLimit) {
                DoubleVector z0r = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)i);
                DoubleVector z1r = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)(i + this.di));
                DoubleVector z2r = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)(i + this.di2));
                DoubleVector z3r = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)(i + this.di3));
                DoubleVector z4r = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)(i + this.di4));
                DoubleVector z5r = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)(i + this.di5));
                DoubleVector z6r = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)(i + this.di6));
                DoubleVector z0i = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)(i + this.im));
                DoubleVector z1i = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)(i + this.di + this.im));
                DoubleVector z2i = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)(i + this.di2 + this.im));
                DoubleVector z3i = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)(i + this.di3 + this.im));
                DoubleVector z4i = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)(i + this.di4 + this.im));
                DoubleVector z5i = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)(i + this.di5 + this.im));
                DoubleVector z6i = DoubleVector.fromArray((VectorSpecies)DOUBLE_SPECIES, (double[])data, (int)(i + this.di6 + this.im));
                DoubleVector t0r = z1r.add((Vector)z6r);
                DoubleVector t0i = z1i.add((Vector)z6i);
                DoubleVector t1r = z1r.sub((Vector)z6r);
                DoubleVector t1i = z1i.sub((Vector)z6i);
                DoubleVector t2r = z2r.add((Vector)z5r);
                DoubleVector t2i = z2i.add((Vector)z5i);
                DoubleVector t3r = z2r.sub((Vector)z5r);
                DoubleVector t3i = z2i.sub((Vector)z5i);
                DoubleVector t4r = z4r.add((Vector)z3r);
                DoubleVector t4i = z4i.add((Vector)z3i);
                DoubleVector t5r = z4r.sub((Vector)z3r);
                DoubleVector t5i = z4i.sub((Vector)z3i);
                DoubleVector t6r = t2r.add((Vector)t0r);
                DoubleVector t6i = t2i.add((Vector)t0i);
                DoubleVector t7r = t5r.add((Vector)t3r);
                DoubleVector t7i = t5i.add((Vector)t3i);
                DoubleVector b0r = z0r.add((Vector)t6r).add((Vector)t4r);
                DoubleVector b0i = z0i.add((Vector)t6i).add((Vector)t4i);
                DoubleVector b1r = t6r.add((Vector)t4r).mul(v1);
                DoubleVector b1i = t6i.add((Vector)t4i).mul(v1);
                DoubleVector b2r = t0r.sub((Vector)t4r).mul(v2);
                DoubleVector b2i = t0i.sub((Vector)t4i).mul(v2);
                DoubleVector b3r = t4r.sub((Vector)t2r).mul(v3);
                DoubleVector b3i = t4i.sub((Vector)t2i).mul(v3);
                DoubleVector b4r = t2r.sub((Vector)t0r).mul(v4);
                DoubleVector b4i = t2i.sub((Vector)t0i).mul(v4);
                DoubleVector b5r = t7r.add((Vector)t1r).mul(v5);
                DoubleVector b5i = t7i.add((Vector)t1i).mul(v5);
                DoubleVector b6r = t1r.sub((Vector)t5r).mul(v6);
                DoubleVector b6i = t1i.sub((Vector)t5i).mul(v6);
                DoubleVector b7r = t5r.sub((Vector)t3r).mul(v7);
                DoubleVector b7i = t5i.sub((Vector)t3i).mul(v7);
                DoubleVector b8r = t3r.sub((Vector)t1r).mul(v8);
                DoubleVector b8i = t3i.sub((Vector)t1i).mul(v8);
                DoubleVector u0r = b0r.add((Vector)b1r);
                DoubleVector u0i = b0i.add((Vector)b1i);
                DoubleVector u1r = b2r.add((Vector)b3r);
                DoubleVector u1i = b2i.add((Vector)b3i);
                DoubleVector u2r = b4r.sub((Vector)b3r);
                DoubleVector u2i = b4i.sub((Vector)b3i);
                DoubleVector u3r = b2r.add((Vector)b4r).neg();
                DoubleVector u3i = b2i.add((Vector)b4i).neg();
                DoubleVector u4r = b6r.add((Vector)b7r);
                DoubleVector u4i = b6i.add((Vector)b7i);
                DoubleVector u5r = b8r.sub((Vector)b7r);
                DoubleVector u5i = b8i.sub((Vector)b7i);
                DoubleVector u6r = b8r.add((Vector)b6r).neg();
                DoubleVector u6i = b8i.add((Vector)b6i).neg();
                DoubleVector u7r = u0r.add((Vector)u1r);
                DoubleVector u7i = u0i.add((Vector)u1i);
                DoubleVector u8r = u0r.add((Vector)u2r);
                DoubleVector u8i = u0i.add((Vector)u2i);
                DoubleVector u9r = u0r.add((Vector)u3r);
                DoubleVector u9i = u0i.add((Vector)u3i);
                DoubleVector u10r = u4r.add((Vector)b5r);
                DoubleVector u10i = u4i.add((Vector)b5i);
                DoubleVector u11r = u5r.add((Vector)b5r);
                DoubleVector u11i = u5i.add((Vector)b5i);
                DoubleVector u12r = u6r.add((Vector)b5r);
                DoubleVector u12i = u6i.add((Vector)b5i);
                b0r.intoArray(ret, j);
                b0i.intoArray(ret, j + this.im);
                DoubleVector x1r = u7r.add((Vector)u10i);
                DoubleVector x1i = u7i.sub((Vector)u10r);
                DoubleVector x2r = u9r.add((Vector)u12i);
                DoubleVector x2i = u9i.sub((Vector)u12r);
                DoubleVector x3r = u8r.sub((Vector)u11i);
                DoubleVector x3i = u8i.add((Vector)u11r);
                DoubleVector x4r = u8r.add((Vector)u11i);
                DoubleVector x4i = u8i.sub((Vector)u11r);
                DoubleVector x5r = u9r.sub((Vector)u12i);
                DoubleVector x5i = u9i.add((Vector)u12r);
                DoubleVector x6r = u7r.sub((Vector)u10i);
                DoubleVector x6i = u7i.add((Vector)u10r);
                x1r.mul(w1r).sub((Vector)x1i.mul(w1i)).intoArray(ret, j + this.dj);
                x2r.mul(w2r).sub((Vector)x2i.mul(w2i)).intoArray(ret, j + this.dj2);
                x3r.mul(w3r).sub((Vector)x3i.mul(w3i)).intoArray(ret, j + this.dj3);
                x4r.mul(w4r).sub((Vector)x4i.mul(w4i)).intoArray(ret, j + this.dj4);
                x5r.mul(w5r).sub((Vector)x5i.mul(w5i)).intoArray(ret, j + this.dj5);
                x6r.mul(w6r).sub((Vector)x6i.mul(w6i)).intoArray(ret, j + this.dj6);
                x1i.mul(w1r).add((Vector)x1r.mul(w1i)).intoArray(ret, j + this.dj + this.im);
                x2i.mul(w2r).add((Vector)x2r.mul(w2i)).intoArray(ret, j + this.dj2 + this.im);
                x3i.mul(w3r).add((Vector)x3r.mul(w3i)).intoArray(ret, j + this.dj3 + this.im);
                x4i.mul(w4r).add((Vector)x4r.mul(w4i)).intoArray(ret, j + this.dj4 + this.im);
                x5i.mul(w5r).add((Vector)x5r.mul(w5i)).intoArray(ret, j + this.dj5 + this.im);
                x6i.mul(w6r).add((Vector)x6r.mul(w6i)).intoArray(ret, j + this.dj6 + this.im);
                k12 += BLOCK_LOOP;
                i += LENGTH;
                j += LENGTH;
            }
            ++k;
            j += this.jstep;
        }
    }
}

