/*
 * Decompiled with CFR 0.152.
 */
package ffx.numerics.fft;

import ffx.numerics.fft.MixedRadixFactor;
import ffx.numerics.fft.PassConstants;
import ffx.numerics.fft.PassData;
import jdk.incubator.vector.DoubleVector;
import jdk.incubator.vector.Vector;
import jdk.incubator.vector.VectorSpecies;
import org.apache.commons.math3.util.FastMath;

/**
 * Radix-5 pass of the mixed-radix FFT.
 *
 * <p>Each butterfly reads five complex values spaced {@code di} apart in the input array,
 * combines them, and writes five complex values spaced {@code dj} apart in the output array.
 * The imaginary part of a value is read {@code im} elements after its real part. For the first
 * outer index the results are stored directly; for every later outer index {@code k}, outputs
 * 1 through 4 are multiplied by the twiddle factors found at {@code wr[4k .. 4k+3]} and
 * {@code wi[4k .. 4k+3]} before being stored.
 *
 * <p>Three implementations of the same arithmetic are provided: a scalar one, and two built on
 * {@link DoubleVector} (one for {@code im == 1}, one for any other {@code im}).
 */
public class MixedRadixFactor5 extends MixedRadixFactor {

    /** sqrt(5) / 4; scales the difference between the two paired input sums. */
    private static final double sqrt5_4 = FastMath.sqrt(5.0) / 4.0;
    /** Sine of 0.6283185307179586 rad, i.e. approximately sin(pi / 5). */
    private static final double sinPI_5 = FastMath.sin(0.6283185307179586);
    /** Sine of 1.2566370614359172 rad, i.e. approximately sin(2 * pi / 5). */
    private static final double sin2PI_5 = FastMath.sin(1.2566370614359172);

    /** Input offsets of the 3rd, 4th and 5th butterfly inputs (multiples of {@code di}). */
    private final int di2;
    private final int di3;
    private final int di4;
    /** Output offsets of the 3rd, 4th and 5th butterfly outputs (multiples of {@code dj}). */
    private final int dj2;
    private final int dj3;
    private final int dj4;
    /** Per-instance copy of {@link #sqrt5_4} used by all three pass implementations. */
    private final double tau = sqrt5_4;

    /**
     * Creates the radix-5 pass and precomputes the 2x, 3x and 4x input/output strides.
     *
     * @param passConstants constants for this pass, forwarded to the superclass constructor.
     */
    public MixedRadixFactor5(PassConstants passConstants) {
        super(passConstants);
        di2 = 2 * di;
        di3 = 3 * di;
        di4 = 4 * di;
        dj2 = 2 * dj;
        dj3 = 3 * dj;
        dj4 = 4 * dj;
    }

    /**
     * Scalar implementation of the radix-5 pass.
     *
     * @param passData supplies the input array and offset, the output array and offset, and the
     *                 sign that is folded into the sine constants and the twiddle imaginary parts.
     */
    @Override
    protected void passScalar(PassData passData) {
        final double[] in = passData.in;
        final double[] out = passData.out;
        final int sign = passData.sign;
        int src = passData.inOffset;
        int dst = passData.outOffset;
        // Fold the sign into the sine constants once, outside of the loops.
        final double sgnSin2 = sign * sin2PI_5;
        final double sgnSin1 = sign * sinPI_5;
        final double negSgnSin2 = -sgnSin2;

        // First outer index: results are stored without a twiddle multiplication.
        for (int n = 0; n < innerLoopLimit; n++, src += ii, dst += ii) {
            final int srcIm = src + im;
            final int dstIm = dst + im;
            final double x0Re = in[src];
            final double x1Re = in[src + di];
            final double x2Re = in[src + di2];
            final double x3Re = in[src + di3];
            final double x4Re = in[src + di4];
            final double x0Im = in[srcIm];
            final double x1Im = in[srcIm + di];
            final double x2Im = in[srcIm + di2];
            final double x3Im = in[srcIm + di3];
            final double x4Im = in[srcIm + di4];

            // Sums and differences of the (1,4) and (2,3) input pairs.
            final double sum14Re = x1Re + x4Re;
            final double sum14Im = x1Im + x4Im;
            final double sum23Re = x2Re + x3Re;
            final double sum23Im = x2Im + x3Im;
            final double dif14Re = x1Re - x4Re;
            final double dif14Im = x1Im - x4Im;
            final double dif23Re = x2Re - x3Re;
            final double dif23Im = x2Im - x3Im;

            final double totalRe = sum14Re + sum23Re;
            final double totalIm = sum14Im + sum23Im;
            final double spreadRe = tau * (sum14Re - sum23Re);
            final double spreadIm = tau * (sum14Im - sum23Im);
            final double baseRe = Math.fma(-0.25, totalRe, x0Re);
            final double baseIm = Math.fma(-0.25, totalIm, x0Im);
            // Cosine-weighted parts: cosA feeds outputs 1 and 4, cosB feeds outputs 2 and 3.
            final double cosARe = baseRe + spreadRe;
            final double cosAIm = baseIm + spreadIm;
            final double cosBRe = baseRe - spreadRe;
            final double cosBIm = baseIm - spreadIm;
            // Sine-weighted parts, added to / subtracted from the above after a rotation by i.
            final double sinARe = Math.fma(sgnSin2, dif14Re, sgnSin1 * dif23Re);
            final double sinAIm = Math.fma(sgnSin2, dif14Im, sgnSin1 * dif23Im);
            final double sinBRe = Math.fma(negSgnSin2, dif23Re, sgnSin1 * dif14Re);
            final double sinBIm = Math.fma(negSgnSin2, dif23Im, sgnSin1 * dif14Im);

            out[dst] = x0Re + totalRe;
            out[dstIm] = x0Im + totalIm;
            out[dst + dj] = cosARe - sinAIm;
            out[dstIm + dj] = cosAIm + sinARe;
            out[dst + dj2] = cosBRe - sinBIm;
            out[dstIm + dj2] = cosBIm + sinBRe;
            out[dst + dj3] = cosBRe + sinBIm;
            out[dstIm + dj3] = cosBIm - sinBRe;
            out[dst + dj4] = cosARe + sinAIm;
            out[dstIm + dj4] = cosAIm - sinARe;
        }

        // Remaining outer indices: outputs 1..4 are multiplied by their twiddle factors.
        dst += jstep;
        for (int k = 1; k < outerLoopLimit; k++, dst += jstep) {
            final int t = 4 * k;
            final double w1Re = wr[t];
            final double w2Re = wr[t + 1];
            final double w3Re = wr[t + 2];
            final double w4Re = wr[t + 3];
            final double w1Im = -sign * wi[t];
            final double w2Im = -sign * wi[t + 1];
            final double w3Im = -sign * wi[t + 2];
            final double w4Im = -sign * wi[t + 3];

            for (int n = 0; n < innerLoopLimit; n++, src += ii, dst += ii) {
                final int srcIm = src + im;
                final int dstIm = dst + im;
                final double x0Re = in[src];
                final double x1Re = in[src + di];
                final double x2Re = in[src + di2];
                final double x3Re = in[src + di3];
                final double x4Re = in[src + di4];
                final double x0Im = in[srcIm];
                final double x1Im = in[srcIm + di];
                final double x2Im = in[srcIm + di2];
                final double x3Im = in[srcIm + di3];
                final double x4Im = in[srcIm + di4];

                final double sum14Re = x1Re + x4Re;
                final double sum14Im = x1Im + x4Im;
                final double sum23Re = x2Re + x3Re;
                final double sum23Im = x2Im + x3Im;
                final double dif14Re = x1Re - x4Re;
                final double dif14Im = x1Im - x4Im;
                final double dif23Re = x2Re - x3Re;
                final double dif23Im = x2Im - x3Im;

                final double totalRe = sum14Re + sum23Re;
                final double totalIm = sum14Im + sum23Im;
                final double spreadRe = tau * (sum14Re - sum23Re);
                final double spreadIm = tau * (sum14Im - sum23Im);
                final double baseRe = Math.fma(-0.25, totalRe, x0Re);
                final double baseIm = Math.fma(-0.25, totalIm, x0Im);
                final double cosARe = baseRe + spreadRe;
                final double cosAIm = baseIm + spreadIm;
                final double cosBRe = baseRe - spreadRe;
                final double cosBIm = baseIm - spreadIm;
                final double sinARe = Math.fma(sgnSin2, dif14Re, sgnSin1 * dif23Re);
                final double sinAIm = Math.fma(sgnSin2, dif14Im, sgnSin1 * dif23Im);
                final double sinBRe = Math.fma(negSgnSin2, dif23Re, sgnSin1 * dif14Re);
                final double sinBIm = Math.fma(negSgnSin2, dif23Im, sgnSin1 * dif14Im);

                // Output 0 never carries a twiddle factor.
                out[dst] = x0Re + totalRe;
                out[dstIm] = x0Im + totalIm;
                multiplyAndStore(cosARe - sinAIm, cosAIm + sinARe, w1Re, w1Im, out, dst + dj, dstIm + dj);
                multiplyAndStore(cosBRe - sinBIm, cosBIm + sinBRe, w2Re, w2Im, out, dst + dj2, dstIm + dj2);
                multiplyAndStore(cosBRe + sinBIm, cosBIm - sinBRe, w3Re, w3Im, out, dst + dj3, dstIm + dj3);
                multiplyAndStore(cosARe + sinAIm, cosAIm - sinARe, w4Re, w4Im, out, dst + dj4, dstIm + dj4);
            }
        }
    }

    /**
     * Vectorized entry point. Delegates to {@link #passScalar(PassData)} when
     * {@code isValidSIMDWidth(simdWidth)} is false, to {@link #interleaved(PassData)} when
     * {@code im == 1}, and to {@link #blocked(PassData)} otherwise.
     *
     * @param passData the input/output arrays, offsets and sign for this pass.
     */
    @Override
    protected void passSIMD(PassData passData) {
        if (!isValidSIMDWidth(simdWidth)) {
            passScalar(passData);
            return;
        }
        if (im == 1) {
            interleaved(passData);
        } else {
            blocked(passData);
        }
    }

    /**
     * Vector implementation used when {@code im == 1}. Each vector is loaded from a single
     * offset and is presumably made of alternating real/imaginary lanes; multiplications by
     * {@code i} are expressed as a {@code SHUFFLE_RE_IM} rearrangement combined with the
     * {@code NEGATE_RE} / {@code NEGATE_IM} masks.
     *
     * @param passData the input/output arrays, offsets and sign for this pass.
     */
    protected void interleaved(PassData passData) {
        final double[] in = passData.in;
        final double[] out = passData.out;
        final int sign = passData.sign;
        int src = passData.inOffset;
        int dst = passData.outOffset;
        final double sgnSin2 = sign * sin2PI_5;
        final double sgnSin1 = sign * sinPI_5;
        final double negSgnSin2 = -sgnSin2;

        // First outer index: no twiddle multiplication.
        for (int n = 0; n < innerLoopLimit; n += INTERLEAVED_LOOP, src += LENGTH, dst += LENGTH) {
            final DoubleVector x0 = DoubleVector.fromArray(DOUBLE_SPECIES, in, src);
            final DoubleVector x1 = DoubleVector.fromArray(DOUBLE_SPECIES, in, src + di);
            final DoubleVector x2 = DoubleVector.fromArray(DOUBLE_SPECIES, in, src + di2);
            final DoubleVector x3 = DoubleVector.fromArray(DOUBLE_SPECIES, in, src + di3);
            final DoubleVector x4 = DoubleVector.fromArray(DOUBLE_SPECIES, in, src + di4);

            final DoubleVector sum14 = x1.add(x4);
            final DoubleVector sum23 = x2.add(x3);
            final DoubleVector dif14 = x1.sub(x4);
            final DoubleVector dif23 = x2.sub(x3);
            final DoubleVector total = sum14.add(sum23);
            final DoubleVector spread = sum14.sub(sum23).mul(tau);
            final DoubleVector base = total.mul(-0.25).add(x0);
            final DoubleVector cosA = base.add(spread);
            final DoubleVector cosB = base.sub(spread);
            // The sine-weighted terms are lane-swapped here so that the masks below finish
            // the multiplication by +i or -i.
            final DoubleVector sinA = dif14.mul(sgnSin2).add(dif23.mul(sgnSin1)).rearrange(SHUFFLE_RE_IM);
            final DoubleVector sinB = dif23.mul(negSgnSin2).add(dif14.mul(sgnSin1)).rearrange(SHUFFLE_RE_IM);

            x0.add(total).intoArray(out, dst);
            cosA.add(sinA.mul(NEGATE_RE)).intoArray(out, dst + dj);
            cosB.add(sinB.mul(NEGATE_RE)).intoArray(out, dst + dj2);
            cosB.add(sinB.mul(NEGATE_IM)).intoArray(out, dst + dj3);
            cosA.add(sinA.mul(NEGATE_IM)).intoArray(out, dst + dj4);
        }

        // Remaining outer indices: apply the twiddle factors to outputs 1..4.
        dst += jstep;
        for (int k = 1; k < outerLoopLimit; k++, dst += jstep) {
            final int t = 4 * k;
            final DoubleVector w1Re = DoubleVector.broadcast(DOUBLE_SPECIES, wr[t]);
            final DoubleVector w2Re = DoubleVector.broadcast(DOUBLE_SPECIES, wr[t + 1]);
            final DoubleVector w3Re = DoubleVector.broadcast(DOUBLE_SPECIES, wr[t + 2]);
            final DoubleVector w4Re = DoubleVector.broadcast(DOUBLE_SPECIES, wr[t + 3]);
            // The imaginary twiddle parts are pre-multiplied by NEGATE_IM so that, after the
            // lane swap in the loop body, the cross terms carry the signs of a complex product.
            final DoubleVector w1Im = DoubleVector.broadcast(DOUBLE_SPECIES, -sign * wi[t]).mul(NEGATE_IM);
            final DoubleVector w2Im = DoubleVector.broadcast(DOUBLE_SPECIES, -sign * wi[t + 1]).mul(NEGATE_IM);
            final DoubleVector w3Im = DoubleVector.broadcast(DOUBLE_SPECIES, -sign * wi[t + 2]).mul(NEGATE_IM);
            final DoubleVector w4Im = DoubleVector.broadcast(DOUBLE_SPECIES, -sign * wi[t + 3]).mul(NEGATE_IM);

            for (int n = 0; n < innerLoopLimit; n += INTERLEAVED_LOOP, src += LENGTH, dst += LENGTH) {
                final DoubleVector x0 = DoubleVector.fromArray(DOUBLE_SPECIES, in, src);
                final DoubleVector x1 = DoubleVector.fromArray(DOUBLE_SPECIES, in, src + di);
                final DoubleVector x2 = DoubleVector.fromArray(DOUBLE_SPECIES, in, src + di2);
                final DoubleVector x3 = DoubleVector.fromArray(DOUBLE_SPECIES, in, src + di3);
                final DoubleVector x4 = DoubleVector.fromArray(DOUBLE_SPECIES, in, src + di4);

                final DoubleVector sum14 = x1.add(x4);
                final DoubleVector sum23 = x2.add(x3);
                final DoubleVector dif14 = x1.sub(x4);
                final DoubleVector dif23 = x2.sub(x3);
                final DoubleVector total = sum14.add(sum23);
                final DoubleVector spread = sum14.sub(sum23).mul(tau);
                final DoubleVector base = total.mul(-0.25).add(x0);
                final DoubleVector cosA = base.add(spread);
                final DoubleVector cosB = base.sub(spread);
                final DoubleVector sinA = dif14.mul(sgnSin2).add(dif23.mul(sgnSin1)).rearrange(SHUFFLE_RE_IM);
                final DoubleVector sinB = dif23.mul(negSgnSin2).add(dif14.mul(sgnSin1)).rearrange(SHUFFLE_RE_IM);

                x0.add(total).intoArray(out, dst);
                // Untwiddled outputs 1..4.
                final DoubleVector y1 = sinA.fma(NEGATE_RE, cosA);
                final DoubleVector y2 = sinB.fma(NEGATE_RE, cosB);
                final DoubleVector y3 = sinB.fma(NEGATE_IM, cosB);
                final DoubleVector y4 = sinA.fma(NEGATE_IM, cosA);
                // Complex product with the twiddle: wRe * y + swap(wIm * y).
                w1Re.fma(y1, w1Im.mul(y1).rearrange(SHUFFLE_RE_IM)).intoArray(out, dst + dj);
                w2Re.fma(y2, w2Im.mul(y2).rearrange(SHUFFLE_RE_IM)).intoArray(out, dst + dj2);
                w3Re.fma(y3, w3Im.mul(y3).rearrange(SHUFFLE_RE_IM)).intoArray(out, dst + dj3);
                w4Re.fma(y4, w4Im.mul(y4).rearrange(SHUFFLE_RE_IM)).intoArray(out, dst + dj4);
            }
        }
    }

    /**
     * Vector implementation used when {@code im != 1}. Real and imaginary parts are loaded into
     * separate vectors, the imaginary vector starting {@code im} elements after the real one,
     * and the arithmetic mirrors {@link #passScalar(PassData)} lane by lane.
     *
     * @param passData the input/output arrays, offsets and sign for this pass.
     */
    protected void blocked(PassData passData) {
        final double[] in = passData.in;
        final double[] out = passData.out;
        final int sign = passData.sign;
        int src = passData.inOffset;
        int dst = passData.outOffset;
        final double sgnSin2 = sign * sin2PI_5;
        final double sgnSin1 = sign * sinPI_5;
        final double negSgnSin2 = -sgnSin2;

        // First outer index: no twiddle multiplication.
        for (int n = 0; n < innerLoopLimit; n += BLOCK_LOOP, src += LENGTH, dst += LENGTH) {
            final int srcIm = src + im;
            final int dstIm = dst + im;
            final DoubleVector x0Re = DoubleVector.fromArray(DOUBLE_SPECIES, in, src);
            final DoubleVector x1Re = DoubleVector.fromArray(DOUBLE_SPECIES, in, src + di);
            final DoubleVector x2Re = DoubleVector.fromArray(DOUBLE_SPECIES, in, src + di2);
            final DoubleVector x3Re = DoubleVector.fromArray(DOUBLE_SPECIES, in, src + di3);
            final DoubleVector x4Re = DoubleVector.fromArray(DOUBLE_SPECIES, in, src + di4);
            final DoubleVector x0Im = DoubleVector.fromArray(DOUBLE_SPECIES, in, srcIm);
            final DoubleVector x1Im = DoubleVector.fromArray(DOUBLE_SPECIES, in, srcIm + di);
            final DoubleVector x2Im = DoubleVector.fromArray(DOUBLE_SPECIES, in, srcIm + di2);
            final DoubleVector x3Im = DoubleVector.fromArray(DOUBLE_SPECIES, in, srcIm + di3);
            final DoubleVector x4Im = DoubleVector.fromArray(DOUBLE_SPECIES, in, srcIm + di4);

            final DoubleVector sum14Re = x1Re.add(x4Re);
            final DoubleVector sum14Im = x1Im.add(x4Im);
            final DoubleVector sum23Re = x2Re.add(x3Re);
            final DoubleVector sum23Im = x2Im.add(x3Im);
            final DoubleVector dif14Re = x1Re.sub(x4Re);
            final DoubleVector dif14Im = x1Im.sub(x4Im);
            final DoubleVector dif23Re = x2Re.sub(x3Re);
            final DoubleVector dif23Im = x2Im.sub(x3Im);

            final DoubleVector totalRe = sum14Re.add(sum23Re);
            final DoubleVector totalIm = sum14Im.add(sum23Im);
            final DoubleVector spreadRe = sum14Re.sub(sum23Re).mul(tau);
            final DoubleVector spreadIm = sum14Im.sub(sum23Im).mul(tau);
            final DoubleVector baseRe = totalRe.mul(-0.25).add(x0Re);
            final DoubleVector baseIm = totalIm.mul(-0.25).add(x0Im);
            final DoubleVector cosARe = baseRe.add(spreadRe);
            final DoubleVector cosAIm = baseIm.add(spreadIm);
            final DoubleVector cosBRe = baseRe.sub(spreadRe);
            final DoubleVector cosBIm = baseIm.sub(spreadIm);
            final DoubleVector sinARe = dif14Re.mul(sgnSin2).add(dif23Re.mul(sgnSin1));
            final DoubleVector sinAIm = dif14Im.mul(sgnSin2).add(dif23Im.mul(sgnSin1));
            final DoubleVector sinBRe = dif23Re.mul(negSgnSin2).add(dif14Re.mul(sgnSin1));
            final DoubleVector sinBIm = dif23Im.mul(negSgnSin2).add(dif14Im.mul(sgnSin1));

            x0Re.add(totalRe).intoArray(out, dst);
            x0Im.add(totalIm).intoArray(out, dstIm);
            cosARe.sub(sinAIm).intoArray(out, dst + dj);
            cosAIm.add(sinARe).intoArray(out, dstIm + dj);
            cosBRe.sub(sinBIm).intoArray(out, dst + dj2);
            cosBIm.add(sinBRe).intoArray(out, dstIm + dj2);
            cosBRe.add(sinBIm).intoArray(out, dst + dj3);
            cosBIm.sub(sinBRe).intoArray(out, dstIm + dj3);
            cosARe.add(sinAIm).intoArray(out, dst + dj4);
            cosAIm.sub(sinARe).intoArray(out, dstIm + dj4);
        }

        // Remaining outer indices: apply the (scalar) twiddle factors to outputs 1..4.
        dst += jstep;
        for (int k = 1; k < outerLoopLimit; k++, dst += jstep) {
            final int t = 4 * k;
            final double w1Re = wr[t];
            final double w2Re = wr[t + 1];
            final double w3Re = wr[t + 2];
            final double w4Re = wr[t + 3];
            final double w1Im = -sign * wi[t];
            final double w2Im = -sign * wi[t + 1];
            final double w3Im = -sign * wi[t + 2];
            final double w4Im = -sign * wi[t + 3];

            for (int n = 0; n < innerLoopLimit; n += BLOCK_LOOP, src += LENGTH, dst += LENGTH) {
                final int srcIm = src + im;
                final int dstIm = dst + im;
                final DoubleVector x0Re = DoubleVector.fromArray(DOUBLE_SPECIES, in, src);
                final DoubleVector x1Re = DoubleVector.fromArray(DOUBLE_SPECIES, in, src + di);
                final DoubleVector x2Re = DoubleVector.fromArray(DOUBLE_SPECIES, in, src + di2);
                final DoubleVector x3Re = DoubleVector.fromArray(DOUBLE_SPECIES, in, src + di3);
                final DoubleVector x4Re = DoubleVector.fromArray(DOUBLE_SPECIES, in, src + di4);
                final DoubleVector x0Im = DoubleVector.fromArray(DOUBLE_SPECIES, in, srcIm);
                final DoubleVector x1Im = DoubleVector.fromArray(DOUBLE_SPECIES, in, srcIm + di);
                final DoubleVector x2Im = DoubleVector.fromArray(DOUBLE_SPECIES, in, srcIm + di2);
                final DoubleVector x3Im = DoubleVector.fromArray(DOUBLE_SPECIES, in, srcIm + di3);
                final DoubleVector x4Im = DoubleVector.fromArray(DOUBLE_SPECIES, in, srcIm + di4);

                final DoubleVector sum14Re = x1Re.add(x4Re);
                final DoubleVector sum14Im = x1Im.add(x4Im);
                final DoubleVector sum23Re = x2Re.add(x3Re);
                final DoubleVector sum23Im = x2Im.add(x3Im);
                final DoubleVector dif14Re = x1Re.sub(x4Re);
                final DoubleVector dif14Im = x1Im.sub(x4Im);
                final DoubleVector dif23Re = x2Re.sub(x3Re);
                final DoubleVector dif23Im = x2Im.sub(x3Im);

                final DoubleVector totalRe = sum14Re.add(sum23Re);
                final DoubleVector totalIm = sum14Im.add(sum23Im);
                final DoubleVector spreadRe = sum14Re.sub(sum23Re).mul(tau);
                final DoubleVector spreadIm = sum14Im.sub(sum23Im).mul(tau);
                final DoubleVector baseRe = totalRe.mul(-0.25).add(x0Re);
                final DoubleVector baseIm = totalIm.mul(-0.25).add(x0Im);
                final DoubleVector cosARe = baseRe.add(spreadRe);
                final DoubleVector cosAIm = baseIm.add(spreadIm);
                final DoubleVector cosBRe = baseRe.sub(spreadRe);
                final DoubleVector cosBIm = baseIm.sub(spreadIm);
                final DoubleVector sinARe = dif14Re.mul(sgnSin2).add(dif23Re.mul(sgnSin1));
                final DoubleVector sinAIm = dif14Im.mul(sgnSin2).add(dif23Im.mul(sgnSin1));
                final DoubleVector sinBRe = dif23Re.mul(negSgnSin2).add(dif14Re.mul(sgnSin1));
                final DoubleVector sinBIm = dif23Im.mul(negSgnSin2).add(dif14Im.mul(sgnSin1));

                x0Re.add(totalRe).intoArray(out, dst);
                x0Im.add(totalIm).intoArray(out, dstIm);

                // Untwiddled outputs 1..4.
                final DoubleVector y1Re = cosARe.sub(sinAIm);
                final DoubleVector y1Im = cosAIm.add(sinARe);
                final DoubleVector y2Re = cosBRe.sub(sinBIm);
                final DoubleVector y2Im = cosBIm.add(sinBRe);
                final DoubleVector y3Re = cosBRe.add(sinBIm);
                final DoubleVector y3Im = cosBIm.sub(sinBRe);
                final DoubleVector y4Re = cosARe.add(sinAIm);
                final DoubleVector y4Im = cosAIm.sub(sinARe);

                // Real parts of y * w: yRe * wRe - yIm * wIm.
                y1Re.mul(w1Re).sub(y1Im.mul(w1Im)).intoArray(out, dst + dj);
                y2Re.mul(w2Re).sub(y2Im.mul(w2Im)).intoArray(out, dst + dj2);
                y3Re.mul(w3Re).sub(y3Im.mul(w3Im)).intoArray(out, dst + dj3);
                y4Re.mul(w4Re).sub(y4Im.mul(w4Im)).intoArray(out, dst + dj4);
                // Imaginary parts of y * w: yIm * wRe + yRe * wIm.
                y1Im.mul(w1Re).add(y1Re.mul(w1Im)).intoArray(out, dstIm + dj);
                y2Im.mul(w2Re).add(y2Re.mul(w2Im)).intoArray(out, dstIm + dj2);
                y3Im.mul(w3Re).add(y3Re.mul(w3Im)).intoArray(out, dstIm + dj3);
                y4Im.mul(w4Re).add(y4Re.mul(w4Im)).intoArray(out, dstIm + dj4);
            }
        }
    }
}

