/*
 * Decompiled with CFR 0.152.
 */
package ffx.numerics.fft;

import ffx.numerics.fft.MixedRadixFactor;
import ffx.numerics.fft.PassConstants;
import ffx.numerics.fft.PassData;
import jdk.incubator.vector.DoubleVector;
import jdk.incubator.vector.Vector;
import jdk.incubator.vector.VectorSpecies;
import org.apache.commons.math3.util.FastMath;

/**
 * Radix-3 pass of a mixed-radix complex FFT.
 *
 * <p>For every triple of complex inputs {@code z0, z1, z2} (read at offsets {@code 0},
 * {@code di} and {@code 2*di} from the running input index) each kernel in this class computes
 *
 * <pre>
 *   t1 = z1 + z2
 *   t2 = z0 - t1 / 2
 *   t3 = sign * (sqrt(3) / 2) * (z1 - z2)
 *   out0 = z0 + t1
 *   out1 = (t2 + i * t3) * w1
 *   out2 = (t2 - i * t3) * w2
 * </pre>
 *
 * and writes the results at offsets {@code 0}, {@code dj} and {@code 2*dj} from the running
 * output index. For the first outer iteration ({@code k == 0}) no twiddle multiplication is
 * applied; for {@code k >= 1} the twiddles are read from {@code wr[2k], wr[2k+1]} and
 * {@code -sign * wi[2k], -sign * wi[2k+1]}.
 *
 * <p>The imaginary part of an element is located {@code im} doubles after its real part. When
 * {@code im == 1} the SIMD path uses the "interleaved" kernels, otherwise the "blocked" kernels.
 *
 * <p>NOTE(review): the loop/stride fields ({@code di}, {@code dj}, {@code ii}, {@code im},
 * {@code jstep}, {@code innerLoopLimit}, {@code outerLoopLimit}, {@code wr}, {@code wi},
 * {@code simdWidth}) and the SIMD constants ({@code LENGTH*}, {@code BLOCK_LOOP_*},
 * {@code INTERLEAVED_LOOP_*}, {@code NEGATE_*}, {@code SHUFFLE_RE_IM_*}) are inherited from
 * {@link MixedRadixFactor}; their exact definitions are not visible here.
 */
public class MixedRadixFactor3 extends MixedRadixFactor {

    /** sqrt(3) / 2, the magnitude of the imaginary part of the primitive cube roots of unity. */
    private static final double SQRT3_2 = FastMath.sqrt(3.0) / 2.0;

    /** Candidate SIMD widths (in doubles), tried from widest to narrowest. */
    private static final int[] SIMD_SIZES = {8, 4, 2};

    /** Twice the input stride {@code di}: offset of the third input of a butterfly. */
    private final int di2;

    /** Twice the output stride {@code dj}: offset of the third output of a butterfly. */
    private final int dj2;

    /**
     * Creates the radix-3 pass.
     *
     * @param passConstants pass configuration forwarded to the superclass.
     */
    public MixedRadixFactor3(PassConstants passConstants) {
        super(passConstants);
        this.di2 = 2 * this.di;
        this.dj2 = 2 * this.dj;
    }

    /**
     * Checks whether the SIMD kernels can be used with the given vector width.
     *
     * @param width the vector width in doubles.
     * @return {@code true} if {@code width} is 2, 4 or 8 and the inner loop limit is a multiple
     *     of {@code width / 2} (interleaved layout, {@code im == 1}) or of {@code width}
     *     (blocked layout).
     */
    @Override
    public boolean isValidSIMDWidth(int width) {
        // Only 128-, 256- and 512-bit kernels exist.
        if (width != 2 && width != 4 && width != 8) {
            return false;
        }
        if (im == 1) {
            // Interleaved: each vector holds width / 2 complex values.
            return innerLoopLimit % (width / 2) == 0;
        }
        // Blocked: each vector holds width real (or imaginary) values.
        return innerLoopLimit % width == 0;
    }

    /**
     * Selects the widest usable SIMD width.
     *
     * @return {@code LENGTH} if it is valid for this pass, otherwise the largest of 8, 4, 2 that
     *     is smaller than {@code LENGTH} and valid, or 0 if none is valid.
     */
    @Override
    public int getOptimalSIMDWidth() {
        if (isValidSIMDWidth(LENGTH)) {
            return LENGTH;
        }
        for (int size : SIMD_SIZES) {
            if (size < LENGTH && isValidSIMDWidth(size)) {
                return size;
            }
        }
        return 0;
    }

    /**
     * Scalar implementation of the radix-3 pass.
     *
     * @param passData supplies the input array/offset, output array/offset and transform sign.
     */
    @Override
    protected void passScalar(PassData passData) {
        final double[] data = passData.in;
        final double[] ret = passData.out;
        final int sign = passData.sign;
        int i = passData.inOffset;
        int j = passData.outOffset;
        final double tau = sign * SQRT3_2;

        // First outer iteration (k == 0): results are stored without a twiddle multiplication.
        for (int k1 = 0; k1 < innerLoopLimit; k1++, i += ii, j += ii) {
            final double z0_r = data[i];
            final double z1_r = data[i + di];
            final double z2_r = data[i + di2];
            final double z0_i = data[i + im];
            final double z1_i = data[i + di + im];
            final double z2_i = data[i + di2 + im];
            final double t1_r = z1_r + z2_r;
            final double t1_i = z1_i + z2_i;
            final double t2_r = Math.fma(-0.5, t1_r, z0_r);
            final double t2_i = Math.fma(-0.5, t1_i, z0_i);
            final double t3_r = tau * (z1_r - z2_r);
            final double t3_i = tau * (z1_i - z2_i);
            ret[j] = z0_r + t1_r;
            ret[j + im] = z0_i + t1_i;
            ret[j + dj] = t2_r - t3_i;
            ret[j + dj + im] = t2_i + t3_r;
            ret[j + dj2] = t2_r + t3_i;
            ret[j + dj2 + im] = t2_i - t3_r;
        }
        j += jstep;

        // Remaining outer iterations: the second and third outputs are scaled by twiddles.
        for (int k = 1; k < outerLoopLimit; k++, j += jstep) {
            final int index = k * 2;
            final double w1_r = wr[index];
            final double w2_r = wr[index + 1];
            final double w1_i = -sign * wi[index];
            final double w2_i = -sign * wi[index + 1];
            for (int k1 = 0; k1 < innerLoopLimit; k1++, i += ii, j += ii) {
                final double z0_r = data[i];
                final double z1_r = data[i + di];
                final double z2_r = data[i + di2];
                final double z0_i = data[i + im];
                final double z1_i = data[i + di + im];
                final double z2_i = data[i + di2 + im];
                final double t1_r = z1_r + z2_r;
                final double t1_i = z1_i + z2_i;
                final double t2_r = Math.fma(-0.5, t1_r, z0_r);
                final double t2_i = Math.fma(-0.5, t1_i, z0_i);
                final double t3_r = tau * (z1_r - z2_r);
                final double t3_i = tau * (z1_i - z2_i);
                ret[j] = z0_r + t1_r;
                ret[j + im] = z0_i + t1_i;
                // NOTE(review): multiplyAndStore is inherited; presumably it stores the complex
                // product (x * w) at the given real/imaginary indices -- confirm in superclass.
                multiplyAndStore(t2_r - t3_i, t2_i + t3_r, w1_r, w1_i, ret, j + dj, j + dj + im);
                multiplyAndStore(t2_r + t3_i, t2_i - t3_r, w2_r, w2_i, ret, j + dj2, j + dj2 + im);
            }
        }
    }

    /**
     * SIMD implementation of the radix-3 pass.
     *
     * <p>Falls back to {@link #passScalar(PassData)} when the configured {@code simdWidth} is not
     * valid for this pass; otherwise dispatches to the interleaved ({@code im == 1}) or blocked
     * kernels.
     *
     * @param passData supplies the input array/offset, output array/offset and transform sign.
     */
    @Override
    protected void passSIMD(PassData passData) {
        if (!isValidSIMDWidth(simdWidth)) {
            passScalar(passData);
        } else if (im == 1) {
            interleaved(passData, simdWidth);
        } else {
            blocked(passData, simdWidth);
        }
    }

    /**
     * Dispatches to the interleaved kernel for the given vector width.
     *
     * @param passData the pass data.
     * @param simdLength vector width in doubles (2, 4 or 8); any other value uses the scalar path.
     */
    private void interleaved(PassData passData, int simdLength) {
        switch (simdLength) {
            case 2:
                interleaved128(passData);
                break;
            case 4:
                interleaved256(passData);
                break;
            case 8:
                interleaved512(passData);
                break;
            default:
                passScalar(passData);
        }
    }

    /**
     * Dispatches to the blocked kernel for the given vector width.
     *
     * @param passData the pass data.
     * @param simdLength vector width in doubles (2, 4 or 8); any other value uses the scalar path.
     */
    private void blocked(PassData passData, int simdLength) {
        switch (simdLength) {
            case 2:
                blocked128(passData);
                break;
            case 4:
                blocked256(passData);
                break;
            case 8:
                blocked512(passData);
                break;
            default:
                passScalar(passData);
        }
    }

    /**
     * Blocked-layout kernel using 128-bit vectors: real and imaginary parts are loaded and
     * stored as separate vectors, {@code im} doubles apart.
     *
     * @param passData supplies the input array/offset, output array/offset and transform sign.
     */
    private void blocked128(PassData passData) {
        final double[] data = passData.in;
        final double[] ret = passData.out;
        final int sign = passData.sign;
        int i = passData.inOffset;
        int j = passData.outOffset;
        final double tau = sign * SQRT3_2;

        // First outer iteration (k == 0): no twiddle multiplication.
        for (int k1 = 0; k1 < innerLoopLimit; k1 += BLOCK_LOOP_128, i += LENGTH_128, j += LENGTH_128) {
            DoubleVector z0_r = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i);
            DoubleVector z1_r = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i + di);
            DoubleVector z2_r = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i + di2);
            DoubleVector z0_i = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i + im);
            DoubleVector z1_i = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i + di + im);
            DoubleVector z2_i = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i + di2 + im);
            DoubleVector t1_r = z1_r.add(z2_r);
            DoubleVector t1_i = z1_i.add(z2_i);
            DoubleVector t2_r = t1_r.mul(-0.5).add(z0_r);
            DoubleVector t2_i = t1_i.mul(-0.5).add(z0_i);
            DoubleVector t3_r = z1_r.sub(z2_r).mul(tau);
            DoubleVector t3_i = z1_i.sub(z2_i).mul(tau);
            z0_r.add(t1_r).intoArray(ret, j);
            z0_i.add(t1_i).intoArray(ret, j + im);
            t2_r.sub(t3_i).intoArray(ret, j + dj);
            t2_i.add(t3_r).intoArray(ret, j + dj + im);
            t2_r.add(t3_i).intoArray(ret, j + dj2);
            t2_i.sub(t3_r).intoArray(ret, j + dj2 + im);
        }
        j += jstep;

        // Remaining outer iterations: complex-multiply outputs 1 and 2 by the twiddles.
        for (int k = 1; k < outerLoopLimit; k++, j += jstep) {
            final int index = k * 2;
            final double w1_r = wr[index];
            final double w2_r = wr[index + 1];
            final double w1_i = -sign * wi[index];
            final double w2_i = -sign * wi[index + 1];
            for (int k1 = 0; k1 < innerLoopLimit; k1 += BLOCK_LOOP_128, i += LENGTH_128, j += LENGTH_128) {
                DoubleVector z0_r = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i);
                DoubleVector z1_r = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i + di);
                DoubleVector z2_r = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i + di2);
                DoubleVector z0_i = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i + im);
                DoubleVector z1_i = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i + di + im);
                DoubleVector z2_i = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i + di2 + im);
                DoubleVector t1_r = z1_r.add(z2_r);
                DoubleVector t1_i = z1_i.add(z2_i);
                DoubleVector t2_r = t1_r.mul(-0.5).add(z0_r);
                DoubleVector t2_i = t1_i.mul(-0.5).add(z0_i);
                DoubleVector t3_r = z1_r.sub(z2_r).mul(tau);
                DoubleVector t3_i = z1_i.sub(z2_i).mul(tau);
                z0_r.add(t1_r).intoArray(ret, j);
                z0_i.add(t1_i).intoArray(ret, j + im);
                DoubleVector x1_r = t2_r.sub(t3_i);
                DoubleVector x1_i = t2_i.add(t3_r);
                DoubleVector x2_r = t2_r.add(t3_i);
                DoubleVector x2_i = t2_i.sub(t3_r);
                // Real parts: x_r * w_r - x_i * w_i; imaginary parts: x_i * w_r + x_r * w_i.
                x1_r.mul(w1_r).sub(x1_i.mul(w1_i)).intoArray(ret, j + dj);
                x2_r.mul(w2_r).sub(x2_i.mul(w2_i)).intoArray(ret, j + dj2);
                x1_i.mul(w1_r).add(x1_r.mul(w1_i)).intoArray(ret, j + dj + im);
                x2_i.mul(w2_r).add(x2_r.mul(w2_i)).intoArray(ret, j + dj2 + im);
            }
        }
    }

    /**
     * Blocked-layout kernel using 256-bit vectors: real and imaginary parts are loaded and
     * stored as separate vectors, {@code im} doubles apart.
     *
     * @param passData supplies the input array/offset, output array/offset and transform sign.
     */
    private void blocked256(PassData passData) {
        final double[] data = passData.in;
        final double[] ret = passData.out;
        final int sign = passData.sign;
        int i = passData.inOffset;
        int j = passData.outOffset;
        final double tau = sign * SQRT3_2;

        // First outer iteration (k == 0): no twiddle multiplication.
        for (int k1 = 0; k1 < innerLoopLimit; k1 += BLOCK_LOOP_256, i += LENGTH_256, j += LENGTH_256) {
            DoubleVector z0_r = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i);
            DoubleVector z1_r = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i + di);
            DoubleVector z2_r = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i + di2);
            DoubleVector z0_i = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i + im);
            DoubleVector z1_i = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i + di + im);
            DoubleVector z2_i = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i + di2 + im);
            DoubleVector t1_r = z1_r.add(z2_r);
            DoubleVector t1_i = z1_i.add(z2_i);
            DoubleVector t2_r = t1_r.mul(-0.5).add(z0_r);
            DoubleVector t2_i = t1_i.mul(-0.5).add(z0_i);
            DoubleVector t3_r = z1_r.sub(z2_r).mul(tau);
            DoubleVector t3_i = z1_i.sub(z2_i).mul(tau);
            z0_r.add(t1_r).intoArray(ret, j);
            z0_i.add(t1_i).intoArray(ret, j + im);
            t2_r.sub(t3_i).intoArray(ret, j + dj);
            t2_i.add(t3_r).intoArray(ret, j + dj + im);
            t2_r.add(t3_i).intoArray(ret, j + dj2);
            t2_i.sub(t3_r).intoArray(ret, j + dj2 + im);
        }
        j += jstep;

        // Remaining outer iterations: complex-multiply outputs 1 and 2 by the twiddles.
        for (int k = 1; k < outerLoopLimit; k++, j += jstep) {
            final int index = k * 2;
            final double w1_r = wr[index];
            final double w2_r = wr[index + 1];
            final double w1_i = -sign * wi[index];
            final double w2_i = -sign * wi[index + 1];
            for (int k1 = 0; k1 < innerLoopLimit; k1 += BLOCK_LOOP_256, i += LENGTH_256, j += LENGTH_256) {
                DoubleVector z0_r = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i);
                DoubleVector z1_r = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i + di);
                DoubleVector z2_r = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i + di2);
                DoubleVector z0_i = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i + im);
                DoubleVector z1_i = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i + di + im);
                DoubleVector z2_i = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i + di2 + im);
                DoubleVector t1_r = z1_r.add(z2_r);
                DoubleVector t1_i = z1_i.add(z2_i);
                DoubleVector t2_r = t1_r.mul(-0.5).add(z0_r);
                DoubleVector t2_i = t1_i.mul(-0.5).add(z0_i);
                DoubleVector t3_r = z1_r.sub(z2_r).mul(tau);
                DoubleVector t3_i = z1_i.sub(z2_i).mul(tau);
                z0_r.add(t1_r).intoArray(ret, j);
                z0_i.add(t1_i).intoArray(ret, j + im);
                DoubleVector x1_r = t2_r.sub(t3_i);
                DoubleVector x1_i = t2_i.add(t3_r);
                DoubleVector x2_r = t2_r.add(t3_i);
                DoubleVector x2_i = t2_i.sub(t3_r);
                // Real parts: x_r * w_r - x_i * w_i; imaginary parts: x_i * w_r + x_r * w_i.
                x1_r.mul(w1_r).sub(x1_i.mul(w1_i)).intoArray(ret, j + dj);
                x2_r.mul(w2_r).sub(x2_i.mul(w2_i)).intoArray(ret, j + dj2);
                x1_i.mul(w1_r).add(x1_r.mul(w1_i)).intoArray(ret, j + dj + im);
                x2_i.mul(w2_r).add(x2_r.mul(w2_i)).intoArray(ret, j + dj2 + im);
            }
        }
    }

    /**
     * Blocked-layout kernel using 512-bit vectors: real and imaginary parts are loaded and
     * stored as separate vectors, {@code im} doubles apart.
     *
     * @param passData supplies the input array/offset, output array/offset and transform sign.
     */
    private void blocked512(PassData passData) {
        final double[] data = passData.in;
        final double[] ret = passData.out;
        final int sign = passData.sign;
        int i = passData.inOffset;
        int j = passData.outOffset;
        final double tau = sign * SQRT3_2;

        // First outer iteration (k == 0): no twiddle multiplication.
        for (int k1 = 0; k1 < innerLoopLimit; k1 += BLOCK_LOOP_512, i += LENGTH_512, j += LENGTH_512) {
            DoubleVector z0_r = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i);
            DoubleVector z1_r = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i + di);
            DoubleVector z2_r = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i + di2);
            DoubleVector z0_i = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i + im);
            DoubleVector z1_i = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i + di + im);
            DoubleVector z2_i = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i + di2 + im);
            DoubleVector t1_r = z1_r.add(z2_r);
            DoubleVector t1_i = z1_i.add(z2_i);
            DoubleVector t2_r = t1_r.mul(-0.5).add(z0_r);
            DoubleVector t2_i = t1_i.mul(-0.5).add(z0_i);
            DoubleVector t3_r = z1_r.sub(z2_r).mul(tau);
            DoubleVector t3_i = z1_i.sub(z2_i).mul(tau);
            z0_r.add(t1_r).intoArray(ret, j);
            z0_i.add(t1_i).intoArray(ret, j + im);
            t2_r.sub(t3_i).intoArray(ret, j + dj);
            t2_i.add(t3_r).intoArray(ret, j + dj + im);
            t2_r.add(t3_i).intoArray(ret, j + dj2);
            t2_i.sub(t3_r).intoArray(ret, j + dj2 + im);
        }
        j += jstep;

        // Remaining outer iterations: complex-multiply outputs 1 and 2 by the twiddles.
        for (int k = 1; k < outerLoopLimit; k++, j += jstep) {
            final int index = k * 2;
            final double w1_r = wr[index];
            final double w2_r = wr[index + 1];
            final double w1_i = -sign * wi[index];
            final double w2_i = -sign * wi[index + 1];
            for (int k1 = 0; k1 < innerLoopLimit; k1 += BLOCK_LOOP_512, i += LENGTH_512, j += LENGTH_512) {
                DoubleVector z0_r = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i);
                DoubleVector z1_r = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i + di);
                DoubleVector z2_r = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i + di2);
                DoubleVector z0_i = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i + im);
                DoubleVector z1_i = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i + di + im);
                DoubleVector z2_i = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i + di2 + im);
                DoubleVector t1_r = z1_r.add(z2_r);
                DoubleVector t1_i = z1_i.add(z2_i);
                DoubleVector t2_r = t1_r.mul(-0.5).add(z0_r);
                DoubleVector t2_i = t1_i.mul(-0.5).add(z0_i);
                DoubleVector t3_r = z1_r.sub(z2_r).mul(tau);
                DoubleVector t3_i = z1_i.sub(z2_i).mul(tau);
                z0_r.add(t1_r).intoArray(ret, j);
                z0_i.add(t1_i).intoArray(ret, j + im);
                DoubleVector x1_r = t2_r.sub(t3_i);
                DoubleVector x1_i = t2_i.add(t3_r);
                DoubleVector x2_r = t2_r.add(t3_i);
                DoubleVector x2_i = t2_i.sub(t3_r);
                // Real parts: x_r * w_r - x_i * w_i; imaginary parts: x_i * w_r + x_r * w_i.
                x1_r.mul(w1_r).sub(x1_i.mul(w1_i)).intoArray(ret, j + dj);
                x2_r.mul(w2_r).sub(x2_i.mul(w2_i)).intoArray(ret, j + dj2);
                x1_i.mul(w1_r).add(x1_r.mul(w1_i)).intoArray(ret, j + dj + im);
                x2_i.mul(w2_r).add(x2_r.mul(w2_i)).intoArray(ret, j + dj2 + im);
            }
        }
    }

    /**
     * Interleaved-layout kernel using 128-bit vectors: each vector load covers adjacent
     * real/imaginary values ({@code im == 1}).
     *
     * <p>NOTE(review): {@code SHUFFLE_RE_IM_128}, {@code NEGATE_RE_128} and {@code NEGATE_IM_128}
     * are inherited; presumably the shuffle swaps the real and imaginary lanes of each complex
     * value and the NEGATE vectors hold -1 in the real (resp. imaginary) lanes and +1 elsewhere,
     * so that the operations below realise {@code t2 +/- i*t3} and the complex twiddle
     * product -- confirm in the superclass.
     *
     * @param passData supplies the input array/offset, output array/offset and transform sign.
     */
    private void interleaved128(PassData passData) {
        final double[] data = passData.in;
        final double[] ret = passData.out;
        final int sign = passData.sign;
        int i = passData.inOffset;
        int j = passData.outOffset;
        final double tau = sign * SQRT3_2;

        // First outer iteration (k == 0): no twiddle multiplication.
        for (int k1 = 0; k1 < innerLoopLimit; k1 += INTERLEAVED_LOOP_128, i += LENGTH_128, j += LENGTH_128) {
            DoubleVector z0 = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i);
            DoubleVector z1 = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i + di);
            DoubleVector z2 = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i + di2);
            DoubleVector t1 = z1.add(z2);
            DoubleVector t2 = t1.mul(-0.5).add(z0);
            DoubleVector t3 = z1.sub(z2).mul(tau).rearrange(SHUFFLE_RE_IM_128);
            z0.add(t1).intoArray(ret, j);
            t2.add(t3.mul(NEGATE_RE_128)).intoArray(ret, j + dj);
            t2.add(t3.mul(NEGATE_IM_128)).intoArray(ret, j + dj2);
        }
        j += jstep;

        // Remaining outer iterations: outputs 1 and 2 are multiplied by the twiddles.
        for (int k = 1; k < outerLoopLimit; k++, j += jstep) {
            final int index = k * 2;
            final DoubleVector w1r = DoubleVector.broadcast(DoubleVector.SPECIES_128, wr[index]);
            final DoubleVector w2r = DoubleVector.broadcast(DoubleVector.SPECIES_128, wr[index + 1]);
            final DoubleVector w1i =
                DoubleVector.broadcast(DoubleVector.SPECIES_128, -sign * wi[index]).mul(NEGATE_IM_128);
            final DoubleVector w2i =
                DoubleVector.broadcast(DoubleVector.SPECIES_128, -sign * wi[index + 1]).mul(NEGATE_IM_128);
            for (int k1 = 0; k1 < innerLoopLimit; k1 += INTERLEAVED_LOOP_128, i += LENGTH_128, j += LENGTH_128) {
                DoubleVector z0 = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i);
                DoubleVector z1 = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i + di);
                DoubleVector z2 = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i + di2);
                DoubleVector t1 = z1.add(z2);
                DoubleVector t2 = t1.mul(-0.5).add(z0);
                DoubleVector t3 = z1.sub(z2).mul(tau).rearrange(SHUFFLE_RE_IM_128);
                // The first output is stored exactly once, matching the 256/512-bit kernels.
                z0.add(t1).intoArray(ret, j);
                DoubleVector x1 = t3.fma(NEGATE_RE_128, t2);
                DoubleVector x2 = t3.fma(NEGATE_IM_128, t2);
                w1r.fma(x1, w1i.mul(x1).rearrange(SHUFFLE_RE_IM_128)).intoArray(ret, j + dj);
                w2r.fma(x2, w2i.mul(x2).rearrange(SHUFFLE_RE_IM_128)).intoArray(ret, j + dj2);
            }
        }
    }

    /**
     * Interleaved-layout kernel using 256-bit vectors: each vector load covers adjacent
     * real/imaginary values ({@code im == 1}).
     *
     * <p>NOTE(review): {@code SHUFFLE_RE_IM_256}, {@code NEGATE_RE_256} and {@code NEGATE_IM_256}
     * are inherited; presumably the shuffle swaps the real and imaginary lanes of each complex
     * value and the NEGATE vectors hold -1 in the real (resp. imaginary) lanes and +1 elsewhere
     * -- confirm in the superclass.
     *
     * @param passData supplies the input array/offset, output array/offset and transform sign.
     */
    private void interleaved256(PassData passData) {
        final double[] data = passData.in;
        final double[] ret = passData.out;
        final int sign = passData.sign;
        int i = passData.inOffset;
        int j = passData.outOffset;
        final double tau = sign * SQRT3_2;

        // First outer iteration (k == 0): no twiddle multiplication.
        for (int k1 = 0; k1 < innerLoopLimit; k1 += INTERLEAVED_LOOP_256, i += LENGTH_256, j += LENGTH_256) {
            DoubleVector z0 = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i);
            DoubleVector z1 = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i + di);
            DoubleVector z2 = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i + di2);
            DoubleVector t1 = z1.add(z2);
            DoubleVector t2 = t1.mul(-0.5).add(z0);
            DoubleVector t3 = z1.sub(z2).mul(tau).rearrange(SHUFFLE_RE_IM_256);
            z0.add(t1).intoArray(ret, j);
            t2.add(t3.mul(NEGATE_RE_256)).intoArray(ret, j + dj);
            t2.add(t3.mul(NEGATE_IM_256)).intoArray(ret, j + dj2);
        }
        j += jstep;

        // Remaining outer iterations: outputs 1 and 2 are multiplied by the twiddles.
        for (int k = 1; k < outerLoopLimit; k++, j += jstep) {
            final int index = k * 2;
            final DoubleVector w1r = DoubleVector.broadcast(DoubleVector.SPECIES_256, wr[index]);
            final DoubleVector w2r = DoubleVector.broadcast(DoubleVector.SPECIES_256, wr[index + 1]);
            final DoubleVector w1i =
                DoubleVector.broadcast(DoubleVector.SPECIES_256, -sign * wi[index]).mul(NEGATE_IM_256);
            final DoubleVector w2i =
                DoubleVector.broadcast(DoubleVector.SPECIES_256, -sign * wi[index + 1]).mul(NEGATE_IM_256);
            for (int k1 = 0; k1 < innerLoopLimit; k1 += INTERLEAVED_LOOP_256, i += LENGTH_256, j += LENGTH_256) {
                DoubleVector z0 = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i);
                DoubleVector z1 = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i + di);
                DoubleVector z2 = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i + di2);
                DoubleVector t1 = z1.add(z2);
                DoubleVector t2 = t1.mul(-0.5).add(z0);
                DoubleVector t3 = z1.sub(z2).mul(tau).rearrange(SHUFFLE_RE_IM_256);
                z0.add(t1).intoArray(ret, j);
                DoubleVector x1 = t3.fma(NEGATE_RE_256, t2);
                DoubleVector x2 = t3.fma(NEGATE_IM_256, t2);
                w1r.fma(x1, w1i.mul(x1).rearrange(SHUFFLE_RE_IM_256)).intoArray(ret, j + dj);
                w2r.fma(x2, w2i.mul(x2).rearrange(SHUFFLE_RE_IM_256)).intoArray(ret, j + dj2);
            }
        }
    }

    /**
     * Interleaved-layout kernel using 512-bit vectors: each vector load covers adjacent
     * real/imaginary values ({@code im == 1}).
     *
     * <p>NOTE(review): {@code SHUFFLE_RE_IM_512}, {@code NEGATE_RE_512} and {@code NEGATE_IM_512}
     * are inherited; presumably the shuffle swaps the real and imaginary lanes of each complex
     * value and the NEGATE vectors hold -1 in the real (resp. imaginary) lanes and +1 elsewhere
     * -- confirm in the superclass.
     *
     * @param passData supplies the input array/offset, output array/offset and transform sign.
     */
    private void interleaved512(PassData passData) {
        final double[] data = passData.in;
        final double[] ret = passData.out;
        final int sign = passData.sign;
        int i = passData.inOffset;
        int j = passData.outOffset;
        final double tau = sign * SQRT3_2;

        // First outer iteration (k == 0): no twiddle multiplication.
        for (int k1 = 0; k1 < innerLoopLimit; k1 += INTERLEAVED_LOOP_512, i += LENGTH_512, j += LENGTH_512) {
            DoubleVector z0 = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i);
            DoubleVector z1 = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i + di);
            DoubleVector z2 = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i + di2);
            DoubleVector t1 = z1.add(z2);
            DoubleVector t2 = t1.mul(-0.5).add(z0);
            DoubleVector t3 = z1.sub(z2).mul(tau).rearrange(SHUFFLE_RE_IM_512);
            z0.add(t1).intoArray(ret, j);
            t2.add(t3.mul(NEGATE_RE_512)).intoArray(ret, j + dj);
            t2.add(t3.mul(NEGATE_IM_512)).intoArray(ret, j + dj2);
        }
        j += jstep;

        // Remaining outer iterations: outputs 1 and 2 are multiplied by the twiddles.
        for (int k = 1; k < outerLoopLimit; k++, j += jstep) {
            final int index = k * 2;
            final DoubleVector w1r = DoubleVector.broadcast(DoubleVector.SPECIES_512, wr[index]);
            final DoubleVector w2r = DoubleVector.broadcast(DoubleVector.SPECIES_512, wr[index + 1]);
            final DoubleVector w1i =
                DoubleVector.broadcast(DoubleVector.SPECIES_512, -sign * wi[index]).mul(NEGATE_IM_512);
            final DoubleVector w2i =
                DoubleVector.broadcast(DoubleVector.SPECIES_512, -sign * wi[index + 1]).mul(NEGATE_IM_512);
            for (int k1 = 0; k1 < innerLoopLimit; k1 += INTERLEAVED_LOOP_512, i += LENGTH_512, j += LENGTH_512) {
                DoubleVector z0 = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i);
                DoubleVector z1 = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i + di);
                DoubleVector z2 = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i + di2);
                DoubleVector t1 = z1.add(z2);
                DoubleVector t2 = t1.mul(-0.5).add(z0);
                DoubleVector t3 = z1.sub(z2).mul(tau).rearrange(SHUFFLE_RE_IM_512);
                z0.add(t1).intoArray(ret, j);
                DoubleVector x1 = t3.fma(NEGATE_RE_512, t2);
                DoubleVector x2 = t3.fma(NEGATE_IM_512, t2);
                w1r.fma(x1, w1i.mul(x1).rearrange(SHUFFLE_RE_IM_512)).intoArray(ret, j + dj);
                w2r.fma(x2, w2i.mul(x2).rearrange(SHUFFLE_RE_IM_512)).intoArray(ret, j + dj2);
            }
        }
    }
}

