/*
 * Decompiled with CFR 0.152.
 */
package ffx.numerics.fft;

import ffx.numerics.fft.MixedRadixFactor;
import ffx.numerics.fft.PassConstants;
import ffx.numerics.fft.PassData;
import jdk.incubator.vector.DoubleVector;
import jdk.incubator.vector.Vector;
import jdk.incubator.vector.VectorSpecies;

public class MixedRadixFactor4
extends MixedRadixFactor {
    /** Candidate SIMD widths, widest first; scanned by getOptimalSIMDWidth() when LENGTH is not usable. */
    private static int[] simdSizes = new int[]{8, 4, 2};
    /** Twice the inherited offset di; added to the input index to reach the third butterfly input. */
    private final int di2;
    /** Three times the inherited offset di; added to the input index to reach the fourth butterfly input. */
    private final int di3;
    /** Twice the inherited offset dj; added to the output index to reach the third butterfly output. */
    private final int dj2;
    /** Three times the inherited offset dj; added to the output index to reach the fourth butterfly output. */
    private final int dj3;

    /**
     * Builds a radix-4 pass and caches the doubled and tripled input/output offsets.
     *
     * @param passConstants constants forwarded to the MixedRadixFactor superclass constructor.
     */
    public MixedRadixFactor4(PassConstants passConstants) {
        super(passConstants);
        // Offsets of the third and fourth inputs relative to the first one.
        di2 = di * 2;
        di3 = di * 3;
        // Offsets of the third and fourth outputs relative to the first one.
        dj2 = dj * 2;
        dj3 = dj * 3;
    }

    /**
     * Reports whether this pass can be executed with the given SIMD width.
     *
     * <p>Only widths 2, 4 and 8 are accepted. When {@code im == 1} the inner loop limit must be a
     * multiple of {@code width / 2}; otherwise it must be a multiple of {@code width}.
     *
     * @param width the SIMD width to check.
     * @return true if the width is supported and divides the inner loop evenly.
     */
    @Override
    public boolean isValidSIMDWidth(int width) {
        final boolean supported = width == 2 || width == 4 || width == 8;
        if (!supported) {
            return false;
        }
        // im == 1 is the case passSIMD routes to the interleaved kernels.
        final int divisor = (im == 1) ? width / 2 : width;
        return innerLoopLimit % divisor == 0;
    }

    /**
     * Chooses the SIMD width to use for this pass.
     *
     * <p>LENGTH (declared outside this file) is tried first; if it is not valid, the entries of
     * simdSizes that are strictly smaller than LENGTH are tried in array order.
     *
     * @return the first valid width found, or 0 if none is valid.
     */
    @Override
    public int getOptimalSIMDWidth() {
        if (isValidSIMDWidth(LENGTH)) {
            return LENGTH;
        }
        final int[] candidates = simdSizes;
        for (int idx = 0; idx < candidates.length; idx++) {
            final int candidate = candidates[idx];
            if (candidate < LENGTH && isValidSIMDWidth(candidate)) {
                return candidate;
            }
        }
        return 0;
    }

    /**
     * Scalar radix-4 pass.
     *
     * <p>Reads four complex inputs per butterfly from {@code passData.in} (real part at the index,
     * imaginary part {@code im} entries later) and writes four complex outputs to
     * {@code passData.out}. The first outer block stores the butterfly results directly; every
     * later block passes outputs 1..3 through multiplyAndStore together with the factors read from
     * wr/wi (multiplyAndStore is defined outside this file; presumably it stores the complex
     * product).
     *
     * @param passData input/output arrays, their starting offsets and the transform sign.
     */
    @Override
    protected void passScalar(PassData passData) {
        final double[] in = passData.in;
        final double[] out = passData.out;
        final int sign = passData.sign;
        int src = passData.inOffset;
        int dst = passData.outOffset;

        // First outer block: no factor multiplication is applied.
        for (int n = 0; n < innerLoopLimit; n++, src += ii, dst += ii) {
            final double aRe = in[src];
            final double bRe = in[src + di];
            final double cRe = in[src + di2];
            final double dRe = in[src + di3];
            final double aIm = in[src + im];
            final double bIm = in[src + di + im];
            final double cIm = in[src + di2 + im];
            final double dIm = in[src + di3 + im];

            final double acSumRe = aRe + cRe;
            final double acSumIm = aIm + cIm;
            final double bdSumRe = bRe + dRe;
            final double bdSumIm = bIm + dIm;
            final double acDiffRe = aRe - cRe;
            final double acDiffIm = aIm - cIm;
            // The (b - d) difference is scaled by the transform sign.
            final double bdDiffRe = sign * (bRe - dRe);
            final double bdDiffIm = sign * (bIm - dIm);

            out[dst] = acSumRe + bdSumRe;
            out[dst + im] = acSumIm + bdSumIm;
            out[dst + dj] = acDiffRe - bdDiffIm;
            out[dst + dj + im] = acDiffIm + bdDiffRe;
            out[dst + dj2] = acSumRe - bdSumRe;
            out[dst + dj2 + im] = acSumIm - bdSumIm;
            out[dst + dj3] = acDiffRe + bdDiffIm;
            out[dst + dj3 + im] = acDiffIm - bdDiffRe;
        }

        // Remaining outer blocks: three factors per block, stored consecutively in wr/wi.
        dst += jstep;
        for (int k = 1; k < outerLoopLimit; k++, dst += jstep) {
            final int w = 3 * k;
            final double w1Re = wr[w];
            final double w2Re = wr[w + 1];
            final double w3Re = wr[w + 2];
            // Negate the sign as an int first, then widen, exactly as the factors are defined.
            final double w1Im = -sign * wi[w];
            final double w2Im = -sign * wi[w + 1];
            final double w3Im = -sign * wi[w + 2];

            for (int n = 0; n < innerLoopLimit; n++, src += ii, dst += ii) {
                final double aRe = in[src];
                final double bRe = in[src + di];
                final double cRe = in[src + di2];
                final double dRe = in[src + di3];
                final double aIm = in[src + im];
                final double bIm = in[src + di + im];
                final double cIm = in[src + di2 + im];
                final double dIm = in[src + di3 + im];

                final double acSumRe = aRe + cRe;
                final double acSumIm = aIm + cIm;
                final double bdSumRe = bRe + dRe;
                final double bdSumIm = bIm + dIm;
                final double acDiffRe = aRe - cRe;
                final double acDiffIm = aIm - cIm;
                final double bdDiffRe = sign * (bRe - dRe);
                final double bdDiffIm = sign * (bIm - dIm);

                out[dst] = acSumRe + bdSumRe;
                out[dst + im] = acSumIm + bdSumIm;

                final double y1Re = acDiffRe - bdDiffIm;
                final double y1Im = acDiffIm + bdDiffRe;
                multiplyAndStore(y1Re, y1Im, w1Re, w1Im, out, dst + dj, dst + dj + im);

                final double y2Re = acSumRe - bdSumRe;
                final double y2Im = acSumIm - bdSumIm;
                multiplyAndStore(y2Re, y2Im, w2Re, w2Im, out, dst + dj2, dst + dj2 + im);

                final double y3Re = acDiffRe + bdDiffIm;
                final double y3Im = acDiffIm - bdDiffRe;
                multiplyAndStore(y3Re, y3Im, w3Re, w3Im, out, dst + dj3, dst + dj3 + im);
            }
        }
    }

    /**
     * SIMD entry point: falls back to the scalar pass when the configured simdWidth is not valid
     * for this pass, otherwise dispatches to the interleaved kernels when {@code im == 1} and to
     * the blocked kernels in every other case.
     *
     * @param passData input/output arrays, their starting offsets and the transform sign.
     */
    @Override
    protected void passSIMD(PassData passData) {
        final int width = simdWidth;
        if (!isValidSIMDWidth(width)) {
            passScalar(passData);
            return;
        }
        if (im == 1) {
            interleaved(passData, width);
        } else {
            blocked(passData, width);
        }
    }

    /**
     * Selects the interleaved kernel that matches the requested SIMD length.
     *
     * @param passData   input/output arrays, their starting offsets and the transform sign.
     * @param simdLength 2, 4 or 8 doubles per vector; any other value runs the scalar pass.
     */
    private void interleaved(PassData passData, int simdLength) {
        switch (simdLength) {
            case 2 -> interleaved128(passData);
            case 4 -> interleaved256(passData);
            case 8 -> interleaved512(passData);
            default -> passScalar(passData);
        }
    }

    /**
     * Selects the blocked kernel that matches the requested SIMD length.
     *
     * @param passData   input/output arrays, their starting offsets and the transform sign.
     * @param simdLength 2, 4 or 8 doubles per vector; any other value runs the scalar pass.
     */
    private void blocked(PassData passData, int simdLength) {
        switch (simdLength) {
            case 2 -> blocked128(passData);
            case 4 -> blocked256(passData);
            case 8 -> blocked512(passData);
            default -> passScalar(passData);
        }
    }

    /**
     * One vectorized radix-4 butterfly where real and imaginary parts are read from separate
     * offsets (index and index + im). Outputs 1..3 are multiplied by the complex factors
     * (w1, w2, w3) before being stored; output 0 is stored unmodified.
     *
     * @param species vector species used for every load.
     * @param data    input array.
     * @param i       index of the first real input.
     * @param sign    scale applied to the (z1 - z3) difference.
     * @param w1r     real part of the factor applied to output 1.
     * @param w1i     imaginary part of the factor applied to output 1.
     * @param w2r     real part of the factor applied to output 2.
     * @param w2i     imaginary part of the factor applied to output 2.
     * @param w3r     real part of the factor applied to output 3.
     * @param w3i     imaginary part of the factor applied to output 3.
     * @param ret     output array.
     * @param j       index of the first real output.
     */
    private void butterFly4Blocked(VectorSpecies<Double> species, double[] data, int i, double sign, double w1r, double w1i, double w2r, double w2i, double w3r, double w3i, double[] ret, int j) {
        // Load the four complex inputs (real block, then imaginary block, per input).
        final DoubleVector aRe = DoubleVector.fromArray(species, data, i);
        final DoubleVector aIm = DoubleVector.fromArray(species, data, i + im);
        final DoubleVector bRe = DoubleVector.fromArray(species, data, i + di);
        final DoubleVector bIm = DoubleVector.fromArray(species, data, i + di + im);
        final DoubleVector cRe = DoubleVector.fromArray(species, data, i + di2);
        final DoubleVector cIm = DoubleVector.fromArray(species, data, i + di2 + im);
        final DoubleVector dRe = DoubleVector.fromArray(species, data, i + di3);
        final DoubleVector dIm = DoubleVector.fromArray(species, data, i + di3 + im);

        final DoubleVector acSumRe = aRe.add(cRe);
        final DoubleVector acSumIm = aIm.add(cIm);
        final DoubleVector bdSumRe = bRe.add(dRe);
        final DoubleVector bdSumIm = bIm.add(dIm);
        final DoubleVector acDiffRe = aRe.sub(cRe);
        final DoubleVector acDiffIm = aIm.sub(cIm);
        final DoubleVector bdDiffRe = bRe.sub(dRe).mul(sign);
        final DoubleVector bdDiffIm = bIm.sub(dIm).mul(sign);

        // Output 0 needs no factor.
        acSumRe.add(bdSumRe).intoArray(ret, j);
        acSumIm.add(bdSumIm).intoArray(ret, j + im);

        final DoubleVector y1Re = acDiffRe.sub(bdDiffIm);
        final DoubleVector y1Im = acDiffIm.add(bdDiffRe);
        final DoubleVector y2Re = acSumRe.sub(bdSumRe);
        final DoubleVector y2Im = acSumIm.sub(bdSumIm);
        final DoubleVector y3Re = acDiffRe.add(bdDiffIm);
        final DoubleVector y3Im = acDiffIm.sub(bdDiffRe);

        // Real parts of y * w: yRe*wRe - yIm*wIm.
        y1Re.mul(w1r).sub(y1Im.mul(w1i)).intoArray(ret, j + dj);
        y2Re.mul(w2r).sub(y2Im.mul(w2i)).intoArray(ret, j + dj2);
        y3Re.mul(w3r).sub(y3Im.mul(w3i)).intoArray(ret, j + dj3);
        // Imaginary parts of y * w: yIm*wRe + yRe*wIm.
        y1Im.mul(w1r).add(y1Re.mul(w1i)).intoArray(ret, j + dj + im);
        y2Im.mul(w2r).add(y2Re.mul(w2i)).intoArray(ret, j + dj2 + im);
        y3Im.mul(w3r).add(y3Re.mul(w3i)).intoArray(ret, j + dj3 + im);
    }

    /**
     * Radix-4 pass with {@code DoubleVector.SPECIES_128} for the layout where real and imaginary
     * parts are read from separate offsets (index and index + im).
     *
     * <p>The first outer block stores the butterfly results directly; later blocks multiply
     * outputs 1..3 by the factors read from wr/wi. Loop counters advance by the inherited
     * constants BLOCK_LOOP_128 and LENGTH_128.
     *
     * @param passData input/output arrays, their starting offsets and the transform sign.
     */
    private void blocked128(PassData passData) {
        final double[] in = passData.in;
        final double[] out = passData.out;
        final int sign = passData.sign;
        final double scale = sign;
        // Negated as an int before widening to double.
        final double wSign = -sign;
        int src = passData.inOffset;
        int dst = passData.outOffset;

        // First outer block: no factor multiplication.
        for (int n = 0; n < innerLoopLimit; n += BLOCK_LOOP_128, src += LENGTH_128, dst += LENGTH_128) {
            final DoubleVector aRe = DoubleVector.fromArray(DoubleVector.SPECIES_128, in, src);
            final DoubleVector aIm = DoubleVector.fromArray(DoubleVector.SPECIES_128, in, src + im);
            final DoubleVector bRe = DoubleVector.fromArray(DoubleVector.SPECIES_128, in, src + di);
            final DoubleVector bIm = DoubleVector.fromArray(DoubleVector.SPECIES_128, in, src + di + im);
            final DoubleVector cRe = DoubleVector.fromArray(DoubleVector.SPECIES_128, in, src + di2);
            final DoubleVector cIm = DoubleVector.fromArray(DoubleVector.SPECIES_128, in, src + di2 + im);
            final DoubleVector dRe = DoubleVector.fromArray(DoubleVector.SPECIES_128, in, src + di3);
            final DoubleVector dIm = DoubleVector.fromArray(DoubleVector.SPECIES_128, in, src + di3 + im);

            final DoubleVector acSumRe = aRe.add(cRe);
            final DoubleVector acSumIm = aIm.add(cIm);
            final DoubleVector bdSumRe = bRe.add(dRe);
            final DoubleVector bdSumIm = bIm.add(dIm);
            acSumRe.add(bdSumRe).intoArray(out, dst);
            acSumIm.add(bdSumIm).intoArray(out, dst + im);

            final DoubleVector acDiffRe = aRe.sub(cRe);
            final DoubleVector acDiffIm = aIm.sub(cIm);
            final DoubleVector bdDiffRe = bRe.sub(dRe).mul(scale);
            final DoubleVector bdDiffIm = bIm.sub(dIm).mul(scale);
            acDiffRe.sub(bdDiffIm).intoArray(out, dst + dj);
            acSumRe.sub(bdSumRe).intoArray(out, dst + dj2);
            acDiffRe.add(bdDiffIm).intoArray(out, dst + dj3);
            acDiffIm.add(bdDiffRe).intoArray(out, dst + dj + im);
            acSumIm.sub(bdSumIm).intoArray(out, dst + dj2 + im);
            acDiffIm.sub(bdDiffRe).intoArray(out, dst + dj3 + im);
        }

        // Remaining outer blocks: three factors per block.
        dst += jstep;
        for (int k = 1; k < outerLoopLimit; k++, dst += jstep) {
            final int w = 3 * k;
            final double w1Re = wr[w];
            final double w2Re = wr[w + 1];
            final double w3Re = wr[w + 2];
            final double w1Im = wSign * wi[w];
            final double w2Im = wSign * wi[w + 1];
            final double w3Im = wSign * wi[w + 2];

            for (int n = 0; n < innerLoopLimit; n += BLOCK_LOOP_128, src += LENGTH_128, dst += LENGTH_128) {
                final DoubleVector aRe = DoubleVector.fromArray(DoubleVector.SPECIES_128, in, src);
                final DoubleVector bRe = DoubleVector.fromArray(DoubleVector.SPECIES_128, in, src + di);
                final DoubleVector cRe = DoubleVector.fromArray(DoubleVector.SPECIES_128, in, src + di2);
                final DoubleVector dRe = DoubleVector.fromArray(DoubleVector.SPECIES_128, in, src + di3);
                final DoubleVector aIm = DoubleVector.fromArray(DoubleVector.SPECIES_128, in, src + im);
                final DoubleVector bIm = DoubleVector.fromArray(DoubleVector.SPECIES_128, in, src + di + im);
                final DoubleVector cIm = DoubleVector.fromArray(DoubleVector.SPECIES_128, in, src + di2 + im);
                final DoubleVector dIm = DoubleVector.fromArray(DoubleVector.SPECIES_128, in, src + di3 + im);

                final DoubleVector acSumRe = aRe.add(cRe);
                final DoubleVector acSumIm = aIm.add(cIm);
                final DoubleVector bdSumRe = bRe.add(dRe);
                final DoubleVector bdSumIm = bIm.add(dIm);
                final DoubleVector acDiffRe = aRe.sub(cRe);
                final DoubleVector acDiffIm = aIm.sub(cIm);
                final DoubleVector bdDiffRe = bRe.sub(dRe).mul(scale);
                final DoubleVector bdDiffIm = bIm.sub(dIm).mul(scale);

                acSumRe.add(bdSumRe).intoArray(out, dst);
                acSumIm.add(bdSumIm).intoArray(out, dst + im);

                final DoubleVector y1Re = acDiffRe.sub(bdDiffIm);
                final DoubleVector y1Im = acDiffIm.add(bdDiffRe);
                final DoubleVector y2Re = acSumRe.sub(bdSumRe);
                final DoubleVector y2Im = acSumIm.sub(bdSumIm);
                final DoubleVector y3Re = acDiffRe.add(bdDiffIm);
                final DoubleVector y3Im = acDiffIm.sub(bdDiffRe);

                // Real parts of y * w, then imaginary parts.
                y1Re.mul(w1Re).sub(y1Im.mul(w1Im)).intoArray(out, dst + dj);
                y2Re.mul(w2Re).sub(y2Im.mul(w2Im)).intoArray(out, dst + dj2);
                y3Re.mul(w3Re).sub(y3Im.mul(w3Im)).intoArray(out, dst + dj3);
                y1Im.mul(w1Re).add(y1Re.mul(w1Im)).intoArray(out, dst + dj + im);
                y2Im.mul(w2Re).add(y2Re.mul(w2Im)).intoArray(out, dst + dj2 + im);
                y3Im.mul(w3Re).add(y3Re.mul(w3Im)).intoArray(out, dst + dj3 + im);
            }
        }
    }

    /**
     * Radix-4 pass with {@code DoubleVector.SPECIES_256} for the layout where real and imaginary
     * parts are read from separate offsets (index and index + im).
     *
     * <p>The first outer block stores the butterfly results directly; later blocks multiply
     * outputs 1..3 by the factors read from wr/wi. Loop counters advance by the inherited
     * constants BLOCK_LOOP_256 and LENGTH_256.
     *
     * @param passData input/output arrays, their starting offsets and the transform sign.
     */
    private void blocked256(PassData passData) {
        final double[] in = passData.in;
        final double[] out = passData.out;
        final int sign = passData.sign;
        final double scale = sign;
        // Negated as an int before widening to double.
        final double wSign = -sign;
        int src = passData.inOffset;
        int dst = passData.outOffset;

        // First outer block: no factor multiplication.
        for (int n = 0; n < innerLoopLimit; n += BLOCK_LOOP_256, src += LENGTH_256, dst += LENGTH_256) {
            final DoubleVector aRe = DoubleVector.fromArray(DoubleVector.SPECIES_256, in, src);
            final DoubleVector bRe = DoubleVector.fromArray(DoubleVector.SPECIES_256, in, src + di);
            final DoubleVector cRe = DoubleVector.fromArray(DoubleVector.SPECIES_256, in, src + di2);
            final DoubleVector dRe = DoubleVector.fromArray(DoubleVector.SPECIES_256, in, src + di3);
            final DoubleVector aIm = DoubleVector.fromArray(DoubleVector.SPECIES_256, in, src + im);
            final DoubleVector bIm = DoubleVector.fromArray(DoubleVector.SPECIES_256, in, src + di + im);
            final DoubleVector cIm = DoubleVector.fromArray(DoubleVector.SPECIES_256, in, src + di2 + im);
            final DoubleVector dIm = DoubleVector.fromArray(DoubleVector.SPECIES_256, in, src + di3 + im);

            final DoubleVector acSumRe = aRe.add(cRe);
            final DoubleVector acSumIm = aIm.add(cIm);
            final DoubleVector bdSumRe = bRe.add(dRe);
            final DoubleVector bdSumIm = bIm.add(dIm);
            final DoubleVector acDiffRe = aRe.sub(cRe);
            final DoubleVector acDiffIm = aIm.sub(cIm);
            final DoubleVector bdDiffRe = bRe.sub(dRe).mul(scale);
            final DoubleVector bdDiffIm = bIm.sub(dIm).mul(scale);

            // All real outputs first, then all imaginary outputs.
            acSumRe.add(bdSumRe).intoArray(out, dst);
            acDiffRe.sub(bdDiffIm).intoArray(out, dst + dj);
            acSumRe.sub(bdSumRe).intoArray(out, dst + dj2);
            acDiffRe.add(bdDiffIm).intoArray(out, dst + dj3);
            acSumIm.add(bdSumIm).intoArray(out, dst + im);
            acDiffIm.add(bdDiffRe).intoArray(out, dst + dj + im);
            acSumIm.sub(bdSumIm).intoArray(out, dst + dj2 + im);
            acDiffIm.sub(bdDiffRe).intoArray(out, dst + dj3 + im);
        }

        // Remaining outer blocks: three factors per block.
        dst += jstep;
        for (int k = 1; k < outerLoopLimit; k++, dst += jstep) {
            final int w = 3 * k;
            final double w1Re = wr[w];
            final double w2Re = wr[w + 1];
            final double w3Re = wr[w + 2];
            final double w1Im = wSign * wi[w];
            final double w2Im = wSign * wi[w + 1];
            final double w3Im = wSign * wi[w + 2];

            for (int n = 0; n < innerLoopLimit; n += BLOCK_LOOP_256, src += LENGTH_256, dst += LENGTH_256) {
                final DoubleVector aRe = DoubleVector.fromArray(DoubleVector.SPECIES_256, in, src);
                final DoubleVector bRe = DoubleVector.fromArray(DoubleVector.SPECIES_256, in, src + di);
                final DoubleVector cRe = DoubleVector.fromArray(DoubleVector.SPECIES_256, in, src + di2);
                final DoubleVector dRe = DoubleVector.fromArray(DoubleVector.SPECIES_256, in, src + di3);
                final DoubleVector aIm = DoubleVector.fromArray(DoubleVector.SPECIES_256, in, src + im);
                final DoubleVector bIm = DoubleVector.fromArray(DoubleVector.SPECIES_256, in, src + di + im);
                final DoubleVector cIm = DoubleVector.fromArray(DoubleVector.SPECIES_256, in, src + di2 + im);
                final DoubleVector dIm = DoubleVector.fromArray(DoubleVector.SPECIES_256, in, src + di3 + im);

                final DoubleVector acSumRe = aRe.add(cRe);
                final DoubleVector acSumIm = aIm.add(cIm);
                final DoubleVector bdSumRe = bRe.add(dRe);
                final DoubleVector bdSumIm = bIm.add(dIm);
                final DoubleVector acDiffRe = aRe.sub(cRe);
                final DoubleVector acDiffIm = aIm.sub(cIm);
                final DoubleVector bdDiffRe = bRe.sub(dRe).mul(scale);
                final DoubleVector bdDiffIm = bIm.sub(dIm).mul(scale);

                acSumRe.add(bdSumRe).intoArray(out, dst);
                acSumIm.add(bdSumIm).intoArray(out, dst + im);

                final DoubleVector y1Re = acDiffRe.sub(bdDiffIm);
                final DoubleVector y1Im = acDiffIm.add(bdDiffRe);
                final DoubleVector y2Re = acSumRe.sub(bdSumRe);
                final DoubleVector y2Im = acSumIm.sub(bdSumIm);
                final DoubleVector y3Re = acDiffRe.add(bdDiffIm);
                final DoubleVector y3Im = acDiffIm.sub(bdDiffRe);

                // Real parts of y * w, then imaginary parts.
                y1Re.mul(w1Re).sub(y1Im.mul(w1Im)).intoArray(out, dst + dj);
                y2Re.mul(w2Re).sub(y2Im.mul(w2Im)).intoArray(out, dst + dj2);
                y3Re.mul(w3Re).sub(y3Im.mul(w3Im)).intoArray(out, dst + dj3);
                y1Im.mul(w1Re).add(y1Re.mul(w1Im)).intoArray(out, dst + dj + im);
                y2Im.mul(w2Re).add(y2Re.mul(w2Im)).intoArray(out, dst + dj2 + im);
                y3Im.mul(w3Re).add(y3Re.mul(w3Im)).intoArray(out, dst + dj3 + im);
            }
        }
    }

    /**
     * Radix-4 pass with {@code DoubleVector.SPECIES_512} for the layout where real and imaginary
     * parts are read from separate offsets (index and index + im).
     *
     * <p>The first outer block stores the butterfly results directly; later blocks multiply
     * outputs 1..3 by the factors read from wr/wi. Loop counters advance by the inherited
     * constants BLOCK_LOOP_512 and LENGTH_512.
     *
     * @param passData input/output arrays, their starting offsets and the transform sign.
     */
    private void blocked512(PassData passData) {
        final double[] in = passData.in;
        final double[] out = passData.out;
        final int sign = passData.sign;
        final double scale = sign;
        // Negated as an int before widening to double.
        final double wSign = -sign;
        int src = passData.inOffset;
        int dst = passData.outOffset;

        // First outer block: no factor multiplication.
        for (int n = 0; n < innerLoopLimit; n += BLOCK_LOOP_512, src += LENGTH_512, dst += LENGTH_512) {
            final DoubleVector aRe = DoubleVector.fromArray(DoubleVector.SPECIES_512, in, src);
            final DoubleVector bRe = DoubleVector.fromArray(DoubleVector.SPECIES_512, in, src + di);
            final DoubleVector cRe = DoubleVector.fromArray(DoubleVector.SPECIES_512, in, src + di2);
            final DoubleVector dRe = DoubleVector.fromArray(DoubleVector.SPECIES_512, in, src + di3);
            final DoubleVector aIm = DoubleVector.fromArray(DoubleVector.SPECIES_512, in, src + im);
            final DoubleVector bIm = DoubleVector.fromArray(DoubleVector.SPECIES_512, in, src + di + im);
            final DoubleVector cIm = DoubleVector.fromArray(DoubleVector.SPECIES_512, in, src + di2 + im);
            final DoubleVector dIm = DoubleVector.fromArray(DoubleVector.SPECIES_512, in, src + di3 + im);

            final DoubleVector acSumRe = aRe.add(cRe);
            final DoubleVector acSumIm = aIm.add(cIm);
            final DoubleVector bdSumRe = bRe.add(dRe);
            final DoubleVector bdSumIm = bIm.add(dIm);
            final DoubleVector acDiffRe = aRe.sub(cRe);
            final DoubleVector acDiffIm = aIm.sub(cIm);
            final DoubleVector bdDiffRe = bRe.sub(dRe).mul(scale);
            final DoubleVector bdDiffIm = bIm.sub(dIm).mul(scale);

            // All real outputs first, then all imaginary outputs.
            acSumRe.add(bdSumRe).intoArray(out, dst);
            acDiffRe.sub(bdDiffIm).intoArray(out, dst + dj);
            acSumRe.sub(bdSumRe).intoArray(out, dst + dj2);
            acDiffRe.add(bdDiffIm).intoArray(out, dst + dj3);
            acSumIm.add(bdSumIm).intoArray(out, dst + im);
            acDiffIm.add(bdDiffRe).intoArray(out, dst + dj + im);
            acSumIm.sub(bdSumIm).intoArray(out, dst + dj2 + im);
            acDiffIm.sub(bdDiffRe).intoArray(out, dst + dj3 + im);
        }

        // Remaining outer blocks: three factors per block.
        dst += jstep;
        for (int k = 1; k < outerLoopLimit; k++, dst += jstep) {
            final int w = 3 * k;
            final double w1Re = wr[w];
            final double w2Re = wr[w + 1];
            final double w3Re = wr[w + 2];
            final double w1Im = wSign * wi[w];
            final double w2Im = wSign * wi[w + 1];
            final double w3Im = wSign * wi[w + 2];

            for (int n = 0; n < innerLoopLimit; n += BLOCK_LOOP_512, src += LENGTH_512, dst += LENGTH_512) {
                final DoubleVector aRe = DoubleVector.fromArray(DoubleVector.SPECIES_512, in, src);
                final DoubleVector bRe = DoubleVector.fromArray(DoubleVector.SPECIES_512, in, src + di);
                final DoubleVector cRe = DoubleVector.fromArray(DoubleVector.SPECIES_512, in, src + di2);
                final DoubleVector dRe = DoubleVector.fromArray(DoubleVector.SPECIES_512, in, src + di3);
                final DoubleVector aIm = DoubleVector.fromArray(DoubleVector.SPECIES_512, in, src + im);
                final DoubleVector bIm = DoubleVector.fromArray(DoubleVector.SPECIES_512, in, src + di + im);
                final DoubleVector cIm = DoubleVector.fromArray(DoubleVector.SPECIES_512, in, src + di2 + im);
                final DoubleVector dIm = DoubleVector.fromArray(DoubleVector.SPECIES_512, in, src + di3 + im);

                final DoubleVector acSumRe = aRe.add(cRe);
                final DoubleVector acSumIm = aIm.add(cIm);
                final DoubleVector bdSumRe = bRe.add(dRe);
                final DoubleVector bdSumIm = bIm.add(dIm);
                final DoubleVector acDiffRe = aRe.sub(cRe);
                final DoubleVector acDiffIm = aIm.sub(cIm);
                final DoubleVector bdDiffRe = bRe.sub(dRe).mul(scale);
                final DoubleVector bdDiffIm = bIm.sub(dIm).mul(scale);

                acSumRe.add(bdSumRe).intoArray(out, dst);
                acSumIm.add(bdSumIm).intoArray(out, dst + im);

                final DoubleVector y1Re = acDiffRe.sub(bdDiffIm);
                final DoubleVector y1Im = acDiffIm.add(bdDiffRe);
                final DoubleVector y2Re = acSumRe.sub(bdSumRe);
                final DoubleVector y2Im = acSumIm.sub(bdSumIm);
                final DoubleVector y3Re = acDiffRe.add(bdDiffIm);
                final DoubleVector y3Im = acDiffIm.sub(bdDiffRe);

                // Real parts of y * w, then imaginary parts.
                y1Re.mul(w1Re).sub(y1Im.mul(w1Im)).intoArray(out, dst + dj);
                y2Re.mul(w2Re).sub(y2Im.mul(w2Im)).intoArray(out, dst + dj2);
                y3Re.mul(w3Re).sub(y3Im.mul(w3Im)).intoArray(out, dst + dj3);
                y1Im.mul(w1Re).add(y1Re.mul(w1Im)).intoArray(out, dst + dj + im);
                y2Im.mul(w2Re).add(y2Re.mul(w2Im)).intoArray(out, dst + dj2 + im);
                y3Im.mul(w3Re).add(y3Re.mul(w3Im)).intoArray(out, dst + dj3 + im);
            }
        }
    }

    /**
     * Radix-4 pass with {@code DoubleVector.SPECIES_128} for the {@code im == 1} layout, where a
     * single vector load covers both parts of each input.
     *
     * <p>The first outer block stores the butterfly results directly; later blocks combine
     * outputs 1..3 with the factors read from wr/wi using fma plus a lane rearrangement.
     * SHUFFLE_RE_IM_128, NEGATE_RE_128 and NEGATE_IM_128 are declared outside this file;
     * presumably they swap the re/im lanes and flip the sign of the re or im lanes respectively
     * (TODO confirm against MixedRadixFactor).
     *
     * @param passData input/output arrays, their starting offsets and the transform sign.
     */
    private void interleaved128(PassData passData) {
        final double[] in = passData.in;
        final double[] out = passData.out;
        final int sign = passData.sign;
        final double scale = sign;
        // Negated as an int before widening to double.
        final double wSign = -sign;
        int src = passData.inOffset;
        int dst = passData.outOffset;

        // First outer block: no factor multiplication.
        for (int n = 0; n < innerLoopLimit; n += INTERLEAVED_LOOP_128, src += LENGTH_128, dst += LENGTH_128) {
            final DoubleVector a = DoubleVector.fromArray(DoubleVector.SPECIES_128, in, src);
            final DoubleVector b = DoubleVector.fromArray(DoubleVector.SPECIES_128, in, src + di);
            final DoubleVector c = DoubleVector.fromArray(DoubleVector.SPECIES_128, in, src + di2);
            final DoubleVector d = DoubleVector.fromArray(DoubleVector.SPECIES_128, in, src + di3);

            final DoubleVector acSum = a.add(c);
            final DoubleVector bdSum = b.add(d);
            final DoubleVector acDiff = a.sub(c);
            // Sign-scaled (b - d) with its lanes rearranged.
            final DoubleVector bdDiff = b.sub(d).mul(scale).rearrange(SHUFFLE_RE_IM_128);

            acSum.add(bdSum).intoArray(out, dst);
            acDiff.add(bdDiff.mul(NEGATE_RE_128)).intoArray(out, dst + dj);
            acSum.sub(bdSum).intoArray(out, dst + dj2);
            acDiff.add(bdDiff.mul(NEGATE_IM_128)).intoArray(out, dst + dj3);
        }

        // Remaining outer blocks: three factors per block, broadcast across the vector.
        dst += jstep;
        for (int k = 1; k < outerLoopLimit; k++, dst += jstep) {
            final int w = 3 * k;
            final double w1Re = wr[w];
            final DoubleVector w1r = DoubleVector.broadcast(DoubleVector.SPECIES_128, w1Re);
            final double w2Re = wr[w + 1];
            final DoubleVector w2r = DoubleVector.broadcast(DoubleVector.SPECIES_128, w2Re);
            final double w3Re = wr[w + 2];
            final DoubleVector w3r = DoubleVector.broadcast(DoubleVector.SPECIES_128, w3Re);
            final double w1Im = wSign * wi[w];
            final DoubleVector w1i = DoubleVector.broadcast(DoubleVector.SPECIES_128, w1Im).mul(NEGATE_IM_128);
            final double w2Im = wSign * wi[w + 1];
            final DoubleVector w2i = DoubleVector.broadcast(DoubleVector.SPECIES_128, w2Im).mul(NEGATE_IM_128);
            final double w3Im = wSign * wi[w + 2];
            final DoubleVector w3i = DoubleVector.broadcast(DoubleVector.SPECIES_128, w3Im).mul(NEGATE_IM_128);

            for (int n = 0; n < innerLoopLimit; n += INTERLEAVED_LOOP_128, src += LENGTH_128, dst += LENGTH_128) {
                final DoubleVector a = DoubleVector.fromArray(DoubleVector.SPECIES_128, in, src);
                final DoubleVector b = DoubleVector.fromArray(DoubleVector.SPECIES_128, in, src + di);
                final DoubleVector c = DoubleVector.fromArray(DoubleVector.SPECIES_128, in, src + di2);
                final DoubleVector d = DoubleVector.fromArray(DoubleVector.SPECIES_128, in, src + di3);

                final DoubleVector acSum = a.add(c);
                final DoubleVector bdSum = b.add(d);
                final DoubleVector acDiff = a.sub(c);
                final DoubleVector bdDiff = b.sub(d).mul(scale).rearrange(SHUFFLE_RE_IM_128);

                acSum.add(bdSum).intoArray(out, dst);

                final DoubleVector y1 = acDiff.add(bdDiff.mul(NEGATE_RE_128));
                final DoubleVector y2 = acSum.sub(bdSum);
                final DoubleVector y3 = acDiff.add(bdDiff.mul(NEGATE_IM_128));

                // wr * y + rearrange(wi * y), one fused multiply-add per output.
                final DoubleVector cross1 = w1i.mul(y1).rearrange(SHUFFLE_RE_IM_128);
                w1r.fma(y1, cross1).intoArray(out, dst + dj);
                final DoubleVector cross2 = w2i.mul(y2).rearrange(SHUFFLE_RE_IM_128);
                w2r.fma(y2, cross2).intoArray(out, dst + dj2);
                final DoubleVector cross3 = w3i.mul(y3).rearrange(SHUFFLE_RE_IM_128);
                w3r.fma(y3, cross3).intoArray(out, dst + dj3);
            }
        }
    }

    /**
     * Performs the radix-4 pass using 256-bit {@link DoubleVector} operations.
     *
     * <p>Four input vectors are loaded at offsets {@code 0}, {@code di}, {@code di2} and
     * {@code di3} from the running input index, combined with adds/subtracts, and written to
     * offsets {@code 0}, {@code dj}, {@code dj2} and {@code dj3} from the running output index.
     * The first block of {@code innerLoopLimit} iterations stores the combined values directly;
     * every later block (k = 1 .. outerLoopLimit - 1) additionally scales outputs 1..3 by the
     * values read from {@code wr}/{@code wi} at {@code 3 * k}, {@code 3 * k + 1}, {@code 3 * k + 2}.
     *
     * <p>NOTE(review): the lane semantics of {@code SHUFFLE_RE_IM_256}, {@code NEGATE_RE_256}
     * and {@code NEGATE_IM_256} are defined outside this method; from their names they
     * presumably swap / negate the real and imaginary lanes of interleaved complex data —
     * confirm against the superclass.
     *
     * @param passData supplies the input and output arrays, their starting offsets and the
     *                 transform sign.
     */
    private void interleaved256(PassData passData) {
        final VectorSpecies<Double> species = DoubleVector.SPECIES_256;
        final double[] input = passData.in;
        final double[] output = passData.out;
        final int sign = passData.sign;
        // Widened copies of the sign; the negation is done on the int before widening.
        final double direction = sign;
        final double negatedDirection = -sign;
        int src = passData.inOffset;
        int dst = passData.outOffset;

        // First block: outputs are stored without any wr/wi scaling.
        for (int n = 0; n < innerLoopLimit; n += INTERLEAVED_LOOP_256, src += LENGTH_256, dst += LENGTH_256) {
            final DoubleVector a0 = DoubleVector.fromArray(species, input, src);
            final DoubleVector a1 = DoubleVector.fromArray(species, input, src + di);
            final DoubleVector a2 = DoubleVector.fromArray(species, input, src + di2);
            final DoubleVector a3 = DoubleVector.fromArray(species, input, src + di3);

            final DoubleVector evenSum = a0.add(a2);
            final DoubleVector oddSum = a1.add(a3);
            final DoubleVector evenDiff = a0.sub(a2);
            final DoubleVector oddDiffShuffled = a1.sub(a3).mul(direction).rearrange(SHUFFLE_RE_IM_256);

            evenSum.add(oddSum).intoArray(output, dst);
            evenDiff.add(oddDiffShuffled.mul((Vector<Double>) NEGATE_RE_256)).intoArray(output, dst + dj);
            evenSum.sub(oddSum).intoArray(output, dst + dj2);
            evenDiff.add(oddDiffShuffled.mul((Vector<Double>) NEGATE_IM_256)).intoArray(output, dst + dj3);
        }

        // Skip ahead in the output before the scaled blocks begin.
        dst += jstep;

        for (int k = 1; k < outerLoopLimit; k++, dst += jstep) {
            final int base = 3 * k;

            // Per-block multipliers: wr values are broadcast as-is, wi values are multiplied by
            // the negated sign and then by NEGATE_IM_256.
            final DoubleVector re1 = DoubleVector.broadcast(species, wr[base]);
            final DoubleVector re2 = DoubleVector.broadcast(species, wr[base + 1]);
            final DoubleVector re3 = DoubleVector.broadcast(species, wr[base + 2]);
            final DoubleVector im1 = DoubleVector.broadcast(species, negatedDirection * wi[base])
                    .mul((Vector<Double>) NEGATE_IM_256);
            final DoubleVector im2 = DoubleVector.broadcast(species, negatedDirection * wi[base + 1])
                    .mul((Vector<Double>) NEGATE_IM_256);
            final DoubleVector im3 = DoubleVector.broadcast(species, negatedDirection * wi[base + 2])
                    .mul((Vector<Double>) NEGATE_IM_256);

            for (int n = 0; n < innerLoopLimit; n += INTERLEAVED_LOOP_256, src += LENGTH_256, dst += LENGTH_256) {
                final DoubleVector a0 = DoubleVector.fromArray(species, input, src);
                final DoubleVector a1 = DoubleVector.fromArray(species, input, src + di);
                final DoubleVector a2 = DoubleVector.fromArray(species, input, src + di2);
                final DoubleVector a3 = DoubleVector.fromArray(species, input, src + di3);

                final DoubleVector evenSum = a0.add(a2);
                final DoubleVector oddSum = a1.add(a3);
                final DoubleVector evenDiff = a0.sub(a2);
                final DoubleVector oddDiffShuffled = a1.sub(a3).mul(direction).rearrange(SHUFFLE_RE_IM_256);

                // Output 0 is never scaled.
                evenSum.add(oddSum).intoArray(output, dst);

                final DoubleVector y1 = evenDiff.add(oddDiffShuffled.mul((Vector<Double>) NEGATE_RE_256));
                final DoubleVector y2 = evenSum.sub(oddSum);
                final DoubleVector y3 = evenDiff.add(oddDiffShuffled.mul((Vector<Double>) NEGATE_IM_256));

                // re * y + shuffle(im * y), one fused multiply-add per output.
                final DoubleVector cross1 = im1.mul(y1).rearrange(SHUFFLE_RE_IM_256);
                re1.fma(y1, cross1).intoArray(output, dst + dj);
                final DoubleVector cross2 = im2.mul(y2).rearrange(SHUFFLE_RE_IM_256);
                re2.fma(y2, cross2).intoArray(output, dst + dj2);
                final DoubleVector cross3 = im3.mul(y3).rearrange(SHUFFLE_RE_IM_256);
                re3.fma(y3, cross3).intoArray(output, dst + dj3);
            }
        }
    }

    /**
     * Performs the radix-4 pass using 512-bit {@link DoubleVector} operations.
     *
     * <p>Four input vectors are loaded at offsets {@code 0}, {@code di}, {@code di2} and
     * {@code di3} from the running input index, combined with adds/subtracts, and written to
     * offsets {@code 0}, {@code dj}, {@code dj2} and {@code dj3} from the running output index.
     * The first block of {@code innerLoopLimit} iterations stores the combined values directly;
     * every later block (k = 1 .. outerLoopLimit - 1) additionally scales outputs 1..3 by the
     * values read from {@code wr}/{@code wi} at {@code 3 * k}, {@code 3 * k + 1}, {@code 3 * k + 2}.
     *
     * <p>NOTE(review): the lane semantics of {@code SHUFFLE_RE_IM_512}, {@code NEGATE_RE_512}
     * and {@code NEGATE_IM_512} are defined outside this method; from their names they
     * presumably swap / negate the real and imaginary lanes of interleaved complex data —
     * confirm against the superclass.
     *
     * @param passData supplies the input and output arrays, their starting offsets and the
     *                 transform sign.
     */
    private void interleaved512(PassData passData) {
        final VectorSpecies<Double> species = DoubleVector.SPECIES_512;
        final double[] input = passData.in;
        final double[] output = passData.out;
        final int sign = passData.sign;
        // Widened copies of the sign; the negation is done on the int before widening.
        final double direction = sign;
        final double negatedDirection = -sign;
        int src = passData.inOffset;
        int dst = passData.outOffset;

        // First block: outputs are stored without any wr/wi scaling.
        for (int n = 0; n < innerLoopLimit; n += INTERLEAVED_LOOP_512, src += LENGTH_512, dst += LENGTH_512) {
            final DoubleVector a0 = DoubleVector.fromArray(species, input, src);
            final DoubleVector a1 = DoubleVector.fromArray(species, input, src + di);
            final DoubleVector a2 = DoubleVector.fromArray(species, input, src + di2);
            final DoubleVector a3 = DoubleVector.fromArray(species, input, src + di3);

            final DoubleVector evenSum = a0.add(a2);
            final DoubleVector oddSum = a1.add(a3);
            final DoubleVector evenDiff = a0.sub(a2);
            final DoubleVector oddDiffShuffled = a1.sub(a3).mul(direction).rearrange(SHUFFLE_RE_IM_512);

            evenSum.add(oddSum).intoArray(output, dst);
            evenDiff.add(oddDiffShuffled.mul((Vector<Double>) NEGATE_RE_512)).intoArray(output, dst + dj);
            evenSum.sub(oddSum).intoArray(output, dst + dj2);
            evenDiff.add(oddDiffShuffled.mul((Vector<Double>) NEGATE_IM_512)).intoArray(output, dst + dj3);
        }

        // Skip ahead in the output before the scaled blocks begin.
        dst += jstep;

        for (int k = 1; k < outerLoopLimit; k++, dst += jstep) {
            final int base = 3 * k;

            // Per-block multipliers: wr values are broadcast as-is, wi values are multiplied by
            // the negated sign and then by NEGATE_IM_512.
            final DoubleVector re1 = DoubleVector.broadcast(species, wr[base]);
            final DoubleVector re2 = DoubleVector.broadcast(species, wr[base + 1]);
            final DoubleVector re3 = DoubleVector.broadcast(species, wr[base + 2]);
            final DoubleVector im1 = DoubleVector.broadcast(species, negatedDirection * wi[base])
                    .mul((Vector<Double>) NEGATE_IM_512);
            final DoubleVector im2 = DoubleVector.broadcast(species, negatedDirection * wi[base + 1])
                    .mul((Vector<Double>) NEGATE_IM_512);
            final DoubleVector im3 = DoubleVector.broadcast(species, negatedDirection * wi[base + 2])
                    .mul((Vector<Double>) NEGATE_IM_512);

            for (int n = 0; n < innerLoopLimit; n += INTERLEAVED_LOOP_512, src += LENGTH_512, dst += LENGTH_512) {
                final DoubleVector a0 = DoubleVector.fromArray(species, input, src);
                final DoubleVector a1 = DoubleVector.fromArray(species, input, src + di);
                final DoubleVector a2 = DoubleVector.fromArray(species, input, src + di2);
                final DoubleVector a3 = DoubleVector.fromArray(species, input, src + di3);

                final DoubleVector evenSum = a0.add(a2);
                final DoubleVector oddSum = a1.add(a3);
                final DoubleVector evenDiff = a0.sub(a2);
                final DoubleVector oddDiffShuffled = a1.sub(a3).mul(direction).rearrange(SHUFFLE_RE_IM_512);

                // Output 0 is never scaled.
                evenSum.add(oddSum).intoArray(output, dst);

                final DoubleVector y1 = evenDiff.add(oddDiffShuffled.mul((Vector<Double>) NEGATE_RE_512));
                final DoubleVector y2 = evenSum.sub(oddSum);
                final DoubleVector y3 = evenDiff.add(oddDiffShuffled.mul((Vector<Double>) NEGATE_IM_512));

                // re * y + shuffle(im * y), one fused multiply-add per output.
                final DoubleVector cross1 = im1.mul(y1).rearrange(SHUFFLE_RE_IM_512);
                re1.fma(y1, cross1).intoArray(output, dst + dj);
                final DoubleVector cross2 = im2.mul(y2).rearrange(SHUFFLE_RE_IM_512);
                re2.fma(y2, cross2).intoArray(output, dst + dj2);
                final DoubleVector cross3 = im3.mul(y3).rearrange(SHUFFLE_RE_IM_512);
                re3.fma(y3, cross3).intoArray(output, dst + dj3);
            }
        }
    }
}

