/*
 * Decompiled with CFR 0.152.
 */
package ffx.numerics.fft;

import ffx.numerics.fft.MixedRadixFactor;
import ffx.numerics.fft.PassConstants;
import ffx.numerics.fft.PassData;
import jdk.incubator.vector.DoubleVector;
import jdk.incubator.vector.Vector;
import jdk.incubator.vector.VectorShuffle;
import jdk.incubator.vector.VectorSpecies;

/**
 * Factor-of-two pass built on {@link MixedRadixFactor}.
 *
 * <p>For every pair of complex inputs {@code z0} (read at index {@code i}) and {@code z1} (read at
 * index {@code i + di}) the pass writes
 * <ul>
 *   <li>{@code z0 + z1} at output index {@code j}, and</li>
 *   <li>{@code (z0 - z1) * w} at output index {@code j + dj},</li>
 * </ul>
 * where {@code w = wr[k] + i * (-sign * wi[k])}. For {@code k == 0} the difference is stored
 * without any multiplication. The real part of a value lives at its index and the imaginary part
 * {@code im} elements later.
 *
 * <p>Besides the scalar implementation there are SIMD kernels for 128-, 256- and 512-bit
 * {@link DoubleVector} species. When {@code im == 1} the "interleaved" kernels are used, otherwise
 * the "blocked" kernels are used. Each kernel is written out per vector width with the species as a
 * literal constant. NOTE(review): this duplication is presumably intentional so the JIT sees
 * constant species; confirm before merging the kernels.
 */
public class MixedRadixFactor2 extends MixedRadixFactor {

    /** SIMD widths (in doubles) tried by {@link #getOptimalSIMDWidth()}, widest first. */
    private static final int[] SIMD_SIZES = {8, 4, 2};

    /**
     * Creates the pass.
     *
     * @param passConstants constants forwarded unchanged to the {@link MixedRadixFactor} constructor.
     */
    public MixedRadixFactor2(PassConstants passConstants) {
        super(passConstants);
    }

    /**
     * Reports whether the SIMD kernels of this class can run with the given vector width.
     *
     * @param width number of doubles per vector.
     * @return {@code true} only if {@code width} is 2, 4 or 8 and the inner loop limit is a multiple
     *     of {@code width / 2} (when {@code im == 1}) or of {@code width} (otherwise).
     */
    @Override
    public boolean isValidSIMDWidth(int width) {
        if (width != 2 && width != 4 && width != 8) {
            return false;
        }
        // With im == 1 the real and imaginary parts are adjacent, so one vector of `width` doubles
        // spans width / 2 (re, im) pairs; otherwise a vector spans `width` values of one kind.
        final int valuesPerVector = (im == 1) ? width / 2 : width;
        return innerLoopLimit % valuesPerVector == 0;
    }

    /**
     * Picks the SIMD width to use for this pass.
     *
     * @return {@code LENGTH} if it is valid; otherwise the widest of 8, 4, 2 that is smaller than
     *     {@code LENGTH} and valid; otherwise 0.
     */
    @Override
    public int getOptimalSIMDWidth() {
        if (isValidSIMDWidth(LENGTH)) {
            return LENGTH;
        }
        for (int size : SIMD_SIZES) {
            if (size < LENGTH && isValidSIMDWidth(size)) {
                return size;
            }
        }
        return 0;
    }

    /**
     * Scalar implementation of the pass.
     *
     * @param passData supplies the input array ({@code in}) and its start offset, the output array
     *     ({@code out}) and its start offset, and {@code sign}, whose negation scales {@code wi[k]}.
     */
    @Override
    protected void passScalar(PassData passData) {
        final double[] data = passData.in;
        final double[] ret = passData.out;
        final int sign = passData.sign;
        int i = passData.inOffset;
        int j = passData.outOffset;

        // k == 0: sum and difference are stored directly, no multiplication by w.
        for (int k1 = 0; k1 < innerLoopLimit; k1++, i += ii, j += ii) {
            final double z0Re = data[i];
            final double z0Im = data[i + im];
            final int idi = i + di;
            final double z1Re = data[idi];
            final double z1Im = data[idi + im];
            ret[j] = z0Re + z1Re;
            ret[j + im] = z0Im + z1Im;
            final int jdj = j + dj;
            ret[jdj] = z0Re - z1Re;
            ret[jdj + im] = z0Im - z1Im;
        }

        // Skip over the difference outputs just written at j + dj.
        j += dj;
        for (int k = 1; k < outerLoopLimit; k++, j += dj) {
            final double wRe = wr[k];
            final double wIm = -sign * wi[k];
            for (int k1 = 0; k1 < innerLoopLimit; k1++, i += ii, j += ii) {
                final double z0Re = data[i];
                final double z0Im = data[i + im];
                final int idi = i + di;
                final double z1Re = data[idi];
                final double z1Im = data[idi + im];
                ret[j] = z0Re + z1Re;
                ret[j + im] = z0Im + z1Im;
                final int jdj = j + dj;
                // NOTE(review): multiplyAndStore is inherited and not visible here; presumably it
                // stores the complex product (z0 - z1) * w at (jdj, jdj + im) - confirm.
                multiplyAndStore(z0Re - z1Re, z0Im - z1Im, wRe, wIm, ret, jdj, jdj + im);
            }
        }
    }

    /**
     * SIMD implementation of the pass. Falls back to {@link #passScalar(PassData)} when
     * {@code simdWidth} is not valid for this pass; otherwise dispatches to the interleaved kernels
     * ({@code im == 1}) or the blocked kernels.
     *
     * @param passData input/output arrays, offsets and sign for this pass.
     */
    @Override
    protected void passSIMD(PassData passData) {
        if (!isValidSIMDWidth(simdWidth)) {
            passScalar(passData);
            return;
        }
        if (im == 1) {
            interleaved(passData, simdWidth);
        } else {
            blocked(passData, simdWidth);
        }
    }

    /**
     * Runs the interleaved kernel matching {@code simdLength} (2, 4 or 8 doubles); any other value
     * runs the scalar pass.
     */
    private void interleaved(PassData passData, int simdLength) {
        switch (simdLength) {
            case 2 -> interleaved128(passData);
            case 4 -> interleaved256(passData);
            case 8 -> interleaved512(passData);
            default -> passScalar(passData);
        }
    }

    /**
     * Runs the blocked kernel matching {@code simdLength} (2, 4 or 8 doubles); any other value runs
     * the scalar pass.
     */
    private void blocked(PassData passData, int simdLength) {
        switch (simdLength) {
            case 2 -> blocked128(passData);
            case 4 -> blocked256(passData);
            case 8 -> blocked512(passData);
            default -> passScalar(passData);
        }
    }

    /**
     * One blocked butterfly for an arbitrary species: stores {@code z0 + z1} at {@code j} /
     * {@code j + im} and {@code (z0 - z1) * (w_r + i w_i)} at {@code j + dj} / {@code j + dj + im}.
     *
     * <p>Not called by any method of this class; the per-width kernels inline the same operations.
     */
    private void butterFly2Blocked(VectorSpecies<Double> species, double[] data, int i, double w_r, double w_i, double[] ret, int j) {
        final DoubleVector z0Re = DoubleVector.fromArray(species, data, i);
        final DoubleVector z0Im = DoubleVector.fromArray(species, data, i + im);
        final DoubleVector z1Re = DoubleVector.fromArray(species, data, i + di);
        final DoubleVector z1Im = DoubleVector.fromArray(species, data, i + di + im);
        z0Re.add(z1Re).intoArray(ret, j);
        z0Im.add(z1Im).intoArray(ret, j + im);
        final DoubleVector xRe = z0Re.sub(z1Re);
        final DoubleVector xIm = z0Im.sub(z1Im);
        xRe.mul(w_r).sub(xIm.mul(w_i)).intoArray(ret, j + dj);
        xIm.mul(w_r).add(xRe.mul(w_i)).intoArray(ret, j + dj + im);
    }

    /**
     * One interleaved butterfly on already-loaded vectors: stores {@code z0 + z1} at {@code j} and
     * {@code x * w_r + rearrange(x * w_i)} at {@code j + dj}, with {@code x = z0 - z1}.
     *
     * <p>Not called by any method of this class. Unlike the per-width interleaved kernels, which use
     * {@code fma}, this helper uses a separate multiply and add.
     */
    private void butterFly2Interleaved(DoubleVector z0, DoubleVector z1, DoubleVector w_r, DoubleVector w_i, VectorShuffle<Double> shuffle_re_im, int j, double[] ret) {
        z0.add(z1).intoArray(ret, j);
        final DoubleVector x = z0.sub(z1);
        x.mul(w_r).add(x.mul(w_i).rearrange(shuffle_re_im)).intoArray(ret, j + dj);
    }

    /**
     * Blocked kernel using {@code DoubleVector.SPECIES_128}: real and imaginary parts are loaded as
     * separate vectors ({@code im} elements apart) and the complex multiply by {@code w} is spelled
     * out on the two parts.
     */
    private void blocked128(PassData passData) {
        final double[] data = passData.in;
        final double[] ret = passData.out;
        final int sign = passData.sign;
        int i = passData.inOffset;
        int j = passData.outOffset;

        // k == 0: no multiplication by w.
        for (int k1 = 0; k1 < innerLoopLimit; k1 += BLOCK_LOOP_128, i += LENGTH_128, j += LENGTH_128) {
            final DoubleVector z0Re = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i);
            final DoubleVector z0Im = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i + im);
            final DoubleVector z1Re = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i + di);
            final DoubleVector z1Im = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i + di + im);
            z0Re.add(z1Re).intoArray(ret, j);
            z0Im.add(z1Im).intoArray(ret, j + im);
            z0Re.sub(z1Re).intoArray(ret, j + dj);
            z0Im.sub(z1Im).intoArray(ret, j + dj + im);
        }

        j += dj;
        for (int k = 1; k < outerLoopLimit; k++, j += dj) {
            final double wRe = wr[k];
            final double wIm = -sign * wi[k];
            for (int k1 = 0; k1 < innerLoopLimit; k1 += BLOCK_LOOP_128, i += LENGTH_128, j += LENGTH_128) {
                final DoubleVector z0Re = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i);
                final DoubleVector z0Im = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i + im);
                final DoubleVector z1Re = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i + di);
                final DoubleVector z1Im = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i + di + im);
                z0Re.add(z1Re).intoArray(ret, j);
                z0Im.add(z1Im).intoArray(ret, j + im);
                final DoubleVector xRe = z0Re.sub(z1Re);
                final DoubleVector xIm = z0Im.sub(z1Im);
                // (xRe + i xIm) * (wRe + i wIm)
                xRe.mul(wRe).sub(xIm.mul(wIm)).intoArray(ret, j + dj);
                xIm.mul(wRe).add(xRe.mul(wIm)).intoArray(ret, j + dj + im);
            }
        }
    }

    /**
     * Blocked kernel using {@code DoubleVector.SPECIES_256}; same operations as the 128-bit blocked
     * kernel with the 256-bit loop constants.
     */
    private void blocked256(PassData passData) {
        final double[] data = passData.in;
        final double[] ret = passData.out;
        final int sign = passData.sign;
        int i = passData.inOffset;
        int j = passData.outOffset;

        // k == 0: no multiplication by w.
        for (int k1 = 0; k1 < innerLoopLimit; k1 += BLOCK_LOOP_256, i += LENGTH_256, j += LENGTH_256) {
            final DoubleVector z0Re = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i);
            final DoubleVector z0Im = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i + im);
            final DoubleVector z1Re = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i + di);
            final DoubleVector z1Im = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i + di + im);
            z0Re.add(z1Re).intoArray(ret, j);
            z0Im.add(z1Im).intoArray(ret, j + im);
            z0Re.sub(z1Re).intoArray(ret, j + dj);
            z0Im.sub(z1Im).intoArray(ret, j + dj + im);
        }

        j += dj;
        for (int k = 1; k < outerLoopLimit; k++, j += dj) {
            final double wRe = wr[k];
            final double wIm = -sign * wi[k];
            for (int k1 = 0; k1 < innerLoopLimit; k1 += BLOCK_LOOP_256, i += LENGTH_256, j += LENGTH_256) {
                final DoubleVector z0Re = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i);
                final DoubleVector z0Im = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i + im);
                final DoubleVector z1Re = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i + di);
                final DoubleVector z1Im = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i + di + im);
                z0Re.add(z1Re).intoArray(ret, j);
                z0Im.add(z1Im).intoArray(ret, j + im);
                final DoubleVector xRe = z0Re.sub(z1Re);
                final DoubleVector xIm = z0Im.sub(z1Im);
                // (xRe + i xIm) * (wRe + i wIm)
                xRe.mul(wRe).sub(xIm.mul(wIm)).intoArray(ret, j + dj);
                xIm.mul(wRe).add(xRe.mul(wIm)).intoArray(ret, j + dj + im);
            }
        }
    }

    /**
     * Blocked kernel using {@code DoubleVector.SPECIES_512}; same operations as the 128-bit blocked
     * kernel with the 512-bit loop constants.
     */
    private void blocked512(PassData passData) {
        final double[] data = passData.in;
        final double[] ret = passData.out;
        final int sign = passData.sign;
        int i = passData.inOffset;
        int j = passData.outOffset;

        // k == 0: no multiplication by w.
        for (int k1 = 0; k1 < innerLoopLimit; k1 += BLOCK_LOOP_512, i += LENGTH_512, j += LENGTH_512) {
            final DoubleVector z0Re = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i);
            final DoubleVector z0Im = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i + im);
            final DoubleVector z1Re = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i + di);
            final DoubleVector z1Im = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i + di + im);
            z0Re.add(z1Re).intoArray(ret, j);
            z0Im.add(z1Im).intoArray(ret, j + im);
            z0Re.sub(z1Re).intoArray(ret, j + dj);
            z0Im.sub(z1Im).intoArray(ret, j + dj + im);
        }

        j += dj;
        for (int k = 1; k < outerLoopLimit; k++, j += dj) {
            final double wRe = wr[k];
            final double wIm = -sign * wi[k];
            for (int k1 = 0; k1 < innerLoopLimit; k1 += BLOCK_LOOP_512, i += LENGTH_512, j += LENGTH_512) {
                final DoubleVector z0Re = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i);
                final DoubleVector z0Im = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i + im);
                final DoubleVector z1Re = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i + di);
                final DoubleVector z1Im = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i + di + im);
                z0Re.add(z1Re).intoArray(ret, j);
                z0Im.add(z1Im).intoArray(ret, j + im);
                final DoubleVector xRe = z0Re.sub(z1Re);
                final DoubleVector xIm = z0Im.sub(z1Im);
                // (xRe + i xIm) * (wRe + i wIm)
                xRe.mul(wRe).sub(xIm.mul(wIm)).intoArray(ret, j + dj);
                xIm.mul(wRe).add(xRe.mul(wIm)).intoArray(ret, j + dj + im);
            }
        }
    }

    /**
     * Interleaved kernel using {@code DoubleVector.SPECIES_128}: a single vector load covers both
     * the real and imaginary parts, and the multiply by {@code w} is done as
     * {@code x * wRe + rearrange(x * wIm)}.
     *
     * <p>NOTE(review): {@code NEGATE_IM_128} and {@code SHUFFLE_RE_IM_128} are inherited constants
     * not visible here; presumably the shuffle swaps each (re, im) pair and the mask supplies the
     * sign needed for the real component of the product - confirm against the superclass.
     */
    private void interleaved128(PassData passData) {
        final double[] data = passData.in;
        final double[] ret = passData.out;
        final int sign = passData.sign;
        int i = passData.inOffset;
        int j = passData.outOffset;

        // k == 0: no multiplication by w.
        for (int k1 = 0; k1 < innerLoopLimit; k1 += INTERLEAVED_LOOP_128, i += LENGTH_128, j += LENGTH_128) {
            final DoubleVector z0 = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i);
            final DoubleVector z1 = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i + di);
            z0.add(z1).intoArray(ret, j);
            z0.sub(z1).intoArray(ret, j + dj);
        }

        j += dj;
        for (int k = 1; k < outerLoopLimit; k++, j += dj) {
            // The vector form of w depends only on k, so it is built once per outer iteration.
            final DoubleVector wRe = DoubleVector.broadcast(DoubleVector.SPECIES_128, wr[k]);
            final DoubleVector wIm = DoubleVector.broadcast(DoubleVector.SPECIES_128, -sign * wi[k]).mul(NEGATE_IM_128);
            for (int k1 = 0; k1 < innerLoopLimit; k1 += INTERLEAVED_LOOP_128, i += LENGTH_128, j += LENGTH_128) {
                final DoubleVector z0 = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i);
                final DoubleVector z1 = DoubleVector.fromArray(DoubleVector.SPECIES_128, data, i + di);
                z0.add(z1).intoArray(ret, j);
                final DoubleVector x = z0.sub(z1);
                x.fma(wRe, x.mul(wIm).rearrange(SHUFFLE_RE_IM_128)).intoArray(ret, j + dj);
            }
        }
    }

    /**
     * Interleaved kernel using {@code DoubleVector.SPECIES_256}; same operations as the 128-bit
     * interleaved kernel with the 256-bit constants.
     */
    private void interleaved256(PassData passData) {
        final double[] data = passData.in;
        final double[] ret = passData.out;
        final int sign = passData.sign;
        int i = passData.inOffset;
        int j = passData.outOffset;

        // k == 0: no multiplication by w.
        for (int k1 = 0; k1 < innerLoopLimit; k1 += INTERLEAVED_LOOP_256, i += LENGTH_256, j += LENGTH_256) {
            final DoubleVector z0 = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i);
            final DoubleVector z1 = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i + di);
            z0.add(z1).intoArray(ret, j);
            z0.sub(z1).intoArray(ret, j + dj);
        }

        j += dj;
        for (int k = 1; k < outerLoopLimit; k++, j += dj) {
            // The vector form of w depends only on k, so it is built once per outer iteration.
            final DoubleVector wRe = DoubleVector.broadcast(DoubleVector.SPECIES_256, wr[k]);
            final DoubleVector wIm = DoubleVector.broadcast(DoubleVector.SPECIES_256, -sign * wi[k]).mul(NEGATE_IM_256);
            for (int k1 = 0; k1 < innerLoopLimit; k1 += INTERLEAVED_LOOP_256, i += LENGTH_256, j += LENGTH_256) {
                final DoubleVector z0 = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i);
                final DoubleVector z1 = DoubleVector.fromArray(DoubleVector.SPECIES_256, data, i + di);
                z0.add(z1).intoArray(ret, j);
                final DoubleVector x = z0.sub(z1);
                x.fma(wRe, x.mul(wIm).rearrange(SHUFFLE_RE_IM_256)).intoArray(ret, j + dj);
            }
        }
    }

    /**
     * Interleaved kernel using {@code DoubleVector.SPECIES_512}; same operations as the 128-bit
     * interleaved kernel with the 512-bit constants.
     */
    private void interleaved512(PassData passData) {
        final double[] data = passData.in;
        final double[] ret = passData.out;
        final int sign = passData.sign;
        int i = passData.inOffset;
        int j = passData.outOffset;

        // k == 0: no multiplication by w.
        for (int k1 = 0; k1 < innerLoopLimit; k1 += INTERLEAVED_LOOP_512, i += LENGTH_512, j += LENGTH_512) {
            final DoubleVector z0 = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i);
            final DoubleVector z1 = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i + di);
            z0.add(z1).intoArray(ret, j);
            z0.sub(z1).intoArray(ret, j + dj);
        }

        j += dj;
        for (int k = 1; k < outerLoopLimit; k++, j += dj) {
            // The vector form of w depends only on k, so it is built once per outer iteration.
            final DoubleVector wRe = DoubleVector.broadcast(DoubleVector.SPECIES_512, wr[k]);
            final DoubleVector wIm = DoubleVector.broadcast(DoubleVector.SPECIES_512, -sign * wi[k]).mul(NEGATE_IM_512);
            for (int k1 = 0; k1 < innerLoopLimit; k1 += INTERLEAVED_LOOP_512, i += LENGTH_512, j += LENGTH_512) {
                final DoubleVector z0 = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i);
                final DoubleVector z1 = DoubleVector.fromArray(DoubleVector.SPECIES_512, data, i + di);
                z0.add(z1).intoArray(ret, j);
                final DoubleVector x = z0.sub(z1);
                x.fma(wRe, x.mul(wIm).rearrange(SHUFFLE_RE_IM_512)).intoArray(ret, j + dj);
            }
        }
    }
}

