View Javadoc
1   // ******************************************************************************
2   //
3   // Title:       Force Field X.
4   // Description: Force Field X - Software for Molecular Biophysics.
5   // Copyright:   Copyright (c) Michael J. Schnieders 2001-2024.
6   //
7   // This file is part of Force Field X.
8   //
9   // Force Field X is free software; you can redistribute it and/or modify it
10  // under the terms of the GNU General Public License version 3 as published by
11  // the Free Software Foundation.
12  //
13  // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14  // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16  // details.
17  //
18  // You should have received a copy of the GNU General Public License along with
19  // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20  // Place, Suite 330, Boston, MA 02111-1307 USA
21  //
22  // Linking this library statically or dynamically with other modules is making a
23  // combined work based on this library. Thus, the terms and conditions of the
24  // GNU General Public License cover the whole combination.
25  //
26  // As a special exception, the copyright holders of this library give you
27  // permission to link this library with independent modules to produce an
28  // executable, regardless of the license terms of these independent modules, and
29  // to copy and distribute the resulting executable under terms of your choice,
30  // provided that you also meet, for each linked independent module, the terms
31  // and conditions of the license of that module. An independent module is a
32  // module which is not derived from or based on this library. If you modify this
33  // library, you may extend this exception to your version of the library, but
34  // you are not obligated to do so. If you do not wish to do so, delete this
35  // exception statement from your version.
36  //
37  // ******************************************************************************
38  package ffx.numerics.math;
39  
40  import static org.apache.commons.math3.util.FastMath.floor;
41  
42  import java.util.Random;
43  
44  /**
45   * java -cp target/numerics-1.0.0-beta.jar -XX:+UnlockDiagnosticVMOptions -XX:+PrintAssembly
46   * -Djava.library.path=hsdis-amd64.dylib ffx.numerics.math.SSETest
47   *
48   * @author M. J. Schnieders
49   */
50  public class SSETest {
51  
52    public final double[][] A;
53    public final double[] x;
54    private final double[] flatA;
55  
56    /**
57     * Constructor for SSETest.
58     *
59     * @param m the number of rows.
60     * @param n the number of columns.
61     */
62    private SSETest(int m, int n) {
63      A = new double[m][n];
64      flatA = new double[m * n];
65      x = new double[n];
66    }
67  
68    /**
69     * main.
70     *
71     * @param args an array of {@link java.lang.String} objects.
72     */
73    public static void main(String[] args) {
74  
75      int m = 500;
76      int n = 500;
77      if (args != null && args.length > 0) {
78        m = Integer.parseInt(args[0]);
79        n = Integer.parseInt(args[1]);
80      }
81  
82      SSETest test = new SSETest(m, n);
83  
84      int nLoops = 100000;
85  
86      test.init(m, n);
87      Random r = new Random(0);
88      double temp = 0;
89      long time = 0;
90      for (int i = 1; i <= nLoops; i++) {
91        time -= System.nanoTime();
92        double[] y = test.matVec(test.A, test.x, m, n);
93        time += System.nanoTime();
94        int j = (int) floor(m * r.nextDouble());
95        temp += y[j];
96        if (i % 10000 == 0) {
97          System.out.println(" Nested: " + temp + " " + time * 1.0e-9);
98          time = 0;
99        }
100     }
101 
102     test.init(m, n);
103     r = new Random(0);
104     temp = 0;
105     time = 0;
106     for (int i = 1; i <= nLoops; i++) {
107       time -= System.nanoTime();
108       double[] y = test.matVec(test.flatA, test.x, m, n);
109       time += System.nanoTime();
110       int j = (int) floor(m * r.nextDouble());
111       temp += y[j];
112       if (i % 10000 == 0) {
113         System.out.println(" Flat: " + temp + " " + time * 1.0e-9);
114         time = 0;
115       }
116     }
117   }
118 
119   private void init(int n, int m) {
120     Random r = new Random(0);
121 
122     // Initialize A and flatA.
123     for (int i = 0; i < m; i++) {
124       int idx = i * n;
125       for (int j = 0; j < n; j++) {
126         A[i][j] = r.nextDouble();
127         flatA[idx + j] = A[i][j];
128       }
129     }
130     // Initialize X.
131     for (int j = 0; j < n; j++) {
132       x[j] = r.nextDouble();
133     }
134   }
135 
136   /**
137    * matVec.
138    *
139    * @param A an array of {@link double} objects.
140    * @param x an array of {@link double} objects.
141    * @param m the number of rows.
142    * @param n the number of columns.
143    * @return an array of {@link double} objects.
144    */
145   private double[] matVec(final double[][] A, final double[] x, final int m, final int n) {
146     double[] y = new double[m];
147     for (int i = 0; i < m; i++) {
148       for (int j = 0; j < n; j++) {
149         // y[i] = fma(A[i][j], x[j], y[i]);
150         y[i] += A[i][j] * x[j];
151       }
152     }
153     return y;
154   }
155 
156   /**
157    * matVec.
158    *
159    * @param A an array of {@link double} objects.
160    * @param x an array of {@link double} objects.
161    * @param m the number of rows.
162    * @param n the number of columns.
163    * @return an array of {@link double} objects.
164    */
165   private double[] matVec(final double[] A, final double[] x, final int m, final int n) {
166     double[] y = new double[m];
167     final int extra = n - n % 8;
168     final int ub = ((n / 8) * 8) - 1;
169     int idx = 0;
170     for (int i = 0; i < m; i++) {
171       double acc = 0;
172       for (int j = 0; j < ub; j += 8) {
173         int ptr = idx + j;
174         y[i] +=
175             A[ptr] * x[j]
176                 + A[ptr + 1] * x[j + 1]
177                 + A[ptr + 2] * x[j + 2]
178                 + A[ptr + 3] * x[j + 3];
179         acc += A[ptr + 4] * x[j + 4]
180             + A[ptr + 5] * x[j + 5]
181             + A[ptr + 6] * x[j + 6]
182             + A[ptr + 7] * x[j + 7];
183       }
184       y[i] += acc;
185       for (int j = extra; j < n; j++) {
186         y[i] += A[idx + j] * x[j];
187       }
188       idx += n;
189     }
190     return y;
191   }
192 }