// ******************************************************************************
//
// Title:       Force Field X.
// Description: Force Field X - Software for Molecular Biophysics.
// Copyright:   Copyright (c) Michael J. Schnieders 2001-2024.
//
// This file is part of Force Field X.
//
// Force Field X is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 3 as published by
// the Free Software Foundation.
//
// Force Field X is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
// details.
//
// You should have received a copy of the GNU General Public License along with
// Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
//
// Linking this library statically or dynamically with other modules is making a
// combined work based on this library. Thus, the terms and conditions of the
// GNU General Public License cover the whole combination.
//
// As a special exception, the copyright holders of this library give you
// permission to link this library with independent modules to produce an
// executable, regardless of the license terms of these independent modules, and
// to copy and distribute the resulting executable under terms of your choice,
// provided that you also meet, for each linked independent module, the terms
// and conditions of the license of that module. An independent module is a
// module which is not derived from or based on this library. If you modify this
// library, you may extend this exception to your version of the library, but
// you are not obligated to do so. If you do not wish to do so, delete this
// exception statement from your version.
//
// ******************************************************************************
package ffx.numerics.optimization;

import static java.lang.Double.isInfinite;
import static java.lang.Double.isNaN;
import static java.lang.Math.fma;
import static java.lang.String.format;
import static java.lang.System.arraycopy;
import static java.util.Arrays.fill;
import static org.apache.commons.math3.util.FastMath.abs;
import static org.apache.commons.math3.util.FastMath.min;
import static org.apache.commons.math3.util.FastMath.sqrt;

import ffx.numerics.OptimizationInterface;
import ffx.numerics.optimization.LineSearch.LineSearchResult;

import javax.annotation.Nullable;
import java.util.logging.Logger;

/**
 * This class implements the limited-memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) algorithm for
 * large-scale multidimensional unconstrained optimization problems.
 *
 * @author Michael J. Schnieders<br> Derived from: <br> Robert Dodier's Java translation of original
 * FORTRAN code by Jorge Nocedal. <br> J. Nocedal, "Updating Quasi-Newton Matrices with Limited
 * Storage", Mathematics of Computation, 35, 773-782 (1980) <br> D. C. Liu and J. Nocedal, "On
 * the Limited Memory BFGS Method for Large Scale Optimization", Mathematical Programming, 45,
 * 503-528 (1989) <br> J. Nocedal and S. J. Wright, "Numerical Optimization", Springer-Verlag,
 * New York, 1999, Section 9.1
 * @since 1.0
 */
public class LBFGS {

  /**
   * Controls the accuracy of the line search.
   * <p>
   * If the function and gradient evaluations are inexpensive with respect to the cost of the
   * iteration (which is sometimes the case when solving very large problems), it may be
   * advantageous to set <code>CAPPA</code> to a small value.
   * <p>
   * A typical small value is 0.1.
   * <p>
   * Restriction: <code>CAPPA</code> should be greater than 1e-4.
   */
  public static final double DEFAULT_CAPPA = 0.9;

  /**
   * This specifies the default lower bound for the step in the line search.
   * <p>
   * The default value of 1.0e-16 does not need to be modified unless the problem is extremely
   * badly scaled (in which case the bound should be decreased).
   */
  public static final double DEFAULT_STEPMIN = 1.0e-16;

  /**
   * This specifies the default upper bound for the step in the line search.
   * <p>
   * The default value of 5.0e0 does not need to be modified unless the problem is extremely
   * badly scaled (in which case the bound should be increased).
   */
  public static final double DEFAULT_STEPMAX = 5.0e0;

  /**
   * The default projected gradient above which the step size is reduced.
   */
  public static final double DEFAULT_SLOPEMAX = 1.0e4;

  /**
   * The default maximum angle between the search direction and the gradient.
   */
  public static final double DEFAULT_ANGLEMAX = 180.0;

  /**
   * The default maximum number of interpolations during the line search.
   */
  public static final int DEFAULT_INTMAX = 5;

  private static final Logger logger = Logger.getLogger(LBFGS.class.getName());

  /**
   * The constructor is private so that LBFGS cannot be instantiated.
   */
  private LBFGS() {
  }

  /**
   * This method solves the unconstrained minimization problem
   *
   * <pre>
   *     min f(x),    x = (x1,x2,...,x_n),
   * </pre>
   * <p>
   * using the limited-memory BFGS method. The routine is especially effective on problems involving
   * a large number of variables. In a typical iteration of this method an approximation <code>Hk
   * </code> to the inverse of the Hessian is obtained by applying <code>m</code> BFGS updates to a
   * diagonal matrix <code>Hk0</code>, using information from the previous <code>m</code> steps.
   *
   * <p>The user specifies the number <code>m</code>, which determines the amount of storage
   * required by the routine.
   *
   * <p>The user is required to calculate the function value <code>f</code> and its gradient <code>g
   * </code>.
   *
   * <p>The step length is determined at each iteration by means of the line search routine <code>
   * lineSearch</code>, which is a slight modification of the routine <code>CSRCH</code> written by
   * Moré and Thuente.
   *
   * @param n             The number of variables in the minimization problem. Restriction:
   *                      <code>n &gt; 0</code>.
   * @param mSave         The number of corrections used in the BFGS update. Values of <code>mSave</code>
   *                      less than 3 are not recommended; large values of <code>mSave</code> will
   *                      result in excessive computing time. <code>3 &lt;= mSave &lt;= 7</code> is
   *                      recommended.
   *                      <p>
   *                      Restriction: <code>mSave &gt;= 0</code>.
   * @param x             On initial entry this must be set by the user to the values of the initial
   *                      estimate of the solution vector. On exit, it contains the values of the
   *                      variables at the best point found (usually a solution).
   * @param f             The value of the function <code>f</code> at the point <code>x</code>.
   * @param g             The components of the gradient <code>g</code> at the point <code>x</code>.
   * @param eps           Determines the accuracy with which the solution is to be found. The
   *                      subroutine terminates when <code>G RMS &lt;= EPS</code>.
   * @param maxIterations Maximum number of optimization steps.
   * @param potential     Implements the {@link ffx.numerics.Potential} interface to supply function
   *                      values and gradients.
   * @param listener      Implements the {@link OptimizationListener} interface and will be notified
   *                      after each successful step.
   * @return status code (0 = converged, 1 = terminated before convergence, -1 = line search failed)
   * @since 1.0
   */
  public static int minimize(final int n, int mSave, final double[] x, double f, double[] g,
                             final double eps, final int maxIterations, OptimizationInterface potential,
                             @Nullable OptimizationListener listener) {

    assert (n > 0);
    assert (mSave > -1);
    assert (maxIterations > 0);
    assert (x != null && x.length >= n);
    assert (g != null && g.length >= n);

    if (mSave > n) {
      logger.fine(format(" Resetting the number of saved L-BFGS vectors to %d.", n));
      mSave = n;
    }

    int iterations = 0;
    int evaluations = 1;
    int nErrors = 0;
    int maxErrors = 2;

    double rms = sqrt(n);
    double[] scaling = potential.getScaling();
    if (scaling == null) {
      scaling = new double[n];
      fill(scaling, 1.0);
    }

    // The initial search direction is the steepest descent direction.
    double[][] s = new double[mSave][n];
    double[][] y = new double[mSave][n];
    if (mSave > 0) {
      for (int i = 0; i < n; i++) {
        s[0][i] = -g[i];
      }
    }

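    // Compute the Euclidean norm of the gradient and the scaled RMS gradient
    // used by the convergence test below.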
    double grms = 0.0;
    double gnorm = 0.0;
    for (int i = 0; i < n; i++) {
      double gi = g[i];
      if (isNaN(gi) || isInfinite(gi)) {
        logger.warning(format("The gradient of variable %d is %8.3f.", i, gi));
        return 1;
      }
      double gis = gi * scaling[i];
      gnorm += gi * gi;
      grms += gis * gis;
    }
    gnorm = sqrt(gnorm);
    grms = sqrt(grms) / rms;

    // Notify the listeners of initial conditions.
    if (listener != null) {
      if (!listener.optimizationUpdate(iterations, mSave, evaluations, grms, 0.0, f, 0.0, 0.0, null)) {
        // Terminate the optimization.
        return 1;
      }
    } else {
      log(iterations, evaluations, grms, 0.0, f, 0.0, 0.0, null);
    }

    // The convergence criteria may already be satisfied.
    if (grms <= eps) {
      return 0;
    }

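    // Workspace: the previous point and gradient, the search direction p, the
    // estimated Hessian diagonal h0, and the scalars used by the two-loop recursion.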
    final double[] prevX = new double[n];
    final double[] prevG = new double[n];
    final double[] r = new double[n];
    final double[] p = new double[n];
    final double[] h0 = new double[n];
    final double[] q = new double[n];
    final double[] alpha = new double[mSave];
    final double[] rho = new double[mSave];
    double gamma = 1.0;

    // Line search parameters.
    final LineSearch lineSearch = new LineSearch(n);
    final LineSearchResult[] info = {LineSearchResult.Success};
    final int[] nFunctionEvals = {0};
    final double[] angle = {0.0};
    double df = 0.5 * DEFAULT_STEPMAX * gnorm;
    int m = -1;

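    // Main optimization loop.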
    while (true) {
      iterations++;
      if (iterations > maxIterations) {
        logger.info(format(" Maximum number of iterations reached: %d.", maxIterations));
        return 1;
      }

      if (mSave > 0) {
        int muse = min(iterations - 1, mSave);
        m++;
        if (m > mSave - 1) {
          m = 0;
        }

        // Estimate the Hessian diagonal.
        fill(h0, gamma);
        arraycopy(g, 0, q, 0, n);
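        // Apply the standard L-BFGS two-loop recursion (Nocedal, 1980) to the
        // gradient copy in q: the first loop runs backward over the stored
        // correction pairs, the second runs forward to build r ~ H * g.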
        int k = m;
        for (int j = 0; j < muse; j++) {
          k--;
          if (k < 0) {
            k = mSave - 1;
          }
          alpha[k] = v1DotV2(n, s[k], 0, 1, q, 0, 1);
          alpha[k] *= rho[k];
          aV1PlusV2(n, -alpha[k], y[k], 0, 1, q, 0, 1);
        }
        for (int i = 0; i < n; i++) {
          r[i] = h0[i] * q[i];
        }
        for (int j = 0; j < muse; j++) {
          double beta = v1DotV2(n, r, 0, 1, y[k], 0, 1);
          beta *= rho[k];
          aV1PlusV2(n, alpha[k] - beta, s[k], 0, 1, r, 0, 1);
          k++;
          if (k > mSave - 1) {
            k = 0;
          }
        }

        // Set the search direction.
        for (int i = 0; i < n; i++) {
          p[i] = -r[i];
        }
      } else {
        // Steepest descent direction.
        for (int i = 0; i < n; i++) {
          p[i] = -g[i];
        }
      }

      arraycopy(x, 0, prevX, 0, n);
      arraycopy(g, 0, prevG, 0, n);

      // Perform the line search along the new search direction.
      nFunctionEvals[0] = 0;
      double prevF = f;
      f = lineSearch.search(n, x, f, g, p, angle, df, info, nFunctionEvals, potential);
      evaluations += nFunctionEvals[0];

      // Update variables based on the results of this iteration.
      if (mSave > 0) {
        for (int i = 0; i < n; i++) {
          s[m][i] = x[i] - prevX[i];
          y[m][i] = g[i] - prevG[i];
        }
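        // ys is the curvature along the step; gamma = |ys / (y.y)| scales the
        // initial inverse Hessian estimate H0 = gamma * I (Liu and Nocedal, 1989).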
        double ys = v1DotV2(n, y[m], 0, 1, s[m], 0, 1);
        double yy = v1DotV2(n, y[m], 0, 1, y[m], 0, 1);
        gamma = abs(ys / yy);
        rho[m] = 1.0 / ys;
      }

      // Get the sizes of the moves made during this iteration.
      df = prevF - f;
      double xrms = 0.0;
      grms = 0.0;
      for (int i = 0; i < n; i++) {
        double dx = (x[i] - prevX[i]) / scaling[i];
        xrms += dx * dx;
        double gx = g[i] * scaling[i];
        grms += gx * gx;
      }
      xrms = sqrt(xrms) / rms;
      grms = sqrt(grms) / rms;

      boolean done = false;
      if (info[0] == LineSearchResult.BadIntpln || info[0] == LineSearchResult.IntplnErr) {
        nErrors++;
        if (nErrors >= maxErrors) {
          done = true;
        }
      } else {
        nErrors = 0;
      }

      if (listener != null) {
        if (!listener.optimizationUpdate(iterations, mSave, evaluations, grms, xrms, f, df, angle[0],
            info[0])) {
          // Terminate the optimization.
          return 1;
        }
      } else {
        log(iterations, evaluations, grms, xrms, f, df, angle[0], info[0]);
      }

      /*
       Terminate the optimization if the line search failed or upon
       satisfying the convergence criteria.
      */
      if (done) {
        return -1;
      } else if (grms <= eps) {
        return 0;
      }
    }
  }

  /**
   * This method solves the unconstrained minimization problem
   *
   * <pre>
   *     min f(x),    x = (x1,x2,...,x_n),
   * </pre>
   * <p>
   * using the limited-memory BFGS method. The routine is especially effective on problems involving
   * a large number of variables. In a typical iteration of this method an approximation <code>Hk
   * </code> to the inverse of the Hessian is obtained by applying <code>m</code> BFGS updates to a
   * diagonal matrix <code>Hk0</code>, using information from the previous <code>m</code> steps.
   *
   * <p>The user specifies the number <code>m</code>, which determines the amount of storage
   * required by the routine.
   *
   * <p>The user is required to calculate the function value <code>f</code> and its gradient <code>g
   * </code>.
   *
   * <p>The step length is determined at each iteration by means of the line search routine <code>
   * lineSearch</code>, which is a slight modification of the routine <code>CSRCH</code> written by
   * Moré and Thuente.
   *
   * @param n         The number of variables in the minimization problem. Restriction:
   *                  <code>n &gt; 0</code>.
   * @param mSave     The number of corrections used in the BFGS update. Values of <code>mSave</code>
   *                  less than 3 are not recommended; large values of <code>mSave</code> will result
   *                  in excessive computing time. <code>3 &lt;= mSave &lt;= 7</code> is recommended.
   *                  Restriction: <code>mSave &gt;= 0</code>.
   * @param x         On initial entry this must be set by the user to the values of the initial
   *                  estimate of the solution vector. On exit, it contains the values of the
   *                  variables at the best point found (usually a solution).
   * @param f         The value of the function <code>f</code> at the point <code>x</code>.
   * @param g         The components of the gradient <code>g</code> at the point <code>x</code>.
   * @param eps       Determines the accuracy with which the solution is to be found. The subroutine
   *                  terminates when <code>G RMS &lt;= EPS</code>.
   * @param potential Implements the {@link ffx.numerics.Potential} interface to supply function
   *                  values and gradients.
   * @param listener  Implements the {@link OptimizationListener} interface and will be notified
   *                  after each successful step.
   * @return status code (0 = converged, 1 = terminated before convergence, -1 = line search failed)
   * @since 1.0
   */
  public static int minimize(int n, int mSave, double[] x, double f, double[] g, double eps,
                             OptimizationInterface potential, OptimizationListener listener) {
    return minimize(n, mSave, x, f, g, eps, Integer.MAX_VALUE - 1, potential, listener);
  }

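  /*
   A minimal usage sketch (not part of the API), assuming only what this class
   already relies on: an OptimizationInterface whose energyAndGradient method
   fills the gradient array and returns the function value, and whose
   getScaling method may return null for unit scaling. Other methods a
   concrete implementation must provide are omitted for brevity.

     OptimizationInterface quadratic = ...; // e.g. f(x) = sum_i x_i^2, g_i = 2 * x_i
     double[] x = {1.0, -2.0, 3.0};          // initial estimate
     double[] g = new double[x.length];      // gradient workspace
     double f = quadratic.energyAndGradient(x, g);
     int status = LBFGS.minimize(x.length, 5, x, f, g, 1.0e-6, quadratic, null);
     // status == 0 on convergence; x then holds the best point found.
  */
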
  /**
   * Compute the sum of a vector times a scalar plus another vector.
   *
   * @param n       The number of points.
   * @param a       The scalar.
   * @param v1      The X array.
   * @param v1Start The first point in the X array.
   * @param v1Step  The X array increment.
   * @param v2      The Y array.
   * @param v2Start The first point in the Y array.
   * @param v2Step  The Y array increment.
   * @since 1.0
   */
  static void aV1PlusV2(final int n, final double a, final double[] v1, final int v1Start,
                        final int v1Step, final double[] v2, final int v2Start, final int v2Step) {
    /*
     Require the number of entries (n) to be greater than zero. If the
     scalar (a) is zero, then the v2 array is unchanged.
    */
    if (n <= 0 || a == 0) {
      return;
    }

    int stop = v1Start + v1Step * n;
    for (int i = v1Start, j = v2Start; i != stop; i += v1Step, j += v2Step) {
      v2[j] = fma(a, v1[i], v2[j]);
    }
  }
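
  /*
   For example (an illustrative call, not taken from the test suite): with
   a = 2.0, v1 = {1, 2, 3} and v2 = {1, 1, 1}, the unit-stride call
   aV1PlusV2(3, 2.0, v1, 0, 1, v2, 0, 1) overwrites v2 with {3, 5, 7},
   i.e. the BLAS-style update v2 = a * v1 + v2 using fused multiply-add.
  */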

  /**
   * Print status messages for <code>LBFGS</code> if there is no listener.
   *
   * @param iter  Number of iterations so far.
   * @param nfun  Number of function evaluations so far.
   * @param grms  Gradient RMS at current solution.
   * @param xrms  Coordinate change RMS at current solution.
   * @param f     Function value at current solution.
   * @param df    Change in the function value compared to the previous solution.
   * @param angle Current angle between gradient and search direction.
   * @param info  Result of the most recent line search, or null before the first step.
   * @since 1.0
   */
  private static void log(int iter, int nfun, double grms, double xrms, double f, double df,
                          double angle, LineSearchResult info) {
    if (iter == 0) {
      logger.info("\n Limited Memory BFGS Quasi-Newton Optimization: \n");
      logger.info(
          " QN Iter    F Value      G RMS     F Move    X Move    Angle  FG Call  Comment\n");
    }
    if (info == null) {
      logger.info(
          format("%6d%13.4f%11.4f%11.4f%10.4f%9.2f%7d", iter, f, grms, df, xrms, angle, nfun));
    } else {
      logger.info(
          format("%6d%13.4f%11.4f%11.4f%10.4f%9.2f%7d   %8s", iter, f, grms, df, xrms, angle, nfun,
              info));
    }
  }

  /**
   * Compute the dot product of two vectors.
   *
   * @param n       Number of entries to include.
   * @param v1      The X array.
   * @param v1Start The first point in the X array.
   * @param v1Step  The X array increment.
   * @param v2      The Y array.
   * @param v2Start The first point in the Y array.
   * @param v2Step  The Y array increment.
   * @return The dot product.
   * @since 1.0
   */
  static double v1DotV2(final int n, final double[] v1, final int v1Start, final int v1Step,
                        final double[] v2, final int v2Start, final int v2Step) {

    // Require the number of entries to be greater than zero.
    if (n <= 0) {
      return 0;
    }

    double sum = 0.0;
    int stop = v1Start + v1Step * n;
    for (int i = v1Start, j = v2Start; i != stop; i += v1Step, j += v2Step) {
      sum = fma(v1[i], v2[j], sum);
    }
    return sum;
  }
}
500 }