1 // ******************************************************************************
2 //
3 // Title: Force Field X.
4 // Description: Force Field X - Software for Molecular Biophysics.
5 // Copyright: Copyright (c) Michael J. Schnieders 2001-2025.
6 //
7 // This file is part of Force Field X.
8 //
9 // Force Field X is free software; you can redistribute it and/or modify it
10 // under the terms of the GNU General Public License version 3 as published by
11 // the Free Software Foundation.
12 //
13 // Force Field X is distributed in the hope that it will be useful, but WITHOUT
14 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16 // details.
17 //
18 // You should have received a copy of the GNU General Public License along with
19 // Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
20 // Place, Suite 330, Boston, MA 02111-1307 USA
21 //
22 // Linking this library statically or dynamically with other modules is making a
23 // combined work based on this library. Thus, the terms and conditions of the
24 // GNU General Public License cover the whole combination.
25 //
26 // As a special exception, the copyright holders of this library give you
27 // permission to link this library with independent modules to produce an
28 // executable, regardless of the license terms of these independent modules, and
29 // to copy and distribute the resulting executable under terms of your choice,
30 // provided that you also meet, for each linked independent module, the terms
31 // and conditions of the license of that module. An independent module is a
32 // module which is not derived from or based on this library. If you modify this
33 // library, you may extend this exception to your version of the library, but
34 // you are not obligated to do so. If you do not wish to do so, delete this
35 // exception statement from your version.
36 //
37 // ******************************************************************************
38 package ffx.numerics.optimization;
39
40 import ffx.numerics.OptimizationInterface;
41 import ffx.numerics.optimization.LineSearch.LineSearchResult;
42
43 import javax.annotation.Nullable;
44 import java.util.logging.Logger;
45
46 import static java.lang.Double.isInfinite;
47 import static java.lang.Double.isNaN;
48 import static java.lang.Math.fma;
49 import static java.lang.String.format;
50 import static java.lang.System.arraycopy;
51 import static java.util.Arrays.fill;
52 import static org.apache.commons.math3.util.FastMath.abs;
53 import static org.apache.commons.math3.util.FastMath.min;
54 import static org.apache.commons.math3.util.FastMath.sqrt;
55
56 /**
57 * This class implements the limited-memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) algorithm for
58 * large-scale multidimensional unconstrained optimization problems.
59 *
60 * <p>The L-BFGS algorithm is a quasi-Newton method that approximates the Broyden–Fletcher–Goldfarb–Shanno
61 * (BFGS) algorithm using a limited amount of computer memory. It is particularly well suited for
62 * optimization problems with a large number of variables.
63 *
 * <p>Rather than storing a dense approximation to the inverse Hessian, as the original BFGS
 * method does, L-BFGS keeps only a small number of correction vectors that represent the
 * approximation implicitly, greatly reducing both memory use and cost per iteration.
68 *
69 * <p>This implementation uses a line search procedure to ensure sufficient decrease in the objective
70 * function and to maintain positive definiteness of the Hessian approximation.
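 *
 * <p>A minimal usage sketch is shown below; {@code MyPotential} is a hypothetical stand-in for
 * any {@link OptimizationInterface} implementation that supplies function values and gradients:
 *
 * <pre>{@code
 * OptimizationInterface potential = new MyPotential();
 * int n = ...;                 // number of variables
 * double[] x = ...;            // initial estimate of the solution, length n
 * double[] g = new double[n];
 * double f = potential.energyAndGradient(x, g); // evaluate f and fill g at x
 * int status = LBFGS.minimize(n, 5, x, f, g, 1.0e-4, 1000, potential, null);
 * }</pre>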
71 *
72 * @author Michael J. Schnieders
73 * <br> Derived from:
74 * <br> Robert Dodier's Java translation of original FORTRAN code by Jorge Nocedal.
75 * <br> J. Nocedal, "Updating Quasi-Newton Matrices with Limited Storage", Mathematics of Computation, 35, 773-782 (1980)
 * <br> D. C. Liu and J. Nocedal, "On the Limited Memory BFGS Method for Large Scale Optimization", Mathematical Programming, 45, 503-528 (1989)
77 * <br> J. Nocedal and S. J. Wright, "Numerical Optimization", Springer-Verlag, New York, 1999, Section 9.1
78 * @since 1.0
79 */
80 public class LBFGS {
81
82 /**
83 * Controls the accuracy of the line search.
84 * <p>
85 * If the function and gradient evaluations are inexpensive with respect to the cost of the
86 * iteration (which is sometimes the case when solving very large problems) it may be advantageous
87 * to set <code>CAPPA</code> to a small value.
88 * <p>
89 * A typical small value is 0.1.
90 * <p>
91 * Restriction: <code>CAPPA</code> should be greater than 1e-4.
92 */
93 public static final double DEFAULT_CAPPA = 0.9;
94
95 /**
96 * This specifies the default lower bound for the step in the line search.
97 * <p>
   * The default value of 1.0e-16 does not need to be modified unless the problem is extremely badly
   * scaled (in which case the bound should be decreased, e.g., to 1.0e-20).
100 */
101 public static final double DEFAULT_STEPMIN = 1.0e-16;
102
103 /**
104 * This specifies the default upper bound for the step in the line search.
105 * <p>
   * The default value of 5.0e0 does not need to be modified unless the problem is extremely badly
   * scaled (in which case the bound should be increased).
108 */
109 public static final double DEFAULT_STEPMAX = 5.0e0;
110
111 /**
   * The default projected gradient magnitude above which the line-search step size is reduced.
113 */
114 public static final double DEFAULT_SLOPEMAX = 1.0e4;
115
116 /**
117 * The default maximum angle between search direction and gradient.
118 */
119 public static final double DEFAULT_ANGLEMAX = 180.0;
120
121 /**
122 * The default maximum number of interpolations during line search.
123 */
124 public static final int DEFAULT_INTMAX = 5;
125
126 private static final Logger logger = Logger.getLogger(LBFGS.class.getName());
127
128 /**
   * The constructor is private so that LBFGS, a utility class of static methods, cannot be instantiated.
130 */
131 private LBFGS() {
132 // Private constructor to prevent instantiation
133 }
134
135 /**
136 * This method solves the unconstrained minimization problem
137 *
138 * <pre>
139 * min f(x), x = (x1,x2,...,x_n),
140 * </pre>
141 * <p>
142 * using the limited-memory BFGS method. The routine is especially effective on problems involving
143 * a large number of variables. In a typical iteration of this method an approximation <code>Hk
144 * </code> to the inverse of the Hessian is obtained by applying <code>m</code> BFGS updates to a
145 * diagonal matrix <code>Hk0</code>, using information from the previous <code>m</code> steps.
146 *
147 * <p>The user specifies the number <code>mSave</code>, which determines the amount of storage
148 * required by the routine. This is the number of previous steps that will be used to approximate
149 * the Hessian. Larger values of <code>mSave</code> can lead to better convergence but require more
150 * memory and computation per iteration.
151 *
152 * <p>The algorithm works as follows:
153 * <ol>
154 * <li>Initialize with the current point, function value, and gradient</li>
155 * <li>Compute a search direction using the L-BFGS approximation of the inverse Hessian</li>
156 * <li>Perform a line search along this direction to find a new point with sufficient decrease in the function value</li>
157 * <li>Update the L-BFGS approximation using the new point and gradient</li>
158 * <li>Repeat until convergence or maximum iterations reached</li>
159 * </ol>
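   *
   * <p>Step 2 above is the standard L-BFGS two-loop recursion (see the Nocedal and Wright
   * reference in the class documentation), sketched here with <code>rho_i = 1 / (y_i . s_i)</code>:
   *
   * <pre>
   *   q = g
   *   for i = newest stored pair, ..., oldest stored pair:
   *     alpha_i = rho_i * (s_i . q)
   *     q = q - alpha_i * y_i
   *   r = H0 * q                          (H0 is the diagonal initial estimate)
   *   for i = oldest stored pair, ..., newest stored pair:
   *     beta = rho_i * (y_i . r)
   *     r = r + (alpha_i - beta) * s_i
   *   p = -r                              (the search direction)
   * </pre>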
160 *
161 * <p>The step length is determined at each iteration by means of the line search routine <code>
162 * lineSearch</code>, which is a slight modification of the routine <code>CSRCH</code> written by
   * Moré and Thuente. This ensures that the function value decreases sufficiently and that the
164 * Hessian approximation remains positive definite.
165 *
166 * @param n The number of variables in the minimization problem. Must be positive.
167 * @param mSave The number of corrections used in the BFGS update. Values of <code>mSave</code>
168 * less than 3 are not recommended; large values of <code>mSave</code> will result in excessive
   * computing time. <code>3 &le; mSave &le; 7</code> is recommended.
170 * Must be non-negative. If <code>mSave</code> is 0, the method will use steepest descent.
171 * @param x On initial entry this must be set by the user to the values of the initial estimate
172 * of the solution vector. On exit, it contains the values of the variables at the best point
173 * found (usually a solution). The array must have length at least <code>n</code>.
174 * @param f The value of the function <code>f</code> at the point <code>x</code>.
175 * @param g The components of the gradient <code>g</code> at the point <code>x</code>.
176 * On exit, it contains the gradient at the final point. The array must have length at least <code>n</code>.
177 * @param eps Determines the accuracy with which the solution is to be found. The subroutine
   * terminates when <code>G RMS &lt; EPS</code>. Should be positive.
179 * @param maxIterations Maximum number of optimization steps. Must be positive.
180 * @param potential Implements the {@link ffx.numerics.Potential} interface to supply function
181 * values and gradients. Cannot be null.
182 * @param listener Implements the {@link OptimizationListener} interface and will be notified
183 * after each successful step. Can be null, in which case progress will be logged
184 * but no callbacks will be made.
185 * @return status code:
186 * <ul>
187 * <li>0 = success (convergence achieved)</li>
188 * <li>1 = maximum iterations reached without convergence</li>
189 * <li>-1 = optimization failed (e.g., line search failure, invalid inputs)</li>
190 * </ul>
191 * @since 1.0
192 */
193 public static int minimize(final int n, int mSave, final double[] x, double f, double[] g,
194 final double eps, final int maxIterations, OptimizationInterface potential,
195 @Nullable OptimizationListener listener) {
196
197 // Validate input parameters with explicit checks instead of assertions
198 if (n <= 0) {
199 logger.severe("Number of variables must be positive.");
200 return -1;
201 }
202 if (mSave < 0) {
203 logger.severe("Number of correction vectors must be non-negative.");
204 return -1;
205 }
206 if (maxIterations <= 0) {
207 logger.severe("Maximum number of iterations must be positive.");
208 return -1;
209 }
210 if (x == null || x.length < n) {
211 logger.severe("Coordinate array is null or too small.");
212 return -1;
213 }
214 if (g == null || g.length < n) {
215 logger.severe("Gradient array is null or too small.");
216 return -1;
217 }
218 if (potential == null) {
219 logger.severe("Potential interface cannot be null.");
220 return -1;
221 }
222 if (eps <= 0.0) {
223 logger.warning("Convergence criterion (eps) should be positive.");
224 }
225
226 if (mSave > n) {
227 logger.fine(format(" Resetting the number of saved L-BFGS vectors to %d.", n));
228 mSave = n;
229 }
230
231 int iterations = 0;
232 int evaluations = 1;
233 int nErrors = 0;
234 int maxErrors = 2;
235
236 double rms = sqrt(n);
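    // sqrt(n) converts summed squares of gradient/coordinate changes into RMS values below.
    // Use the per-variable scaling supplied by the potential, if any; otherwise unit scaling.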
237 double[] scaling = potential.getScaling();
238 if (scaling == null) {
239 scaling = new double[n];
240 fill(scaling, 1.0);
241 }
242
    // The initial search direction is the steepest descent direction.
244 double[][] s = new double[mSave][n];
245 double[][] y = new double[mSave][n];
246 if (mSave > 0) {
247 for (int i = 0; i < n; i++) {
248 s[0][i] = -g[i];
249 }
250 }
251
252 double grms = 0.0;
253 double gnorm = 0.0;
254 for (int i = 0; i < n; i++) {
255 double gi = g[i];
      if (isNaN(gi) || isInfinite(gi)) {
        logger.warning(format("The gradient of variable %d is %8.3f. Terminating optimization.", i, gi));
        // An invalid initial gradient is a failure, per the documented status codes.
        return -1;
259 }
260 double gis = gi * scaling[i];
261 gnorm += gi * gi;
262 grms += gis * gis;
263 }
264 gnorm = sqrt(gnorm);
265 grms = sqrt(grms) / rms;
266
267 // Notify the listeners of initial conditions.
268 if (listener != null) {
269 if (!listener.optimizationUpdate(iterations, mSave, evaluations, grms, 0.0, f, 0.0, 0.0, null)) {
270 // Terminate the optimization.
271 return 1;
272 }
273 } else {
274 log(iterations, evaluations, grms, 0.0, f, 0.0, 0.0, null);
275 }
276
277 // The convergence criteria may already be satisfied.
278 if (grms <= eps) {
279 return 0;
280 }
281
282 final double[] prevX = new double[n];
283 final double[] prevG = new double[n];
284 final double[] r = new double[n];
285 final double[] p = new double[n];
286 final double[] h0 = new double[n];
287 final double[] q = new double[n];
288 final double[] alpha = new double[mSave];
289 final double[] rho = new double[mSave];
290 double gamma = 1.0;
291
292 // Line search parameters.
293 final LineSearch lineSearch = new LineSearch(n);
294 final LineSearchResult[] info = {LineSearchResult.Success};
295 final int[] nFunctionEvals = {0};
296 final double[] angle = {0.0};
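    // Initial estimate of the expected decrease in f, used to size the first line-search step.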
297 double df = 0.5 * DEFAULT_STEPMAX * gnorm;
298 int m = -1;
299
300 while (true) {
301 iterations++;
302 if (iterations > maxIterations) {
303 logger.info(format(" Maximum number of iterations reached: %d.", maxIterations));
304 return 1;
305 }
306
307 if (mSave > 0) {
308 int muse = min(iterations - 1, mSave);
309 m++;
310 if (m > mSave - 1) {
311 m = 0;
312 }
313
314 // Estimate the Hessian Diagonal.
315 fill(h0, gamma);
316 arraycopy(g, 0, q, 0, n);
317 int k = m;
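        // First loop of the two-loop recursion: from the newest stored pair back to the
        // oldest, compute alpha[k] = rho[k] * (s[k] . q) and project each pair out of q.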
318 for (int j = 0; j < muse; j++) {
319 k--;
320 if (k < 0) {
321 k = mSave - 1;
322 }
323 alpha[k] = v1DotV2(n, s[k], 0, 1, q, 0, 1);
324 alpha[k] *= rho[k];
325 aV1PlusV2(n, -alpha[k], y[k], 0, 1, q, 0, 1);
326 }
327 for (int i = 0; i < n; i++) {
328 r[i] = h0[i] * q[i];
329 }
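        // Second loop: from the oldest stored pair forward to the newest, add the
        // corrections back to obtain r, the approximate inverse Hessian applied to g.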
330 for (int j = 0; j < muse; j++) {
331 double beta = v1DotV2(n, r, 0, 1, y[k], 0, 1);
332 beta *= rho[k];
333 aV1PlusV2(n, alpha[k] - beta, s[k], 0, 1, r, 0, 1);
334 k++;
335 if (k > mSave - 1) {
336 k = 0;
337 }
338 }
339
340 // Set the search direction.
341 for (int i = 0; i < n; i++) {
342 p[i] = -r[i];
343 }
344 } else {
        // Steepest descent direction (used when no correction vectors are stored).
346 for (int i = 0; i < n; i++) {
347 p[i] = -g[i];
348 }
349 }
350
351 arraycopy(x, 0, prevX, 0, n);
352 arraycopy(g, 0, prevG, 0, n);
353
      // Perform the line search along the new search direction.
355 nFunctionEvals[0] = 0;
356 double prevF = f;
357 f = lineSearch.search(n, x, f, g, p, angle, df, info, nFunctionEvals, potential);
358 evaluations += nFunctionEvals[0];
359
360 // Check for NaN or infinite gradient values after line search
361 for (int i = 0; i < n; i++) {
362 if (isNaN(g[i]) || isInfinite(g[i])) {
363 logger.warning(format("The gradient of variable %d is %8.3f after line search. Terminating optimization.", i, g[i]));
364 return -1;
365 }
366 }
367
368 // Update variables based on the results of this iteration.
369 if (mSave > 0) {
370 for (int i = 0; i < n; i++) {
371 s[m][i] = x[i] - prevX[i];
372 y[m][i] = g[i] - prevG[i];
373 }
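        // gamma = (y.s)/(y.y) rescales the diagonal initial inverse-Hessian estimate for the
        // next iteration, and rho[m] = 1/(y.s) is the curvature factor used by the recursion.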
374 double ys = v1DotV2(n, y[m], 0, 1, s[m], 0, 1);
375 double yy = v1DotV2(n, y[m], 0, 1, y[m], 0, 1);
376 gamma = abs(ys / yy);
377 rho[m] = 1.0 / ys;
378 }
379
380 // Get the sizes of the moves made during this iteration.
381 df = prevF - f;
382 double xrms = 0.0;
383 grms = 0.0;
384 for (int i = 0; i < n; i++) {
385 double dx = (x[i] - prevX[i]) / scaling[i];
386 xrms += dx * dx;
387 double gx = g[i] * scaling[i];
388 grms += gx * gx;
389 }
390 xrms = sqrt(xrms) / rms;
391 grms = sqrt(grms) / rms;
392
393 boolean done = false;
394 if (info[0] == LineSearchResult.BadIntpln || info[0] == LineSearchResult.IntplnErr) {
395 nErrors++;
396 if (nErrors >= maxErrors) {
397 done = true;
398 }
399 } else {
400 nErrors = 0;
401 }
402
403 if (listener != null) {
404 if (!listener.optimizationUpdate(iterations, mSave, evaluations, grms, xrms, f, df, angle[0],
405 info[0])) {
406 // Terminate the optimization.
407 return 1;
408 }
409 } else {
410 log(iterations, evaluations, grms, xrms, f, df, angle[0], info[0]);
411 }
412
413 /*
414 Terminate the optimization if the line search failed or upon
415 satisfying the convergence criteria.
416 */
417 if (done) {
418 return -1;
419 } else if (grms <= eps) {
420 return 0;
421 }
422 }
423 }
424
425 /**
426 * This method solves the unconstrained minimization problem
427 *
428 * <pre>
429 * min f(x), x = (x1,x2,...,x_n),
430 * </pre>
431 * <p>
432 * using the limited-memory BFGS method. The routine is especially effective on problems involving
433 * a large number of variables. In a typical iteration of this method an approximation <code>Hk
434 * </code> to the inverse of the Hessian is obtained by applying <code>m</code> BFGS updates to a
435 * diagonal matrix <code>Hk0</code>, using information from the previous <code>m</code> steps.
436 *
437 * <p>The user specifies the number <code>m</code>, which determines the amount of storage
438 * required by the routine.
439 *
440 * <p>The user is required to calculate the function value <code>f</code> and its gradient <code>g
441 * </code>.
442 *
443 * <p>The step length is determined at each iteration by means of the line search routine <code>
444 * lineSearch</code>, which is a slight modification of the routine <code>CSRCH</code> written by
   * Moré and Thuente.
446 *
   * @param n The number of variables in the minimization problem. Restriction: <code>n &gt; 0</code>.
   * @param mSave The number of corrections used in the BFGS update. Values of <code>mSave</code>
   *     less than 3 are not recommended; large values of <code>mSave</code> will result in excessive
   *     computing time. <code>3 &le; mSave &le; 7</code> is recommended. Must be non-negative;
   *     if <code>mSave</code> is 0, the method will use steepest descent.
453 * @param x On initial entry this must be set by the user to the values of the initial estimate
454 * of the solution vector. On exit, it contains the values of the variables at the best point
455 * found (usually a solution).
456 * @param f The value of the function <code>f</code> at the point <code>x</code>.
457 * @param g The components of the gradient <code>g</code> at the point <code>x</code>.
458 * @param eps Determines the accuracy with which the solution is to be found. The subroutine
   *     terminates when <code>G RMS &lt; EPS</code>.
460 * @param potential Implements the {@link ffx.numerics.Potential} interface to supply function
461 * values and gradients.
462 * @param listener Implements the {@link OptimizationListener} interface and will be notified
463 * after each successful step.
   * @return status code (0 = success, 1 = terminated without convergence, -1 = failed)
465 * @since 1.0
466 */
467 public static int minimize(int n, int mSave, double[] x, double f, double[] g, double eps,
468 OptimizationInterface potential, OptimizationListener listener) {
469 return minimize(n, mSave, x, f, g, eps, Integer.MAX_VALUE - 1, potential, listener);
470 }
471
472 /**
473 * Compute the sum of a vector times a scalar plus another vector.
474 * <p>
475 * This operation computes: v2 = a * v1 + v2
476 * <p>
477 * The method is optimized for the common case where both step sizes are 1 and
478 * both start indices are 0, which is the most frequent usage pattern in the LBFGS algorithm.
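   * <p>
   * For example, with unit steps and zero start indices:
   * <pre>{@code
   * double[] v1 = {1.0, 2.0, 3.0};
   * double[] v2 = {4.0, 5.0, 6.0};
   * aV1PlusV2(3, 2.0, v1, 0, 1, v2, 0, 1);
   * // v2 is now {6.0, 9.0, 12.0}
   * }</pre>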
479 *
480 * @param n The number of points.
481 * @param a The scalar.
482 * @param v1 The X array.
483 * @param v1Start The first point in the X array.
484 * @param v1Step The X array increment.
485 * @param v2 The Y array.
486 * @param v2Start The first point in the Y array.
487 * @param v2Step The Y array increment.
488 * @since 1.0
489 */
490 static void aV1PlusV2(final int n, final double a, final double[] v1, final int v1Start,
491 final int v1Step, final double[] v2, final int v2Start, final int v2Step) {
492 /*
493 Require the number of entries (n) to be greater than zero. If the
494 scalar (a) is zero, then the v2 array is unchanged.
495 */
496 if (n <= 0 || a == 0) {
497 return;
498 }
499
500 // Optimize for the common case where both step sizes are 1 and both start indices are 0
501 if (v1Step == 1 && v2Step == 1 && v1Start == 0 && v2Start == 0) {
502 for (int i = 0; i < n; i++) {
503 v2[i] = fma(a, v1[i], v2[i]);
504 }
505 return;
506 }
507
508 // General case with arbitrary step sizes and start indices
509 int stop = v1Start + v1Step * n;
510 for (int i = v1Start, j = v2Start; i != stop; i += v1Step, j += v2Step) {
511 v2[j] = fma(a, v1[i], v2[j]);
512 }
513 }
514
515 /**
516 * Print status messages for <code>LBFGS</code> if there is no listener.
517 *
518 * @param iter Number of iterations so far.
519 * @param nfun Number of function evaluations so far.
520 * @param grms Gradient RMS at current solution.
521 * @param xrms Coordinate change RMS at current solution.
522 * @param f Function value at current solution.
523 * @param df Change in the function value compared to the previous solution.
   * @param angle Current angle between gradient and search direction.
   * @param info Result of the most recent line search, or null for the initial report.
   * @since 1.0
526 */
527 private static void log(int iter, int nfun, double grms, double xrms, double f, double df,
528 double angle, LineSearchResult info) {
529 if (iter == 0) {
530 logger.info("\n Limited Memory BFGS Quasi-Newton Optimization: \n");
531 logger.info(
532 " QN Iter F Value G RMS F Move X Move Angle FG Call Comment\n");
533 }
534 if (info == null) {
535 logger.info(
536 format("%6d%13.4f%11.4f%11.4f%10.4f%9.2f%7d", iter, f, grms, df, xrms, angle, nfun));
537 } else {
538 logger.info(
539 format("%6d%13.4f%11.4f%11.4f%10.4f%9.2f%7d %8s", iter, f, grms, df, xrms, angle, nfun,
540 info));
541 }
542 }
543
544 /**
545 * Compute the dot product of two vectors.
546 * <p>
547 * This operation computes: sum(v1[i] * v2[i]) for i = 0 to n-1
548 * <p>
549 * The method is optimized for the common case where both step sizes are 1 and
550 * both start indices are 0, which is the most frequent usage pattern in the LBFGS algorithm.
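   * <p>
   * For example, with unit steps and zero start indices:
   * <pre>{@code
   * double[] v1 = {1.0, 2.0, 3.0};
   * double[] v2 = {4.0, 5.0, 6.0};
   * double dot = v1DotV2(3, v1, 0, 1, v2, 0, 1); // 1*4 + 2*5 + 3*6 = 32.0
   * }</pre>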
551 *
552 * @param n Number of entries to include.
553 * @param v1 The X array.
554 * @param v1Start The first point in the X array.
555 * @param v1Step The X array increment.
556 * @param v2 The Y array.
557 * @param v2Start The first point in the Y array.
558 * @param v2Step The Y increment.
559 * @return dot product
560 * @since 1.0
561 */
562 static double v1DotV2(final int n, final double[] v1, final int v1Start, final int v1Step,
563 final double[] v2, final int v2Start, final int v2Step) {
564
565 // Require the number of entries to be greater than zero.
566 if (n <= 0) {
567 return 0;
568 }
569
570 // Optimize for the common case where both step sizes are 1 and both start indices are 0
571 if (v1Step == 1 && v2Step == 1 && v1Start == 0 && v2Start == 0) {
572 double sum = 0.0;
573 for (int i = 0; i < n; i++) {
574 sum = fma(v1[i], v2[i], sum);
575 }
576 return sum;
577 }
578
579 // General case with arbitrary step sizes and start indices
580 double sum = 0.0;
581 int stop = v1Start + v1Step * n;
582 for (int i = v1Start, j = v2Start; i != stop; i += v1Step, j += v2Step) {
583 sum = fma(v1[i], v2[j], sum);
584 }
585 return sum;
586 }
587 }