// ******************************************************************************
//
// Title:       Force Field X.
// Description: Force Field X - Software for Molecular Biophysics.
// Copyright:   Copyright (c) Michael J. Schnieders 2001-2025.
//
// This file is part of Force Field X.
//
// Force Field X is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 3 as published by
// the Free Software Foundation.
//
// Force Field X is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
// details.
//
// You should have received a copy of the GNU General Public License along with
// Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
//
// Linking this library statically or dynamically with other modules is making a
// combined work based on this library. Thus, the terms and conditions of the
// GNU General Public License cover the whole combination.
//
// As a special exception, the copyright holders of this library give you
// permission to link this library with independent modules to produce an
// executable, regardless of the license terms of these independent modules, and
// to copy and distribute the resulting executable under terms of your choice,
// provided that you also meet, for each linked independent module, the terms
// and conditions of the license of that module. An independent module is a
// module which is not derived from or based on this library. If you modify this
// library, you may extend this exception to your version of the library, but
// you are not obligated to do so. If you do not wish to do so, delete this
// exception statement from your version.
//
// ******************************************************************************
package ffx.numerics.optimization;

import ffx.numerics.OptimizationInterface;
import ffx.numerics.optimization.LineSearch.LineSearchResult;

import javax.annotation.Nullable;
import java.util.logging.Logger;

import static java.lang.Double.isInfinite;
import static java.lang.Double.isNaN;
import static java.lang.Math.fma;
import static java.lang.String.format;
import static java.lang.System.arraycopy;
import static java.util.Arrays.fill;
import static org.apache.commons.math3.util.FastMath.abs;
import static org.apache.commons.math3.util.FastMath.min;
import static org.apache.commons.math3.util.FastMath.sqrt;

/**
 * This class implements the limited-memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) algorithm for
 * large-scale multidimensional unconstrained optimization problems.
 *
 * <p>The L-BFGS algorithm is a quasi-Newton method that approximates the Broyden-Fletcher-Goldfarb-Shanno
 * (BFGS) algorithm using a limited amount of computer memory.
 *
 * <p>Unlike the original BFGS method, which maintains a dense approximation of the inverse Hessian,
 * L-BFGS stores only a few vectors that represent the approximation implicitly. This compact
 * representation makes the method particularly well suited for optimization problems with a large
 * number of variables.
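 *
 * <p>A minimal usage sketch follows. The <code>potential</code> object stands in for any concrete
 * {@link OptimizationInterface} implementation; the <code>energyAndGradient</code> and
 * <code>getNumberOfVariables</code> calls are assumed accessors shown for illustration only, since
 * all that matters to <code>minimize</code> are the arrays and scalars passed to it:
 *
 * <pre>{@code
 * OptimizationInterface potential = ...; // Supplied by the caller.
 * int n = potential.getNumberOfVariables(); // Assumed accessor; any source of n works.
 * double[] x = new double[n]; // Initial coordinates, set by the caller.
 * double[] g = new double[n]; // Gradient at x, filled by the evaluation below.
 * double f = potential.energyAndGradient(x, g); // Assumed evaluation method.
 * int status = LBFGS.minimize(n, 5, x, f, g, 1.0e-4, 1000, potential, null);
 * }</pre>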
 *
 * <p>This implementation uses a line search procedure to ensure sufficient decrease in the objective
 * function and to maintain positive definiteness of the Hessian approximation.
 *
 * @author Michael J. Schnieders
 * <br> Derived from:
 * <br> Robert Dodier's Java translation of original FORTRAN code by Jorge Nocedal.
 * <br> J. Nocedal, "Updating Quasi-Newton Matrices with Limited Storage", Mathematics of Computation, 35, 773-782 (1980)
 * <br> D. C. Liu and J. Nocedal, "On the Limited Memory BFGS Method for Large Scale Optimization", Mathematical Programming, 45, 503-528 (1989)
 * <br> J. Nocedal and S. J. Wright, "Numerical Optimization", Springer-Verlag, New York, 1999, Section 9.1
 * @since 1.0
 */
public class LBFGS {

  /**
   * Controls the accuracy of the line search.
   * <p>
   * If the function and gradient evaluations are inexpensive with respect to the cost of the
   * iteration (which is sometimes the case when solving very large problems) it may be advantageous
   * to set <code>CAPPA</code> to a small value.
   * <p>
   * A typical small value is 0.1.
   * <p>
   * Restriction: <code>CAPPA</code> should be greater than 1e-4.
   */
  public static final double DEFAULT_CAPPA = 0.9;

  /**
   * This specifies the default lower bound for the step in the line search.
   * <p>
   * The default value of 1.0e-16 does not need to be modified unless the problem is extremely badly
   * scaled (in which case the magnitude of the exponent should be increased).
   */
  public static final double DEFAULT_STEPMIN = 1.0e-16;

  /**
   * This specifies the default upper bound for the step in the line search.
   * <p>
   * The default value of 5.0e0 does not need to be modified unless the problem is extremely badly
   * scaled (in which case the exponent should be increased).
   */
  public static final double DEFAULT_STEPMAX = 5.0e0;

  /**
   * The default projected gradient above which the step size is reduced.
   */
  public static final double DEFAULT_SLOPEMAX = 1.0e4;

  /**
   * The default maximum angle between the search direction and the gradient.
   */
  public static final double DEFAULT_ANGLEMAX = 180.0;

  /**
   * The default maximum number of interpolations during the line search.
   */
  public static final int DEFAULT_INTMAX = 5;

  private static final Logger logger = Logger.getLogger(LBFGS.class.getName());

  /**
   * The constructor is private so that the LBFGS class cannot be instantiated.
   */
  private LBFGS() {
    // Private constructor to prevent instantiation.
  }

  /**
   * This method solves the unconstrained minimization problem
   *
   * <pre>
   * min f(x), x = (x1,x2,...,x_n),
   * </pre>
   * <p>
   * using the limited-memory BFGS method. The routine is especially effective on problems involving
   * a large number of variables. In a typical iteration of this method an approximation <code>Hk
   * </code> to the inverse of the Hessian is obtained by applying <code>m</code> BFGS updates to a
   * diagonal matrix <code>Hk0</code>, using information from the previous <code>m</code> steps.
   *
   * <p>The user specifies the number <code>mSave</code>, which determines the amount of storage
   * required by the routine.
   * This is the number of previous steps that will be used to approximate the Hessian. Larger
   * values of <code>mSave</code> can lead to better convergence but require more memory and
   * computation per iteration.
   *
   * <p>The algorithm works as follows:
   * <ol>
   *   <li>Initialize with the current point, function value, and gradient</li>
   *   <li>Compute a search direction using the L-BFGS approximation of the inverse Hessian</li>
   *   <li>Perform a line search along this direction to find a new point with sufficient decrease in the function value</li>
   *   <li>Update the L-BFGS approximation using the new point and gradient</li>
   *   <li>Repeat until convergence or the maximum number of iterations is reached</li>
   * </ol>
   *
   * <p>The step length is determined at each iteration by means of the line search routine <code>
   * lineSearch</code>, which is a slight modification of the routine <code>CSRCH</code> written by
   * Moré and Thuente. This ensures that the function value decreases sufficiently and that the
   * Hessian approximation remains positive definite.
   *
   * @param n The number of variables in the minimization problem. Must be positive.
   * @param mSave The number of corrections used in the BFGS update. Values of <code>mSave</code>
   *     less than 3 are not recommended; large values of <code>mSave</code> will result in excessive
   *     computing time. <code>3 <= mSave <= 7</code> is recommended.
   *     Must be non-negative. If <code>mSave</code> is 0, the method will use steepest descent.
   * @param x On initial entry this must be set by the user to the values of the initial estimate
   *     of the solution vector. On exit, it contains the values of the variables at the best point
   *     found (usually a solution). The array must have length at least <code>n</code>.
   * @param f The value of the function <code>f</code> at the point <code>x</code>.
   * @param g The components of the gradient <code>g</code> at the point <code>x</code>.
   *     On exit, it contains the gradient at the final point. The array must have length at least <code>n</code>.
   * @param eps Determines the accuracy with which the solution is to be found. The subroutine
   *     terminates when <code>G RMS < EPS</code>. Should be positive.
   * @param maxIterations Maximum number of optimization steps. Must be positive.
   * @param potential Implements the {@link ffx.numerics.Potential} interface to supply function
   *     values and gradients. Cannot be null.
   * @param listener Implements the {@link OptimizationListener} interface and will be notified
   *     after each successful step. Can be null, in which case progress will be logged
   *     but no callbacks will be made.
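   *     <p>Purely as an illustrative sketch: if {@link OptimizationListener} is a functional
   *     interface whose single <code>optimizationUpdate</code> method takes the nine arguments
   *     passed by this class and returns <code>false</code> to request termination (an assumption
   *     inferred from the call sites below), a simple progress-printing listener could be:
   *     <pre>{@code
   *     OptimizationListener listener = (iter, nBFGS, nEvals, grms, xrms, f, df, angle, info) -> {
   *       System.out.printf(" Iteration %d: f = %16.8f, G RMS = %12.8f%n", iter, f, grms);
   *       // Returning true continues the optimization; false terminates it.
   *       return true;
   *     };
   *     }</pre>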
   * @return status code:
   *     <ul>
   *       <li>0 = success (convergence achieved)</li>
   *       <li>1 = maximum iterations reached without convergence, or termination requested by the listener</li>
   *       <li>-1 = optimization failed (e.g., line search failure, invalid inputs)</li>
   *     </ul>
   * @since 1.0
   */
  public static int minimize(final int n, int mSave, final double[] x, double f, double[] g,
      final double eps, final int maxIterations, OptimizationInterface potential,
      @Nullable OptimizationListener listener) {

    // Validate input parameters with explicit checks instead of assertions.
    if (n <= 0) {
      logger.severe("Number of variables must be positive.");
      return -1;
    }
    if (mSave < 0) {
      logger.severe("Number of correction vectors must be non-negative.");
      return -1;
    }
    if (maxIterations <= 0) {
      logger.severe("Maximum number of iterations must be positive.");
      return -1;
    }
    if (x == null || x.length < n) {
      logger.severe("Coordinate array is null or too small.");
      return -1;
    }
    if (g == null || g.length < n) {
      logger.severe("Gradient array is null or too small.");
      return -1;
    }
    if (potential == null) {
      logger.severe("Potential interface cannot be null.");
      return -1;
    }
    if (eps <= 0.0) {
      logger.warning("Convergence criterion (eps) should be positive.");
    }

    if (mSave > n) {
      logger.fine(format(" Resetting the number of saved L-BFGS vectors to %d.", n));
      mSave = n;
    }

    int iterations = 0;
    int evaluations = 1;
    int nErrors = 0;
    int maxErrors = 2;

    double rms = sqrt(n);
    double[] scaling = potential.getScaling();
    if (scaling == null) {
      scaling = new double[n];
      fill(scaling, 1.0);
    }

    // The initial search direction is the steepest descent direction.
    double[][] s = new double[mSave][n];
    double[][] y = new double[mSave][n];
    if (mSave > 0) {
      for (int i = 0; i < n; i++) {
        s[0][i] = -g[i];
      }
    }

    double grms = 0.0;
    double gnorm = 0.0;
    for (int i = 0; i < n; i++) {
      double gi = g[i];
      if (isNaN(gi) || isInfinite(gi)) {
        logger.warning(format("The gradient of variable %d is %8.3f.", i, gi));
        // An invalid gradient on entry is a failure, consistent with the documented status codes.
        return -1;
      }
      double gis = gi * scaling[i];
      gnorm += gi * gi;
      grms += gis * gis;
    }
    gnorm = sqrt(gnorm);
    grms = sqrt(grms) / rms;

    // Notify the listener of the initial conditions.
    if (listener != null) {
      if (!listener.optimizationUpdate(iterations, mSave, evaluations, grms, 0.0, f, 0.0, 0.0, null)) {
        // Terminate the optimization.
        return 1;
      }
    } else {
      log(iterations, evaluations, grms, 0.0, f, 0.0, 0.0, null);
    }

    // The convergence criterion may already be satisfied.
    if (grms <= eps) {
      return 0;
    }

    final double[] prevX = new double[n];
    final double[] prevG = new double[n];
    final double[] r = new double[n];
    final double[] p = new double[n];
    final double[] h0 = new double[n];
    final double[] q = new double[n];
    final double[] alpha = new double[mSave];
    final double[] rho = new double[mSave];
    double gamma = 1.0;
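
    /*
     The main loop below implements the standard L-BFGS two-loop recursion
     (Nocedal, 1980). The rolling buffers hold, for each of the last mSave steps,
       s[k] = x_new - x_old  (the position change) and
       y[k] = g_new - g_old  (the gradient change),
     with rho[k] = 1 / (y[k] dot s[k]). The scalar gamma = (y dot s) / (y dot y)
     from the most recent pair scales the initial diagonal inverse Hessian
     estimate h0 at the start of each iteration.
    */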
    // Line search parameters.
    final LineSearch lineSearch = new LineSearch(n);
    final LineSearchResult[] info = {LineSearchResult.Success};
    final int[] nFunctionEvals = {0};
    final double[] angle = {0.0};
    double df = 0.5 * DEFAULT_STEPMAX * gnorm;
    int m = -1;

    while (true) {
      iterations++;
      if (iterations > maxIterations) {
        logger.info(format(" Maximum number of iterations reached: %d.", maxIterations));
        return 1;
      }

      if (mSave > 0) {
        int muse = min(iterations - 1, mSave);
        m++;
        if (m > mSave - 1) {
          m = 0;
        }

        // Estimate the diagonal of the inverse Hessian and apply the two-loop recursion.
        fill(h0, gamma);
        arraycopy(g, 0, q, 0, n);
        int k = m;
        // First loop: walk backward through the stored corrections.
        for (int j = 0; j < muse; j++) {
          k--;
          if (k < 0) {
            k = mSave - 1;
          }
          alpha[k] = v1DotV2(n, s[k], 0, 1, q, 0, 1);
          alpha[k] *= rho[k];
          aV1PlusV2(n, -alpha[k], y[k], 0, 1, q, 0, 1);
        }
        // Apply the initial inverse Hessian estimate.
        for (int i = 0; i < n; i++) {
          r[i] = h0[i] * q[i];
        }
        // Second loop: walk forward through the stored corrections.
        for (int j = 0; j < muse; j++) {
          double beta = v1DotV2(n, r, 0, 1, y[k], 0, 1);
          beta *= rho[k];
          aV1PlusV2(n, alpha[k] - beta, s[k], 0, 1, r, 0, 1);
          k++;
          if (k > mSave - 1) {
            k = 0;
          }
        }

        // Set the search direction.
        for (int i = 0; i < n; i++) {
          p[i] = -r[i];
        }
      } else {
        // Steepest descent direction.
        for (int i = 0; i < n; i++) {
          p[i] = -g[i];
        }
      }

      arraycopy(x, 0, prevX, 0, n);
      arraycopy(g, 0, prevG, 0, n);

      // Perform the line search along the new search direction.
      nFunctionEvals[0] = 0;
      double prevF = f;
      f = lineSearch.search(n, x, f, g, p, angle, df, info, nFunctionEvals, potential);
      evaluations += nFunctionEvals[0];

      // Check for NaN or infinite gradient values after the line search.
      for (int i = 0; i < n; i++) {
        if (isNaN(g[i]) || isInfinite(g[i])) {
          logger.warning(format("The gradient of variable %d is %8.3f after line search. Terminating optimization.", i, g[i]));
          return -1;
        }
      }

      // Update the stored corrections based on the results of this iteration.
      if (mSave > 0) {
        for (int i = 0; i < n; i++) {
          s[m][i] = x[i] - prevX[i];
          y[m][i] = g[i] - prevG[i];
        }
        double ys = v1DotV2(n, y[m], 0, 1, s[m], 0, 1);
        double yy = v1DotV2(n, y[m], 0, 1, y[m], 0, 1);
        gamma = abs(ys / yy);
        rho[m] = 1.0 / ys;
      }

      // Get the sizes of the moves made during this iteration.
      df = prevF - f;
      double xrms = 0.0;
      grms = 0.0;
      for (int i = 0; i < n; i++) {
        double dx = (x[i] - prevX[i]) / scaling[i];
        xrms += dx * dx;
        double gx = g[i] * scaling[i];
        grms += gx * gx;
      }
      xrms = sqrt(xrms) / rms;
      grms = sqrt(grms) / rms;

      // Tolerate up to maxErrors consecutive line search interpolation failures.
      boolean done = false;
      if (info[0] == LineSearchResult.BadIntpln || info[0] == LineSearchResult.IntplnErr) {
        nErrors++;
        if (nErrors >= maxErrors) {
          done = true;
        }
      } else {
        nErrors = 0;
      }

      if (listener != null) {
        if (!listener.optimizationUpdate(iterations, mSave, evaluations, grms, xrms, f, df, angle[0],
            info[0])) {
          // Terminate the optimization.
          return 1;
        }
      } else {
        log(iterations, evaluations, grms, xrms, f, df, angle[0], info[0]);
      }

      /*
       Terminate the optimization if the line search failed repeatedly or upon
       satisfying the convergence criterion.
      */
      if (done) {
        return -1;
      } else if (grms <= eps) {
        return 0;
      }
    }
  }

  /**
   * This method solves the unconstrained minimization problem
   *
   * <pre>
   * min f(x), x = (x1,x2,...,x_n),
   * </pre>
   * <p>
   * using the limited-memory BFGS method. The routine is especially effective on problems involving
   * a large number of variables. In a typical iteration of this method an approximation <code>Hk
   * </code> to the inverse of the Hessian is obtained by applying <code>m</code> BFGS updates to a
   * diagonal matrix <code>Hk0</code>, using information from the previous <code>m</code> steps.
   *
   * <p>The user specifies the number <code>m</code>, which determines the amount of storage
   * required by the routine.
   *
   * <p>The user is required to calculate the function value <code>f</code> and its gradient <code>g
   * </code>.
   *
   * <p>The step length is determined at each iteration by means of the line search routine <code>
   * lineSearch</code>, which is a slight modification of the routine <code>CSRCH</code> written by
   * Moré and Thuente.
   *
   * @param n The number of variables in the minimization problem. Restriction: <code>n > 0</code>.
   * @param mSave The number of corrections used in the BFGS update. Values of <code>mSave</code>
   *     less than 3 are not recommended; large values of <code>mSave</code> will result in excessive
   *     computing time. <code>3 <= mSave <= 7</code> is recommended.
   *     Restriction: <code>mSave >= 0</code> (a value of 0 falls back to steepest descent).
   * @param x On initial entry this must be set by the user to the values of the initial estimate
   *     of the solution vector. On exit, it contains the values of the variables at the best point
   *     found (usually a solution).
   * @param f The value of the function <code>f</code> at the point <code>x</code>.
   * @param g The components of the gradient <code>g</code> at the point <code>x</code>.
   * @param eps Determines the accuracy with which the solution is to be found. The subroutine
   *     terminates when <code>G RMS < EPS</code>.
   * @param potential Implements the {@link ffx.numerics.Potential} interface to supply function
   *     values and gradients.
   * @param listener Implements the {@link OptimizationListener} interface and will be notified
   *     after each successful step.
   * @return status code (0 = success, 1 = terminated without convergence, -1 = failed).
   * @since 1.0
   */
  public static int minimize(int n, int mSave, double[] x, double f, double[] g, double eps,
      OptimizationInterface potential, OptimizationListener listener) {
    return minimize(n, mSave, x, f, g, eps, Integer.MAX_VALUE - 1, potential, listener);
  }

  /**
   * Compute the sum of a vector times a scalar plus another vector.
   * <p>
   * This operation computes: v2 = a * v1 + v2
   * <p>
   * The method is optimized for the common case where both step sizes are 1 and
   * both start indices are 0, which is the most frequent usage pattern in the LBFGS algorithm.
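   * <p>For example, with unit steps and zero start indices this performs an in-place "axpy"
   * update (the values below are purely illustrative):
   * <pre>{@code
   * double[] v1 = {1.0, 2.0, 3.0};
   * double[] v2 = {4.0, 5.0, 6.0};
   * // v2 becomes {6.0, 9.0, 12.0}, since v2[i] = 2.0 * v1[i] + v2[i].
   * aV1PlusV2(3, 2.0, v1, 0, 1, v2, 0, 1);
   * }</pre>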
   *
   * @param n The number of points.
   * @param a The scalar.
   * @param v1 The X array.
   * @param v1Start The first point in the X array.
   * @param v1Step The X array increment.
   * @param v2 The Y array.
   * @param v2Start The first point in the Y array.
   * @param v2Step The Y array increment.
   * @since 1.0
   */
  static void aV1PlusV2(final int n, final double a, final double[] v1, final int v1Start,
      final int v1Step, final double[] v2, final int v2Start, final int v2Step) {
    /*
     Require the number of entries (n) to be greater than zero. If the
     scalar (a) is zero, then the v2 array is unchanged.
    */
    if (n <= 0 || a == 0) {
      return;
    }

    // Optimize for the common case where both step sizes are 1 and both start indices are 0.
    if (v1Step == 1 && v2Step == 1 && v1Start == 0 && v2Start == 0) {
      for (int i = 0; i < n; i++) {
        v2[i] = fma(a, v1[i], v2[i]);
      }
      return;
    }

    // General case with arbitrary step sizes and start indices.
    int stop = v1Start + v1Step * n;
    for (int i = v1Start, j = v2Start; i != stop; i += v1Step, j += v2Step) {
      v2[j] = fma(a, v1[i], v2[j]);
    }
  }

  /**
   * Print status messages for <code>LBFGS</code> if there is no listener.
   *
   * @param iter Number of iterations so far.
   * @param nfun Number of function evaluations so far.
   * @param grms Gradient RMS at current solution.
   * @param xrms Coordinate change RMS at current solution.
   * @param f Function value at current solution.
   * @param df Change in the function value compared to the previous solution.
   * @param angle Current angle between gradient and search direction.
   * @param info Result of the most recent line search (null for the initial conditions).
   * @since 1.0
   */
  private static void log(int iter, int nfun, double grms, double xrms, double f, double df,
      double angle, LineSearchResult info) {
    if (iter == 0) {
      logger.info("\n Limited Memory BFGS Quasi-Newton Optimization: \n");
      logger.info(" QN Iter    F Value      G RMS     F Move    X Move    Angle  FG Call  Comment\n");
    }
    if (info == null) {
      logger.info(
          format("%6d%13.4f%11.4f%11.4f%10.4f%9.2f%7d", iter, f, grms, df, xrms, angle, nfun));
    } else {
      logger.info(
          format("%6d%13.4f%11.4f%11.4f%10.4f%9.2f%7d %8s", iter, f, grms, df, xrms, angle, nfun,
              info));
    }
  }

  /**
   * Compute the dot product of two vectors.
   * <p>
   * This operation computes: sum(v1[i] * v2[i]) for i = 0 to n-1
   * <p>
   * The method is optimized for the common case where both step sizes are 1 and
   * both start indices are 0, which is the most frequent usage pattern in the LBFGS algorithm.
   *
   * @param n Number of entries to include.
   * @param v1 The X array.
   * @param v1Start The first point in the X array.
   * @param v1Step The X array increment.
   * @param v2 The Y array.
   * @param v2Start The first point in the Y array.
   * @param v2Step The Y array increment.
   * @return The dot product.
   * @since 1.0
   */
  static double v1DotV2(final int n, final double[] v1, final int v1Start, final int v1Step,
      final double[] v2, final int v2Start, final int v2Step) {

    // Require the number of entries to be greater than zero.
    if (n <= 0) {
      return 0;
    }

    // Optimize for the common case where both step sizes are 1 and both start indices are 0.
    if (v1Step == 1 && v2Step == 1 && v1Start == 0 && v2Start == 0) {
      double sum = 0.0;
      for (int i = 0; i < n; i++) {
        sum = fma(v1[i], v2[i], sum);
      }
      return sum;
    }

    // General case with arbitrary step sizes and start indices.
    double sum = 0.0;
    int stop = v1Start + v1Step * n;
    for (int i = v1Start, j = v2Start; i != stop; i += v1Step, j += v2Step) {
      sum = fma(v1[i], v2[j], sum);
    }
    return sum;
  }
}