// ******************************************************************************
//
// Title:       Force Field X.
// Description: Force Field X - Software for Molecular Biophysics.
// Copyright:   Copyright (c) Michael J. Schnieders 2001-2024.
//
// This file is part of Force Field X.
//
// Force Field X is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 3 as published by
// the Free Software Foundation.
//
// Force Field X is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
// details.
//
// You should have received a copy of the GNU General Public License along with
// Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
//
// Linking this library statically or dynamically with other modules is making a
// combined work based on this library. Thus, the terms and conditions of the
// GNU General Public License cover the whole combination.
//
// As a special exception, the copyright holders of this library give you
// permission to link this library with independent modules to produce an
// executable, regardless of the license terms of these independent modules, and
// to copy and distribute the resulting executable under terms of your choice,
// provided that you also meet, for each linked independent module, the terms
// and conditions of the license of that module. An independent module is a
// module which is not derived from or based on this library. If you modify this
// library, you may extend this exception to your version of the library, but
// you are not obligated to do so. If you do not wish to do so, delete this
// exception statement from your version.
//
// ******************************************************************************
package ffx.numerics.optimization;

import static java.lang.Double.isInfinite;
import static java.lang.Double.isNaN;
import static java.lang.Math.fma;
import static java.lang.String.format;
import static java.lang.System.arraycopy;
import static java.util.Arrays.fill;
import static org.apache.commons.math3.util.FastMath.abs;
import static org.apache.commons.math3.util.FastMath.min;
import static org.apache.commons.math3.util.FastMath.sqrt;

import ffx.numerics.OptimizationInterface;
import ffx.numerics.optimization.LineSearch.LineSearchResult;

import javax.annotation.Nullable;
import java.util.logging.Logger;

/**
 * This class implements the limited-memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) algorithm for
 * large-scale multidimensional unconstrained optimization problems.<br>
 *
 * @author Michael J. Schnieders<br> Derived from: <br> Robert Dodier's Java translation of original
 *     FORTRAN code by Jorge Nocedal. <br> J. Nocedal, "Updating Quasi-Newton Matrices with Limited
 *     Storage", Mathematics of Computation, 35, 773-782 (1980) <br> D. C. Liu and J. Nocedal, "On
 *     the Limited Memory BFGS Method for Large Scale Optimization", Mathematical Programming, 45,
 *     503-528 (1989) <br> J. Nocedal and S. J.
Wright, "Numerical Optimization", Springer-Verlag, 65 * New York, 1999, Section 9.1 66 * @since 1.0 67 */ 68 public class LBFGS { 69 70 /** 71 * Controls the accuracy of the line search. 72 * <p> 73 * If the function and gradient evaluations are inexpensive with respect to the cost of the 74 * iteration (which is sometimes the case when solving very large problems) it may be advantageous 75 * to set <code>CAPPA</code> to a small value. 76 * <p> 77 * A typical small value is 0.1. 78 * <p> 79 * Restriction: <code>CAPPA</code> should be greater than 1e-4. 80 */ 81 public static final double DEFAULT_CAPPA = 0.9; 82 83 /** 84 * This specifies the default lower bound for the step in the line search. 85 * <p> 86 * The default value of 1.0e-16 does not need to be modified unless the problem is extremely badly 87 * scaled (in which case the exponent should be increased). 88 */ 89 public static final double DEFAULT_STEPMIN = 1.0e-16; 90 91 /** 92 * This specifies the default upper bound for the step in the line search. 93 * <p> 94 * The default value of 5.0e0 does not need to be modified unless the problem is extremely badly 95 * scaled (in which case the exponent should be increased). 96 */ 97 public static final double DEFAULT_STEPMAX = 5.0e0; 98 99 /** 100 * The default projected gradient above which step size is reduced. 101 */ 102 public static final double DEFAULT_SLOPEMAX = 1.0e4; 103 104 /** 105 * The default maximum angle between search direction and gradient. 106 */ 107 public static final double DEFAULT_ANGLEMAX = 180.0; 108 109 /** 110 * The default maximum number of interpolations during line search. 111 */ 112 public static final int DEFAULT_INTMAX = 5; 113 114 private static final Logger logger = Logger.getLogger(LBFGS.class.getName()); 115 116 /** 117 * Make the constructor private so that the L-BFGS cannot be instantiated. 118 */ 119 private LBFGS() { 120 } 121 122 /** 123 * This method solves the unconstrained minimization problem 124 * 125 * <pre> 126 * min f(x), x = (x1,x2,...,x_n), 127 * </pre> 128 * <p> 129 * using the limited-memory BFGS method. The routine is especially effective on problems involving 130 * a large number of variables. In a typical iteration of this method an approximation <code>Hk 131 * </code> to the inverse of the Hessian is obtained by applying <code>m</code> BFGS updates to a 132 * diagonal matrix <code>Hk0</code>, using information from the previous <code>m</code> steps. 133 * 134 * <p>The user specifies the number <code>m</code>, which determines the amount of storage 135 * required by the routine. 136 * 137 * <p>The user is required to calculate the function value <code>f</code> and its gradient <code>g 138 * </code>. 139 * 140 * <p>The step length is determined at each iteration by means of the line search routine <code> 141 * lineSearch</code>, which is a slight modification of the routine <code>CSRCH</code> written by 142 * More and Thuente. 143 * 144 * @param n The number of variables in the minimization problem. Restriction: <code>n > 0 145 * </code>. 146 * @param mSave The number of corrections used in the BFGS update. Values of <code>mSave</code> 147 * less than 3 are not recommended; large values of <code>mSave</code> will result in excessive 148 * computing time. <code>3 <= mSave <= 7</code> is recommended. 149 * <p> 150 * Restriction: <code>mSave > 0</code>. 151 * @param x On initial entry this must be set by the user to the values of the initial estimate 152 * of the solution vector. 
 *     On exit, it contains the values of the variables at the best point
   *     found (usually a solution).
   * @param f The value of the function <code>f</code> at the point <code>x</code>.
   * @param g The components of the gradient <code>g</code> at the point <code>x</code>.
   * @param eps Determines the accuracy with which the solution is to be found. The subroutine
   *     terminates when <code>G RMS < EPS</code>.
   * @param maxIterations Maximum number of optimization steps.
   * @param potential Implements the {@link ffx.numerics.OptimizationInterface} interface to supply
   *     function values and gradients.
   * @param listener Implements the {@link OptimizationListener} interface and will be notified
   *     after each successful step.
   * @return status code (0 = success, 1 = max iterations reached, -1 = failed)
   * @since 1.0
   */
  public static int minimize(final int n, int mSave, final double[] x, double f, double[] g,
      final double eps, final int maxIterations, OptimizationInterface potential,
      @Nullable OptimizationListener listener) {

    assert (n > 0);
    assert (mSave >= 0);
    assert (maxIterations > 0);
    assert (x != null && x.length >= n);
    assert (g != null && g.length >= n);

    if (mSave > n) {
      logger.fine(format(" Resetting the number of saved L-BFGS vectors to %d.", n));
      mSave = n;
    }

    int iterations = 0;
    int evaluations = 1;
    int nErrors = 0;
    int maxErrors = 2;

    double rms = sqrt(n);
    double[] scaling = potential.getScaling();
    if (scaling == null) {
      scaling = new double[n];
      fill(scaling, 1.0);
    }

    // The initial search direction is the steepest descent direction.
    double[][] s = new double[mSave][n];
    double[][] y = new double[mSave][n];
    if (mSave > 0) {
      for (int i = 0; i < n; i++) {
        s[0][i] = -g[i];
      }
    }

    double grms = 0.0;
    double gnorm = 0.0;
    for (int i = 0; i < n; i++) {
      double gi = g[i];
      if (isNaN(gi) || isInfinite(gi)) {
        logger.warning(format("The gradient of variable %d is %8.3f.", i, gi));
        return 1;
      }
      double gis = gi * scaling[i];
      gnorm += gi * gi;
      grms += gis * gis;
    }
    gnorm = sqrt(gnorm);
    grms = sqrt(grms) / rms;

    // Notify the listeners of initial conditions.
    if (listener != null) {
      if (!listener.optimizationUpdate(iterations, mSave, evaluations, grms, 0.0, f, 0.0, 0.0, null)) {
        // Terminate the optimization.
        return 1;
      }
    } else {
      log(iterations, evaluations, grms, 0.0, f, 0.0, 0.0, null);
    }

    // The convergence criteria may already be satisfied.
    if (grms <= eps) {
      return 0;
    }

    final double[] prevX = new double[n];
    final double[] prevG = new double[n];
    final double[] r = new double[n];
    final double[] p = new double[n];
    final double[] h0 = new double[n];
    final double[] q = new double[n];
    final double[] alpha = new double[mSave];
    final double[] rho = new double[mSave];
    double gamma = 1.0;

    // Line search parameters.
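    // The single-element arrays below act as mutable holders: the line search reports its result
    // (info), the number of function evaluations it performed (nFunctionEvals), and the angle
    // between the gradient and the search direction (angle) back through them. The value df tracks
    // the change in the function value between iterations; it is initialized here from the
    // gradient norm and passed to each line search.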
    final LineSearch lineSearch = new LineSearch(n);
    final LineSearchResult[] info = {LineSearchResult.Success};
    final int[] nFunctionEvals = {0};
    final double[] angle = {0.0};
    double df = 0.5 * DEFAULT_STEPMAX * gnorm;
    int m = -1;

    while (true) {
      iterations++;
      if (iterations > maxIterations) {
        logger.info(format(" Maximum number of iterations reached: %d.", maxIterations));
        return 1;
      }

      if (mSave > 0) {
        int muse = min(iterations - 1, mSave);
        m++;
        if (m > mSave - 1) {
          m = 0;
        }

        // Estimate the diagonal of the inverse Hessian.
        fill(h0, gamma);
        arraycopy(g, 0, q, 0, n);

        // First loop of the two-loop recursion: recur backward from the most recent correction.
        int k = m;
        for (int j = 0; j < muse; j++) {
          k--;
          if (k < 0) {
            k = mSave - 1;
          }
          alpha[k] = v1DotV2(n, s[k], 0, 1, q, 0, 1);
          alpha[k] *= rho[k];
          aV1PlusV2(n, -alpha[k], y[k], 0, 1, q, 0, 1);
        }

        // Apply the initial inverse Hessian approximation.
        for (int i = 0; i < n; i++) {
          r[i] = h0[i] * q[i];
        }

        // Second loop of the two-loop recursion: recur forward from the oldest correction.
        for (int j = 0; j < muse; j++) {
          double beta = v1DotV2(n, r, 0, 1, y[k], 0, 1);
          beta *= rho[k];
          aV1PlusV2(n, alpha[k] - beta, s[k], 0, 1, r, 0, 1);
          k++;
          if (k > mSave - 1) {
            k = 0;
          }
        }

        // Set the search direction.
        for (int i = 0; i < n; i++) {
          p[i] = -r[i];
        }
      } else {
        // Steepest descent direction.
        for (int i = 0; i < n; i++) {
          p[i] = -g[i];
        }
      }

      arraycopy(x, 0, prevX, 0, n);
      arraycopy(g, 0, prevG, 0, n);

      // Perform the line search along the new search direction.
      nFunctionEvals[0] = 0;
      double prevF = f;
      f = lineSearch.search(n, x, f, g, p, angle, df, info, nFunctionEvals, potential);
      evaluations += nFunctionEvals[0];

      // Update variables based on the results of this iteration.
      if (mSave > 0) {
        for (int i = 0; i < n; i++) {
          s[m][i] = x[i] - prevX[i];
          y[m][i] = g[i] - prevG[i];
        }
        double ys = v1DotV2(n, y[m], 0, 1, s[m], 0, 1);
        double yy = v1DotV2(n, y[m], 0, 1, y[m], 0, 1);
        gamma = abs(ys / yy);
        rho[m] = 1.0 / ys;
      }

      // Get the sizes of the moves made during this iteration.
      df = prevF - f;
      double xrms = 0.0;
      grms = 0.0;
      for (int i = 0; i < n; i++) {
        double dx = (x[i] - prevX[i]) / scaling[i];
        xrms += dx * dx;
        double gx = g[i] * scaling[i];
        grms += gx * gx;
      }
      xrms = sqrt(xrms) / rms;
      grms = sqrt(grms) / rms;

      boolean done = false;
      if (info[0] == LineSearchResult.BadIntpln || info[0] == LineSearchResult.IntplnErr) {
        nErrors++;
        if (nErrors >= maxErrors) {
          done = true;
        }
      } else {
        nErrors = 0;
      }

      if (listener != null) {
        if (!listener.optimizationUpdate(iterations, mSave, evaluations, grms, xrms, f, df, angle[0],
            info[0])) {
          // Terminate the optimization.
          return 1;
        }
      } else {
        log(iterations, evaluations, grms, xrms, f, df, angle[0], info[0]);
      }

      /*
       Terminate the optimization if the line search failed or upon
       satisfying the convergence criteria.
      */
      if (done) {
        return -1;
      } else if (grms <= eps) {
        return 0;
      }
    }
  }

  /**
   * This method solves the unconstrained minimization problem
   *
   * <pre>
   *     min f(x), x = (x1,x2,...,x_n),
   * </pre>
   * <p>
   * using the limited-memory BFGS method. The routine is especially effective on problems involving
   * a large number of variables.
 * In a typical iteration of this method an approximation <code>Hk</code> to the inverse of the
   * Hessian is obtained by applying <code>m</code> BFGS updates to a diagonal matrix
   * <code>Hk0</code>, using information from the previous <code>m</code> steps.
   *
   * <p>The user specifies the number <code>m</code>, which determines the amount of storage
   * required by the routine.
   *
   * <p>The user is required to calculate the function value <code>f</code> and its gradient <code>g
   * </code>.
   *
   * <p>The step length is determined at each iteration by means of the line search routine <code>
   * lineSearch</code>, which is a slight modification of the routine <code>CSRCH</code> written by
   * More and Thuente.
   *
   * @param n The number of variables in the minimization problem. Restriction: <code>n > 0</code>.
   * @param mSave The number of corrections used in the BFGS update. Values of <code>mSave</code>
   *     less than 3 are not recommended; large values of <code>mSave</code> will result in excessive
   *     computing time. <code>3 <= mSave <= 7</code> is recommended.
   *     <p>
   *     Restriction: <code>mSave >= 0</code> (if <code>mSave == 0</code>, steepest descent
   *     directions are used).
   * @param x On initial entry this must be set by the user to the values of the initial estimate
   *     of the solution vector. On exit, it contains the values of the variables at the best point
   *     found (usually a solution).
   * @param f The value of the function <code>f</code> at the point <code>x</code>.
   * @param g The components of the gradient <code>g</code> at the point <code>x</code>.
   * @param eps Determines the accuracy with which the solution is to be found. The subroutine
   *     terminates when <code>G RMS < EPS</code>.
   * @param potential Implements the {@link ffx.numerics.OptimizationInterface} interface to supply
   *     function values and gradients.
   * @param listener Implements the {@link OptimizationListener} interface and will be notified
   *     after each successful step.
   * @return status code (0 = success, -1 = failed)
   * @since 1.0
   */
  public static int minimize(int n, int mSave, double[] x, double f, double[] g, double eps,
      OptimizationInterface potential, OptimizationListener listener) {
    return minimize(n, mSave, x, f, g, eps, Integer.MAX_VALUE - 1, potential, listener);
  }

  /**
   * Compute the sum of a vector times a scalar plus another vector.
   *
   * @param n The number of points.
   * @param a The scalar.
   * @param v1 The X array.
   * @param v1Start The first point in the X array.
   * @param v1Step The X array increment.
   * @param v2 The Y array.
   * @param v2Start The first point in the Y array.
   * @param v2Step The Y array increment.
   * @since 1.0
   */
  static void aV1PlusV2(final int n, final double a, final double[] v1, final int v1Start,
      final int v1Step, final double[] v2, final int v2Start, final int v2Step) {
    /*
     Require the number of entries (n) to be greater than zero. If the
     scalar (a) is zero, then the v2 array is unchanged.
    */
    if (n <= 0 || a == 0) {
      return;
    }

    int stop = v1Start + v1Step * n;
    for (int i = v1Start, j = v2Start; i != stop; i += v1Step, j += v2Step) {
      v2[j] = fma(a, v1[i], v2[j]);
    }
  }

  /**
   * Print status messages for <code>LBFGS</code> if there is no listener.
   *
   * @param iter Number of iterations so far.
   * @param nfun Number of function evaluations so far.
   * @param grms Gradient RMS at current solution.
   * @param xrms Coordinate change RMS at current solution.
   * @param f Function value at current solution.
   * @param df Change in the function value compared to the previous solution.
   * @param angle Current angle between gradient and search direction.
   * @param info Result of the most recent line search.
   * @since 1.0
   */
  private static void log(int iter, int nfun, double grms, double xrms, double f, double df,
      double angle, LineSearchResult info) {
    if (iter == 0) {
      logger.info("\n Limited Memory BFGS Quasi-Newton Optimization: \n");
      logger.info(
          " QN Iter    F Value      G RMS     F Move    X Move    Angle  FG Call  Comment\n");
    }
    if (info == null) {
      logger.info(
          format("%6d%13.4f%11.4f%11.4f%10.4f%9.2f%7d", iter, f, grms, df, xrms, angle, nfun));
    } else {
      logger.info(
          format("%6d%13.4f%11.4f%11.4f%10.4f%9.2f%7d %8s", iter, f, grms, df, xrms, angle, nfun,
              info));
    }
  }

  /**
   * Compute the dot product of two vectors.
   *
   * @param n Number of entries to include.
   * @param v1 The X array.
   * @param v1Start The first point in the X array.
   * @param v1Step The X array increment.
   * @param v2 The Y array.
   * @param v2Start The first point in the Y array.
   * @param v2Step The Y array increment.
   * @return dot product
   * @since 1.0
   */
  static double v1DotV2(final int n, final double[] v1, final int v1Start, final int v1Step,
      final double[] v2, final int v2Start, final int v2Step) {

    // Require the number of entries to be greater than zero.
    if (n <= 0) {
      return 0;
    }

    double sum = 0.0;
    int stop = v1Start + v1Step * n;
    for (int i = v1Start, j = v2Start; i != stop; i += v1Step, j += v2Step) {
      sum = fma(v1[i], v2[j], sum);
    }
    return sum;
  }
}
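
// Example usage (an illustrative sketch, not part of this file): any object implementing
// ffx.numerics.OptimizationInterface can be minimized with LBFGS.minimize. The "MyPotential"
// class name, the convergence tolerance, and the accessor calls getNumberOfVariables(),
// getCoordinates(double[]), and energyAndGradient(double[], double[]) are assumptions made for
// the sake of the example; consult OptimizationInterface for the exact method signatures.
//
//   OptimizationInterface potential = new MyPotential();
//   int n = potential.getNumberOfVariables();
//   double[] x = potential.getCoordinates(new double[n]);
//   double[] g = new double[n];
//   double f = potential.energyAndGradient(x, g);
//   // Request convergence to an RMS gradient of 1.0e-6 using 7 saved correction vectors,
//   // with no listener (progress is then logged by the private log method).
//   int status = LBFGS.minimize(n, 7, x, f, g, 1.0e-6, potential, null);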