function [x,f,exitflag,output] = minFunc(funObj,x0,options,varargin)
% minFunc(funObj,x0,options,varargin)
%
% Unconstrained optimizer using a line search strategy
%
% Uses an interface very similar to fminunc
% (it doesn't support all of the optimization toolbox options,
% but supports many other options).
%
% It computes descent directions using one of ('Method'):
%   - 'sd': Steepest Descent
%       (no previous information used, not recommended)
%   - 'csd': Cyclic Steepest Descent
%       (uses previous step length for a fixed-length cycle)
%   - 'bb': Barzilai and Borwein Gradient
%       (uses only previous step)
%   - 'cg': Non-Linear Conjugate Gradient
%       (uses only previous step and a vector beta)
%   - 'scg': Scaled Non-Linear Conjugate Gradient
%       (uses previous step and a vector beta,
%       and Hessian-vector products to initialize line search)
%   - 'pcg': Preconditioned Non-Linear Conjugate Gradient
%       (uses only previous step and a vector beta, preconditioned version)
%   - 'lbfgs': Quasi-Newton with Limited-Memory BFGS Updating
%       (default: uses a predetermined number of previous steps to form a
%       low-rank Hessian approximation)
%   - 'newton0': Hessian-Free Newton
%       (numerically computes Hessian-vector products)
%   - 'pnewton0': Preconditioned Hessian-Free Newton
%       (numerically computes Hessian-vector products, preconditioned
%       version)
%   - 'qnewton': Quasi-Newton Hessian approximation
%       (uses dense Hessian approximation)
%   - 'mnewton': Newton's method with Hessian calculation after every
%       user-specified number of iterations
%       (needs user-supplied Hessian matrix)
%   - 'newton': Newton's method with Hessian calculation every iteration
%       (needs user-supplied Hessian matrix)
%   - 'tensor': Tensor
%       (needs user-supplied Hessian matrix and Tensor of 3rd partial derivatives)
%
% Several line search strategies are available for finding a step length satisfying
% the termination criteria ('LS'):
%   - 0: Backtracking w/ Step Size Halving
%   - 1: Backtracking w/ Quadratic/Cubic Interpolation from new function values
%   - 2: Backtracking w/ Cubic Interpolation from new function + gradient
%       values (default for 'bb' and 'sd')
%   - 3: Bracketing w/ Step Size Doubling and Bisection
%   - 4: Bracketing w/ Cubic Interpolation/Extrapolation with function +
%       gradient values (default for all except 'bb' and 'sd')
%   - 5: Bracketing w/ Mixed Quadratic/Cubic Interpolation/Extrapolation
%   - 6: Use Matlab Optimization Toolbox's line search
%       (requires Matlab's linesearch.m to be added to the path)
%
% Above, the first three find a point satisfying the Armijo condition,
% while the last four search for a point satisfying the Wolfe
% conditions. If the objective function overflows, it is recommended
% to use one of the first three.
% The first three can be used to perform a non-monotone
% line search by changing the option 'Fref'.
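%
% Example (an illustrative sketch, not part of the original interface
% documentation; 'myObj' stands for a hypothetical user objective that
% returns [f,g]):
%
%   options = [];
%   options.Method = 'lbfgs';  % the default method
%   options.LS = 4;            % bracketing Wolfe line search (the default)
%   options.Display = 'iter';
%   [x,f,exitflag,output] = minFunc(@myObj,x0,options);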
%
% Several strategies for choosing the initial step size are available ('LS_init'):
%   - 0: Always try an initial step length of 1 (default for all except 'cg' and 'sd')
%       (t = 1)
%   - 1: Use a step similar to the previous step (default for 'cg' and 'sd')
%       (t = t_old*min(2,g'd/g_old'd_old))
%   - 2: Quadratic Initialization using previous function value and new
%       function value/gradient (use this if steps tend to be very long)
%       (t = min(1,2*(f-f_old)/(g'd)))
%   - 3: The minimum between 1 and twice the previous step length
%       (t = min(1,2*t))
%   - 4: The scaled conjugate gradient step length (may accelerate
%       conjugate gradient methods, but requires a Hessian-vector product)
%       (t = g'd/d'Hd)
%
% Inputs:
%   funObj is a function handle
%   x0 is a starting vector
%   options is a struct containing parameters
%       (defaults are used for non-existent or blank fields)
%   all other arguments are passed to funObj
%
% Outputs:
%   x is the minimum value found
%   f is the function value at the minimum found
%   exitflag returns an exit condition
%   output returns a structure with other information
%
% Supported Input Options
%   Display - Level of display [ off | final | (iter) | full | excessive ]
%   MaxFunEvals - Maximum number of function evaluations allowed (1000)
%   MaxIter - Maximum number of iterations allowed (500)
%   TolFun - Termination tolerance on the first-order optimality (1e-5)
%   TolX - Termination tolerance on progress in terms of function/parameter changes (1e-9)
%   Method - [ sd | csd | bb | cg | scg | pcg | {lbfgs} | newton0 | pnewton0 |
%              qnewton | mnewton | newton | tensor ]
%   c1 - Sufficient Decrease for Armijo condition (1e-4)
%   c2 - Curvature Decrease for Wolfe conditions (.2 for cg methods, .9 otherwise)
%   LS_init - Line Search Initialization - see above (2 for cg/sd, 4 for scg, 0 otherwise)
%   LS - Line Search type - see above (2 for bb, 4 otherwise)
%   Fref - Setting this to a positive integer greater than 1
%       will use a non-monotone Armijo objective in the line search
%       (20 for bb, 10 for csd, 1 for all others)
%   numDiff - compute derivatives numerically
%       (default: 0) (this option has a different effect for 'newton', see below)
%   useComplex - if 1, use complex differentials when computing numerical derivatives
%       to get very accurate values (default: 0)
%   DerivativeCheck - if 'on', computes derivatives numerically at the initial
%       point and compares them to the user-supplied derivative (default: 'off')
%   outputFcn - function to run after each iteration (default: []).
%       It should have the following interface (see the example sketch below):
%           outputFcn(x,infoStruct,state,varargin{:})
%   useMex - where applicable, use mex files to speed things up (default: 1)
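%
% Example outputFcn (a minimal illustrative sketch; it relies only on the
% interface documented above and on the 'init'/'iter'/'done' states passed
% via callOutput in the code below, and it ignores infoStruct):
%
%   function myOutputFcn(x,infoStruct,state,varargin)
%   fprintf('%5s: norm(x) = %g\n',state,norm(x));
%
%   options.outputFcn = @myOutputFcn;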
%
% Method-specific input options:
%   newton:
%       HessianModify - type of Hessian modification for direct solvers to
%           use if the Hessian is not positive definite (default: 0)
%           0: Minimum Euclidean norm s.t. eigenvalues sufficiently large
%               (requires eigenvalues on iterations where matrix is not pd)
%           1: Start with (1/2)*||A||_F and increment until Cholesky succeeds
%               (an approximation to method 0, does not require eigenvalues)
%           2: Modified LDL factorization
%               (only 1 generalized Cholesky factorization done and no eigenvalues required)
%           3: Modified Spectral Decomposition
%               (requires eigenvalues)
%           4: Modified Symmetric Indefinite Factorization
%           5: Uses the eigenvector of the smallest eigenvalue as negative
%               curvature direction
%       cgSolve - use conjugate gradient instead of direct solver (default: 0)
%           0: Direct Solver
%           1: Conjugate Gradient
%           2: Conjugate Gradient with Diagonal Preconditioner
%           3: Conjugate Gradient with LBFGS Preconditioner
%           x: Conjugate Gradient with Symmetric Successive Over-Relaxation
%               Preconditioner with parameter x
%               (where x is a real number in the range [0,2])
%           x: Conjugate Gradient with Incomplete Cholesky Preconditioner
%               with drop tolerance -x
%               (where x is a real negative number)
%       numDiff - compute Hessian numerically
%           (default: 0, done with complex differentials if useComplex = 1)
%       LS_saveHessianComp - when on, only computes the Hessian at the
%           first and last iteration of the line search (default: 1)
%   mnewton:
%       HessianIter - number of iterations to use same Hessian (default: 5)
%   qnewton:
%       initialHessType - scale initial Hessian approximation (default: 1)
%       qnUpdate - type of quasi-Newton update (default: 3):
%           0: BFGS
%           1: SR1 (when it is positive-definite, otherwise BFGS)
%           2: Hoshino
%           3: Self-Scaling BFGS
%           4: Oren's Self-Scaling Variable Metric method
%           5: McCormick-Huang asymmetric update
%       Damped - use damped BFGS update (default: 1)
%   newton0/pnewton0:
%       HvFunc - user-supplied function that returns Hessian-vector products
%           (by default, these are computed numerically using autoHv).
%           HvFunc should have the following interface (see the example
%           sketch after this list): HvFunc(v,x,varargin{:})
%       useComplex - use a complex perturbation to get high-accuracy
%           Hessian-vector products (default: 0)
%           (the increased accuracy can make the method much more efficient,
%           but gradient code must properly support complex inputs)
%       useNegCurv - a negative curvature direction is used as the descent
%           direction if one is encountered during the cg iterations
%           (default: 1)
%       precFunc (for pnewton0 only) - user-supplied preconditioner
%           (by default, an L-BFGS preconditioner is used).
%           precFunc should have the following interface:
%           precFunc(v,x,varargin{:})
%   lbfgs:
%       Corr - number of corrections to store in memory (default: 100)
%           (higher numbers converge faster but use more memory)
%       Damped - use damped update (default: 0)
%   cg/scg/pcg:
%       cgUpdate - type of update (default for cg/scg: 2, default for pcg: 1)
%           0: Fletcher-Reeves
%           1: Polak-Ribiere
%           2: Hestenes-Stiefel (not supported for pcg)
%           3: Gilbert-Nocedal
%       HvFunc (for scg only) - user-supplied function that returns Hessian-vector
%           products (by default, these are computed numerically using autoHv).
%           HvFunc should have the following interface:
%           HvFunc(v,x,varargin{:})
%       precFunc (for pcg only) - user-supplied preconditioner
%           (by default, an L-BFGS preconditioner is used).
%           precFunc should have the following interface:
%           precFunc(v,x,varargin{:})
%   bb:
%       bbType - type of bb step (default: 1)
%           0: min_alpha ||delta_x - alpha*delta_g||_2
%           1: min_alpha ||alpha*delta_x - delta_g||_2
%           2: Conic BB
%           3: Gradient method with retards
%   csd:
%       cycle - length of cycle (default: 3)
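%
% Example HvFunc (an illustrative sketch for a quadratic objective
% f(x) = (1/2)*x'*A*x - b'*x, whose Hessian is the constant matrix A;
% A here is a hypothetical variable captured by the anonymous function):
%
%   options.Method = 'newton0';
%   options.HvFunc = @(v,x) A*v;  % matches the interface HvFunc(v,x,varargin{:})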
%
% Supported Output Options
%   iterations - number of iterations taken
%   funcCount - number of function evaluations
%   algorithm - algorithm used
%   firstorderopt - first-order optimality
%   message - exit message
%   trace.funcCount - function evaluations after each iteration
%   trace.fval - function value after each iteration
%
% Author: Mark Schmidt (2006)
% Web: http://www.cs.ubc.ca/~schmidtm
%
% Sources (in order of how much the source material contributes):
%   J. Nocedal and S.J. Wright. 1999. "Numerical Optimization". Springer Verlag.
%   R. Fletcher. 1987. "Practical Methods of Optimization". Wiley.
%   J. Demmel. 1997. "Applied Numerical Linear Algebra". SIAM.
%   R. Barrett, M. Berry, T. Chan, J. Demmel, J. Dongarra, V. Eijkhout,
%   R. Pozo, C. Romine, and H. van der Vorst. 1994. "Templates for the Solution of
%   Linear Systems: Building Blocks for Iterative Methods". SIAM.
%   J. More and D. Thuente. "Line search algorithms with guaranteed
%   sufficient decrease". ACM Trans. Math. Softw. vol 20, 286-307, 1994.
%   M. Raydan. "The Barzilai and Borwein gradient method for the large
%   scale unconstrained minimization problem". SIAM J. Optim., 7, 26-33, 1997.
%   "Mathematical Optimization". The Computational Science Education
%   Project. 1995.
%   C. Kelley. 1999. "Iterative Methods for Optimization". Frontiers in
%   Applied Mathematics. SIAM.

if nargin < 3
    options = [];
end

% Get Parameters
[verbose,verboseI,debug,doPlot,maxFunEvals,maxIter,tolFun,tolX,method,...
    corrections,c1,c2,LS_init,LS,cgSolve,qnUpdate,cgUpdate,initialHessType,...
    HessianModify,Fref,useComplex,numDiff,LS_saveHessianComp,...
    DerivativeCheck,Damped,HvFunc,bbType,cycle,...
    HessianIter,outputFcn,useMex,useNegCurv,precFunc] = ...
    minFunc_processInputOptions(options);

if isfield(options,'logfile')
    logfile = options.logfile;
else
    logfile = [];
end

% Constants
SD = 0;
CSD = 1;
BB = 2;
CG = 3;
PCG = 4;
LBFGS = 5;
QNEWTON = 6;
NEWTON0 = 7;
NEWTON = 8;
TENSOR = 9;

% Initialize
p = length(x0);
d = zeros(p,1);
x = x0;
t = 1;

% If necessary, form numerical differentiation functions
funEvalMultiplier = 1;
if numDiff && method ~= TENSOR
    varargin(3:end+2) = varargin(1:end);
    varargin{1} = useComplex;
    varargin{2} = funObj;
    if method ~= NEWTON
        if debug
            if useComplex
                fprintf('Using complex differentials for gradient computation\n');
            else
                fprintf('Using finite differences for gradient computation\n');
            end
        end
        funObj = @autoGrad;
    else
        if debug
            if useComplex
                fprintf('Using complex differentials for Hessian computation\n');
            else
                fprintf('Using finite differences for Hessian computation\n');
            end
        end
        funObj = @autoHess;
    end

    if method == NEWTON0 && useComplex == 1
        if debug
            fprintf('Turning off the use of complex differentials\n');
        end
        useComplex = 0;
    end

    if useComplex
        funEvalMultiplier = p;
    else
        funEvalMultiplier = p+1;
    end
end

% Evaluate Initial Point
if method < NEWTON
    [f,g] = feval(funObj,x,varargin{:});
else
    [f,g,H] = feval(funObj,x,varargin{:});
    computeHessian = 1;
end
funEvals = 1;

if strcmp(DerivativeCheck,'on')
    if numDiff
        fprintf('Can not do derivative checking when numDiff is 1\n');
    end
    % Check provided gradient/Hessian function using numerical derivatives
    fprintf('Checking Gradient:\n');
    [f2,g2] = autoGrad(x,useComplex,funObj,varargin{:});

    fprintf('Max difference between user and numerical gradient: %f\n',max(abs(g-g2)));
    if max(abs(g-g2)) > 1e-4
        fprintf('User NumDif:\n');
        [g g2]
        diff = abs(g-g2)
        pause;
    end

    if method >= NEWTON
        fprintf('Check Hessian:\n');
        [f2,g2,H2] = autoHess(x,useComplex,funObj,varargin{:});

        fprintf('Max difference between user and numerical Hessian: %f\n',max(abs(H(:)-H2(:))));
        if max(abs(H(:)-H2(:))) > 1e-4
            H
            H2
            diff = abs(H-H2)
            pause;
        end
    end
end
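% Example (illustrative): the check above can be exercised on an analytic
% gradient before a long run by setting, in the caller:
%   options.DerivativeCheck = 'on';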
% Output Log
if verboseI
    fprintf('%10s %10s %15s %15s %15s\n','Iteration','FunEvals','Step Length','Function Val','Opt Cond');
end
if logfile
    fid = fopen(logfile,'a');
    if (fid > 0)
        fprintf(fid,'-- %10s %10s %15s %15s %15s\n','Iteration','FunEvals','Step Length','Function Val','Opt Cond');
        fclose(fid);
    end
end

% Output Function
if ~isempty(outputFcn)
    callOutput(outputFcn,x,'init',0,funEvals,f,[],[],g,[],sum(abs(g)),varargin{:});
end

% Initialize Trace
trace.fval = f;
trace.funcCount = funEvals;

% Check optimality of initial point
if sum(abs(g)) <= tolFun
    exitflag = 1;
    msg = 'Optimality Condition below TolFun';
    if verbose
        fprintf('%s\n',msg);
    end
    if nargout > 3
        output = struct('iterations',0,'funcCount',1,...
            'algorithm',method,'firstorderopt',sum(abs(g)),'message',msg,'trace',trace);
    end
    return;
end

% Perform up to a maximum of 'maxIter' descent steps:
for i = 1:maxIter

    % ****************** COMPUTE DESCENT DIRECTION *****************

    switch method
        case SD % Steepest Descent
            d = -g;

        case CSD % Cyclic Steepest Descent

            if mod(i,cycle) == 1 % Use Steepest Descent
                alpha = 1;
                LS_init = 2;
                LS = 4; % Precise Line Search
            elseif mod(i,cycle) == mod(1+1,cycle) % Use Previous Step
                alpha = t;
                LS_init = 0;
                LS = 2; % Non-monotone line search
            end
            d = -alpha*g;

        case BB % Steepest Descent with Barzilai and Borwein Step Length

            if i == 1
                d = -g;
            else
                y = g-g_old;
                s = t*d;
                if bbType == 0
                    yy = y'*y;
                    alpha = (s'*y)/(yy);
                    if alpha <= 1e-10 || alpha > 1e10
                        alpha = 1;
                    end
                elseif bbType == 1
                    sy = s'*y;
                    alpha = (s'*s)/sy;
                    if alpha <= 1e-10 || alpha > 1e10
                        alpha = 1;
                    end
                elseif bbType == 2 % Conic Interpolation ('Modified BB')
                    sy = s'*y;
                    ss = s'*s;
                    alpha = ss/sy;
                    if alpha <= 1e-10 || alpha > 1e10
                        alpha = 1;
                    end
                    alphaConic = ss/(6*(myF_old - f) + 4*g'*s + 2*g_old'*s);
                    if alphaConic > .001*alpha && alphaConic < 1000*alpha
                        alpha = alphaConic;
                    end
                elseif bbType == 3 % Gradient Method with retards (bb type 1, random selection of previous step)
                    sy = s'*y;
                    alpha = (s'*s)/sy;
                    if alpha <= 1e-10 || alpha > 1e10
                        alpha = 1;
                    end
                    v(1+mod(i-2,5)) = alpha;
                    alpha = v(ceil(rand*length(v)));
                end
                d = -alpha*g;
            end
            g_old = g;
            myF_old = f;

        case CG % Non-Linear Conjugate Gradient

            if i == 1
                d = -g; % Initially use steepest descent direction
            else
                gtgo = g'*g_old;
                gotgo = g_old'*g_old;

                if cgUpdate == 0
                    % Fletcher-Reeves
                    beta = (g'*g)/(gotgo);
                elseif cgUpdate == 1
                    % Polak-Ribiere
                    beta = (g'*(g-g_old))/(gotgo);
                elseif cgUpdate == 2
                    % Hestenes-Stiefel
                    beta = (g'*(g-g_old))/((g-g_old)'*d);
                else
                    % Gilbert-Nocedal
                    beta_FR = (g'*g)/(gotgo);
                    beta_PR = (g'*g-gtgo)/(gotgo);
                    beta = max(-beta_FR,min(beta_PR,beta_FR));
                end

                d = -g + beta*d;

                % Restart if not a direction of sufficient descent
                if g'*d > -tolX
                    if debug
                        fprintf('Restarting CG\n');
                    end
                    beta = 0;
                    d = -g;
                end

                % Old restart rule:
                %if beta < 0 || abs(gtgo)/(gotgo) >= 0.1 || g'*d >= 0
            end
            g_old = g;
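
        % Example (illustrative): the beta formula above is selected with
        % options.cgUpdate, e.g. Polak-Ribiere nonlinear CG:
        %   options.Method = 'cg'; options.cgUpdate = 1;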
        case PCG % Preconditioned Non-Linear Conjugate Gradient

            % Apply preconditioner to negative gradient
            if isempty(precFunc)
                % Use L-BFGS Preconditioner
                if i == 1
                    old_dirs = zeros(length(g),0);
                    old_stps = zeros(length(g),0);
                    Hdiag = 1;
                    s = -g;
                else
                    [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);

                    if useMex
                        s = lbfgsC(-g,old_dirs,old_stps,Hdiag);
                    else
                        s = lbfgs(-g,old_dirs,old_stps,Hdiag);
                    end
                end
            else % User-supplied preconditioner
                s = precFunc(-g,x,varargin{:});
            end

            if i == 1
                d = s;
            else
                if cgUpdate == 0
                    % Preconditioned Fletcher-Reeves
                    beta = (g'*s)/(g_old'*s_old);
                elseif cgUpdate < 3
                    % Preconditioned Polak-Ribiere
                    beta = (g'*(s-s_old))/(g_old'*s_old);
                else
                    % Preconditioned Gilbert-Nocedal
                    beta_FR = (g'*s)/(g_old'*s_old);
                    beta_PR = (g'*(s-s_old))/(g_old'*s_old);
                    beta = max(-beta_FR,min(beta_PR,beta_FR));
                end
                d = s + beta*d;

                if g'*d > -tolX
                    if debug
                        fprintf('Restarting CG\n');
                    end
                    beta = 0;
                    d = s;
                end
            end
            g_old = g;
            s_old = s;

        case LBFGS % L-BFGS

            % Update the direction and step sizes
            if i == 1
                d = -g; % Initially use steepest descent direction
                old_dirs = zeros(length(g),0);
                old_stps = zeros(length(d),0);
                Hdiag = 1;
            else
                if Damped
                    [old_dirs,old_stps,Hdiag] = dampedUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);
                else
                    [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);
                end

                if useMex
                    d = lbfgsC(-g,old_dirs,old_stps,Hdiag);
                else
                    d = lbfgs(-g,old_dirs,old_stps,Hdiag);
                end
            end
            g_old = g;
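
        % Example (illustrative): the number of correction pairs kept by
        % lbfgsUpdate above is controlled by options.Corr, e.g.:
        %   options.Method = 'lbfgs'; options.Corr = 200;  % default is 100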
        case QNEWTON % Use quasi-Newton Hessian approximation

            if i == 1
                d = -g;
            else
                % Compute difference vectors
                y = g-g_old;
                s = t*d;

                if i == 2
                    % Make initial Hessian approximation
                    if initialHessType == 0
                        % Identity
                        if qnUpdate <= 1
                            R = eye(length(g));
                        else
                            H = eye(length(g));
                        end
                    else
                        % Scaled Identity
                        if debug
                            fprintf('Scaling Initial Hessian Approximation\n');
                        end
                        if qnUpdate <= 1
                            % Use Cholesky of Hessian approximation
                            R = sqrt((y'*y)/(y'*s))*eye(length(g));
                        else
                            % Use Inverse of Hessian approximation
                            H = eye(length(g))*(y'*s)/(y'*y);
                        end
                    end
                end

                if qnUpdate == 0 % Use BFGS updates
                    Bs = R'*(R*s);
                    if Damped
                        eta = .02;
                        if y'*s < eta*s'*Bs
                            if debug
                                fprintf('Damped Update\n');
                            end
                            theta = min(max(0,((1-eta)*s'*Bs)/(s'*Bs - y'*s)),1);
                            y = theta*y + (1-theta)*Bs;
                        end
                        R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
                    else
                        if y'*s > 1e-10
                            R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
                        else
                            if debug
                                fprintf('Skipping Update\n');
                            end
                        end
                    end
                elseif qnUpdate == 1 % Perform SR1 Update if it maintains positive-definiteness
                    Bs = R'*(R*s);
                    ymBs = y-Bs;
                    if abs(s'*ymBs) >= norm(s)*norm(ymBs)*1e-8 && (s-((R\(R'\y))))'*y > 1e-10
                        R = cholupdate(R,-ymBs/sqrt(ymBs'*s),'-');
                    else
                        if debug
                            fprintf('SR1 not positive-definite, doing BFGS Update\n');
                        end
                        if Damped
                            eta = .02;
                            if y'*s < eta*s'*Bs
                                if debug
                                    fprintf('Damped Update\n');
                                end
                                theta = min(max(0,((1-eta)*s'*Bs)/(s'*Bs - y'*s)),1);
                                y = theta*y + (1-theta)*Bs;
                            end
                            R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
                        else
                            if y'*s > 1e-10
                                R = cholupdate(cholupdate(R,y/sqrt(y'*s)),Bs/sqrt(s'*Bs),'-');
                            else
                                if debug
                                    fprintf('Skipping Update\n');
                                end
                            end
                        end
                    end
                elseif qnUpdate == 2 % Use Hoshino update
                    v = sqrt(y'*H*y)*(s/(s'*y) - (H*y)/(y'*H*y));
                    phi = 1/(1 + (y'*H*y)/(s'*y));
                    H = H + (s*s')/(s'*y) - (H*y*y'*H)/(y'*H*y) + phi*v*v';
                elseif qnUpdate == 3 % Self-Scaling BFGS update
                    ys = y'*s;
                    Hy = H*y;
                    yHy = y'*Hy;
                    gamma = ys/yHy;
                    v = sqrt(yHy)*(s/ys - Hy/yHy);
                    H = gamma*(H - Hy*Hy'/yHy + v*v') + (s*s')/ys;
                elseif qnUpdate == 4 % Oren's Self-Scaling Variable Metric update
                    % Oren's method
                    if (s'*y)/(y'*H*y) > 1
                        phi = 1; % BFGS
                        omega = 0;
                    elseif (s'*(H\s))/(s'*y) < 1
                        phi = 0; % DFP
                        omega = 1;
                    else
                        phi = (s'*y)*(y'*H*y-s'*y)/((s'*(H\s))*(y'*H*y)-(s'*y)^2);
                        omega = phi;
                    end
                    gamma = (1-omega)*(s'*y)/(y'*H*y) + omega*(s'*(H\s))/(s'*y);
                    v = sqrt(y'*H*y)*(s/(s'*y) - (H*y)/(y'*H*y));
                    H = gamma*(H - (H*y*y'*H)/(y'*H*y) + phi*v*v') + (s*s')/(s'*y);
                elseif qnUpdate == 5 % McCormick-Huang asymmetric update
                    theta = 1;
                    phi = 0;
                    psi = 1;
                    omega = 0;
                    t1 = s*(theta*s + phi*H'*y)';
                    t2 = (theta*s + phi*H'*y)'*y;
                    t3 = H*y*(psi*s + omega*H'*y)';
                    t4 = (psi*s + omega*H'*y)'*y;
                    H = H + t1/t2 - t3/t4;
                end

                if qnUpdate <= 1
                    d = -R\(R'\g);
                else
                    d = -H*g;
                end
            end
            g_old = g;

        case NEWTON0 % Hessian-Free Newton

            cgMaxIter = min(p,maxFunEvals-funEvals);
            cgForce = min(0.5,sqrt(norm(g)))*norm(g);

            % Set up preconditioner
            precondFunc = [];
            precondArgs = [];
            if cgSolve == 1
                if isempty(precFunc) % Apply L-BFGS preconditioner
                    if i == 1
                        old_dirs = zeros(length(g),0);
                        old_stps = zeros(length(g),0);
                        Hdiag = 1;
                    else
                        [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);
                        if useMex
                            precondFunc = @lbfgsC;
                        else
                            precondFunc = @lbfgs;
                        end
                        precondArgs = {old_dirs,old_stps,Hdiag};
                    end
                    g_old = g;
                else
                    % Apply user-defined preconditioner
                    precondFunc = precFunc;
                    precondArgs = {x,varargin{:}};
                end
            end

            % Solve Newton system using cg and Hessian-vector products
            if isempty(HvFunc)
                % No user-supplied Hessian-vector function,
                % use automatic differentiation
                HvFun = @autoHv;
                HvArgs = {x,g,useComplex,funObj,varargin{:}};
            else
                % Use user-supplied Hessian-vector function
                HvFun = HvFunc;
                HvArgs = {x,varargin{:}};
            end

            if useNegCurv
                [d,cgIter,cgRes,negCurv] = conjGrad([],-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFun,HvArgs);
            else
                [d,cgIter,cgRes] = conjGrad([],-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFun,HvArgs);
            end
            funEvals = funEvals+cgIter;
            if debug
                fprintf('newtonCG stopped on iteration %d w/ residual %.5e\n',cgIter,cgRes);
            end

            if useNegCurv
                if ~isempty(negCurv)
                    %if debug
                    fprintf('Using negative curvature direction\n');
                    %end
                    d = negCurv/norm(negCurv);
                    d = d/sum(abs(g));
                end
            end
        case NEWTON % Newton search direction

            if cgSolve == 0
                if HessianModify == 0
                    % Attempt to perform a Cholesky factorization of the Hessian
                    [R,posDef] = chol(H);

                    % If the Cholesky factorization was successful, then the Hessian is
                    % positive definite, solve the system
                    if posDef == 0
                        d = -R\(R'\g);
                    else
                        % otherwise, adjust the Hessian to be positive definite based on the
                        % minimum eigenvalue, and solve with QR
                        % (expensive, we don't want to do this very much)
                        if debug
                            fprintf('Adjusting Hessian\n');
                        end
                        H = H + eye(length(g)) * max(0,1e-12 - min(real(eig(H))));
                        d = -H\g;
                    end
                elseif HessianModify == 1
                    % Modified Incomplete Cholesky
                    R = mcholinc(H,debug);
                    d = -R\(R'\g);
                elseif HessianModify == 2
                    % Modified Generalized Cholesky
                    if useMex
                        [L D perm] = mcholC(H);
                    else
                        [L D perm] = mchol(H);
                    end
                    d(perm) = -L' \ ((D.^-1).*(L \ g(perm)));
                elseif HessianModify == 3
                    % Modified Spectral Decomposition
                    [V,D] = eig((H+H')/2);
                    D = diag(D);
                    D = max(abs(D),max(max(abs(D)),1)*1e-12);
                    d = -V*((V'*g)./D);
                elseif HessianModify == 4
                    % Modified Symmetric Indefinite Factorization
                    [L,D,perm] = ldl(H,'vector');
                    [blockPos junk] = find(triu(D,1));
                    for diagInd = setdiff(setdiff(1:p,blockPos),blockPos+1)
                        if D(diagInd,diagInd) < 1e-12
                            D(diagInd,diagInd) = 1e-12;
                        end
                    end
                    for blockInd = blockPos'
                        block = D(blockInd:blockInd+1,blockInd:blockInd+1);
                        block_a = block(1);
                        block_b = block(2);
                        block_d = block(4);
                        lambda = (block_a+block_d)/2 - sqrt(4*block_b^2 + (block_a-block_d)^2)/2;
                        D(blockInd:blockInd+1,blockInd:blockInd+1) = block+eye(2)*(lambda+1e-12);
                    end
                    d(perm) = -L' \ (D \ (L \ g(perm)));
                else
                    % Take Newton step if Hessian is pd,
                    % otherwise take a step with negative curvature
                    [R,posDef] = chol(H);
                    if posDef == 0
                        d = -R\(R'\g);
                    else
                        if debug
                            fprintf('Taking Direction of Negative Curvature\n');
                        end
                        [V,D] = eig(H);
                        u = V(:,1);
                        d = -sign(u'*g)*u;
                    end
                end

            else
                % Solve with Conjugate Gradient
                cgMaxIter = p;
                cgForce = min(0.5,sqrt(norm(g)))*norm(g);

                % Select Preconditioner
                if cgSolve == 1
                    % No preconditioner
                    precondFunc = [];
                    precondArgs = [];
                elseif cgSolve == 2
                    % Diagonal preconditioner
                    precDiag = diag(H);
                    precDiag(precDiag < 1e-12) = 1e-12 - min(precDiag);
                    precondFunc = @precondDiag;
                    precondArgs = {precDiag.^-1};
                elseif cgSolve == 3
                    % L-BFGS preconditioner
                    if i == 1
                        old_dirs = zeros(length(g),0);
                        old_stps = zeros(length(g),0);
                        Hdiag = 1;
                    else
                        [old_dirs,old_stps,Hdiag] = lbfgsUpdate(g-g_old,t*d,corrections,debug,old_dirs,old_stps,Hdiag);
                    end
                    g_old = g;
                    if useMex
                        precondFunc = @lbfgsC;
                    else
                        precondFunc = @lbfgs;
                    end
                    precondArgs = {old_dirs,old_stps,Hdiag};
                elseif cgSolve > 0
                    % Symmetric Successive Overrelaxation Preconditioner
                    omega = cgSolve;
                    D = diag(H);
                    D(D < 1e-12) = 1e-12 - min(D);
                    precDiag = (omega/(2-omega))*D.^-1;
                    precTriu = diag(D/omega) + triu(H,1);
                    precondFunc = @precondTriuDiag;
                    precondArgs = {precTriu,precDiag.^-1};
                else
                    % Incomplete Cholesky Preconditioner
                    opts.droptol = -cgSolve;
                    opts.rdiag = 1;
                    R = cholinc(sparse(H),opts);
                    if min(diag(R)) < 1e-12
                        R = cholinc(sparse(H + eye(p)*(1e-12 - min(diag(R)))),opts);
                    end
                    precondFunc = @precondTriu;
                    precondArgs = {R};
                end

                % Run cg with the appropriate preconditioner
                if isempty(HvFunc)
                    % No user-supplied Hessian-vector function
                    [d,cgIter,cgRes] = conjGrad(H,-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs);
                else
                    % Use user-supplied Hessian-vector function
                    [d,cgIter,cgRes] = conjGrad(H,-g,cgForce,cgMaxIter,debug,precondFunc,precondArgs,HvFunc,{x,varargin{:}});
                end
                if debug
                    fprintf('CG stopped after %d iterations w/ residual %.5e\n',cgIter,cgRes);
                    %funEvals = funEvals + cgIter;
                end
            end

        case TENSOR % Tensor Method

            if numDiff
                % Compute 3rd-order Tensor Numerically
                [junk1 junk2 junk3 T] = autoTensor(x,useComplex,funObj,varargin{:});
            else
                % Use user-supplied 3rd-derivative Tensor
                [junk1 junk2 junk3 T] = feval(funObj,x,varargin{:});
            end
            options_sub.Method = 'newton';
            options_sub.Display = 'none';
            options_sub.TolX = tolX;
            options_sub.TolFun = tolFun;
            d = minFunc(@taylorModel,zeros(p,1),options_sub,f,g,H,T);

            if any(abs(d) > 1e5) || all(abs(d) < 1e-5) || g'*d > -tolX
                if debug
                    fprintf('Using 2nd-Order Step\n');
                end
                [V,D] = eig((H+H')/2);
                D = diag(D);
                D = max(abs(D),max(max(abs(D)),1)*1e-12);
                d = -V*((V'*g)./D);
            else
                if debug
                    fprintf('Using 3rd-Order Step\n');
                end
            end
    end

    if ~isLegal(d)
        fprintf('Step direction is illegal!\n');
        pause;
        return
    end

    % ****************** COMPUTE STEP LENGTH ************************

    % Directional Derivative
    gtd = g'*d;

    % Check that progress can be made along direction
    if gtd > -tolX
        exitflag = 2;
        msg = 'Directional Derivative below TolX';
        break;
    end

    % Select Initial Guess
    if i == 1
        if method < NEWTON0
            t = min(1,1/sum(abs(g)));
        else
            t = 1;
        end
    else
        if LS_init == 0
            % Newton step
            t = 1;
        elseif LS_init == 1
            % Close to previous step length
            t = t*min(2,(gtd_old)/(gtd));
        elseif LS_init == 2
            % Quadratic Initialization based on {f,g} and previous f
            t = min(1,2*(f-f_old)/(gtd));
        elseif LS_init == 3
            % Double previous step length
            t = min(1,t*2);
        elseif LS_init == 4
            % Scaled step length if possible
            if isempty(HvFunc)
                % No user-supplied Hessian-vector function,
                % use automatic differentiation
                dHd = d'*autoHv(d,x,g,0,funObj,varargin{:});
            else
                % Use user-supplied Hessian-vector function
                dHd = d'*HvFunc(d,x,varargin{:});
            end
            funEvals = funEvals + 1;
            if dHd > 0
                t = -gtd/(dHd);
            else
                t = min(1,2*(f-f_old)/(gtd));
            end
        end

        if t <= 0
            t = 1;
        end
    end
    f_old = f;
    gtd_old = gtd;
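
    % Example (illustrative): the non-monotone reference value fr computed
    % below is enabled by setting options.Fref > 1, e.g. the 'bb' default:
    %   options.Method = 'bb'; options.Fref = 20;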
    % Compute reference fr if using non-monotone objective
    if Fref == 1
        fr = f;
    else
        if i == 1
            old_fvals = repmat(-inf,[Fref 1]);
        end
        if i <= Fref
            old_fvals(i) = f;
        else
            old_fvals = [old_fvals(2:end);f];
        end
        fr = max(old_fvals);
    end

    computeHessian = 0;
    if method >= NEWTON
        if HessianIter == 1
            computeHessian = 1;
        elseif i > 1 && mod(i-1,HessianIter) == 0
            computeHessian = 1;
        end
    end

    % Line Search
    f_old = f;
    if LS < 3 % Use Armijo Backtracking
        % Perform Backtracking line search
        if computeHessian
            [t,x,f,g,LSfunEvals,H] = ArmijoBacktrack(x,t,d,f,fr,g,gtd,c1,LS,tolX,debug,doPlot,LS_saveHessianComp,funObj,varargin{:});
        else
            [t,x,f,g,LSfunEvals] = ArmijoBacktrack(x,t,d,f,fr,g,gtd,c1,LS,tolX,debug,doPlot,1,funObj,varargin{:});
        end
        funEvals = funEvals + LSfunEvals;

    elseif LS < 6
        % Find Point satisfying the Wolfe conditions
        if computeHessian
            [t,f,g,LSfunEvals,H] = WolfeLineSearch(x,t,d,f,g,gtd,c1,c2,LS,25,tolX,debug,doPlot,LS_saveHessianComp,funObj,varargin{:});
        else
            [t,f,g,LSfunEvals] = WolfeLineSearch(x,t,d,f,g,gtd,c1,c2,LS,25,tolX,debug,doPlot,1,funObj,varargin{:});
        end
        funEvals = funEvals + LSfunEvals;
        x = x + t*d;

    else
        % Use Matlab optim toolbox line search
        [t,f_new,fPrime_new,g_new,LSexitFlag,LSiter] = ...
            lineSearch({'fungrad',[],funObj},x,p,1,p,d,f,gtd,t,c1,c2,-inf,maxFunEvals-funEvals,...
            tolX,[],[],[],varargin{:});
        funEvals = funEvals + LSiter;
        if isempty(t)
            exitflag = -2;
            msg = 'Matlab LineSearch failed';
            break;
        end

        if method >= NEWTON
            [f_new,g_new,H] = funObj(x + t*d,varargin{:});
            funEvals = funEvals + 1;
        end
        x = x + t*d;
        f = f_new;
        g = g_new;
    end

    % Output iteration information
    if verboseI
        fprintf('%10d %10d %15.5e %15.5e %15.5e\n',i,funEvals*funEvalMultiplier,t,f,sum(abs(g)));
    end

    if logfile
        fid = fopen(logfile,'a');
        if (fid > 0)
            fprintf(fid,'-- %10d %10d %15.5e %15.5e %15.5e\n',i,funEvals*funEvalMultiplier,t,f,sum(abs(g)));
            fclose(fid);
        end
    end

    % Output Function
    if ~isempty(outputFcn)
        callOutput(outputFcn,x,'iter',i,funEvals,f,t,gtd,g,d,sum(abs(g)),varargin{:});
    end

    % Update Trace
    trace.fval(end+1,1) = f;
    trace.funcCount(end+1,1) = funEvals;

    % Check Optimality Condition
    if sum(abs(g)) <= tolFun
        exitflag = 1;
        msg = 'Optimality Condition below TolFun';
        break;
    end

    % ******************* Check for lack of progress *******************

    if sum(abs(t*d)) <= tolX
        exitflag = 2;
        msg = 'Step Size below TolX';
        break;
    end

    if abs(f-f_old) < tolX
        exitflag = 2;
        msg = 'Function Value changing by less than TolX';
        break;
    end

    % ******** Check for going over iteration/evaluation limit ********

    if funEvals*funEvalMultiplier > maxFunEvals
        exitflag = 0;
        msg = 'Exceeded Maximum Number of Function Evaluations';
        break;
    end

    if i == maxIter
        exitflag = 0;
        msg = 'Exceeded Maximum Number of Iterations';
        break;
    end

end

if verbose
    fprintf('%s\n',msg);
end
if nargout > 3
    output = struct('iterations',i,'funcCount',funEvals*funEvalMultiplier,...
        'algorithm',method,'firstorderopt',sum(abs(g)),'message',msg,'trace',trace);
end

% Output Function
if ~isempty(outputFcn)
    callOutput(outputFcn,x,'done',i,funEvals,f,t,gtd,g,d,sum(abs(g)),varargin{:});
end

end
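
% Example (illustrative): inspecting the documented output structure, e.g.
% plotting the per-iteration function values recorded in the trace:
%   [x,f,exitflag,output] = minFunc(@myObj,x0,options);
%   plot(output.trace.fval);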