import numpy as np

# definition of the function f ...
def f(x):
    return 100*(x[1] - x[0]**2)**2 + (1 - x[0])**2

# ... the gradient of f ...
def gradf(x):
    grad1 = -2*(1 - x[0]) - 400*(x[1] - x[0]**2)*x[0]
    grad2 = 200*(x[1] - x[0]**2)
    return np.array([grad1, grad2])

# ... and the Hessian of f:
def hessf(x):
    h11 = 1200*x[0]**2 - 400*x[1] + 2
    h12 = -400*x[0]
    h22 = 200
    return np.array([[h11, h12], [h12, h22]])


### Gradient Descent method
# We require the function values, the gradient, and the initialisation.
# In addition, we have optional variables alpha0 (initialisation of the
# step length), c (the Armijo parameter), and rho (contraction factor)
# for the backtracking. We also include two stopping criteria for the
# outer iteration: size of the gradient and number of steps;
# and a maximal number of steps for the backtracking.
# There is no clear general way of choosing the initial step length, so we set
# it to 1.0. The parameter c should in general be rather small (say 0.01)
# in order to avoid many failed attempts in the backtracking iterations,
# and rho should not be too large either (e.g. rho = 0.1).
def GD(f, gradf, x, alpha0=1.0, c=1e-2, rho=0.1, grad_stop=1e-6,
       max_steps_outer=5000, max_steps_backtracking=10):
    # set the current step to 0
    n_steps = 0
    # compute the current function value and gradient
    f_x = f(x)
    gradf_x = gradf(x)
    # compute its norm
    norm_gradf_x = np.linalg.norm(gradf_x)
    # iterate as long as the number of steps is within the given bounds
    # and the gradient is still large
    while (n_steps < max_steps_outer) and (norm_gradf_x > grad_stop):
        # set the step length back to alpha0 (in case it was changed
        # during the previous backtracking step)
        alpha = alpha0
        # compute the proposed next step
        x_next = x - alpha*gradf_x
        # compute the next function value
        f_x_next = f(x_next)
        # test whether the Armijo condition
        # f(x - alpha*gradf(x)) <= f(x) - c*alpha*||gradf(x)||**2 is satisfied;
        # if not, decrease the step length by the factor rho and compute
        # a new proposed next iterate and a new next function value.
        # At the same time, we keep track of the number of backtracking
        # iterations and stop if the number becomes too large
        n_steps_backtracking = 0
        while ((f_x_next > f_x - c*alpha*norm_gradf_x**2)
               and (n_steps_backtracking < max_steps_backtracking)):
            alpha *= rho
            x_next = x - alpha*gradf_x
            f_x_next = f(x_next)
            n_steps_backtracking += 1
        # the final x_next becomes our new x;
        # we need not recompute the function value, because we
        # have already done so in the backtracking steps;
        # the gradient has to be recomputed, though
        x = x_next
        f_x = f_x_next
        gradf_x = gradf(x)
        norm_gradf_x = np.linalg.norm(gradf_x)
        n_steps += 1
    # as additional information, print whether the algorithm stopped
    # before the maximal number of iterations was reached, and if so, when
    if n_steps == max_steps_outer:
        print("The algorithm stopped after the maximal number of iterations was reached.")
    else:
        print("The algorithm stopped after {} iterations".format(n_steps))
    # return the final iterate
    return x
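
# A minimal usage sketch (not part of the original routines): run GD on the
# Rosenbrock function defined above. The starting point np.array([-1.2, 1.0])
# is an assumption chosen for illustration; the minimiser of f is (1, 1) with
# f(1, 1) = 0. Gradient descent converges slowly on this ill-conditioned
# problem and may well hit the iteration cap before the gradient tolerance.
x0 = np.array([-1.2, 1.0])
x_gd = GD(f, gradf, x0)
print("GD result:", x_gd, "with f(x) =", f(x_gd))
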
### Newton's method
# We require the function values, the gradient, the Hessian,
# and the initialisation.
# In addition, we have optional variables c (the Armijo parameter)
# and rho (contraction factor) for the backtracking, and
# the parameter epsilon_grad for deciding when to switch to a gradient step.
# We also include two stopping criteria for the outer iteration:
# size of the gradient and number of steps;
# and a maximal number of steps for the backtracking.
# The parameter c should in general be rather small (say 0.01)
# in order to avoid many failed attempts in the backtracking iterations,
# and rho should not be too large either (e.g. rho = 0.25).
# Note:
# The parameter epsilon_grad should be rather small (but not too small)
# so that the method is still usable for somewhat ill-conditioned problems;
# else it might be necessary to switch to the gradient even close to the minimum.
def Newton(f, gradf, hessf, x, c=1e-2, rho=0.1, grad_stop=1e-6,
           max_steps_outer=5000, max_steps_backtracking=20, epsilon_grad=1e-6):
    # set the current step to 0
    n_steps = 0
    # compute the current function value and gradient
    f_x = f(x)
    gradf_x = gradf(x)
    # compute its norm
    norm_gradf_x = np.linalg.norm(gradf_x)
    # iterate as long as the number of steps is within the given bounds
    # and the gradient is still large
    while (n_steps < max_steps_outer) and (norm_gradf_x > grad_stop):
        # try to compute the Newton direction
        try:
            search_direction = -np.linalg.solve(hessf(x), gradf_x)
            # initial_descent stores the directional derivative <gradf(x), d>,
            # which is negative for a descent direction d;
            # if the resulting direction is too close to being an ascent direction,
            # switch to the gradient descent direction instead
            initial_descent = np.inner(search_direction, gradf_x)
            if initial_descent >= -epsilon_grad*np.linalg.norm(search_direction)*norm_gradf_x:
                search_direction = -gradf_x
                initial_descent = -norm_gradf_x**2
        except np.linalg.LinAlgError:
            # also switch to the gradient descent direction if the linalg package
            # had trouble solving the system;
            # this usually happens if the Hessian is singular or close to singular
            search_direction = -gradf_x
            initial_descent = -norm_gradf_x**2
        # set the step length back to 1.0 (in case it was changed
        # during the previous backtracking step)
        alpha = 1.0
        # compute the proposed next step
        x_next = x + alpha*search_direction
        # compute the next function value
        f_x_next = f(x_next)
        # test whether the Armijo condition
        # f(x + alpha*d) <= f(x) + c*alpha*<gradf(x), d> is satisfied;
        # if not, decrease the step length by the factor rho and compute
        # a new proposed next iterate and a new next function value.
        # At the same time, we keep track of the number of backtracking
        # iterations and stop if the number becomes too large
        n_steps_backtracking = 0
        while ((f_x_next > f_x + c*alpha*initial_descent)
               and (n_steps_backtracking < max_steps_backtracking)):
            alpha *= rho
            x_next = x + alpha*search_direction
            f_x_next = f(x_next)
            n_steps_backtracking += 1
        # the final x_next becomes our new x;
        # we need not recompute the function value, because we
        # have already done so in the backtracking steps;
        # the gradient has to be recomputed, though
        x = x_next
        f_x = f_x_next
        gradf_x = gradf(x)
        norm_gradf_x = np.linalg.norm(gradf_x)
        n_steps += 1
    # as additional information, print whether the algorithm stopped
    # before the maximal number of iterations was reached, and if so, when
    if n_steps == max_steps_outer:
        print("The algorithm stopped after the maximal number of iterations was reached.")
    else:
        print("The algorithm stopped after {} iterations".format(n_steps))
    # return the final iterate
    return x
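
# A minimal usage sketch (not part of the original routines): run Newton's
# method on the same Rosenbrock function from the same assumed starting point
# np.array([-1.2, 1.0]). With backtracking, Newton's method is expected to
# reach the gradient tolerance in far fewer iterations than gradient descent.
x0 = np.array([-1.2, 1.0])
x_newton = Newton(f, gradf, hessf, x0)
print("Newton result:", x_newton, "with f(x) =", f(x_newton))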