import numpy as np

# definition of the Rosenbrock function f ...
def f(x):
    return 100*(x[1]-x[0]**2)**2 + (1-x[0])**2

# ... the gradient of f ...
def gradf(x):
    grad1 = -2*(1-x[0]) - 400*(x[1]-x[0]**2)*x[0]
    grad2 = 200*(x[1]-x[0]**2)
    return np.array([grad1,grad2])

# ... and the Hessian of f:
def hessf(x):
    h11 = 1200*x[0]**2 - 400*x[1] + 2
    h12 = -400*x[0]
    h22 = 200
    return np.array([[h11,h12],[h12,h22]])

##################################################
# code for step length selection and CG method

def StrongWolfe(f,gradf,x,p,
                initial_value,
                initial_descent,
                initial_step_length = 1.0,
                c1 = 1e-2,
                c2 = 0.49,
                max_extrapolation_iterations = 50,
                max_interpolation_iterations = 20,
                rho = 2.0):
    '''
    Implementation of a bisection based bracketing method
    for the strong Wolfe conditions
    '''
    # initialise the bounds of the bracketing interval
    alphaR = initial_step_length
    alphaL = 0.0
    # the Armijo condition and the two parts of the Wolfe condition
    # are implemented as Boolean variables
    next_x = x+alphaR*p
    next_value = f(next_x)
    next_grad = gradf(next_x)
    Armijo = (next_value <= initial_value+c1*alphaR*initial_descent)
    descentR = np.inner(p,next_grad)
    curvatureLow = (descentR >= c2*initial_descent)
    curvatureHigh = (descentR <= -c2*initial_descent)
    # We start by increasing alphaR as long as Armijo and curvatureHigh hold,
    # but curvatureLow fails (that is, alphaR is definitely too small).
    # Note that curvatureHigh is automatically satisfied if curvatureLow fails.
    # Thus we only need to check whether Armijo holds and curvatureLow fails.
    itnr = 0
    while (itnr < max_extrapolation_iterations
           and (Armijo and (not curvatureLow))):
        itnr += 1
        # alphaR is a new lower bound for the step length;
        # the old upper bound alphaR needs to be replaced with a larger step length
        alphaL = alphaR
        alphaR *= rho
        # update function value and gradient
        next_x = x+alphaR*p
        next_value = f(next_x)
        next_grad = gradf(next_x)
        # update the Armijo and Wolfe conditions
        Armijo = (next_value <= initial_value+c1*alphaR*initial_descent)
        descentR = np.inner(p,next_grad)
        curvatureLow = (descentR >= c2*initial_descent)
        curvatureHigh = (descentR <= -c2*initial_descent)
    # At this point we should have a situation where alphaL is too small
    # and alphaR is either satisfactory or too large.
    # (Unless we have stopped because we used too many iterations. There
    # are at the moment no exceptions raised if this is the case.)
    alpha = alphaR
    itnr = 0
    # Use bisection in order to find a step length alpha that satisfies
    # all conditions.
    while (itnr < max_interpolation_iterations
           and (not (Armijo and curvatureLow and curvatureHigh))):
        itnr += 1
        if (Armijo and (not curvatureLow)):
            # the step length alpha was still too small;
            # replace the former lower bound with alpha
            alphaL = alpha
        else:
            # the step length alpha was too large;
            # replace the upper bound with alpha
            alphaR = alpha
        # choose a new step length as the mean of the new bounds
        alpha = (alphaL+alphaR)/2
        # update function value and gradient
        # (note that these have to be evaluated at the new trial step
        # length alpha, not at the upper bound alphaR)
        next_x = x+alpha*p
        next_value = f(next_x)
        next_grad = gradf(next_x)
        # update the Armijo and Wolfe conditions
        Armijo = (next_value <= initial_value+c1*alpha*initial_descent)
        descentR = np.inner(p,next_grad)
        curvatureLow = (descentR >= c2*initial_descent)
        curvatureHigh = (descentR <= -c2*initial_descent)
    # return the next iterate as well as the function value and gradient there
    # (in order to save time in the outer iteration; we have had to do these
    # computations anyway)
    return next_x,next_value,next_grad
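# ------------------------------------------------------------------
# Optional sanity check for the line search (a minimal sketch; the
# test point x_test and the variable names below are illustrative
# choices, not part of the method above). We take one steepest-descent
# direction for the Rosenbrock function and verify that the returned
# iterate satisfies the strong Wolfe conditions with the default
# parameters c1 = 1e-2 and c2 = 0.49.
x_test = np.array([-1.0,1.0])
value_test = f(x_test)
grad_test = gradf(x_test)
p_test = -grad_test
descent_test = np.inner(p_test,grad_test)
x_new,value_new,grad_new = StrongWolfe(f,gradf,x_test,p_test,
                                       value_test,descent_test)
# recover the accepted step length from x_new = x_test + alpha*p_test
alpha_test = np.linalg.norm(x_new-x_test)/np.linalg.norm(p_test)
print('Line search sanity check:')
print('Armijo condition holds: {}'.format(
    value_new <= value_test + 1e-2*alpha_test*descent_test))
print('Curvature condition holds: {}'.format(
    abs(np.inner(p_test,grad_new)) <= 0.49*abs(descent_test)))
print('\n')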
def CG_FR(f,gradf,x_init,
          max_steps = 2000,
          alpha_0 = 1.0,
          gradient_stop = 1e-6):
    '''
    Implementation of the Fletcher-Reeves method.
    '''
    # initialise the variable x
    x = x_init
    # set the current step number to 0
    n_step = 0
    # compute current function value and gradient
    current_value = f(x)
    current_gradient = gradf(x)
    # compute the squared norm of the gradient;
    # in the iteration, we will also need the norm of the previous
    # gradient, so we initialise this variable as well
    old_gradient_norm_2 = np.linalg.norm(current_gradient)**2
    current_gradient_norm_2 = old_gradient_norm_2
    # initialise the search direction
    search_direction = -current_gradient
    # main loop
    while (n_step < max_steps) and (current_gradient_norm_2 > gradient_stop**2):
        # increase the current step number
        n_step += 1
        # compute the expected descent for usage in the step length selection
        descent = np.inner(search_direction,current_gradient)
        # Run the step length selection.
        # For the strong Wolfe conditions, we need to compute
        # both the function value and the gradient at the eventually
        # chosen iterate; in order to avoid doing the same computation
        # twice, we let the step length selection also return these variables.
        x,current_value,current_gradient = StrongWolfe(f,gradf,x,search_direction,
                                                       current_value,descent,
                                                       initial_step_length = alpha_0)
        # update the squared norm of the gradient of the current iterate
        current_gradient_norm_2 = np.linalg.norm(current_gradient)**2
        # compute the beta in the Fletcher-Reeves method
        betaFR = current_gradient_norm_2/old_gradient_norm_2
        # overwrite the size of the old gradient with the current one
        old_gradient_norm_2 = current_gradient_norm_2
        # update the search direction
        search_direction = -current_gradient + betaFR*search_direction
    # Print out some information about the final result.
    # We test the stopping criterion itself rather than the step count,
    # so that convergence in exactly max_steps steps is reported correctly.
    if current_gradient_norm_2 <= gradient_stop**2:
        print('The iteration converged in {} steps.'.format(n_step))
    else:
        print('The iteration failed to converge in {} steps.'.format(n_step))
    print('The final iterate was {}.'.format(x))
    print('The size of the gradient at the final iterate was {}.'.format(
        np.linalg.norm(current_gradient)))
    return x
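# ------------------------------------------------------------------
# Design note on the choice of beta (a minimal sketch; the helper
# below is hypothetical and is not used by CG_FR above). The
# Fletcher-Reeves formula is only one of several standard updates for
# the conjugate search direction. A common alternative is the
# Polak-Ribiere formula truncated at zero ("PR+"), which tends to
# recover better from poor search directions; plugging it into CG_FR
# would additionally require storing a copy of the previous gradient,
# not just its squared norm.
def beta_PRplus(current_gradient,old_gradient):
    # beta = g_new.(g_new - g_old)/||g_old||^2, truncated at 0
    beta = (np.inner(current_gradient,current_gradient-old_gradient)
            /np.inner(old_gradient,old_gradient))
    return max(beta,0.0)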
#######################################################
# run tests with the Rosenbrock function

# test first for an initialisation at 0
print('First numerical test:')
x_init = np.zeros(2)
print('Initialisation: {}.'.format(x_init))
x = CG_FR(f,gradf,x_init)
print('\n')

# run the same test with a higher precision;
# we decrease the stopping tolerance for the gradient norm
print('Second numerical test with higher precision:')
x_init = np.zeros(2)
print('Initialisation: {}.'.format(x_init))
x = CG_FR(f,gradf,x_init,gradient_stop = 1e-12)
print('\n')

# run the algorithm with a more challenging starting value
print('Third numerical test with different starting value:')
x_init = np.array([-10.0,20.0])
print('Initialisation: {}.'.format(x_init))
x = CG_FR(f,gradf,x_init)
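# ------------------------------------------------------------------
# Reference check (a minimal sketch, assuming SciPy is available; not
# part of the method above). The Hessian hessf is defined at the top
# of the script but never used by CG_FR; it can, however, drive
# SciPy's Newton-CG solver, whose result should agree with the tests
# above up to the chosen tolerances.
try:
    from scipy.optimize import minimize
    result = minimize(f, np.zeros(2), method='Newton-CG',
                      jac=gradf, hess=hessf)
    print('SciPy Newton-CG reference solution: {}.'.format(result.x))
except ImportError:
    print('SciPy not available; skipping the reference check.')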