import numpy as np

# definition of the function f ...
def f(x):
    return 100*(x[1] - x[0]**2)**2 + (1 - x[0])**2

# ... the gradient of f ...
def gradf(x):
    grad1 = -2*(1 - x[0]) - 400*(x[1] - x[0]**2)*x[0]
    grad2 = 200*(x[1] - x[0]**2)
    return np.array([grad1, grad2])

# ... and the Hessian of f:
def hessf(x):
    h11 = 1200*x[0]**2 - 400*x[1] + 2
    h12 = -400*x[0]
    h22 = 200
    return np.array([[h11, h12], [h12, h22]])


### Gradient Descent method
# We require the function values, the gradient, and the initialisation.
# In addition, we have optional variables alpha0 (initialisation of the
# step length), c (the Armijo parameter), and rho (contraction factor)
# for the backtracking. We also include two stopping criteria for the
# outer iteration: size of the gradient and number of steps;
# and a maximal number of steps for the backtracking.
# There is no clear general way of choosing the initial step length, so we set
# it to 1.0. The parameter c should in general be rather small (say 0.01)
# in order to avoid many failed attempts in the backtracking iterations,
# and rho should not be too large either (e.g. rho = 0.1).
def GD(f, gradf, x, alpha0=1.0, c=1e-2, rho=0.1, grad_stop=1e-6,
       max_steps_outer=5000, max_steps_backtracking=10):
    # set the current step to 0
    n_steps = 0
    # compute the current function value and gradient
    f_x = f(x)
    gradf_x = gradf(x)
    # compute its norm
    norm_gradf_x = np.linalg.norm(gradf_x)
    # iterate as long as the number of steps is within the given bounds
    # and the gradient is still large
    while (n_steps < max_steps_outer) and (norm_gradf_x > grad_stop):
        # set the step length back to alpha0 (in case it was changed
        # during the previous backtracking step)
        alpha = alpha0
        # compute the proposed next step
        x_next = x - alpha*gradf_x
        # compute the next function value
        f_x_next = f(x_next)
        # test whether the Armijo condition
        # f(x - alpha*gradf(x)) <= f(x) - c*alpha*||gradf(x)||**2 is satisfied;
        # if not, decrease the step length by the factor rho and compute
        # a new proposed next iterate and a new next function value.
        # At the same time, we keep track of the number of backtracking
        # iterations and stop if the number becomes too large
        n_steps_backtracking = 0
        while ((f_x_next > f_x - c*alpha*norm_gradf_x**2)
               and (n_steps_backtracking < max_steps_backtracking)):
            alpha *= rho
            x_next = x - alpha*gradf_x
            f_x_next = f(x_next)
            n_steps_backtracking += 1
        # the final x_next becomes our new x;
        # we need not recompute the function value, because we
        # have already done so in the backtracking steps;
        # the gradient has to be recomputed, though
        x = x_next
        f_x = f_x_next
        gradf_x = gradf(x)
        norm_gradf_x = np.linalg.norm(gradf_x)
        n_steps += 1
    # as additional information, print whether the algorithm stopped
    # before the maximal number of iterations was reached, and if so, when
    if n_steps == max_steps_outer:
        print("The algorithm stopped after the maximal number of iterations was reached.")
    else:
        print("The algorithm stopped after {} iterations".format(n_steps))
    # return the final iterate
    return x
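
# A minimal usage sketch (not part of the original routines): run GD on the
# Rosenbrock function defined above. The starting point np.array([-1.2, 1.0])
# is an assumption chosen for illustration; the minimiser of f is (1, 1) with
# f(1, 1) = 0. Gradient descent converges slowly on this ill-conditioned
# problem and may well hit the iteration cap before the gradient tolerance.
x0 = np.array([-1.2, 1.0])
x_gd = GD(f, gradf, x0)
print("GD result:", x_gd, "with f(x) =", f(x_gd))
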
### Newton's method
# We require the function values, the gradient, the Hessian,
# and the initialisation.
# In addition, we have optional variables c (the Armijo parameter)
# and rho (contraction factor) for the backtracking, and
# the parameter epsilon_grad for deciding when to switch to a gradient step.
# We also include two stopping criteria for the outer iteration:
# size of the gradient and number of steps;
# and a maximal number of steps for the backtracking.
# The parameter c should in general be rather small (say 0.01)
# in order to avoid many failed attempts in the backtracking iterations,
# and rho should not be too large either (e.g. rho = 0.25).
# Note:
# The parameter epsilon_grad should be rather small (but not too small)
# so that the method is still usable for somewhat ill-conditioned problems;
# else it might be necessary to switch to the gradient even close to the minimum.
def Newton(f, gradf, hessf, x, c=1e-2, rho=0.1, grad_stop=1e-6,
           max_steps_outer=5000, max_steps_backtracking=20, epsilon_grad=1e-6):
    # set the current step to 0
    n_steps = 0
    # compute the current function value and gradient
    f_x = f(x)
    gradf_x = gradf(x)
    # compute its norm
    norm_gradf_x = np.linalg.norm(gradf_x)
    # iterate as long as the number of steps is within the given bounds
    # and the gradient is still large
    while (n_steps < max_steps_outer) and (norm_gradf_x > grad_stop):
        # try to compute the Newton direction
        try:
            search_direction = -np.linalg.solve(hessf(x), gradf_x)
            # initial_descent stores the directional derivative <gradf(x), d>,
            # which is negative for a descent direction d;
            # if the resulting direction is too close to being an ascent direction,
            # switch to the gradient descent direction instead
            initial_descent = np.inner(search_direction, gradf_x)
            if initial_descent >= -epsilon_grad*np.linalg.norm(search_direction)*norm_gradf_x:
                search_direction = -gradf_x
                initial_descent = -norm_gradf_x**2
        except np.linalg.LinAlgError:
            # also switch to the gradient descent direction if the linalg package
            # had trouble solving the system;
            # this usually happens if the Hessian is singular or close to singular
            search_direction = -gradf_x
            initial_descent = -norm_gradf_x**2
        # set the step length back to 1.0 (in case it was changed
        # during the previous backtracking step)
        alpha = 1.0
        # compute the proposed next step
        x_next = x + alpha*search_direction
        # compute the next function value
        f_x_next = f(x_next)
        # test whether the Armijo condition
        # f(x + alpha*d) <= f(x) + c*alpha*<gradf(x), d> is satisfied;
        # if not, decrease the step length by the factor rho and compute
        # a new proposed next iterate and a new next function value.
        # At the same time, we keep track of the number of backtracking
        # iterations and stop if the number becomes too large
        n_steps_backtracking = 0
        while ((f_x_next > f_x + c*alpha*initial_descent)
               and (n_steps_backtracking < max_steps_backtracking)):
            alpha *= rho
            x_next = x + alpha*search_direction
            f_x_next = f(x_next)
            n_steps_backtracking += 1
        # the final x_next becomes our new x;
        # we need not recompute the function value, because we
        # have already done so in the backtracking steps;
        # the gradient has to be recomputed, though
        x = x_next
        f_x = f_x_next
        gradf_x = gradf(x)
        norm_gradf_x = np.linalg.norm(gradf_x)
        n_steps += 1
    # as additional information, print whether the algorithm stopped
    # before the maximal number of iterations was reached, and if so, when
    if n_steps == max_steps_outer:
        print("The algorithm stopped after the maximal number of iterations was reached.")
    else:
        print("The algorithm stopped after {} iterations".format(n_steps))
    # return the final iterate
    return x
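
# A minimal usage sketch (not part of the original routines): run Newton's
# method on the same Rosenbrock function from the same assumed starting point
# np.array([-1.2, 1.0]). With backtracking, Newton's method is expected to
# reach the gradient tolerance in far fewer iterations than gradient descent.
x0 = np.array([-1.2, 1.0])
x_newton = Newton(f, gradf, hessf, x0)
print("Newton result:", x_newton, "with f(x) =", f(x_newton))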