Source code for odl.solvers.smooth.gradient

# Copyright 2014-2019 The ODL contributors
#
# This file is part of ODL.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.

"""Gradient-based optimization schemes."""

from __future__ import print_function, division, absolute_import
import numpy as np

from odl.solvers.util import ConstantLineSearch


__all__ = ('steepest_descent', 'adam')


# TODO: update all docs



[docs]
def steepest_descent(f, x, line_search=1.0, maxiter=1000, tol=1e-16,
                     projection=None, callback=None):
    r"""Steepest descent method to minimize an objective function.

    General implementation of steepest decent (also known as gradient
    decent) for solving

    .. math::
        \min f(x)

    The algorithm is intended for unconstrained problems. It needs line
    search in order guarantee convergence. With appropriate line search,
    it can also be used for constrained problems where one wants to
    minimize over some given set :math:`C`. This can be done by defining
    :math:`f(x) = \infty` for :math:`x\\not\\in C`, or by providing a
    ``projection`` function that projects the iterates on :math:`C`.

    The algorithm is described in [BV2004], section 9.3--9.4
    (`book available online
    <http://stanford.edu/~boyd/cvxbook/bv_cvxbook.pdf>`_),
    [GNS2009], Section 12.2, and wikipedia
    `Gradient_descent
    <https://en.wikipedia.org/wiki/Gradient_descent>`_.

    Parameters
    ----------
    f : `Functional`
        Goal functional. Needs to have ``f.gradient``.
    x : ``f.domain`` element
        Starting point of the iteration
    line_search : float or `LineSearch`, optional
        Strategy to choose the step length. If a float is given, uses it as a
        fixed step length.
    maxiter : int, optional
        Maximum number of iterations.
    tol : float, optional
        Tolerance that should be used for terminating the iteration.
    projection : callable, optional
        Function that can be used to modify the iterates in each iteration,
        for example enforcing positivity. The function should take one
        argument and modify it in-place.
    callback : callable, optional
        Object executing code per iteration, e.g. plotting each iterate

    See Also
    --------
    odl.solvers.iterative.iterative.landweber :
        Optimized solver for the case ``f(x) = ||Ax - b||_2^2``
    odl.solvers.iterative.iterative.conjugate_gradient :
        Optimized solver for the case ``f(x) = x^T Ax - 2 x^T b``

    References
    ----------
    [BV2004] Boyd, S, and Vandenberghe, L. *Convex optimization*.
    Cambridge university press, 2004.

    [GNS2009] Griva, I, Nash, S G, and Sofer, A. *Linear and nonlinear
    optimization*. Siam, 2009.
    """
    grad = f.gradient
    if x not in grad.domain:
        raise TypeError('`x` {!r} is not in the domain of `grad` {!r}'
                        ''.format(x, grad.domain))

    if not callable(line_search):
        line_search = ConstantLineSearch(line_search)

    grad_x = grad.range.element()
    for _ in range(maxiter):
        grad(x, out=grad_x)

        dir_derivative = -grad_x.norm() ** 2
        if np.abs(dir_derivative) < tol:
            return  # we have converged
        step = line_search(x, -grad_x, dir_derivative)

        x.lincomb(1, x, -step, grad_x)

        if projection is not None:
            projection(x)

        if callback is not None:
            callback(x)




[docs]
def adam(f, x, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-8,
         maxiter=1000, tol=1e-16, callback=None):
    r"""ADAM method to minimize an objective function.

    General implementation of ADAM for solving

    .. math::
        \min f(x)

    where :math:`f` is a differentiable functional.

    The algorithm is described in [KB2015] (`arxiv
    <https://arxiv.org/abs/1412.6980>`_). All parameter names and default
    valuesare taken from the article.

    Parameters
    ----------
    f : `Functional`
        Goal functional. Needs to have ``f.gradient``.
    x : ``f.domain`` element
        Starting point of the iteration, updated in place.
    learning_rate : positive float, optional
        Step length of the method.
    beta1 : float in [0, 1), optional
        Update rate for first order moment estimate.
    beta2 : float in [0, 1), optional
        Update rate for second order moment estimate.
    eps : positive float, optional
        A small constant for numerical stability.
    maxiter : int, optional
        Maximum number of iterations.
    tol : positive float, optional
        Tolerance that should be used for terminating the iteration.
    callback : callable, optional
        Object executing code per iteration, e.g. plotting each iterate.

    See Also
    --------
    odl.solvers.smooth.gradient.steepest_descent : Simple gradient descent.
    odl.solvers.iterative.iterative.landweber :
        Optimized solver for the case ``f(x) = ||Ax - b||_2^2``.
    odl.solvers.iterative.iterative.conjugate_gradient :
        Optimized solver for the case ``f(x) = x^T Ax - 2 x^T b``.

    References
    ----------
    [KB2015] Kingma, D P and Ba, J.
    *Adam: A Method for Stochastic Optimization*, ICLR 2015.
    """
    grad = f.gradient
    if x not in grad.domain:
        raise TypeError('`x` {!r} is not in the domain of `grad` {!r}'
                        ''.format(x, grad.domain))

    m = grad.domain.zero()
    v = grad.domain.zero()

    grad_x = grad.range.element()
    for _ in range(maxiter):
        grad(x, out=grad_x)

        if grad_x.norm() < tol:
            return

        m.lincomb(beta1, m, 1 - beta1, grad_x)
        v.lincomb(beta2, v, 1 - beta2, grad_x ** 2)

        step = learning_rate * np.sqrt(1 - beta2) / (1 - beta1)

        x.lincomb(1, x, -step, m / (np.sqrt(v) + eps))

        if callback is not None:
            callback(x)



if __name__ == '__main__':
    from odl.util.testutils import run_doctests
    run_doctests()