# Copyright 2014-2019 The ODL contributors
#
# This file is part of ODL.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
"""Simple iterative type optimization schemes."""
from __future__ import print_function, division, absolute_import
from builtins import next
import numpy as np
from odl.operator import IdentityOperator, OperatorComp, OperatorSum
from odl.util import normalized_scalar_param_list
__all__ = ('landweber', 'conjugate_gradient', 'conjugate_gradient_normal',
'gauss_newton', 'kaczmarz')
# TODO: update all docs
def landweber(op, x, rhs, niter, omega=None, projection=None, callback=None):
r"""Optimized implementation of Landweber's method.
Solves the inverse problem::
A(x) = rhs
Parameters
----------
op : `Operator`
Operator in the inverse problem. ``op.derivative(x).adjoint`` must be
well-defined for ``x`` in the operator domain.
x : ``op.domain`` element
Element to which the result is written. Its initial value is
used as starting point of the iteration, and its values are
updated in each iteration step.
rhs : ``op.range`` element
Right-hand side of the equation defining the inverse problem.
niter : int
Number of iterations.
omega : positive float, optional
Relaxation parameter in the iteration.
Default: ``1 / op.norm(estimate=True) ** 2``
projection : callable, optional
Function that can be used to modify the iterates in each iteration,
for example enforcing positivity. The function should take one
argument and modify it in-place.
callback : callable, optional
Object executing code per iteration, e.g. plotting each iterate.
Notes
-----
This method calculates an approximate least-squares solution of
the inverse problem of the first kind
.. math::
\mathcal{A} (x) = y,
for a given :math:`y\in \mathcal{Y}`, i.e. an approximate
solution :math:`x^*` to
.. math::
\min_{x\in \mathcal{X}} \| \mathcal{A}(x) - y \|_{\mathcal{Y}}^2
for a (Frechet-) differentiable operator
:math:`\mathcal{A}: \mathcal{X} \to \mathcal{Y}` between Hilbert
spaces :math:`\mathcal{X}` and :math:`\mathcal{Y}`. The method
starts from an initial guess :math:`x_0` and uses the
iteration
.. math::
x_{k+1} = x_k -
\omega \ \partial \mathcal{A}(x_k)^* (\mathcal{A}(x_k) - y),
where :math:`\partial \mathcal{A}(x_k)` is the Frechet derivative
of :math:`\mathcal{A}` at :math:`x_k` and :math:`\omega` is a
relaxation parameter. For linear problems, a choice
:math:`0 < \omega < 2/\lVert \mathcal{A}\rVert^2` guarantees
convergence, where :math:`\lVert\mathcal{A}\rVert` stands for the
operator norm of :math:`\mathcal{A}`.
Users may optionally provide a ``projection`` that maps each iterate
onto some subset, for example to enforce positivity.
This implementation uses a minimum amount of memory copies by
applying re-usable temporaries and in-place evaluation.
The method is also described in a
`Wikipedia article
<https://en.wikipedia.org/wiki/Landweber_iteration>`_.
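Examples
--------
A minimal sketch, assuming the standard top-level ODL namespace
(``odl.rn``, ``odl.ScalingOperator``); ``omega`` is chosen below
``2 / ||A||**2 = 0.5``:

>>> import odl
>>> space = odl.rn(3)
>>> op = odl.ScalingOperator(space, 2.0)  # A(x) = 2 * x
>>> rhs = space.element([2, 4, 6])  # exact solution is [1, 2, 3]
>>> x = space.zero()
>>> landweber(op, x, rhs, niter=50, omega=0.2)
>>> (x - space.element([1, 2, 3])).norm() < 1e-5
True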
"""
# TODO: add a book reference
if x not in op.domain:
raise TypeError('`x` {!r} is not in the domain of `op` {!r}'
''.format(x, op.domain))
if omega is None:
omega = 1 / op.norm(estimate=True) ** 2
# Reusable temporaries
tmp_ran = op.range.element()
tmp_dom = op.domain.element()
for _ in range(niter):
op(x, out=tmp_ran)
tmp_ran -= rhs
op.derivative(x).adjoint(tmp_ran, out=tmp_dom)
x.lincomb(1, x, -omega, tmp_dom)
if projection is not None:
projection(x)
if callback is not None:
callback(x)
def conjugate_gradient(op, x, rhs, niter, callback=None):
"""Optimized implementation of CG for self-adjoint operators.
This method solves the inverse problem (of the first kind)::
A(x) = y
for a linear and self-adjoint `Operator` ``A``.
It uses a minimum amount of memory copies by applying re-usable
temporaries and in-place evaluation.
The method is described (for linear systems) in a
`Wikipedia article
<https://en.wikipedia.org/wiki/Conjugate_gradient_method>`_.
Parameters
----------
op : linear `Operator`
Operator in the inverse problem. It must be linear and
self-adjoint. This implies in particular that its domain and
range are equal.
x : ``op.domain`` element
Element to which the result is written. Its initial value is
used as starting point of the iteration, and its values are
updated in each iteration step.
rhs : ``op.range`` element
Right-hand side of the equation defining the inverse problem.
niter : int
Number of iterations.
callback : callable, optional
Object executing code per iteration, e.g. plotting each iterate.
See Also
--------
conjugate_gradient_normal : Solver for nonsymmetric matrices
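Examples
--------
A minimal sketch with a self-adjoint, positive definite operator
(``odl.ScalingOperator``); for a scaled identity, CG converges in a
single step:

>>> import odl
>>> space = odl.rn(3)
>>> op = odl.ScalingOperator(space, 3.0)  # self-adjoint on rn(3)
>>> rhs = space.element([3, 6, 9])  # exact solution is [1, 2, 3]
>>> x = space.zero()
>>> conjugate_gradient(op, x, rhs, niter=2)
>>> (x - space.element([1, 2, 3])).norm() < 1e-10
True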
"""
# TODO: add a book reference
# TODO: update doc
if op.domain != op.range:
raise ValueError('operator needs to be self-adjoint')
if x not in op.domain:
raise TypeError('`x` {!r} is not in the domain of `op` {!r}'
''.format(x, op.domain))
r = op(x)
r.lincomb(1, rhs, -1, r) # r = rhs - A x
p = r.copy()
d = op.domain.element()  # Extra storage for A p
sqnorm_r_old = r.norm() ** 2 # Only recalculate norm after update
if sqnorm_r_old == 0: # Return if no step forward
return
for _ in range(niter):
op(p, out=d) # d = A p
inner_p_d = p.inner(d)
if inner_p_d == 0.0: # Return if step is 0
return
alpha = sqnorm_r_old / inner_p_d
x.lincomb(1, x, alpha, p) # x = x + alpha*p
r.lincomb(1, r, -alpha, d) # r = r - alpha*d
sqnorm_r_new = r.norm() ** 2
beta = sqnorm_r_new / sqnorm_r_old
sqnorm_r_old = sqnorm_r_new
p.lincomb(1, r, beta, p)  # p = r + beta * p
if callback is not None:
callback(x)
def conjugate_gradient_normal(op, x, rhs, niter=1, callback=None):
"""Optimized implementation of CG for the normal equation.
This method solves the inverse problem (of the first kind) ::
A(x) == rhs
with a linear `Operator` ``A`` by looking at the normal equation ::
A.adjoint(A(x)) == A.adjoint(rhs)
It uses a minimum amount of memory copies by applying re-usable
temporaries and in-place evaluation.
The method is described (for linear systems) in a
`Wikipedia article
<https://en.wikipedia.org/wiki/Conjugate_gradient_method#\
Conjugate_gradient_on_the_normal_equations>`_.
Parameters
----------
op : `Operator`
Operator in the inverse problem. If not linear, it must have
an implementation of `Operator.derivative`, which
in turn must implement `Operator.adjoint`, i.e.
the call ``op.derivative(x).adjoint`` must be valid.
x : ``op.domain`` element
Element to which the result is written. Its initial value is
used as starting point of the iteration, and its values are
updated in each iteration step.
rhs : ``op.range`` element
Right-hand side of the equation defining the inverse problem.
niter : int
Number of iterations.
callback : callable, optional
Object executing code per iteration, e.g. plotting each iterate.
See Also
--------
conjugate_gradient : Optimized solver for symmetric matrices
odl.solvers.smooth.nonlinear_cg.conjugate_gradient_nonlinear :
Equivalent solver for the nonlinear case
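Examples
--------
A minimal sketch with a nonsymmetric matrix via ``odl.MatrixOperator``
(for a 2x2 system, at most 2 iterations are needed in exact
arithmetic):

>>> import numpy as np
>>> import odl
>>> op = odl.MatrixOperator(np.array([[1.0, 1.0],
...                                   [0.0, 1.0]]))
>>> rhs = op.range.element([3, 2])  # exact solution is [1, 2]
>>> x = op.domain.zero()
>>> conjugate_gradient_normal(op, x, rhs, niter=2)
>>> (x - op.domain.element([1, 2])).norm() < 1e-10
True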
"""
# TODO: add a book reference
# TODO: update doc
if x not in op.domain:
raise TypeError('`x` {!r} is not in the domain of `op` {!r}'
''.format(x, op.domain))
d = op(x)
d.lincomb(1, rhs, -1, d) # d = rhs - A x
p = op.derivative(x).adjoint(d)
s = p.copy()
q = op.range.element()
sqnorm_s_old = s.norm() ** 2 # Only recalculate norm after update
for _ in range(niter):
op(p, out=q) # q = A p
sqnorm_q = q.norm() ** 2
if sqnorm_q == 0.0: # Return if residual is 0
return
a = sqnorm_s_old / sqnorm_q
x.lincomb(1, x, a, p) # x = x + a*p
d.lincomb(1, d, -a, q) # d = d - a*Ap
op.derivative(x).adjoint(d, out=s)  # s = A'(x)^T d
sqnorm_s_new = s.norm() ** 2
b = sqnorm_s_new / sqnorm_s_old
sqnorm_s_old = sqnorm_s_new
p.lincomb(1, s, b, p) # p = s + b * p
if callback is not None:
callback(x)
def exp_zero_seq(base):
"""Default exponential zero sequence.
It is defined by
t_0 = 1.0 / base
t_m = t_(m-1) / base
or, in closed form
t_m = base^(-m-1)
Parameters
----------
base : float
Base of the sequence. Its absolute value must be larger than 1.
Yields
------
val : float
The next value in the exponential sequence.
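Examples
--------
The first yielded value is ``1 / base``:

>>> seq = exp_zero_seq(2.0)
>>> [next(seq) for _ in range(4)]
[0.5, 0.25, 0.125, 0.0625]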
"""
value = 1.0
while True:
value /= base
yield value
def gauss_newton(op, x, rhs, niter, zero_seq=None, callback=None):
"""Optimized implementation of a Gauss-Newton method.
This method solves the inverse problem (of the first kind)::
A(x) = y
for a (Frechet-) differentiable `Operator` ``A`` using a
Gauss-Newton iteration.
It uses a minimum amount of memory copies by applying re-usable
temporaries and in-place evaluation.
A variant of the method applied to a specific problem is described
in a
`Wikipedia article
<https://en.wikipedia.org/wiki/Gauss%E2%80%93Newton_algorithm>`_.
Parameters
----------
op : `Operator`
Operator in the inverse problem. If not linear, it must have
an implementation of `Operator.derivative`, which
in turn must implement `Operator.adjoint`, i.e.
the call ``op.derivative(x).adjoint`` must be valid.
x : ``op.domain`` element
Element to which the result is written. Its initial value is
used as starting point of the iteration, and its values are
updated in each iteration step.
rhs : ``op.range`` element
Right-hand side of the equation defining the inverse problem.
niter : int
Maximum number of iterations.
zero_seq : iterable, optional
Zero sequence whose values are used for the regularization of
the linearized problem in each Newton step.
Default: ``exp_zero_seq(2.0)``
callback : callable, optional
Object executing code per iteration, e.g. plotting each iterate.
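Examples
--------
A minimal sketch on a small linear problem via ``odl.MatrixOperator``
(for a linear operator, the iterates tend to the exact solution as the
regularization sequence tends to zero):

>>> import numpy as np
>>> import odl
>>> op = odl.MatrixOperator(np.array([[2.0, 0.0],
...                                   [0.0, 4.0]]))
>>> rhs = op.range.element([2, 8])  # exact solution is [1, 2]
>>> x = op.domain.zero()
>>> gauss_newton(op, x, rhs, niter=20)
>>> (x - op.domain.element([1, 2])).norm() < 1e-4
True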
"""
if x not in op.domain:
raise TypeError('`x` {!r} is not in the domain of `op` {!r}'
''.format(x, op.domain))
if zero_seq is None:
# A generator used as a default argument would be shared (and
# consumed) across calls, so create the default sequence per call.
zero_seq = exp_zero_seq(2.0)
x0 = x.copy()
id_op = IdentityOperator(op.domain)
dx = op.domain.zero()
tmp_dom = op.domain.element()
u = op.domain.element()
tmp_ran = op.range.element()
v = op.range.element()
for _ in range(niter):
tm = next(zero_seq)
deriv = op.derivative(x)
deriv_adjoint = deriv.adjoint
# v = rhs - op(x) - deriv(x0-x)
# u = deriv.T(v)
op(x, out=tmp_ran) # eval op(x)
v.lincomb(1, rhs, -1, tmp_ran) # assign v = rhs - op(x)
tmp_dom.lincomb(1, x0, -1, x) # assign temp tmp_dom = x0 - x
deriv(tmp_dom, out=tmp_ran) # eval deriv(x0-x)
v -= tmp_ran # assign v = rhs-op(x)-deriv(x0-x)
deriv_adjoint(v, out=u) # eval/assign u = deriv.T(v)
# Solve the Tikhonov-regularized system
# (deriv.T o deriv + tm * id_op)(dx) = u
tikh_op = OperatorSum(OperatorComp(deriv.adjoint, deriv),
tm * id_op, tmp_dom)
# TODO: allow user to select other method
conjugate_gradient(tikh_op, dx, u, 3)
# Update x
x.lincomb(1, x0, 1, dx) # x = x0 + dx
if callback is not None:
callback(x)
def kaczmarz(ops, x, rhs, niter, omega=1, projection=None, random=False,
callback=None, callback_loop='outer'):
r"""Optimized implementation of Kaczmarz's method.
Solves the inverse problem given by the set of equations::
A_n(x) = rhs_n
This is also known as the Landweber-Kaczmarz method, since it
coincides with the Landweber method for a single operator.
Parameters
----------
ops : sequence of `Operator`'s
Operators in the inverse problem. ``op[i].derivative(x).adjoint`` must
be well-defined for ``x`` in the operator domain and for all ``i``.
x : ``ops[i].domain`` element
Element to which the result is written. Its initial value is
used as starting point of the iteration, and its values are
updated in each iteration step.
rhs : sequence of ``ops[i].range`` elements
Right-hand side of the equation defining the inverse problem.
niter : int
Number of iterations.
omega : positive float or sequence of positive floats, optional
Relaxation parameter in the iteration. If a single float is given,
the same parameter is used for all operators; otherwise, the i-th
operator uses ``omega[i]``.
projection : callable, optional
Function that can be used to modify the iterates in each iteration,
for example enforcing positivity. The function should take one
argument and modify it in-place.
random : bool, optional
If `True`, the order of the operators is randomized in each iteration.
callback : callable, optional
Object executing code per iteration, e.g. plotting each iterate.
callback_loop : {'inner', 'outer'}
Whether the callback should be called in the inner or outer loop.
Notes
-----
This method calculates an approximate least-squares solution of
the inverse problem of the first kind
.. math::
\mathcal{A}_i (x) = y_i, \quad 1 \leq i \leq n,
for given :math:`y_i \in \mathcal{Y}_i`, i.e. an approximate
solution :math:`x^*` to
.. math::
\min_{x\in \mathcal{X}}
\sum_{i=1}^n \| \mathcal{A}_i(x) - y_i \|_{\mathcal{Y}_i}^2
for (Frechet-) differentiable operators
:math:`\mathcal{A}_i: \mathcal{X} \to \mathcal{Y}_i` between Hilbert
spaces :math:`\mathcal{X}` and :math:`\mathcal{Y}_i`. The method
starts from an initial guess :math:`x_0` and uses the
iteration
.. math::
x_{k+1} = x_k - \omega_{[k]} \ \partial \mathcal{A}_{[k]}(x_k)^*
(\mathcal{A}_{[k]}(x_k) - y_{[k]}),
where :math:`\partial \mathcal{A}_{[k]}(x_k)` is the Frechet derivative
of :math:`\mathcal{A}_{[k]}` at :math:`x_k`, :math:`\omega_{[k]}` is a
relaxation parameter and :math:`[k] := k \text{ mod } n`.
For linear problems, a choice
:math:`0 < \omega_i < 2/\lVert \mathcal{A}_i \rVert^2` guarantees
convergence, where :math:`\lVert\mathcal{A}_i\rVert` stands for the
operator norm of :math:`\mathcal{A}_i`.
This implementation uses a minimum amount of memory copies by
applying re-usable temporaries and in-place evaluation.
The method is also described in a
`Wikipedia article
<https://en.wikipedia.org/wiki/Kaczmarz_method>`_ and in Natterer, F.,
*Mathematical Methods in Image Reconstruction*, Section 5.3.2.
See Also
--------
landweber
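Examples
--------
A minimal sketch with two scaling operators on ``odl.rn(3)`` (any
operators with a common domain would do); each ``omega[i]`` is chosen
below ``2 / ||A_i||**2``:

>>> import odl
>>> space = odl.rn(3)
>>> ops = [odl.ScalingOperator(space, 1.0),
...        odl.ScalingOperator(space, 2.0)]
>>> truth = space.element([1, 2, 3])
>>> rhs = [ops[0](truth), ops[1](truth)]
>>> x = space.zero()
>>> kaczmarz(ops, x, rhs, niter=20, omega=[0.5, 0.2])
>>> (x - truth).norm() < 1e-5
True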
"""
domain = ops[0].domain
if any(domain != opi.domain for opi in ops):
raise ValueError('domains of `ops` are not all equal')
if x not in domain:
raise TypeError('`x` {!r} is not in the domain of `ops` {!r}'
''.format(x, domain))
if len(ops) != len(rhs):
raise ValueError('number of `ops` {} does not match number of '
'`rhs` {}'.format(len(ops), len(rhs)))
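# Expand `omega` into a list with one relaxation parameter per operator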
omega = normalized_scalar_param_list(omega, len(ops), param_conv=float)
# Reusable elements in the range, one per type of space
ranges = [opi.range for opi in ops]
unique_ranges = set(ranges)
tmp_rans = {ran: ran.element() for ran in unique_ranges}
# Single reusable element in the domain
tmp_dom = domain.element()
# Iteratively find solution
for _ in range(niter):
if random:
rng = np.random.permutation(range(len(ops)))
else:
rng = range(len(ops))
for i in rng:
# Find residual
tmp_ran = tmp_rans[ops[i].range]
ops[i](x, out=tmp_ran)
tmp_ran -= rhs[i]
# Update x
ops[i].derivative(x).adjoint(tmp_ran, out=tmp_dom)
x.lincomb(1, x, -omega[i], tmp_dom)
if projection is not None:
projection(x)
if callback is not None and callback_loop == 'inner':
callback(x)
if callback is not None and callback_loop == 'outer':
callback(x)
if __name__ == '__main__':
from odl.util.testutils import run_doctests
run_doctests()