Source code for odl.solvers.iterative.iterative

# Copyright 2014-2019 The ODL contributors
#
# This file is part of ODL.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.

"""Simple iterative type optimization schemes."""

from __future__ import print_function, division, absolute_import
from builtins import next
import numpy as np

from odl.operator import IdentityOperator, OperatorComp, OperatorSum
from odl.util import normalized_scalar_param_list


__all__ = ('landweber', 'conjugate_gradient', 'conjugate_gradient_normal',
           'gauss_newton', 'kaczmarz')


# TODO: update all docs


def landweber(op, x, rhs, niter, omega=None, projection=None, callback=None):
    r"""Optimized implementation of Landweber's method.

    Solves the inverse problem::

        A(x) = rhs

    Parameters
    ----------
    op : `Operator`
        Operator in the inverse problem. ``op.derivative(x).adjoint`` must be
        well-defined for ``x`` in the operator domain.
    x : ``op.domain`` element
        Element to which the result is written. Its initial value is
        used as starting point of the iteration, and its values are
        updated in each iteration step.
    rhs : ``op.range`` element
        Right-hand side of the equation defining the inverse problem.
    niter : int
        Number of iterations.
    omega : positive float, optional
        Relaxation parameter in the iteration.
        Default: ``1 / op.norm(estimate=True) ** 2``
    projection : callable, optional
        Function that can be used to modify the iterates in each iteration,
        for example enforcing positivity. The function should take one
        argument and modify it in-place.
    callback : callable, optional
        Object executing code per iteration, e.g. plotting each iterate.

    Notes
    -----
    This method calculates an approximate least-squares solution of
    the inverse problem of the first kind

    .. math::
        \mathcal{A} (x) = y,

    for a given :math:`y\in \mathcal{Y}`, i.e. an approximate solution
    :math:`x^*` to

    .. math::
        \min_{x\in \mathcal{X}} \| \mathcal{A}(x) - y \|_{\mathcal{Y}}^2

    for a (Frechet-) differentiable operator
    :math:`\mathcal{A}: \mathcal{X} \to \mathcal{Y}` between Hilbert
    spaces :math:`\mathcal{X}` and :math:`\mathcal{Y}`. The method
    starts from an initial guess :math:`x_0` and uses the iteration

    .. math::
        x_{k+1} = x_k -
        \omega \ \partial \mathcal{A}(x_k)^* (\mathcal{A}(x_k) - y),

    where :math:`\partial \mathcal{A}(x_k)` is the Frechet derivative
    of :math:`\mathcal{A}` at :math:`x_k` and :math:`\omega` is a
    relaxation parameter. For linear problems, a choice
    :math:`0 < \omega < 2/\lVert \mathcal{A}\rVert^2` guarantees
    convergence, where :math:`\lVert\mathcal{A}\rVert` stands for the
    operator norm of :math:`\mathcal{A}`.

    Users may also optionally provide a projection that maps each
    iterate onto some subset, for example enforcing positivity.

    This implementation uses a minimum amount of memory copies by
    applying re-usable temporaries and in-place evaluation.

    The method is also described in a `Wikipedia article
    <https://en.wikipedia.org/wiki/Landweber_iteration>`_.
    """
    # TODO: add a book reference

    if x not in op.domain:
        raise TypeError('`x` {!r} is not in the domain of `op` {!r}'
                        ''.format(x, op.domain))

    if omega is None:
        omega = 1 / op.norm(estimate=True) ** 2

    # Reusable temporaries
    tmp_ran = op.range.element()
    tmp_dom = op.domain.element()

    for _ in range(niter):
        op(x, out=tmp_ran)
        tmp_ran -= rhs
        op.derivative(x).adjoint(tmp_ran, out=tmp_dom)

        x.lincomb(1, x, -omega, tmp_dom)

        if projection is not None:
            projection(x)

        if callback is not None:
            callback(x)
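
# Illustrative usage sketch (not part of the original module): one way
# `landweber` might be called on a small overdetermined linear problem. It
# assumes the top-level ODL API (`odl.MatrixOperator`) is available; the
# function below is hypothetical and never called at import time.
def _landweber_usage_sketch():
    import odl

    matrix = np.array([[1.0, 2.0],
                       [3.0, 4.0],
                       [5.0, 6.0]])
    op = odl.MatrixOperator(matrix)              # linear operator R^2 -> R^3
    true_x = op.domain.element([1.0, -1.0])
    rhs = op(true_x)                             # consistent right-hand side

    # Relaxation parameter chosen below 2 / ||A||^2 to ensure convergence
    omega = 1.0 / np.linalg.norm(matrix, 2) ** 2

    x = op.domain.zero()                         # starting point, updated in-place
    landweber(op, x, rhs, niter=100, omega=omega)
    return x
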
def conjugate_gradient(op, x, rhs, niter, callback=None):
    """Optimized implementation of CG for self-adjoint operators.

    This method solves the inverse problem (of the first kind)::

        A(x) = y

    for a linear and self-adjoint `Operator` ``A``.

    It uses a minimum amount of memory copies by applying re-usable
    temporaries and in-place evaluation.

    The method is described (for linear systems) in a `Wikipedia article
    <https://en.wikipedia.org/wiki/Conjugate_gradient_method>`_.

    Parameters
    ----------
    op : linear `Operator`
        Operator in the inverse problem. It must be linear and
        self-adjoint. This implies in particular that its domain and
        range are equal.
    x : ``op.domain`` element
        Element to which the result is written. Its initial value is
        used as starting point of the iteration, and its values are
        updated in each iteration step.
    rhs : ``op.range`` element
        Right-hand side of the equation defining the inverse problem.
    niter : int
        Number of iterations.
    callback : callable, optional
        Object executing code per iteration, e.g. plotting each iterate.

    See Also
    --------
    conjugate_gradient_normal : Solver for nonsymmetric matrices
    """
    # TODO: add a book reference
    # TODO: update doc

    if op.domain != op.range:
        raise ValueError('operator needs to be self-adjoint')

    if x not in op.domain:
        raise TypeError('`x` {!r} is not in the domain of `op` {!r}'
                        ''.format(x, op.domain))

    r = op(x)
    r.lincomb(1, rhs, -1, r)       # r = rhs - A x
    p = r.copy()
    d = op.domain.element()        # Extra storage for A p

    sqnorm_r_old = r.norm() ** 2   # Only recalculate norm after update

    if sqnorm_r_old == 0:          # Return if no step forward
        return

    for _ in range(niter):
        op(p, out=d)               # d = A p

        inner_p_d = p.inner(d)

        if inner_p_d == 0.0:       # Return if step is 0
            return

        alpha = sqnorm_r_old / inner_p_d

        x.lincomb(1, x, alpha, p)   # x = x + alpha*p
        r.lincomb(1, r, -alpha, d)  # r = r - alpha*d

        sqnorm_r_new = r.norm() ** 2

        beta = sqnorm_r_new / sqnorm_r_old
        sqnorm_r_old = sqnorm_r_new

        p.lincomb(1, r, beta, p)    # p = r + beta*p

        if callback is not None:
            callback(x)
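
# Illustrative usage sketch (not part of the original module):
# `conjugate_gradient` requires a linear, self-adjoint operator, so this
# hypothetical helper builds one from a symmetric positive definite matrix.
# It assumes `odl.MatrixOperator` is available.
def _conjugate_gradient_usage_sketch():
    import odl

    a = np.array([[4.0, 1.0],
                  [1.0, 3.0]])                # symmetric positive definite
    op = odl.MatrixOperator(a)                # self-adjoint since a == a.T
    rhs = op.range.element([1.0, 2.0])

    x = op.domain.zero()
    conjugate_gradient(op, x, rhs, niter=10)  # x is updated in place
    return x
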
def conjugate_gradient_normal(op, x, rhs, niter=1, callback=None):
    """Optimized implementation of CG for the normal equation.

    This method solves the inverse problem (of the first kind) ::

        A(x) == rhs

    with a linear `Operator` ``A`` by looking at the normal equation ::

        A.adjoint(A(x)) == A.adjoint(rhs)

    It uses a minimum amount of memory copies by applying re-usable
    temporaries and in-place evaluation.

    The method is described (for linear systems) in a `Wikipedia article
    <https://en.wikipedia.org/wiki/Conjugate_gradient_method#\
Conjugate_gradient_on_the_normal_equations>`_.

    Parameters
    ----------
    op : `Operator`
        Operator in the inverse problem. If not linear, it must have
        an implementation of `Operator.derivative`, which in turn must
        implement `Operator.adjoint`, i.e. the call
        ``op.derivative(x).adjoint`` must be valid.
    x : ``op.domain`` element
        Element to which the result is written. Its initial value is
        used as starting point of the iteration, and its values are
        updated in each iteration step.
    rhs : ``op.range`` element
        Right-hand side of the equation defining the inverse problem.
    niter : int
        Number of iterations.
    callback : callable, optional
        Object executing code per iteration, e.g. plotting each iterate.

    See Also
    --------
    conjugate_gradient : Optimized solver for symmetric matrices
    odl.solvers.smooth.nonlinear_cg.conjugate_gradient_nonlinear :
        Equivalent solver for the nonlinear case
    """
    # TODO: add a book reference
    # TODO: update doc

    if x not in op.domain:
        raise TypeError('`x` {!r} is not in the domain of `op` {!r}'
                        ''.format(x, op.domain))

    d = op(x)
    d.lincomb(1, rhs, -1, d)                # d = rhs - A x
    p = op.derivative(x).adjoint(d)
    s = p.copy()
    q = op.range.element()
    sqnorm_s_old = s.norm() ** 2            # Only recalculate norm after update

    for _ in range(niter):
        op(p, out=q)                        # q = A p
        sqnorm_q = q.norm() ** 2
        if sqnorm_q == 0.0:                 # Return if residual is 0
            return

        a = sqnorm_s_old / sqnorm_q
        x.lincomb(1, x, a, p)               # x = x + a*p
        d.lincomb(1, d, -a, q)              # d = d - a*Ap
        op.derivative(p).adjoint(d, out=s)  # s = A^T d

        sqnorm_s_new = s.norm() ** 2
        b = sqnorm_s_new / sqnorm_s_old
        sqnorm_s_old = sqnorm_s_new

        p.lincomb(1, s, b, p)               # p = s + b * p

        if callback is not None:
            callback(x)
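
# Illustrative usage sketch (not part of the original module): the
# normal-equation variant also handles non-square (here overdetermined)
# linear problems. Hypothetical helper, assumes `odl.MatrixOperator`.
def _conjugate_gradient_normal_usage_sketch():
    import odl

    matrix = np.array([[1.0, 0.0],
                       [0.0, 2.0],
                       [1.0, 1.0]])
    op = odl.MatrixOperator(matrix)                  # R^2 -> R^3, not self-adjoint
    rhs = op.range.element([1.0, 2.0, 3.0])

    x = op.domain.zero()
    conjugate_gradient_normal(op, x, rhs, niter=10)  # solves A^T A x = A^T rhs
    return x
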
def exp_zero_seq(base):
    """Default exponential zero sequence.

    It is defined by

        t_0 = 1 / base
        t_m = t_(m-1) / base

    or, in closed form,

        t_m = base^(-m-1)

    Parameters
    ----------
    base : float
        Base of the sequence. Its absolute value must be larger than 1.

    Yields
    ------
    val : float
        The next value in the exponential sequence.
    """
    value = 1.0
    while True:
        value /= base
        yield value
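
# Small illustrative check (not part of the original module): with base 2.0
# the generator yields 0.5, 0.25, 0.125, ...
def _exp_zero_seq_sketch():
    seq = exp_zero_seq(2.0)
    return [next(seq) for _ in range(3)]  # [0.5, 0.25, 0.125]
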
def gauss_newton(op, x, rhs, niter, zero_seq=exp_zero_seq(2.0),
                 callback=None):
    """Optimized implementation of a Gauss-Newton method.

    This method solves the inverse problem (of the first kind)::

        A(x) = y

    for a (Frechet-) differentiable `Operator` ``A`` using a
    Gauss-Newton iteration.

    It uses a minimum amount of memory copies by applying re-usable
    temporaries and in-place evaluation.

    A variant of the method applied to a specific problem is described
    in a `Wikipedia article
    <https://en.wikipedia.org/wiki/Gauss%E2%80%93Newton_algorithm>`_.

    Parameters
    ----------
    op : `Operator`
        Operator in the inverse problem. If not linear, it must have
        an implementation of `Operator.derivative`, which in turn must
        implement `Operator.adjoint`, i.e. the call
        ``op.derivative(x).adjoint`` must be valid.
    x : ``op.domain`` element
        Element to which the result is written. Its initial value is
        used as starting point of the iteration, and its values are
        updated in each iteration step.
    rhs : ``op.range`` element
        Right-hand side of the equation defining the inverse problem.
    niter : int
        Maximum number of iterations.
    zero_seq : iterable, optional
        Zero sequence whose values are used for the regularization of
        the linearized problem in each Newton step.
    callback : callable, optional
        Object executing code per iteration, e.g. plotting each iterate.
    """
    if x not in op.domain:
        raise TypeError('`x` {!r} is not in the domain of `op` {!r}'
                        ''.format(x, op.domain))

    x0 = x.copy()
    id_op = IdentityOperator(op.domain)
    dx = op.domain.zero()

    tmp_dom = op.domain.element()
    u = op.domain.element()
    tmp_ran = op.range.element()
    v = op.range.element()

    for _ in range(niter):
        tm = next(zero_seq)
        deriv = op.derivative(x)
        deriv_adjoint = deriv.adjoint

        # v = rhs - op(x) - deriv(x0-x)
        # u = deriv.T(v)
        op(x, out=tmp_ran)              # eval  op(x)
        v.lincomb(1, rhs, -1, tmp_ran)  # assign  v = rhs - op(x)
        tmp_dom.lincomb(1, x0, -1, x)   # assign  tmp_dom = x0 - x
        deriv(tmp_dom, out=tmp_ran)     # eval  deriv(x0-x)
        v -= tmp_ran                    # assign  v = rhs - op(x) - deriv(x0-x)
        deriv_adjoint(v, out=u)         # eval/assign  u = deriv.T(v)

        # Solve the Tikhonov regularized system
        # (deriv.T o deriv + tm * id_op)(dx) = u  for dx
        tikh_op = OperatorSum(OperatorComp(deriv.adjoint, deriv),
                              tm * id_op, tmp_dom)

        # TODO: allow user to select other method
        conjugate_gradient(tikh_op, dx, u, 3)

        # Update x
        x.lincomb(1, x0, 1, dx)  # x = x0 + dx

        if callback is not None:
            callback(x)
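
# Illustrative usage sketch (not part of the original module): for a linear
# operator, `gauss_newton` amounts to iterated Tikhonov-regularized least
# squares. Hypothetical helper, assumes `odl.MatrixOperator`.
def _gauss_newton_usage_sketch():
    import odl

    matrix = np.array([[1.0, 2.0],
                       [3.0, 4.0],
                       [5.0, 6.0]])
    op = odl.MatrixOperator(matrix)
    rhs = op.range.element([1.0, 1.0, 1.0])

    x = op.domain.zero()
    gauss_newton(op, x, rhs, niter=10)  # uses exp_zero_seq(2.0) by default
    return x
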
def kaczmarz(ops, x, rhs, niter, omega=1, projection=None, random=False,
             callback=None, callback_loop='outer'):
    r"""Optimized implementation of Kaczmarz's method.

    Solves the inverse problem given by the set of equations::

        A_n(x) = rhs_n

    This is also known as the Landweber-Kaczmarz method, since it
    coincides with the Landweber method for a single operator.

    Parameters
    ----------
    ops : sequence of `Operator`'s
        Operators in the inverse problem. ``ops[i].derivative(x).adjoint``
        must be well-defined for ``x`` in the operator domain and for
        all ``i``.
    x : ``op.domain`` element
        Element to which the result is written. Its initial value is
        used as starting point of the iteration, and its values are
        updated in each iteration step.
    rhs : sequence of ``ops[i].range`` elements
        Right-hand side of the equation defining the inverse problem.
    niter : int
        Number of iterations.
    omega : positive float or sequence of positive floats, optional
        Relaxation parameter in the iteration. If a single float is
        given, the same step is used for all operators, otherwise
        separate steps are used.
    projection : callable, optional
        Function that can be used to modify the iterates in each
        iteration, for example enforcing positivity. The function
        should take one argument and modify it in-place.
    random : bool, optional
        If `True`, the order of the operators is randomized in each
        iteration.
    callback : callable, optional
        Object executing code per iteration, e.g. plotting each iterate.
    callback_loop : {'inner', 'outer'}
        Whether the callback should be called in the inner or outer loop.

    Notes
    -----
    This method calculates an approximate least-squares solution of
    the inverse problem of the first kind

    .. math::
        \mathcal{A}_i (x) = y_i \quad 1 \leq i \leq n,

    for given :math:`y_i \in \mathcal{Y}_i`, i.e. an approximate
    solution :math:`x^*` to

    .. math::
        \min_{x\in \mathcal{X}}
        \sum_{i=1}^n \| \mathcal{A}_i(x) - y_i \|_{\mathcal{Y}_i}^2

    for (Frechet-) differentiable operators
    :math:`\mathcal{A}_i: \mathcal{X} \to \mathcal{Y}_i` between Hilbert
    spaces :math:`\mathcal{X}` and :math:`\mathcal{Y}_i`. The method
    starts from an initial guess :math:`x_0` and uses the iteration

    .. math::
        x_{k+1} = x_k - \omega_{[k]} \ \partial \mathcal{A}_{[k]}(x_k)^*
        (\mathcal{A}_{[k]}(x_k) - y_{[k]}),

    where :math:`\partial \mathcal{A}_{[k]}(x_k)` is the Frechet derivative
    of :math:`\mathcal{A}_{[k]}` at :math:`x_k`, :math:`\omega_{[k]}` is a
    relaxation parameter and :math:`[k] := k \text{ mod } n`.

    For linear problems, a choice
    :math:`0 < \omega_i < 2/\lVert \mathcal{A}_i\rVert^2` guarantees
    convergence, where :math:`\|\mathcal{A}_{i}\|` stands for the
    operator norm of :math:`\mathcal{A}_{i}`.

    This implementation uses a minimum amount of memory copies by
    applying re-usable temporaries and in-place evaluation.

    The method is also described in a `Wikipedia article
    <https://en.wikipedia.org/wiki/Kaczmarz_method>`_ and in Natterer, F.,
    Mathematical Methods in Image Reconstruction, section 5.3.2.

    See Also
    --------
    landweber
    """
    domain = ops[0].domain
    if any(domain != opi.domain for opi in ops):
        raise ValueError('domains of `ops` are not all equal')

    if x not in domain:
        raise TypeError('`x` {!r} is not in the domain of `ops` {!r}'
                        ''.format(x, domain))

    if len(ops) != len(rhs):
        raise ValueError('number of `ops` {} does not match number of '
                         '`rhs` {}'.format(len(ops), len(rhs)))

    omega = normalized_scalar_param_list(omega, len(ops), param_conv=float)

    # Reusable elements in the range, one per type of space
    ranges = [opi.range for opi in ops]
    unique_ranges = set(ranges)
    tmp_rans = {ran: ran.element() for ran in unique_ranges}

    # Single reusable element in the domain
    tmp_dom = domain.element()

    # Iteratively find solution
    for _ in range(niter):
        if random:
            rng = np.random.permutation(range(len(ops)))
        else:
            rng = range(len(ops))

        for i in rng:
            # Find residual
            tmp_ran = tmp_rans[ops[i].range]
            ops[i](x, out=tmp_ran)
            tmp_ran -= rhs[i]

            # Update x
            ops[i].derivative(x).adjoint(tmp_ran, out=tmp_dom)
            x.lincomb(1, x, -omega[i], tmp_dom)

            if projection is not None:
                projection(x)

            if callback is not None and callback_loop == 'inner':
                callback(x)
        if callback is not None and callback_loop == 'outer':
            callback(x)
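
# Illustrative usage sketch (not part of the original module): `kaczmarz`
# takes a sequence of operators and right-hand sides, here the two rows of a
# small linear system wrapped as separate `odl.MatrixOperator`s (assumed
# available). Step sizes are chosen below 2 / ||A_i||^2 for each operator.
def _kaczmarz_usage_sketch():
    import odl

    ops = [odl.MatrixOperator(np.array([[1.0, 2.0]])),
           odl.MatrixOperator(np.array([[3.0, 4.0]]))]
    rhs = [ops[0].range.element([5.0]),
           ops[1].range.element([6.0])]

    x = ops[0].domain.zero()
    kaczmarz(ops, x, rhs, niter=20, omega=[0.1, 0.02])  # one step size per operator
    return x
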
if __name__ == '__main__':
    from odl.util.testutils import run_doctests
    run_doctests()