Source code for odl.solvers.nonsmooth.douglas_rachford

# Copyright 2014-2019 The ODL contributors
#
# This file is part of ODL.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.

"""Douglas-Rachford splitting algorithm for convex optimization."""

from __future__ import print_function, division, absolute_import

import numpy as np

from odl.operator import Operator


__all__ = ('douglas_rachford_pd', 'douglas_rachford_pd_stepsize')


[docs]def douglas_rachford_pd(x, f, g, L, niter, tau=None, sigma=None, callback=None, **kwargs): r"""Douglas-Rachford primal-dual splitting algorithm. Minimizes the sum of several convex functions composed with linear operators:: min_x f(x) + sum_i g_i(L_i x) where ``f``, ``g_i`` are convex functions, ``L_i`` are linear `Operator`'s. Can also be used to solve the more general problem:: min_x f(x) + sum_i (g_i @ l_i)(L_i x) where ``l_i`` are convex functions and ``@`` is the infimal convolution:: (g @ l)(x) = inf_y g(y) + l(x - y) For references on the algorithm, see algorithm 3.1 in [BH2013]. Parameters ---------- x : `LinearSpaceElement` Initial point, updated in-place. f : `Functional` `proximal factory` for the function ``f``. g : sequence of `Functional`'s Sequence of of the functions ``g_i``. Needs to have ``g[i].convex_conj.proximal``. L : sequence of `Operator`'s Sequence of `Operator`'s with as many elements as ``g``. niter : int Number of iterations. tau : float, optional Step size parameter for ``f``. Default: Sufficient for convergence, see `douglas_rachford_pd_stepsize`. sigma : sequence of floats, optional Step size parameters for the ``g_i``'s. Default: Sufficient for convergence, see `douglas_rachford_pd_stepsize`. callback : callable, optional Function called with the current iterate after each iteration. Other Parameters ---------------- l : sequence of `Functional`'s, optional Sequence of of the functions ``l_i``. Needs to have ``l[i].convex_conj.proximal``. If omitted, the simpler problem without ``l_i`` will be considered. lam : float or callable, optional Overrelaxation step size. If callable, it should take an index (starting at zero) and return the corresponding step size. Notes ----- The mathematical problem to solve is .. math:: \min_x f(x) + \sum_{i=0}^n (g_i \Box l_i)(L_i x), where :math:`f`, :math:`g_i`, :math:`l_i` are proper, convex and lower semicontinuous and :math:`L_i` are linear operators. The infimal convolution :math:`g \Box l` is defined by .. math:: (g \Box l)(x) = \inf_y g(y) + l(x - y). The simplified problem, .. math:: \min_x f(x) + \sum_{i=0}^n g_i(L_i x), can be obtained by setting .. math:: l(x) = 0 \text{ if } x = 0, \infty \text{ else.} To guarantee convergence, the parameters :math:`\tau`, :math:`\sigma_i` and :math:`L_i` need to satisfy .. math:: \tau \sum_{i=1}^n \sigma_i \|L_i\|^2 < 4 The parameter :math:`\lambda` needs to satisfy :math:`0 < \lambda < 2` and if it is given as a function it needs to satisfy .. math:: \sum_{n=1}^\infty \lambda_n (2 - \lambda_n) = +\infty. See Also -------- odl.solvers.nonsmooth.primal_dual_hybrid_gradient.pdhg : Solver for similar problems. odl.solvers.nonsmooth.forward_backward.forward_backward_pd : Solver for similar problems which can additionaly handle a differentiable term. References ---------- [BH2013] Bot, R I, and Hendrich, C. *A Douglas-Rachford type primal-dual method for solving inclusions with mixtures of composite and parallel-sum type monotone operators*. SIAM Journal on Optimization, 23.4 (2013), pp 2541--2565. """ # Validate input m = len(L) if not all(isinstance(op, Operator) for op in L): raise ValueError('`L` not a sequence of operators') if not all(op.is_linear for op in L): raise ValueError('not all operators in `L` are linear') if not all(x in op.domain for op in L): raise ValueError('`x` not in the domain of all operators') if len(g) != m: raise ValueError('len(prox_cc_g) != len(L)') tau, sigma = douglas_rachford_pd_stepsize(L, tau, sigma) if len(sigma) != m: raise ValueError('len(sigma) != len(L)') prox_cc_g = [gi.convex_conj.proximal for gi in g] # Get parameters from kwargs l = kwargs.pop('l', None) if l is not None and len(l) != m: raise ValueError('`l` does not have the same number of ' 'elements as `L`') if l is not None: prox_cc_l = [li.convex_conj.proximal for li in l] lam_in = kwargs.pop('lam', 1.0) if not callable(lam_in) and not (0 < lam_in < 2): raise ValueError('`lam` must callable or a number between 0 and 2') lam = lam_in if callable(lam_in) else lambda _: lam_in # Check for unused parameters if kwargs: raise TypeError('got unexpected keyword arguments: {}'.format(kwargs)) # Pre-allocate values v = [Li.range.zero() for Li in L] p1 = x.space.zero() p2 = [Li.range.zero() for Li in L] z1 = x.space.zero() # Save a bit of memory: z2 elements are local to the loop where they # are used rans = {Li.range for Li in L} z2 = {ran: ran.zero() for ran in rans} w1 = x.space.zero() w2 = [Li.range.zero() for Li in L] for k in range(niter): lam_k = lam(k) if len(L) > 0: # Compute z1 = sum(Li.adjoint(vi) for Li, vi in zip(L, v)) # NB: we abuse z1 as temporary here, in contrast to the algorithm # in the paper L[0].adjoint(v[0], out=z1) for Li, vi in zip(L[1:], v[1:]): Li.adjoint(vi, out=p1) z1 += p1 z1.lincomb(1, x, -tau / 2, z1) else: z1.assign(x) f.proximal(tau)(z1, out=p1) # Now p1 = prox[tau*f](x - tau/2 * sum(Li^* vi)) # Temporary z1 is no longer needed # w1 = 2 * p1 - x w1.lincomb(2, p1, -1, x) # Part 1 of x += lam(k) * (z1 - p1) x.lincomb(1, x, -lam_k, p1) # Now p1 is free to use as temporary; however, since p1 holds the # current primal iterate (not x) we call the callback here already # and return early if we're in the last iteration (also saves some # computation) if callback is not None: callback(p1) if k == niter - 1: x.assign(p1) return for i in range(m): # Compute p2[i] = prox[sigma * g^*](v[i] + sigma[i]/2 * L[i](w1)) L[i](w1, out=p2[i]) p2[i].lincomb(1, v[i], sigma[i] / 2, p2[i]) prox_cc_g[i](sigma[i])(p2[i], out=p2[i]) # w2[i] = 2 * p2[i] - v[i] w2[i].lincomb(2, p2[i], -1, v[i]) if len(L) > 0: # Compute p1 = sum(Li.adjoint(w2i) for Li, w2i in zip(L, w2)) # NB: we abuse p1 as temporary here, in contrast to the algorithm # in the paper L[0].adjoint(w2[0], out=p1) for Li, w2i in zip(L[1:], w2[1:]): Li.adjoint(w2i, out=z1) p1 += z1 else: p1.set_zero() # z1 = w2 - tau/2 * p1 z1.lincomb(1, w1, -tau / 2, p1) # Part 2 of x += lam(k) * (z1 - p1) x.lincomb(1, x, lam_k, z1) # p1 = 2 * z1 - w1 p1.lincomb(2, z1, -1, w1) for i in range(m): z2i = z2[L[i].range] # Compute # z2[i] = prox[sigma[i] * l[i]^*](w2[i] + sigma[i]/2 * L[i](p1)) L[i](p1, out=z2i) z2i.lincomb(1, w2[i], sigma[i] / 2, L[i](p1)) # prox_cc_l is the identity if `l is None`, thus omitted in that # case if l is not None: prox_cc_l[i](sigma[i])(z2i, out=z2i) # Compute v[i] += lam(k) * (z2[i] - p2[i]) v[i].lincomb(1, v[i], lam_k, z2i) v[i].lincomb(1, v[i], -lam_k, p2[i])
def _operator_norms(L): """Get operator norms if needed. Parameters ---------- L : sequence of `Operator` or float The operators or the norms of the operators that are used in the `douglas_rachford_pd` method. For `Operator` entries, the norm is computed with ``Operator.norm(estimate=True)``. """ L_norms = [] for Li in L: if np.isscalar(Li): L_norms.append(float(Li)) elif isinstance(Li, Operator): L_norms.append(Li.norm(estimate=True)) else: raise TypeError('invalid entry {!r} in `L`'.format(Li)) return L_norms
[docs]def douglas_rachford_pd_stepsize(L, tau=None, sigma=None): r"""Default step sizes for `douglas_rachford_pd`. Parameters ---------- L : sequence of `Operator` or float The operators or the norms of the operators that are used in the `douglas_rachford_pd` method. For `Operator` entries, the norm is computed with ``Operator.norm(estimate=True)``. tau : positive float, optional Use this value for ``tau`` instead of computing it from the operator norms, see Notes. sigma : tuple of float, optional The ``sigma`` step size parameters for the dual update. Returns ------- tau : float The ``tau`` step size parameter for the primal update. sigma : tuple of float The ``sigma`` step size parameters for the dual update. Notes ----- To guarantee convergence, the parameters :math:`\tau`, :math:`\sigma_i` and :math:`L_i` need to satisfy .. math:: \tau \sum_{i=1}^n \sigma_i \|L_i\|^2 < 4. This function has 4 options, :math:`\tau`/:math:`\sigma` given or not given. - If neither :math:`\tau` nor :math:`\sigma` are given, they are chosen as: .. math:: \tau = \frac{1}{\sum_{i=1}^n \|L_i\|}, \quad \sigma_i = \frac{2}{n \tau \|L_i\|^2} - If only :math:`\sigma` is given, :math:`\tau` is set to: .. math:: \tau = \frac{2}{\sum_{i=1}^n \sigma_i \|L_i\|^2} - If only :math:`\tau` is given, :math:`\sigma` is set to: .. math:: \sigma_i = \frac{2}{n \tau \|L_i\|^2} - If both are given, they are returned as-is without further validation. """ if tau is None and sigma is None: L_norms = _operator_norms(L) tau = 1 / sum(L_norms) sigma = [2.0 / (len(L_norms) * tau * Li_norm ** 2) for Li_norm in L_norms] return tau, tuple(sigma) elif tau is None: L_norms = _operator_norms(L) tau = 2 / sum(si * Li_norm ** 2 for si, Li_norm in zip(sigma, L_norms)) return tau, tuple(sigma) elif sigma is None: L_norms = _operator_norms(L) tau = float(tau) sigma = [2.0 / (len(L_norms) * tau * Li_norm ** 2) for Li_norm in L_norms] return tau, tuple(sigma) else: return float(tau), tuple(sigma)