# Source code for odl.solvers.nonsmooth.alternating_dual_updates

# Copyright 2014-2019 The ODL contributors
#
# This file is part of ODL.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.

"""Alternating dual (AD) update algorithm studied by McGaffin and Fessler.

The alternating dual updates method solves structured convex optimization
problems by successively updating dual variables which are associated with
each of the components.
"""

from __future__ import print_function, division, absolute_import

import numpy as np

__all__ = ('adupdates',)


def adupdates(x, g, L, stepsize, inner_stepsizes, niter, random=False,
              callback=None, callback_loop='outer'):
    r"""Alternating Dual updates method.

    The Alternating Dual (AD) updates method of McGaffin and Fessler
    `[MF2015] <http://ieeexplore.ieee.org/document/7271047/>`_ is designed
    to solve an optimization problem of the form ::

        min_x [ sum_i g_i(L_i x) ]

    where ``g_i`` are proper, convex and lower semicontinuous functions and
    ``L_i`` are linear `Operator` s.

    Parameters
    ----------
    g : sequence of `Functional` s
        All functions need to provide a `Functional.convex_conj` with a
        `Functional.proximal` factory.
    L : sequence of `Operator` s
        Length of ``L`` must equal the length of ``g``.
    x : `LinearSpaceElement`
        Initial point, updated in-place.
    stepsize : positive float
        The stepsize for the outer (proximal point) iteration. The theory
        guarantees convergence for any positive real number, but the
        performance might depend on the choice of a good stepsize.
    inner_stepsizes : sequence of stepsizes
        Parameters determining the stepsizes for the inner iterations. Must
        be matched with the norms of ``L``, and convergence is guaranteed
        if the ``inner_stepsizes`` are small enough. See the Notes section
        for details.
    niter : int
        Number of (outer) iterations.
    random : bool, optional
        If `True`, the order of the dual updates is chosen randomly,
        otherwise the order provided by the lists ``g``, ``L`` and
        ``inner_stepsizes`` is used.
    callback : callable, optional
        Function called with the current iterate after each iteration.
    callback_loop : {'inner', 'outer'}, optional
        If 'inner', the ``callback`` function is called after each inner
        iteration, i.e., after each dual update. If 'outer', the
        ``callback`` function is called after each outer iteration, i.e.,
        after each primal update.

    Notes
    -----
    The algorithm as implemented here is described in the article [MF2015],
    where it is applied to a tomography problem. It solves the problem

    .. math::
        \min_x \sum_{i=1}^m g_i(L_i x),

    where :math:`g_i` are proper, convex and lower semicontinuous functions
    and :math:`L_i` are linear, continuous operators for
    :math:`i = 1, \ldots, m`. In an outer iteration, the solution is found
    iteratively by an iteration

    .. math::
        x_{n+1} = \mathrm{arg\,min}_x \sum_{i=1}^m g_i(L_i x)
            + \frac{\mu}{2} \|x - x_n\|^2

    with some ``stepsize`` parameter :math:`\mu > 0` according to the
    proximal point algorithm. In the inner iteration, dual variables are
    introduced for each of the components of the sum. The Lagrangian of
    the problem is given by

    .. math::
        S_n(x; v_1, \ldots, v_m) = \sum_{i=1}^m (\langle v_i, L_i x
            \rangle - g_i^*(v_i)) + \frac{\mu}{2} \|x - x_n\|^2.

    Given the dual variables, the new primal variable :math:`x_{n+1}` can
    be calculated by directly minimizing :math:`S_n` with respect to
    :math:`x`. This corresponds to the formula

    .. math::
        x_{n+1} = x_n - \frac{1}{\mu} \sum_{i=1}^m L_i^* v_i.

    The dual updates are executed according to the following rule:

    .. math::
        v_i^+ = \mathrm{Prox}^{\mu M_i^{-1}}_{g_i^*}
            (v_i + \mu M_i^{-1} L_i x_{n+1}),

    where :math:`x_{n+1}` is given by the formula above and :math:`M_i`
    is a diagonal matrix with positive diagonal entries such that
    :math:`M_i - L_i L_i^*` is positive semidefinite.

    The variable ``inner_stepsizes`` is chosen as a stepsize to the
    `Functional.proximal` to the `Functional.convex_conj` of each of the
    ``g`` s after multiplying with ``stepsize``. The ``inner_stepsizes``
    contain the elements of :math:`M_i^{-1}` in one of the following ways:

    * Setting ``inner_stepsizes[i]`` a positive float :math:`\gamma`
      corresponds to the choice :math:`M_i^{-1} = \gamma \mathrm{Id}`.
    * Assume that ``g_i`` is a `SeparableSum`, then setting
      ``inner_stepsizes[i]`` a list :math:`(\gamma_1, \ldots, \gamma_k)`
      of positive floats corresponds to the choice of a block-diagonal
      matrix :math:`M_i^{-1}`, where each block corresponds to one of the
      space components and equals :math:`\gamma_i \mathrm{Id}`.
    * Assume that ``g_i`` is an `L1Norm` or an `L2NormSquared`, then
      setting ``inner_stepsizes[i]`` a ``g_i.domain.element`` :math:`z`
      corresponds to the choice :math:`M_i^{-1} = \mathrm{diag}(z)`.

    References
    ----------
    [MF2015] McGaffin, M G, and Fessler, J A. *Alternating dual updates
    algorithm for X-ray CT reconstruction on the GPU*. IEEE Transactions
    on Computational Imaging, 1.3 (2015), pp 186--199.
    """
    # Check the lengths of the lists (= number of dual variables)
    length = len(g)
    if len(L) != length:
        raise ValueError('`len(L)` should equal `len(g)`, but {} != {}'
                         ''.format(len(L), length))
    if len(inner_stepsizes) != length:
        raise ValueError('len(`inner_stepsizes`) should equal `len(g)`, '
                         ' but {} != {}'.format(len(inner_stepsizes),
                                                length))

    # Check if operators have a common domain
    # (the space of the primal variable):
    domain = L[0].domain
    if any(opi.domain != domain for opi in L):
        raise ValueError('domains of `L` are not all equal')

    # Check if range of the operators equals domain of the functionals
    ranges = [opi.range for opi in L]
    if any(L[i].range != g[i].domain for i in range(length)):
        raise ValueError('`L[i].range` should equal `g[i].domain`')

    # Normalize string
    callback_loop_in = callback_loop
    callback_loop = str(callback_loop).lower()
    if callback_loop not in ('inner', 'outer'):
        raise ValueError('`callback_loop` {!r} not understood'
                         ''.format(callback_loop_in))

    # Initialization of the dual variables
    duals = [space.zero() for space in ranges]

    # Reusable elements in the ranges, one per type of space
    unique_ranges = set(ranges)
    tmp_rans = {ran: ran.element() for ran in unique_ranges}

    # Precompute the effective inner stepsizes mu * M_i^{-1} (scalar or
    # diagonal) once, since they do not vary over the iterations. They are
    # used both for the proximal factories and inside the dual updates.
    steps = [stepsize * inner_ss if np.isscalar(inner_ss)
             else stepsize * np.asarray(inner_ss)
             for inner_ss in inner_stepsizes]

    # Prepare the proximal operators. Since the stepsize does not vary over
    # the iterations, we always use the same proximal operator.
    proxs = [func.convex_conj.proximal(step)
             for (func, step) in zip(g, steps)]

    # Iteratively find a solution
    for _ in range(niter):
        # Update x = x - 1/stepsize * sum([L[i].adjoint(duals[i])
        # for i in range(length)])
        for i in range(length):
            x -= (1.0 / stepsize) * L[i].adjoint(duals[i])

        if random:
            rng = np.random.permutation(range(length))
        else:
            rng = range(length)

        for j in rng:
            # Dual update: v_j <- prox(v_j + mu * M_j^{-1} * L_j x),
            # followed by the corresponding correction of the primal x.
            arg = duals[j] + steps[j] * L[j](x)
            tmp_ran = tmp_rans[L[j].range]
            proxs[j](arg, out=tmp_ran)
            x -= (1.0 / stepsize) * L[j].adjoint(tmp_ran - duals[j])
            duals[j].assign(tmp_ran)
            if callback is not None and callback_loop == 'inner':
                callback(x)
        if callback is not None and callback_loop == 'outer':
            callback(x)
def adupdates_simple(x, g, L, stepsize, inner_stepsizes, niter,
                     random=False):
    """Non-optimized version of ``adupdates``.

    This function is intended for debugging. It makes a lot of copies and
    performs no error checking.
    """
    # Initializations
    length = len(g)
    ranges = [Li.range for Li in L]
    duals = [space.zero() for space in ranges]

    # Iteratively find a solution
    for _ in range(niter):
        # Update x = x - 1/stepsize * sum([L[i].adjoint(duals[i])
        # for i in range(length)])
        for i in range(length):
            x -= (1.0 / stepsize) * L[i].adjoint(duals[i])

        rng = (np.random.permutation(range(length)) if random
               else range(length))
        for j in rng:
            # Effective inner stepsize mu * M_j^{-1}; computed once per
            # update instead of twice as in earlier revisions.
            step = (stepsize * inner_stepsizes[j]
                    if np.isscalar(inner_stepsizes[j])
                    else stepsize * np.asarray(inner_stepsizes[j]))
            prox = g[j].convex_conj.proximal(step)
            # Dual update v_j <- prox(v_j + step * L_j x), then correct x.
            dual_tmp = prox(duals[j] + step * L[j](x))
            x -= 1.0 / stepsize * L[j].adjoint(dual_tmp - duals[j])
            duals[j].assign(dual_tmp)