Source code for odl.solvers.nonsmooth.admm

# Copyright 2014-2019 The ODL contributors
#
# This file is part of ODL.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.

"""Alternating Direction method of Multipliers (ADMM) method variants."""

from __future__ import division
from builtins import range

from odl.operator import Operator, OpDomainError


__all__ = ('admm_linearized',)


[docs]def admm_linearized(x, f, g, L, tau, sigma, niter, **kwargs): r"""Generic linearized ADMM method for convex problems. ADMM stands for "Alternating Direction Method of Multipliers" and is a popular convex optimization method. This variant solves problems of the form :: min_x [ f(x) + g(Lx) ] with convex ``f`` and ``g``, and a linear operator ``L``. See Section 4.4 of `[PB2014] <http://web.stanford.edu/~boyd/papers/prox_algs.html>`_ and the Notes for more mathematical details. Parameters ---------- x : ``L.domain`` element Starting point of the iteration, updated in-place. f, g : `Functional` The functions ``f`` and ``g`` in the problem definition. They need to implement the ``proximal`` method. L : linear `Operator` The linear operator that is composed with ``g`` in the problem definition. It must fulfill ``L.domain == f.domain`` and ``L.range == g.domain``. tau, sigma : positive float Step size parameters for the update of the variables. niter : non-negative int Number of iterations. Other Parameters ---------------- callback : callable, optional Function called with the current iterate after each iteration. Notes ----- Given :math:`x^{(0)}` (the provided ``x``) and :math:`u^{(0)} = z^{(0)} = 0`, linearized ADMM applies the following iteration: .. math:: x^{(k+1)} &= \mathrm{prox}_{\tau f} \left[ x^{(k)} - \sigma^{-1}\tau L^*\big( L x^{(k)} - z^{(k)} + u^{(k)} \big) \right] z^{(k+1)} &= \mathrm{prox}_{\sigma g}\left( L x^{(k+1)} + u^{(k)} \right) u^{(k+1)} &= u^{(k)} + L x^{(k+1)} - z^{(k+1)} The step size parameters :math:`\tau` and :math:`\sigma` must satisfy .. math:: 0 < \tau < \frac{\sigma}{\|L\|^2} to guarantee convergence. The name "linearized ADMM" comes from the fact that in the minimization subproblem for the :math:`x` variable, this variant uses a linearization of a quadratic term in the augmented Lagrangian of the generic ADMM, in order to make the step expressible with the proximal operator of :math:`f`. Another name for this algorithm is *split inexact Uzawa method*. References ---------- [PB2014] Parikh, N and Boyd, S. *Proximal Algorithms*. Foundations and Trends in Optimization, 1(3) (2014), pp 123-231. """ if not isinstance(L, Operator): raise TypeError('`op` {!r} is not an `Operator` instance' ''.format(L)) if x not in L.domain: raise OpDomainError('`x` {!r} is not in the domain of `op` {!r}' ''.format(x, L.domain)) tau, tau_in = float(tau), tau if tau <= 0: raise ValueError('`tau` must be positive, got {}'.format(tau_in)) sigma, sigma_in = float(sigma), sigma if sigma <= 0: raise ValueError('`sigma` must be positive, got {}'.format(sigma_in)) niter, niter_in = int(niter), niter if niter < 0 or niter != niter_in: raise ValueError('`niter` must be a non-negative integer, got {}' ''.format(niter_in)) # Callback object callback = kwargs.pop('callback', None) if callback is not None and not callable(callback): raise TypeError('`callback` {} is not callable'.format(callback)) # Initialize range variables z = L.range.zero() u = L.range.zero() # Temporary for Lx + u [- z] tmp_ran = L(x) # Temporary for L^*(Lx + u - z) tmp_dom = L.domain.element() # Store proximals since their initialization may involve computation prox_tau_f = f.proximal(tau) prox_sigma_g = g.proximal(sigma) for _ in range(niter): # tmp_ran has value Lx^k here # tmp_dom <- L^*(Lx^k + u^k - z^k) tmp_ran += u tmp_ran -= z L.adjoint(tmp_ran, out=tmp_dom) # x <- x^k - (tau/sigma) L^*(Lx^k + u^k - z^k) x.lincomb(1, x, -tau / sigma, tmp_dom) # x^(k+1) <- prox[tau*f](x) prox_tau_f(x, out=x) # tmp_ran <- Lx^(k+1) L(x, out=tmp_ran) # z^(k+1) <- prox[sigma*g](Lx^(k+1) + u^k) prox_sigma_g(tmp_ran + u, out=z) # 1 copy here # u^(k+1) = u^k + Lx^(k+1) - z^(k+1) u += tmp_ran u -= z if callback is not None: callback(x)
[docs]def admm_linearized_simple(x, f, g, L, tau, sigma, niter, **kwargs): """Non-optimized version of ``admm_linearized``. This function is intended for debugging. It makes a lot of copies and performs no error checking. """ callback = kwargs.pop('callback', None) z = L.range.zero() u = L.range.zero() for _ in range(niter): x[:] = f.proximal(tau)(x - tau / sigma * L.adjoint(L(x) + u - z)) z = g.proximal(sigma)(L(x) + u) u = L(x) + u - z if callback is not None: callback(x)