# Copyright 2014-2019 The ODL contributors
#
# This file is part of ODL.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
"""Utilities for computing the gradient and Hessian of functionals."""
from __future__ import print_function, division, absolute_import
import numpy as np
from odl.solvers.functional.functional import Functional
from odl.operator import Operator
from odl.space.base_tensors import TensorSpace
__all__ = ('NumericalDerivative', 'NumericalGradient',)


class NumericalDerivative(Operator):
"""The derivative of an operator by finite differences.
See Also
--------
NumericalGradient : Compute gradient of a functional
"""

    def __init__(self, operator, point, method='forward', step=None):
r"""Initialize a new instance.
Parameters
----------
operator : `Operator`
The operator whose derivative should be computed numerically. Its
domain and range must be `TensorSpace` spaces.
point : ``operator.domain`` `element-like`
The point to compute the derivative in.
method : {'backward', 'forward', 'central'}, optional
The method to use to compute the derivative.
step : float, optional
The step length used in the derivative computation.
Default: selects the step according to the dtype of the space.

        Examples
        --------
        Compute a numerical estimate of the derivative of the gradient of
        the squared L2 norm, i.e. of its Hessian:

        >>> space = odl.rn(3)
        >>> func = odl.solvers.L2NormSquared(space)
        >>> hess = NumericalDerivative(func.gradient, [1, 1, 1])
        >>> hess([0, 0, 1])
        rn(3).element([ 0., 0., 2.])

        Find the Hessian matrix:

        >>> hess_matrix = odl.matrix_representation(hess)
        >>> np.allclose(hess_matrix, 2 * np.eye(3))
        True
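
        The scheme and step size can also be chosen explicitly; for
        illustration, a central-difference estimate with ``step=1e-4``
        (a sketch using only the parameters documented above):

        >>> hess = NumericalDerivative(func.gradient, [1, 1, 1],
        ...                            method='central', step=1e-4)
        >>> np.allclose(hess([0, 0, 1]), [0, 0, 2])
        True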

        Notes
        -----
        If the operator is :math:`A` and step size :math:`h` is used, the
        derivative at the point :math:`x` in the direction :math:`dx` is
        computed as follows.

        ``method='backward'``:

        .. math::
            \partial A(x)(dx) =
            (A(x) - A(x - dx \cdot h / \| dx \|))
            \cdot \frac{\| dx \|}{h}

        ``method='forward'``:

        .. math::
            \partial A(x)(dx) =
            (A(x + dx \cdot h / \| dx \|) - A(x))
            \cdot \frac{\| dx \|}{h}

        ``method='central'``:

        .. math::
            \partial A(x)(dx) =
            (A(x + dx \cdot h / (2 \| dx \|)) -
             A(x - dx \cdot h / (2 \| dx \|)))
            \cdot \frac{\| dx \|}{h}

        The number of operator evaluations is ``2``, regardless of
        parameters.
        """
if not isinstance(operator, Operator):
raise TypeError('`operator` has to be an `Operator` instance')
if not isinstance(operator.domain, TensorSpace):
raise TypeError('`operator.domain` must be a `TensorSpace` '
'instance')
if not isinstance(operator.range, TensorSpace):
raise TypeError('`operator.range` must be a `TensorSpace` '
'instance')
self.operator = operator
self.point = operator.domain.element(point)
if step is None:
            # Use the square root of machine epsilon as the step; this
            # retains half of the significant digits and "usually" gives a
            # good balance between precision and numerical stability.
self.step = np.sqrt(np.finfo(operator.domain.dtype).eps)
else:
self.step = float(step)
self.method, method_in = str(method).lower(), method
if self.method not in ('backward', 'forward', 'central'):
            raise ValueError("`method` '{}' not understood".format(method_in))
super(NumericalDerivative, self).__init__(
operator.domain, operator.range, linear=True)

    def _call(self, dx):
"""Return ``self(x)``."""
x = self.point
dx_norm = dx.norm()
        if dx_norm == 0:
            # The derivative in the zero direction is zero
            return self.range.zero()
scaled_dx = dx * (self.step / dx_norm)
if self.method == 'backward':
dAdx = self.operator(x) - self.operator(x - scaled_dx)
elif self.method == 'forward':
dAdx = self.operator(x + scaled_dx) - self.operator(x)
elif self.method == 'central':
dAdx = (self.operator(x + scaled_dx / 2) -
self.operator(x - scaled_dx / 2))
else:
raise RuntimeError('unknown method')
return dAdx * (dx_norm / self.step)


class NumericalGradient(Operator):
"""The gradient of a `Functional` computed by finite differences.
See Also
--------
NumericalDerivative : Compute directional derivative
"""

    def __init__(self, functional, method='forward', step=None):
"""Initialize a new instance.
Parameters
----------
functional : `Functional`
The functional whose gradient should be computed. Its domain must
be a `TensorSpace`.
method : {'backward', 'forward', 'central'}, optional
The method to use to compute the gradient.
step : float, optional
The step length used in the derivative computation.
Default: selects the step according to the dtype of the space.

        Examples
        --------
        >>> space = odl.rn(3)
        >>> func = odl.solvers.L2NormSquared(space)
        >>> grad = NumericalGradient(func)
        >>> grad([1, 1, 1])
        rn(3).element([ 2., 2., 2.])

        The numerical gradient matches the exact gradient if the step size
        is sufficiently small:

        >>> grad([1, 1, 1]) == func.gradient([1, 1, 1])
        True

        If the step is too large, the result is inaccurate:

        >>> grad = NumericalGradient(func, step=0.5)
        >>> grad([1, 1, 1])
        rn(3).element([ 2.5, 2.5, 2.5])

        The accuracy can be improved by using the more accurate
        ``method='central'``:

        >>> grad = NumericalGradient(func, method='central', step=0.5)
        >>> grad([1, 1, 1])
        rn(3).element([ 2., 2., 2.])
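
        The ``'backward'`` scheme costs the same as ``'forward'``; for
        this quadratic functional its error has the same size but opposite
        sign (an illustrative sketch using the documented parameters):

        >>> grad = NumericalGradient(func, method='backward', step=0.5)
        >>> grad([1, 1, 1])
        rn(3).element([ 1.5, 1.5, 1.5])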

        Notes
        -----
        If the functional is :math:`f` and step size :math:`h` is used,
        the gradient is computed as follows.

        ``method='backward'``:

        .. math::
            (\nabla f(x))_i = \frac{f(x) - f(x - h e_i)}{h}

        ``method='forward'``:

        .. math::
            (\nabla f(x))_i = \frac{f(x + h e_i) - f(x)}{h}

        ``method='central'``:

        .. math::
            (\nabla f(x))_i = \frac{f(x + (h/2) e_i) - f(x - (h/2) e_i)}{h}

        The number of function evaluations is
        ``functional.domain.size + 1`` if ``'backward'`` or ``'forward'``
        is used, and ``2 * functional.domain.size`` if ``'central'`` is
        used. On large domains this will be computationally infeasible.
        """
if not isinstance(functional, Functional):
raise TypeError('`functional` has to be a `Functional` instance')
if not isinstance(functional.domain, TensorSpace):
raise TypeError('`functional.domain` must be a `TensorSpace` '
'instance')
self.functional = functional
if step is None:
            # Use the square root of machine epsilon as the step; this
            # retains half of the significant digits and "usually" gives a
            # good balance between precision and numerical stability.
self.step = np.sqrt(np.finfo(functional.domain.dtype).eps)
else:
self.step = float(step)
self.method, method_in = str(method).lower(), method
if self.method not in ('backward', 'forward', 'central'):
            raise ValueError("`method` '{}' not understood".format(method_in))
super(NumericalGradient, self).__init__(
functional.domain, functional.domain, linear=functional.is_linear)

    def _call(self, x):
"""Return ``self(x)``."""
# The algorithm takes finite differences in one dimension at a time
# reusing the dx vector to improve efficiency.
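        # At ``i == 0`` the reset ``dx[i - 1] = 0`` touches the last entry
        # of ``dx``, which is still zero, so the wrap-around is harmless.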
dfdx = self.domain.zero()
dx = self.domain.zero()
if self.method == 'backward':
fx = self.functional(x)
for i in range(self.domain.size):
dx[i - 1] = 0 # reset step from last iteration
dx[i] = self.step
dfdx[i] = fx - self.functional(x - dx)
elif self.method == 'forward':
fx = self.functional(x)
for i in range(self.domain.size):
dx[i - 1] = 0 # reset step from last iteration
dx[i] = self.step
dfdx[i] = self.functional(x + dx) - fx
elif self.method == 'central':
for i in range(self.domain.size):
dx[i - 1] = 0 # reset step from last iteration
dx[i] = self.step / 2
dfdx[i] = self.functional(x + dx) - self.functional(x - dx)
else:
raise RuntimeError('unknown method')
dfdx /= self.step
return dfdx

    def derivative(self, point):
"""Return the derivative in ``point``.
The derivative of the gradient is often called the Hessian.
Parameters
----------
point : `domain` `element-like`
The point that the derivative should be taken in.
Returns
-------
derivative : `NumericalDerivative`
Numerical estimate of the derivative. Uses the same method as this
operator does, but with half the number of significant digits in
the step size in order to preserve numerical stability.
Examples
--------
Compute a numerical estimate of the derivative of the squared L2 norm:
>>> space = odl.rn(3)
>>> func = odl.solvers.L2NormSquared(space)
>>> grad = NumericalGradient(func)
>>> hess = grad.derivative([1, 1, 1])
>>> hess([1, 0, 0])
rn(3).element([ 2., 0., 0.])
Find the Hessian matrix:
>>> hess_matrix = odl.matrix_representation(hess)
>>> np.allclose(hess_matrix, 2 * np.eye(3))
True
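
        The derivative uses the square root of this operator's step size
        (a check against the ``step`` attribute set in ``__init__``):

        >>> hess.step == np.sqrt(grad.step)
        True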
"""
return NumericalDerivative(self, point,
method=self.method, step=np.sqrt(self.step))


if __name__ == '__main__':
from odl.util.testutils import run_doctests
run_doctests()