Source code for odl.util.numerics

# Copyright 2014-2020 The ODL contributors
#
# This file is part of ODL.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.

"""Numerical helper functions for convenience or speed."""

from __future__ import absolute_import, division, print_function

import numpy as np
from odl.util.normalize import normalized_scalar_param_list, safe_int_conv

__all__ = (
    'apply_on_boundary',
    'fast_1d_tensor_mult',
    'resize_array',
    'zscore',
    'binning',
)


_SUPPORTED_RESIZE_PAD_MODES = ('constant', 'symmetric', 'periodic',
                               'order0', 'order1')



[docs]
def apply_on_boundary(array, func, only_once=True, which_boundaries=None,
                      axis_order=None, out=None):
    """Apply a function of the boundary of an n-dimensional array.

    All other values are preserved as-is.

    Parameters
    ----------
    array : `array-like`
        Modify the boundary of this array
    func : callable or sequence of callables
        If a single function is given, assign
        ``array[slice] = func(array[slice])`` on the boundary slices,
        e.g. use ``lamda x: x / 2`` to divide values by 2.
        A sequence of functions is applied per axis separately. It
        must have length ``array.ndim`` and may consist of one function
        or a 2-tuple of functions per axis.
        ``None`` entries in a sequence cause the axis (side) to be
        skipped.
    only_once : bool, optional
        If ``True``, ensure that each boundary point appears in exactly
        one slice. If ``func`` is a list of functions, the
        ``axis_order`` determines which functions are applied to nodes
        which appear in multiple slices, according to the principle
        "first-come, first-served".
    which_boundaries : sequence, optional
        If provided, this sequence determines per axis whether to
        apply the function at the boundaries in each axis. The entry
        in each axis may consist in a single bool or a 2-tuple of
        bool. In the latter case, the first tuple entry decides for
        the left, the second for the right boundary. The length of the
        sequence must be ``array.ndim``. ``None`` is interpreted as
        "all boundaries".
    axis_order : sequence of ints, optional
        Permutation of ``range(array.ndim)`` defining the order in which
        to process the axes. If combined with ``only_once`` and a
        function list, this determines which function is evaluated in
        the points that are potentially processed multiple times.
    out : `numpy.ndarray`, optional
        Location in which to store the result, can be the same as ``array``.
        Default: copy of ``array``

    Examples
    --------
    >>> arr = np.ones((3, 3))
    >>> apply_on_boundary(arr, lambda x: x / 2)
    array([[ 0.5,  0.5,  0.5],
           [ 0.5,  1. ,  0.5],
           [ 0.5,  0.5,  0.5]])

    If called with ``only_once=False``, the function is applied repeatedly:

    >>> apply_on_boundary(arr, lambda x: x / 2, only_once=False)
    array([[ 0.25,  0.5 ,  0.25],
           [ 0.5 ,  1.  ,  0.5 ],
           [ 0.25,  0.5 ,  0.25]])

    >>> apply_on_boundary(arr, lambda x: x / 2, only_once=True,
    ...                   which_boundaries=((True, False), True))
    array([[ 0.5,  0.5,  0.5],
           [ 0.5,  1. ,  0.5],
           [ 0.5,  1. ,  0.5]])

    Use the ``out`` parameter to store the result in an existing array:

    >>> out = np.empty_like(arr)
    >>> result = apply_on_boundary(arr, lambda x: x / 2, out=out)
    >>> result
    array([[ 0.5,  0.5,  0.5],
           [ 0.5,  1. ,  0.5],
           [ 0.5,  0.5,  0.5]])
    >>> result is out
    True
    """
    array = np.asarray(array)

    if callable(func):
        func = [func] * array.ndim
    elif len(func) != array.ndim:
        raise ValueError('sequence of functions has length {}, expected {}'
                         ''.format(len(func), array.ndim))

    if which_boundaries is None:
        which_boundaries = ([(True, True)] * array.ndim)
    elif len(which_boundaries) != array.ndim:
        raise ValueError('`which_boundaries` has length {}, expected {}'
                         ''.format(len(which_boundaries), array.ndim))

    if axis_order is None:
        axis_order = list(range(array.ndim))
    elif len(axis_order) != array.ndim:
        raise ValueError('`axis_order` has length {}, expected {}'
                         ''.format(len(axis_order), array.ndim))

    if out is None:
        out = array.copy()
    else:
        out[:] = array  # Self assignment is free, in case out is array

    # The 'only_once' functionality is implemented by storing for each axis
    # if the left and right boundaries have been processed. This information
    # is stored in a list of slices which is reused for the next axis in the
    # list.
    slices = [slice(None)] * array.ndim
    for ax, function, which in zip(axis_order, func, which_boundaries):
        if only_once:
            slc_l = list(slices)  # Make a copy; copy() exists in Py3 only
            slc_r = list(slices)
        else:
            slc_l = [slice(None)] * array.ndim
            slc_r = [slice(None)] * array.ndim

        # slc_l and slc_r select left and right boundary, resp, in this axis.
        slc_l[ax] = 0
        slc_r[ax] = -1

        slc_l, slc_r = tuple(slc_l), tuple(slc_r)

        try:
            # Tuple of functions in this axis
            func_l, func_r = function
        except TypeError:
            # Single function
            func_l = func_r = function

        try:
            # Tuple of bool
            mod_left, mod_right = which
        except TypeError:
            # Single bool
            mod_left = mod_right = which

        if mod_left and func_l is not None:
            out[slc_l] = func_l(out[slc_l])
            start = 1
        else:
            start = None

        if mod_right and func_r is not None:
            out[slc_r] = func_r(out[slc_r])
            end = -1
        else:
            end = None

        # Write the information for the processed axis into the slice list.
        # Start and end include the boundary if it was processed.
        slices[ax] = slice(start, end)

    return out




[docs]
def fast_1d_tensor_mult(ndarr, onedim_arrs, axes=None, out=None):
    """Fast multiplication of an n-dim array with an outer product.

    This method implements the multiplication of an n-dimensional array
    with an outer product of one-dimensional arrays, e.g.::

        a = np.ones((10, 10, 10))
        x = np.random.rand(10)
        a *= x[:, None, None] * x[None, :, None] * x[None, None, :]

    Basically, there are two ways to do such an operation:

    1. First calculate the factor on the right-hand side and do one
       "big" multiplication; or
    2. Multiply by one factor at a time.

    The procedure of building up the large factor in the first method
    is relatively cheap if the number of 1d arrays is smaller than the
    number of dimensions. For exactly n vectors, the second method is
    faster, although it loops of the array ``a`` n times.

    This implementation combines the two ideas into a hybrid scheme:

    - If there are less 1d arrays than dimensions, choose 1.
    - Otherwise, calculate the factor array for n-1 arrays
      and multiply it to the large array. Finally, multiply with the
      last 1d array.

    The advantage of this approach is that it is memory-friendly and
    loops over the big array only twice.

    Parameters
    ----------
    ndarr : `array-like`
        Array to multiply to
    onedim_arrs : sequence of `array-like`'s
        One-dimensional arrays to be multiplied with ``ndarr``. The
        sequence may not be longer than ``ndarr.ndim``.
    axes : sequence of ints, optional
        Take the 1d transform along these axes. ``None`` corresponds to
        the last ``len(onedim_arrs)`` axes, in ascending order.
    out : `numpy.ndarray`, optional
        Array in which the result is stored

    Returns
    -------
    out : `numpy.ndarray`
        Result of the modification. If ``out`` was given, the returned
        object is a reference to it.
    """
    if out is None:
        out = np.array(ndarr, copy=True)
    else:
        out[:] = ndarr  # Self-assignment is free if out is ndarr

    if not onedim_arrs:
        raise ValueError('no 1d arrays given')

    if axes is None:
        axes = list(range(out.ndim - len(onedim_arrs), out.ndim))
        axes_in = None
    elif len(axes) != len(onedim_arrs):
        raise ValueError('there are {} 1d arrays, but {} axes entries'
                         ''.format(len(onedim_arrs), len(axes)))
    else:
        # Make axes positive
        axes, axes_in = np.array(axes, dtype=int), axes
        axes[axes < 0] += out.ndim
        axes = list(axes)

    if not all(0 <= ai < out.ndim for ai in axes):
        raise ValueError('`axes` {} out of bounds for {} dimensions'
                         ''.format(axes_in, out.ndim))

    # Make scalars 1d arrays and squeezable arrays 1d
    alist = [np.atleast_1d(np.asarray(a).squeeze()) for a in onedim_arrs]
    if any(a.ndim != 1 for a in alist):
        raise ValueError('only 1d arrays allowed')

    if len(axes) < out.ndim:
        # Make big factor array (start with 0d)
        factor = np.array(1.0)
        for ax, arr in zip(axes, alist):
            # Meshgrid-style slice
            slc = [None] * out.ndim
            slc[ax] = slice(None)
            factor = factor * arr[tuple(slc)]

        out *= factor

    else:
        # Hybrid approach

        # Get the axis to spare for the final multiplication, the one
        # with the largest stride.
        last_ax = np.argmax(out.strides)
        last_arr = alist[axes.index(last_ax)]

        # Build the semi-big array and multiply
        factor = np.array(1.0)
        for ax, arr in zip(axes, alist):
            if ax == last_ax:
                continue

            slc = [None] * out.ndim
            slc[ax] = slice(None)
            factor = factor * arr[tuple(slc)]

        out *= factor

        # Finally multiply by the remaining 1d array
        slc = [None] * out.ndim
        slc[last_ax] = slice(None)
        out *= last_arr[tuple(slc)]

    return out




[docs]
def resize_array(arr, newshp, offset=None, pad_mode='constant', pad_const=0,
                 direction='forward', out=None):
    """Return the resized version of ``arr`` with shape ``newshp``.

    In axes where ``newshp > arr.shape``, padding is applied according
    to the supplied options.
    Where ``newshp < arr.shape``, the array is cropped to the new
    size.

    See `the online documentation
    <https://odlgroup.github.io/odl/math/resizing_ops.html>`_
    on resizing operators for mathematical details.

    Parameters
    ----------
    arr : `array-like`
        Array to be resized.
    newshp : sequence of ints
        Desired shape of the output array.
    offset : sequence of ints, optional
        Specifies how many entries are added to/removed from the "left"
        side (corresponding to low indices) of ``arr``.
    pad_mode : string, optional
        Method to be used to fill in missing values in an enlarged array.

        ``'constant'``: Fill with ``pad_const``.

        ``'symmetric'``: Reflect at the boundaries, not doubling the
        outmost values. This requires left and right padding sizes
        to be strictly smaller than the original array shape.

        ``'periodic'``: Fill in values from the other side, keeping
        the order. This requires left and right padding sizes to be
        at most as large as the original array shape.

        ``'order0'``: Extend constantly with the outmost values
        (ensures continuity).

        ``'order1'``: Extend with constant slope (ensures continuity of
        the first derivative). This requires at least 2 values along
        each axis where padding is applied.

    pad_const : scalar, optional
        Value to be used in the ``'constant'`` padding mode.
    direction : {'forward', 'adjoint'}
        Determines which variant of the resizing is applied.

        'forward' : in axes where ``out`` is larger than ``arr``,
        apply padding. Otherwise, restrict to the smaller size.

        'adjoint' : in axes where ``out`` is larger than ``arr``,
        apply zero-padding. Otherwise, restrict to the smaller size
        and add the outside contributions according to ``pad_mode``.

    out : `numpy.ndarray`, optional
        Array to write the result to. Must have shape ``newshp`` and
        be able to hold the data type of the input array.

    Returns
    -------
    resized : `numpy.ndarray`
        Resized array created according to the above rules. If ``out``
        was given, the returned object is a reference to it.

    Examples
    --------
    The input can be shrunk by simply providing a smaller size.
    By default, values are removed from the right. When enlarging,
    zero-padding is applied by default, and the zeros are added to
    the right side. That behavior can be changed with the ``offset``
    parameter:

    >>> from odl.util.numerics import resize_array
    >>> resize_array([1, 2, 3], (1,))
    array([1])
    >>> resize_array([1, 2, 3], (1,), offset=2)
    array([3])
    >>> resize_array([1, 2, 3], (6,))
    array([1, 2, 3, 0, 0, 0])
    >>> resize_array([1, 2, 3], (7,), offset=2)
    array([0, 0, 1, 2, 3, 0, 0])

    The padding constant can be changed, as well as the padding
    mode:

    >>> resize_array([1, 2, 3], (7,), pad_const=-1, offset=2)
    array([-1, -1,  1,  2,  3, -1, -1])
    >>> resize_array([1, 2, 3], (7,), pad_mode='periodic', offset=2)
    array([2, 3, 1, 2, 3, 1, 2])
    >>> resize_array([1, 2, 3], (7,), pad_mode='symmetric', offset=2)
    array([3, 2, 1, 2, 3, 2, 1])
    >>> resize_array([1, 2, 3], (7,), pad_mode='order0', offset=2)
    array([1, 1, 1, 2, 3, 3, 3])
    >>> resize_array([1, 2, 3], (7,), pad_mode='order1', offset=2)
    array([-1,  0,  1,  2,  3,  4,  5])

    Everything works for arbitrary number of dimensions:

    >>> # Take the middle two columns and extend rows symmetrically
    >>> resize_array([[1, 2, 3, 4],
    ...               [5, 6, 7, 8],
    ...               [9, 10, 11, 12]],
    ...               (5, 2), pad_mode='symmetric', offset=[1, 1])
    array([[ 6,  7],
           [ 2,  3],
           [ 6,  7],
           [10, 11],
           [ 6,  7]])
    >>> # Take the rightmost two columns and extend rows symmetrically
    >>> # downwards
    >>> resize_array([[1, 2, 3, 4],
    ...               [5, 6, 7, 8],
    ...               [9, 10, 11, 12]], (5, 2), pad_mode='symmetric',
    ...              offset=[0, 2])
    array([[ 3,  4],
           [ 7,  8],
           [11, 12],
           [ 7,  8],
           [ 3,  4]])
    """
    # Handle arrays and shapes
    try:
        newshp = tuple(newshp)
    except TypeError:
        raise TypeError('`newshp` must be a sequence, got {!r}'.format(newshp))

    if out is not None:
        if not isinstance(out, np.ndarray):
            raise TypeError('`out` must be a `numpy.ndarray` instance, got '
                            '{!r}'.format(out))
        if out.shape != newshp:
            raise ValueError('`out` must have shape {}, got {}'
                             ''.format(newshp, out.shape))

        order = 'C' if out.flags.c_contiguous else 'F'
        arr = np.asarray(arr, dtype=out.dtype, order=order)
        if arr.ndim != out.ndim:
            raise ValueError('number of axes of `arr` and `out` do not match '
                             '({} != {})'.format(arr.ndim, out.ndim))
    else:
        arr = np.asarray(arr)
        order = 'C' if arr.flags.c_contiguous else 'F'
        out = np.empty(newshp, dtype=arr.dtype, order=order)
        if len(newshp) != arr.ndim:
            raise ValueError('number of axes of `arr` and `len(newshp)` do '
                             'not match ({} != {})'
                             ''.format(arr.ndim, len(newshp)))

    # Handle offset
    if offset is None:
        offset = [0] * out.ndim
    else:
        offset = normalized_scalar_param_list(
            offset, out.ndim, param_conv=safe_int_conv, keep_none=False)

    # Handle padding
    pad_mode, pad_mode_in = str(pad_mode).lower(), pad_mode
    if pad_mode not in _SUPPORTED_RESIZE_PAD_MODES:
        raise ValueError("`pad_mode` '{}' not understood".format(pad_mode_in))

    if (pad_mode == 'constant' and
        not np.can_cast(pad_const, out.dtype) and
        any(n_new > n_orig
            for n_orig, n_new in zip(arr.shape, out.shape))):
        raise ValueError('`pad_const` {} cannot be safely cast to the data '
                         'type {} of the output array'
                         ''.format(pad_const, out.dtype))

    # Handle direction
    direction, direction_in = str(direction).lower(), direction
    if direction not in ('forward', 'adjoint'):
        raise ValueError("`direction` '{}' not understood"
                         "".format(direction_in))

    if direction == 'adjoint' and pad_mode == 'constant' and pad_const != 0:
        raise ValueError("`pad_const` must be 0 for 'adjoint' direction, "
                         "got {}".format(pad_const))

    if direction == 'forward' and pad_mode == 'constant' and pad_const != 0:
        out.fill(pad_const)
    else:
        out.fill(0)

    # Perform the resizing
    if direction == 'forward':
        if pad_mode == 'constant':
            # Constant padding does not require the helper function
            _assign_intersection(out, arr, offset)
        else:
            # First copy the inner part and use it for padding
            _assign_intersection(out, arr, offset)
            _apply_padding(out, arr, offset, pad_mode, 'forward')
    else:
        if pad_mode == 'constant':
            # Skip the padding helper
            _assign_intersection(out, arr, offset)
        else:
            # Apply adjoint padding to a copy of the input and copy the inner
            # part when finished
            tmp = arr.copy()
            _apply_padding(tmp, out, offset, pad_mode, 'adjoint')
            _assign_intersection(out, tmp, offset)

    return out



def _intersection_slice_tuples(lhs_arr, rhs_arr, offset):
    """Return tuples to yield the intersecting part of both given arrays.

    The returned slices ``lhs_slc`` and ``rhs_slc`` are such that
    ``lhs_arr[lhs_slc]`` and ``rhs_arr[rhs_slc]`` have the same shape.
    The ``offset`` parameter determines how much is skipped/added on the
    "left" side (small indices).
    """
    lhs_slc, rhs_slc = [], []
    for istart, n_lhs, n_rhs in zip(offset, lhs_arr.shape, rhs_arr.shape):

        # Slice for the inner part in the larger array corresponding to the
        # small one, offset by the given amount
        istop = istart + min(n_lhs, n_rhs)
        inner_slc = slice(istart, istop)

        if n_lhs > n_rhs:
            # Extension
            lhs_slc.append(inner_slc)
            rhs_slc.append(slice(None))
        elif n_lhs < n_rhs:
            # Restriction
            lhs_slc.append(slice(None))
            rhs_slc.append(inner_slc)
        else:
            # Same size, so full slices for both
            lhs_slc.append(slice(None))
            rhs_slc.append(slice(None))

    return tuple(lhs_slc), tuple(rhs_slc)


def _assign_intersection(lhs_arr, rhs_arr, offset):
    """Assign the intersecting region from ``rhs_arr`` to ``lhs_arr``."""
    lhs_slc, rhs_slc = _intersection_slice_tuples(lhs_arr, rhs_arr, offset)
    lhs_arr[lhs_slc] = rhs_arr[rhs_slc]


def _padding_slices_outer(lhs_arr, rhs_arr, axis, offset):
    """Return slices into the outer array part where padding is applied.

    When padding is performed, these slices yield the outer (excess) part
    of the larger array that is to be filled with values. Slices for
    both sides of the arrays in a given ``axis`` are returned.

    The same slices are used also in the adjoint padding correction,
    however in a different way.

    See `the online documentation
    <https://odlgroup.github.io/odl/math/resizing_ops.html>`_
    on resizing operators for details.
    """
    istart_inner = offset[axis]
    istop_inner = istart_inner + min(lhs_arr.shape[axis], rhs_arr.shape[axis])
    return slice(istart_inner), slice(istop_inner, None)


def _padding_slices_inner(lhs_arr, rhs_arr, axis, offset, pad_mode):
    """Return slices into the inner array part for a given ``pad_mode``.

    When performing padding, these slices yield the values from the inner
    part of a larger array that are to be assigned to the excess part
    of the same array. Slices for both sides ("left", "right") of
    the arrays in a given ``axis`` are returned.
    """
    # Calculate the start and stop indices for the inner part
    istart_inner = offset[axis]
    n_large = max(lhs_arr.shape[axis], rhs_arr.shape[axis])
    n_small = min(lhs_arr.shape[axis], rhs_arr.shape[axis])
    istop_inner = istart_inner + n_small
    # Number of values padded to left and right
    n_pad_l = istart_inner
    n_pad_r = n_large - istop_inner

    if pad_mode == 'periodic':
        # left: n_pad_l forward, ending at istop_inner - 1
        pad_slc_l = slice(istop_inner - n_pad_l, istop_inner)
        # right: n_pad_r forward, starting at istart_inner
        pad_slc_r = slice(istart_inner, istart_inner + n_pad_r)

    elif pad_mode == 'symmetric':
        # left: n_pad_l backward, ending at istart_inner + 1
        pad_slc_l = slice(istart_inner + n_pad_l, istart_inner, -1)
        # right: n_pad_r backward, starting at istop_inner - 2
        # For the corner case that the stopping index is -1, we need to
        # replace it with None, since -1 as stopping index is equivalent
        # to the last index, which is not what we want (0 as last index).
        istop_r = istop_inner - 2 - n_pad_r
        if istop_r == -1:
            istop_r = None
        pad_slc_r = slice(istop_inner - 2, istop_r, -1)

    elif pad_mode in ('order0', 'order1'):
        # left: only the first entry, using a slice to avoid squeezing
        pad_slc_l = slice(istart_inner, istart_inner + 1)
        # right: only last entry
        pad_slc_r = slice(istop_inner - 1, istop_inner)

    else:
        # Slices are not used, returning trivial ones. The function should not
        # be used for other modes anyway.
        pad_slc_l, pad_slc_r = slice(0), slice(0)

    return pad_slc_l, pad_slc_r


def _apply_padding(lhs_arr, rhs_arr, offset, pad_mode, direction):
    """Apply padding to ``lhs_arr`` according to ``pad_mode``.

    This helper assigns the values in the excess parts (if existent)
    of ``lhs_arr`` according to the provided padding mode.

    This applies to the following values for ``pad_mode``:
    ``periodic``, ``symmetric``, ``order0``, ``order1``

    See `the online documentation
    <https://odlgroup.github.io/odl/math/resizing_ops.html>`_
    on resizing operators for details.
    """
    if pad_mode not in ('periodic', 'symmetric', 'order0', 'order1'):
        return

    full_slc = [slice(None)] * lhs_arr.ndim
    intersec_slc, _ = _intersection_slice_tuples(lhs_arr, rhs_arr, offset)

    if direction == 'forward':
        working_slc = list(intersec_slc)
    else:
        working_slc = list(full_slc)

    # TODO: order axes according to padding size for optimization (largest
    # last)? Axis strides could be important, too.
    for axis, (n_lhs, n_rhs) in enumerate(zip(lhs_arr.shape, rhs_arr.shape)):

        if n_lhs <= n_rhs:
            continue  # restriction, nothing to do

        n_pad_l = offset[axis]
        n_pad_r = n_lhs - n_rhs - n_pad_l

        # Error scenarios with illegal lengths
        if pad_mode == 'order0' and n_rhs == 0:
            raise ValueError('in axis {}: the smaller array must have size '
                             '>= 1 for order 0 padding, got 0'
                             ''.format(axis))

        if pad_mode == 'order1' and n_rhs < 2:
            raise ValueError('in axis {}: the smaller array must have size '
                             '>= 2 for order 1 padding, got {}'
                             ''.format(axis, n_rhs))

        for lr, pad_len in [('left', n_pad_l), ('right', n_pad_r)]:
            if pad_mode == 'periodic' and pad_len > n_rhs:
                raise ValueError('in axis {}: {} padding length {} exceeds '
                                 'the size {} of the smaller array; this is '
                                 'not allowed for periodic padding'
                                 ''.format(axis, lr, pad_len, n_rhs))

            elif pad_mode == 'symmetric' and pad_len >= n_rhs:
                raise ValueError('in axis {}: {} padding length {} is larger '
                                 'or equal to the size {} of the smaller '
                                 'array; this is not allowed for symmetric '
                                 'padding'
                                 ''.format(axis, lr, pad_len, n_rhs))

        # Slice tuples used to index LHS and RHS for left and right padding,
        # respectively; we make 4 copies of `working_slc` as lists
        lhs_slc_l, lhs_slc_r, rhs_slc_l, rhs_slc_r = map(
            list, [working_slc] * 4)

        # We're always using the outer (excess) parts involved in padding
        # on the LHS of the assignment, so we set them here.
        pad_slc_outer_l, pad_slc_outer_r = _padding_slices_outer(
            lhs_arr, rhs_arr, axis, offset)

        if direction == 'forward':
            lhs_slc_l[axis] = pad_slc_outer_l
            lhs_slc_r[axis] = pad_slc_outer_r
        else:
            rhs_slc_l[axis] = pad_slc_outer_l
            rhs_slc_r[axis] = pad_slc_outer_r

        if pad_mode in ('periodic', 'symmetric'):
            pad_slc_inner_l, pad_slc_inner_r = _padding_slices_inner(
                lhs_arr, rhs_arr, axis, offset, pad_mode)

            # Using `lhs_arr` on both sides of the assignment such that the
            # shapes match and the "corner" blocks are properly assigned
            # or used in the addition for the adjoint, respectively.
            if direction == 'forward':
                rhs_slc_l[axis] = pad_slc_inner_l
                rhs_slc_r[axis] = pad_slc_inner_r
                lhs_slc_l, rhs_slc_l, lhs_slc_r, rhs_slc_r = map(
                    tuple, [lhs_slc_l, rhs_slc_l, lhs_slc_r, rhs_slc_r])

                lhs_arr[lhs_slc_l] = lhs_arr[rhs_slc_l]
                lhs_arr[lhs_slc_r] = lhs_arr[rhs_slc_r]
            else:
                lhs_slc_l[axis] = pad_slc_inner_l
                lhs_slc_r[axis] = pad_slc_inner_r
                lhs_slc_l, rhs_slc_l, lhs_slc_r, rhs_slc_r = map(
                    tuple, [lhs_slc_l, rhs_slc_l, lhs_slc_r, rhs_slc_r])

                lhs_arr[lhs_slc_l] += lhs_arr[rhs_slc_l]
                lhs_arr[lhs_slc_r] += lhs_arr[rhs_slc_r]

        elif pad_mode == 'order0':
            # The `_padding_slices_inner` helper returns the slices for the
            # boundary values.
            left_slc, right_slc = _padding_slices_inner(
                lhs_arr, rhs_arr, axis, offset, pad_mode)

            if direction == 'forward':
                rhs_slc_l[axis] = left_slc
                rhs_slc_r[axis] = right_slc
                lhs_slc_l, rhs_slc_l, lhs_slc_r, rhs_slc_r = map(
                    tuple, [lhs_slc_l, rhs_slc_l, lhs_slc_r, rhs_slc_r])

                lhs_arr[lhs_slc_l] = lhs_arr[rhs_slc_l]
                lhs_arr[lhs_slc_r] = lhs_arr[rhs_slc_r]
            else:
                lhs_slc_l[axis] = left_slc
                lhs_slc_r[axis] = right_slc
                lhs_slc_l, rhs_slc_l, lhs_slc_r, rhs_slc_r = map(
                    tuple, [lhs_slc_l, rhs_slc_l, lhs_slc_r, rhs_slc_r])

                lhs_arr[lhs_slc_l] += np.sum(
                    lhs_arr[rhs_slc_l],
                    axis=axis, keepdims=True, dtype=lhs_arr.dtype)
                lhs_arr[lhs_slc_r] += np.sum(
                    lhs_arr[rhs_slc_r],
                    axis=axis, keepdims=True, dtype=lhs_arr.dtype)

        elif pad_mode == 'order1':
            # Some extra work necessary: need to compute the derivative at
            # the boundary and use that to continue with constant derivative.

            # Slice for broadcasting of a 1D array along `axis`
            bcast_slc = [None] * lhs_arr.ndim
            bcast_slc[axis] = slice(None)
            bcast_slc = tuple(bcast_slc)

            # Slices for the boundary in `axis`
            left_slc, right_slc = _padding_slices_inner(
                lhs_arr, rhs_arr, axis, offset, pad_mode)

            # Create slice tuples for indexing of the boundary values
            bdry_slc_l = list(working_slc)
            bdry_slc_l[axis] = left_slc
            bdry_slc_l = tuple(bdry_slc_l)
            bdry_slc_r = list(working_slc)
            bdry_slc_r[axis] = right_slc
            bdry_slc_r = tuple(bdry_slc_r)

            # For the slope at the boundary, we need two neighboring points.
            # We create the corresponding slices from the boundary slices.
            slope_slc_l = list(working_slc)
            slope_slc_l[axis] = slice(left_slc.start, left_slc.stop + 1)
            slope_slc_l = tuple(slope_slc_l)
            slope_slc_r = list(working_slc)
            slope_slc_r[axis] = slice(right_slc.start - 1, right_slc.stop)
            slope_slc_r = tuple(slope_slc_r)

            # The `np.arange`s, broadcast along `axis`, are used to create the
            # constant-slope continuation (forward) or to calculate the
            # first order moments (adjoint).
            arange_l = np.arange(-n_pad_l, 0,
                                 dtype=lhs_arr.dtype)[bcast_slc]
            arange_r = np.arange(1, n_pad_r + 1,
                                 dtype=lhs_arr.dtype)[bcast_slc]

            lhs_slc_l, rhs_slc_l, lhs_slc_r, rhs_slc_r = map(
                tuple, [lhs_slc_l, rhs_slc_l, lhs_slc_r, rhs_slc_r])

            if direction == 'forward':
                # Take first order difference to get the derivative
                # along `axis`.
                slope_l = np.diff(lhs_arr[slope_slc_l], n=1, axis=axis)
                slope_r = np.diff(lhs_arr[slope_slc_r], n=1, axis=axis)

                # Finally assign the constant slope values
                lhs_arr[lhs_slc_l] = lhs_arr[bdry_slc_l] + arange_l * slope_l
                lhs_arr[lhs_slc_r] = lhs_arr[bdry_slc_r] + arange_r * slope_r
            else:
                # Same as in 'order0'
                lhs_arr[bdry_slc_l] += np.sum(lhs_arr[rhs_slc_l],
                                              axis=axis, keepdims=True,
                                              dtype=lhs_arr.dtype)
                lhs_arr[bdry_slc_r] += np.sum(lhs_arr[rhs_slc_r],
                                              axis=axis, keepdims=True,
                                              dtype=lhs_arr.dtype)

                # Calculate the order 1 moments
                moment1_l = np.sum(arange_l * lhs_arr[rhs_slc_l],
                                   axis=axis, keepdims=True,
                                   dtype=lhs_arr.dtype)
                moment1_r = np.sum(arange_r * lhs_arr[rhs_slc_r],
                                   axis=axis, keepdims=True,
                                   dtype=lhs_arr.dtype)

                # Add moment1 at the "width-2 boundary layers", with the sign
                # corresponding to the sign in the derivative calculation
                # of the forward padding.
                sign = np.array([-1, 1])[bcast_slc]
                lhs_arr[slope_slc_l] += moment1_l * sign
                lhs_arr[slope_slc_r] += moment1_r * sign

        if direction == 'forward':
            working_slc[axis] = full_slc[axis]
        else:
            working_slc[axis] = intersec_slc[axis]



[docs]
def zscore(arr):
    """Return arr normalized with mean 0 and unit variance.

    If the input has 0 variance, the result will also have 0 variance.

    Parameters
    ----------
    arr : array-like

    Returns
    -------
    zscore : array-like

    Examples
    --------
    Compute the z score for a small array:

    >>> result = zscore([1, 0])
    >>> result
    array([ 1., -1.])
    >>> np.mean(result)
    0.0
    >>> np.std(result)
    1.0

    Does not re-scale in case the input is constant (has 0 variance):

    >>> zscore([1, 1])
    array([ 0., 0.])
    """
    arr = arr - np.mean(arr)
    std = np.std(arr)
    if std != 0:
        arr /= std
    return arr




[docs]
def binning(arr, bin_size, reduction=np.sum):
    """Bin an array by a factor.

    Parameters
    ----------
    arr : `array-like`
        The array that should be binned.
    bin_size : positive int or sequence
        Size or per-axis sizes of the bins.
    reduction: callable, optional
        Function used to perform the binning by reduction over temporary
        axes of size ``bin_size``. It is called as ::

            reduction(reshaped_arr, axis=reduction_axes)

        hence it must support this signature. The function is expected to
        collapse ``reduction_axes``.

        Default: `numpy.sum`

    Returns
    -------
    binned_arr : numpy.ndarray
        Array of shape ``n[i] // b[i]`` in axis ``i``, where ``n`` refers
        to the original shape and ``b`` to the bin sizes.

    Examples
    --------
    Binning by the same factor in all axes can be done with an integer
    ``bin_size``:

    >>> arr = np.arange(24).reshape((4, 6))
    >>> arr
    array([[ 0,  1,  2,  3,  4,  5],
           [ 6,  7,  8,  9, 10, 11],
           [12, 13, 14, 15, 16, 17],
           [18, 19, 20, 21, 22, 23]])
    >>> binning(arr, bin_size=2)
    array([[14, 22, 30],
           [62, 70, 78]])

    If a sequence is given, the bin sizes are specific for each axis:

    >>> binning(arr, bin_size=(2, 3))
    array([[ 24,  42],
           [ 96, 114]])

    Instead of the default `numpy.sum`, other functions that accept an
    array as first argument and an ``axis`` keyword argument can be used for
    reduction. For instance, `numpy.max`, resulting in "max pooling":

    >>> binning(arr, bin_size=2, reduction=np.max)
    array([[ 7,  9, 11],
           [19, 21, 23]])
    """
    arr = np.asarray(arr)
    d = arr.ndim
    try:
        bin_sizes = [int(bin_size)] * d
    except TypeError:
        bin_sizes = bin_size

    if not all(b > 0 for b in bin_sizes):
        raise ValueError('expected positive `bin_size`, got {}'
                         ''.format(bin_size))
    if len(bin_sizes) != d:
        raise ValueError(
            '`len(bin_sizes)` must be equal to `arr.ndim`, but {} != {}'
            ''.format(len(bin_sizes), d)
        )
    if any(b > n for n, b in zip(arr.shape, bin_sizes)):
        raise ValueError(
            '`bin_size` {} may not exceed array shape {} in any axis'
            ''.format(bin_size, arr.shape)
        )
    if not all(n % b == 0 for n, b in zip(arr.shape, bin_sizes)):
        raise ValueError(
            '`bin_size` must divide `arr.shape` evenly, but `{} / {}` has a '
            'remainder of {}'
            ''.format(
                arr.shape, bin_size, tuple(np.remainder(arr.shape, bin_sizes))
            )
        )

    red_shp = []
    red_axes = []
    ax = 0
    for n, b in zip(arr.shape, bin_sizes):
        if b == 1:
            # Optimization for "no binning"
            red_shp.append(n)
            ax += 1
        else:
            red_shp.append(n // b)
            red_shp.append(b)
            red_axes.append(ax + 1)
            ax += 2

    red_axes = tuple(red_axes)

    reshaped_arr = arr.reshape(red_shp)
    red_arr = reduction(reshaped_arr, axis=red_axes)

    out_shp = tuple(n for i, n in enumerate(red_shp) if i not in red_axes)
    if red_arr.shape != out_shp:
        raise ValueError('`reduction` does not produce the expected shape '
                         '{} from `arr.shape = {}` and `bin_size = {}`'
                         ''.format(out_shp, arr.shape, bin_size))

    return red_arr



if __name__ == '__main__':
    from odl.util.testutils import run_doctests
    run_doctests()