# Copyright 2014-2020 The ODL contributors
#
# This file is part of ODL.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
"""Numerical helper functions for convenience or speed."""
from __future__ import absolute_import, division, print_function
import numpy as np
from odl.util.normalize import normalized_scalar_param_list, safe_int_conv
__all__ = (
'apply_on_boundary',
'fast_1d_tensor_mult',
'resize_array',
'zscore',
'binning',
)
_SUPPORTED_RESIZE_PAD_MODES = ('constant', 'symmetric', 'periodic',
'order0', 'order1')
[docs]def apply_on_boundary(array, func, only_once=True, which_boundaries=None,
axis_order=None, out=None):
"""Apply a function of the boundary of an n-dimensional array.
All other values are preserved as-is.
Parameters
----------
array : `array-like`
Modify the boundary of this array
func : callable or sequence of callables
If a single function is given, assign
``array[slice] = func(array[slice])`` on the boundary slices,
e.g. use ``lamda x: x / 2`` to divide values by 2.
A sequence of functions is applied per axis separately. It
must have length ``array.ndim`` and may consist of one function
or a 2-tuple of functions per axis.
``None`` entries in a sequence cause the axis (side) to be
skipped.
only_once : bool, optional
If ``True``, ensure that each boundary point appears in exactly
one slice. If ``func`` is a list of functions, the
``axis_order`` determines which functions are applied to nodes
which appear in multiple slices, according to the principle
"first-come, first-served".
which_boundaries : sequence, optional
If provided, this sequence determines per axis whether to
apply the function at the boundaries in each axis. The entry
in each axis may consist in a single bool or a 2-tuple of
bool. In the latter case, the first tuple entry decides for
the left, the second for the right boundary. The length of the
sequence must be ``array.ndim``. ``None`` is interpreted as
"all boundaries".
axis_order : sequence of ints, optional
Permutation of ``range(array.ndim)`` defining the order in which
to process the axes. If combined with ``only_once`` and a
function list, this determines which function is evaluated in
the points that are potentially processed multiple times.
out : `numpy.ndarray`, optional
Location in which to store the result, can be the same as ``array``.
Default: copy of ``array``
Examples
--------
>>> arr = np.ones((3, 3))
>>> apply_on_boundary(arr, lambda x: x / 2)
array([[ 0.5, 0.5, 0.5],
[ 0.5, 1. , 0.5],
[ 0.5, 0.5, 0.5]])
If called with ``only_once=False``, the function is applied repeatedly:
>>> apply_on_boundary(arr, lambda x: x / 2, only_once=False)
array([[ 0.25, 0.5 , 0.25],
[ 0.5 , 1. , 0.5 ],
[ 0.25, 0.5 , 0.25]])
>>> apply_on_boundary(arr, lambda x: x / 2, only_once=True,
... which_boundaries=((True, False), True))
array([[ 0.5, 0.5, 0.5],
[ 0.5, 1. , 0.5],
[ 0.5, 1. , 0.5]])
Use the ``out`` parameter to store the result in an existing array:
>>> out = np.empty_like(arr)
>>> result = apply_on_boundary(arr, lambda x: x / 2, out=out)
>>> result
array([[ 0.5, 0.5, 0.5],
[ 0.5, 1. , 0.5],
[ 0.5, 0.5, 0.5]])
>>> result is out
True
"""
array = np.asarray(array)
if callable(func):
func = [func] * array.ndim
elif len(func) != array.ndim:
raise ValueError('sequence of functions has length {}, expected {}'
''.format(len(func), array.ndim))
if which_boundaries is None:
which_boundaries = ([(True, True)] * array.ndim)
elif len(which_boundaries) != array.ndim:
raise ValueError('`which_boundaries` has length {}, expected {}'
''.format(len(which_boundaries), array.ndim))
if axis_order is None:
axis_order = list(range(array.ndim))
elif len(axis_order) != array.ndim:
raise ValueError('`axis_order` has length {}, expected {}'
''.format(len(axis_order), array.ndim))
if out is None:
out = array.copy()
else:
out[:] = array # Self assignment is free, in case out is array
# The 'only_once' functionality is implemented by storing for each axis
# if the left and right boundaries have been processed. This information
# is stored in a list of slices which is reused for the next axis in the
# list.
slices = [slice(None)] * array.ndim
for ax, function, which in zip(axis_order, func, which_boundaries):
if only_once:
slc_l = list(slices) # Make a copy; copy() exists in Py3 only
slc_r = list(slices)
else:
slc_l = [slice(None)] * array.ndim
slc_r = [slice(None)] * array.ndim
# slc_l and slc_r select left and right boundary, resp, in this axis.
slc_l[ax] = 0
slc_r[ax] = -1
slc_l, slc_r = tuple(slc_l), tuple(slc_r)
try:
# Tuple of functions in this axis
func_l, func_r = function
except TypeError:
# Single function
func_l = func_r = function
try:
# Tuple of bool
mod_left, mod_right = which
except TypeError:
# Single bool
mod_left = mod_right = which
if mod_left and func_l is not None:
out[slc_l] = func_l(out[slc_l])
start = 1
else:
start = None
if mod_right and func_r is not None:
out[slc_r] = func_r(out[slc_r])
end = -1
else:
end = None
# Write the information for the processed axis into the slice list.
# Start and end include the boundary if it was processed.
slices[ax] = slice(start, end)
return out
[docs]def fast_1d_tensor_mult(ndarr, onedim_arrs, axes=None, out=None):
"""Fast multiplication of an n-dim array with an outer product.
This method implements the multiplication of an n-dimensional array
with an outer product of one-dimensional arrays, e.g.::
a = np.ones((10, 10, 10))
x = np.random.rand(10)
a *= x[:, None, None] * x[None, :, None] * x[None, None, :]
Basically, there are two ways to do such an operation:
1. First calculate the factor on the right-hand side and do one
"big" multiplication; or
2. Multiply by one factor at a time.
The procedure of building up the large factor in the first method
is relatively cheap if the number of 1d arrays is smaller than the
number of dimensions. For exactly n vectors, the second method is
faster, although it loops of the array ``a`` n times.
This implementation combines the two ideas into a hybrid scheme:
- If there are less 1d arrays than dimensions, choose 1.
- Otherwise, calculate the factor array for n-1 arrays
and multiply it to the large array. Finally, multiply with the
last 1d array.
The advantage of this approach is that it is memory-friendly and
loops over the big array only twice.
Parameters
----------
ndarr : `array-like`
Array to multiply to
onedim_arrs : sequence of `array-like`'s
One-dimensional arrays to be multiplied with ``ndarr``. The
sequence may not be longer than ``ndarr.ndim``.
axes : sequence of ints, optional
Take the 1d transform along these axes. ``None`` corresponds to
the last ``len(onedim_arrs)`` axes, in ascending order.
out : `numpy.ndarray`, optional
Array in which the result is stored
Returns
-------
out : `numpy.ndarray`
Result of the modification. If ``out`` was given, the returned
object is a reference to it.
"""
if out is None:
out = np.array(ndarr, copy=True)
else:
out[:] = ndarr # Self-assignment is free if out is ndarr
if not onedim_arrs:
raise ValueError('no 1d arrays given')
if axes is None:
axes = list(range(out.ndim - len(onedim_arrs), out.ndim))
axes_in = None
elif len(axes) != len(onedim_arrs):
raise ValueError('there are {} 1d arrays, but {} axes entries'
''.format(len(onedim_arrs), len(axes)))
else:
# Make axes positive
axes, axes_in = np.array(axes, dtype=int), axes
axes[axes < 0] += out.ndim
axes = list(axes)
if not all(0 <= ai < out.ndim for ai in axes):
raise ValueError('`axes` {} out of bounds for {} dimensions'
''.format(axes_in, out.ndim))
# Make scalars 1d arrays and squeezable arrays 1d
alist = [np.atleast_1d(np.asarray(a).squeeze()) for a in onedim_arrs]
if any(a.ndim != 1 for a in alist):
raise ValueError('only 1d arrays allowed')
if len(axes) < out.ndim:
# Make big factor array (start with 0d)
factor = np.array(1.0)
for ax, arr in zip(axes, alist):
# Meshgrid-style slice
slc = [None] * out.ndim
slc[ax] = slice(None)
factor = factor * arr[tuple(slc)]
out *= factor
else:
# Hybrid approach
# Get the axis to spare for the final multiplication, the one
# with the largest stride.
last_ax = np.argmax(out.strides)
last_arr = alist[axes.index(last_ax)]
# Build the semi-big array and multiply
factor = np.array(1.0)
for ax, arr in zip(axes, alist):
if ax == last_ax:
continue
slc = [None] * out.ndim
slc[ax] = slice(None)
factor = factor * arr[tuple(slc)]
out *= factor
# Finally multiply by the remaining 1d array
slc = [None] * out.ndim
slc[last_ax] = slice(None)
out *= last_arr[tuple(slc)]
return out
[docs]def resize_array(arr, newshp, offset=None, pad_mode='constant', pad_const=0,
direction='forward', out=None):
"""Return the resized version of ``arr`` with shape ``newshp``.
In axes where ``newshp > arr.shape``, padding is applied according
to the supplied options.
Where ``newshp < arr.shape``, the array is cropped to the new
size.
See `the online documentation
<https://odlgroup.github.io/odl/math/resizing_ops.html>`_
on resizing operators for mathematical details.
Parameters
----------
arr : `array-like`
Array to be resized.
newshp : sequence of ints
Desired shape of the output array.
offset : sequence of ints, optional
Specifies how many entries are added to/removed from the "left"
side (corresponding to low indices) of ``arr``.
pad_mode : string, optional
Method to be used to fill in missing values in an enlarged array.
``'constant'``: Fill with ``pad_const``.
``'symmetric'``: Reflect at the boundaries, not doubling the
outmost values. This requires left and right padding sizes
to be strictly smaller than the original array shape.
``'periodic'``: Fill in values from the other side, keeping
the order. This requires left and right padding sizes to be
at most as large as the original array shape.
``'order0'``: Extend constantly with the outmost values
(ensures continuity).
``'order1'``: Extend with constant slope (ensures continuity of
the first derivative). This requires at least 2 values along
each axis where padding is applied.
pad_const : scalar, optional
Value to be used in the ``'constant'`` padding mode.
direction : {'forward', 'adjoint'}
Determines which variant of the resizing is applied.
'forward' : in axes where ``out`` is larger than ``arr``,
apply padding. Otherwise, restrict to the smaller size.
'adjoint' : in axes where ``out`` is larger than ``arr``,
apply zero-padding. Otherwise, restrict to the smaller size
and add the outside contributions according to ``pad_mode``.
out : `numpy.ndarray`, optional
Array to write the result to. Must have shape ``newshp`` and
be able to hold the data type of the input array.
Returns
-------
resized : `numpy.ndarray`
Resized array created according to the above rules. If ``out``
was given, the returned object is a reference to it.
Examples
--------
The input can be shrunk by simply providing a smaller size.
By default, values are removed from the right. When enlarging,
zero-padding is applied by default, and the zeros are added to
the right side. That behavior can be changed with the ``offset``
parameter:
>>> from odl.util.numerics import resize_array
>>> resize_array([1, 2, 3], (1,))
array([1])
>>> resize_array([1, 2, 3], (1,), offset=2)
array([3])
>>> resize_array([1, 2, 3], (6,))
array([1, 2, 3, 0, 0, 0])
>>> resize_array([1, 2, 3], (7,), offset=2)
array([0, 0, 1, 2, 3, 0, 0])
The padding constant can be changed, as well as the padding
mode:
>>> resize_array([1, 2, 3], (7,), pad_const=-1, offset=2)
array([-1, -1, 1, 2, 3, -1, -1])
>>> resize_array([1, 2, 3], (7,), pad_mode='periodic', offset=2)
array([2, 3, 1, 2, 3, 1, 2])
>>> resize_array([1, 2, 3], (7,), pad_mode='symmetric', offset=2)
array([3, 2, 1, 2, 3, 2, 1])
>>> resize_array([1, 2, 3], (7,), pad_mode='order0', offset=2)
array([1, 1, 1, 2, 3, 3, 3])
>>> resize_array([1, 2, 3], (7,), pad_mode='order1', offset=2)
array([-1, 0, 1, 2, 3, 4, 5])
Everything works for arbitrary number of dimensions:
>>> # Take the middle two columns and extend rows symmetrically
>>> resize_array([[1, 2, 3, 4],
... [5, 6, 7, 8],
... [9, 10, 11, 12]],
... (5, 2), pad_mode='symmetric', offset=[1, 1])
array([[ 6, 7],
[ 2, 3],
[ 6, 7],
[10, 11],
[ 6, 7]])
>>> # Take the rightmost two columns and extend rows symmetrically
>>> # downwards
>>> resize_array([[1, 2, 3, 4],
... [5, 6, 7, 8],
... [9, 10, 11, 12]], (5, 2), pad_mode='symmetric',
... offset=[0, 2])
array([[ 3, 4],
[ 7, 8],
[11, 12],
[ 7, 8],
[ 3, 4]])
"""
# Handle arrays and shapes
try:
newshp = tuple(newshp)
except TypeError:
raise TypeError('`newshp` must be a sequence, got {!r}'.format(newshp))
if out is not None:
if not isinstance(out, np.ndarray):
raise TypeError('`out` must be a `numpy.ndarray` instance, got '
'{!r}'.format(out))
if out.shape != newshp:
raise ValueError('`out` must have shape {}, got {}'
''.format(newshp, out.shape))
order = 'C' if out.flags.c_contiguous else 'F'
arr = np.asarray(arr, dtype=out.dtype, order=order)
if arr.ndim != out.ndim:
raise ValueError('number of axes of `arr` and `out` do not match '
'({} != {})'.format(arr.ndim, out.ndim))
else:
arr = np.asarray(arr)
order = 'C' if arr.flags.c_contiguous else 'F'
out = np.empty(newshp, dtype=arr.dtype, order=order)
if len(newshp) != arr.ndim:
raise ValueError('number of axes of `arr` and `len(newshp)` do '
'not match ({} != {})'
''.format(arr.ndim, len(newshp)))
# Handle offset
if offset is None:
offset = [0] * out.ndim
else:
offset = normalized_scalar_param_list(
offset, out.ndim, param_conv=safe_int_conv, keep_none=False)
# Handle padding
pad_mode, pad_mode_in = str(pad_mode).lower(), pad_mode
if pad_mode not in _SUPPORTED_RESIZE_PAD_MODES:
raise ValueError("`pad_mode` '{}' not understood".format(pad_mode_in))
if (pad_mode == 'constant' and
not np.can_cast(pad_const, out.dtype) and
any(n_new > n_orig
for n_orig, n_new in zip(arr.shape, out.shape))):
raise ValueError('`pad_const` {} cannot be safely cast to the data '
'type {} of the output array'
''.format(pad_const, out.dtype))
# Handle direction
direction, direction_in = str(direction).lower(), direction
if direction not in ('forward', 'adjoint'):
raise ValueError("`direction` '{}' not understood"
"".format(direction_in))
if direction == 'adjoint' and pad_mode == 'constant' and pad_const != 0:
raise ValueError("`pad_const` must be 0 for 'adjoint' direction, "
"got {}".format(pad_const))
if direction == 'forward' and pad_mode == 'constant' and pad_const != 0:
out.fill(pad_const)
else:
out.fill(0)
# Perform the resizing
if direction == 'forward':
if pad_mode == 'constant':
# Constant padding does not require the helper function
_assign_intersection(out, arr, offset)
else:
# First copy the inner part and use it for padding
_assign_intersection(out, arr, offset)
_apply_padding(out, arr, offset, pad_mode, 'forward')
else:
if pad_mode == 'constant':
# Skip the padding helper
_assign_intersection(out, arr, offset)
else:
# Apply adjoint padding to a copy of the input and copy the inner
# part when finished
tmp = arr.copy()
_apply_padding(tmp, out, offset, pad_mode, 'adjoint')
_assign_intersection(out, tmp, offset)
return out
def _intersection_slice_tuples(lhs_arr, rhs_arr, offset):
"""Return tuples to yield the intersecting part of both given arrays.
The returned slices ``lhs_slc`` and ``rhs_slc`` are such that
``lhs_arr[lhs_slc]`` and ``rhs_arr[rhs_slc]`` have the same shape.
The ``offset`` parameter determines how much is skipped/added on the
"left" side (small indices).
"""
lhs_slc, rhs_slc = [], []
for istart, n_lhs, n_rhs in zip(offset, lhs_arr.shape, rhs_arr.shape):
# Slice for the inner part in the larger array corresponding to the
# small one, offset by the given amount
istop = istart + min(n_lhs, n_rhs)
inner_slc = slice(istart, istop)
if n_lhs > n_rhs:
# Extension
lhs_slc.append(inner_slc)
rhs_slc.append(slice(None))
elif n_lhs < n_rhs:
# Restriction
lhs_slc.append(slice(None))
rhs_slc.append(inner_slc)
else:
# Same size, so full slices for both
lhs_slc.append(slice(None))
rhs_slc.append(slice(None))
return tuple(lhs_slc), tuple(rhs_slc)
def _assign_intersection(lhs_arr, rhs_arr, offset):
"""Assign the intersecting region from ``rhs_arr`` to ``lhs_arr``."""
lhs_slc, rhs_slc = _intersection_slice_tuples(lhs_arr, rhs_arr, offset)
lhs_arr[lhs_slc] = rhs_arr[rhs_slc]
def _padding_slices_outer(lhs_arr, rhs_arr, axis, offset):
"""Return slices into the outer array part where padding is applied.
When padding is performed, these slices yield the outer (excess) part
of the larger array that is to be filled with values. Slices for
both sides of the arrays in a given ``axis`` are returned.
The same slices are used also in the adjoint padding correction,
however in a different way.
See `the online documentation
<https://odlgroup.github.io/odl/math/resizing_ops.html>`_
on resizing operators for details.
"""
istart_inner = offset[axis]
istop_inner = istart_inner + min(lhs_arr.shape[axis], rhs_arr.shape[axis])
return slice(istart_inner), slice(istop_inner, None)
def _padding_slices_inner(lhs_arr, rhs_arr, axis, offset, pad_mode):
"""Return slices into the inner array part for a given ``pad_mode``.
When performing padding, these slices yield the values from the inner
part of a larger array that are to be assigned to the excess part
of the same array. Slices for both sides ("left", "right") of
the arrays in a given ``axis`` are returned.
"""
# Calculate the start and stop indices for the inner part
istart_inner = offset[axis]
n_large = max(lhs_arr.shape[axis], rhs_arr.shape[axis])
n_small = min(lhs_arr.shape[axis], rhs_arr.shape[axis])
istop_inner = istart_inner + n_small
# Number of values padded to left and right
n_pad_l = istart_inner
n_pad_r = n_large - istop_inner
if pad_mode == 'periodic':
# left: n_pad_l forward, ending at istop_inner - 1
pad_slc_l = slice(istop_inner - n_pad_l, istop_inner)
# right: n_pad_r forward, starting at istart_inner
pad_slc_r = slice(istart_inner, istart_inner + n_pad_r)
elif pad_mode == 'symmetric':
# left: n_pad_l backward, ending at istart_inner + 1
pad_slc_l = slice(istart_inner + n_pad_l, istart_inner, -1)
# right: n_pad_r backward, starting at istop_inner - 2
# For the corner case that the stopping index is -1, we need to
# replace it with None, since -1 as stopping index is equivalent
# to the last index, which is not what we want (0 as last index).
istop_r = istop_inner - 2 - n_pad_r
if istop_r == -1:
istop_r = None
pad_slc_r = slice(istop_inner - 2, istop_r, -1)
elif pad_mode in ('order0', 'order1'):
# left: only the first entry, using a slice to avoid squeezing
pad_slc_l = slice(istart_inner, istart_inner + 1)
# right: only last entry
pad_slc_r = slice(istop_inner - 1, istop_inner)
else:
# Slices are not used, returning trivial ones. The function should not
# be used for other modes anyway.
pad_slc_l, pad_slc_r = slice(0), slice(0)
return pad_slc_l, pad_slc_r
def _apply_padding(lhs_arr, rhs_arr, offset, pad_mode, direction):
"""Apply padding to ``lhs_arr`` according to ``pad_mode``.
This helper assigns the values in the excess parts (if existent)
of ``lhs_arr`` according to the provided padding mode.
This applies to the following values for ``pad_mode``:
``periodic``, ``symmetric``, ``order0``, ``order1``
See `the online documentation
<https://odlgroup.github.io/odl/math/resizing_ops.html>`_
on resizing operators for details.
"""
if pad_mode not in ('periodic', 'symmetric', 'order0', 'order1'):
return
full_slc = [slice(None)] * lhs_arr.ndim
intersec_slc, _ = _intersection_slice_tuples(lhs_arr, rhs_arr, offset)
if direction == 'forward':
working_slc = list(intersec_slc)
else:
working_slc = list(full_slc)
# TODO: order axes according to padding size for optimization (largest
# last)? Axis strides could be important, too.
for axis, (n_lhs, n_rhs) in enumerate(zip(lhs_arr.shape, rhs_arr.shape)):
if n_lhs <= n_rhs:
continue # restriction, nothing to do
n_pad_l = offset[axis]
n_pad_r = n_lhs - n_rhs - n_pad_l
# Error scenarios with illegal lengths
if pad_mode == 'order0' and n_rhs == 0:
raise ValueError('in axis {}: the smaller array must have size '
'>= 1 for order 0 padding, got 0'
''.format(axis))
if pad_mode == 'order1' and n_rhs < 2:
raise ValueError('in axis {}: the smaller array must have size '
'>= 2 for order 1 padding, got {}'
''.format(axis, n_rhs))
for lr, pad_len in [('left', n_pad_l), ('right', n_pad_r)]:
if pad_mode == 'periodic' and pad_len > n_rhs:
raise ValueError('in axis {}: {} padding length {} exceeds '
'the size {} of the smaller array; this is '
'not allowed for periodic padding'
''.format(axis, lr, pad_len, n_rhs))
elif pad_mode == 'symmetric' and pad_len >= n_rhs:
raise ValueError('in axis {}: {} padding length {} is larger '
'or equal to the size {} of the smaller '
'array; this is not allowed for symmetric '
'padding'
''.format(axis, lr, pad_len, n_rhs))
# Slice tuples used to index LHS and RHS for left and right padding,
# respectively; we make 4 copies of `working_slc` as lists
lhs_slc_l, lhs_slc_r, rhs_slc_l, rhs_slc_r = map(
list, [working_slc] * 4)
# We're always using the outer (excess) parts involved in padding
# on the LHS of the assignment, so we set them here.
pad_slc_outer_l, pad_slc_outer_r = _padding_slices_outer(
lhs_arr, rhs_arr, axis, offset)
if direction == 'forward':
lhs_slc_l[axis] = pad_slc_outer_l
lhs_slc_r[axis] = pad_slc_outer_r
else:
rhs_slc_l[axis] = pad_slc_outer_l
rhs_slc_r[axis] = pad_slc_outer_r
if pad_mode in ('periodic', 'symmetric'):
pad_slc_inner_l, pad_slc_inner_r = _padding_slices_inner(
lhs_arr, rhs_arr, axis, offset, pad_mode)
# Using `lhs_arr` on both sides of the assignment such that the
# shapes match and the "corner" blocks are properly assigned
# or used in the addition for the adjoint, respectively.
if direction == 'forward':
rhs_slc_l[axis] = pad_slc_inner_l
rhs_slc_r[axis] = pad_slc_inner_r
lhs_slc_l, rhs_slc_l, lhs_slc_r, rhs_slc_r = map(
tuple, [lhs_slc_l, rhs_slc_l, lhs_slc_r, rhs_slc_r])
lhs_arr[lhs_slc_l] = lhs_arr[rhs_slc_l]
lhs_arr[lhs_slc_r] = lhs_arr[rhs_slc_r]
else:
lhs_slc_l[axis] = pad_slc_inner_l
lhs_slc_r[axis] = pad_slc_inner_r
lhs_slc_l, rhs_slc_l, lhs_slc_r, rhs_slc_r = map(
tuple, [lhs_slc_l, rhs_slc_l, lhs_slc_r, rhs_slc_r])
lhs_arr[lhs_slc_l] += lhs_arr[rhs_slc_l]
lhs_arr[lhs_slc_r] += lhs_arr[rhs_slc_r]
elif pad_mode == 'order0':
# The `_padding_slices_inner` helper returns the slices for the
# boundary values.
left_slc, right_slc = _padding_slices_inner(
lhs_arr, rhs_arr, axis, offset, pad_mode)
if direction == 'forward':
rhs_slc_l[axis] = left_slc
rhs_slc_r[axis] = right_slc
lhs_slc_l, rhs_slc_l, lhs_slc_r, rhs_slc_r = map(
tuple, [lhs_slc_l, rhs_slc_l, lhs_slc_r, rhs_slc_r])
lhs_arr[lhs_slc_l] = lhs_arr[rhs_slc_l]
lhs_arr[lhs_slc_r] = lhs_arr[rhs_slc_r]
else:
lhs_slc_l[axis] = left_slc
lhs_slc_r[axis] = right_slc
lhs_slc_l, rhs_slc_l, lhs_slc_r, rhs_slc_r = map(
tuple, [lhs_slc_l, rhs_slc_l, lhs_slc_r, rhs_slc_r])
lhs_arr[lhs_slc_l] += np.sum(
lhs_arr[rhs_slc_l],
axis=axis, keepdims=True, dtype=lhs_arr.dtype)
lhs_arr[lhs_slc_r] += np.sum(
lhs_arr[rhs_slc_r],
axis=axis, keepdims=True, dtype=lhs_arr.dtype)
elif pad_mode == 'order1':
# Some extra work necessary: need to compute the derivative at
# the boundary and use that to continue with constant derivative.
# Slice for broadcasting of a 1D array along `axis`
bcast_slc = [None] * lhs_arr.ndim
bcast_slc[axis] = slice(None)
bcast_slc = tuple(bcast_slc)
# Slices for the boundary in `axis`
left_slc, right_slc = _padding_slices_inner(
lhs_arr, rhs_arr, axis, offset, pad_mode)
# Create slice tuples for indexing of the boundary values
bdry_slc_l = list(working_slc)
bdry_slc_l[axis] = left_slc
bdry_slc_l = tuple(bdry_slc_l)
bdry_slc_r = list(working_slc)
bdry_slc_r[axis] = right_slc
bdry_slc_r = tuple(bdry_slc_r)
# For the slope at the boundary, we need two neighboring points.
# We create the corresponding slices from the boundary slices.
slope_slc_l = list(working_slc)
slope_slc_l[axis] = slice(left_slc.start, left_slc.stop + 1)
slope_slc_l = tuple(slope_slc_l)
slope_slc_r = list(working_slc)
slope_slc_r[axis] = slice(right_slc.start - 1, right_slc.stop)
slope_slc_r = tuple(slope_slc_r)
# The `np.arange`s, broadcast along `axis`, are used to create the
# constant-slope continuation (forward) or to calculate the
# first order moments (adjoint).
arange_l = np.arange(-n_pad_l, 0,
dtype=lhs_arr.dtype)[bcast_slc]
arange_r = np.arange(1, n_pad_r + 1,
dtype=lhs_arr.dtype)[bcast_slc]
lhs_slc_l, rhs_slc_l, lhs_slc_r, rhs_slc_r = map(
tuple, [lhs_slc_l, rhs_slc_l, lhs_slc_r, rhs_slc_r])
if direction == 'forward':
# Take first order difference to get the derivative
# along `axis`.
slope_l = np.diff(lhs_arr[slope_slc_l], n=1, axis=axis)
slope_r = np.diff(lhs_arr[slope_slc_r], n=1, axis=axis)
# Finally assign the constant slope values
lhs_arr[lhs_slc_l] = lhs_arr[bdry_slc_l] + arange_l * slope_l
lhs_arr[lhs_slc_r] = lhs_arr[bdry_slc_r] + arange_r * slope_r
else:
# Same as in 'order0'
lhs_arr[bdry_slc_l] += np.sum(lhs_arr[rhs_slc_l],
axis=axis, keepdims=True,
dtype=lhs_arr.dtype)
lhs_arr[bdry_slc_r] += np.sum(lhs_arr[rhs_slc_r],
axis=axis, keepdims=True,
dtype=lhs_arr.dtype)
# Calculate the order 1 moments
moment1_l = np.sum(arange_l * lhs_arr[rhs_slc_l],
axis=axis, keepdims=True,
dtype=lhs_arr.dtype)
moment1_r = np.sum(arange_r * lhs_arr[rhs_slc_r],
axis=axis, keepdims=True,
dtype=lhs_arr.dtype)
# Add moment1 at the "width-2 boundary layers", with the sign
# corresponding to the sign in the derivative calculation
# of the forward padding.
sign = np.array([-1, 1])[bcast_slc]
lhs_arr[slope_slc_l] += moment1_l * sign
lhs_arr[slope_slc_r] += moment1_r * sign
if direction == 'forward':
working_slc[axis] = full_slc[axis]
else:
working_slc[axis] = intersec_slc[axis]
[docs]def zscore(arr):
"""Return arr normalized with mean 0 and unit variance.
If the input has 0 variance, the result will also have 0 variance.
Parameters
----------
arr : array-like
Returns
-------
zscore : array-like
Examples
--------
Compute the z score for a small array:
>>> result = zscore([1, 0])
>>> result
array([ 1., -1.])
>>> np.mean(result)
0.0
>>> np.std(result)
1.0
Does not re-scale in case the input is constant (has 0 variance):
>>> zscore([1, 1])
array([ 0., 0.])
"""
arr = arr - np.mean(arr)
std = np.std(arr)
if std != 0:
arr /= std
return arr
[docs]def binning(arr, bin_size, reduction=np.sum):
"""Bin an array by a factor.
Parameters
----------
arr : `array-like`
The array that should be binned.
bin_size : positive int or sequence
Size or per-axis sizes of the bins.
reduction: callable, optional
Function used to perform the binning by reduction over temporary
axes of size ``bin_size``. It is called as ::
reduction(reshaped_arr, axis=reduction_axes)
hence it must support this signature. The function is expected to
collapse ``reduction_axes``.
Default: `numpy.sum`
Returns
-------
binned_arr : numpy.ndarray
Array of shape ``n[i] // b[i]`` in axis ``i``, where ``n`` refers
to the original shape and ``b`` to the bin sizes.
Examples
--------
Binning by the same factor in all axes can be done with an integer
``bin_size``:
>>> arr = np.arange(24).reshape((4, 6))
>>> arr
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23]])
>>> binning(arr, bin_size=2)
array([[14, 22, 30],
[62, 70, 78]])
If a sequence is given, the bin sizes are specific for each axis:
>>> binning(arr, bin_size=(2, 3))
array([[ 24, 42],
[ 96, 114]])
Instead of the default `numpy.sum`, other functions that accept an
array as first argument and an ``axis`` keyword argument can be used for
reduction. For instance, `numpy.max`, resulting in "max pooling":
>>> binning(arr, bin_size=2, reduction=np.max)
array([[ 7, 9, 11],
[19, 21, 23]])
"""
arr = np.asarray(arr)
d = arr.ndim
try:
bin_sizes = [int(bin_size)] * d
except TypeError:
bin_sizes = bin_size
if not all(b > 0 for b in bin_sizes):
raise ValueError('expected positive `bin_size`, got {}'
''.format(bin_size))
if len(bin_sizes) != d:
raise ValueError(
'`len(bin_sizes)` must be equal to `arr.ndim`, but {} != {}'
''.format(len(bin_sizes), d)
)
if any(b > n for n, b in zip(arr.shape, bin_sizes)):
raise ValueError(
'`bin_size` {} may not exceed array shape {} in any axis'
''.format(bin_size, arr.shape)
)
if not all(n % b == 0 for n, b in zip(arr.shape, bin_sizes)):
raise ValueError(
'`bin_size` must divide `arr.shape` evenly, but `{} / {}` has a '
'remainder of {}'
''.format(
arr.shape, bin_size, tuple(np.remainder(arr.shape, bin_sizes))
)
)
red_shp = []
red_axes = []
ax = 0
for n, b in zip(arr.shape, bin_sizes):
if b == 1:
# Optimization for "no binning"
red_shp.append(n)
ax += 1
else:
red_shp.append(n // b)
red_shp.append(b)
red_axes.append(ax + 1)
ax += 2
red_axes = tuple(red_axes)
reshaped_arr = arr.reshape(red_shp)
red_arr = reduction(reshaped_arr, axis=red_axes)
out_shp = tuple(n for i, n in enumerate(red_shp) if i not in red_axes)
if red_arr.shape != out_shp:
raise ValueError('`reduction` does not produce the expected shape '
'{} from `arr.shape = {}` and `bin_size = {}`'
''.format(out_shp, arr.shape, bin_size))
return red_arr
if __name__ == '__main__':
from odl.util.testutils import run_doctests
run_doctests()