# Source code for skg.util

"""
Shared utility functions used by the fitting routines.
"""

from numpy import (
    argsort, array, asanyarray, float_, inexact, issubdtype,
    __version__ as __np_version__
)
from numpy.core.multiarray import normalize_axis_index
from numpy.lib import NumpyVersion


# Public API: the names exported by a star-import of this module.
__all__ = [
    'moveaxis', 'preprocess', 'preprocess_pair', 'preprocess_npair'
]


if NumpyVersion(__np_version__) >= '1.11.0':
    # numpy provides moveaxis itself from 1.11.0 onwards.
    from numpy import moveaxis
else:
    # NOTE(review): normalize_axis_index may itself postdate numpy 1.11 --
    # confirm that this branch is actually reachable.
    from numpy import rollaxis

    def moveaxis(a, start, end):
        """
        Single-axis stand-in for :py:func:`numpy.moveaxis`.

        Parameters
        ----------
        a : ~numpy.ndarray
            The array whose axes are reordered.
        start : int
            Current position of the axis to move. May be negative.
        end : int
            Final position of that axis. May be negative.

        Returns
        -------
        moved : ~numpy.ndarray
            View of `a` with axis `start` relocated to position `end`
            and the remaining axes kept in their original order.
        """
        ndim = a.ndim
        start = normalize_axis_index(start, ndim)
        end = normalize_axis_index(end, ndim)
        # rollaxis expresses its target as "insert before this position
        # of the original layout", so an axis travelling forward has to
        # aim one slot past its final index.
        return rollaxis(a, start, end + 1 if end > start else end)


def preprocess(x, copy=False, float=False, axis=None):
    """
    Ensure that `x` is a properly formatted numpy array.

    Proper formatting means at least one dimension, and may include
    optional copying, reshaping and coercion into a floating point
    datatype.

    Parameters
    ----------
    x : array-like
        The array to process. If not already a numpy array, it will be
        converted to one.
    copy : bool, optional
        If True, a copy is made regardless of whether `x` is already a
        numpy array or not, and the result is always a base
        :py:class:`numpy.ndarray`. If False, data is copied only when
        the conversion requires it, and array subclasses are passed
        through. The default is False.
    float : bool, optional
        If True, and `x` does not already have an inexact (floating
        point or complex) dtype, coerce it to :py:attr:`numpy.float_`.
        Existing inexact dtypes are kept as they are. Defaults to
        False.
    axis : int, optional
        If specified, the specified axis is moved to the end of the
        shape. Default is to return `x` without reshaping.

    Returns
    -------
    x : ~numpy.ndarray
        Processed version of the input.
    """
    if float:
        dtype = x.dtype if hasattr(x, 'dtype') and \
                           issubdtype(x.dtype, inexact) else float_
    else:
        dtype = None

    if copy:
        x = array(x, copy=True, subok=False, ndmin=1, dtype=dtype)
    else:
        # `array(..., copy=False)` raises on NumPy >= 2.0 whenever a copy
        # turns out to be necessary (e.g. list input or dtype coercion).
        # asanyarray copies only when it has to on every NumPy version.
        x = asanyarray(x, dtype=dtype)
        if x.ndim == 0:
            # Equivalent of ndmin=1: promote scalars to shape (1,).
            x = x.reshape(1)

    if axis is not None and axis not in (-1, x.ndim - 1):
        # moveaxis always returns a new view, never the same object
        x = moveaxis(x, axis, -1)
    return x


def preprocess_pair(x, y, sorted=True, xcopy=False, ycopy=False):
    """
    Ensure that `x` and `y` are flat floating point arrays of the same
    size, optionally ranked in increasing order by `x`.

    Parameters
    ----------
    x : array-like
        The x-values of the data points. The array will be converted to
        floating point, raveled and sorted, only as necessary.
    y : array-like
        The y-values of the data points corresponding to `x`. Must be
        the same shape as `x`. Will be converted to floating point and
        raveled only as necessary. Will be sorted if `x` gets sorted.
    sorted : bool, optional
        Set to True if `x` is already monotonically increasing or
        decreasing, in which case no sorting is done. If False, `x`
        will be sorted into increasing order, and `y` will be reordered
        along with it. The default is True.
    xcopy : bool, optional
        Ensure that `x` gets copied even if it is already an array. The
        default is to leave arrays untouched as much as possible.
    ycopy : bool, optional
        Ensure that `y` gets copied even if it is already an array. The
        default is to leave arrays untouched as much as possible.

    Returns
    -------
    x, y : ~numpy.ndarray
        Processed versions of the inputs.

    Raises
    ------
    ValueError
        If `x` and `y` do not have the same shape after conversion.

    See Also
    --------
    preprocess_npair : Similar function but for `x` containing vectors
        and `y` scalars.
    """
    x = preprocess(x, copy=xcopy, float=True)
    y = preprocess(y, copy=ycopy, float=True)
    if x.shape != y.shape:
        raise ValueError('x and y must be the same shape')

    x = x.ravel()
    y = y.ravel()
    if not sorted:
        # Apply one permutation to both arrays so that every y stays
        # paired with its x.
        order = argsort(x)
        x = x[order]
        y = y[order]
    return x, y


def preprocess_npair(x, y, axis=-1, xcopy=False, ycopy=False):
    """
    Ensure that `x` and `y` are floating point arrays of compatible
    size.

    `x` is an array containing vectors along dimension `axis`. `y`
    contains scalar elements. The shape of `y` must match that of `x`
    exactly except for `axis`.

    Parameters
    ----------
    x : array-like
        The vector x-values of the data points. The array will be
        converted to floating point, and raveled along all dimensions
        but `axis`, which will be the last dimension.
    y : array-like
        The y-values of the data points corresponding to `x`. Must have
        one fewer dimension than `x`, and its shape must match all
        elements of `x`'s shape except `axis`. Will be converted to
        floating point and raveled. When `x` is a single vector, `y`
        must hold exactly one value.
    axis : int, optional
        The dimension of `x` along which the vectors lie. May not be
        None. The default is -1.
    xcopy : bool, optional
        Ensure that `x` gets copied even if it is already an array. The
        default is to leave arrays untouched as much as possible.
    ycopy : bool, optional
        Ensure that `y` gets copied even if it is already an array. The
        default is to leave arrays untouched as much as possible.

    Returns
    -------
    x, y : ~numpy.ndarray
        Processed versions of the inputs: `x` is two-dimensional with
        one vector per row, and `y` is one-dimensional.

    Raises
    ------
    ValueError
        If `axis` is None, or if the shape of `y` does not match the
        shape of `x` with `axis` removed.

    See Also
    --------
    preprocess_pair : For cases when `x` and `y` both contain scalars,
        and are the exact same size.
    """
    if axis is None:
        raise ValueError('Axis must be an integer, not None')
    x = preprocess(x, copy=xcopy, float=True, axis=axis)
    y = preprocess(y, copy=ycopy, float=True)

    # preprocess promotes a scalar `y` to shape (1,). A lone vector `x`
    # has an empty leading shape, so compare against (1,) in that case
    # instead of rejecting a valid single data point.
    leading = x.shape[:-1] or (1,)
    if leading != y.shape:
        raise ValueError('x and y must be the same shape besides axis in x')

    x = x.reshape(-1, x.shape[-1])
    y = y.ravel()
    return x, y