# BEGIN OF LICENSE NOTE
# This file is part of Pyoints.
# Copyright (c) 2018, Sebastian Lamprecht, Trier University,
# lamprecht@uni-trier.de
#
# Pyoints is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Pyoints is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Pyoints. If not, see <https://www.gnu.org/licenses/>.
# END OF LICENSE NOTE
"""Functions for convenient handling of numpy arrays.
"""
import numpy as np
from numbers import Number
from .misc import print_rounded
# Numpy scalar types that `isnumeric` accepts by default.
# The order matters: `minimum_numeric_dtype` walks this list front to back and
# returns the first entry of the matching kind whose value range fits, so
# within each kind the types are listed from smallest to largest.
NUMERIC_DTYPES = [
    np.uint8,
    np.uint16,
    np.uint32,
    np.uint64,
    np.int8,
    np.int16,
    np.int32,
    np.int64,
    np.float16,
    np.float32,
    np.float64,
]
def isarray(o):
    """Check whether an object can be treated like an array.

    An object qualifies if it is not a string and offers both item access
    (`__getitem__`) and iteration (`__iter__`).

    Parameters
    ----------
    o : object
        Object to inspect.

    Returns
    -------
    bool
        True if the object is array like, False otherwise.

    Examples
    --------

    >>> isarray([1, 2, 3])
    True
    >>> isarray('text')
    False

    """
    # strings are iterable and indexable, but are never treated as arrays
    if isinstance(o, str):
        return False
    return all(hasattr(o, attr) for attr in ('__getitem__', '__iter__'))
def isnumeric(arr, dtypes=NUMERIC_DTYPES):
    """Check whether the data type of an array is numeric.

    Parameters
    ----------
    arr : array_like
        Array to check. Objects which are not numpy arrays are converted
        with `np.array` first.
    dtypes : optional, list or tuple
        Allowed numeric data types. Defaults to `NUMERIC_DTYPES`.

    Returns
    -------
    bool
        True if the data type of `arr` is a sub type of at least one entry
        of `dtypes`.

    Raises
    ------
    TypeError
        If `arr` is not array like or `dtypes` is neither a list nor a tuple.

    Examples
    --------

    >>> isnumeric([1, 2, 3])
    True
    >>> isnumeric(['1', '2', '3'])
    False
    >>> isnumeric([1, 2, None])
    False
    >>> isnumeric([1, 2, 3], dtypes=(np.float32, np.float64))
    False

    """
    if not isarray(arr):
        raise TypeError("'arr' needs to an array like object")
    if not isinstance(arr, np.ndarray):
        arr = np.array(arr)
    # tuples are accepted as well as lists, matching the documented contract
    if not isinstance(dtypes, (list, tuple)):
        raise TypeError(
            "'dtypes' needs to be a list or tuple. got %s" % (str(dtypes)))

    scalar_type = arr.dtype.type
    return any(
        np.issubdtype(scalar_type, np.dtype(dtype).type) for dtype in dtypes
    )
def haskeys(d):
    """Check whether an object can be treated like a dictionary.

    Parameters
    ----------
    d : object
        Object to inspect.

    Returns
    -------
    bool
        True if the object provides item access (`__getitem__`) as well as
        a `keys` attribute.

    Examples
    --------

    >>> haskeys({'a': 5, 'b': 3})
    True
    >>> haskeys([5, 6])
    False
    >>> haskeys(np.recarray(3, dtype=[('a', int)]))
    False

    """
    required = ('__getitem__', 'keys')
    return all(hasattr(d, attr) for attr in required)
def missing(data):
    """Locate missing values in an array like object.

    A value counts as missing if it equals None or if its string
    representation is 'nan'. As a consequence the literal string 'nan' is
    flagged as missing, too.

    Parameters
    ----------
    data : array_like
        Array like object which might contain missing values.

    Returns
    -------
    np.ndarray(bool, shape=data.shape)
        True wherever a value is missing.

    Raises
    ------
    ValueError
        If `data` has no length.

    Examples
    --------

    Missing values in a list.

    >>> arr = ['str', 1, None, np.nan]
    >>> print(missing(arr))
    [False False  True  True]

    Missing values in a multi-dimensional array.

    >>> arr = np.array([(0, np.nan), (None, 1), (2, 3)], dtype=float)
    >>> print(missing(arr))
    [[False  True]
     [ True False]
     [False False]]

    """
    if not hasattr(data, '__len__'):
        raise ValueError("'data' has be a array like object")

    # NaN is detected through its string representation, which also works
    # for object arrays where `np.isnan` is not applicable
    as_text = np.array(data, dtype=str)
    mask = np.equal(data, None)
    mask[as_text == 'nan'] = True
    return mask
def dtype_subset(dtype, names):
    """Select the descriptions of specific fields of a numpy data type.

    Parameters
    ----------
    dtype : list or np.dtype
        Structured numpy data type.
    names : list of str
        Names of the fields to select. The output follows the order of
        `names`. Unknown names are ignored.

    Returns
    -------
    list
        Entries of `np.dtype(dtype).descr` belonging to the desired fields.

    Raises
    ------
    TypeError
        If `names` is not iterable.

    Examples
    --------

    >>> dtypes = [('coords', float, 3), ('values', int), ('text', '<U4')]
    >>> print(dtype_subset(dtypes, ['text', 'coords']))
    [('text', '<U4'), ('coords', '<f8', (3,))]

    """
    if not hasattr(names, '__iter__'):
        raise TypeError("'names' needs to be iterable")

    descriptions = np.dtype(dtype).descr
    return [
        entry
        for name in names
        for entry in descriptions
        if entry[0] == name
    ]
def recarray(data_dict, dtype=[], dim=1):
    """Convert a dictionary of array like objects to a numpy record array.

    Parameters
    ----------
    data_dict : dict
        Dictionary of array like objects. Each key becomes a field name of
        the output record array, each item provides the corresponding
        values. At least `shape[0:dim]` of all items has to match. The
        dictionary itself is not modified.
    dtype : optional, np.dtype
        Desired data types of specific fields. Fields not listed here get
        the data type numpy estimates for their values. The default list is
        only read, never modified.
    dim : positive int
        Desired dimension of the resulting numpy record array.

    Returns
    -------
    np.recarray
        Numpy record array built from the input dictionary.

    Raises
    ------
    TypeError
        If `data_dict` is not dictionary like.
    ValueError
        If `data_dict` is empty, a field of `dtype` is not a key of
        `data_dict` or `dim` is not a positive integer.

    Examples
    --------

    Creation of a one dimensional numpy record array.

    >>> rec = recarray({
    ...     'coords': [(3, 4), (3, 2), (0, 2), (5, 2)],
    ...     'text': ['text1', 'text2', 'text3', 'text4'],
    ...     'n': [1, 3, 1, 2],
    ...     'missing': [None, None, 'str', None],
    ... })
    >>> print(sorted(rec.dtype.names))
    ['coords', 'missing', 'n', 'text']
    >>> print(rec.coords)
    [[3 4]
     [3 2]
     [0 2]
     [5 2]]

    Creation of a two dimensional record array.

    >>> data = {
    ...     'coords': [
    ...         [(2, 3, 1), (-3, 2, 4)],
    ...         [(0, 1, 2), (-1, 2, 5)],
    ...         [(-7, -1, 3), (9, -5, 6)]
    ...     ],
    ...     'values': [[1, 3], [4, 0], [-4, 2]]
    ... }
    >>> rec = recarray(data, dim=2)
    >>> print(rec.shape)
    (3, 2)
    >>> print(rec.values)
    [[ 1  3]
     [ 4  0]
     [-4  2]]
    >>> print(sorted(rec.dtype.names))
    ['coords', 'values']

    """
    if not haskeys(data_dict):
        raise TypeError("'dataDict' has to be a dictionary like object")

    # check data types
    dtype = np.dtype(dtype)
    field_names = dtype.names or ()
    keys = list(data_dict.keys())
    for key in field_names:
        if key not in keys:
            raise ValueError('column "%s" not found!' % key)
    # both conditions have to hold, so negate the conjunction as a whole
    if not (isinstance(dim, int) and dim > 0):
        raise ValueError("'dim' has to be an integer greater zero")
    if not keys:
        raise ValueError("'data_dict' must contain at least one field")

    # convert to numpy arrays if necessary, without touching the input
    arrays = {}
    for key in keys:
        values = data_dict[key]
        if not isinstance(values, np.ndarray):
            if key in field_names:
                # convert with the element type of the field; the sub-array
                # shape is applied when the record array is allocated
                values = np.asarray(values, dtype=dtype[key].base)
            else:
                values = np.asarray(values)
        arrays[key] = values

    # get data types
    out_dtypes = []
    for key, arr in arrays.items():
        if key in field_names:
            dt = dtype_subset(dtype, [key])[0]
        else:
            # trailing axes beyond `dim` become the shape of the field
            dt = (key, arr.dtype.descr[0][1], arr.shape[dim:])
        out_dtypes.append(dt)

    # define array; the first item determines the outer shape
    shape = next(iter(arrays.values())).shape
    rec = np.recarray(shape[:dim], dtype=out_dtypes)
    if len(rec) > 0:
        for key, arr in arrays.items():
            rec[key][:] = arr
    return rec
def add_fields(arr, dtypes, data=None):
    """Add additional fields to a numpy record array.

    Parameters
    ----------
    arr : np.recarray
        Numpy record array to add fields to. The array is not modified.
    dtypes : np.dtype
        Data types of the new fields.
    data : optional, list of array_like
        Values of the new fields, given in the order of `dtypes`. The shape
        of each item has to be compatible to `arr`. Items which are None are
        skipped, so the corresponding field keeps the initial content
        provided by `np.recarray`.

    Returns
    -------
    np.recarray
        Record array with the fields of `arr` followed by the new fields.

    Raises
    ------
    TypeError
        If `arr` is not a numpy record array.
    ValueError
        If `data` is not iterable, a new field already exists in `arr` or
        a new field name collides with an attribute of `arr`.

    Examples
    --------

    >>> A = np.zeros(4, dtype=[('a', int)]).view(np.recarray)
    >>> C = add_fields(A, [('b', float, 2), ('c', int)])
    >>> print(sorted(C.dtype.names))
    ['a', 'b', 'c']

    >>> D = add_fields(A, [('d', int), ('e', str)], data=[[1, 2, 3, 4], None])
    >>> print(D.d)
    [1 2 3 4]

    """
    if not isinstance(arr, np.recarray):
        raise TypeError("'arr' has to be an numpy record array")
    if data is not None and not hasattr(data, '__iter__'):
        raise ValueError("'data' has to be iterable")
    dtypes = np.dtype(dtypes)

    # check for duplicate fields
    existing = arr.dtype.names
    for name in dtypes.names:
        # fields of a record array are attributes as well, so the field
        # check has to come first to report the accurate reason
        if name in existing:
            raise ValueError("field '%s' already exists" % name)
        if hasattr(arr, name):
            raise ValueError("can not overwrite attribute '%s'" % name)

    # copy the existing values
    rec = np.recarray(arr.shape, dtype=arr.dtype.descr + dtypes.descr)
    for name in existing:
        rec[name] = arr[name]

    # set new values
    if data is not None:
        for name, column in zip(dtypes.names, data):
            if column is not None:
                rec[name] = column
    return rec
def fuse(*recarrays):
    """Fuse multiple numpy record arrays of identical shape to one array.

    Parameters
    ----------
    *recarrays : np.recarray
        Numpy record arrays to fuse. At least one array is required.

    Returns
    -------
    np.recarray
        Record array holding the fields of all `recarrays`.

    Raises
    ------
    TypeError
        If an argument is not a numpy record array.
    ValueError
        If no array is given or the shapes of the arrays differ.

    Examples
    --------

    Fuse one dimensional arrays.

    >>> A = np.zeros(4, dtype=[('a', int)]).view(np.recarray)
    >>> B = np.ones(4, dtype=[('b', int)]).view(np.recarray)
    >>> C = fuse(A, B)
    >>> print(C.shape)
    (4,)
    >>> print(C.dtype.names)
    ('a', 'b')
    >>> print(C.b)
    [1 1 1 1]

    Fuse two dimensional arrays.

    >>> A = np.zeros((2, 2), dtype=[('a', int)]).view(np.recarray)
    >>> B = np.ones((2, 2), dtype=[('b', int), ('c', float)])
    >>> D = fuse(A, B.view(np.recarray))
    >>> print(sorted(D.dtype.names))
    ['a', 'b', 'c']
    >>> print(D.shape)
    (2, 2)

    """
    if not recarrays:
        raise ValueError("at least one record array is required")

    shape = None
    dtype = []
    for arr in recarrays:
        if not isinstance(arr, np.recarray):
            raise TypeError("all arrays have to be of type 'np.recarray'")
        dtype.extend(arr.dtype.descr)

        # check shape; the first array serves as reference
        if shape is None:
            shape = arr.shape
        elif arr.shape != shape:
            raise ValueError("all arrays have to have the same shape")

    # define array and copy field by field
    fused = np.recarray(shape, dtype=dtype)
    for arr in recarrays:
        for name in arr.dtype.names:
            fused[name] = arr[name]
    return fused
def merge(arrays, strategy=np.concatenate):
    """Merge multiple arrays with identical fields.

    Parameters
    ----------
    arrays : iterable of np.recarray
        Numpy arrays to merge. Lists and tuples are passed to `strategy`
        unchanged, any other iterable is collected in a list first.
    strategy : optional, function
        Aggregate function to apply during merging. Suggested values:
        np.concatenate, np.hstack, np.vstack, np.dstack.

    Returns
    -------
    np.recarray
        Merged numpy array, wrapped to the array type of the first input
        array.

    Raises
    ------
    TypeError
        If `arrays` is not iterable, an item is not a numpy array or the
        data types of the arrays differ.
    ValueError
        If `arrays` is empty.

    Examples
    --------

    One dimensional arrays.

    >>> dtype = [('a', int, 2), ('b', int)]
    >>> A = np.zeros(3, dtype=dtype).view(np.recarray)
    >>> B = np.ones(3, dtype=dtype).view(np.recarray)
    >>> C = np.ones(3, dtype=dtype).view(np.recarray)
    >>> D = merge((A, B, C))
    >>> print(sorted(D.dtype.names))
    ['a', 'b']
    >>> print(D.b)
    [0 0 0 1 1 1 1 1 1]
    >>> print(D.shape)
    (9,)
    >>> D = merge((A, B, C), strategy=np.vstack)
    >>> print(D.shape)
    (3, 3)
    >>> D = merge((A, B, C), strategy=np.dstack)
    >>> print(D.shape)
    (1, 3, 3)

    Two dimensional arrays.

    >>> A = np.zeros((2, 2), dtype=dtype).view(np.recarray)
    >>> B = np.ones((2, 2), dtype=dtype).view(np.recarray)
    >>> D = merge((A, B))
    >>> print(D.shape)
    (4, 2)
    >>> D = merge((A, B), strategy=np.hstack)
    >>> print(D.shape)
    (2, 4)

    """
    if not hasattr(arrays, '__iter__'):
        raise TypeError("'arrays' needs to be iterable")
    # the arrays are traversed more than once and the first one is accessed
    # by index, so one-shot iterables have to be collected first
    if not isinstance(arrays, (list, tuple)):
        arrays = list(arrays)
    if len(arrays) == 0:
        raise ValueError("'arrays' must contain at least one array")

    descr = None
    for arr in arrays:
        if not isinstance(arr, (np.recarray, np.ndarray)):
            raise TypeError("'array' needs to be an iterable of 'np.recarray'")
        if descr is None:
            # the first array serves as reference
            descr = arr.dtype.descr
        elif arr.dtype.descr != descr:
            raise TypeError("all data types need to match")

    # wrap the result to keep the array type of the first input
    return arrays[0].__array_wrap__(strategy(arrays))
def flatten_dtypes(np_dtypes):
    """Extract name, data type and shape of each field of a numpy data type.

    Parameters
    ----------
    np_dtypes : np.dtype
        Structured numpy data type to flatten.

    Returns
    -------
    names : list of str
        Names of the fields.
    dtypes : list of dtypes
        Data types of the fields. For sub-array fields the element type is
        returned.
    shapes : list of int
        Length of the first axis of each sub-array field, or 0 for fields
        without a sub-array.

    Examples
    --------

    >>> dtype = np.dtype([
    ...     ('simple', np.int8),
    ...     ('multidimensional', np.float32, 3),
    ... ])
    >>> names, dtypes, shapes = flatten_dtypes(dtype)
    >>> names
    ['simple', 'multidimensional']
    >>> dtypes
    [dtype('int8'), dtype('float32')]
    >>> shapes
    [0, 3]

    """
    np_dtypes = np.dtype(np_dtypes)
    names = list(np_dtypes.names)

    dtypes, shapes = [], []
    for name in names:
        field = np_dtypes[name]
        if field.subdtype is None:
            # plain field without sub-array
            dtypes.append(field)
            shapes.append(0)
        else:
            dtypes.append(field.subdtype[0])
            shapes.append(field.shape[0])
    return names, dtypes, shapes
def unnest(arr, deep=False):
    """Recursively split a nested numpy record array into a list of arrays.

    Parameters
    ----------
    arr : np.recarray or np.ndarray
        Numpy array to unnest.
    deep : bool
        If True, numpy arrays with more than one dimension are additionally
        split into their individual columns using `colzip`.

    Returns
    -------
    list
        List of unnested fields.

    Raises
    ------
    TypeError
        If `arr` is neither a `np.recarray` nor a `np.ndarray`.

    Examples
    --------

    >>> dtype = [
    ...    ('regular', int),
    ...    ('nested', [
    ...         ('child1', int),
    ...         ('child2', int, 2)
    ...    ])
    ... ]
    >>> rec = np.ones(2, dtype=dtype).view(np.recarray)
    >>> unnested = unnest(rec)
    >>> print(len(unnested))
    3
    >>> print(unnested[0])
    [1 1]
    >>> print(unnested[2])
    [[1 1]
     [1 1]]

    """
    if not isinstance(arr, (np.recarray, np.ndarray)):
        m = "'rec' has to be an instance of 'np.recarray' or 'np.ndarray'"
        raise TypeError(m)

    if isinstance(arr, np.recarray):
        # descend into each field of the record array
        parts = [arr[name] for name in arr.dtype.names]
    elif deep and arr.ndim > 1:
        parts = colzip(arr)
    else:
        # plain array, nothing left to split
        return [arr]

    flat = []
    for part in parts:
        flat.extend(unnest(part, deep=deep))
    return flat
def colzip(arr):
    """Split a numpy array into a list of columns.

    Parameters
    ----------
    arr : np.ndarray(shape=(n, k)) or np.recarray
        Two dimensional numpy array with `n` rows and `k` columns, or a
        numpy record array, where each field is treated as a column.

    Returns
    -------
    columns : list of np.ndarray
        One array per column or field.

    Raises
    ------
    TypeError
        If `arr` is not a numpy array.
    ValueError
        If `arr` is a regular numpy array which is not two dimensional.

    Examples
    --------

    >>> arr = np.eye(3, dtype=int)
    >>> cols = colzip(arr)
    >>> len(cols)
    3
    >>> print(cols[0])
    [1 0 0]

    """
    # record arrays: one column per field
    if isinstance(arr, np.recarray):
        return [arr[name] for name in arr.dtype.names]

    if not isinstance(arr, np.ndarray):
        raise TypeError("unexpected type of 'arr'")
    if arr.ndim != 2:
        raise ValueError("'arr' has be two dimensional")

    n_cols = arr.shape[1]
    return [arr[:, i] for i in range(n_cols)]
def apply_function(arr, func, dtype=None):
    """Apply a function to each record of a numpy array.

    Parameters
    ----------
    arr : np.ndarray or np.recarray
        Numpy array to apply the function to.
    func : function
        Function called once for each entry of `arr`.
    dtype : optional, np.dtype
        Desired data type of the output array.

    Returns
    -------
    np.ndarray or np.recarray
        Array of shape `arr.shape` holding the results of `func`. If `dtype`
        has named fields, a record array is returned.

    Raises
    ------
    ValueError
        If `func` is not callable.
    TypeError
        If `arr` is not a numpy array.

    Examples
    --------

    Apply a function to a structured numpy array.

    >>> arr = np.ones((2, 3), dtype=[('a', int), ('b', int)])
    >>> func = lambda item: item[0] + item[1]
    >>> print(apply_function(arr, func))
    [[2 2 2]
     [2 2 2]]

    Aggregate the fields of a record array.

    >>> arr = np.ones(4, dtype=[('a', int), ('b', int)]).view(np.recarray)
    >>> func = lambda record: record.a + record.b
    >>> print(apply_function(arr, func))
    [2 2 2 2]

    Specify the output data type.

    >>> func = lambda record: (record.a + record.b, record.a * 3)
    >>> res = apply_function(arr, func, dtype=[('c', int), ('d', int)])
    >>> print(res.c)
    [2 2 2 2]
    >>> print(res.d)
    [3 3 3 3]

    """
    if not callable(func):
        raise ValueError("'func' needs to be callable")
    if not isinstance(arr, (np.ndarray, np.recarray)):
        m = "'ndarray' needs to an instance of 'np.ndarray' or 'np.recarray'"
        raise TypeError(m)
    out_dtype = None if dtype is None else np.dtype(dtype)

    # broadcasting against None yields one (None, entry) pair per entry
    results = []
    for _, entry in np.broadcast(None, arr):
        results.append(func(entry))

    res = np.array(results, dtype=out_dtype).reshape(arr.shape)
    if out_dtype is not None and out_dtype.names is not None:
        # named fields: provide attribute access to the results
        res = res.view(np.recarray)
    return res
def indices(shape, flatten=False):
    """Create keys or indices of a numpy ndarray.

    Parameters
    ----------
    shape : int or array_like(int)
        Shape of the array to create indices for.
    flatten : optional, bool
        If True, the indices are returned as a two dimensional array with
        one index tuple per row. Ignored if `shape` is an integer.

    Returns
    -------
    np.ndarray(int)
        For an integer `shape`, the one dimensional array
        `np.arange(shape)`. Otherwise an array of shape
        `(*shape, len(shape))`, or `(prod(shape), len(shape))` if `flatten`
        is True. Each entry provides an index tuple to access an array
        entry.

    Examples
    --------

    One dimensional case.

    >>> keys = indices(9)
    >>> print(keys.shape)
    (9,)
    >>> print(keys)
    [0 1 2 3 4 5 6 7 8]

    Two dimensional case.

    >>> keys = indices((3, 4))
    >>> keys.shape
    (3, 4, 2)
    >>> print(keys[1])
    [[1 0]
     [1 1]
     [1 2]
     [1 3]]

    Get an iterable of indices.

    >>> keys = indices((2, 2), flatten=True)
    >>> print(keys)
    [[0 0]
     [0 1]
     [1 0]
     [1 1]]

    """
    if isinstance(shape, int):
        return np.arange(shape)

    shape = np.array(shape, dtype=int)
    keys = np.indices(shape)
    if flatten:
        # one row per array entry; `np.prod` replaces the removed alias
        # `np.product`
        return keys.reshape(-1, np.prod(shape)).T
    # move the index axis to the end, so `keys[i, j]` yields `[i, j]`
    return np.moveaxis(keys, 0, -1)
def range_filter(arr, min_value=-np.inf, max_value=np.inf):
    """Find the indices of all values within a value range.

    Parameters
    ----------
    arr : array_like(Number)
        Numeric array to filter.
    min_value,max_value : Number
        Minimum and maximum values defining the desired value range
        `[min_value, max_value]`. Both limits are inclusive.

    Returns
    -------
    np.ndarray(int) or tuple of tuples
        Indices of all values of `arr` within the desired range. For a one
        dimensional `arr` a one dimensional index array is returned,
        otherwise a tuple holding one tuple of indices per axis.

    Raises
    ------
    TypeError
        If `arr` is not numeric or a limit is not a number.
    ValueError
        If `max_value` is not greater than or equal to `min_value`.

    Examples
    --------

    Filter a one dimensional array.

    >>> a = [0, 2, 1, -1, 5, 7, 9, 4, 3, 2, -2, -11]
    >>> print(range_filter(a, min_value=0))
    [0 1 2 4 5 6 7 8 9]
    >>> idx = range_filter(a, min_value=0, max_value=5)
    >>> print(idx)
    [0 1 2 4 7 8 9]
    >>> print(np.array(a)[idx])
    [0 2 1 5 4 3 2]

    Filter a multi-dimensional array.

    >>> a = [(1, 0), (-2, -1), (3, -5), (4, 2), (-7, 9), (0, 2)]
    >>> idx = range_filter(a, min_value=2)
    >>> print(np.array(a)[idx])
    [3 4 2 9 2]
    >>> idx = range_filter(a, min_value=2, max_value=5)
    >>> print(np.array(a)[idx])
    [3 4 2 2]

    """
    if not isnumeric(arr):
        raise TypeError("'arr' needs to be an numeric array")
    for limit, message in (
            (min_value, "'min_value' needs to a number"),
            (max_value, "'max_value' needs to a number")):
        if not isinstance(limit, Number):
            raise TypeError(message)
    if not max_value >= min_value:
        m = "'max_value' needs to be greater or equal 'min_value'"
        raise ValueError(m)

    arr = np.array(arr)
    in_range = (arr >= min_value) & (arr <= max_value)
    hits = np.where(in_range)
    if arr.ndim == 1:
        return hits[0]
    # one tuple of indices per axis, usable for fancy indexing
    return tuple(tuple(axis_ids) for axis_ids in np.array(hits))
def max_value_range(dtype):
    """Return the value range a numeric numpy data type is able to hold.

    Parameters
    ----------
    dtype : np.dtype
        Integer or floating point data type to check.

    Returns
    -------
    min_value,max_value : Number
        Smallest and largest value of the data type, as reported by
        `np.iinfo` or `np.finfo`.

    Raises
    ------
    ValueError
        If `dtype` is neither an integer nor a floating point data type.

    Examples
    --------

    >>> print(max_value_range(np.dtype(np.uint8)))
    (0, 255)
    >>> print(max_value_range(np.dtype(np.uint16)))
    (0, 65535)
    >>> print(max_value_range(np.dtype(np.int8)))
    (-128, 127)
    >>> print(max_value_range(np.dtype(np.int16)))
    (-32768, 32767)

    """
    dtype = np.dtype(dtype)
    kind = dtype.kind
    if kind == 'f':
        limits = np.finfo(dtype)
    elif kind in ('i', 'u'):
        limits = np.iinfo(dtype)
    else:
        raise ValueError("unknown numeric data type '%s'" % dtype)
    return limits.min, limits.max
def minimum_numeric_dtype(arr):
    """Determine the smallest numeric data type able to hold the value range
    of an array.

    Only the minimum and maximum value of `arr` are compared to the value
    range of each candidate in `NUMERIC_DTYPES`. The precision of floating
    point values is not taken into account.

    Parameters
    ----------
    arr : np.ndarray(Number)
        Numeric array to find the minimum data type for.

    Returns
    -------
    np.dtype
        First data type of `NUMERIC_DTYPES` of suitable kind covering the
        value range of `arr`. If no candidate fits, the data type of `arr`
        is returned.

    Raises
    ------
    TypeError
        If `arr` is not a numpy array.
    ValueError
        If the data type of `arr` is neither integer nor floating point.

    Examples
    --------

    Find the minimum data type of integer arrays.

    >>> arr = np.array([0, 255], dtype=np.int32)
    >>> print(arr.dtype)
    int32
    >>> print(minimum_numeric_dtype(arr))
    uint8
    >>> print(minimum_numeric_dtype(np.array([0, 256])))
    uint16
    >>> print(minimum_numeric_dtype(np.array([-5, 127])))
    int8
    >>> print(minimum_numeric_dtype(np.array([-5, 128])))
    int16
    >>> print(minimum_numeric_dtype(np.array([-5, 214748364])))
    int32

    Find the minimum data type of floating point arrays.

    >>> arr = np.array([-5.2, 100.3])
    >>> print(arr.dtype)
    float64
    >>> print(minimum_numeric_dtype(arr))
    float16

    """
    if not isinstance(arr, np.ndarray):
        raise TypeError("'arr' needs to be an instance of 'np.ndarray'")
    if arr.dtype.kind not in ('u', 'i', 'f'):
        raise ValueError("unknown numeric data type '%s'" % arr.dtype)

    min_value = arr.min()
    max_value = arr.max()

    # integers without negative values are stored unsigned
    if arr.dtype.kind == 'f':
        kind = 'f'
    else:
        kind = 'i' if min_value < 0 else 'u'

    for candidate in map(np.dtype, NUMERIC_DTYPES):
        if candidate.kind != kind:
            continue
        lower, upper = max_value_range(candidate)
        if min_value >= lower and max_value <= upper:
            return candidate
    return arr.dtype