# Source code for pyoints.nptools

# BEGIN OF LICENSE NOTE
# This file is part of Pyoints.
# Copyright (c) 2018, Sebastian Lamprecht, Trier University,
# lamprecht@uni-trier.de
#
# Pyoints is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Pyoints is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Pyoints. If not, see <https://www.gnu.org/licenses/>.
# END OF LICENSE NOTE
"""Functions for convenient handling of numpy arrays.
"""

import numpy as np
from numbers import Number

from .misc import print_rounded

# Numeric numpy scalar types. Used as the default set of accepted types in
# `isnumeric` and as the candidate list of `minimum_numeric_dtype`.
# The order matters: `minimum_numeric_dtype` walks this list front to back and
# returns the first entry of matching kind whose value range fits, so within
# each kind (unsigned, signed, float) the types are listed by increasing size.
NUMERIC_DTYPES = [
    np.uint8,
    np.uint16,
    np.uint32,
    np.uint64,
    np.int8,
    np.int16,
    np.int32,
    np.int64,
    np.float16,
    np.float32,
    np.float64,
]


def isarray(o):
    """Checks whether or not an object is an array.

    Parameters
    ----------
    o : object
        Some object.

    Returns
    -------
    bool
        True if `o` is not a string and provides both `__getitem__` and
        `__iter__`, False otherwise.

    Examples
    --------

    >>> isarray([1, 2, 3])
    True
    >>> isarray('text')
    False

    """
    # strings are indexable and iterable, but are never treated as arrays
    if isinstance(o, str):
        return False
    return all(hasattr(o, attr) for attr in ('__getitem__', '__iter__'))
def isnumeric(arr, dtypes=NUMERIC_DTYPES):
    """Checks if the data type of an array is numeric.

    Parameters
    ----------
    arr : array_like
        Numpy array to check.
    dtypes : optional, list or tuple
        Allowed numeric data types. Defaults to `NUMERIC_DTYPES`.

    Returns
    -------
    bool
        Indicates whether or not the array is numeric.

    Raises
    ------
    TypeError
        If `arr` is not array like or `dtypes` is neither a list nor a tuple.

    Examples
    --------

    >>> isnumeric([1, 2, 3])
    True
    >>> isnumeric(['1', '2', '3'])
    False
    >>> isnumeric([1, 2, None])
    False

    """
    if not isarray(arr):
        raise TypeError("'arr' needs to be an array like object")
    if not isinstance(arr, np.ndarray):
        arr = np.asarray(arr)
    # tuples are accepted in addition to lists, as promised by the
    # documentation of this parameter
    if not isinstance(dtypes, (list, tuple)):
        raise TypeError(
            "'dtypes' needs to be a list or tuple. got %s" % (str(dtypes)))

    scalar_type = arr.dtype.type
    return any(
        np.issubdtype(scalar_type, np.dtype(dtype).type) for dtype in dtypes
    )
def haskeys(d):
    """Checks if an object has keys and can be treated like a dictionary.

    Parameters
    ----------
    d : object
        Object to be checked.

    Returns
    -------
    bool
        True if `d` provides `__getitem__` as well as a `keys` attribute.

    Examples
    --------

    >>> haskeys({'a': 5, 'b': 3})
    True
    >>> haskeys([5, 6])
    False
    >>> haskeys(np.recarray(3, dtype=[('a', int)]))
    False

    """
    if not hasattr(d, '__getitem__'):
        return False
    return hasattr(d, 'keys')
def missing(data):
    """Find missing values in an array.

    Parameters
    ----------
    data : array_like
        An array like object which might contain missing values. A value
        counts as missing if it equals None or if its string representation
        is 'nan'.

    Returns
    -------
    array_like(bool, shape=data.shape)
        Boolean values indicate missing values.

    Raises
    ------
    ValueError
        If `data` has no length.

    Examples
    --------

    Finding missing values in a list.

    >>> arr = ['str', 1, None, np.nan]
    >>> print(missing(arr))
    [False False  True  True]

    Finding missing values in a multi-dimensional array.

    >>> arr = np.array([(0, np.nan), (None, 1), (2, 3)], dtype=float)
    >>> print(missing(arr))
    [[False  True]
     [ True False]
     [False False]]

    """
    if not hasattr(data, '__len__'):
        raise ValueError("'data' has be a array like object")

    # NaN is detected via its string representation, which also works for
    # arrays of mixed or non-numeric content
    as_text = np.array(data, dtype=str)
    is_nan = as_text == 'nan'

    mask = np.equal(data, None)
    mask[is_nan] = True
    return mask
def dtype_subset(dtype, names):
    """Creates a subset of a numpy type object.

    Parameters
    ----------
    dtype : list or np.dtype
        Numpy data type.
    names : list of str
        Fields to select. The output follows the order of `names`.

    Raises
    ------
    TypeError
        If `names` is not iterable.

    Returns
    -------
    list
        Desired subset of numpy data type descriptions. Names which are not
        part of `dtype` are skipped silently.

    Examples
    --------

    >>> dtypes = [('coords', float, 3), ('values', int), ('text', '<U4')]
    >>> print(dtype_subset(dtypes, ['text', 'coords']))
    [('text', '<U4'), ('coords', '<f8', (3,))]

    """
    if not hasattr(names, '__iter__'):
        raise TypeError("'names' needs to be iterable")

    fields = np.dtype(dtype).descr
    return [field for name in names for field in fields if field[0] == name]
def recarray(data_dict, dtype=[], dim=1):
    """Converts a dictionary of array like objects to a numpy record array.
    This function is mostly used for convenience.

    Parameters
    ----------
    data_dict : dict
        Dictionary of array like objects to convert to a numpy record array.
        Each key in `data_dict` represents a field name of the output record
        array. Each item in `data_dict` represents the corresponding values.
        Thus, at least `shape[0:dim]` of all input arrays in `data_dict` have
        to match. The dictionary itself is not modified.
    dtype : optional, numpy.dtype
        Describes the desired data types of specific fields. If the data type
        of a field is not given, the data type is estimated by numpy.
    dim : positive int
        Desired dimension of the resulting numpy record array.

    Returns
    -------
    np.recarray
        Numpy record array built from input dictionary.

    Raises
    ------
    TypeError
        If `data_dict` is not dictionary like.
    ValueError
        If a field of `dtype` is missing in `data_dict`, if `dim` is not a
        positive integer or if `data_dict` is empty.

    Examples
    --------

    Creation of a one dimensional numpy record array using a dictionary.

    >>> rec = recarray({
    ...    'coords': [ (3, 4), (3, 2), (0, 2), (5, 2)],
    ...    'text': ['text1', 'text2', 'text3', 'text4'],
    ...    'n':  [1, 3, 1, 2],
    ...    'missing':  [None, None, 'str', None],
    ... })
    >>> print(sorted(rec.dtype.names))
    ['coords', 'missing', 'n', 'text']
    >>> print(rec.coords)
    [[3 4]
     [3 2]
     [0 2]
     [5 2]]

    Create a two dimensional record array.

    >>> data = {
    ...    'coords': [
    ...                 [(2, 3.2, 1), (-3, 2.2, 4)],
    ...                 [(0, 1.1, 2), (-1, 2.2, 5)],
    ...                 [(-7, -1, 3), (9.2, -5, 6)]
    ...             ],
    ...    'values': [[1, 3], [4, 0], [-4, 2]]
    ... }
    >>> rec = recarray(data, dim=2)
    >>> print(rec.shape)
    (3, 2)
    >>> print(rec.values)
    [[ 1  3]
     [ 4  0]
     [-4  2]]
    >>> print(sorted(rec.dtype.names))
    ['coords', 'values']

    """
    if not haskeys(data_dict):
        raise TypeError("'dataDict' has to be a dictionary like object")

    # check data types
    dtype = np.dtype(dtype)
    for key in dtype.names:
        if key not in data_dict.keys():
            raise ValueError('column "%s" not found!' % key)
    # both conditions have to hold; the parentheses are essential here
    if not (isinstance(dim, int) and dim > 0):
        raise ValueError("'dim' has to be an integer greater zero")

    # convert to numpy arrays if necessary; the results are collected in a
    # new dictionary so the caller's object is left untouched
    arrays = {}
    for key in data_dict.keys():
        values = data_dict[key]
        if not isinstance(values, np.ndarray):
            if key in dtype.names:
                # use the scalar base type of the field, so nested sequences
                # are read as sub-array values rather than as records
                values = np.asarray(values, dtype=dtype[key].base)
            else:
                values = np.asarray(values)
        arrays[key] = values

    if not arrays:
        raise ValueError("'data_dict' must not be empty")

    # get data types
    out_dtypes = []
    for key, arr in arrays.items():
        if key in dtype.names:
            dt = dtype_subset(dtype, [key])[0]
        else:
            # trailing dimensions beyond `dim` become the shape of the field
            dt = (key, arr.dtype.descr[0][1], arr.shape[dim:])
        out_dtypes.append(dt)

    # define array, the first entry determines the shape
    shape = next(iter(arrays.values())).shape
    rec = np.recarray(shape[:dim], dtype=out_dtypes)
    if len(rec) > 0:
        for key, arr in arrays.items():
            rec[key][:] = arr

    return rec
def add_fields(arr, dtypes, data=None):
    """Adds additional fields to a numpy record array.

    Parameters
    ----------
    arr : np.recarray
        Numpy record array to add fields to.
    dtypes : np.dtype
        Data types of the new fields.
    data : optional, list of array_like
        Data values of the new fields, in the order of `dtypes`. The shape of
        each array has to be compatible to `arr`. An entry of None leaves the
        corresponding field unassigned.

    Returns
    -------
    np.recarray
        Record array similar to `arr`, but with additional fields of type
        `dtypes` and values of `data`. New fields without data are not
        assigned any values by this function.

    Raises
    ------
    TypeError
        If `arr` is not a numpy record array.
    ValueError
        If `data` is not iterable, if a new field already exists in `arr` or
        if its name collides with another attribute of `arr`.

    Examples
    --------

    >>> A = recarray({'a': [0, 1, 2, 3]})
    >>> C = add_fields(A, [('b', float, 2), ('c', int)])
    >>> print(sorted(C.dtype.names))
    ['a', 'b', 'c']

    >>> D = add_fields(A, [('d', int), ('e', str)], data=[[1, 2, 3, 4], None])
    >>> print(D.d)
    [1 2 3 4]

    """
    if not isinstance(arr, np.recarray):
        raise TypeError("'arr' has to be an numpy record array")
    if data is not None and not hasattr(data, '__iter__'):
        raise ValueError("'data' has to be iterable")

    dtypes = np.dtype(dtypes)

    # check for duplicate fields
    # Fields of a record array are also reachable as attributes, so the field
    # check has to come first. Otherwise the generic attribute check would
    # always win and the more specific message could never be raised.
    for name in dtypes.names:
        if name in arr.dtype.names:
            raise ValueError("field '%s' already exists" % name)
        if hasattr(arr, name):
            raise ValueError("can not overwrite attribute '%s'" % name)

    new_dtypes = arr.dtype.descr + dtypes.descr

    # copy existing values
    rec = np.recarray(arr.shape, dtype=new_dtypes)
    for name in arr.dtype.names:
        rec[name] = arr[name]

    # set new values
    if data is not None:
        for name, column in zip(dtypes.names, data):
            if column is not None:
                rec[name] = column

    return rec
def fuse(*recarrays):
    """Fuses multiple numpy record arrays of identical shape to one array.

    Parameters
    ----------
    *recarrays : np.recarray
        Numpy record arrays to fuse.

    Returns
    -------
    np.recarray
        Record array with all fields of `recarrays`.

    Raises
    ------
    TypeError
        If any of the arrays is not a numpy record array.
    ValueError
        If no array is given or if the shapes of the arrays differ.

    Examples
    --------

    Fuse one dimensional arrays.

    >>> A = recarray({'a': [0, 1, 2, 3]})
    >>> B = recarray({'b': [4, 5, 6, 7]})
    >>> C = fuse(A, B)
    >>> print(C.shape)
    (4,)
    >>> print(C.dtype.names)
    ('a', 'b')

    Fuse multiple two dimensional arrays.

    >>> A = recarray({'a': [[0, 1], [2, 3]]}, dim = 2)
    >>> B = recarray({'b': [[4, 5], [6, 7]]}, dim = 2)
    >>> C = recarray({
    ...     'c1': [['c1', 'c2'], ['c3', 'c4']],
    ...     'c2': [[0.1, 0.2], [0.3, 0.3]],
    ... }, dim = 2)
    >>> D = fuse(A, B, C)
    >>> print(sorted(D.dtype.names))
    ['a', 'b', 'c1', 'c2']
    >>> print(D.shape)
    (2, 2)
    >>> print(D.c1)
    [['c1' 'c2']
     ['c3' 'c4']]

    """
    # without any input there is neither a shape nor a field to build from
    if not recarrays:
        raise ValueError("at least one array is required")

    shape = None
    dtype = []
    for arr in recarrays:
        if not isinstance(arr, np.recarray):
            raise TypeError("all arrays have to be of type 'np.recarray'")

        # check shape, the first array serves as reference
        if shape is None:
            shape = arr.shape
        elif arr.shape != shape:
            raise ValueError("all arrays have to have the same shape")

        dtype.extend(arr.dtype.descr)

    # define array and copy field by field
    fused = np.recarray(shape, dtype=dtype)
    for arr in recarrays:
        for name in arr.dtype.names:
            fused[name] = arr[name]

    return fused
def merge(arrays, strategy=np.concatenate):
    """Merges multiple arrays with similar fields.

    Parameters
    ----------
    arrays : iterable of np.recarray
        Numpy arrays to merge. Any iterable is accepted, including
        generators.
    strategy : optional, function
        Aggregate function to apply during merging. Suggested values:
        np.concatenate, np.hstack, np.vstack, np.dstack.

    Returns
    -------
    np.recarray
        Merged numpy record array of same data type as the first input array.

    Raises
    ------
    TypeError
        If `arrays` is not an iterable of numpy arrays or if the data types
        of the arrays differ.
    ValueError
        If `arrays` is empty.

    Examples
    --------

    One dimensional arrays.

    >>> A = recarray({'a': [(0, 1), (2, 3), (4, 5)], 'b': ['e', 'f', 'g']})
    >>> B = recarray({'a': [(6, 7), (8, 9), (0, 1)], 'b': ['h', 'i', 'j']})
    >>> C = recarray({'a': [(2, 3), (4, 5), (6, 7)], 'b': ['k', 'l', 'm']})

    >>> D = merge((A, B, C))
    >>> print(sorted(D.dtype.names))
    ['a', 'b']
    >>> print(D.b)
    ['e' 'f' 'g' 'h' 'i' 'j' 'k' 'l' 'm']
    >>> print(D.shape)
    (9,)

    >>> D = merge((A, B, C), strategy=np.hstack)
    >>> print(D.shape)
    (9,)
    >>> D = merge((A, B, C), strategy=np.vstack)
    >>> print(D.shape)
    (3, 3)
    >>> D = merge((A, B, C), strategy=np.dstack)
    >>> print(D.shape)
    (1, 3, 3)

    Merge two dimensional arrays.

    >>> A = recarray({
    ...     'a': [(0, 1), (2, 3)], 'b': [('e', 'f'), ('g', 'h')]
    ... }, dim=2)
    >>> B = recarray({
    ...     'a': [(4, 5), (6, 7)], 'b': [('i', 'j'), ('k', 'l')]
    ... }, dim=2)
    >>> C = recarray({
    ...     'a': [(1, 3), (7, 2)], 'b': [('m', 'n'), ('o', 'p')]
    ... }, dim=2)

    >>> D = merge((A, B, C))
    >>> print(D.shape)
    (6, 2)
    >>> D = merge((A, B, C), strategy=np.hstack)
    >>> print(D.shape)
    (2, 6)
    >>> D = merge((A, B, C), strategy=np.dstack)
    >>> print(D.shape)
    (2, 2, 3)

    """
    if not hasattr(arrays, '__iter__'):
        raise TypeError("'arrays' needs to be iterable")

    # Materialize once: the input is traversed, indexed and handed over to
    # `strategy`, which a one-shot iterable such as a generator would not
    # survive.
    arrays = tuple(arrays)
    if not arrays:
        raise ValueError("'arrays' must not be empty")

    # the first array defines the data type all others have to match
    dtype = None
    for arr in arrays:
        if not isinstance(arr, (np.recarray, np.ndarray)):
            raise TypeError("'array' needs to be an iterable of 'np.recarray'")
        if dtype is None:
            dtype = arr.dtype.descr
        elif arr.dtype.descr != dtype:
            raise TypeError("all data types need to match")

    # wrap the result, so it is of the same array type as the first input
    return arrays[0].__array_wrap__(strategy(arrays))
def flatten_dtypes(np_dtypes):
    """Extract name, datatype and shape information from a numpy data type.

    Parameters
    ----------
    np_dtypes : np.dtype
        Numpy data types to flatten.

    Returns
    -------
    names : list of str
        Names of fields.
    dtypes : list of dtypes
        Data types of fields. For sub-array fields the data type of the
        elements is given.
    shapes : list of int
        Length of the first dimension of each sub-array field, or zero for
        fields without sub-array.

    Examples
    --------

    >>> dtype = np.dtype([
    ...     ('simple', np.int8),
    ...     ('multidimensional', np.float32, 3),
    ... ])
    >>> names, dtypes, shapes = flatten_dtypes(dtype)
    >>> names
    ['simple', 'multidimensional']
    >>> dtypes
    [dtype('int8'), dtype('float32')]
    >>> shapes
    [0, 3]

    """
    np_dtypes = np.dtype(np_dtypes)
    names = list(np_dtypes.names)

    dtypes = []
    shapes = []
    for field in names:
        field_dtype = np_dtypes[field]
        if field_dtype.subdtype is None:
            # plain field without sub-array
            dtypes.append(field_dtype)
            shapes.append(0)
        else:
            dtypes.append(field_dtype.subdtype[0])
            shapes.append(field_dtype.shape[0])

    return names, dtypes, shapes
def unnest(arr, deep=False):
    """Unnest a numpy record array. This function recursively splits a nested
    numpy array to a list of arrays.

    Parameters
    ----------
    arr : np.recarray or np.ndarray
        Numpy array to unnest.
    deep : bool
        Indicates whether or not numpy ndarrays shall be split into
        individual columns or not.

    Raises
    ------
    TypeError
        If `arr` is not a numpy array.

    Returns
    -------
    list
        List of unnested fields.

    Examples
    --------

    >>> dtype = [
    ...    ('regular', int),
    ...    ('nested', [
    ...         ('child1', 'U1'),
    ...         ('child2', float, 2)
    ...    ])
    ... ]
    >>> rec = np.ones(2, dtype=dtype).view(np.recarray)
    >>> unnested = unnest(rec)
    >>> len(unnested)
    3
    >>> print(unnested[0])
    [1 1]
    >>> print(unnested[2].shape)
    (2, 2)

    """
    if not isinstance(arr, (np.recarray, np.ndarray)):
        m = "'rec' has to be an instance of 'np.recarray' or 'np.ndarray'"
        raise TypeError(m)

    if isinstance(arr, np.recarray):
        # record arrays are split field by field
        parts = [arr[name] for name in arr.dtype.names]
    elif deep and arr.ndim > 1:
        # plain arrays are only split into columns on request
        parts = colzip(arr)
    else:
        return [arr]

    flat = []
    for part in parts:
        flat.extend(unnest(part, deep=deep))
    return flat
def colzip(arr):
    """Splits a two dimensional numpy array into a list of columns.

    Parameters
    ----------
    arr : np.ndarray(shape=(n, k)) or np.recarray(shape=(n, ))
        Numpy array with `n` rows and `k` columns. For record arrays each
        field is treated as a column.

    Returns
    -------
    columns : list of np.ndarray
        List of `k` numpy arrays.

    Raises
    ------
    TypeError
        If `arr` is not a numpy array.
    ValueError
        If `arr` is a plain array, which is not two dimensional.

    Examples
    --------

    >>> arr = np.eye(3, dtype=int)
    >>> cols = colzip(arr)
    >>> len(cols)
    3
    >>> print(cols[0])
    [1 0 0]

    """
    # record arrays are numpy arrays as well, so one type check covers both
    if not isinstance(arr, np.ndarray):
        raise TypeError("unexpected type of 'arr'")

    if isinstance(arr, np.recarray):
        return [arr[name] for name in arr.dtype.names]

    if arr.ndim != 2:
        raise ValueError("'arr' has be two dimensional")
    return [arr[:, col] for col in range(arr.shape[1])]
def apply_function(arr, func, dtype=None):
    """Applies a function to each record of a numpy array.

    Parameters
    ----------
    arr : np.ndarray or np.recarray
        Numpy array to apply function to.
    func : function
        Function to apply to each record.
    dtype : optional, np.dtype
        Desired data type of the output array.

    Returns
    -------
    np.ndarray or np.recarray
        Array of the same shape as the input array, holding the return value
        of `func` for each record. A record array is returned if `dtype`
        defines named fields.

    Raises
    ------
    ValueError
        If `func` is not callable.
    TypeError
        If `arr` is not a numpy array.

    Examples
    --------

    Apply a function to a numpy ndarray.

    >>> arr = np.ones((2, 3), dtype=[('a', int), ('b', int)])
    >>> func = lambda item: item[0] + item[1]
    >>> print(apply_function(arr, func))
    [[2 2 2]
     [2 2 2]]

    Aggregate a one dimensional numpy record array.

    >>> data = { 'a': [0, 1, 2, 3], 'b': [1, 2, 3, 4] }
    >>> arr = recarray(data)
    >>> func = lambda record: record.a + record.b
    >>> print(apply_function(arr, func))
    [1 3 5 7]

    Two dimensional case.

    >>> data = { 'a': [[0, 1], [2, 3]], 'b': [[1, 2], [3, 4]] }
    >>> arr = recarray(data, dim=2)
    >>> func = lambda record: record.a ** record.b
    >>> print(apply_function(arr, func))
    [[ 0  1]
     [ 8 81]]

    Specify the output data type.

    >>> func = lambda record: (record.a + record.b, record.a ** record.b)
    >>> res = apply_function(arr, func, dtype=[('c', float), ('d', int)])
    >>> print(res.d)
    [[ 0  1]
     [ 8 81]]

    """
    if not callable(func):
        raise ValueError("'func' needs to be callable")
    if not isinstance(arr, (np.ndarray, np.recarray)):
        m = "'ndarray' needs to an instance of 'np.ndarray' or 'np.recarray'"
        raise TypeError(m)
    if dtype is not None:
        dtype = np.dtype(dtype)

    # Broadcasting against a dummy value yields one `(None, item)` pair per
    # element, regardless of the number of dimensions of `arr`.
    values = []
    for _, item in np.broadcast(None, arr):
        values.append(func(item))

    res = np.array(values, dtype=dtype).reshape(arr.shape)
    if dtype is not None and dtype.names is not None:
        # named fields requested, so provide attribute access to them
        res = res.view(np.recarray)
    return res
def indices(shape, flatten=False):
    """Create keys or indices of a numpy ndarray.

    Parameters
    ----------
    shape : int or array_like(int)
        Shape of desired output array. An integer is treated as the length
        of a one dimensional array.
    flatten : optional, bool
        If True, the indices of a multi-dimensional shape are returned as a
        two dimensional array with one index tuple per row. Ignored if
        `shape` is an integer.

    Returns
    -------
    np.ndarray(int, shape=(*shape, len(shape)))
        Array of indices with desired `shape`. Each entry provides an index
        tuple to access the array entries.

    Examples
    --------

    One dimensional case.

    >>> keys = indices(9)
    >>> print(keys.shape)
    (9,)
    >>> print(keys)
    [0 1 2 3 4 5 6 7 8]

    Two dimensional case.

    >>> keys = indices((3, 4))
    >>> keys.shape
    (3, 4, 2)
    >>> print(keys[1])
    [[1 0]
     [1 1]
     [1 2]
     [1 3]]

    Get iterable of indices.

    >>> keys = indices((2, 2), flatten=True)
    >>> print(keys)
    [[0 0]
     [0 1]
     [1 0]
     [1 1]]

    """
    # numpy integer scalars are accepted just like builtin integers
    if isinstance(shape, (int, np.integer)):
        return np.arange(shape)

    shape = np.array(shape, dtype=int)
    keys = np.indices(shape)
    if flatten:
        # one row per array entry, one column per dimension
        return keys.reshape(-1, np.prod(shape)).T
    # move the leading index axis to the end, so `keys[i, j]` is `(i, j)`
    return np.moveaxis(keys, 0, -1)
def range_filter(arr, min_value=-np.inf, max_value=np.inf):
    """Filter values by range.

    Parameters
    ----------
    arr : array_like(Number)
        Numeric array to filter.
    min_value,max_value : Number
        Minimum and maximum values to define the desired value range
        `[min_value, max_value]` of `arr`.

    Returns
    -------
    np.ndarray(int) or tuple of tuples
        Indices of all values of `arr` in the desired range. For a one
        dimensional array an index array is returned, otherwise a tuple
        holding one tuple of indices per dimension.

    Raises
    ------
    TypeError
        If `arr` is not numeric or one of the limits is not a number.
    ValueError
        If `max_value` is not greater than or equal to `min_value`.

    Examples
    --------

    Filter a one dimensional array.

    >>> a = [0, 2, 1, -1, 5, 7, 9, 4, 3, 2, -2, -11]

    >>> indices = range_filter(a, min_value=0)
    >>> print(indices)
    [0 1 2 4 5 6 7 8 9]

    >>> idx = range_filter(a, min_value=0, max_value=5)
    >>> print(idx)
    [0 1 2 4 7 8 9]
    >>> print(np.array(a)[idx])
    [0 2 1 5 4 3 2]

    Filter a multi-dimensional array.

    >>> a = [(1, 0), (-2, -1), (3, -5), (4, 2), (-7, 9), (0.5, 2)]
    >>> idx = range_filter(a, min_value=2, max_value=5)
    >>> print(np.array(a)[idx].tolist())
    [3.0, 4.0, 2.0, 2.0]

    """
    if not isnumeric(arr):
        raise TypeError("'arr' needs to be an numeric array")
    if not isinstance(min_value, Number):
        raise TypeError("'min_value' needs to a number")
    if not isinstance(max_value, Number):
        raise TypeError("'max_value' needs to a number")
    if not max_value >= min_value:
        m = "'max_value' needs to be greater or equal 'min_value'"
        raise ValueError(m)

    values = np.array(arr)
    in_range = np.logical_and(values >= min_value, values <= max_value)
    hits = np.where(in_range)

    if values.ndim == 1:
        return hits[0]
    # one tuple of indices per dimension, usable for fancy indexing
    return tuple(map(tuple, np.array(hits)))
def max_value_range(dtype):
    """Returns the maximum value range of a numeric numpy data type.

    Parameters
    ----------
    dtype : np.dtype
        Numeric data type to check

    Returns
    -------
    min_value,max_value : Number
        Minimum and maximum value representable by `dtype`.

    Raises
    ------
    ValueError
        If `dtype` is neither an integer nor a floating point type.

    Examples
    --------

    >>> value_range = max_value_range(np.dtype(np.uint8))
    >>> print(value_range)
    (0, 255)

    >>> value_range = max_value_range(np.dtype(np.uint16))
    >>> print(value_range)
    (0, 65535)

    >>> value_range = max_value_range(np.dtype(np.int8))
    >>> print(value_range)
    (-128, 127)

    >>> value_range = max_value_range(np.dtype(np.int16))
    >>> print(value_range)
    (-32768, 32767)

    """
    dtype = np.dtype(dtype)
    kind = dtype.kind
    if kind == 'f':
        limits = np.finfo(dtype)
    elif kind == 'i' or kind == 'u':
        limits = np.iinfo(dtype)
    else:
        raise ValueError("unknown numeric data type '%s'" % dtype)
    return limits.min, limits.max
def minimum_numeric_dtype(arr):
    """Determines the minimum required data type of a numpy array.

    The decision is based on the value range of the array only. For floating
    point arrays the precision of the candidate types is not considered.

    Parameters
    ----------
    arr : np.ndarray(Number)
        Numeric array to find minimum data type for.

    Returns
    -------
    np.dtype
        Minimum required data type. If none of the types in `NUMERIC_DTYPES`
        fits, the data type of `arr` is returned.

    Raises
    ------
    TypeError
        If `arr` is not a numpy array.
    ValueError
        If `arr` is neither of integer nor of floating point type.

    Examples
    --------

    Find minimum data type for integer arrays.

    >>> arr = np.array([0, 255], dtype=np.int32)
    >>> print(arr.dtype)
    int32
    >>> print(minimum_numeric_dtype(arr))
    uint8

    >>> arr = np.array([0, 256])
    >>> print(minimum_numeric_dtype(arr))
    uint16

    >>> arr = np.array([-5, 127])
    >>> print(minimum_numeric_dtype(arr))
    int8

    >>> arr = np.array([-5, 128])
    >>> print(minimum_numeric_dtype(arr))
    int16

    >>> arr = np.array([-5, 214748364])
    >>> print(minimum_numeric_dtype(arr))
    int32

    Find minimum data type for floating point arrays.

    >>> arr = np.array([-5.2, 100.3])
    >>> print(arr.dtype)
    float64
    >>> print(minimum_numeric_dtype(arr))
    float16

    """
    if not isinstance(arr, np.ndarray):
        raise TypeError("'arr' needs to be an instance of 'np.ndarray'")
    if arr.dtype.kind not in ('u', 'i', 'f'):
        raise ValueError("unknown numeric data type '%s'" % arr.dtype)

    lower = arr.min()
    upper = arr.max()

    # floats stay floats; integers are unsigned unless a negative value occurs
    if arr.dtype.kind == 'f':
        kind = 'f'
    elif lower < 0:
        kind = 'i'
    else:
        kind = 'u'

    # the first candidate of matching kind that covers the value range wins
    for candidate in map(np.dtype, NUMERIC_DTYPES):
        if candidate.kind != kind:
            continue
        low, high = max_value_range(candidate)
        if lower >= low and upper <= high:
            return candidate

    return arr.dtype