# BEGIN OF LICENSE NOTE
# This file is part of Pyoints.
# Copyright (c) 2018, Sebastian Lamprecht, Trier University,
# lamprecht@uni-trier.de
#
# Pyoints is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Pyoints is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Pyoints. If not, see <https://www.gnu.org/licenses/>.
# END OF LICENSE NOTE
"""Collection of functions to classify or reclassify values or cluster values.
"""
import numpy as np
from collections import defaultdict
from . import (
    assertion,
    nptools,
)
from .misc import print_rounded
def classes_to_dict(
        classification,
        ids=None,
        min_size=1,
        max_size=np.inf,
        missing_value=-1):
    """Converts a list of class indices to a dictionary of grouped classes.

    Parameters
    ----------
    classification : array_like(shape=(n))
        Array of class indices.
    ids : optional, array_like(int, shape=(n))
        Indices to specify a subset of `classification`. If none, the indices
        are numbered consecutively.
    min_size,max_size : optional, positive int
        Minimum and maximum desired size of a class to be kept in the result.
    missing_value : optional, object
        Default value for unclassified values. Entries of `classification`
        equal to this value are not assigned to any class.

    Returns
    -------
    dict
        Dictionary of class indices. The dictionary keys represent the class
        ids, while the values represent the indices in the original array.

    Raises
    ------
    ValueError
        If `classification` is not array like or if `ids` and
        `classification` differ in length.

    See Also
    --------
    dict_to_classes
        Inverse operation, which converts a dictionary of classes to an array.

    Examples
    --------

    >>> classes = ['cat', 'cat', 'dog', 'bird', 'dog', 'bird', 'cat', 'dog']
    >>> class_dict = classes_to_dict(classes)
    >>> print(sorted(class_dict))
    ['bird', 'cat', 'dog']
    >>> print_rounded(class_dict['cat'])
    [0 1 6]

    >>> classes = [0, 0, 1, 2, 1, 0, 3, 3, 5, 3, 2, 1, 0]
    >>> print(classes_to_dict(classes))
    {0: [0, 1, 5, 12], 1: [2, 4, 11], 2: [3, 10], 3: [6, 7, 9], 5: [8]}

    """
    if not nptools.isarray(classification):
        raise ValueError("'classification' needs to be an array like object")

    if ids is None:
        ids = range(len(classification))
    elif len(ids) != len(classification):
        m = "'classification' and 'ids' must have the same length"
        raise ValueError(m)

    # group the indices by their class, skipping unclassified entries
    grouped = defaultdict(list)
    for index, class_id in zip(ids, classification):
        if class_id != missing_value:
            grouped[class_id].append(index)

    # drop classes of undesired size; every class holds at least one index,
    # so the filter can be skipped for the default limits
    if min_size > 1 or max_size < np.inf:
        return {
            class_id: members
            for class_id, members in grouped.items()
            if min_size <= len(members) <= max_size
        }

    return dict(grouped)
def dict_to_classes(
        classes_dict,
        n,
        min_size=1,
        max_size=np.inf,
        missing_value=-1):
    """Converts a dictionary of classes into a list of classes.

    Parameters
    ----------
    classes_dict : dict
        Dictionary of class indices. The keys represent the class ids, while
        the values represent the indices assigned to that class.
    n : positive int
        Desired size of the output array. It must exceed the largest index
        stored in `classes_dict`.
    min_size,max_size : optional, positive int
        Minimum and maximum desired size of a class to be kept in the result.
    missing_value : optional, object
        Default value for unclassified values.

    Returns
    -------
    np.ndarray(object, shape=(n))
        Array representation of `classes_dict`. The array has object dtype,
        so class ids and `missing_value` may be of different types.

    Raises
    ------
    TypeError
        If `classes_dict` is not a dictionary.
    ValueError
        If `n` is not an integer greater zero.

    See Also
    --------
    classes_to_dict

    Notes
    -----
    Only a minimal input validation is provided.

    Examples
    --------

    Alphanumeric classes.

    >>> classes_dict = {'bird': [0, 1, 5, 4], 'dog': [3, 6, 8], 'cat': [7]}
    >>> print(dict_to_classes(classes_dict, 10, missing_value=''))
    ['bird' 'bird' '' 'dog' 'bird' 'bird' 'dog' 'cat' 'dog' '']

    Omit small classes.

    >>> print(dict_to_classes(classes_dict, 10, min_size=2))
    ['bird' 'bird' -1 'dog' 'bird' 'bird' 'dog' -1 'dog' -1]

    Numeric classes.

    >>> classes_dict = {0: [0, 1, 5], 1: [3, 6], 2: [7, 2]}
    >>> print(dict_to_classes(classes_dict, 9))
    [0 0 2 1 -1 0 1 2 -1]

    """
    # type validation
    if not isinstance(classes_dict, dict):
        raise TypeError("dictionary required")
    # numpy integers are valid sizes too; zero and negative sizes are not
    if not (isinstance(n, (int, np.integer)) and n > 0):
        raise ValueError("'n' needs to be an integer greater zero")

    # prepare output
    # An object array is required, since the class ids and `missing_value`
    # may be of different types (e.g. string ids with a numeric default).
    classification = np.full(n, missing_value, dtype=object)

    # assign classes of desired size only
    for class_id, indices in classes_dict.items():
        if min_size <= len(indices) <= max_size:
            classification[indices] = class_id

    return classification
def split_by_breaks(values, breaks):
    """Classifies values by ranges.

    Parameters
    ----------
    values : array_like(Number, shape=(n))
        Values to classify.
    breaks : array_like(Number, shape=(m))
        Series of value ranges.

    Returns
    -------
    classification : np.ndarray(int, shape=(n))
        Desired class affiliation of `values`. A value of `classification[i]`
        equal to `k` means that `values[i]` is in range
        `[breaks[k - 1], breaks[k])`. Values below `breaks[0]` receive class
        `0` and values of at least `breaks[-1]` receive class `m`.

    Examples
    --------

    >>> values = np.arange(10)
    >>> breaks = [0.5, 5, 7.5]
    >>> classes = split_by_breaks(values, breaks)
    >>> print_rounded(classes)
    [0 1 1 1 1 2 2 2 3 3]

    """
    values = assertion.ensure_numvector(values)
    breaks = assertion.ensure_numvector(breaks)
    # `np.digitize` returns for each value the index of the bin it falls in
    return np.digitize(values, breaks)
def rename_dict(d, ids=None):
    """Assigns new key names to a dictionary.

    Parameters
    ----------
    d : dict
        Dictionary to rename.
    ids : optional, array_like(shape=(len(d)))
        Desired key names. If none, the keys are numbered consecutively.

    Returns
    -------
    dict
        Dictionary with new names. The values keep the iteration order of
        `d`, so the i-th new key is assigned to the i-th value of `d`.

    Raises
    ------
    TypeError
        If `d` is not a dictionary.
    ValueError
        If the number of `ids` differs from the number of keys in `d`.

    Examples
    --------

    >>> d = {1: [0, 1], 2: None, 3: 'text'}
    >>> renamed_dict = rename_dict(d, ['first', 'second', 'last'])
    >>> print(sorted(renamed_dict))
    ['first', 'last', 'second']

    """
    if not isinstance(d, dict):
        raise TypeError("dictionary required")

    if ids is None:
        ids = range(len(d))
    elif len(ids) != len(d):
        raise ValueError("same number of keys required")

    return dict(zip(ids, d.values()))
def majority(classes, empty_value=-1):
    """Finds most frequent class or value in an array.

    Parameters
    ----------
    classes : array_like(object, shape=(n))
        Classes or values to check.
    empty_value : optional, object
        Class value in case that no decision can be made. This is the case if
        `classes` is empty or if several classes share the highest frequency.

    Returns
    -------
    object
        Most frequent class.

    Raises
    ------
    ValueError
        If `classes` is not array like.

    Notes
    -----
    Only a limited input validation is provided.

    Examples
    --------

    Find majority class.

    >>> classes =['cat', 'dog', 'dog', 'bird', 'cat', 'dog']
    >>> print(majority(classes))
    dog

    >>> classes =[1, 8, 9, 0, 0, 2, 4, 2, 4, 3, 2, 3, 5, 6]
    >>> print_rounded(majority(classes))
    2

    No decision possible.

    >>> classes =[1, 2, 3, 4, 4, 3]
    >>> print_rounded(majority(classes))
    -1

    """
    if not nptools.isarray(classes):
        raise ValueError("'classes' needs to be an array like object")

    # without any value no decision can be made
    if len(classes) == 0:
        return empty_value

    half = len(classes) // 2
    counts = defaultdict(int)
    for class_id in classes:
        counts[class_id] += 1
        # a class covering more than half of the values can not be beaten
        if counts[class_id] > half:
            return class_id

    # no absolute majority: take the most frequent class, if it is unique
    best = max(counts, key=counts.get)
    top_count = counts[best]
    ties = sum(1 for count in counts.values() if count == top_count)
    if ties > 1:
        return empty_value
    return best