# Source code for araucaria.utils

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
The :mod:`~araucaria.utils` module offers the following utility functions:

.. list-table::
   :widths: auto
   :header-rows: 1

   * - Function
     - Description
   * - :func:`get_version`
     - Returns version of araucaria.
   * - :func:`check_objattrs`
     - Check type and attributes of an object.
   * - :func:`check_xrange`
     - Range values for an array.
   * - :func:`index_xrange`
     - Index of values in given range for an array.
   * - :func:`index_dups`
     - Index of duplicate values in an array.
   * - :func:`index_nans`
     - Index of NaN values in an array.
   * - :func:`index_nearest`
     - Index of nearest value in an array.
   * - :func:`interp_yvals`
     - Returns interpolated values for 1-D function.
   * - :func:`read_fdicts`
     - Reads file with multiple dictionaries.
"""
from typing import List, Union, TypeVar
from pathlib import Path
from re import findall
from ast import literal_eval
from numpy import (ndarray, diff, abs, argwhere, where, 
                   ravel, apply_along_axis, isnan, isinf)
from scipy.interpolate import interp1d


def get_version(dependencies:bool=False) -> str:
    """Returns installed version of araucaria.

    Parameters
    ----------
    dependencies
        Condition to additionally get version of
        dependencies. The default is False.

    Returns
    -------
    :
        Printable string with version of araucaria.
        If ``dependencies`` is True, one line per dependency
        (Python, Numpy, Scipy, Lmfit, H5py, Matplotlib) precedes
        the araucaria line.

    Examples
    --------
    >>> from araucaria.utils import get_version
    >>> print(get_version()) #doctest: +ELLIPSIS
    Araucaria version   : ...
    """
    import araucaria as ara

    row   = '{0:20}: {1}'
    lines = []    # one formatted line per reported package

    if dependencies:
        # dependencies are imported only when their versions are
        # requested, so the default call does not load them.
        import platform
        import numpy as np
        import scipy as sp
        import lmfit as lm
        import h5py as h5
        import matplotlib as mpl

        lines.append(row.format('Python version', platform.python_version()))
        libs = (('Numpy', np), ('Scipy', sp), ('Lmfit', lm),
                ('H5py', h5), ('Matplotlib', mpl))
        for name, lib in libs:
            lines.append(row.format(name + ' version', lib.__version__))

    lines.append(row.format('Araucaria version', ara.__version__))

    return '\n'.join(lines)


def check_objattrs(obj: object, objtype: type, attrlist: list=None,
                   exceptions: bool=False) -> Union[bool, List[bool]]:
    """Check type and attributes of an object.

    Parameters
    ----------
    obj
        Object to check.
    objtype
        Type for the object.
    attrlist
        List with names of attributes to check.
        The default is None, in which case only the type is checked.
    exceptions
        Condition to raise exceptions if attributes
        are not in the object. The default is False.

    Returns
    -------
    :
        True if ``attrlist`` is None. Otherwise a list with
        booleans for each attribute of the object.

    Raises
    ------
    TypeError
        If ``obj`` is not an instance of ``objtype``.
    AttributeError
        If ``exceptions`` is True and an attribute of ``attrlist``
        is not in ``obj``.

    Examples
    --------
    >>> from araucaria import Group
    >>> from araucaria.utils import check_objattrs
    >>> group   = Group(**{'energy': [1,2,3,4], 'mu': [2,2,3,1]})
    >>> # checking class type
    >>> check_objattrs(group, Group)
    True

    >>> # checking class type and attributes
    >>> alist   = ['energy', 'mu', 'mu_ref']
    >>> check_objattrs(group, Group, attrlist = alist)
    [True, True, False]
    """
    if not isinstance(obj, objtype):
        raise TypeError('object is not a valid %s instance.' % objtype.__name__)
    if attrlist is None:
        # only the type check was requested
        return True

    boolist = [hasattr(obj, attr) for attr in attrlist]

    if exceptions:
        # report the first missing attribute, in the order given
        for attr, found in zip(attrlist, boolist):
            if not found:
                raise AttributeError("%s instance has no '%s' attribute." % (objtype.__name__, attr))

    return boolist

def check_xrange(x_range: list, x: ndarray, refval: float=None) -> list:
    """Returns range values inside an array.

    Parameters
    ----------
    x_range
        List with min and max values.
        Supports :data:`~numpy.inf` values.
    x
        Array with values.
    refval
        If given, x_range is assumed to contain values relative to refval.
        If None, x_range is assumed to contain absolute values.

    Returns
    -------
    :
        New range values inside the array.
        Infinite limits, and limits that exceed the extent of ``x``,
        are replaced by the corresponding extreme value of ``x``
        (relative to ``refval`` if given).

    Examples
    --------
    >>> from numpy import inf, linspace
    >>> from araucaria.utils import check_xrange
    >>> k_range = [-inf,inf]
    >>> k       = linspace(0,15)
    >>> krange = check_xrange(k_range, k)
    >>> print(krange)
    [0.0, 15.0]

    >>> # using a reference value
    >>> e_range = [-inf, -50]
    >>> energy  = linspace(8900, 9100)
    >>> e0      = 8979
    >>> erange  = check_xrange(e_range, energy, refval=e0)
    >>> print(erange)
    [-79.0, -50]
    """
    # sorting a copy keeps the caller's x_range untouched
    limits = sorted(x_range)
    x_min  = limits[0]
    x_max  = limits[1]

    xmin = min(x)
    xmax = max(x)
    if refval is not None:
        # extreme values of x are expressed with respect to refval,
        # explicit None test so that refval=0 is honored as a reference
        xmin = xmin - refval
        xmax = xmax - refval

    if isinf(x_min) or x_min < xmin:
        x_min = xmin
    if isinf(x_max) or x_max > xmax:
        x_max = xmax

    return [x_min, x_max]

def index_xrange(x_range: list, x: ndarray, refval: float=None) -> tuple:
    """Returns indexes of range values inside an array.

    Parameters
    ----------
    x_range
        List with min and max values.
        Supports :data:`~numpy.inf` values.
    x
        Array with values.
    refval
        If given, x_range is assumed to contain values relative to refval.
        If None, x_range is assumed to contain absolute values.

    Returns
    -------
    :
        Tuple of index arrays, as returned by :func:`~numpy.where`,
        locating the values of ``x`` inside the range (limits included).
        It can be used directly to index ``x``.

    Examples
    --------
    >>> from numpy import arange, inf
    >>> from araucaria.utils import index_xrange
    >>> k_range = [4, 8]
    >>> k       = arange(0,16)
    >>> index   = index_xrange(k_range, k)
    >>> k[index]
    array([4, 5, 6, 7, 8])

    >>> # using a reference value
    >>> e_range = [-inf, -50]
    >>> energy  = arange(8900, 9100)
    >>> e0      = 8979
    >>> index   = index_xrange(e_range, energy, refval=e0)
    >>> energy[index][0], energy[index][-1]
    (8900, 8929)
    """
    # sorting a copy keeps the caller's x_range untouched
    limits = sorted(x_range)
    x_min  = limits[0]
    x_max  = limits[1]

    if refval is not None:
        # relative limits are shifted to absolute values,
        # explicit None test so that refval=0 is honored as a reference
        x_min = x_min + refval
        x_max = x_max + refval

    return where((x >= x_min) & (x <= x_max))

def index_dups(data: ndarray, tol: float=1e-4) -> ndarray:
    """Index of duplicate values.

    Parameters
    ----------
    data
        Array to search for duplicates.
    tol
        Tolerance value (the default is 1e-4).

    Returns
    -------
    :
        Index array with the location of duplicates.

    Notes
    -----
    A value in ``data`` is considered a duplicate if the
    difference with respect to the previous value is strictly
    lower than the given ``tol`` value. The difference is signed,
    so a value lower than the previous one is also reported.

    If the dimension of ``data`` is larger than 1 the array will be
    flattened by indexing the elements in row-major (i.e. C-style).

    Examples
    --------
    :func:`index_dups` is useful to remove duplicates and ensure monotonicity of a 1-D array.

    >>> from numpy import array, delete
    >>> from araucaria.utils import index_dups
    >>> energy = array([9000, 9000.1, 9005, 9005.1, 9008])
    >>> index  = index_dups(energy, tol=0.5)
    >>> print(index)
    [1 3]

    >>> # duplicates
    >>> print(energy[index])
    [9000.1 9005.1]

    >>> # removing duplicates
    >>> from numpy import delete
    >>> new_energy = delete(energy,index,0)
    >>> print(new_energy)
    [9000. 9005. 9008.]
    """
    # ravel accepts any array-like and is a no-op view for 1-D arrays
    flat = ravel(data)

    # diff[i] compares element i+1 against element i, hence the +1 offset
    return where(diff(flat) < tol)[0] + 1

def index_nans(data: ndarray, axis: int=0) -> ndarray:
    """Index of NaN values in an array.

    Parameters
    ----------
    data
        Array to search for NaN values.
    axis
        Axis along which NaN values will be searched.
        The default is 0. Negative values count from the last axis.

    Returns
    -------
    :
        Index array of shape (n, 1) with the positions along ``axis``
        whose slice contains at least one NaN value.

    Raises
    ------
    IndexError
        If ``axis`` is out of bounds for the dimension of ``data``.

    Examples
    --------
    :func:`index_nans` is useful to remove NaN values from arrays.

    >>> from numpy import arange, nan, delete
    >>> from araucaria.utils import index_nans
    >>> data      = arange(20, dtype=float).reshape(5,4)
    >>> data[1,2] = nan; data[3,1] = nan
    >>> print(data)
    [[ 0.  1.  2.  3.]
     [ 4.  5. nan  7.]
     [ 8.  9. 10. 11.]
     [12. nan 14. 15.]
     [16. 17. 18. 19.]]

    >>> # removing NaN values from rows
    >>> rindex = index_nans(data, 0)
    >>> print(rindex)
    [[1]
     [3]]
    >>> print(delete(data, rindex, 0))
    [[ 0.  1.  2.  3.]
     [ 8.  9. 10. 11.]
     [16. 17. 18. 19.]]

    >>> # removing NaN values from columns
    >>> cindex = index_nans(data, 1)
    >>> print(cindex)
    [[1]
     [2]]
    >>> print(delete(data, cindex, 1))
    [[ 0.  3.]
     [ 4.  7.]
     [ 8. 11.]
     [12. 15.]
     [16. 19.]]
    """
    nan_mask = isnan(data)
    ndim     = nan_mask.ndim

    # valid axes are -ndim, ..., ndim - 1 (no axis is valid for a scalar)
    if not -ndim <= axis < ndim:
        raise IndexError('axis is larger than the dimensions of the array.')
    axis = axis % ndim

    # a position along `axis` is flagged if any element of its slice is NaN,
    # i.e. the mask is collapsed over every other axis.
    other_axes = tuple(ax for ax in range(ndim) if ax != axis)
    if other_axes:
        nan_mask = nan_mask.any(axis=other_axes)

    return argwhere(nan_mask)

def index_nearest(data: ndarray, val: float, kind: str='nearest') -> int:
    """Index of nearest value in an array.

    Parameters
    ----------
    data
        Array to search for nearest value.
        The first and last elements are used as its lower and upper
        limits, i.e. values are expected in ascending order.
    val
        Search value. It supports :data:`~numpy.inf`.
    kind
        Either 'lower', 'nearest' or 'higher'.
        The default is 'nearest'. See Notes for details.

    Returns
    -------
    :
        Index of nearest value in the array.

    Raises
    ------
    ValueError
        If ``kind`` is not recognized.

    Notes
    -----
    Regardless of ``kind``, the returned index is 0 if ``val`` is
    lower than or equal to the first value of ``data``, and the last
    index if ``val`` is higher than or equal to the last value of ``data``.

    Otherwise the ``kind`` parameter controls the returned index:

    - If ``kind='lower'`` the returned index will be of the last value
      lower than or equal to ``val``.
    - If ``kind='nearest'`` the returned index will be of the nearest
      value with respect to ``val``.
    - If ``kind='higher'`` the returned index will be of the first value
      higher than or equal to ``val``.

    Examples
    --------
    >>> from numpy import linspace
    >>> from araucaria.utils import index_nearest
    >>> energy = linspace(8900, 9000, 6)
    >>> val    = 8965
    >>> # find nearest value
    >>> index  = index_nearest(energy, val)
    >>> print(index, energy[index], val)
    3 8960.0 8965

    >>> # find lower nearest value
    >>> index  = index_nearest(energy, val, kind='lower')
    >>> print(index, energy[index], val)
    3 8960.0 8965

    >>> # find higher nearest value
    >>> index  = index_nearest(energy, val, kind='higher')
    >>> print(index, energy[index], val)
    4 8980.0 8965
    """
    if kind not in ('lower', 'nearest', 'higher'):
        raise ValueError('kind %s not recognized.' % kind)

    # values at or beyond the limits map to the first or last index
    if val <= data[0]:
        return 0
    if val >= data[-1]:
        return len(data) - 1  # index starts from zero.

    if kind == 'nearest':
        index = abs(data - val).argmin()
    elif kind == 'lower':
        # where returns indexes in ascending order: last one is the largest
        index = where(data <= val)[0][-1]
    else: # kind higher
        # first index returned by where is the smallest
        index = where(data >= val)[0][0]

    # plain int so that every branch returns the same type
    return int(index)

def interp_yvals(x: ndarray, y: ndarray, xnew: ndarray,
                kind: str='cubic') -> ndarray:
    """Returns interpolated values for a 1-D function.

    Parameters
    ----------
    x
        Array with original domain.
    y
        Array with original values of function f(x)=y.
    xnew
        Array with new domain.
    kind
        Type of interpolation.
        See :class:`~scipy.interpolate.interp1d` class
        for valid types.
        Default is 'cubic'.

    Returns
    -------
    :
        Array with interpolated values.

    Raises
    ------
    ValueError
        If ``xnew`` contains values outside the domain of ``x``
        (raised by :class:`~scipy.interpolate.interp1d`, which is
        used here with its default bounds checking).

    Examples
    --------
    >>> from numpy import linspace
    >>> from araucaria.utils import interp_yvals
    >>> x  = linspace(0,10)
    >>> y  = x**2
    >>> xp = x[0:10]
    >>> yp = interp_yvals(x,y,xp)
    >>> print(len(yp))
    10
    """
    # interp1d builds a callable interpolant, evaluated on the new domain
    interpolant = interp1d(x, y, kind=kind)

    return interpolant(xnew)

def read_fdicts(fpath: Path) -> List[dict]:
    """Reads file with multiple dictionaries.

    Parameters
    ----------
    fpath
        File path.

    Returns
    -------
    :
        List with dictionaries, in order of appearance in the file.

    Notes
    -----
    Dictionaries are located with a regular expression that accepts
    word characters, whitespace, quotes, brackets, parentheses and the
    characters ``: . , + -`` between braces. Text outside braces is
    ignored, and nested dictionaries are not supported.

    Each match is converted with :func:`ast.literal_eval`, which only
    evaluates Python literals.

    Examples
    --------
    >>> from os import remove
    >>> from araucaria.utils import read_fdicts
    >>> fpath ='file.txt'
    >>> data  = "{'ener': [1,2,3], 'mu': [1,2,3]}"
    >>> # create file with dictionary data
    >>> with open(fpath, 'w') as f:
    ...     fw = f.write(data)
    >>> # reading file with dictionary
    >>> dicts = read_fdicts(fpath)
    >>> remove(fpath)
    >>> for d in dicts:
    ...     print(type(d), d)
    <class 'dict'> {'ener': [1, 2, 3], 'mu': [1, 2, 3]}
    """
    # regex search for dictionaries
    # raw string: the backslashes are regex escapes, not string escapes
    regex = r'''(\{[\w\s:.,+\-'"\[\]\(\)]+\})'''

    # reading file
    with open(fpath) as f:
        rawdata = f.read()

    # each matched text is converted to a dictionary
    return [literal_eval(raw) for raw in findall(regex, rawdata)]

# When the module is executed as a script, run the doctest examples
# embedded in the docstrings of this module.
if __name__ == '__main__':
    import doctest
    doctest.testmod()