# Source code for araucaria.io.io_read

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
The :mod:`~araucaria.io.io_read` submodule offers functions to perform the following tasks:

1. Read XAFS files in common formats;
2. Read data from report files of Linear Combination Fitting (LCF) analysis.

Read XAFS files
***************

The following functions are currently implemented to read XAFS files:

.. list-table::
   :widths: auto
   :header-rows: 1

   * - Function
     - Description
   * - :func:`read_p65`
     - Reads a XAFS file from the P65 beamline (PETRA III).
   * - :func:`read_dnd`
     - Reads a XAFS file from the DND-CAT beamline (APS).
   * - :func:`read_xmu`
     - Reads a XAFS file based on specified columns.
   * - :func:`read_file`
     - Utility function to read a XAFS file based on specified columns.
   * - :func:`read_rawfile`
     - Utility function to read a XAFS file based on specified count columns.

By convention these read functions will return a ``group`` class with the following attributes:

- ``group.energy``: the energy array.
- ``group.mu``: the transmission mu(E) array. Returned if ``scan='mu'``.
- ``group.fluo`` : the fluorescence mu(E) array. Returned if ``scan='fluo'``.
- ``group.mu_ref``: the transmission reference array. Returned if ``ref=True``.

The attribute ``mu_ref`` is also returned by default when ``scan`` is either 'mu' or 'fluo'.
 
Choose ``scan = None`` and ``ref=True`` to return only the transmission reference.
        
Tip
---
The ``group`` returned by any read method will contain either a ``mu`` 
or a ``fluo`` instance, but not both. If both instances are required, create an 
additional ``group`` by calling the read method with a different ``scan`` value.


Read LCF files
**************
The following functions can be used to extract batch information from 
linear combination fit (LCF) report files:

.. list-table::
   :widths: auto
   :header-rows: 1

   * - Function
     - Description
   * - :func:`read_lcf_coefs`
     - Returns amplitude coefficients for a given reference.
   * - :func:`read_lcf_chisqr`
     - Returns chi-square statistics.

Important
---------
The previous functions expect valid LCF report files
generated by :func:`~araucaria.io.io_write.write_lcf_report`.
"""
from os.path import isfile, basename
from http.client import HTTPResponse
import warnings
from typing import List, Tuple, Union
from pathlib import Path
from numpy import loadtxt, delete, log, append, float, ndarray
from .. import Group
from ..utils import index_dups

def read_p65(fpath: Path, scan: str='mu', ref: bool=True, tol: float=1e-4) -> Group:
    """Reads a XAFS file from the P65 beamline.
 
    P65 is located in the PETRA III storage ring (DESY, Hamburg, Germany).
    
    Parameters
    ----------
    fpath
        Path to file.
    scan
        Requested mu(E). Accepted values are transmission ('mu'), fluorescence ('fluo'),
        or None. The default is 'mu'.
    ref
        Indicates if the transmission reference ('mu_ref') should also be returned.
        The default is True.
    tol
        Tolerance in energy units to remove duplicate values. The default is 1e-4.

    Returns
    -------
    :
        Group containing the requested arrays.

    Warns
    -----
    UserWarning
        If ``scan`` is not one of the accepted values. The transmission
        measurement ('mu') is retrieved in that case.

    Notes
    -----
    The count columns are requested in the order expected by
    :func:`read_rawfile`: energy, I0, IT1, IF and IT2 when both the
    fluorescence scan and the reference are requested, and energy, I0, IF
    when only the fluorescence scan is requested.

    See also
    --------
    read_rawfile: Reads a XAFS file based on specified count columns.
    
    Examples
    --------
    >>> from araucaria import Group
    >>> from araucaria.io import read_p65
    >>> from araucaria.testdata import get_testpath
    >>> from araucaria.utils import check_objattrs
    >>> fpath = get_testpath('p65_testfile.xdi')
    >>> # extracting mu and mu_ref scans
    >>> group_mu = read_p65(fpath, scan='mu')
    >>> check_objattrs(group_mu, Group, attrlist=['mu', 'mu_ref'])
    [True, True]
    
    >>> # extracting only fluo scan
    >>> group_fluo = read_p65(fpath, scan='fluo', ref=False)
    >>> check_objattrs(group_fluo, Group, attrlist=['fluo'])
    [True]
    
    >>> # extracting only mu_ref scan
    >>> group_ref = read_p65(fpath, scan=None, ref=True)
    >>> check_objattrs(group_ref, Group, attrlist=['mu_ref'])
    [True]
    """
    # accepted scan modes and file column of each count channel
    scandict = ['mu', 'fluo', None]
    chdict   = {'i0': 10, 'it1': 11, 'it2': 12, 'if': 13}

    # unknown scan modes fall back to the transmission measurement
    if scan not in scandict:
        warnings.warn("scan mode %s not recognized. Retrieving transmission measurement ('mu')." %scan)
        scan = 'mu'

    if scan is None:
        # reference only: IT1, IT2
        usecols = (0, chdict['it1'], chdict['it2'])
    elif scan == 'mu':
        # transmission: I0, IT1 (+ IT2 for the reference)
        usecols = (0, chdict['i0'], chdict['it1'], chdict['it2'])
    elif ref is True:
        # fluorescence + reference: read_rawfile indexes the columns as
        # I0, IT1, IF, IT2, so IT1 must precede IF here
        usecols = (0, chdict['i0'], chdict['it1'], chdict['if'], chdict['it2'])
    else:
        # fluorescence only: read_rawfile divides column 2 by column 1
        usecols = (0, chdict['i0'], chdict['if'])

    return read_rawfile(fpath, usecols, scan, ref, tol)

def read_dnd(fpath: Path, scan: str='mu', ref: bool=True, tol: float=1e-4) -> Group:
    """Reads a XAFS file from the DND-CAT beamline (5-BMD).
 
    DND-CAT is located in the Advanced Photon Source (APS, Argonne, USA).
    
    Parameters
    ----------
    fpath
        Path to file.
    scan
        Requested mu(E): transmission ('mu'), fluorescence ('fluo'),
        or None. The default is 'mu'.
    ref
        Whether the transmission reference ('mu_ref') should also be returned.
        The default is True.
    tol
        Tolerance in energy units to remove duplicate values.
        The default is 1e-4.

    Returns
    -------
    :
        Group containing the requested arrays.

    Warns
    -----
    UserWarning
        If ``scan`` is not one of the accepted values. The transmission
        measurement ('mu') is retrieved in that case.
        
    See also
    --------
    read_file: Reads a XAFS file based on specified columns.
    
    Examples
    --------
    >>> from araucaria import Group
    >>> from araucaria.io import read_dnd
    >>> from araucaria.testdata import get_testpath
    >>> from araucaria.utils import check_objattrs
    >>> fpath = get_testpath('dnd_testfile1.dat')
    >>> # extracting mu and mu_ref scans
    >>> group_mu = read_dnd(fpath, scan='mu')
    >>> check_objattrs(group_mu, Group, attrlist=['mu', 'mu_ref'])
    [True, True]
    
    >>> # extracting only fluo scan
    >>> group_fluo = read_dnd(fpath, scan='fluo', ref=False)
    >>> check_objattrs(group_fluo, Group, attrlist=['fluo'])
    [True]
    
    >>> # extracting only mu_ref scan
    >>> group_ref = read_dnd(fpath, scan=None, ref=True)
    >>> check_objattrs(group_ref, Group, attrlist=['mu_ref'])
    [True]
    """
    # file column holding each precomputed signal
    columns = {'fluo': 16, 'mu': 17, 'mu_ref': 18}

    # unknown scan modes fall back to the transmission measurement
    if scan not in ('mu', 'fluo', None):
        warnings.warn("scan mode %s not recognized. Retrieving transmission measurement ('mu')." %scan)
        scan = 'mu'

    # energy always comes first and the reference always comes last
    selected = [0]
    if scan is not None:
        selected.append(columns[scan])
    selected.append(columns['mu_ref'])

    return read_file(fpath, tuple(selected), scan, ref, tol)

def read_xmu(fpath: Path, scan: str='mu', ref: bool=True, tol: float=1e-4) -> Group:
    """Reads a generic XAFS file in plain format.
 
    Parameters
    ----------
    fpath
        Path to file.
    scan
        Requested mu(E): transmission ('mu'), fluorescence ('fluo'),
        or None. The default is 'mu'.
    ref
        Whether the transmission reference ('mu_ref') should also be returned.
        The default is True.
    tol
        Tolerance in energy units to remove duplicate values.
        The default is 1e-4.

    Returns
    -------
    :
        Group containing the requested arrays.

    Warns
    -----
    UserWarning
        If ``scan`` is not one of the accepted values. The transmission
        measurement ('mu') is retrieved in that case.
    
    Notes
    -----
    :func:`read_xmu` assumes the following column order in the file:
    
    1. energy.
    2. transmission/fluorescence mu(E).
    3. transmission reference.
    
    See also
    --------
    read_file : Reads a XAFS file based on specified columns.
    
    Examples
    --------
    >>> from araucaria import Group
    >>> from araucaria.io import read_xmu
    >>> from araucaria.testdata import get_testpath
    >>> from araucaria.utils import check_objattrs
    >>> fpath = get_testpath('xmu_testfile.xmu')
    >>> # extracting mu and mu_ref scans
    >>> group_mu = read_xmu(fpath, scan='mu')
    >>> check_objattrs(group_mu, Group, attrlist=['mu', 'mu_ref'])
    [True, True]
    
    >>> # extracting only fluo scan
    >>> group_fluo = read_xmu(fpath, scan='fluo', ref=False)
    >>> check_objattrs(group_fluo, Group, attrlist=['fluo'])
    [True]
    
    >>> # extracting only mu_ref scan
    >>> group_ref = read_xmu(fpath, scan=None, ref=True)
    >>> check_objattrs(group_ref, Group, attrlist=['mu_ref'])
    [True]
    """
    # 'mu' and 'fluo' share the second file column; the reference is the third
    signal_col = {'fluo': 1, 'mu': 1}
    ref_col    = 2

    # unknown scan modes fall back to the transmission measurement
    valid_scans = ('mu', 'fluo', None)
    if scan not in valid_scans:
        warnings.warn("scan mode %s not recognized. Retrieving transmission measurement ('mu')." %scan)
        scan = 'mu'

    if scan is not None:
        usecols = (0, signal_col[scan], ref_col)
    else:
        usecols = (0, ref_col)

    return read_file(fpath, usecols, scan, ref, tol)

def read_file(fpath: Union[Path, HTTPResponse], usecols: tuple, 
              scan: str, ref: bool, tol: float) -> Group:
    """Utility function to read a XAFS file based on specified columns.
        
    Parameters
    ----------
    fpath
        Path to file, or output from url open request.
    usecols
        Tuple with column indexes to extract from the file.
    scan
        Assigned mu(E), either transmission ('mu'), fluorescence ('fluo'),
        or None.
    ref
        Indicates if the transmission reference ('mu_ref') should also be returned.
    tol
        Tolerance in energy units to remove duplicate values.

    Returns
    -------
    :
        Group containing the requested arrays. The base name of the file
        (or of the response url) is stored in the ``name`` attribute.
    
    Raises
    ------
    IOError
        If the file does not exist in the specified path.
    ValueError
        If no mu(E) or transmission reference are requested.
    TypeError
        If ``ref`` is not a valid boolean.
    
    Notes
    -----
    ``usecols`` should provide column indexes in the following order:
    
    1. energy.
    2. transmission/fluorescence mu(E).
    3. transmission reference, if ``ref=True``.
    
    If only the ``mu_ref`` scan is requested, ``usecols`` should provide
    column indexes in the following order:
    
    1. energy.
    2. transmission reference.
    
    Warning
    -------
    The indexing order of ``usecols`` must be respected, 
    or the assigned mu(E) will be incorrect.

    """
    # url responses are read directly; anything else must be an existing file
    from_url = isinstance(fpath, HTTPResponse)
    if not from_url and not isfile(fpath):
        raise IOError('file %s does not exists.' % fpath)

    # only genuine booleans are accepted: values such as 0 or 1 would
    # otherwise pass validation and then be routed to the wrong branch
    if not isinstance(ref, bool):
        raise TypeError('ref: %s is not a valid boolean.' % (ref,))

    # at least one of the scan or the reference must be requested
    if scan is None and not ref:
        raise ValueError('no scan requested from file.' )

    raw = loadtxt(fpath, usecols=usecols)

    # deleting duplicate energy points
    index = index_dups(raw[:,0], tol)
    raw   = delete(raw, index, 0)

    content = {'energy': raw[:,0]}
    if ref:
        if scan in ['mu', 'fluo']:
            # requested scan followed by the reference
            content[scan]     = raw[:,1]
            content['mu_ref'] = raw[:,2]
        else:
            # reference only
            content['mu_ref'] = raw[:,1]
    else:
        # requested scan only
        content[scan] = raw[:,1]
    group = Group(**content)

    # saving filename in group
    group.name = basename(fpath.url if from_url else fpath)
    return group
    
def read_rawfile(fpath: Union[Path, HTTPResponse], usecols: tuple, 
                 scan: str, ref: bool, tol: float) -> Group:
    """Utility function to read a XAFS file based on specified count columns.
    
    Parameters
    ----------
    fpath
        Path to file, or output from url open request.
    usecols
        Tuple with columns indexes to extract from the file.
    scan
        Computed mu(E), either transmission ('mu'), fluorescence ('fluo'),
        or None.
    ref
        Indicates if the transmission reference ('mu_ref') should also be returned.
    tol
        Tolerance value to remove duplicate energy values.

    Returns
    -------
    :
        Group containing the requested arrays. The base name of the file
        (or of the response url) is stored in the ``name`` attribute.
    
    Raises
    ------
    IOError
        If the file does not exist in the specified path.
    ValueError
        If no mu(E) or transmission reference are requested, or if
        ``ref=False`` and ``scan`` is neither 'mu' nor 'fluo'.
    TypeError
        If ``ref`` is not a valid boolean.

    Notes
    -----
    ``usecols`` should provide column indexes in the following order:

    1. energy.
    2. monochromator intensity (I0).
    3. transmitted intensity (IT1).
    4. fluorescence intensity (IF), if ``scan='fluo'``.
    5. transmitted intensity (IT2), if ``ref=True``.

    If the ``mu_ref`` scan is not requested, ``usecols`` should provide
    column indexes in the following order:

    1. energy
    2. monochromator intensity (I0).
    3. transmitted intensity (IT1)/fluorescence intensity (IF).

    If only the ``mu_ref`` scan is requested, ``usecols`` should provide
    column indexes in the following order:
    
    1. energy.
    2. transmitted intensity (IT1).
    3. transmitted intensity (IT2).
    
    Warning
    -------
    The indexing order of ``usecols`` must be respected, 
    or the computed mu(E) will be incorrect.

    Important
    ---------
    If ``scan='fluo'`` and ``mu_ref`` is requested, all column
    indexes must be provided.
    """
    # url responses are read directly; anything else must be an existing file
    from_url = isinstance(fpath, HTTPResponse)
    if not from_url and not isfile(fpath):
        raise IOError('file %s does not exists.' % fpath)

    # only genuine booleans are accepted: values such as 0 or 1 would
    # otherwise pass validation and then be routed to the wrong branch
    if not isinstance(ref, bool):
        raise TypeError('ref: %s is not a valid boolean.' % (ref,))

    # at least one of the scan or the reference must be requested
    if scan is None and not ref:
        raise ValueError('no scan requested from file.' )

    # without the reference there is nothing to compute for an unknown scan
    if not ref and scan not in ('mu', 'fluo'):
        raise ValueError('scan mode %s not recognized.' % (scan,))

    raw = loadtxt(fpath, usecols=usecols)

    # deleting duplicate energy points
    index = index_dups(raw[:,0], tol)
    raw   = delete(raw, index, 0)

    content = {'energy': raw[:,0]}
    if ref:
        if scan == 'mu':
            # columns: energy, I0, IT1, IT2
            content[scan]     = -log(raw[:,2]/raw[:,1])
            content['mu_ref'] = -log(raw[:,3]/raw[:,2])
        elif scan == 'fluo':
            # columns: energy, I0, IT1, IF, IT2
            content[scan]     = raw[:,3]/raw[:,1]
            content['mu_ref'] = -log(raw[:,4]/raw[:,2])
        else:
            # reference only; columns: energy, IT1, IT2
            content['mu_ref'] = -log(raw[:,2]/raw[:,1])
    elif scan == 'mu':
        # columns: energy, I0, IT1
        content[scan] = -log(raw[:,2]/raw[:,1])
    else:
        # fluorescence; columns: energy, I0, IF
        content[scan] = raw[:,2]/raw[:,1]
    group = Group(**content)

    # saving filename in group
    group.name = basename(fpath.url if from_url else fpath)
    return group

def read_lcf_coefs(fpaths: List[Path], refgroup: str, 
                   error: bool=True) -> Union[Tuple[List], list]:
    """Returns amplitude coefficients for a given LCF reference.
    
    Amplitude coefficients are read directly from a list of paths 
    to LCF report files generated by :func:`~araucaria.io.io_write.write_lcf_report`.

    Parameters
    ----------
    fpaths
        List of paths to valid LCF report files.
    refgroup
        Name of the reference group.
    error
        If True the error of the fit will also be returned.
        The default is True.
    
    Returns
    -------
    :
        Amplitude coefficients and error for the reference in the LCF.
        One value per file, always as plain lists. A value and error of
        zero are reported for files where the reference was not used.
    
    Raises
    ------
    IOError
        If a file does not exist in the specified path.
    TypeError
        If a file is not a valid LCF report, including reports that list
        the reference but contain no amplitude for it.
    ValueError
        If ``refgroup`` was fitted during the LCF analysis (i.e. not a reference).

    Examples
    --------
    >>> from araucaria.testdata import get_testpath
    >>> from araucaria.io import read_lcf_coefs
    >>> fpath = get_testpath('test_lcf_report.log')
    >>> read_lcf_coefs([fpath], 'group1')
    ([0.40034377], [0.01195335])
    >>> read_lcf_coefs([fpath], 'group2', error=False)
    [0.59428689]
    """
    # testing that every file exists before reading any of them
    for fpath in fpaths:
        if not isfile(fpath):
            raise IOError('file %s does not exists.' % fpath)

    vallist = []    # container for values
    errlist = []    # container for errors
    for fpath in fpaths:
        # the context manager closes the file even when an error is raised
        with open(fpath, 'r') as f:
            if 'lcf report' not in f.readline():
                raise TypeError('%s is not a valid LCF report file.' % fpath)

            # first pass: locate the reference in the groups section
            stdval = None
            for line in f:
                if refgroup in line:
                    if 'scan' in line:
                        raise ValueError('%s was fitted in %s.' %(refgroup, fpath))
                    # the label ends with the standard index, which may span
                    # several digits (e.g. 'ref10' -> 'amp10')
                    label  = line.split()[0]
                    index  = label[len(label.rstrip('0123456789')):] or label[-1]
                    stdval = "amp" + index
                    break
                if "[[Fit Statistics]]" in line:
                    # past the groups section: the reference was not used
                    break

            if stdval is None:
                # reference absent from this report (or end of file reached)
                vallist.append(0.00)
                errlist.append(0.00)
                continue

            # second pass: read amplitude and error of the standard
            for line in f:
                if stdval in line:
                    fields = line.split()
                    vallist.append(float(fields[1]))
                    errlist.append(float(fields[3]))
                    break
            else:
                # end of file reached without finding the amplitude
                raise TypeError('%s is not a valid LCF report file.' % fpath)

    if error:
        return (vallist, errlist)
    return vallist

def read_lcf_chisqr(fpaths: List[Path], redchi: bool=False) -> list:
    """Returns chi square statistic for LCF reports.
    
    Chi square values are read directly from a list of paths 
    to LCF report files generated by :func:`~araucaria.io.io_write.write_lcf_report`.
    
    Parameters
    ----------
    fpaths
        List of paths to valid LCF report files.
    
    redchi
        Indicates if the reduced chi square statistic should be returned
        instead. The default is False.
    
    Returns
    -------
    :
        Chi square values, one per file. Reduced chi square values are
        optionally returned if ``redchi=True``.
    
    Raises
    ------
    IOError
        If a file does not exist in the specified path.
    TypeError
        If a file is not a valid LCF report, including reports that do not
        contain the requested statistic.

    Examples
    --------
    >>> from araucaria.testdata import get_testpath
    >>> from araucaria.io import read_lcf_chisqr
    >>> fpath = get_testpath('test_lcf_report.log')
    >>> read_lcf_chisqr([fpath])
    [1.40551323]
    >>> read_lcf_chisqr([fpath], redchi=True)
    [0.01011161]
    """
    # testing that every file exists before reading any of them
    for fpath in fpaths:
        if not isfile(fpath):
            raise IOError('file %s does not exists.' % fpath)   

    # the leading blanks keep the plain statistic from matching the
    # 'reduced chi-square' line
    if redchi:
        reference = " reduced chi-square"
    else:
        reference = "    chi-square"

    vallist = []    # container for values
    for fpath in fpaths:
        # the context manager closes the file even when an error is raised
        with open(fpath, 'r') as f:
            if 'lcf report' not in f.readline():
                raise TypeError('%s is not a valid LCF report file.' % fpath)

            for line in f:
                if reference in line:
                    vallist.append(float(line.split("=")[1]))
                    break
            else:
                # end of file reached without finding the statistic
                raise TypeError('%s is not a valid LCF report file.' % fpath)

    return vallist

if __name__ == '__main__':
    # running the module as a script executes its docstring examples
    from doctest import testmod
    testmod()