# Source code for pyls.matlab.io

# -*- coding: utf-8 -*-

from collections.abc import MutableMapping

import numpy as np
import scipy.io as sio

from ..structures import PLSResults

# (Matlab key, Python key) pairs handed to `_rename_keys` once the imported
# result has been flattened, so nested Matlab fields appear here joined by '_'
# (e.g. `perm_result.sprob` becomes 'perm_result_sprob').
#
# Every Python key must be unique: `_rename_keys` applies the pairs in order,
# so two Matlab keys sharing a target would silently overwrite one another.
_result_mapping = (
    ('u', 'x_weights'),
    ('s', 'singvals'),
    ('v', 'y_weights'),
    ('usc', 'x_scores'),
    ('vsc', 'y_scores'),
    ('lvcorrs', 'y_loadings'),
    # permres
    ('perm_result_sprob', 'pvals'),
    ('perm_result_permsamp', 'permsamples'),
    # bootres
    ('boot_result_compare_u', 'x_weights_normed'),
    ('boot_result_u_se', 'x_weights_stderr'),
    ('boot_result_bootsamp', 'bootsamples'),
    # splitres
    ('perm_splithalf_orig_ucorr', 'ucorr'),
    ('perm_splithalf_orig_vcorr', 'vcorr'),
    ('perm_splithalf_ucorr_prob', 'ucorr_pvals'),
    ('perm_splithalf_vcorr_prob', 'vcorr_pvals'),
    # `_ul` fields go to `*_uplim` and `_ll` fields to `*_lolim`
    ('perm_splithalf_ucorr_ul', 'ucorr_uplim'),
    ('perm_splithalf_vcorr_ul', 'vcorr_uplim'),
    ('perm_splithalf_ucorr_ll', 'ucorr_lolim'),
    ('perm_splithalf_vcorr_ll', 'vcorr_lolim'),
    # inputs
    ('inputs_X', 'X'),
    ('stacked_behavdata', 'Y'),
    ('num_subj_lst', 'groups'),
    ('num_conditions', 'n_cond'),
    ('perm_result_num_perm', 'n_perm'),
    ('boot_result_num_boot', 'n_boot'),
    ('perm_splithalf_num_split', 'n_split'),
    ('boot_result_clim', 'ci'),
    ('other_input_meancentering_type', 'mean_centering'),
    ('method', 'method')
)

# Extra (Matlab key, Python key) renames that `import_matlab_result` applies
# when the result's `method` is anything other than 3
_mean_centered_mapping = tuple({
    'boot_result_orig_usc': 'contrast',
    'boot_result_distrib': 'contrast_boot',
    'boot_result_ulusc': 'contrast_ci_up',
    'boot_result_llusc': 'contrast_ci_lo',
}.items())

# Extra (Matlab key, Python key) renames that `import_matlab_result` applies
# when the result's `method` equals 3
_behavioral_mapping = tuple({
    'boot_result_orig_corr': 'y_loadings',
    'boot_result_distrib': 'y_loadings_boot',
    'boot_result_ulcorr': 'y_loadings_ci_up',
    'boot_result_llcorr': 'y_loadings_ci_lo',
}.items())


def _coerce_void(value):
    """
    Squeezes `value`, unwrapping single-element arrays into scalars

    Parameters
    ----------
    value : array_like
        Data to be squeezed. Must expose `.dtype` and `.squeeze()` (i.e., be
        a numpy array) whenever it holds exactly one element

    Returns
    -------
    value : numpy.ndarray or scalar
        Scalar built with `value.dtype.type` if `value` squeezes down to zero
        dimensions; otherwise `value` with all length-one axes removed
    """

    squeezed = np.squeeze(value)
    if squeezed.ndim > 0:
        return squeezed

    # only one element is left, so return it as a scalar of the array's dtype
    return value.dtype.type(value.squeeze())


def _flatten(d, parent_key='', sep='_'):
    """
    Collapses nested dictionary `d` into a single-level dictionary

    Keys of nested dictionaries are prefixed with the key path leading to
    them, with each level joined by `sep`.

    Parameters
    ----------
    d : dict
        Dictionary to be flattened
    parent_key : str, optional
        Prefix prepended (followed by `sep`) to every key of `d`; left off
        entirely when empty. Default: ''
    sep : str, optional
        Separator placed between a parent key and a child key. Default: '_'

    Returns
    -------
    flat : dict
        Single-level dictionary holding every non-mapping value in `d`

    Notes
    -----
    Adapted from https://stackoverflow.com/a/6027615
    """

    flat = {}
    for key, value in d.items():
        full_key = key if not parent_key else parent_key + sep + key
        if not isinstance(value, MutableMapping):
            flat[full_key] = value
            continue
        # nested mapping: recurse, using the key built so far as the prefix
        flat.update(_flatten(value, full_key, sep=sep))

    return flat


def _rename_keys(d, mapping):
    """
    Returns copy of `d` with keys renamed according to `mapping`

    Parameters
    ----------
    d : dict
        Dictionary with keys to be renamed; it is not modified
    mapping : list of tuples
        (oldkey, newkey) pairs, applied in order. Pairs whose `oldkey` is not
        present are skipped

    Returns
    -------
    renamed : dict
        Shallow copy of `d` with the requested keys renamed
    """

    renamed = d.copy()
    for old, new in mapping:
        if old in renamed:
            # remove the old entry and store its value under the new name
            renamed[new] = renamed.pop(old)

    return renamed


def import_matlab_result(fname, datamat='datamat_lst'):
    """
    Imports `fname` PLS result from Matlab

    Parameters
    ----------
    fname : str
        Filepath to output mat file obtained from Matlab PLS toolbox. Should
        contain at least a result struct object.
    datamat : str, optional
        Variable name of datamat ('X' array) provided to original PLS if it
        exists in `fname`. By default the datamat is not stored in the PLS
        results structure, but if it was saved in `fname` it can be loaded and
        cached in the returned results object. Default: 'datamat_lst'

    Returns
    -------
    results : :obj:`~.structures.PLSResults`
        Matlab results in a Python-friendly format

    Raises
    ------
    ValueError
        If `fname` does not contain a `result` variable that is a struct
    """

    malformed = 'Cannot get result struct from provided mat file'

    # load mat file using scipy.io
    matfile = sio.loadmat(fname)

    # a missing, empty, or non-struct 'result' variable all mean this is a
    # malformed PLS result mat file
    try:
        record = matfile.get('result')[0, 0]
        fields = record.dtype.names
    except (AttributeError, IndexError, TypeError) as err:
        raise ValueError(malformed) from err
    if fields is None:
        raise ValueError(malformed)

    # convert result structure to a dictionary keyed by struct field name
    # (`dtype.names` lists the fields in their stored order)
    result = {name: record[name] for name in fields}

    # convert sub-structures to dictionaries keyed by struct field name
    struct = ['boot_result', 'perm_result', 'perm_splithalf', 'other_input']
    for attr in struct:
        sub = result.get(attr)
        if sub is not None:
            sub_record = sub[0, 0]
            result[attr] = {name: _coerce_void(sub_record[name])
                            for name in sub.dtype.names}

    # get input data from results file, if it exists
    X = matfile.get(datamat)
    result['inputs'] = dict(X=np.vstack(X[:, 0])) if X is not None else dict()

    # squeeze all the values so they're a bit more interpretable; only
    # existing keys are reassigned so the dictionary size never changes
    for key, val in result.items():
        if isinstance(val, np.ndarray):
            result[key] = _coerce_void(val)

    # flatten the dictionary and rename the keys according to our mapping
    result = _rename_keys(_flatten(result), _result_mapping)

    # method 3 uses the behavioral key mapping; every other method uses the
    # mean-centered one. Both yield lower / upper confidence interval arrays
    # that get stacked along a new trailing axis
    if result['method'] == 3:
        extra_mapping, prefix = _behavioral_mapping, 'y_loadings'
    else:
        extra_mapping, prefix = _mean_centered_mapping, 'contrast'
    result = _rename_keys(result, extra_mapping)
    if prefix + '_ci_up' in result:
        result[prefix + '_ci'] = np.stack([
            result[prefix + '_ci_lo'], result[prefix + '_ci_up']
        ], axis=-1)

    # index arrays - 1 to account for Matlab vs Python 1- vs 0-indexing
    for key in ['bootsamples', 'permsamples']:
        if key in result:
            result[key] -= 1

    # make sure `n_split` is always passed along, even if only as None
    if result.get('n_split', None) is None:
        result['n_split'] = None

    # pack it into a `PLSResults` class instance for easy attribute access
    results = PLSResults(**result)

    return results