Source code for pyls.structures

# -*- coding: utf-8 -*-
"""
Data structures to hold PLS inputs and results objects
"""

from multiprocessing import cpu_count
from textwrap import dedent
from .utils import ResDict

_pls_input_docs = dict(
    decomposition_narrative=dedent("""\
    The singular value decomposition generates mutually orthogonal latent
    variables (LVs), comprised of left and right singular vectors and a
    diagonal matrix of singular values. The `i`-th pair of singular vectors
    detail the contributions of individual input features to an overall,
    multivariate pattern (the `i`-th LV), and the singular values explain the
    amount of variance captured by that pattern.

    Statistical significance of the LVs is determined via permutation testing.
    Bootstrap resampling is used to examine the contribution and reliability of
    the input features to each LV. Split-half resampling can optionally be used
    to assess the reliability of the LVs. A cross-validated framework can
    optionally be used to examine how accurate the decomposition is when
    employed in a predictive framework.\
    """),
    input_matrix=dedent("""\
    X : (S, B) array_like
        Input data matrix, where `S` is samples and `B` is features\
    """),
    groups=dedent("""\
    groups : (G,) list of int
        List with the number of subjects present in each of `G` groups. Input
        data should be organized as subjects within groups (i.e., groups should
        be vertically stacked). If there is only one group this can be left
        blank.\
    """),
    conditions=dedent("""\
    n_cond : int
        Number of conditions observed in data. Note that all subjects must have
        the same number of conditions. If both conditions and groups are
        present then the input data should be organized as subjects within
        conditions within groups (i.e., g1c1s[1-S], g1c2s[1-S], g2c1s[1-S],
        g2c2s[1-S]).\
    """),
    mean_centering=dedent("""\
    mean_centering : {0, 1, 2}, optional
        Mean-centering method to use. This will determine how the mean-centered
        matrix is generated and what effects are "boosted" during the SVD.
        Default: 0\
    """),
    # perms / resampling / crossval
    stat_test=dedent("""\
    n_perm : int, optional
        Number of permutations to use for testing significance of components.
         Default: 5000
    n_boot : int, optional
        Number of bootstraps to use for testing reliability of data features.
        Default: 5000\
    """),
    split_half=dedent("""\
    n_split : int, optional
        Number of split-half resamples to assess during permutation testing.
        Default: 0\
    """),
    cross_val=dedent("""\
    test_split : int, optional
        Number of splits for generating test sets during cross-validation.
        Default: 100
    test_size : [0, 1) float, optional
        Proportion of data to partition to test set during cross-validation.
        Default: 0.25\
    """),
    covariance=dedent("""\
    covariance : bool, optional
        Whether to use the cross-covariance matrix instead of the cross-
        correlation during the decomposition. Only set if you are sure this is
        what you want as many of the results may become more difficult to
        interpret (i.e., :py:attr:`~.structures.PLSResults.behavcorr` will no
        longer be intepretable as Pearson correlation values). Default: False\
    """),
    rotate=dedent("""\
    rotate : bool, optional
        Whether to perform Procrustes rotations during permutation testing. Can
        inflate false-positive rates; see Kovacevic et al., (2013) for more
        information. Default: True\
    """),
    ci=dedent("""\
    ci : [0, 100] float, optional
        Confidence interval to use for assessing bootstrap results. This
        roughly corresponds to an alpha rate; e.g., the 95%ile CI is
        approximately equivalent to a two-tailed p <= 0.05. Default: 95\
    """),
    proc_options=dedent("""\
    seed : {int, :obj:`numpy.random.RandomState`, None}, optional
        Seed to use for random number generation. Helps ensure reproducibility
        of results. Default: None
    verbose : bool, optional
        Whether to show progress bars as the analysis runs. Note that progress
        bars will not persist after the analysis is completed. Default: True
    n_proc : int, optional
        How many processes to use for parallelizing permutation testing and
        bootstrap resampling. If not specified will default to serialized
        processing (i.e., one processor). Can optionally specify 'max' to use
        all available processors. Default: None\
    """),
    pls_results=dedent("""\
    results : :obj:`pyls.structures.PLSResults`
        Dictionary-like object containing results from the PLS analysis\
    """),
    resamples=dedent("""\
    permsamples : array_like, optional
        Re-sampling array to be used during permutation test (if n_perm > 0).
        If not specified a set of unique permutations will be generated.
        Default: None
    bootsamples : array_like, optional
        Resampling array to be used during bootstrap resampling (if n_boot >
        0). If not specified a set of unique bootstraps will be generated.
        Default: None\
    """),
    references=dedent("""\
    McIntosh, A. R., Bookstein, F. L., Haxby, J. V., & Grady, C. L. (1996).
    Spatial pattern analysis of functional brain images using partial least
    squares. NeuroImage, 3(3), 143-157.

    McIntosh, A. R., & Lobaugh, N. J. (2004). Partial least squares analysis of
    neuroimaging data: applications and advances. NeuroImage, 23, S250-S263.

    Krishnan, A., Williams, L. J., McIntosh, A. R., & Abdi, H. (2011). Partial
    Least Squares (PLS) methods for neuroimaging: a tutorial and review.
    NeuroImage, 56(2), 455-475.

    Kovacevic, N., Abdi, H., Beaton, D., & McIntosh, A. R. (2013). Revisiting
    PLS resampling: comparing significance versus reliability across range of
    simulations. In New Perspectives in Partial Least Squares and Related
    Methods (pp. 159-170). Springer, New York, NY. Chicago\
    """)
)


class PLSInputs(ResDict):
    # NOTE: the class docstring is attached right after the class body via
    # ``PLSInputs.__doc__`` so that it can reuse the shared parameter
    # descriptions stored in ``_pls_input_docs``.

    # Names of the entries this container is meant to hold (presumably
    # consumed by ``ResDict`` -- confirm against ``.utils``)
    allowed = [
        'X', 'Y', 'groups', 'n_cond', 'n_perm', 'n_boot', 'n_split',
        'test_split', 'test_size', 'mean_centering', 'covariance', 'rotate',
        'ci', 'seed', 'verbose', 'n_proc', 'bootsamples', 'permsamples',
        'method', 'n_components', 'aggfunc'
    ]

    def __init__(self, *args, **kwargs):
        """
        Store the provided inputs and normalize a few option values

        Parameters
        ----------
        *args, **kwargs
            Forwarded unchanged to the parent ``ResDict`` constructor

        Raises
        ------
        ValueError
            If `test_size` is provided and falls outside of [0, 1)
        """
        super().__init__(*args, **kwargs)

        # a count of zero means "disabled"; represent that as None
        if self.get('n_split') == 0:
            self['n_split'] = None
        if self.get('test_split') == 0:
            self['test_split'] = None

        n_proc = self.get('n_proc')
        if n_proc is not None:
            if n_proc == 'max' or n_proc == -1:
                self['n_proc'] = cpu_count()
            elif n_proc < 0:
                # negative values count back from the number of CPUs (e.g.,
                # -2 means "all but one"); clamp so that a value more negative
                # than the CPU count cannot request zero or fewer processes
                self['n_proc'] = max(1, cpu_count() + 1 + n_proc)

        ts = self.get('test_size')
        if ts is not None and (ts < 0 or ts >= 1):
            raise ValueError('test_size must be in [0, 1). Provided value: {}'
                             .format(ts))


# Assemble the class docstring from the shared fragments so the attribute
# descriptions stay in sync with every other docstring that reuses them. The
# split-half, cross-validation and resampling fragments are included because
# their entries (`n_split`, `test_split`, `test_size`, `permsamples`,
# `bootsamples`) are all listed in ``PLSInputs.allowed``.
PLSInputs.__doc__ = dedent("""\
    PLS input information

    Attributes
    ----------
    X : (S, B) array_like
        Input data matrix, where `S` is observations and `B` is features.
    Y : (S, T) array_like
        Behavioral matrix, where `S` is observations and `T` is features.
        If from :obj:`.behavioral_pls`, this is the provided behavior matrix;
        if from :obj:`.meancentered_pls`, this is a dummy-coded group/condition
        matrix.
    {groups}
    {conditions}
    {mean_centering}
    {covariance}
    {stat_test}
    {split_half}
    {cross_val}
    {rotate}
    {ci}
    {proc_options}
    {resamples}
    """).format(**_pls_input_docs)


class PLSResults(ResDict):
    r"""
    Dictionary-like container for the outputs of a PLS analysis

    Attributes
    ----------
    x_weights : (B, L) `numpy.ndarray`
        Weights of `B` features used to project `X` matrix into PLS-derived
        component space
    y_weights : (J, L) `numpy.ndarray`
        Weights of `J` features used to project `Y` matrix into PLS-derived
        component space; not available with :func:`.pls_regression`
    x_scores : (S, L) `numpy.ndarray`
        Projection of `X` matrix into PLS-derived component space
    y_scores : (S, L) `numpy.ndarray`
        Projection of `Y` matrix into PLS-derived component space
    y_loadings : (J, L) `numpy.ndarray`
        Covariance of features in `Y` with projected `x_scores`
    singvals : (L, L) `numpy.ndarray`
        Singular values for PLS-derived component space; not available with
        :func:`.pls_regression`
    varexp : (L,) `numpy.ndarray`
        Variance explained in each of the PLS-derived components
    permres : :obj:`~.structures.PLSPermResults`
        Results of permutation testing, as applicable
    bootres : :obj:`~.structures.PLSBootResults`
        Results of bootstrap resampling, as applicable
    splitres : :obj:`~.structures.PLSSplitHalfResults`
        Results of split-half resampling, as applicable
    cvres : :obj:`~.structures.PLSCrossValidationResults`
        Results of cross-validation testing, as applicable
    inputs : :obj:`~.structures.PLSInputs`
        Inputs provided to original PLS
    """

    allowed = [
        'x_weights',
        'y_weights',
        'x_scores',
        'y_scores',
        'y_loadings',
        'singvals',
        'varexp',
        'permres',
        'bootres',
        'splitres',
        'cvres',
        'inputs'
    ]

    def __init__(self, **kwargs):
        """
        Store top-level results and build every nested results container

        Parameters
        ----------
        **kwargs
            Forwarded to the parent ``ResDict`` constructor. For each nested
            container (`inputs`, `bootres`, `permres`, `splitres`, `cvres`)
            the value stored under that name is unpacked as its keyword
            arguments; when the name is absent, all of `kwargs` is passed to
            the container instead.
        """
        super().__init__(**kwargs)

        # (attribute name, container class), in construction order
        nested = (
            ('inputs', PLSInputs),
            ('bootres', PLSBootResults),
            ('permres', PLSPermResults),
            ('splitres', PLSSplitHalfResults),
            ('cvres', PLSCrossValidationResults),
        )
        for key, container in nested:
            setattr(self, key, container(**kwargs.get(key, kwargs)))


class PLSBootResults(ResDict):
    """
    Dictionary-like object containing results of PLS bootstrap resampling

    Attributes
    ----------
    x_weights_normed : (B, L) `numpy.ndarray`
        `x_weights` normalized by their standard error, obtained from bootstrap
        resampling (see `x_weights_stderr`)
    x_weights_stderr : (B, L) `numpy.ndarray`
        Standard error of `x_weights`, used to generate `x_weights_normed`
    y_loadings : (J, L) `numpy.ndarray`
        Covariance of features in `Y` with projected `x_scores`; not available
        with :func:`.meancentered_pls`
    y_loadings_boot : (J, L, R) `numpy.ndarray`
        Distribution of `y_loadings` across all bootstrap resamples; not
        available with :func:`.meancentered_pls`
    y_loadings_ci : (J, L, 2) `numpy.ndarray`
        Lower (..., 0) and upper (..., 1) bounds of confidence interval for
        `y_loadings`; not available with :func:`.meancentered_pls`
    contrast : (J, L) `numpy.ndarray`
        Group x condition averages of :attr:`brainscores_demeaned`. Can be
        treated as a contrast indicating group x condition differences. Only
        obtained from :func:`.meancentered_pls`.
    contrast_boot : (J, L, R) `numpy.ndarray`
        Bootstrapped distribution of `contrast`; only available with
        :func:`.meancentered_pls`
    contrast_ci : (J, L, 2) `numpy.ndarray`
        Lower (..., 0) and upper (..., 1) bounds of confidence interval for
        `contrast`; only available with :func:`.meancentered_pls`
    bootsamples : (S, R) `numpy.ndarray`
        Indices of bootstrapped samples `S` across `R` resamples.
    """

    allowed = [
        'x_weights_normed', 'x_weights_stderr', 'bootsamples',
        'y_loadings', 'y_loadings_boot', 'y_loadings_ci',
        'contrast', 'contrast_boot', 'contrast_ci'
    ]


class PLSPermResults(ResDict):
    """
    Dictionary-like container for the outcome of PLS permutation testing

    Attributes
    ----------
    pvals : (L,) `numpy.ndarray`
        Non-parametric p-values used to examine whether components from
        original decomposition explain more variance than permuted components
    permsamples : (S, P) `numpy.ndarray`
        Resampling array used to permute `S` samples over `P` permutations
    """

    allowed = [
        'pvals',
        'permsamples'
    ]


class PLSSplitHalfResults(ResDict):
    """
    Dictionary-like container for the outcome of PLS split-half resampling

    Attributes
    ----------
    ucorr, vcorr : (L,) `numpy.ndarray`
        Average correlations between split-half resamples in original (non-
        permuted) data for left/right singular vectors. Can be interpreted
        as reliability of `L` latent variables
    ucorr_pvals, vcorr_pvals : (L,) `numpy.ndarray`
        Number of permutations where correlation between split-half
        resamples exceeded original correlations, normalized by the total
        number of permutations. Can be interpreted as the statistical
        significance of the reliability of `L` latent variables
    ucorr_uplim, vcorr_uplim : (L,) `numpy.ndarray`
        Upper bound of confidence interval for correlations between split
        halves for left/right singular vectors
    ucorr_lolim, vcorr_lolim : (L,) `numpy.ndarray`
        Lower bound of confidence interval for correlations between split
        halves for left/right singular vectors
    """

    allowed = [
        'ucorr',
        'vcorr',
        'ucorr_pvals',
        'vcorr_pvals',
        'ucorr_uplim',
        'vcorr_uplim',
        'ucorr_lolim',
        'vcorr_lolim'
    ]


class PLSCrossValidationResults(ResDict):
    """
    Dictionary-like container for the outcome of PLS cross-validation testing

    Attributes
    ----------
    r_squared : (T, I) `numpy.ndarray`
        R-squared ("determination coefficient") for each of `T` predicted
        behavioral scores against true behavioral scores across `I` train /
        test splits
    pearson_r : (T, I) `numpy.ndarray`
        Pearson's correlation for each of `T` predicted behavioral scores
        against true behavioral scores across `I` train / test splits
    """

    allowed = [
        'pearson_r',
        'r_squared'
    ]