Empirical data: run analyses

In this notebook, we perform CCA and PLS analyses of empirical datasets.

Note: The used datasets are not included in this release.

Setup

[ ]:
import numbers
import os

import numpy as np
import xarray as xr
import pandas as pd
from scipy.stats import beta
from scipy.spatial.distance import pdist

from sklearn.utils import check_random_state

import pickle

from gemmr.data import *
from gemmr.data.preprocessing import _smith_feature_names, _check_features
from gemmr.estimators import *
from gemmr.sample_analysis import *
from gemmr.sample_analysis.macros import *
from gemmr.model_selection import max_min_detector

import matplotlib
import holoviews as hv
from holoviews import opts
hv.extension('matplotlib')
hv.renderer('matplotlib').set_param(dpi=120)

from my_config import *

import warnings
from matplotlib import MatplotlibDeprecationWarning
warnings.simplefilter('ignore', MatplotlibDeprecationWarning)
warnings.filterwarnings(
    'ignore', 'aspect is not supported for Axes with xscale=log, yscale=linear', category=UserWarning
)  # holoviews emits this for log-linear plots
[2]:
hcp_fMRI_palm_permutations = pd.read_csv(os.path.expanduser('~/Projects/restricted_data/hcp/palm_permutations.txt'), index_col=None, header=None).T.values - 1
[6]:
_ds = xr.open_dataset('~/gemmr_data/empirical/hcp_fMRI_pcs.nc')
hcp_fMRI_X, hcp_fMRI_Y = _ds['X_pcs'].values, _ds['Y_pcs'].values

print(len(hcp_fMRI_X))
948
[7]:
_ds = xr.open_dataset('~/gemmr_data/empirical/hcp_dMRI_pcs.nc')
hcp_dMRI_X, hcp_dMRI_Y = _ds['X_pcs'].values, _ds['Y_pcs'].values

print(len(hcp_dMRI_X))
1020
[8]:
_ds = xr.open_dataset('~/gemmr_data/empirical/ukbb_fMRI_pcs.nc')
ukbb_fMRI_X, ukbb_fMRI_Y = _ds['X_pcs'].values, _ds['Y_pcs'].values

print(len(ukbb_fMRI_X))
20000
[13]:
_ds = xr.open_dataset('~/gemmr_data/empirical/hcp_fMRI_optiNpcs_pcs.nc')
hcp_fMRI_opti_nPCs_X, hcp_fMRI_optiNpcs_Y = _ds['X_pcs'].values, _ds['Y_pcs'].values

print(len(hcp_fMRI_opti_nPCs_X))
948

Run analyses

[4]:
# Estimators used later
pls = SVDPLS(n_components=1, scale=False, std_ddof=1)
cca = SVDCCA(n_components=1, scale=False, std_ddof=1, normalize_weights=True, cov_out_of_bounds='ignore')

Note: The following cells take a long time to run, especially UKBB

HCP fMRI

[81]:
len(hcp_fMRI_X)* .2
[81]:
189.60000000000002
[ ]:
hcp_fMRI_cca_analyses = analyze_subsampled_and_resampled(
    cca, hcp_fMRI_X, hcp_fMRI_Y, permutations=hcp_fMRI_palm_permutations[1:1001], n_test_subsample=189, n_jobs=-2,
)
pickle.dump(hcp_fMRI_cca_analyses, open('hcp_fMRI_cca_analyses.pickle', 'wb'))
[ ]:
hcp_fMRI_pls_analyses = analyze_subsampled_and_resampled(
    pls, hcp_fMRI_X, hcp_fMRI_Y, permutations=hcp_fMRI_palm_permutations[1:1001], n_test_subsample=189, n_jobs=-2
)
pickle.dump(hcp_fMRI_pls_analyses, open('hcp_fMRI_pls_analyses.pickle', 'wb'))

HCP dMRI

[ ]:
hcp_dMRI_cca_analyses = analyze_subsampled_and_resampled(
    cca, hcp_dMRI_X, hcp_dMRI_Y, n_test_subsample=189, n_jobs=-2
)
pickle.dump(hcp_dMRI_cca_analyses, open('hcp_dMRI_cca_analyses.pickle', 'wb'))
[ ]:
hcp_dMRI_pls_analyses = analyze_subsampled_and_resampled(
    pls, hcp_dMRI_X, hcp_dMRI_Y, n_test_subsample=189, n_jobs=-2
)
pickle.dump(hcp_dMRI_pls_analyses, open('hcp_dMRI_pls_analyses.pickle', 'wb'))

UKBB fMRI

[ ]:
ukbb_fMRI_cca_analyses = analyze_subsampled_and_resampled(
    cca, ukbb_fMRI_X, ukbb_fMRI_Y, n_test_subsample=189, n_jobs=-2
)
pickle.dump(ukbb_fMRI_cca_analyses, open('ukbb_fMRI_cca_analyses.pickle', 'wb'))
[ ]:
ukbb_fMRI_pls_analyses = analyze_subsampled_and_resampled(
    pls, ukbb_fMRI_X, ukbb_fMRI_Y, n_test_subsample=189, n_jobs=-2
)
pickle.dump(ukbb_fMRI_pls_analyses, open('ukbb_fMRI_pls_analyses.pickle', 'wb'))

HCP fMRI with optimized number of PCs

[ ]:
hcp_fMRI_optiNpcs_cca_analyses = analyze_subsampled_and_resampled(
    cca, hcp_fMRI_opti_nPCs_X, hcp_fMRI_optiNpcs_Y, permutations=hcp_fMRI_palm_permutations[1:1001], n_jobs=-2
)
pickle.dump(hcp_fMRI_optiNpcs_cca_analyses, open('hcp_fMRI_optiNpcs_cca_analyses.pickle', 'wb'))
[ ]:
hcp_fMRI_optiNpcs_pls_analyses = analyze_subsampled_and_resampled(
    pls, hcp_fMRI_opti_nPCs_X, hcp_fMRI_optiNpcs_Y, permutations=hcp_fMRI_palm_permutations[1:1001], n_jobs=-2,
)
pickle.dump(hcp_fMRI_optiNpcs_pls_analyses, open('hcp_fMRI_optiNpcs_pls_analyses.pickle', 'wb'))