Empirical data: run analyses¶
In this notebook, we perform CCA and PLS analyses of empirical datasets.
Note: The used datasets are not included in this release.
Setup¶
[ ]:
import numbers
import os
import numpy as np
import xarray as xr
import pandas as pd
from scipy.stats import beta
from scipy.spatial.distance import pdist
from sklearn.utils import check_random_state
import pickle
from gemmr.data import *
from gemmr.data.preprocessing import _smith_feature_names, _check_features
from gemmr.estimators import *
from gemmr.sample_analysis import *
from gemmr.sample_analysis.macros import *
from gemmr.model_selection import max_min_detector
import matplotlib
import holoviews as hv
from holoviews import opts
hv.extension('matplotlib')
hv.renderer('matplotlib').set_param(dpi=120)
from my_config import *
import warnings
from matplotlib import MatplotlibDeprecationWarning
warnings.simplefilter('ignore', MatplotlibDeprecationWarning)
warnings.filterwarnings(
'ignore', 'aspect is not supported for Axes with xscale=log, yscale=linear', category=UserWarning
) # holoviews emits this for log-linear plots
[2]:
hcp_fMRI_palm_permutations = pd.read_csv(os.path.expanduser('~/Projects/restricted_data/hcp/palm_permutations.txt'), index_col=None, header=None).T.values - 1
[6]:
_ds = xr.open_dataset('~/gemmr_data/empirical/hcp_fMRI_pcs.nc')
hcp_fMRI_X, hcp_fMRI_Y = _ds['X_pcs'].values, _ds['Y_pcs'].values
print(len(hcp_fMRI_X))
948
[7]:
_ds = xr.open_dataset('~/gemmr_data/empirical/hcp_dMRI_pcs.nc')
hcp_dMRI_X, hcp_dMRI_Y = _ds['X_pcs'].values, _ds['Y_pcs'].values
print(len(hcp_dMRI_X))
1020
[8]:
_ds = xr.open_dataset('~/gemmr_data/empirical/ukbb_fMRI_pcs.nc')
ukbb_fMRI_X, ukbb_fMRI_Y = _ds['X_pcs'].values, _ds['Y_pcs'].values
print(len(ukbb_fMRI_X))
20000
[13]:
_ds = xr.open_dataset('~/gemmr_data/empirical/hcp_fMRI_optiNpcs_pcs.nc')
hcp_fMRI_opti_nPCs_X, hcp_fMRI_optiNpcs_Y = _ds['X_pcs'].values, _ds['Y_pcs'].values
print(len(hcp_fMRI_opti_nPCs_X))
948
Run analyses¶
[4]:
# Estimators used later
pls = SVDPLS(n_components=1, scale=False, std_ddof=1)
cca = SVDCCA(n_components=1, scale=False, std_ddof=1, normalize_weights=True, cov_out_of_bounds='ignore')
Note: The following cells take a long time to run, especially UKBB
HCP fMRI¶
[81]:
len(hcp_fMRI_X)* .2
[81]:
189.60000000000002
[ ]:
hcp_fMRI_cca_analyses = analyze_subsampled_and_resampled(
cca, hcp_fMRI_X, hcp_fMRI_Y, permutations=hcp_fMRI_palm_permutations[1:1001], n_test_subsample=189, n_jobs=-2,
)
pickle.dump(hcp_fMRI_cca_analyses, open('hcp_fMRI_cca_analyses.pickle', 'wb'))
[ ]:
hcp_fMRI_pls_analyses = analyze_subsampled_and_resampled(
pls, hcp_fMRI_X, hcp_fMRI_Y, permutations=hcp_fMRI_palm_permutations[1:1001], n_test_subsample=189, n_jobs=-2
)
pickle.dump(hcp_fMRI_pls_analyses, open('hcp_fMRI_pls_analyses.pickle', 'wb'))
HCP dMRI¶
[ ]:
hcp_dMRI_cca_analyses = analyze_subsampled_and_resampled(
cca, hcp_dMRI_X, hcp_dMRI_Y, n_test_subsample=189, n_jobs=-2
)
pickle.dump(hcp_dMRI_cca_analyses, open('hcp_dMRI_cca_analyses.pickle', 'wb'))
[ ]:
hcp_dMRI_pls_analyses = analyze_subsampled_and_resampled(
pls, hcp_dMRI_X, hcp_dMRI_Y, n_test_subsample=189, n_jobs=-2
)
pickle.dump(hcp_dMRI_pls_analyses, open('hcp_dMRI_pls_analyses.pickle', 'wb'))
UKBB fMRI¶
[ ]:
ukbb_fMRI_cca_analyses = analyze_subsampled_and_resampled(
cca, ukbb_fMRI_X, ukbb_fMRI_Y, n_test_subsample=189, n_jobs=-2
)
pickle.dump(ukbb_fMRI_cca_analyses, open('ukbb_fMRI_cca_analyses.pickle', 'wb'))
[ ]:
ukbb_fMRI_pls_analyses = analyze_subsampled_and_resampled(
pls, ukbb_fMRI_X, ukbb_fMRI_Y, n_test_subsample=189, n_jobs=-2
)
pickle.dump(ukbb_fMRI_pls_analyses, open('ukbb_fMRI_pls_analyses.pickle', 'wb'))
HCP fMRI with optimized number of PCs¶
[ ]:
hcp_fMRI_optiNpcs_cca_analyses = analyze_subsampled_and_resampled(
cca, hcp_fMRI_opti_nPCs_X, hcp_fMRI_optiNpcs_Y, permutations=hcp_fMRI_palm_permutations[1:1001], n_jobs=-2
)
pickle.dump(hcp_fMRI_optiNpcs_cca_analyses, open('hcp_fMRI_optiNpcs_cca_analyses.pickle', 'wb'))
[ ]:
hcp_fMRI_optiNpcs_pls_analyses = analyze_subsampled_and_resampled(
pls, hcp_fMRI_opti_nPCs_X, hcp_fMRI_optiNpcs_Y, permutations=hcp_fMRI_palm_permutations[1:1001], n_jobs=-2,
)
pickle.dump(hcp_fMRI_optiNpcs_pls_analyses, open('hcp_fMRI_optiNpcs_pls_analyses.pickle', 'wb'))