Powerlaw exponents

How do principal component spectra of empirical datasets look like?

Note: These datasets are not included in this release.

Setup

[1]:
import os
import pickle

import numpy as np
import xarray as xr
import pandas as pd

from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression

from gemmr.data import preproc_smith
from gemmr.util import pc_spectrum_decay_constant

from hcp import get_fc, triu2mat
from hcp.subject_measures import get_subject_measures

import warnings
from matplotlib import MatplotlibDeprecationWarning
warnings.simplefilter('ignore', MatplotlibDeprecationWarning)

import matplotlib
import holoviews as hv
from holoviews import opts
hv.extension('matplotlib')
hv.renderer('matplotlib').set_param(dpi=120)

from my_config import *

Load and preprocess data

HCP fMRI

[2]:
fc_noGSR = get_fc('hcp/rsBOLD_RelatedValidation210_mVWM1d', subject=None, task=None, kind='fisher').mean('task')
fc_GSR = get_fc('hcp/rsBOLD_RelatedValidation210_mVWMWB1d', subject=None, task=None, kind='fisher').mean('task')

fc_noGSR.shape, fc_GSR.shape
[2]:
((877, 64620), (951, 64620))
[3]:
def triufc2gbc(triufc):
    return triu2mat(triufc, diag=1).mean(0)


gbc_noGSR = xr.apply_ufunc(
    triufc2gbc,
    fc_noGSR,
    input_core_dims=(['connection'],),
    output_core_dims=(['roi'],),
    vectorize=True
)

gbc_GSR = xr.apply_ufunc(
    triufc2gbc,
    fc_GSR,
    input_core_dims=(['connection'],),
    output_core_dims=(['roi'],),
    vectorize=True
)
[12]:
# head motion parameters
head_motion = xr.load_dataarray(os.path.expanduser('~/Projects/restricted_data/hcp/movement_parameters.nc'))
head_motion = head_motion.to_dataframe('dummy').unstack('movement_kind')['dummy']
[13]:
subject_measures_fname = os.path.expanduser('~/Projects/restricted_data/hcp/unrestricted.csv')
sm = pd.read_csv(subject_measures_fname, sep=',', index_col='Subject')
sm.index = sm.index.set_names(['subject'])
sm.columns = sm.columns.set_names(['feature'])

subject_measures_fname = os.path.expanduser('~/Projects/restricted_data/hcp/RESTRICTED.csv')
sm_r = pd.read_csv(subject_measures_fname, sep=',', index_col='Subject')
sm_r.index = sm_r.index.set_names(['subject'])
sm_r.columns = sm_r.columns.set_names(['feature'])

sm = pd.concat([sm, sm_r, head_motion], axis=1)
del sm_r

sm_noGSR = sm.loc[fc_noGSR.subject]
sm_GSR = sm.loc[fc_GSR.subject]
[8]:
assert len(fc_noGSR) == len(sm_noGSR)
assert len(fc_GSR) == len(sm_GSR)

n_subjects_GSR = len(fc_GSR)
n_subjects_noGSR = len(fc_noGSR)
[9]:
preprocessed_data_GSR = preproc_smith(
    fc_GSR, sm_GSR, final_sm_deconfound=False,
    confounders=('movement_AbsoluteRMS_mean', 'movement_RelativeRMS_mean'),
    hcp_confounders=True, hcp_confounder_software_version=True, squared_confounders=True,
    hcp_data_dict_correct_pct_to_t=True
)

print()

preprocessed_data_noGSR = preproc_smith(
    fc_noGSR, sm_noGSR, final_sm_deconfound=False,
    confounders=('movement_AbsoluteRMS_mean', 'movement_RelativeRMS_mean'),
    hcp_confounders=True, hcp_confounder_software_version=True, squared_confounders=True,
    hcp_data_dict_correct_pct_to_t=True
)
used fMRI 3T reconstruction software versions are: ['r177' 'r177 r227' 'r227']
3.25% of values in confounders missing, imputing 0 for these
(951, 82689)
158 out of 158 features found
smallest sval S_cov = 2.85272671133e-19
smallest sval S_cov_psd = 1.48106418634e-08
rank S_cov = 262
rank S_cov_psd = 951

used fMRI 3T reconstruction software versions are: ['r177' 'r177 r227' 'r227']
3.18% of values in confounders missing, imputing 0 for these
(877, 129240)
158 out of 158 features found
smallest sval S_cov = 4.45258378738e-18
smallest sval S_cov_psd = 1.43975543935e-08
rank S_cov = 251
rank S_cov_psd = 877

HCP dMRI

[10]:
_ds = xr.open_dataset('~/gemmr_data/empirical/hcp_dMRI_pcs.nc')
hcp_dMRI_X, hcp_dMRI_Y = _ds['X_pcs'].values, _ds['Y_pcs'].values

print(len(hcp_dMRI_X))
1020

UKBB fMRI

[11]:
_ds = xr.open_dataset('~/gemmr_data/empirical/ukbb_fMRI_pcs.nc')
ukbb_fMRI_X, ukbb_fMRI_Y = _ds['X_pcs'].values, _ds['Y_pcs'].values

print(len(ukbb_fMRI_X))
20000

Powerlaw exponents of within-set variance spectra

[14]:
panels = {}

dc, panels['fc_GSR'] = pc_spectrum_decay_constant(fc_GSR.values, expl_var_ratios=(.3, .9), plot=True)
dc, panels['uu1_GSR'] = pc_spectrum_decay_constant(preprocessed_data_GSR['X'], expl_var_ratios=(.3, .9), plot=True)

dc, panels['fc_noGSR'] = pc_spectrum_decay_constant(fc_noGSR.values, expl_var_ratios=(.3, .9), plot=True)
dc, panels['uu1_noGSR'] = pc_spectrum_decay_constant(preprocessed_data_noGSR['X'], expl_var_ratios=(.3, .9), plot=True)

dc, panels['gbc_GSR'] = pc_spectrum_decay_constant(gbc_GSR.values, expl_var_ratios=(.3, .9), plot=True)
dc, panels['gbc_noGSR'] = pc_spectrum_decay_constant(gbc_noGSR.values, expl_var_ratios=(.3, .9), plot=True)

dc, panels['uu2'] = pc_spectrum_decay_constant(preprocessed_data_GSR['Y'], expl_var_ratios=(.3, .9), plot=True)

dc, panels['dMRI_preprocessed'] = pc_spectrum_decay_constant(hcp_dMRI_X, expl_var_ratios=(.3, .9), plot=True)

dc, panels['ukbb_fMRI_preprocessed'] = pc_spectrum_decay_constant(ukbb_fMRI_X, expl_var_ratios=(.3, .9), plot=True)
dc, panels['ukbb_behavior_preprocessed'] = pc_spectrum_decay_constant(ukbb_fMRI_Y, expl_var_ratios=(.3, .9), plot=True)
[15]:
fig = (
    panels['fc_GSR'].relabel('HCP FC w/ GSR').opts(xlabel='')
    + panels['uu1_GSR'].relabel('HCP processed FC w/ GSR').opts(xlabel='', ylabel='')
    + panels['fc_noGSR'].relabel('HCP FC w/o GSR').opts(xlabel='', ylabel='')
    + panels['uu1_noGSR'].relabel('HCP processed FC w/o GSR').opts(xlabel='', ylabel='')
    # ---
    + panels['gbc_GSR'].relabel('HCP GBC w/ GSR').opts(xlabel='')
    + panels['gbc_noGSR'].relabel('HCP GBC w/o GSR').opts(xlabel='', ylabel='')
    + panels['uu2'].relabel('HCP processed behavior').opts(xlabel='', ylabel='')
    + panels['dMRI_preprocessed'].relabel('HCP processed dMRI').opts(xlabel='', ylabel='')
    # ---
    + panels['ukbb_fMRI_preprocessed'].relabel('UKBB processed fMRI')
    + panels['ukbb_behavior_preprocessed'].relabel('UKBB processed behavior').opts(ylabel='')
).cols(
    4
).opts(*fig_opts).opts(
    opts.Curve(linewidth=1.5),
    opts.Overlay(hooks=[legend_frame_off, Format_log_axis('x', [1,])], xlim=(1, 210), xticks=3, ylim=(1e-5, 1)),
    opts.Layout(fig_inches=(7, None), vspace=.5, sublabel_position=(-.45, .95))
)

hv.save(fig, 'fig/figS_powerlaw_exponents_real_data.pdf')

fig
[15]:

Some direct comparisons

[28]:
hcp_fMRI_vX = preprocessed_data_GSR['X'].var(0)[:100]
hcp_fMRI_vY = preprocessed_data_GSR['Y'].var(0)[:100]

hcp_dMRI_vX = hcp_dMRI_X.var(0)[:100]
hcp_dMRI_vY = hcp_dMRI_Y.var(0)[:100]

ukbb_fMRI_vX = ukbb_fMRI_X.var(0)
ukbb_fMRI_vY = ukbb_fMRI_Y.var(0)

(
    (
        hv.Curve((np.arange(1, 101), hcp_fMRI_vX / hcp_fMRI_vX[0]), label='nim')
        * hv.Curve((np.arange(1, 101), hcp_fMRI_vY / hcp_fMRI_vY[0]), label='beh')
    ).relabel('HCP fMRI')
    # ---
    + (
        hv.Curve((np.arange(1, 101), hcp_dMRI_vX / hcp_dMRI_vX[0]))
        * hv.Curve((np.arange(1, 101), hcp_dMRI_vY / hcp_dMRI_vY[0]))
    ).relabel('HCP dMRI')
    # ---
    + (
        hv.Curve((np.arange(1, 101), ukbb_fMRI_vX / ukbb_fMRI_vX[0]))
        * hv.Curve((np.arange(1, 101), ukbb_fMRI_vY / ukbb_fMRI_vY[0]))
    ).relabel('UKBB fMRI')
).redim(
    x='PC #',
    y='normalized explained variance'
).opts(*fig_opts).opts(
    opts.Curve(logx=True, logy=True),
    opts.Layout(fig_inches=(1.7 * 3, None))
)
[28]: