Cross-loadings¶
We explore how many samples are necessary to obtain stable cross-loadings, and compare to loadings.
Setup¶
[1]:
import numpy as np
import xarray as xr
from scipy.stats import pearsonr, spearmanr
from scipy.spatial.distance import pdist
from gemmr.data import load_outcomes, print_ds_stats
from gemmr.generative_model import *
from gemmr.sample_analysis import *
from gemmr.estimators import SVDCCA
from gemmr.sample_size import *
from gemmr.metrics import *
import holoviews as hv
from holoviews import opts
hv.renderer('matplotlib').set_param(dpi=120)
hv.extension('matplotlib')
from my_config import *
import warnings
warnings.simplefilter('ignore', UserWarning)
warnings.filterwarnings(
'ignore', 'aspect is not supported for Axes with xscale=log, yscale=linear', category=UserWarning
) # holoviews emits this for log-linear plots
/Users/mdh56/Projects/gemmr/gemmr/sample_analysis/analyzers.py:13: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)
from tqdm.autonotebook import tqdm
[2]:
ds_cca = load_outcomes('cca')
ds_pls = load_outcomes('pls', tag='axPlusay-2')
What’s in the outcome data?
[3]:
print_ds_stats(ds_cca)
n_modes 1
n_rep 100
n_per_ftr [ 3 4 8 16 32 64 128 256 512 1024]
r [0.1 0.3 0.5 0.7 0.9]
px [ 2 4 8 16 32 64 128]
ax+ay range (0.00, 0.00)
py == px
<xarray.DataArray 'n_Sigmas' (px: 7, r: 5)>
array([[25, 25, 25, 25, 25],
[25, 25, 25, 25, 25],
[25, 25, 25, 25, 25],
[25, 25, 25, 25, 25],
[25, 25, 25, 25, 25],
[ 0, 25, 25, 25, 25],
[ 0, 25, 25, 25, 25]])
Coordinates:
* r (r) float64 0.1 0.3 0.5 0.7 0.9
* px (px) int64 2 4 8 16 32 64 128
power calculated
[4]:
print_ds_stats(ds_pls)
n_modes 1
n_rep 100
n_per_ftr [ 3 4 8 16 32 64 128 256 512 1024 2048 4096 8192]
r [0.1 0.3 0.5 0.7 0.9]
px [ 2 4 8 16 32 64 128]
ax+ay range (-2.00, -2.00)
py == px
<xarray.DataArray 'n_Sigmas' (px: 7, r: 5)>
array([[25, 25, 25, 25, 25],
[25, 25, 25, 25, 25],
[25, 25, 25, 25, 25],
[ 0, 25, 25, 25, 25],
[ 0, 25, 25, 25, 25],
[ 0, 0, 0, 25, 25],
[ 0, 0, 0, 25, 25]])
Coordinates:
* r (r) float64 0.1 0.3 0.5 0.7 0.9
* px (px) int64 2 4 8 16 32 64 128
power calculated
True loadings vs true cross-loadings¶
How similar are loadings and cross-loadings in principle? We compare here their true values that were calculated directly from the assumed joint covariance matrices (i.e. they were not estimated from data).
[5]:
def _corr(x, y, corr_fun=pearsonr):
mask = np.isfinite(x) & np.isfinite(y)
if mask.sum() >= 2:
return corr_fun(x[mask], y[mask])[0]
else:
return np.nan
def corrs_loading_crossloadings_true(ds, corr_fun=pearsonr):
return xr.apply_ufunc(
_corr,
ds.x_loadings_true,
ds.x_crossloadings_true,
input_core_dims=[['x_feature'], ['x_feature']],
vectorize=True,
kwargs=dict(corr_fun=corr_fun)
)
[6]:
cca_pearsonr_loadings_crossloadings_true = corrs_loading_crossloadings_true(ds_cca, pearsonr).mean('Sigma_id').rename('Pearson correlation between\ntrue loadings and\ntrue cross-loadings')
cca_spearmanr_loadings_crossloadings_true = corrs_loading_crossloadings_true(ds_cca, spearmanr).mean('Sigma_id').rename('Spearman correlation between\ntrue loadings and\ntrue cross-loadings')
pls_pearsonr_loadings_crossloadings_true = corrs_loading_crossloadings_true(ds_pls, pearsonr).mean('Sigma_id').rename('Pearson correlation between\ntrue loadings and\ntrue cross-loadings')
pls_spearmanr_loadings_crossloadings_true = corrs_loading_crossloadings_true(ds_pls, spearmanr).mean('Sigma_id').rename('Spearman correlation between\ntrue loadings and\ntrue cross-loadings')
/anaconda3/envs/gemmrtest/lib/python3.8/site-packages/xarray/core/nanops.py:142: RuntimeWarning: Mean of empty slice
return np.nanmean(a, axis=axis, dtype=dtype)
[7]:
fig_true = (
hv.QuadMesh(cca_pearsonr_loadings_crossloadings_true, kdims=['px', 'r']).opts(hooks=[suptitle_cca])
+ hv.QuadMesh(pls_pearsonr_loadings_crossloadings_true, kdims=['px', 'r']).opts(colorbar=True, ylabel='', hooks=[suptitle_pls])
#+ hv.QuadMesh(cca_spearmanr_loadings_crossloadings_true, kdims=['px', 'r'])
#+ hv.QuadMesh(pls_spearmanr_loadings_crossloadings_true, kdims=['px', 'r']).opts(colorbar=True)
).redim(
px='Number of features',
r='$r_\mathrm{true}$'
).cols(
2
).opts(*fig_opts).opts(
opts.QuadMesh(logx=True, cmap='inferno', clim=(0, 1)),
opts.Layout(hspace=.5, sublabel_position=(-.45, .95), fig_inches=(3.42, 1.))
)
hv.save(fig_true, 'fig/figS_true_loadings_vs_true_crossloadings.pdf')
fig_true
[7]:
How many samples are required to obtain stable cross-loadings, compared to the number of required samples for loadings?¶
[8]:
target_error = 0.1
n_reqs_cca = calc_n_required_all_metrics(ds_cca.sel(mode=0), target_error=target_error, search_dim='n_per_ftr')
n_reqs_pls = calc_n_required_all_metrics(ds_pls.sel(mode=0), target_error=target_error, search_dim='n_per_ftr')
n_reqs_cca['crossloadingError'] = calc_n_required(
mk_crossloadingError(ds_cca).mean('rep'),
-target_error, target_error, search_dim='n_per_ftr')
n_reqs_pls['crossloadingError'] = calc_n_required(
mk_crossloadingError(ds_pls).mean('rep'),
-target_error, target_error, search_dim='n_per_ftr')
/anaconda3/envs/gemmrtest/lib/python3.8/site-packages/xarray/core/nanops.py:142: RuntimeWarning: Mean of empty slice
return np.nanmean(a, axis=axis, dtype=dtype)
[9]:
pxs_cca = n_reqs_cca['combined'].px
pxs_pls = n_reqs_pls['combined'].px
rel_diff = lambda n_reqs, ref_metric: ((n_reqs['crossloadingError'] - n_reqs[ref_metric]) / n_reqs[ref_metric]).mean('Sigma_id').sel(px=n_reqs[ref_metric].px>2).rename('Required samples per feature\n(cross-loadings - loadings)/loadings')
fig = (
hv.QuadMesh(rel_diff(n_reqs_cca, 'loadingError'), kdims=['px', 'r']).opts(hooks=[suptitle_cca])
+ hv.QuadMesh(rel_diff(n_reqs_pls, 'loadingError'), kdims=['px', 'r']).opts(
ylabel='', colorbar=True, hooks=[suptitle_pls]
)
).redim(
px='Number of features',
r='$r_\mathrm{true}$'
).cols(
2
).opts(*fig_opts).opts(
opts.QuadMesh(logx=True, logz=False, cmap='RdBu_r', symmetric=True, clim=(-.65, .65)),
opts.Layout(hspace=.5, vspace=.5, sublabel_position=(-.4, .95), fig_inches=(3.42, 1.)),
)
hv.save(fig, 'fig/fig7_crossloadings.pdf')
fig
[9]: