Compatible true vs observed associations
When a given observation strength is estimated with CCA or PLS, what could have been the true association strength?
Setup
[1]:
import numpy as np
import xarray as xr
from gemmr.data import *
import matplotlib
import holoviews as hv
from holoviews import opts
hv.extension('matplotlib')
hv.renderer('matplotlib').param.set_param(dpi=120)
from my_config import *
import warnings
from matplotlib import MatplotlibDeprecationWarning
warnings.simplefilter('ignore', MatplotlibDeprecationWarning)
warnings.filterwarnings(
'ignore', 'aspect is not supported for Axes with xscale=log, yscale=linear', category=UserWarning
) # holoviews emits this for log-linear plots
Data generation
First let’s load the data
[2]:
data_home = None
compatible_true_vs_observed_assocs_cca = load_outcomes('compatTrueVsObsAssoc_cca_cca_random_sum+-2+-2', data_home=data_home).sel(mode=0)
compatible_true_vs_observed_assocs_pls = load_outcomes('compatTrueVsObsAssoc_pls_pls_random_sum+-2+-2', data_home=data_home).sel(mode=0)
Loading data from subfolder 'gemmr_latest'
Loading data from subfolder 'gemmr_latest'
[3]:
compatible_true_vs_observed_assocs_cca = compatible_true_vs_observed_assocs_cca.sel(px=compatible_true_vs_observed_assocs_cca.px > 2)
compatible_true_vs_observed_assocs_pls = compatible_true_vs_observed_assocs_pls.sel(px=compatible_true_vs_observed_assocs_pls.px > 2)
[4]:
compatible_true_vs_observed_assocs_cca = compatible_true_vs_observed_assocs_cca.sel(px=compatible_true_vs_observed_assocs_cca.px < 128)
compatible_true_vs_observed_assocs_pls = compatible_true_vs_observed_assocs_pls.sel(px=compatible_true_vs_observed_assocs_pls.px < 128)
What’s in these data files?
[5]:
print_ds_stats(compatible_true_vs_observed_assocs_cca)
n_rep 100
n_per_ftr [ 5 50]
r [0. 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09 0.1 0.11 0.12 0.13
0.14 0.15 0.16 0.17 0.18 0.19 0.2 0.21 0.22 0.23 0.24 0.25 0.26 0.27
0.28 0.29 0.3 0.31 0.32 0.33 0.34 0.35 0.36 0.37 0.38 0.39 0.4 0.41
0.42 0.43 0.44 0.45 0.46 0.47 0.48 0.49 0.5 0.51 0.52 0.53 0.54 0.55
0.56 0.57 0.58 0.59 0.6 0.61 0.62 0.63 0.64 0.65 0.66 0.67 0.68 0.69
0.7 0.71 0.72 0.73 0.74 0.75 0.76 0.77 0.78 0.79 0.8 0.81 0.82 0.83
0.84 0.85 0.86 0.87 0.88 0.89 0.9 0.91 0.92 0.93 0.94 0.95 0.96 0.97
0.98 0.99]
px [ 4 8 16 32 64]
ax+ay range (-2.00, -2.00)
py == px
<xarray.DataArray 'n_Sigmas' (px: 5, r: 100)>
array([[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10],
[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10],
[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10],
[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10],
[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10]])
Coordinates:
* r (r) float64 0.0 0.01 0.02 0.03 0.04 ... 0.95 0.96 0.97 0.98 0.99
* px (px) int64 4 8 16 32 64
power not calculated
[6]:
print_ds_stats(compatible_true_vs_observed_assocs_pls)
n_rep 100
n_per_ftr [ 5 50]
r [0. 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09 0.1 0.11 0.12 0.13
0.14 0.15 0.16 0.17 0.18 0.19 0.2 0.21 0.22 0.23 0.24 0.25 0.26 0.27
0.28 0.29 0.3 0.31 0.32 0.33 0.34 0.35 0.36 0.37 0.38 0.39 0.4 0.41
0.42 0.43 0.44 0.45 0.46 0.47 0.48 0.49 0.5 0.51 0.52 0.53 0.54 0.55
0.56 0.57 0.58 0.59 0.6 0.61 0.62 0.63 0.64 0.65 0.66 0.67 0.68 0.69
0.7 0.71 0.72 0.73 0.74 0.75 0.76 0.77 0.78 0.79 0.8 0.81 0.82 0.83
0.84 0.85 0.86 0.87 0.88 0.89 0.9 0.91 0.92 0.93 0.94 0.95 0.96 0.97
0.98 0.99]
px [ 4 8 16 32 64]
ax+ay range (-2.00, -2.00)
py == px
<xarray.DataArray 'n_Sigmas' (px: 5, r: 100)>
array([[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10],
[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 7, 1],
[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 9, 9, 9, 9, 8, 8, 7, 5, 3, 1, 0, 0,
0, 0, 0, 0],
[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 9, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 6, 6, 4, 4,
3, 3, 2, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0],
[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 4,
3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0]])
Coordinates:
* r (r) float64 0.0 0.01 0.02 0.03 0.04 ... 0.95 0.96 0.97 0.98 0.99
* px (px) int64 4 8 16 32 64
power not calculated
Figure
[7]:
def plot_compatible_true_vs_observed_assocs(
compatible_true_vs_observed_assocs, qs=(.025, .975), plot_identity_line=True
):
_ds = xr.Dataset(dict(
between_assocs=compatible_true_vs_observed_assocs.between_assocs,
between_assocs_true=compatible_true_vs_observed_assocs.between_assocs_true,
)).stack(it=('Sigma_id', 'px', 'rep', 'r'))
panel = hv.Overlay()
for n_per_ftr in _ds.n_per_ftr.values:
__ds = _ds.sel(n_per_ftr=n_per_ftr)
rounded = np.round(__ds.between_assocs, decimals=2)
g_q = __ds.between_assocs_true.groupby(rounded).quantile(qs)
panel *= hv.Area(
(g_q.between_assocs, g_q.sel(quantile=qs[0]), g_q.sel(quantile=qs[-1])),
vdims=['y', 'y2'],
label='Samples / ftr = %i' % n_per_ftr
)
if plot_identity_line:
panel *= hv.Curve(([0, 1], [0, 1])).opts(color='black', linestyle='--', linewidth=1)
return panel.redim(
x='Observed assoc. strength',
y='Compatible true assoc. strength',
)
[8]:
fig = (
(
plot_compatible_true_vs_observed_assocs(compatible_true_vs_observed_assocs_cca)
* hv.Text(.025, 1, '5 samples / ftr', fontsize=7, halign='left', valign='top')
* hv.Text(.025, .9, '50 samples / ftr', fontsize=7, halign='left', valign='top')
).opts(
opts.Text(color=hv.Palette('cool')),
opts.Overlay(xlim=(0, 1), ylim=(0, 1), show_legend=False, hooks=[suptitle_cca])
)
+ plot_compatible_true_vs_observed_assocs(compatible_true_vs_observed_assocs_pls).opts(
opts.Area(show_legend=True),
opts.Overlay(xlim=(0, None), ylim=(0, None), ylabel='', show_legend=False, hooks=[suptitle_pls]),
)
)
def hook_legend(plot, elements):
try:
legend = plot.handles['legend']
except KeyError:
pass
else:
legend.set_frame_on(False)
legend.set_bbox_to_anchor((0, 1.35))
fig = fig.cols(
2
).opts(
*fig_opts
).opts(
opts.Area(linewidth=0, alpha=.5, color=hv.Palette('cool')),
opts.Overlay(fig_inches=(1.7, None), sublabel_position=(-.25, 1))
)
hv.save(fig, 'fig/figS_compatible_true_vs_observed_assocs.pdf')
fig
[8]: