We sample two profile models: a profile-only model (labeled 'Separate') and a full model (labeled 'Full').
This notebook compares those two models to see what impact full-model training has on user inference.
import os
from pathlib import Path
import pandas as pd
import numpy as np
from scipy.special import expit, logit
from scipy import stats
import seaborn as sns
import plotnine as p
import matplotlib.pyplot as plt
from statsmodels.nonparametric.kde import KDEUnivariate
import zarr
from IPython.display import display, Markdown
import bookgender.datatools as dt
from bookgender.nbutils import *
# Directory where this notebook's figures will be saved.
fig_dir = init_figs('ProfileModelCompare')
# Dataset identifiers registered in the datatools module.
datasets = [key for key in dt.datasets]
datasets
def load(ds, model):
    """Open the posterior sample store for one dataset/model pair.

    Returns a read-only zarr group backed by an LRU-cached zip store
    (cache capacity 1 GiB) at data/<ds>/inference/<model>/samples.zarr.
    """
    store = zarr.ZipStore(f'data/{ds}/inference/{model}/samples.zarr', mode='r')
    cached = zarr.LRUStoreCache(store, 2**30)
    return zarr.group(cached)
# Load the posterior samples for both models, keyed by dataset.
p_samp = {ds: load(ds, 'profile') for ds in datasets}
f_samp = {ds: load(ds, 'full') for ds in datasets}
The primary parameters of interest for profiles are $\mu$ and $\sigma$ - the mean and standard deviation of the (log-odds) proportions.
# Posterior samples of mu from the profile-only model, one column per dataset.
p_mu = pd.DataFrame({ds: p_samp[ds]['mu'] for ds in datasets})
p_mu.index.name = 'Sample'
p_mu.describe()
# Posterior samples of mu from the full model, one column per dataset.
f_mu = pd.DataFrame({ds: f_samp[ds]['mu'] for ds in datasets})
f_mu.index.name = 'Sample'
f_mu.describe()
# Stack the two models' mu samples into long form and compare per dataset.
mu = pd.concat({'Separate': p_mu, 'Full': f_mu}, names=['Model']).reset_index()
mu = mu.melt(id_vars=['Model', 'Sample'], var_name='Set')
# Pass x/y by keyword: positional x/y were deprecated in seaborn 0.12 and
# removed in later releases; keywords work on all versions.
sns.boxplot(x='Set', y='value', hue='Model', data=mu)
# Posterior samples of sigma from the profile-only model.
p_s = pd.DataFrame({ds: p_samp[ds]['sigma'] for ds in datasets})
p_s.index.name = 'Sample'
p_s.describe()
# Posterior samples of sigma from the full model.
f_s = pd.DataFrame({ds: f_samp[ds]['sigma'] for ds in datasets})
f_s.index.name = 'Sample'
f_s.describe()
# Stack the two models' sigma samples into long form and compare per dataset.
sigma = pd.concat({'Separate': p_s, 'Full': f_s}, names=['Model']).reset_index()
sigma = sigma.melt(id_vars=['Model', 'Sample'], var_name='Set')
# Pass x/y by keyword: positional x/y were deprecated in seaborn 0.12 and
# removed in later releases; keywords work on all versions.
sns.boxplot(x='Set', y='value', hue='Model', data=sigma)
Let's now compare the $\theta$ values - what does each model predict for the distribution of user profile tendencies?
# Posterior samples of thetaP (population theta) from the profile-only model.
p_th = pd.DataFrame({ds: p_samp[ds]['thetaP'] for ds in datasets})
p_th.index.name = 'Sample'
p_th.describe()
# Posterior samples of thetaP (population theta) from the full model.
f_th = pd.DataFrame({ds: f_samp[ds]['thetaP'] for ds in datasets})
f_th.index.name = 'Sample'
f_th.describe()
# Long-form table of thetaP samples, then one density panel per dataset
# with the two models overlaid.
thetaP = pd.concat({'Separate': p_th, 'Full': f_th}, names=['Model']).reset_index()
thetaP = thetaP.melt(id_vars=['Model', 'Sample'], var_name='Set')
grid = sns.FacetGrid(data=thetaP, row='Set', hue='Model', aspect=2)
grid.map(sns.kdeplot, 'value')
grid.add_legend()
# plotnine version of the thetaP density comparison, faceted by dataset.
# Fixes: the module is imported as `p` (L10), so `pn.` raised NameError;
# facet_grid needs a complete formula ('Set ~ .'), not the dangling 'Set ~'.
(p.ggplot(thetaP, p.aes('value', color='Model'))
 + p.geom_line(stat='density', adjust=0.5)
 + p.facet_grid('Set ~ .'))
Now let's look at individual users' estimated $\theta_u$ values. How different are they? We will start by loading each user's posterior expected $\theta_u$ - by linearity of expectation, the expected difference is the difference in expected values.
# Per-user posterior mean of nTheta (log-odds) under the profile-only model,
# indexed by (Set, User).
p_thu = pd.concat({
    ds: pd.DataFrame({'nTheta': np.mean(p_samp[ds]['nTheta'], axis=0)})
    for ds in datasets
}, names=['Set', 'User'])
# Map back to the probability scale via the logistic function.
p_thu['Theta'] = expit(p_thu['nTheta'])
p_thu
# Per-user posterior mean of nTheta (log-odds) under the full model,
# indexed by (Set, User).
f_thu = pd.concat({
    ds: pd.DataFrame({'nTheta': np.mean(f_samp[ds]['nTheta'], axis=0)})
    for ds in datasets
}, names=['Set', 'User'])
# Map back to the probability scale via the logistic function.
f_thu['Theta'] = expit(f_thu['nTheta'])
f_thu
# Align the two per-user tables and compute full-minus-separate differences
# on both the log-odds (ndiff) and probability (diff) scales.
thetaU = p_thu.join(f_thu, rsuffix='_f')
thetaU = thetaU.assign(
    ndiff=thetaU['nTheta_f'] - thetaU['nTheta'],
    diff=thetaU['Theta_f'] - thetaU['Theta'],
)
thetaU.describe()
# Central 95% interval of the probability-scale difference.
thetaU['diff'].quantile([0.025, 0.975])
thetaU['diff'].abs().describe()
What is the 95th percentile of the absolute difference?
# 95th percentile of the absolute probability-scale difference; computed once
# and reused for the reference line below.
q95 = thetaU['diff'].abs().quantile(0.95)
q95
# Empirical CDF of |diff| with the 95th percentile marked.
sns.kdeplot(thetaU['diff'].abs(), cumulative=True)
plt.axvline(q95, color='grey')
# Same view on the log-odds (nTheta) scale.
sns.kdeplot(thetaU['ndiff'].abs(), cumulative=True)
plt.axvline(thetaU['ndiff'].abs().quantile(0.95), color='grey')