Setup¶
In [1]:
from pathlib import Path
import numpy as np
import pandas as pd
import scipy
import seaborn as sns
sns.set_theme()
In [2]:
DATA = Path('..') / 'data'
PROCESSED = DATA / 'processed'
Load data and utils¶
In [3]:
def pano_to_long(df):
pano_long = df.apply(pd.Series.explode)
pano_long['pov_index'] = pano_long.groupby('task_id').cumcount()
pano_long = pano_long.rename(columns={'headings': 'heading', 'scores': 'score'})
return pano_long
In [4]:
pano_scores = pd.read_json(PROCESSED / 'panorama_scores.json')
pano_scores_long = pano_to_long(pano_scores)
img_scores = pd.read_json(PROCESSED / 'image_scores.json')
runs = pd.read_json(PROCESSED / 'runs.json')
users = pd.read_json(PROCESSED / 'users.json')
print('pano_scores:', len(pano_scores))
display(pano_scores.head(3))
print('pano_scores_long:', len(pano_scores_long))
display(pano_scores_long.head(2))
print('img_scores:', len(img_scores))
display(img_scores.head(3))
print('runs:', len(runs))
display(runs.head(3))
print('users:', len(users))
display(users.head(3))
pano_scores: 650
panoid | time_started | time_finished | scores | headings | familiarity | run_id | index_in_run | task_id | run_template_id | user_id | time | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | ToX0MGNWH-VsUEqq5wSCzQ | 2023-02-02T23:00:51.256000Z | 2023-02-02T23:02:15.021000Z | [2.0, 4.0, 1.5, 1.5] | [167, 257, 347, 77] | 1 | 84 | 3 | 668 | 10 | 75 | 83 |
1 | _zI03ND3kqbk0lyGr6va_A | 2023-02-02T23:02:40.071000Z | 2023-02-02T23:03:40.548000Z | [4.3, 3.5, 3.3, 3.7] | [313, 43, 133, 223] | 1 | 84 | 6 | 667 | 10 | 75 | 60 |
2 | vTjGNS9tAM-xTV0j85XRtA | 2023-03-14T14:42:07.762000Z | 2023-03-14T14:43:08.706000Z | [3.5, 3.5, 3.5, 3.5] | [352, 82, 172, 262] | 1 | 361 | 3 | 2886 | 3 | 143 | 60 |
pano_scores_long: 2600
panoid | time_started | time_finished | score | heading | familiarity | run_id | index_in_run | task_id | run_template_id | user_id | time | pov_index | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | ToX0MGNWH-VsUEqq5wSCzQ | 2023-02-02T23:00:51.256000Z | 2023-02-02T23:02:15.021000Z | 2.0 | 167 | 1 | 84 | 3 | 668 | 10 | 75 | 83 | 0 |
0 | ToX0MGNWH-VsUEqq5wSCzQ | 2023-02-02T23:00:51.256000Z | 2023-02-02T23:02:15.021000Z | 4.0 | 257 | 1 | 84 | 3 | 668 | 10 | 75 | 83 | 1 |
img_scores: 1940
panoid | heading | time_started | time_finished | score | familiarity | run_id | index_in_run | task_id | run_template_id | user_id | time | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | yGy5iCyoO0PbL2JaR9EggQ | 346 | 2023-02-02T23:00:23.057000Z | 2023-02-02T23:00:41.429000Z | 1.5 | 1 | 84 | 1 | 666 | 10 | 75 | 18 |
1 | Wv_rNo6f8bMLVKkHdZotfg | 142 | 2023-02-02T23:00:41.512000Z | 2023-02-02T23:00:51.156000Z | 4.5 | 1 | 84 | 2 | 671 | 10 | 75 | 9 |
2 | 1Lg37smPwqvAXl4bdDqhlw | 162 | 2023-02-02T23:02:15.165000Z | 2023-02-02T23:02:28.510000Z | 4.5 | 1 | 84 | 4 | 670 | 10 | 75 | 13 |
runs: 329
id | time_started | time_finished | run_template_id | user_id | img_task_ids | pano_task_ids | time | |
---|---|---|---|---|---|---|---|---|
0 | 84 | 2023-02-02T22:59:11.602033Z | 2023-02-02T23:03:44.914786Z | 10 | 75 | [666, 671, 670, 672, 665, 669] | [668, 667] | 273 |
1 | 361 | 2023-03-14T14:40:56.078633Z | 2023-03-14T14:45:08.342496Z | 3 | 143 | [2883, 2890, 2884, 2887, 2885, 2888] | [2886, 2889] | 252 |
2 | 91 | 2023-02-03T09:44:16.438030Z | 2023-02-03T09:49:42.938003Z | 10 | 82 | [721, 722, 728, 727, 725, 726] | [723, 724] | 326 |
users: 329
id | run_ids | age_range | gender | level_of_education | location | |
---|---|---|---|---|---|---|
0 | 39 | [43] | 25-29 | male | doctorate | FR |
1 | 20 | [23] | 25-29 | female | doctorate | FR |
2 | 70 | [79] | 15-19 | female | high school | FR |
Define some constants used later:
In [5]:
PANOIDS_TASKS = list(pano_scores['panoid'].unique()) # all panoids except calibration
PANOIDS_CALIBRATION = list(set(img_scores['panoid'].unique()) - set(PANOIDS_TASKS))
PANOIDS_ALL = PANOIDS_TASKS + PANOIDS_CALIBRATION
print(len(PANOIDS_TASKS))
print(PANOIDS_CALIBRATION)
RUN_TEMPLATE_IDS = sorted(img_scores['run_template_id'].unique())
print(len(RUN_TEMPLATE_IDS))
IMG_TASK_PARAMS = list(img_scores[img_scores['panoid'].isin(PANOIDS_TASKS)].groupby(['panoid', 'heading']).size().index) # use groupby to get unique pairs of panoid, heading
print(len(IMG_TASK_PARAMS))
print(IMG_TASK_PARAMS)
PANO_TASK_PARAMS = pano_scores_long[['panoid', 'heading']].drop_duplicates().sort_values('panoid').values.tolist()
print(len(PANO_TASK_PARAMS))
24 ['yGy5iCyoO0PbL2JaR9EggQ', 'TY6wOkPRysul_e_W73lmnQ'] 12 48 [('1Lg37smPwqvAXl4bdDqhlw', 162), ('1Lg37smPwqvAXl4bdDqhlw', 342), ('GnReLERfph4NUc7ZCnYYIA', 30), ('GnReLERfph4NUc7ZCnYYIA', 210), ('J6b4uNaJXJePwh3i4g-J9g', 63), ('J6b4uNaJXJePwh3i4g-J9g', 243), ('OES3M70uJAKNvDX2lKkMPQ', 145), ('OES3M70uJAKNvDX2lKkMPQ', 325), ('P4F4QKFaTE5d-QNz_Jx0kg', 158), ('P4F4QKFaTE5d-QNz_Jx0kg', 248), ('PGxyIaP90yNpqgAOQylGog', 170), ('PGxyIaP90yNpqgAOQylGog', 350), ('SQRLJrD2KYe1d2VA6txyig', 11), ('SQRLJrD2KYe1d2VA6txyig', 191), ('ToX0MGNWH-VsUEqq5wSCzQ', 167), ('ToX0MGNWH-VsUEqq5wSCzQ', 347), ('UHt0RLnWk1TJnhkBTp6DeA', 103), ('UHt0RLnWk1TJnhkBTp6DeA', 283), ('Wv_rNo6f8bMLVKkHdZotfg', 142), ('Wv_rNo6f8bMLVKkHdZotfg', 322), ('_zI03ND3kqbk0lyGr6va_A', 43), ('_zI03ND3kqbk0lyGr6va_A', 223), ('aOT4Hl_n33HvBXyWpvYb4Q', 152), ('aOT4Hl_n33HvBXyWpvYb4Q', 332), ('dTb77iHE5hYvcqD26Y99TA', 98), ('dTb77iHE5hYvcqD26Y99TA', 278), ('iDUxUuJHoy4jOx-Yt8laNA', 58), ('iDUxUuJHoy4jOx-Yt8laNA', 238), ('iZ2ARYVKACAF8KFRIHr15w', 75), ('iZ2ARYVKACAF8KFRIHr15w', 165), ('jI40EDTDeCsmBibs1jbXzQ', 35), ('jI40EDTDeCsmBibs1jbXzQ', 215), ('m4kX2Djw5DmJbL40tel9Yw', 125), ('m4kX2Djw5DmJbL40tel9Yw', 305), ('pKtV8k7abhxUSE9JAkZLsA', 8), ('pKtV8k7abhxUSE9JAkZLsA', 188), ('qgpfKJzOZ5OBo5JdvCAp8Q', 152), ('qgpfKJzOZ5OBo5JdvCAp8Q', 332), ('tlPLzx1D7MRgcvFowbmWGw', 128), ('tlPLzx1D7MRgcvFowbmWGw', 308), ('v9YEYuKKwMPo3RZWKewZEQ', 0), ('v9YEYuKKwMPo3RZWKewZEQ', 90), ('vTjGNS9tAM-xTV0j85XRtA', 172), ('vTjGNS9tAM-xTV0j85XRtA', 352), ('x_gO8pWHTNMwQxHg9Xv5Sg', 86), ('x_gO8pWHTNMwQxHg9Xv5Sg', 266), ('ySFr8WwsE0Y1vkN1nZ19Rw', 164), ('ySFr8WwsE0Y1vkN1nZ19Rw', 344)] 96
In [6]:
CALIBRATION_TASK_PARAMS = [('yGy5iCyoO0PbL2JaR9EggQ', 346.0), ('TY6wOkPRysul_e_W73lmnQ', 224.0)]
In [7]:
def get_pano_img_scores(panoid, heading):
pano_scores = pano_scores_long[(pano_scores_long['panoid']==panoid) & (pano_scores_long['heading']==heading)]['score']
img_scores = img_scores[(img_scores['panoid']==panoid) & (img_scores['heading']==heading)]['score']
return pano_scores, img_scores
Transform Dataframes¶
Create a Dataframe for the MOS recovery process with ZREC¶
We need to create an array of shape (n_participants, n_stimuli)
. Since ZREC handles NaNs, we put all the scores in one array with 14 (nb of tasks per run) * 12 (number of different playlists) = 168 columns and n_participants rows, filling the rest with NaNs.
In [8]:
def get_task_params(run_template_id):
img_task_params = img_scores[img_scores['run_template_id']==run_template_id][['panoid', 'heading']].drop_duplicates().values.tolist()
pano_task_panoids = pano_scores[pano_scores['run_template_id']==run_template_id]['panoid'].unique()
pano_task_params = {}
for panoid in pano_task_panoids:
pano_task_params[panoid] = eval(pano_scores[(pano_scores['panoid']==panoid) & (pano_scores['run_template_id']==run_template_id)]['headings'].iloc[0])
return img_task_params, pano_task_params
def get_img_scores(img_task_params, run_id):
scores = []
for panoid, heading in img_task_params:
score = img_scores[(img_scores['panoid']==panoid) & (img_scores['heading']==heading) & (img_scores['run_id']==run_id)]['score'].iloc[0]
scores.append(score)
return scores
def get_pano_scores(pano_task_params, run_id):
scores = []
p = pano_scores_long
for panoid, headings in pano_task_params.items():
for heading in headings:
score = p[(p['panoid']==panoid) & (p['heading']==heading) & (p['run_id']==run_id)]['score'].iloc[0]
scores.append(score)
return scores
def get_column_names(img_task_params, pano_task_params, run_template_id):
img_cols = ['{}__{}__img'.format(panoid, heading) if panoid not in PANOIDS_CALIBRATION else '{}__{}__{}__img'.format(panoid, heading, run_template_id) for panoid, heading in img_task_params]
pano_cols = ['{}__{}__pano'.format(panoid, heading) for panoid, headings in pano_task_params.items() for heading in headings]
return img_cols + pano_cols
In [9]:
# First, identify all 168 columns: task_type, panoid, heading
def make_task_params(task_type, task_params):
return [(task_type, panoid, heading) for panoid, heading in task_params]
TASKS_COMBINED = make_task_params('img', IMG_TASK_PARAMS) + make_task_params('pano', PANO_TASK_PARAMS) + make_task_params('img', CALIBRATION_TASK_PARAMS)
zrec_data = np.full((len(runs), len(TASKS_COMBINED)), np.nan)
for row_idx, row in runs.iterrows():
for col_idx, task in enumerate(TASKS_COMBINED):
task_type, panoid, heading = task
score = np.nan
if task_type == 'img':
data = img_scores[(img_scores['panoid']==panoid) & (img_scores['heading']==heading) & (img_scores['run_id']==row['id'])]['score']
if len(data) > 0:
score = data.iloc[0]
elif task_type == 'pano':
data = pano_scores_long[(pano_scores_long['panoid']==panoid) & (pano_scores_long['heading']==heading) & (pano_scores_long['run_id']==row['id'])]['score']
if len(data) > 0:
score = data.iloc[0]
zrec_data[row_idx, col_idx] = score
TASKS_COMBINED_STR = ['{}__{}__{}'.format(task_type, panoid, heading) for task_type, panoid, heading in TASKS_COMBINED]
zrec_data = pd.DataFrame(zrec_data, columns=TASKS_COMBINED_STR)
zrec_data.head()
Out[9]:
img__1Lg37smPwqvAXl4bdDqhlw__162 | img__1Lg37smPwqvAXl4bdDqhlw__342 | img__GnReLERfph4NUc7ZCnYYIA__30 | img__GnReLERfph4NUc7ZCnYYIA__210 | img__J6b4uNaJXJePwh3i4g-J9g__63 | img__J6b4uNaJXJePwh3i4g-J9g__243 | img__OES3M70uJAKNvDX2lKkMPQ__145 | img__OES3M70uJAKNvDX2lKkMPQ__325 | img__P4F4QKFaTE5d-QNz_Jx0kg__158 | img__P4F4QKFaTE5d-QNz_Jx0kg__248 | ... | pano__x_gO8pWHTNMwQxHg9Xv5Sg__356 | pano__x_gO8pWHTNMwQxHg9Xv5Sg__86 | pano__x_gO8pWHTNMwQxHg9Xv5Sg__266 | pano__x_gO8pWHTNMwQxHg9Xv5Sg__176 | pano__ySFr8WwsE0Y1vkN1nZ19Rw__344 | pano__ySFr8WwsE0Y1vkN1nZ19Rw__74 | pano__ySFr8WwsE0Y1vkN1nZ19Rw__164 | pano__ySFr8WwsE0Y1vkN1nZ19Rw__254 | img__yGy5iCyoO0PbL2JaR9EggQ__346.0 | img__TY6wOkPRysul_e_W73lmnQ__224.0 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 4.5 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.5 | 4.5 |
1 | NaN | NaN | NaN | NaN | 3.0 | NaN | NaN | NaN | 3.5 | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.5 | 4.0 |
2 | 3.7 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 4.1 | 5.0 |
3 | 4.3 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.5 | 5.0 |
4 | NaN | NaN | NaN | NaN | 5.0 | NaN | NaN | NaN | 4.0 | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.5 | 1.5 |
5 rows × 146 columns
Apply ZREC¶
In [10]:
from ZREC_Mos_Recovery import zrec_mos_recovery, plot_subject_inconsistency_bias
In [11]:
def mean_confidence_interval(data, confidence=0.95):
"""
Return the mean associated CI for each column of the data, at the specified confidence level.
Wrapper of `mean_ci`.
"""
means, cis = [], []
for j in range(data.shape[1]):
d = data[:, j]
m, err = mean_ci(d, confidence)
means.append(m)
cis.append(err) # ci: [m-err, m+err]
return np.array(means), np.array(cis)
def mean_ci(sample, confidence):
"""Return the mean and associated CI of the sample, at the specified confidence level."""
n = len(sample)
m, sem = np.nanmean(sample), scipy.stats.sem(sample, nan_policy='omit') # se = np.std(a, ddof=1) / np.sqrt(n)
if n < 30:
critical_value = scipy.stats.t.ppf((1 + confidence) / 2, df=n-1)
else:
critical_value = scipy.stats.norm.ppf(0.975)
err = sem * critical_value
return m, err
Apply the ZREC method:
In [12]:
zrecmos, zrecmos_95ci, subject_inconsistency, subject_bias_factor, content_ambiguity, unbiased, weights = zrec_mos_recovery(zrec_data.to_numpy())
means, cis = mean_confidence_interval(zrec_data.to_numpy())
Create the mos
dataframe that holds the Recovered MOS from the ZREC methods, as well as some other data useful for the statistical tests.
In [13]:
TARGET_COLUMNS = ['panoid', 'heading', 'task_type', 'zrec_mean', 'zrec_ci95', 'mean', 'ci95', 'unbiased_scores', 'zrec_weights', 'scores', 'ambiguity']
def get_task_mask(task_str):
"""Return a boolean mask to filter the scores that belong to this task, out of the scores of the whole experiment"""
mask = zrec_data[task_str].isna()
mask = ~mask
return mask
def get_data(task_idx, task_str):
"""Return the zrec scores for this task"""
mask = get_task_mask(task_str)
scores = zrec_data[task_str][mask].to_numpy()
scores_unbiased = unbiased[mask, task_idx]
w = weights[mask]
return scores, scores_unbiased, w
data = np.stack((zrecmos, zrecmos_95ci, content_ambiguity, means, cis), axis=1)
mos = pd.DataFrame(data, columns=['zrec_mean', 'zrec_ci95', 'ambiguity', 'mean', 'ci95'])
mos['task_str'] = TASKS_COMBINED_STR
mos[[ 'task_type', 'panoid', 'heading']] = mos['task_str'].str.split('__', expand=True)
mos[['scores', 'unbiased_scores', 'zrec_weights']] = mos.apply(lambda x: get_data(x.name, x['task_str']), axis=1, result_type='expand')
mos['n_scores'] = mos['unbiased_scores'].apply(len)
mos.head()
Out[13]:
zrec_mean | zrec_ci95 | ambiguity | mean | ci95 | task_str | task_type | panoid | heading | scores | unbiased_scores | zrec_weights | n_scores | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 3.639504 | 0.461978 | 1.192592 | 3.665000 | 0.536245 | img__1Lg37smPwqvAXl4bdDqhlw__162 | img | 1Lg37smPwqvAXl4bdDqhlw | 162 | [4.5, 3.7, 4.3, 3.0, 1.8, 4.5, 1.8, 4.5, 4.0, ... | [4.526984895425468, 2.275517706406146, 3.33829... | [1.6559975051207856, 2.6094936981201253, 3.134... | 20 |
1 | 3.214081 | 0.223306 | 0.971571 | 3.144444 | 0.373453 | img__1Lg37smPwqvAXl4bdDqhlw__342 | img | 1Lg37smPwqvAXl4bdDqhlw | 342 | [1.5, 3.0, 4.0, 4.7, 3.7, 2.5, 3.1, 4.0, 3.5, ... | [1.8499639156614003, 3.0755039274116753, 3.550... | [0.970630255541091, 0.7109065126885892, 4.0378... | 27 |
2 | 2.987567 | 0.294954 | 0.982113 | 3.200000 | 0.420049 | img__GnReLERfph4NUc7ZCnYYIA__30 | img | GnReLERfph4NUc7ZCnYYIA | 30 | [2.2, 3.9, 5.0, 3.0, 4.0, 3.0, 1.4, 2.7, 3.0, ... | [2.319277317640751, 3.394029792131038, 3.73095... | [1.4136090540448125, 0.8850909961208744, 2.258... | 22 |
3 | 3.432103 | 0.232188 | 0.749119 | 3.474074 | 0.287947 | img__GnReLERfph4NUc7ZCnYYIA__210 | img | GnReLERfph4NUc7ZCnYYIA | 210 | [3.3, 3.6, 4.0, 2.0, 3.5, 2.4, 3.5, 3.5, 3.2, ... | [2.322018398203028, 3.341323072300512, 3.99976... | [1.9876567428685794, 1.3418638823051372, 2.186... | 27 |
4 | 3.637987 | 0.215803 | 1.019651 | 3.496970 | 0.353284 | img__J6b4uNaJXJePwh3i4g-J9g__63 | img | J6b4uNaJXJePwh3i4g-J9g | 63 | [3.0, 5.0, 4.0, 3.0, 4.3, 4.5, 3.5, 3.6, 2.0, ... | [3.299029028200214, 4.45527377607297, 4.292272... | [7.850303318750475, 1.1137259140920244, 2.3090... | 33 |
Quick check that everything works when calculating the MOS and Recovered MOS by hand from the scores
, unbiased_scores
and zrec_weights
:
In [14]:
for idx in (0, 1, 2, 98, 99):
print('idx:', idx)
row = mos.iloc[idx]
print('ZREC MOS: {} -- {}'.format(row['zrec_mean'], np.average(row['unbiased_scores'], weights=row['zrec_weights']))) # check the formula
print('Regular MOS: {} -- {}'.format(row['mean'], np.average(row['scores'])))
idx: 0 ZREC MOS: 3.6395039525041413 -- 3.6395039525041395 Regular MOS: 3.6650000000000005 -- 3.665 idx: 1 ZREC MOS: 3.2140813841623856 -- 3.2140813841623856 Regular MOS: 3.144444444444445 -- 3.144444444444444 idx: 2 ZREC MOS: 2.9875670209338767 -- 2.9875670209338754 Regular MOS: 3.2 -- 3.2 idx: 98 ZREC MOS: 3.6818145591995877 -- 3.681814559199587 Regular MOS: 3.755555555555556 -- 3.755555555555556 idx: 99 ZREC MOS: 3.7420230303909485 -- 3.7420230303909485 Regular MOS: 3.7777777777777777 -- 3.7777777777777777
In [15]:
plot_subject_inconsistency_bias(subject_inconsistency, subject_bias_factor);
In [16]:
sns.histplot(subject_bias_factor, bins=20, kde=True);
In [17]:
sns.histplot(subject_inconsistency, bins=20, kde=True);
Export MOS¶
In [18]:
zrec_data.to_csv(PROCESSED / 'zrec_data.csv', index=False)
In [19]:
mos.to_json(PROCESSED / 'mos.json', orient='records')