Setup¶

In [36]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.stats.weightstats
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

from pp2_preds_postprocessor import PP2PredsPostprocessor

sns.set_theme()
In [37]:
DATA = Path('..') / 'data'
RAW = DATA / 'raw'
PROCESSED = DATA / 'processed'

FIGURES_PAPER = Path('..') / 'figures'
FIGURES_PAPER.mkdir(exist_ok=True, parents=True)
In [38]:
EXPORT_FIGURES = True

def export_figure_paper(fig, title, dpi=400, bbox_inches='tight', **params):
    if EXPORT_FIGURES:
        fig.savefig(FIGURES_PAPER / title, dpi=dpi, bbox_inches=bbox_inches, **params)

Load data and utils¶

General¶

In [39]:
def pano_to_long(df):
    # Explode the list columns (one score/heading per POV) so that each POV gets its own row
    pano_long = df.apply(pd.Series.explode)
    # Index of the POV within its panorama task (0-3)
    pano_long['pov_index'] = pano_long.groupby('task_id').cumcount()
    pano_long = pano_long.rename(columns={'headings': 'heading', 'scores': 'score'})
    return pano_long
In [40]:
pano_scores = pd.read_json(PROCESSED / 'panorama_scores.json')
pano_scores_long = pano_to_long(pano_scores)
img_scores = pd.read_json(PROCESSED / 'image_scores.json')
runs = pd.read_json(PROCESSED / 'runs.json')
users = pd.read_json(PROCESSED / 'users.json')
users[['age_range', 'location', 'gender', 'level_of_education']] = users[['age_range', 'location', 'gender', 'level_of_education']].astype('category')

print('pano_scores:', len(pano_scores))
display(pano_scores.head(3))
print('pano_scores_long:', len(pano_scores_long))
display(pano_scores_long.head(2))
print('img_scores:', len(img_scores))
display(img_scores.head(3))
print('runs:', len(runs))
display(runs.head(3))
print('users:', len(users))
display(users.head(3))
pano_scores: 650
panoid time_started time_finished scores headings familiarity run_id index_in_run task_id run_template_id user_id time
0 ToX0MGNWH-VsUEqq5wSCzQ 2023-02-02T23:00:51.256000Z 2023-02-02T23:02:15.021000Z [2.0, 4.0, 1.5, 1.5] [167, 257, 347, 77] 1 84 3 668 10 75 83
1 _zI03ND3kqbk0lyGr6va_A 2023-02-02T23:02:40.071000Z 2023-02-02T23:03:40.548000Z [4.3, 3.5, 3.3, 3.7] [313, 43, 133, 223] 1 84 6 667 10 75 60
2 vTjGNS9tAM-xTV0j85XRtA 2023-03-14T14:42:07.762000Z 2023-03-14T14:43:08.706000Z [3.5, 3.5, 3.5, 3.5] [352, 82, 172, 262] 1 361 3 2886 3 143 60
pano_scores_long: 2600
panoid time_started time_finished score heading familiarity run_id index_in_run task_id run_template_id user_id time pov_index
0 ToX0MGNWH-VsUEqq5wSCzQ 2023-02-02T23:00:51.256000Z 2023-02-02T23:02:15.021000Z 2.0 167 1 84 3 668 10 75 83 0
0 ToX0MGNWH-VsUEqq5wSCzQ 2023-02-02T23:00:51.256000Z 2023-02-02T23:02:15.021000Z 4.0 257 1 84 3 668 10 75 83 1
img_scores: 1940
panoid heading time_started time_finished score familiarity run_id index_in_run task_id run_template_id user_id time
0 yGy5iCyoO0PbL2JaR9EggQ 346 2023-02-02T23:00:23.057000Z 2023-02-02T23:00:41.429000Z 1.5 1 84 1 666 10 75 18
1 Wv_rNo6f8bMLVKkHdZotfg 142 2023-02-02T23:00:41.512000Z 2023-02-02T23:00:51.156000Z 4.5 1 84 2 671 10 75 9
2 1Lg37smPwqvAXl4bdDqhlw 162 2023-02-02T23:02:15.165000Z 2023-02-02T23:02:28.510000Z 4.5 1 84 4 670 10 75 13
runs: 329
id time_started time_finished run_template_id user_id img_task_ids pano_task_ids time
0 84 2023-02-02T22:59:11.602033Z 2023-02-02T23:03:44.914786Z 10 75 [666, 671, 670, 672, 665, 669] [668, 667] 273
1 361 2023-03-14T14:40:56.078633Z 2023-03-14T14:45:08.342496Z 3 143 [2883, 2890, 2884, 2887, 2885, 2888] [2886, 2889] 252
2 91 2023-02-03T09:44:16.438030Z 2023-02-03T09:49:42.938003Z 10 82 [721, 722, 728, 727, 725, 726] [723, 724] 326
users: 329
id run_ids age_range gender level_of_education location
0 39 [43] 25-29 male doctorate FR
1 20 [23] 25-29 female doctorate FR
2 70 [79] 15-19 female high school FR
In [41]:
PANOIDS_TASKS = list(pano_scores['panoid'].unique())  # all panoids except calibration
PANOIDS_CALIBRATION = list(set(img_scores['panoid'].unique()) - set(PANOIDS_TASKS))
PANOIDS_ALL = PANOIDS_TASKS + PANOIDS_CALIBRATION
print(len(PANOIDS_TASKS))
print(PANOIDS_CALIBRATION)

RUN_TEMPLATE_IDS = sorted(img_scores['run_template_id'].unique())
print(len(RUN_TEMPLATE_IDS))

IMG_TASK_PARAMS = list(img_scores[img_scores['panoid'].isin(PANOIDS_TASKS)].groupby(['panoid', 'heading']).size().index)  # use groupby to get unique pairs of panoid, heading
print(len(IMG_TASK_PARAMS))
print(IMG_TASK_PARAMS)
24
['TY6wOkPRysul_e_W73lmnQ', 'yGy5iCyoO0PbL2JaR9EggQ']
12
48
[('1Lg37smPwqvAXl4bdDqhlw', 162), ('1Lg37smPwqvAXl4bdDqhlw', 342), ('GnReLERfph4NUc7ZCnYYIA', 30), ('GnReLERfph4NUc7ZCnYYIA', 210), ('J6b4uNaJXJePwh3i4g-J9g', 63), ('J6b4uNaJXJePwh3i4g-J9g', 243), ('OES3M70uJAKNvDX2lKkMPQ', 145), ('OES3M70uJAKNvDX2lKkMPQ', 325), ('P4F4QKFaTE5d-QNz_Jx0kg', 158), ('P4F4QKFaTE5d-QNz_Jx0kg', 248), ('PGxyIaP90yNpqgAOQylGog', 170), ('PGxyIaP90yNpqgAOQylGog', 350), ('SQRLJrD2KYe1d2VA6txyig', 11), ('SQRLJrD2KYe1d2VA6txyig', 191), ('ToX0MGNWH-VsUEqq5wSCzQ', 167), ('ToX0MGNWH-VsUEqq5wSCzQ', 347), ('UHt0RLnWk1TJnhkBTp6DeA', 103), ('UHt0RLnWk1TJnhkBTp6DeA', 283), ('Wv_rNo6f8bMLVKkHdZotfg', 142), ('Wv_rNo6f8bMLVKkHdZotfg', 322), ('_zI03ND3kqbk0lyGr6va_A', 43), ('_zI03ND3kqbk0lyGr6va_A', 223), ('aOT4Hl_n33HvBXyWpvYb4Q', 152), ('aOT4Hl_n33HvBXyWpvYb4Q', 332), ('dTb77iHE5hYvcqD26Y99TA', 98), ('dTb77iHE5hYvcqD26Y99TA', 278), ('iDUxUuJHoy4jOx-Yt8laNA', 58), ('iDUxUuJHoy4jOx-Yt8laNA', 238), ('iZ2ARYVKACAF8KFRIHr15w', 75), ('iZ2ARYVKACAF8KFRIHr15w', 165), ('jI40EDTDeCsmBibs1jbXzQ', 35), ('jI40EDTDeCsmBibs1jbXzQ', 215), ('m4kX2Djw5DmJbL40tel9Yw', 125), ('m4kX2Djw5DmJbL40tel9Yw', 305), ('pKtV8k7abhxUSE9JAkZLsA', 8), ('pKtV8k7abhxUSE9JAkZLsA', 188), ('qgpfKJzOZ5OBo5JdvCAp8Q', 152), ('qgpfKJzOZ5OBo5JdvCAp8Q', 332), ('tlPLzx1D7MRgcvFowbmWGw', 128), ('tlPLzx1D7MRgcvFowbmWGw', 308), ('v9YEYuKKwMPo3RZWKewZEQ', 0), ('v9YEYuKKwMPo3RZWKewZEQ', 90), ('vTjGNS9tAM-xTV0j85XRtA', 172), ('vTjGNS9tAM-xTV0j85XRtA', 352), ('x_gO8pWHTNMwQxHg9Xv5Sg', 86), ('x_gO8pWHTNMwQxHg9Xv5Sg', 266), ('ySFr8WwsE0Y1vkN1nZ19Rw', 164), ('ySFr8WwsE0Y1vkN1nZ19Rw', 344)]
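As a side note, the unique (panoid, heading) pairs could equivalently be extracted with drop_duplicates instead of groupby (a sketch; both orderings coincide because groupby sorts its keys):

img_task_params_alt = (img_scores[img_scores['panoid'].isin(PANOIDS_TASKS)][['panoid', 'heading']]
                       .drop_duplicates()
                       .sort_values(['panoid', 'heading']))
# itertuples(index=False, name=None) yields plain (panoid, heading) tuples
assert list(img_task_params_alt.itertuples(index=False, name=None)) == IMG_TASK_PARAMS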

MOS¶

In [42]:
mos = pd.read_json(PROCESSED / 'mos.json')

display(mos.head(3))
zrec_mean zrec_ci95 ambiguity mean ci95 task_str task_type panoid heading scores unbiased_scores zrec_weights n_scores
0 3.639504 0.461978 1.192592 3.665000 0.536245 img__1Lg37smPwqvAXl4bdDqhlw__162 img 1Lg37smPwqvAXl4bdDqhlw 162 [4.5, 3.7, 4.3, 3.0, 1.8, 4.5, 1.8, 4.5, 4.0, ... [4.5269848954, 2.2755177064, 3.3382972997, 3.4... [1.6559975051, 2.6094936981, 3.1341151709, 2.3... 20
1 3.214081 0.223306 0.971571 3.144444 0.373453 img__1Lg37smPwqvAXl4bdDqhlw__342 img 1Lg37smPwqvAXl4bdDqhlw 342 [1.5, 3.0, 4.0, 4.7, 3.7, 2.5, 3.1, 4.0, 3.5, ... [1.8499639157, 3.0755039274, 3.550159749, 3.91... [0.9706302555, 0.7109065127, 4.0378420193, 2.8... 27
2 2.987567 0.294954 0.982113 3.200000 0.420049 img__GnReLERfph4NUc7ZCnYYIA__30 img GnReLERfph4NUc7ZCnYYIA 30 [2.2, 3.9, 5.0, 3.0, 4.0, 3.0, 1.4, 2.7, 3.0, ... [2.3192773176, 3.3940297921, 3.7309587511, 3.7... [1.413609054, 0.8850909961000001, 2.2580586099... 22

PP2 scores¶

In [43]:
PP2_RESULTS = RAW / 'pp2_safer.json'  # predictions for the same headings as in the experiment

def load_pp2_results(path):
    df = pd.read_json(path)
    # Create panoid, heading columns
    df['panoid'] = df['img'].apply(lambda x: '_'.join(x.split('_')[:-1]))
    df['heading'] = df['img'].apply(lambda x: int(x.split('_')[-1].removesuffix('.jpg')))  # removesuffix (Python >= 3.9) avoids rstrip's character-set pitfall
    # Filter rows, keep only the PANOIDS that we used in this experiment
    df = df[df['panoid'].isin(PANOIDS_ALL)]
    # Process scores
    df = df.rename(columns={'scores': 'score'})
    processor = PP2PredsPostprocessor()
    df['score'] = processor.process(df, ['score'], outliers='clip')
    df['score'] = df['score'] * 4 + 1  # adapt to the [1, 5] range used in the experiment
    return df
    

pp2 = load_pp2_results(PP2_RESULTS)
print('pp2: {} rows'.format(len(pp2)))
display(pp2.head())
print('NaN:', pp2['score'].isna().sum())
pp2: 98 rows
img score panoid heading
0 GnReLERfph4NUc7ZCnYYIA_300.jpg 2.367921 GnReLERfph4NUc7ZCnYYIA 300
1 OES3M70uJAKNvDX2lKkMPQ_145.jpg 3.554820 OES3M70uJAKNvDX2lKkMPQ 145
2 x_gO8pWHTNMwQxHg9Xv5Sg_266.jpg 3.178799 x_gO8pWHTNMwQxHg9Xv5Sg 266
3 J6b4uNaJXJePwh3i4g-J9g_63.jpg 3.585738 J6b4uNaJXJePwh3i4g-J9g 63
4 vTjGNS9tAM-xTV0j85XRtA_352.jpg 3.622242 vTjGNS9tAM-xTV0j85XRtA 352
NaN: 0
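A note on the rescaling above: assuming the postprocessor returns scores normalized to $[0, 1]$, the affine map $s \mapsto 4s + 1$ sends 0 to 1 and 1 to 5, matching the rating scale used in the experiment.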

Plotting function¶

In [44]:
def plot_regression_and_print_coefficients(data, x_col, y_col, title=None, ax=None, xlim=None, ylim=None):
    ax_provided = ax is not None
    # Plotting regression
    ax = sns.regplot(x=x_col, y=y_col, data=data, ax=ax)
    if xlim is not None:
        ax.set_xlim(*xlim)
    if ylim is not None:
        ax.set_ylim(*ylim)
    if title is not None:
        ax.set_title(title)
    if not ax_provided:
        plt.show()

    # Calculate linear regression coefficients and R^2
    X = data[[x_col]]
    y = data[y_col]
    model = LinearRegression().fit(X, y)
    r2 = r2_score(y, model.predict(X))

    print(f"Slope: {model.coef_[0]:.4f}")
    print(f"Intercept: {model.intercept_:.4f}")
    print(f"R^2: {r2:.4f}")
    print("------------------------")

    return ax

Compare scores between img/pano tasks¶

Statistical tests (Weighted t-test)¶

Perform a weighted t-test between each pair of distributions. For each POV (identified by a panoid and a heading), we retrieve the scores obtained under (1) the panorama modality (with context) and (2) the image modality (without context). We then run the statistical test on the two samples and compute the difference in recovered MOS. The test uses the unbiased scores and the weights returned by the ZREC method, since the recovered MOS is the weighted average of the unbiased scores.

The results are stored in the DataFrame stats_img_vs_pano.
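For reference, ZREC's recovered MOS is exactly this weighted average of the unbiased scores (the assertions in the cell below check this identity):

$$\widehat{\mathrm{MOS}} = \frac{\sum_i w_i \, \tilde{s}_i}{\sum_i w_i},$$

where $\tilde{s}_i$ are the unbiased scores and $w_i$ the per-observer ZREC weights.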

In [45]:
weighted_t_test_p, weighted_t_test_significant, significance_level, img_pano_mos_diff, img_pano_mos_rel_diff = [], [], [], [], []
for panoid, heading in IMG_TASK_PARAMS:
    data = mos[(mos['panoid']==panoid) & (mos['heading']==heading)]
    img, pano = data[data['task_type']=='img'].iloc[0], data[data['task_type']=='pano'].iloc[0]  # Series
    # Different variance
    tstat, pvalue, _ = statsmodels.stats.weightstats.ttest_ind(img['unbiased_scores'], pano['unbiased_scores'], 
                                                               weights=(img['zrec_weights'], pano['zrec_weights']), usevar='unequal')
    mos_diff = img['zrec_mean'] - pano['zrec_mean']
    mos_rel_diff = np.abs(mos_diff) / img['zrec_mean']  # image as reference, so we can see the effect of 'adding context'
    img_pano_mos_diff.append(mos_diff)
    img_pano_mos_rel_diff.append(mos_rel_diff)
    assert np.isclose(np.average(img['unbiased_scores'], weights=img['zrec_weights']), img['zrec_mean']), 'ZREC issue w/ img at task {} {}'.format(panoid, heading)
    assert np.isclose(np.average(pano['unbiased_scores'], weights=pano['zrec_weights']), pano['zrec_mean']), 'ZREC issue w/ pano at task {} {}'.format(panoid, heading)
    weighted_t_test_p.append(pvalue)
    weighted_t_test_significant.append(pvalue < 0.05)
    if pvalue < 0.001:
        significance_level.append('***')
    elif pvalue < 0.01:
        significance_level.append('**')
    elif pvalue < 0.05:
        significance_level.append('*')
    else:
        significance_level.append('')

stats_img_vs_pano = pd.DataFrame(IMG_TASK_PARAMS, columns=['panoid', 'heading'])
stats_img_vs_pano['weighted_t_test_p'] = weighted_t_test_p
stats_img_vs_pano['weighted_t_test_significant'] = weighted_t_test_significant
stats_img_vs_pano['significance_level'] = significance_level
stats_img_vs_pano['img_pano_mos_diff'] = img_pano_mos_diff
stats_img_vs_pano['img_pano_mos_rel_diff'] = img_pano_mos_rel_diff
stats_img_vs_pano.head()
Out[45]:
panoid heading weighted_t_test_p weighted_t_test_significant significance_level img_pano_mos_diff img_pano_mos_rel_diff
0 1Lg37smPwqvAXl4bdDqhlw 162 0.607590 False -0.095127 0.026137
1 1Lg37smPwqvAXl4bdDqhlw 342 0.941152 False 0.007523 0.002341
2 GnReLERfph4NUc7ZCnYYIA 30 0.035863 True * -0.298607 0.099950
3 GnReLERfph4NUc7ZCnYYIA 210 0.026778 True * 0.315095 0.091808
4 J6b4uNaJXJePwh3i4g-J9g 63 0.060193 False -0.147668 0.040591

Add corrected p-values (from R):
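The corrected p-values were computed in R; for reference, the same Holm adjustment could be reproduced in Python with statsmodels (a sketch, applied to the uncorrected p-values computed above):

from statsmodels.stats.multitest import multipletests

# Holm step-down adjustment of the 48 uncorrected p-values (alpha = 0.05);
# `reject` flags the comparisons that stay significant after correction
reject, p_holm, _, _ = multipletests(stats_img_vs_pano['weighted_t_test_p'], alpha=0.05, method='holm')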

In [46]:
r_stats = pd.read_csv(PROCESSED / 'statistics.csv')

# Join stats_img_vs_pano with r_stats. Left: on "{panoid}__{heading}", right: on pov
stats_img_vs_pano['pov'] = stats_img_vs_pano['panoid'] + '__' + stats_img_vs_pano['heading'].astype(str)
merged_stats = stats_img_vs_pano.merge(r_stats[['pov', 'p_adjusted_holm']], on='pov', how='left')

# Rename columns
merged_stats = merged_stats.rename(columns={'p_adjusted_holm': 'weighted_t_test_p', 'weighted_t_test_p': 'weighted_t_test_p_not_corrected'})

# Recalculate the significance level ("weighted_t_test_significant" boolean and "significance_level")
merged_stats['weighted_t_test_significant'] = merged_stats['weighted_t_test_p'] < 0.05
merged_stats['significance_level'] = merged_stats['weighted_t_test_p'].apply(lambda p: '***' if p < 0.001 else '**' if p < 0.01 else '*' if p < 0.05 else '')

merged_stats.head()
Out[46]:
panoid heading weighted_t_test_p_not_corrected weighted_t_test_significant significance_level img_pano_mos_diff img_pano_mos_rel_diff pov weighted_t_test_p
0 1Lg37smPwqvAXl4bdDqhlw 162 0.607590 False -0.095127 0.026137 1Lg37smPwqvAXl4bdDqhlw__162 1.000000
1 1Lg37smPwqvAXl4bdDqhlw 342 0.941152 False 0.007523 0.002341 1Lg37smPwqvAXl4bdDqhlw__342 1.000000
2 GnReLERfph4NUc7ZCnYYIA 30 0.035863 False -0.298607 0.099950 GnReLERfph4NUc7ZCnYYIA__30 0.824843
3 GnReLERfph4NUc7ZCnYYIA 210 0.026778 False 0.315095 0.091808 GnReLERfph4NUc7ZCnYYIA__210 0.696216
4 J6b4uNaJXJePwh3i4g-J9g 63 0.060193 False -0.147668 0.040591 J6b4uNaJXJePwh3i4g-J9g__63 1.000000
In [47]:
stats_img_vs_pano = merged_stats
In [48]:
print('Significant weighted t-test: {:.2f} %'.format(stats_img_vs_pano['weighted_t_test_significant'].sum() / len(stats_img_vs_pano) * 100))
Significant weighted t-test: 18.75 %
In [49]:
stats_img_vs_pano.loc[stats_img_vs_pano['weighted_t_test_significant'], 'significance_level'].value_counts().sort_index()
Out[49]:
significance_level
*      4
**     4
***    1
Name: count, dtype: int64

Effect Size: Check difference in MOS on significant cases¶

In [50]:
stats_img_vs_pano.loc[stats_img_vs_pano['weighted_t_test_significant'], 'img_pano_mos_diff'].abs().describe()
Out[50]:
count    9.000000
mean     0.492379
std      0.118611
min      0.250198
25%      0.425648
50%      0.524238
75%      0.578933
max      0.623508
Name: img_pano_mos_diff, dtype: float64
In [51]:
stats_img_vs_pano.loc[stats_img_vs_pano['weighted_t_test_significant'], 'img_pano_mos_rel_diff'].abs().describe()
Out[51]:
count    9.000000
mean     0.170509
std      0.065659
min      0.072794
25%      0.134577
50%      0.171377
75%      0.210820
max      0.280479
Name: img_pano_mos_rel_diff, dtype: float64

Check if the average pano score has an influence on the sign of the diff img-pano¶

So far we have strictly compared an image score to the pano score of the same POV. What about comparing this image score to the average pano score?

Better yet, we define a context_score as the average pano MOS of the 3 other POVs of the same place. We then check whether the image score is higher or lower than this context score. We expect the context_score to explain the sign of the difference between the img and pano scores: if img > pano, then the context should also be lower than the img score, since the context is what drives the difference in the pano score.

We create a new df context.
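Formally, for a place $p$ rated at the four headings $H_p$, the context score of heading $h$ is

$$\mathrm{context\_score}(p, h) = \frac{1}{3} \sum_{h' \in H_p \setminus \{h\}} \mathrm{MOS}_{\mathrm{pano}}(p, h').$$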

In [52]:
context = []
for panoid, heading in IMG_TASK_PARAMS:
    significant = stats_img_vs_pano[(stats_img_vs_pano['panoid']==panoid) & (stats_img_vs_pano['heading']==heading)]['weighted_t_test_significant'].iloc[0]
    img_mos = mos[(mos['panoid']==panoid) & (mos['heading']==heading) & (mos['task_type']=='img')]['zrec_mean'].iloc[0]
    other_img_mos = mos[(mos['panoid']==panoid) & (mos['heading']!=heading) & (mos['task_type']=='img')]['zrec_mean'].iloc[0]
    pano_mos = mos[(mos['panoid']==panoid) & (mos['heading']==heading) & (mos['task_type']=='pano')]['zrec_mean'].iloc[0]
    context_filter = (mos['panoid']==panoid) & (mos['heading']!=heading) & (mos['task_type']=='pano')  # Same place, different heading, pano task
    context_score = mos.loc[context_filter, 'zrec_mean'].mean()  # MOS of the other 3 POVs
    pano_headings = (np.array([heading-90, heading+90, heading+180]) + 360) % 360
    pano_left = mos[(mos['panoid']==panoid) & (mos['heading']==pano_headings[0]) & (mos['task_type']=='pano')]['zrec_mean'].iloc[0]
    pano_right = mos[(mos['panoid']==panoid) & (mos['heading']==pano_headings[1]) & (mos['task_type']=='pano')]['zrec_mean'].iloc[0]
    pano_opposed = mos[(mos['panoid']==panoid) & (mos['heading']==pano_headings[2]) & (mos['task_type']=='pano')]['zrec_mean'].iloc[0]
    avgpano = mos[(mos['panoid']==panoid) & (mos['task_type']=='pano')]['zrec_mean'].mean()
    # PP2
    pp2_img = pp2[(pp2['panoid']==panoid) & (pp2['heading']==heading)]['score'].iloc[0]
    pp2_left = pp2[(pp2['panoid']==panoid) & (pp2['heading']==pano_headings[0])]['score'].iloc[0]
    pp2_right = pp2[(pp2['panoid']==panoid) & (pp2['heading']==pano_headings[1])]['score'].iloc[0]
    pp2_opposed = pp2[(pp2['panoid']==panoid) & (pp2['heading']==pano_headings[2])]['score'].iloc[0]
    context.append([panoid, heading, img_mos, pano_mos, other_img_mos, pano_left, pano_right, pano_opposed, pp2_img, pp2_left, pp2_right, pp2_opposed, context_score, significant, avgpano])
context = pd.DataFrame(context, columns=['panoid', 'heading', 'img_mos', 'pano_mos', 'other_img_mos', 'pano_left', 'pano_right', 'pano_opposed', 'pp2', 'pp2_left', 'pp2_right', 'pp2_opposed','context_score', 'significant', 'avgpano'])
context['img-context'] = context['img_mos'] - context['context_score']
context['img-pano'] = context['img_mos'] - context['pano_mos']
context['abs(img-pano)'] = (context['img_mos'] - context['pano_mos']).abs()
ctx = context[context['significant']]
context.head()
Out[52]:
panoid heading img_mos pano_mos other_img_mos pano_left pano_right pano_opposed pp2 pp2_left pp2_right pp2_opposed context_score significant avgpano img-context img-pano abs(img-pano)
0 1Lg37smPwqvAXl4bdDqhlw 162 3.639504 3.734631 3.214081 3.539598 2.994824 3.206558 4.040594 3.671019 3.650846 3.930964 3.246993 False 3.368903 0.392511 -0.095127 0.095127
1 1Lg37smPwqvAXl4bdDqhlw 342 3.214081 3.206558 3.639504 2.994824 3.539598 3.734631 3.930964 3.650846 3.671019 4.040594 3.423018 False 3.368903 -0.208936 0.007523 0.007523
2 GnReLERfph4NUc7ZCnYYIA 30 2.987567 3.286174 3.432103 2.713823 3.115227 3.117008 3.791631 2.367921 3.677613 3.177446 2.982019 False 3.058058 0.005548 -0.298607 0.298607
3 GnReLERfph4NUc7ZCnYYIA 210 3.432103 3.117008 2.987567 3.115227 2.713823 3.286174 3.177446 3.677613 2.367921 3.791631 3.038408 False 3.058058 0.393695 0.315095 0.315095
4 J6b4uNaJXJePwh3i4g-J9g 63 3.637987 3.785655 2.980566 3.329778 3.555995 3.313977 3.585738 3.033215 3.062261 2.524135 3.399917 False 3.496351 0.238070 -0.147668 0.147668

For the significant cases, is the sign of img - pano the same as that of img - context? If so, this shows the influence of the context.

We only consider the significant cases, since for the others the sign of the difference is not meaningful.

In [53]:
img_pano = ctx['img_mos'] - ctx['pano_mos']
img_ctx = ctx['img_mos'] - ctx['context_score']

# Are they of the same sign?
print('Influence of context: {:.2f} %'.format(np.average((img_pano * img_ctx) > 0) * 100))
Influence of context: 100.00 %

Task Ambiguity¶

In [54]:
def weighted_avg_std(values, weights):
    """
    Function to calculate weighted average and weighted std
    :param values: values is an opinion score array with the shape [nb_obs, stimuli]
    :param weights: weighting factor for the average and std.
                    It is expected to be the uncertainty of individual observers.
                    The expected shape is [nb_obs]
    :return: average: weighted average as the recovered mean opinion scores
             std: weighted std as the std of the recovered mean opinion scores
    """
    average = np.ma.average(values, weights=weights, axis=0)
    variance = np.ma.average((values - average) ** 2, weights=weights, axis=0)
    std = np.sqrt(variance)
    return average, std

mos['sos'] = mos['scores'].apply(lambda x: np.std(x))  # SOS = standard deviation of opinion scores
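Here the SOS of a task is the plain (unweighted, ddof = 0) standard deviation of its $N$ raw opinion scores:

$$\mathrm{SOS} = \sqrt{\frac{1}{N} \sum_{i=1}^{N} \left(s_i - \bar{s}\right)^2}.$$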
In [55]:
df = mos.set_index(['panoid', 'heading']).loc[IMG_TASK_PARAMS].copy()  # exclude calibration and pano-only tasks
df['task_type'] = df['task_type'].astype('category').cat.rename_categories({'img': 'Image', 'pano': 'Panorama'})
pivot = df.pivot_table(index=['panoid', 'heading'], columns='task_type', values='sos', observed=False)
pivot['diff'] = pivot['Panorama'] - pivot['Image']
pivot['rel_diff'] = pivot['diff'] / pivot['Image'] * 100

plt.figure(figsize=(7,5))
sns.histplot(pivot['rel_diff'], bins=np.arange(-40, 80, 10))
plt.xlabel('Change in ambiguity introduced by the panorama (in %)')
export_figure_paper(plt.gcf(), 'ambiguity-variation.jpg', dpi=300)
plt.show()
[Figure: histogram of the relative change in ambiguity (%) introduced by the panorama]

Viz all scores¶

Scores (MOS + CI, MOS recovered + CI) vs panoid, grouped by task_type¶

In [56]:
def plot_one_pano(df, significance={}, shift=4, pano_color='orange', img_color='blue', alpha_no_zrec=0.3, ax=None):
    """Plot ZREC_MOS vs heading, grouped by task_type (shifted on each side of the heading tick by `shift`).
    Also plot the standard MOS in alpha, shifted by `3*shift`.
    """
    ax_provided = ax is not None
    if ax is None:
        fig, ax = plt.subplots(figsize=(5,4))
    plot_mean_ci_scores(df, ax, shift, pano_color, img_color, alpha=1, prefix='zrec_')
    plot_mean_ci_scores(df, ax, 3*shift, pano_color, img_color, alpha=alpha_no_zrec, prefix='')
    
    for heading, significance_level in significance.items():
        add_significance(ax, df, heading, significance_level, shift)

    xticks = df['heading'].unique().astype('int')
    ax.set_xticks(xticks, xticks)
    ax.set_title(df['panoid'].iloc[0])  # take the panoid from the data rather than from a global
    ax.set_ylabel("Mean Score")
    ax.set_xlabel("Heading")
    ax.set_ylim(0.5, 5.5)
    ax.legend()
    if not ax_provided:
        fig.tight_layout()
        plt.show()
        return fig, ax

def plot_mean_ci_scores(df, ax, shift, pano_color, img_color, alpha, prefix):
    if alpha == 0:
        return
    xticks_pano, mean_pano, ci_pano = [], [], []
    xticks_img, mean_img, ci_img = [], [], []
    for h in df['heading'].sort_values().unique():
        scores = df[df['heading']==h]
        if len(scores) == 2:  # both image and pano tasks
            xticks_img.append(h-shift)
            mean_img.append(scores[scores['task_type']=='img'][prefix+'mean'].iloc[0])
            ci_img.append(scores[scores['task_type']=='img'][prefix+'ci95'].iloc[0])
            xticks_pano.append(h+shift)
            mean_pano.append(scores[scores['task_type']=='pano'][prefix+'mean'].iloc[0])
            ci_pano.append(scores[scores['task_type']=='pano'][prefix+'ci95'].iloc[0])
        else:
            xticks_pano.append(h + shift)
            mean_pano.append(scores[prefix+'mean'].iloc[0])
            ci_pano.append(scores[prefix+'ci95'].iloc[0])
    ax.scatter(xticks_pano, mean_pano, s=15, marker='_', c=pano_color, label=prefix+'pano', alpha=alpha)
    ax.errorbar(xticks_pano, mean_pano, yerr=ci_pano, ls='', capsize=3, c=pano_color, alpha=alpha)
    ax.scatter(xticks_img, mean_img, s=15, marker='_', c=img_color, label=prefix+'img', alpha=alpha)
    ax.errorbar(xticks_img, mean_img, yerr=ci_img, ls='', capsize=3, c=img_color, alpha=alpha)

def add_significance(ax, df, heading, level, x_shift, bar_y_offset=0.15, star_y_offset=0.1, cap_length=0.1):
    """
    Add a significance bar with caps and a star between the two task types for a given heading.
    
    Parameters:
        - ax: The axes on which to draw.
        - df: The dataframe containing the data.
        - heading: The heading at which to check for significance.
        - level: The significance string to display ('*', '**' or '***').
        - x_shift: The horizontal offset of the two task-type points around the heading tick.
        - bar_y_offset: The vertical offset for the significance bar from the top of the max error bar.
        - star_y_offset: The vertical offset for the star from the top of the significance bar.
        - cap_length: The length of the vertical caps at the ends of the significance bar.
    """
    
    # Filter the dataframe for the specified heading
    subset = df[df['heading'] == heading]
    
    # If there are not exactly 2 points for this heading, return without drawing
    if len(subset) != 2:
        print(f"Warning: {len(subset)} points found for heading {heading}. Expected 2.")
        return
    
    # Calculate the y values for the top of the error bars for the two points
    y1 = subset.iloc[0]['zrec_mean'] + subset.iloc[0]['zrec_ci95']
    y2 = subset.iloc[1]['zrec_mean'] + subset.iloc[1]['zrec_ci95']
    
    # Calculate the x values for the two points
    x1 = heading - x_shift
    x2 = heading + x_shift
    
    # Determine the y value for the top of the significance bar
    bar_top = max(y1, y2) + bar_y_offset
    
    # Draw the significance bar
    ax.plot([x1, x2], [bar_top, bar_top], color='black', lw=1)
    
    # Draw the caps at the ends of the significance bar
    ax.plot([x1, x1], [bar_top - cap_length, bar_top], color='black', lw=1)
    ax.plot([x2, x2], [bar_top - cap_length, bar_top], color='black', lw=1)
    
    # Draw the star above the center of the significance bar
    ax.text((x1 + x2) / 2, bar_top + star_y_offset, level, ha='center', va='center')


def get_significance(panoid):
    df = stats_img_vs_pano[(stats_img_vs_pano['panoid']==panoid) & (stats_img_vs_pano['weighted_t_test_significant'])]
    return df[['heading', 'significance_level']].set_index('heading').to_dict()['significance_level']
In [57]:
n_cols = 3
n_rows = int(np.ceil(len(PANOIDS_TASKS) / 3))
fig, axs = plt.subplots(n_rows, n_cols, figsize=(n_cols*5, n_rows*4))
for panoid, ax in zip(PANOIDS_TASKS, axs.ravel()):
    df = mos[mos['panoid']==panoid]
    significance = get_significance(panoid)
    plot_one_pano(df, significance=significance, pano_color='orange', img_color='blue', alpha_no_zrec=0.4, ax=ax)
fig.tight_layout()
export_figure_paper(fig, 'mos-zrec-vs-standard.jpg', dpi=400)
[Figure: per-panorama grid of ZREC MOS vs standard MOS by heading, with significance bars]

Mosaic with streetview images (the SVI panels are left blank, as we are not able to release the images):

In [58]:
def create_split_array(n_rows_total, n_split):
    # Calculate the common value for each element except the last one
    common_value = n_rows_total // n_split

    # Create the array with the common value
    split_array = [common_value] * n_split

    # Adjust the last element to ensure the sum equals n_rows_total
    split_array[-1] += n_rows_total - sum(split_array)

    return split_array
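For instance, create_split_array(25, 3) returns [8, 8, 9]: each figure gets 25 // 3 = 8 rows, and the remainder goes to the last one.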
In [59]:
def add_svi(panoid, heading, ax):
    # Placeholder panel: the SVI itself cannot be released, so only the heading is shown
    heading_str = str(int(heading))
    ax.axis('off')
    ax.set_title(heading_str + '°')

# Make a mosaic with a panoid in each plot
n_cols = 5
n_split = 3  # Split in multiple figures to include in the paper
panoids = PANOIDS_TASKS
data = mos[mos['panoid'].isin(panoids)]
n_rows_total = len(panoids) + 1  # +1 for the reference images
figs = []
axes_list = []
n_rows_array = create_split_array(n_rows_total, n_split)
for n_rows in n_rows_array:
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols*4, n_rows*4))
    figs.append(fig)
    axes_list.append(axes)
axes = np.concatenate(axes_list)
for panoid, axs in zip(panoids, axes[1:]):
    df = data[data['panoid']==panoid]
    significance = get_significance(panoid)
    plot_one_pano(df, significance=significance, pano_color='orange', img_color='blue', alpha_no_zrec=0, ax=axs[0])
    headings = df['heading'].unique()
    headings = np.sort(headings)
    for heading, ax in zip(headings, axs[1:]):
        add_svi(panoid, heading, ax)

# Add calibration score
ax = axes[0, 0]
mos_calib = mos[mos['panoid'].isin(PANOIDS_CALIBRATION)].sort_values('zrec_mean')  # Lower score was first
ref1_mos, ref1_ci = mos_calib['zrec_mean'].iloc[0], mos_calib['zrec_ci95'].iloc[0]
ref2_mos, ref2_ci = mos_calib['zrec_mean'].iloc[1], mos_calib['zrec_ci95'].iloc[1]
ax.scatter([-1], ref1_mos, s=15, marker='_', c='magenta')
ax.errorbar([-1], ref1_mos, yerr=ref1_ci, ls='', capsize=3, c='magenta')
ax.scatter([1], ref2_mos, s=15, marker='_', c='green')
ax.errorbar([1], ref2_mos, yerr=ref2_ci, ls='', capsize=3, c='green')
ax.set_xticks([-1,1], ['Ref 1', 'Ref 2'])
ax.set_xlim(-7, 7)
ax.set_ylim(0.5, 5.5)
ax.set_title('Reference Images')
ax.set_ylabel('Mean Score')

# Add calibration images
add_svi(mos_calib['panoid'].iloc[0], mos_calib['heading'].iloc[0], axes[0, 1])
axes[0, 1].set_title('Ref 1')
add_svi(mos_calib['panoid'].iloc[1], mos_calib['heading'].iloc[1], axes[0, 2])
axes[0, 2].set_title('Ref 2')

axes[0, 3].axis('off')
axes[0, 4].axis('off')

for i, fig in enumerate(figs):
    fig.tight_layout()
    export_figure_paper(fig, f'all-scores-comparison-img-pano-with-svi-{i}.jpg', dpi=300)
[Figures: three-part mosaic of per-panorama score plots, reference-image scores, and SVI placeholders]

Smaller version for the body of the paper:

In [60]:
n_cols = 5
n_split = 1
panoids = ['ToX0MGNWH-VsUEqq5wSCzQ', 'aOT4Hl_n33HvBXyWpvYb4Q',  'Wv_rNo6f8bMLVKkHdZotfg', 'tlPLzx1D7MRgcvFowbmWGw']
data = mos[mos['panoid'].isin(panoids)]
n_rows_total = len(panoids)
figs = []
axes_list = []
n_rows_array = create_split_array(n_rows_total, n_split)
for n_rows in n_rows_array:
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols*4, n_rows*4))
    figs.append(fig)
    axes_list.append(axes)
axes = np.concatenate(axes_list)
for panoid, axs in zip(panoids, axes):
    df = data[data['panoid']==panoid]
    significance = get_significance(panoid)
    plot_one_pano(df, significance=significance, pano_color='orange', img_color='blue', alpha_no_zrec=0, ax=axs[0])
    headings = df['heading'].unique()
    headings = np.sort(headings)
    for heading, ax in zip(headings, axs[1:]):
        add_svi(panoid, heading, ax)

for i, fig in enumerate(figs):
    fig.tight_layout()
    export_figure_paper(fig, f'scores-comparison-img-pano-with-svi-{i}.jpg', dpi=300)
[Figure: mosaic of score plots and SVI placeholders for the four selected panoramas]

Comparison with PP2¶

POV-Level: Relationship between ZREC_MOS and the score predicted by RSSCNN¶

In [61]:
merged_df = pp2.merge(mos, on=['panoid', 'heading'], how='left')

img_data = merged_df[merged_df['task_type'] == 'img']
ax = plot_regression_and_print_coefficients(img_data, 'score', 'zrec_mean', 'Image Scores vs RSSCNN Score')
ax.set_ylabel('MOS')
ax.set_xlabel('RSSCNN Score')
ax.set_xlim(1.5, 5.2)
ax.set_ylim(1.5, 5.2)
export_figure_paper(ax.figure, 'up2-vs-pp2-images.jpg', dpi=300)
[Figure: regression of image MOS on the RSSCNN score]
Slope: 0.5373
Intercept: 1.4055
R^2: 0.5370
------------------------

Place-Level: Relationship between the spread (std) of the 4 ZREC_MOS and of the 4 scores predicted by RSSCNN¶

In [62]:
def make_up2_pp2_df(up2_series, pp2_series):
    df = up2_series.to_frame().join(pp2_series.to_frame(), on='panoid')
    df.rename(columns={'score': 'pp2_score', 'zrec_mean': 'up2_score'}, inplace=True)
    df['diff'] = df['up2_score'] - df['pp2_score']
    return df

mos_pano = mos[mos['task_type']=='pano'].copy()
mos_pano_gpanoid = mos_pano.groupby('panoid')
pp2_gpanoid = pp2.groupby('panoid')
df = make_up2_pp2_df(mos_pano_gpanoid['zrec_mean'].agg('std'), pp2_gpanoid['score'].agg('std'))

fig, ax = plt.subplots(figsize=(7, 6))
sns.scatterplot(data=df, x='pp2_score', y='up2_score', ax=ax)

xmin, xmax = ax.get_xlim()
ymin, ymax = ax.get_ylim()
min_ = min(xmin, ymin)
max_ = max(xmax, ymax)
ax.plot([min_, max_], [min_, max_], ls='--', color='red')
ax.set_xlim(min_, max_)
ax.set_ylim(min_, max_)

export_figure_paper(ax.figure, 'up2-vs-pp2-panorama-std.jpg', dpi=300)
plt.show()
[Figure: scatter of per-place std of MOS (up2_score) vs std of RSSCNN scores (pp2_score), with identity line]

Completion time¶

In [63]:
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))

sns.boxplot(x='index_in_run', y='time', data=img_scores, ax=ax[0], color=sns.color_palette()[0])
ax[0].set_title('Image tasks')
ax[0].set_ylabel('Completion time (seconds)')
ax[0].set_xlabel('Task index')
sns.boxplot(x='index_in_run', y='time', data=pano_scores, ax=ax[1], color=sns.color_palette()[0])
ax[1].set_title('Panorama tasks')
ax[1].set_ylabel('Completion time (seconds)')
ax[1].set_xlabel('Task index')

export_figure_paper(fig, 'completion-time-tasks.png')
plt.show()
[Figure: boxplots of completion time by task index, for image and panorama tasks]

Some numerical values:

In [64]:
runs['time'].describe()
Out[64]:
count    329.000000
mean     304.130699
std      113.395775
min      137.000000
25%      224.000000
50%      283.000000
75%      354.000000
max      927.000000
Name: time, dtype: float64
In [65]:
img_scores['time'].describe()
Out[65]:
count    1940.000000
mean       16.728351
std        10.592013
min         5.000000
25%        10.000000
50%        14.000000
75%        20.000000
max        83.000000
Name: time, dtype: float64
In [66]:
img_scores.groupby('index_in_run')['time'].mean()
Out[66]:
index_in_run
0    25.802469
1    17.158055
2    16.478261
4    14.302469
5    12.720126
7    13.817337
Name: time, dtype: float64
In [67]:
pano_scores['time'].describe()
Out[67]:
count    650.000000
mean      74.749231
std       27.842008
min       36.000000
25%       55.000000
50%       68.000000
75%       87.000000
max      222.000000
Name: time, dtype: float64
In [68]:
pano_scores.groupby('index_in_run')['time'].mean()
Out[68]:
index_in_run
3    83.103976
6    66.291022
Name: time, dtype: float64

Descriptive statistics of the sample¶

Age¶

In [69]:
fig, ax = plt.subplots()
sns.countplot(users, x='age_range', ax=ax, color=sns.color_palette()[0])
ax.set_xlabel('Age Group')
ax.set_ylabel('Count')
# Rotate xticks
ax.set_xticks(ax.get_xticks())  # to avoid warnings
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode='anchor')

export_figure_paper(fig, 'age-distribution.png')
plt.show()
[Figure: count of participants per age group]

Gender¶

In [70]:
fig, ax = plt.subplots()
sns.countplot(users, x='gender', ax=ax, color=sns.color_palette()[0])
ax.set_xlabel('Gender')
ax.set_ylabel('Count')
# Rotate xticks
ax.set_xticks(ax.get_xticks())  # to avoid warnings
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode='anchor')

export_figure_paper(fig, 'gender-distribution.png')
plt.show()
[Figure: count of participants per gender]

Level of education¶

In [71]:
fig, ax = plt.subplots()
order = ['no degree', 'high school', 'bachelor', 'master', 'doctorate']
sns.countplot(users, x='level_of_education', ax=ax, order=order, color=sns.color_palette()[0])
ax.set_xlabel('Level of Education')
ax.set_ylabel('Count')
# Rotate xticks
ax.set_xticks(ax.get_xticks())  # to avoid warnings
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode='anchor')

export_figure_paper(fig, 'level-of-education-distribution.png')
plt.show()
[Figure: count of participants per level of education]