1. Load libraries
library(tidyverse)
library(readxl)
library(dplyr)
library(plyr)
library(openxlsx)
library(pbkrtest)
library(psycho)
library(pander)
library(ggpubr)
library(knitr)
library(kableExtra) # For Markdown tables
library(report) # textual summaries of analysis
library(lme4) # to fit the linear mixed effect model
library(lmerTest)
library(fitdistrplus) # to fit probability distributions
library(emmeans) # to compare contrasts
library(bbmle) # for AICtab
library(sjPlot) # for plot_model function
library(DHARMa) # for GLMM model diagnostics
knitr::opts_chunk$set(echo = TRUE)
2. Load data
# Select trait
trait <- "AerobicScopeFactorial"
# Set working directory of executing R. script
file_path <- paste("C:/Users/oelle/Documents/Meine Dokumente/Research/Projects/Project_RockLobster/Temperature acclimation/Aerobic scope/Statistics/LinearMixedEffectModelling/", trait, "/", sep = "")
# Load tidy raw data in long format
mo2_data_long <- read.csv("C:/Users/oelle/Documents/Meine Dokumente/Research/Projects/Project_RockLobster/Temperature acclimation/Aerobic scope/Statistics/MO2_data_Tidy_RawData_long.csv")
3. Process data
# Shorten column names
names(mo2_data_long)[c(5,6)] <- c("AccTemp", "ExpTemp")
# Define fixed effects as factors
mo2_data_long$AntTag <- as.factor(mo2_data_long$AntTag)
mo2_data_long$Species <- as.factor(mo2_data_long$Species)
mo2_data_long$AccTemp <- as.factor(mo2_data_long$AccTemp)
mo2_data_long$ExpTemp <- as.factor(mo2_data_long$ExpTemp)
mo2_data_long$Sex <- as.factor(mo2_data_long$Sex)
# Subset data by trait
sub_trait <- subset(mo2_data_long, Trait == trait)
4. Find best probability distribution of data to check assumptions for linear model
If not normal distributed find best distribution fit using fitdistrplus package
# Plot data versus a range of distrubutions
descdist(sub_trait$value, boot = 1000)

## summary statistics
## ------
## min: 1.279979 max: 15.32079
## median: 4.549732
## mean: 5.040583
## estimated sd: 2.239645
## estimated skewness: 1.341954
## estimated kurtosis: 6.788017
# Fit various distributions
nd <- fitdist(sub_trait$value, "norm")
## $start.arg
## $start.arg$mean
## [1] 5.040583
##
## $start.arg$sd
## [1] 2.229252
##
##
## $fix.arg
## NULL
gd <- fitdist(sub_trait$value, "gamma")
## $start.arg
## $start.arg$shape
## [1] 5.112616
##
## $start.arg$rate
## [1] 1.014291
##
##
## $fix.arg
## NULL
wd <- fitdist(sub_trait$value, "weibull")
## $start.arg
## $start.arg$shape
## [1] 2.711953
##
## $start.arg$scale
## [1] 5.666871
##
##
## $fix.arg
## NULL
lnd <- fitdist(sub_trait$value, "lnorm")
## $start.arg
## $start.arg$meanlog
## [1] 1.523719
##
## $start.arg$sdlog
## [1] 0.4404322
##
##
## $fix.arg
## NULL
# Plot distributions for the data
plot.legend <- c("Normal", "gamma", "Weibull", "LogNormal")
denscomp(list(nd, gd, wd, lnd), legendtext = plot.legend)

# QQ plot
qqcomp(list(nd, gd, wd, lnd), legendtext = plot.legend)

# Perform goodness of fit test to select best distribution
gofstat(list(nd, gd, wd, lnd), fitnames = c("Normal", "gamma", "Weibull", "LogNormal"))
## Goodness-of-fit statistics
## Normal gamma Weibull LogNormal
## Kolmogorov-Smirnov statistic 0.1091037 0.05432442 0.0785686 0.07308742
## Cramer-von Mises statistic 0.1963082 0.05651347 0.1208768 0.07772099
## Anderson-Darling statistic 1.3530262 0.34918712 0.9788359 0.48594335
##
## Goodness-of-fit criteria
## Normal gamma Weibull LogNormal
## Akaike's Information Criterion 483.6506 462.2078 473.1394 462.4943
## Bayesian Information Criterion 489.0149 467.5721 478.5037 467.8586
–> Anderson-Darling statistics gives good measure for fit for both middle and tail of distribution
–> Data fit best to a lognormal or gamma distribution, however, model comparison between LME and GLMM showed that LME perfom slighly better. Therefore will continue with LME approach.
5. Plot data
# Gplot
ggplot(sub_trait, aes(x = ExpTemp, y = value, fill = as.factor(AccTemp), alpha = .5)) +
geom_boxplot(outlier.size = 0) +
ylab(trait) +
facet_wrap(~ Species, scales = "free")+
geom_point(pch = 21, position = position_jitterdodge(seed = 100))+
geom_text(aes(label=AntTag, color = as.factor(AccTemp)), alpha = .8, position = position_jitterdodge(seed = 100))

# Line Plot comparing both species
ggline(sub_trait, x = "ExpTemp", y = "value",
add = c("mean_ci", "jitter"), # Add mean_se and jitter points
# add.params = list(size = 0.7), # Add point size
# label = "AntTag", # Add point labels
# #label.select = list(top.up = 2), # show only labels for the top 2 points
# font.label = list(color = "Species"), # Color labels by .y., here gene names
# repel = TRUE, # Use repel to avoid labels overplotting
color = "Species",
palette = "jco")

6. Summary statistics
Grouped by species,experimental temperature and acclimation temperature
# Summary statistics
substrait_stats <- ddply(sub_trait, c("Species", "ExpTemp", "AccTemp"), summarise,
mean = mean(value, na.rm = TRUE),
n = length(value),
sd = sd(value, na.rm = TRUE),
ci = qt(0.975, n) * sd / sqrt(n),
lower_ci = mean-ci,
upper_ci = mean+ci,
min = min(value),
max = max(value))
# Round values
substrait_stats <- substrait_stats %>% mutate_if(is.numeric, round, digit = 3)
# Display results
substrait_stats %>% kbl(digits = 1, caption = trait) %>% kable_classic()
AerobicScopeFactorial
Species
|
ExpTemp
|
AccTemp
|
mean
|
n
|
sd
|
ci
|
lower_ci
|
upper_ci
|
min
|
max
|
Jasus edwardsii
|
14
|
14
|
5.5
|
6
|
1.4
|
1.4
|
4.1
|
6.9
|
3.6
|
7.8
|
Jasus edwardsii
|
14
|
17.5
|
6.6
|
6
|
0.9
|
0.9
|
5.6
|
7.5
|
5.3
|
7.9
|
Jasus edwardsii
|
14
|
21.5
|
7.2
|
6
|
4.7
|
4.7
|
2.5
|
11.9
|
3.1
|
15.3
|
Jasus edwardsii
|
17.5
|
14
|
4.4
|
6
|
2.1
|
2.1
|
2.3
|
6.4
|
2.1
|
7.6
|
Jasus edwardsii
|
17.5
|
17.5
|
4.6
|
6
|
2.1
|
2.1
|
2.5
|
6.8
|
1.6
|
6.8
|
Jasus edwardsii
|
17.5
|
21.5
|
5.0
|
6
|
2.6
|
2.6
|
2.4
|
7.6
|
2.9
|
9.1
|
Jasus edwardsii
|
21.5
|
14
|
2.8
|
6
|
0.8
|
0.8
|
2.0
|
3.5
|
1.7
|
4.0
|
Jasus edwardsii
|
21.5
|
17.5
|
3.2
|
6
|
0.6
|
0.6
|
2.6
|
3.8
|
2.6
|
4.2
|
Jasus edwardsii
|
21.5
|
21.5
|
3.9
|
6
|
1.3
|
1.3
|
2.5
|
5.2
|
2.0
|
5.8
|
Sagmariasus verreauxi
|
14
|
14
|
5.9
|
6
|
1.4
|
1.4
|
4.5
|
7.3
|
3.9
|
7.5
|
Sagmariasus verreauxi
|
14
|
17.5
|
7.2
|
6
|
2.8
|
2.8
|
4.4
|
9.9
|
4.4
|
12.5
|
Sagmariasus verreauxi
|
14
|
21.5
|
7.2
|
6
|
1.5
|
1.5
|
5.8
|
8.7
|
5.7
|
9.7
|
Sagmariasus verreauxi
|
17.5
|
14
|
3.9
|
6
|
1.0
|
1.0
|
2.9
|
5.0
|
2.0
|
4.8
|
Sagmariasus verreauxi
|
17.5
|
17.5
|
5.6
|
6
|
1.6
|
1.6
|
4.0
|
7.1
|
2.8
|
7.1
|
Sagmariasus verreauxi
|
17.5
|
21.5
|
6.5
|
6
|
1.2
|
1.2
|
5.3
|
7.8
|
4.7
|
8.0
|
Sagmariasus verreauxi
|
21.5
|
14
|
3.0
|
6
|
1.2
|
1.2
|
1.8
|
4.3
|
1.3
|
4.5
|
Sagmariasus verreauxi
|
21.5
|
17.5
|
3.8
|
6
|
0.5
|
0.5
|
3.3
|
4.3
|
3.4
|
4.5
|
Sagmariasus verreauxi
|
21.5
|
21.5
|
4.5
|
6
|
0.7
|
0.7
|
3.8
|
5.1
|
3.6
|
5.5
|
Grouped by species and experimental temperature only, acclimation groups pooled
# Summary statistics
substrait_stats <- ddply(sub_trait, c("Species", "ExpTemp"), summarise,
mean = mean(value, na.rm = TRUE),
n = length(value),
sd = sd(value, na.rm = TRUE),
ci = qt(0.975, n) * sd / sqrt(n),
lower_ci = mean-ci,
upper_ci = mean+ci,
min = min(value),
max = max(value))
# Round values
substrait_stats <- substrait_stats %>% mutate_if(is.numeric, round, digit = 3)
# Display results
substrait_stats %>% kbl(digits = 1, caption = trait) %>% kable_classic()
AerobicScopeFactorial
Species
|
ExpTemp
|
mean
|
n
|
sd
|
ci
|
lower_ci
|
upper_ci
|
min
|
max
|
Jasus edwardsii
|
14
|
6.4
|
18
|
2.8
|
1.4
|
5.0
|
7.8
|
3.1
|
15.3
|
Jasus edwardsii
|
17.5
|
4.7
|
18
|
2.2
|
1.1
|
3.6
|
5.7
|
1.6
|
9.1
|
Jasus edwardsii
|
21.5
|
3.3
|
18
|
1.0
|
0.5
|
2.8
|
3.8
|
1.7
|
5.8
|
Sagmariasus verreauxi
|
14
|
6.8
|
18
|
2.0
|
1.0
|
5.8
|
7.7
|
3.9
|
12.5
|
Sagmariasus verreauxi
|
17.5
|
5.3
|
18
|
1.6
|
0.8
|
4.5
|
6.2
|
2.0
|
8.0
|
Sagmariasus verreauxi
|
21.5
|
3.8
|
18
|
1.0
|
0.5
|
3.3
|
4.3
|
1.3
|
5.5
|
7. Linear mixed effect modelling
7.A Find best model
Fit the most complex model with all meaningful fixed factors
Animal ID (AntTag) as random factor
# Start with the most complex model
complex_model <- lmer(value ~ ExpTemp *
AccTemp *
Species +
Sex +
Weight_g +
(1|AntTag), data = sub_trait, REML = FALSE)
# Model summary
anova(complex_model)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## ExpTemp 171.773 85.886 2 73.644 48.4279 3.76e-14 ***
## AccTemp 14.235 7.117 2 39.589 4.0131 0.02588 *
## Species 2.967 2.967 1 39.743 1.6731 0.20331
## Sex 5.713 5.713 1 41.215 3.2212 0.08003 .
## Weight_g 1.244 1.244 1 41.850 0.7016 0.40702
## ExpTemp:AccTemp 0.960 0.240 4 73.650 0.1353 0.96881
## ExpTemp:Species 0.274 0.137 2 73.922 0.0773 0.92571
## AccTemp:Species 1.352 0.676 2 39.918 0.3812 0.68550
## ExpTemp:AccTemp:Species 4.683 1.171 4 73.884 0.6601 0.62169
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
7.B Find the best model by backward elimination of non-significant effects of linear mixed effects model using the step function of the LmerTest package
(the model with the lowest AIC and significant p value is the best)
# Backward elimination
step_result <- step(complex_model)
step_result
## Backward reduced random-effect table:
##
## Eliminated npar logLik AIC LRT Df Pr(>Chisq)
## <none> 22 -201.88 447.75
## (1 | AntTag) 0 21 -208.02 458.04 12.287 1 0.0004562 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite
##
## Eliminated Sum Sq Mean Sq NumDF DenDF F value
## ExpTemp:AccTemp:Species 1 4.683 1.171 4 73.884 0.6601
## ExpTemp:AccTemp 2 1.002 0.250 4 73.816 0.1358
## ExpTemp:Species 3 0.306 0.153 2 74.056 0.0825
## AccTemp:Species 4 1.439 0.720 2 40.121 0.3869
## Weight_g 5 1.074 1.074 1 42.500 0.5768
## Species 6 2.462 2.462 1 39.961 1.3170
## Sex 7 4.634 4.634 1 40.903 2.4821
## ExpTemp 0 171.485 85.743 2 73.296 45.8382
## AccTemp 0 15.605 7.803 2 39.884 4.1713
## Pr(>F)
## ExpTemp:AccTemp:Species 0.62169
## ExpTemp:AccTemp 0.96863
## ExpTemp:Species 0.92093
## AccTemp:Species 0.68164
## Weight_g 0.45177
## Species 0.25796
## Sex 0.12285
## ExpTemp 1.224e-13 ***
## AccTemp 0.02265 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Model found:
## value ~ ExpTemp + AccTemp + (1 | AntTag)
# Extract the model that step found:
final_model <- get_model(step_result)
–> Experimental and acclimation temperature have effect on data
7.C Re-run model fit using REML estimation
Best model: ExpTemp + AccTemp + (1 | AntTag)
# Remodel using Maximum likelyhood estimation
final_model <- lmer(value ~ ExpTemp +
AccTemp +
(1|AntTag), data = sub_trait, REML = TRUE)
# Model summary
summary(final_model)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: value ~ ExpTemp + AccTemp + (1 | AntTag)
## Data: sub_trait
##
## REML criterion at convergence: 413.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.9674 -0.5001 -0.0091 0.4518 3.9063
##
## Random effects:
## Groups Name Variance Std.Dev.
## AntTag (Intercept) 1.263 1.124
## Residual 1.927 1.388
## Number of obs: 108, groups: AntTag, 39
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 5.7893 0.4288 57.2062 13.501 < 2e-16 ***
## ExpTemp17.5 -1.6189 0.3304 71.1270 -4.900 5.81e-06 ***
## ExpTemp21.5 -3.1315 0.3319 72.2043 -9.434 3.18e-14 ***
## AccTemp17.5 0.9513 0.5483 37.8997 1.735 0.09083 .
## AccTemp21.5 1.5165 0.5535 36.7385 2.740 0.00943 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) ET17.5 ET21.5 AT17.5
## ExpTemp17.5 -0.383
## ExpTemp21.5 -0.388 0.507
## AccTemp17.5 -0.629 0.000 0.004
## AccTemp21.5 -0.621 -0.004 0.000 0.486
# Anova to test main effects of final model representing the combined significance for all coefficients
anova(final_model)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## ExpTemp 171.541 85.771 2 71.096 44.5075 2.929e-13 ***
## AccTemp 14.875 7.437 2 36.802 3.8594 0.03009 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
report(anova(final_model))
## The ANOVA suggests that:
##
## - The main effect of ExpTemp is statistically significant and large (F(2) = 44.51, p < .001; Eta2 (partial) = 0.56, 90% CI [0.43, 0.65])
## - The main effect of AccTemp is statistically significant and large (F(2) = 3.86, p = 0.030; Eta2 (partial) = 0.17, 90% CI [0.01, 0.34])
##
## Effect sizes were labelled following Field's (2013) recommendations.
# Table output from sjPlot
tab_model(final_model)
|
value
|
Predictors
|
Estimates
|
CI
|
p
|
(Intercept)
|
5.79
|
4.95 – 6.63
|
<0.001
|
ExpTemp [17.5]
|
-1.62
|
-2.27 – -0.97
|
<0.001
|
ExpTemp [21.5]
|
-3.13
|
-3.78 – -2.48
|
<0.001
|
AccTemp [17.5]
|
0.95
|
-0.12 – 2.03
|
0.083
|
AccTemp [21.5]
|
1.52
|
0.43 – 2.60
|
0.006
|
Random Effects
|
σ2
|
1.93
|
τ00 AntTag
|
1.26
|
ICC
|
0.40
|
N AntTag
|
39
|
Observations
|
108
|
Marginal R2 / Conditional R2
|
0.391 / 0.632
|
# Textual report of the model
report(final_model)
## We fitted a linear mixed model (estimated using REML and nloptwrap optimizer) to predict value with ExpTemp and AccTemp (formula: value ~ ExpTemp + AccTemp). The model included AntTag as random effect (formula: ~1 | AntTag). The model's total explanatory power is substantial (conditional R2 = 0.63) and the part related to the fixed effects alone (marginal R2) is of 0.39. The model's intercept, corresponding to ExpTemp = 14 and AccTemp = 14, is at 5.79 (95% CI [4.95, 6.63], t(101) = 13.50, p < .001). Within this model:
##
## - The effect of ExpTemp [17.5] is statistically significant and negative (beta = -1.62, 95% CI [-2.27, -0.97], t(101) = -4.90, p < .001; Std. beta = -0.72, 95% CI [-1.01, -0.43])
## - The effect of ExpTemp [21.5] is statistically significant and negative (beta = -3.13, 95% CI [-3.78, -2.48], t(101) = -9.43, p < .001; Std. beta = -1.40, 95% CI [-1.69, -1.11])
## - The effect of AccTemp [17.5] is statistically non-significant and positive (beta = 0.95, 95% CI [-0.12, 2.03], t(101) = 1.74, p = 0.083; Std. beta = 0.42, 95% CI [-0.06, 0.90])
## - The effect of AccTemp [21.5] is statistically significant and positive (beta = 1.52, 95% CI [0.43, 2.60], t(101) = 2.74, p = 0.006; Std. beta = 0.68, 95% CI [0.19, 1.16])
##
## Standardized parameters were obtained by fitting the model on a standardized version of the dataset. 95% Confidence Intervals (CIs) and p-values were computed using the Wald approximation.
Summary table of main effects
library(effectsize)
# Calculate anova test stats for model
aov_lme <-anova(final_model)
# Caculate partial effects size
eta = eta_squared(aov_lme)
# Create output stats table
lme_table <- round(aov_lme[, c(1,2,4)], 1)
# Attach F statistics
lme_table$F_value <- paste(round(aov_lme[,5],2), " (", aov_lme[,3], ")", sep = "")
# Attach p value
lme_table$p_value <- format(aov_lme[, 6], digits = 4)
# Create string with eta and 90% eta range and join with table
lme_table$Partial_Eta <- paste(round(eta$Eta2_partial, 2), " [", round(eta$CI_low, 2), "-", round(eta$CI_high, 2), "]", sep = "")
# Save table as csv
write.csv(lme_table, "Model_summary.csv")
# Print table
kable(lme_table,
format = "html",
row.names = TRUE,
caption = "Model summary for main effects",
col.names = c("Sum of sqares", "Mean squares", "Den df", "F(df)", "p value", "Partical squared eta (CI 90%)")) %>%
kable_classic(full_width = F, html_font = "Cambria")
Model summary for main effects
|
Sum of sqares
|
Mean squares
|
Den df
|
F(df)
|
p value
|
Partical squared eta (CI 90%)
|
ExpTemp
|
171.5
|
85.8
|
71.1
|
44.51 (2)
|
2.929e-13
|
0.56 [0.43-0.65]
|
AccTemp
|
14.9
|
7.4
|
36.8
|
3.86 (2)
|
3.009e-02
|
0.17 [0.01-0.34]
|
8. Model diagnostics
# Use of DHARMa package see https://cran.r-project.org/web/packages/DHARMa/vignettes/DHARMa.html
# Set see for reproducibility of residual simulation
set.seed(1000)
# Simulate residuals
# (scaled residual value of 0.5 means that half of the simulated data are higher than the observed value)
simulationOutput <- simulateResiduals(fittedModel = final_model, plot = F)
# Plot scaled residuals
# Plot 1 = QQ plot with test for KS, overdispersion and outliers
# Plot 2 = Residuals plot
plot(simulationOutput)

# Plot against predictors to check for further deviations
plotResiduals(simulationOutput, sub_trait$AccTemp)

plotResiduals(simulationOutput, sub_trait$ExpTemp)

–> Model diagnostics do not show apparent violations of model assumptions