1. Load libraries

library(tidyverse)
library(readxl)
library(dplyr)
library(plyr)
library(openxlsx)
library(pbkrtest)
library(psycho)
library(pander)
library(ggpubr)
library(knitr)
library(kableExtra) # For Markdown tables
library(report) # textual summaries of analysis

library(lme4) # to fit the linear mixed effect model
library(lmerTest)
library(fitdistrplus) # to fit probability distributions
library(emmeans) # to compare contrasts
library(bbmle) # for AICtab
library(sjPlot) # for plot_model function
library(DHARMa) # for GLMM model diagnostics

knitr::opts_chunk$set(echo = TRUE)

2. Load data

  # Select trait
    trait <- "AerobicScopeFactorial"

# Set working directory of executing R. script
    file_path <- paste("C:/Users/oelle/Documents/Meine Dokumente/Research/Projects/Project_RockLobster/Temperature acclimation/Aerobic scope/Statistics/LinearMixedEffectModelling/", trait, "/", sep = "")
    
  # Load tidy raw data in long format
    mo2_data_long <- read.csv("C:/Users/oelle/Documents/Meine Dokumente/Research/Projects/Project_RockLobster/Temperature acclimation/Aerobic scope/Statistics/MO2_data_Tidy_RawData_long.csv")

3. Process data

  # Shorten column names
    names(mo2_data_long)[c(5,6)] <- c("AccTemp", "ExpTemp")
  
  # Define fixed effects as factors
    mo2_data_long$AntTag <- as.factor(mo2_data_long$AntTag)
    mo2_data_long$Species <- as.factor(mo2_data_long$Species)
    mo2_data_long$AccTemp <- as.factor(mo2_data_long$AccTemp)
    mo2_data_long$ExpTemp <- as.factor(mo2_data_long$ExpTemp)
    mo2_data_long$Sex <- as.factor(mo2_data_long$Sex)

  # Subset data by trait
    sub_trait <- subset(mo2_data_long, Trait == trait)

4. Find best probability distribution of data to check assumptions for linear model

If not normal distributed find best distribution fit using fitdistrplus package

  # Plot data versus a range of distrubutions
    descdist(sub_trait$value, boot = 1000)

## summary statistics
## ------
## min:  1.279979   max:  15.32079 
## median:  4.549732 
## mean:  5.040583 
## estimated sd:  2.239645 
## estimated skewness:  1.341954 
## estimated kurtosis:  6.788017
  # Fit various distributions
    nd <- fitdist(sub_trait$value, "norm")
## $start.arg
## $start.arg$mean
## [1] 5.040583
## 
## $start.arg$sd
## [1] 2.229252
## 
## 
## $fix.arg
## NULL
    gd <- fitdist(sub_trait$value, "gamma")
## $start.arg
## $start.arg$shape
## [1] 5.112616
## 
## $start.arg$rate
## [1] 1.014291
## 
## 
## $fix.arg
## NULL
    wd <- fitdist(sub_trait$value, "weibull")
## $start.arg
## $start.arg$shape
## [1] 2.711953
## 
## $start.arg$scale
## [1] 5.666871
## 
## 
## $fix.arg
## NULL
    lnd <- fitdist(sub_trait$value, "lnorm")
## $start.arg
## $start.arg$meanlog
## [1] 1.523719
## 
## $start.arg$sdlog
## [1] 0.4404322
## 
## 
## $fix.arg
## NULL
  # Plot distributions for the data
    plot.legend <- c("Normal", "gamma", "Weibull", "LogNormal")
    denscomp(list(nd, gd, wd, lnd), legendtext = plot.legend)

  # QQ plot
    qqcomp(list(nd, gd, wd, lnd), legendtext = plot.legend)

  # Perform goodness of fit test to select best distribution
    gofstat(list(nd, gd, wd, lnd), fitnames = c("Normal", "gamma", "Weibull", "LogNormal"))
## Goodness-of-fit statistics
##                                 Normal      gamma   Weibull  LogNormal
## Kolmogorov-Smirnov statistic 0.1091037 0.05432442 0.0785686 0.07308742
## Cramer-von Mises statistic   0.1963082 0.05651347 0.1208768 0.07772099
## Anderson-Darling statistic   1.3530262 0.34918712 0.9788359 0.48594335
## 
## Goodness-of-fit criteria
##                                  Normal    gamma  Weibull LogNormal
## Akaike's Information Criterion 483.6506 462.2078 473.1394  462.4943
## Bayesian Information Criterion 489.0149 467.5721 478.5037  467.8586

–> Anderson-Darling statistics gives good measure for fit for both middle and tail of distribution

–> Data fit best to a lognormal or gamma distribution, however, model comparison between LME and GLMM showed that LME perfom slighly better. Therefore will continue with LME approach.

5. Plot data

# Gplot
   ggplot(sub_trait, aes(x = ExpTemp, y = value, fill = as.factor(AccTemp), alpha = .5)) +
        geom_boxplot(outlier.size = 0) +
        ylab(trait) +
        facet_wrap(~ Species, scales = "free")+
        geom_point(pch = 21, position = position_jitterdodge(seed = 100))+
        geom_text(aes(label=AntTag, color = as.factor(AccTemp)), alpha = .8, position = position_jitterdodge(seed = 100))

# Line Plot comparing both species
  ggline(sub_trait, x = "ExpTemp", y = "value", 
      add = c("mean_ci", "jitter"),              # Add mean_se and jitter points
      # add.params = list(size = 0.7),             # Add point size
      # label = "AntTag",             # Add point labels
      # #label.select = list(top.up = 2),           # show only labels for the top 2 points
      # font.label = list(color = "Species"),          # Color labels by .y., here gene names
      # repel = TRUE,                              # Use repel to avoid labels overplotting
      color = "Species", 
      palette = "jco")

6. Summary statistics

Grouped by species,experimental temperature and acclimation temperature

# Summary statistics
  substrait_stats <- ddply(sub_trait, c("Species", "ExpTemp", "AccTemp"), summarise,
                  mean = mean(value, na.rm = TRUE),
                  n = length(value),
                  sd = sd(value, na.rm = TRUE),
                  ci = qt(0.975, n) * sd / sqrt(n),
                  lower_ci = mean-ci, 
                  upper_ci = mean+ci,
                  min = min(value),
                  max = max(value))
   
# Round values
  substrait_stats <- substrait_stats %>% mutate_if(is.numeric, round, digit = 3)
   
# Display results
  substrait_stats %>% kbl(digits = 1, caption = trait) %>%  kable_classic()
AerobicScopeFactorial
Species ExpTemp AccTemp mean n sd ci lower_ci upper_ci min max
Jasus edwardsii 14 14 5.5 6 1.4 1.4 4.1 6.9 3.6 7.8
Jasus edwardsii 14 17.5 6.6 6 0.9 0.9 5.6 7.5 5.3 7.9
Jasus edwardsii 14 21.5 7.2 6 4.7 4.7 2.5 11.9 3.1 15.3
Jasus edwardsii 17.5 14 4.4 6 2.1 2.1 2.3 6.4 2.1 7.6
Jasus edwardsii 17.5 17.5 4.6 6 2.1 2.1 2.5 6.8 1.6 6.8
Jasus edwardsii 17.5 21.5 5.0 6 2.6 2.6 2.4 7.6 2.9 9.1
Jasus edwardsii 21.5 14 2.8 6 0.8 0.8 2.0 3.5 1.7 4.0
Jasus edwardsii 21.5 17.5 3.2 6 0.6 0.6 2.6 3.8 2.6 4.2
Jasus edwardsii 21.5 21.5 3.9 6 1.3 1.3 2.5 5.2 2.0 5.8
Sagmariasus verreauxi 14 14 5.9 6 1.4 1.4 4.5 7.3 3.9 7.5
Sagmariasus verreauxi 14 17.5 7.2 6 2.8 2.8 4.4 9.9 4.4 12.5
Sagmariasus verreauxi 14 21.5 7.2 6 1.5 1.5 5.8 8.7 5.7 9.7
Sagmariasus verreauxi 17.5 14 3.9 6 1.0 1.0 2.9 5.0 2.0 4.8
Sagmariasus verreauxi 17.5 17.5 5.6 6 1.6 1.6 4.0 7.1 2.8 7.1
Sagmariasus verreauxi 17.5 21.5 6.5 6 1.2 1.2 5.3 7.8 4.7 8.0
Sagmariasus verreauxi 21.5 14 3.0 6 1.2 1.2 1.8 4.3 1.3 4.5
Sagmariasus verreauxi 21.5 17.5 3.8 6 0.5 0.5 3.3 4.3 3.4 4.5
Sagmariasus verreauxi 21.5 21.5 4.5 6 0.7 0.7 3.8 5.1 3.6 5.5

Grouped by species and experimental temperature only, acclimation groups pooled

# Summary statistics
  substrait_stats <- ddply(sub_trait, c("Species", "ExpTemp"), summarise,
                  mean = mean(value, na.rm = TRUE),
                  n = length(value),
                  sd = sd(value, na.rm = TRUE),
                  ci = qt(0.975, n) * sd / sqrt(n),
                  lower_ci = mean-ci, 
                  upper_ci = mean+ci,
                  min = min(value),
                  max = max(value))
   
# Round values
  substrait_stats <- substrait_stats %>% mutate_if(is.numeric, round, digit = 3)
   
# Display results
  substrait_stats %>% kbl(digits = 1, caption = trait) %>%  kable_classic()
AerobicScopeFactorial
Species ExpTemp mean n sd ci lower_ci upper_ci min max
Jasus edwardsii 14 6.4 18 2.8 1.4 5.0 7.8 3.1 15.3
Jasus edwardsii 17.5 4.7 18 2.2 1.1 3.6 5.7 1.6 9.1
Jasus edwardsii 21.5 3.3 18 1.0 0.5 2.8 3.8 1.7 5.8
Sagmariasus verreauxi 14 6.8 18 2.0 1.0 5.8 7.7 3.9 12.5
Sagmariasus verreauxi 17.5 5.3 18 1.6 0.8 4.5 6.2 2.0 8.0
Sagmariasus verreauxi 21.5 3.8 18 1.0 0.5 3.3 4.3 1.3 5.5

7. Linear mixed effect modelling

7.A Find best model

Fit the most complex model with all meaningful fixed factors

Animal ID (AntTag) as random factor

  # Start with the most complex model
    complex_model <- lmer(value ~ ExpTemp * 
                            AccTemp * 
                            Species + 
                            Sex + 
                            Weight_g + 
                            (1|AntTag), data = sub_trait, REML = FALSE)

  # Model summary
    anova(complex_model)
## Type III Analysis of Variance Table with Satterthwaite's method
##                          Sum Sq Mean Sq NumDF  DenDF F value   Pr(>F)    
## ExpTemp                 171.773  85.886     2 73.644 48.4279 3.76e-14 ***
## AccTemp                  14.235   7.117     2 39.589  4.0131  0.02588 *  
## Species                   2.967   2.967     1 39.743  1.6731  0.20331    
## Sex                       5.713   5.713     1 41.215  3.2212  0.08003 .  
## Weight_g                  1.244   1.244     1 41.850  0.7016  0.40702    
## ExpTemp:AccTemp           0.960   0.240     4 73.650  0.1353  0.96881    
## ExpTemp:Species           0.274   0.137     2 73.922  0.0773  0.92571    
## AccTemp:Species           1.352   0.676     2 39.918  0.3812  0.68550    
## ExpTemp:AccTemp:Species   4.683   1.171     4 73.884  0.6601  0.62169    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

7.B Find the best model by backward elimination of non-significant effects of linear mixed effects model using the step function of the LmerTest package

(the model with the lowest AIC and significant p value is the best)

  # Backward elimination
    step_result <- step(complex_model)
    step_result
## Backward reduced random-effect table:
## 
##              Eliminated npar  logLik    AIC    LRT Df Pr(>Chisq)    
## <none>                    22 -201.88 447.75                         
## (1 | AntTag)          0   21 -208.02 458.04 12.287  1  0.0004562 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite 
## 
##                         Eliminated  Sum Sq Mean Sq NumDF  DenDF F value
## ExpTemp:AccTemp:Species          1   4.683   1.171     4 73.884  0.6601
## ExpTemp:AccTemp                  2   1.002   0.250     4 73.816  0.1358
## ExpTemp:Species                  3   0.306   0.153     2 74.056  0.0825
## AccTemp:Species                  4   1.439   0.720     2 40.121  0.3869
## Weight_g                         5   1.074   1.074     1 42.500  0.5768
## Species                          6   2.462   2.462     1 39.961  1.3170
## Sex                              7   4.634   4.634     1 40.903  2.4821
## ExpTemp                          0 171.485  85.743     2 73.296 45.8382
## AccTemp                          0  15.605   7.803     2 39.884  4.1713
##                            Pr(>F)    
## ExpTemp:AccTemp:Species   0.62169    
## ExpTemp:AccTemp           0.96863    
## ExpTemp:Species           0.92093    
## AccTemp:Species           0.68164    
## Weight_g                  0.45177    
## Species                   0.25796    
## Sex                       0.12285    
## ExpTemp                 1.224e-13 ***
## AccTemp                   0.02265 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Model found:
## value ~ ExpTemp + AccTemp + (1 | AntTag)
  # Extract the model that step found:
    final_model <- get_model(step_result)

–> Experimental and acclimation temperature have effect on data

7.C Re-run model fit using REML estimation

Best model: ExpTemp + AccTemp + (1 | AntTag)

    # Remodel using Maximum likelyhood estimation
      final_model <- lmer(value ~ ExpTemp +
                            AccTemp +
                            (1|AntTag), data = sub_trait, REML = TRUE)

    # Model summary  
      summary(final_model)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: value ~ ExpTemp + AccTemp + (1 | AntTag)
##    Data: sub_trait
## 
## REML criterion at convergence: 413.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9674 -0.5001 -0.0091  0.4518  3.9063 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  AntTag   (Intercept) 1.263    1.124   
##  Residual             1.927    1.388   
## Number of obs: 108, groups:  AntTag, 39
## 
## Fixed effects:
##             Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)   5.7893     0.4288 57.2062  13.501  < 2e-16 ***
## ExpTemp17.5  -1.6189     0.3304 71.1270  -4.900 5.81e-06 ***
## ExpTemp21.5  -3.1315     0.3319 72.2043  -9.434 3.18e-14 ***
## AccTemp17.5   0.9513     0.5483 37.8997   1.735  0.09083 .  
## AccTemp21.5   1.5165     0.5535 36.7385   2.740  0.00943 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) ET17.5 ET21.5 AT17.5
## ExpTemp17.5 -0.383                     
## ExpTemp21.5 -0.388  0.507              
## AccTemp17.5 -0.629  0.000  0.004       
## AccTemp21.5 -0.621 -0.004  0.000  0.486
    # Anova to test main effects of final model representing the combined significance for all coefficients
      anova(final_model)
## Type III Analysis of Variance Table with Satterthwaite's method
##          Sum Sq Mean Sq NumDF  DenDF F value    Pr(>F)    
## ExpTemp 171.541  85.771     2 71.096 44.5075 2.929e-13 ***
## AccTemp  14.875   7.437     2 36.802  3.8594   0.03009 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
      report(anova(final_model))
## The ANOVA suggests that:
## 
##   - The main effect of ExpTemp is statistically significant and large (F(2) = 44.51, p < .001; Eta2 (partial) = 0.56, 90% CI [0.43, 0.65])
##   - The main effect of AccTemp is statistically significant and large (F(2) = 3.86, p = 0.030; Eta2 (partial) = 0.17, 90% CI [0.01, 0.34])
## 
## Effect sizes were labelled following Field's (2013) recommendations.
    # Table output from sjPlot
      tab_model(final_model)
  value
Predictors Estimates CI p
(Intercept) 5.79 4.95 – 6.63 <0.001
ExpTemp [17.5] -1.62 -2.27 – -0.97 <0.001
ExpTemp [21.5] -3.13 -3.78 – -2.48 <0.001
AccTemp [17.5] 0.95 -0.12 – 2.03 0.083
AccTemp [21.5] 1.52 0.43 – 2.60 0.006
Random Effects
σ2 1.93
τ00 AntTag 1.26
ICC 0.40
N AntTag 39
Observations 108
Marginal R2 / Conditional R2 0.391 / 0.632
    # Textual report of the model
      report(final_model)
## We fitted a linear mixed model (estimated using REML and nloptwrap optimizer) to predict value with ExpTemp and AccTemp (formula: value ~ ExpTemp + AccTemp). The model included AntTag as random effect (formula: ~1 | AntTag). The model's total explanatory power is substantial (conditional R2 = 0.63) and the part related to the fixed effects alone (marginal R2) is of 0.39. The model's intercept, corresponding to ExpTemp = 14 and AccTemp = 14, is at 5.79 (95% CI [4.95, 6.63], t(101) = 13.50, p < .001). Within this model:
## 
##   - The effect of ExpTemp [17.5] is statistically significant and negative (beta = -1.62, 95% CI [-2.27, -0.97], t(101) = -4.90, p < .001; Std. beta = -0.72, 95% CI [-1.01, -0.43])
##   - The effect of ExpTemp [21.5] is statistically significant and negative (beta = -3.13, 95% CI [-3.78, -2.48], t(101) = -9.43, p < .001; Std. beta = -1.40, 95% CI [-1.69, -1.11])
##   - The effect of AccTemp [17.5] is statistically non-significant and positive (beta = 0.95, 95% CI [-0.12, 2.03], t(101) = 1.74, p = 0.083; Std. beta = 0.42, 95% CI [-0.06, 0.90])
##   - The effect of AccTemp [21.5] is statistically significant and positive (beta = 1.52, 95% CI [0.43, 2.60], t(101) = 2.74, p = 0.006; Std. beta = 0.68, 95% CI [0.19, 1.16])
## 
## Standardized parameters were obtained by fitting the model on a standardized version of the dataset. 95% Confidence Intervals (CIs) and p-values were computed using the Wald approximation.

Summary table of main effects

library(effectsize)

# Calculate anova test stats for model
aov_lme <-anova(final_model)

# Caculate partial effects size
eta = eta_squared(aov_lme)

# Create output stats table
lme_table <- round(aov_lme[, c(1,2,4)], 1)

# Attach F statistics
lme_table$F_value <- paste(round(aov_lme[,5],2), " (", aov_lme[,3], ")", sep = "")

# Attach p value
lme_table$p_value <- format(aov_lme[, 6], digits = 4)

# Create string with eta and 90% eta range and join with table
lme_table$Partial_Eta <- paste(round(eta$Eta2_partial, 2), " [", round(eta$CI_low, 2), "-", round(eta$CI_high, 2), "]", sep = "")

# Save table as csv
write.csv(lme_table, "Model_summary.csv")

# Print table
kable(lme_table, 
      format = "html", 
      row.names = TRUE,
      caption = "Model summary for main effects",
      col.names =  c("Sum of sqares", "Mean squares", "Den df", "F(df)", "p value", "Partical squared eta (CI 90%)")) %>%
  kable_classic(full_width = F, html_font = "Cambria")
Model summary for main effects
Sum of sqares Mean squares Den df F(df) p value Partical squared eta (CI 90%)
ExpTemp 171.5 85.8 71.1 44.51 (2) 2.929e-13 0.56 [0.43-0.65]
AccTemp 14.9 7.4 36.8 3.86 (2) 3.009e-02 0.17 [0.01-0.34]

8. Model diagnostics

# Use of DHARMa package see https://cran.r-project.org/web/packages/DHARMa/vignettes/DHARMa.html

# Set see for reproducibility of residual simulation
  set.seed(1000)

  # Simulate residuals
  # (scaled residual value of 0.5 means that half of the simulated data are higher than the observed value)
    simulationOutput <- simulateResiduals(fittedModel = final_model, plot = F)
    
  # Plot scaled residuals 
  # Plot 1 = QQ plot with test for KS, overdispersion and outliers
  # Plot 2 = Residuals plot
    plot(simulationOutput)

  # Plot against predictors to check for further deviations
    plotResiduals(simulationOutput, sub_trait$AccTemp)

    plotResiduals(simulationOutput, sub_trait$ExpTemp)

–> Model diagnostics do not show apparent violations of model assumptions

10. R Session Information

report_system()
## Analyses were conducted using the R Statistical language (version 4.1.2; R Core Team, 2021) on Windows 10 x64 (build 19043)
report_packages()
##   - ggpubr (version 0.4.0; Alboukadel Kassambara, 2020)
##   - effectsize (version 0.4.5; Ben-Shachar M et al., 2020)
##   - bbmle (version 1.0.24; Ben Bolker and R Development Core Team, 2021)
##   - Matrix (version 1.3.4; Douglas Bates and Martin Maechler, 2021)
##   - lme4 (version 1.1.27.1; Douglas Bates et al., 2015)
##   - DHARMa (version 0.4.4; Florian Hartig, 2021)
##   - pander (version 0.6.4; Gergely Daróczi and Roman Tsegelskyi, 2021)
##   - ggplot2 (version 3.3.5; Wickham. ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York, 2016.)
##   - plyr (version 1.8.6; Hadley Wickham, 2011)
##   - stringr (version 1.4.0; Hadley Wickham, 2019)
##   - forcats (version 0.5.1; Hadley Wickham, 2021)
##   - tidyr (version 1.1.3; Hadley Wickham, 2021)
##   - readxl (version 1.3.1; Hadley Wickham and Jennifer Bryan, 2019)
##   - readr (version 1.4.0; Hadley Wickham and Jim Hester, 2020)
##   - dplyr (version 1.0.7; Hadley Wickham et al., 2021)
##   - kableExtra (version 1.3.4; Hao Zhu, 2021)
##   - tibble (version 3.1.2; Kirill Müller and Hadley Wickham, 2021)
##   - lmerTest (version 3.1.3; Kuznetsova A et al., 2017)
##   - purrr (version 0.3.4; Lionel Henry and Hadley Wickham, 2020)
##   - sjPlot (version 2.8.10; Lüdecke D, 2021)
##   - psycho (version 0.6.1; Makowski, 2018)
##   - report (version 0.4.0; Makowski et al., 2020)
##   - fitdistrplus (version 1.1.5; Marie Laure Delignette-Muller, Christophe Dutang, 2015)
##   - openxlsx (version 4.2.4; Philipp Schauberger and Alexander Walker, 2021)
##   - R (version 4.1.2; R Core Team, 2021)
##   - emmeans (version 1.6.2.1; Russell Lenth, 2021)
##   - survival (version 3.2.13; Therneau T, 2021)
##   - pbkrtest (version 0.5.1; Ulrich Halekoh, Søren Højsgaard, 2014)
##   - MASS (version 7.3.54; Venables et al., 2002)
##   - tidyverse (version 1.3.1; Wickham et al., 2019)
##   - knitr (version 1.33; Yihui Xie, 2021)