These data support Harrison et al (2017) Best Practice for Mixed Effects Models in Ecology and Evolution. This document contains all the code necessary to reproduce the simulations and Figure 3

Convenience Functions we will need later

You should load these too

#Function to pass to parametric bootstrap function 'bootMer' that calculates the sum of squared Pearson residuals (required for 'od' function)
FUN <- function(fit) {
  #return(fixef(fit))
  x<-resid(fit,type="pearson")
  return(sum(x^2))
}

#Function To Calculate Ratio of Model SS to Mean Parametric Bootstrap SS ('bias')
od<-function(bootobject){
  biasvals<-bootobject $t0/bootobject[2]$t
  bias<-mean(biasvals,na.rm=T)
  intervals<-quantile(biasvals,c(0.025,0.975),na.rm=T)
  dat<-c(bias,intervals)
  return(dat)
}

#Point Estimates of Overdispersion (see Bolker et al 2009 TREE and Harrison 2014 PeerJ)
od.point<-function(modelobject){
  x<-sum(resid(modelobject,type="pearson")^2)
  rdf<-summary(modelobject)$AICtab[5]
  return(x/rdf)
}

Data Generation

Here we select some known values to generate our raw data. Note that here we add random noise to the linear predictor from a Normal distribution, which represents our overdispersion.

For more information on overdispersion in mixed models see: Harrison, X. A. (2014). Using observation-level random effects to model overdispersion in count data in ecology and evolution. PeerJ, 2, e616.

Harrison, X. A. (2015). A comparison of observation-level random effect and Beta-Binomial models for modelling overdispersion in Binomial data in ecology & evolution. PeerJ, 3, e1114.

#Set random number seed for reproducibility
  set.seed(5072017)

#Sample Per Population
  n.per.pop<-100

#Number of Populations
  n.groups<-5

#Total Sample Size
  n.total<-n.groups * n.per.pop

#Factor for Population ID
  groupid<-gl(n.groups,n.per.pop)

#Global Intercept 
  mu<- 1

#Draw Population Intercepts from Global Mean, SD1
  alphas<-rnorm(n.groups,mu,1)

#Slope
  beta1<-0.5

#BodySize Predictor
  body<-rnorm(n.total,0,1)

#Linear Predictor + Random Draws from Poisson With expected mean = linear predictor
  #Note addition of random noise from a Gaussian distribution to the linear predictor - this is our overdispersion (see Harrison 2014 PeerJ)
  linpred<- alphas[groupid] + beta1 * body + rnorm(n.total,0,0.8)
  outcome <- rpois(n.total,exp(linpred))


Fitting Models

We only need to fit one model of interest here, nased on our simulated data

#Models
  library(lme4)
## Loading required package: Matrix
  m1<-glmer(outcome ~ body + (1|groupid),family=poisson)
  summary(m1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: outcome ~ body + (1 | groupid)
## 
##      AIC      BIC   logLik deviance df.resid 
##   4362.0   4374.6  -2178.0   4356.0      497 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.3472 -1.3102 -0.4953  0.7406 18.0555 
## 
## Random effects:
##  Groups  Name        Variance Std.Dev.
##  groupid (Intercept) 1.323    1.15    
## Number of obs: 500, groups:  groupid, 5
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  1.44375    0.51509    2.80  0.00506 ** 
## body         0.50396    0.01361   37.03  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##      (Intr)
## body -0.013
#Point Estimate of Overdispersion  
  od.point(m1)
## df.resid 
## 6.419822

Zeroes Simulations

Now we want to know if there are more zeroes in our dataset than would be expected by chance. We can use the ‘simulate’ function to generate n (10000) datasets from the coefficients in our model and work out if the number of zeroes in the ‘real’ dataset is consistent with what would be expected under the model.

  m1sims<-simulate(m1,10000)

# How many zeroes?
  zeroes<-apply(m1sims,2,function(x)mean(x==0))
  hist(zeroes)

#How Many Zeroes in the Real Dataset?  
  mean(outcome==0)
## [1] 0.174

Calculating Overdispersion

Here we use parametric bootstrapping to calculate n (1000) sum of squared pearson’s statistics for identical models fitted to data generated from our parameters estimated in ‘our model’m1’. This is a very processor intensive function so be careful about increasing the number of bootstrap iterations.
Look at the ‘FUN’ function at the top to see what it is doing at each iteration. The OD function returns a mean and 95% confidence interval for the overdispersion statistic

########## Overdispersion
  m1.odboot<-bootMer(m1,FUN,1000)
  pearsons<-data.frame(pearsons=m1.odboot$t)
  t0.observed<-m1.odboot$t0

#Calculate Overdispersion And 95% CIs (see Harrison 2014 PeerJ)
  od(m1.odboot)
##              2.5%    97.5% 
## 6.499233 5.698853 7.360752

Plotting


This code aggregates all our simulations into one plot for both the ‘zeroes’ and the ‘overdispersion’

#Aggregate Plot
library(ggplot2)
library(cowplot)
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
## 
##     ggsave
#Zeroes Plot
  zeroes<-data.frame(zeroes=zeroes)
  g1 <- ggplot(zeroes,aes(x=zeroes)) + geom_histogram(bins=30,colour="black",fill="white") + xlab("Proportion Zeroes") + theme_bw()
  g1<- g1 + geom_segment(aes(x=mean(outcome==0),xend=mean(outcome==0),y=0,yend=900),colour="red",size=2) + theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank(),axis.text=element_text(size=15),axis.title = element_text(size=18))

#Overdispersion Plot
  g2 <- ggplot(pearsons,aes(x=pearsons)) + geom_histogram(bins=100,colour="black",fill="white") + xlab("Sum Squared Pearson Residuals") + theme_bw()
  g2<-g2 + xlim(c(350,t0.observed))+ geom_segment(aes(x=t0.observed,xend=t0.observed,y=0,yend=150),colour="red",size=2) + theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank(),axis.text=element_text(size=15),axis.title = element_text(size=18))

#Aggregated Plot (Fig 3)
  library(cowplot)

  g3<-plot_grid(g1,g2,labels=c("A","B"),label_size=20)
## Warning: Removed 1 rows containing missing values (geom_bar).
  g3

#Save Plots
  #ggsave('Model Simulation.pdf',g3,width=10,height=5)
  #ggsave('Model Simulation.tiff',g3,width=10,height=5)