These data support Harrison et al (2017) Best Practice for Mixed Effects Models in Ecology and Evolution. This document contains all the code necessary to reproduce the simulations and Figure 3
You should load these too
#Function to pass to parametric bootstrap function 'bootMer' that calculates the sum of squared Pearson residuals (required for 'od' function)
FUN <- function(fit) {
#return(fixef(fit))
x<-resid(fit,type="pearson")
return(sum(x^2))
}
#Function To Calculate Ratio of Model SS to Mean Parametric Bootstrap SS ('bias')
od<-function(bootobject){
biasvals<-bootobject $t0/bootobject[2]$t
bias<-mean(biasvals,na.rm=T)
intervals<-quantile(biasvals,c(0.025,0.975),na.rm=T)
dat<-c(bias,intervals)
return(dat)
}
#Point Estimates of Overdispersion (see Bolker et al 2009 TREE and Harrison 2014 PeerJ)
od.point<-function(modelobject){
x<-sum(resid(modelobject,type="pearson")^2)
rdf<-summary(modelobject)$AICtab[5]
return(x/rdf)
}
Here we select some known values to generate our raw data. Note that here we add random noise to the linear predictor from a Normal distribution, which represents our overdispersion.
For more information on overdispersion in mixed models see: Harrison, X. A. (2014). Using observation-level random effects to model overdispersion in count data in ecology and evolution. PeerJ, 2, e616.
Harrison, X. A. (2015). A comparison of observation-level random effect and Beta-Binomial models for modelling overdispersion in Binomial data in ecology & evolution. PeerJ, 3, e1114.
#Set random number seed for reproducibility
set.seed(5072017)
#Sample Per Population
n.per.pop<-100
#Number of Populations
n.groups<-5
#Total Sample Size
n.total<-n.groups * n.per.pop
#Factor for Population ID
groupid<-gl(n.groups,n.per.pop)
#Global Intercept
mu<- 1
#Draw Population Intercepts from Global Mean, SD1
alphas<-rnorm(n.groups,mu,1)
#Slope
beta1<-0.5
#BodySize Predictor
body<-rnorm(n.total,0,1)
#Linear Predictor + Random Draws from Poisson With expected mean = linear predictor
#Note addition of random noise from a Gaussian distribution to the linear predictor - this is our overdispersion (see Harrison 2014 PeerJ)
linpred<- alphas[groupid] + beta1 * body + rnorm(n.total,0,0.8)
outcome <- rpois(n.total,exp(linpred))
We only need to fit one model of interest here, nased on our simulated data
#Models
library(lme4)
## Loading required package: Matrix
m1<-glmer(outcome ~ body + (1|groupid),family=poisson)
summary(m1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: outcome ~ body + (1 | groupid)
##
## AIC BIC logLik deviance df.resid
## 4362.0 4374.6 -2178.0 4356.0 497
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.3472 -1.3102 -0.4953 0.7406 18.0555
##
## Random effects:
## Groups Name Variance Std.Dev.
## groupid (Intercept) 1.323 1.15
## Number of obs: 500, groups: groupid, 5
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.44375 0.51509 2.80 0.00506 **
## body 0.50396 0.01361 37.03 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## body -0.013
#Point Estimate of Overdispersion
od.point(m1)
## df.resid
## 6.419822
Now we want to know if there are more zeroes in our dataset than would be expected by chance. We can use the ‘simulate’ function to generate n (10000) datasets from the coefficients in our model and work out if the number of zeroes in the ‘real’ dataset is consistent with what would be expected under the model.
m1sims<-simulate(m1,10000)
# How many zeroes?
zeroes<-apply(m1sims,2,function(x)mean(x==0))
hist(zeroes)
#How Many Zeroes in the Real Dataset?
mean(outcome==0)
## [1] 0.174
Here we use parametric bootstrapping to calculate n (1000) sum of squared pearson’s statistics for identical models fitted to data generated from our parameters estimated in ‘our model’m1’. This is a very processor intensive function so be careful about increasing the number of bootstrap iterations.
Look at the ‘FUN’ function at the top to see what it is doing at each iteration. The OD function returns a mean and 95% confidence interval for the overdispersion statistic
########## Overdispersion
m1.odboot<-bootMer(m1,FUN,1000)
pearsons<-data.frame(pearsons=m1.odboot$t)
t0.observed<-m1.odboot$t0
#Calculate Overdispersion And 95% CIs (see Harrison 2014 PeerJ)
od(m1.odboot)
## 2.5% 97.5%
## 6.499233 5.698853 7.360752
This code aggregates all our simulations into one plot for both the ‘zeroes’ and the ‘overdispersion’
#Aggregate Plot
library(ggplot2)
library(cowplot)
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
##
## ggsave
#Zeroes Plot
zeroes<-data.frame(zeroes=zeroes)
g1 <- ggplot(zeroes,aes(x=zeroes)) + geom_histogram(bins=30,colour="black",fill="white") + xlab("Proportion Zeroes") + theme_bw()
g1<- g1 + geom_segment(aes(x=mean(outcome==0),xend=mean(outcome==0),y=0,yend=900),colour="red",size=2) + theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank(),axis.text=element_text(size=15),axis.title = element_text(size=18))
#Overdispersion Plot
g2 <- ggplot(pearsons,aes(x=pearsons)) + geom_histogram(bins=100,colour="black",fill="white") + xlab("Sum Squared Pearson Residuals") + theme_bw()
g2<-g2 + xlim(c(350,t0.observed))+ geom_segment(aes(x=t0.observed,xend=t0.observed,y=0,yend=150),colour="red",size=2) + theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank(),axis.text=element_text(size=15),axis.title = element_text(size=18))
#Aggregated Plot (Fig 3)
library(cowplot)
g3<-plot_grid(g1,g2,labels=c("A","B"),label_size=20)
## Warning: Removed 1 rows containing missing values (geom_bar).
g3
#Save Plots
#ggsave('Model Simulation.pdf',g3,width=10,height=5)
#ggsave('Model Simulation.tiff',g3,width=10,height=5)