These data are from Harrison et al (2017) Best Practice for Mixed Effects Models in Ecology. The following code contains all the necessary calculations to produce Figure 4 in the manuscript.
library(lme4)
## Loading required package: Matrix
library(MuMIn)
library(viridis)
## Loading required package: viridisLite
The chunk of code processes all the output files from the various simulations into one dataframe. For details on how these files were generated, see the ‘Supplemental Type I Error Rate Simulation Methodology’ Document. You will need to change the ‘path’ file path below to the location of that folder for this code to work. These data can be found in the ‘Type I Error Rate Raw Files’ folder on FigShare.
path<-'~/Dropbox/Current Work/Mixed Effects Models Review/Julian Sims/FINALRESULTS'
#Blank Matrix to Handle Data
alldata<-data.frame(matrix(NA,0,3))
#Store Files in the Folder
file.input<-list.files(path,pattern=".csv")
for (filename in file.input){
currfile<-paste0(path,"/",filename)
currdata<-read.csv(currfile)
currcon<-as.numeric(strsplit(filename,"_")[[1]][3])
currcat<-as.numeric(strsplit(filename,"_")[[1]][2])
tempdata<-data.frame(currcon,currcat,currdata$X0.05sig)
alldata<-rbind(alldata,tempdata)
}
#Name the columns
names(alldata)<-c("Continuous","Categorical","typeI")
#Sum Of Type 1 error over all sims
a2<-with(alldata,aggregate(typeI,by=list(Continuous,Categorical),sum))
#head(a2)
#Total Sample Size
with(alldata,table(Continuous,Categorical)) #1000 sims per rep
## Categorical
## Continuous 0 1 2 3 4 5
## 0 0 1000 1000 1000 1000 1000
## 1 1000 1000 1000 1000 1000 1000
## 2 1000 1000 1000 1000 1000 1000
## 3 1000 1000 1000 1000 1000 1000
## 4 1000 1000 1000 1000 1000 1000
## 5 1000 1000 1000 1000 1000 1000
## 6 1000 1000 1000 1000 1000 1000
## 7 1000 1000 1000 1000 1000 1000
## 8 1000 1000 1000 1000 1000 1000
## 9 1000 1000 1000 1000 1000 1000
## 10 1000 1000 1000 1000 1000 1000
## 11 1000 1000 1000 1000 1000 1000
## 12 1000 1000 1000 1000 1000 1000
## 13 1000 1000 1000 1000 1000 1000
## 14 1000 1000 1000 1000 1000 1000
## 15 1000 1000 1000 1000 1000 1000
#Rename Columns, add sample size
colnames(a2)<-c("continuous","categorical","type1")
a2$sample<-1000
#nrow(a2)
#Proportion Type I error
a2$prop<-with(a2,type1/sample)
#head(a2)
Here we evalusate models that might best describe the predictors of when a Type I error might occur. All models contain a random intercept for number of categorical predictors, but we’ll evalute fit for: - A model with a single slope for the number of continuous predictors (m1) - A model with a random slope for number of continuous predictors given number of categorical predictors (m2) - The null model ‘~1’ (m3)
#Load MuMIn for the 'model.sel' function
library(MuMIn)
library(lme4)
#Fit the models listed above
m1<-glmer(cbind(type1,sample-type1) ~ continuous + (1|categorical),family=binomial,data=a2)
m2<-glmer(cbind(type1,sample-type1) ~ continuous + (continuous|categorical),family=binomial,data=a2)
m3<-glmer(cbind(type1,sample-type1) ~ 1 + (1|categorical),family=binomial,data=a2)
#Make an AICc table
model.sel(m1,m2,m3)
## Model selection table
## (Intrc) cntns random df logLik AICc delta weight
## m2 -1.6080 0.11070 cn|ct 5 -677.068 1364.8 0.00 1
## m1 -1.4170 0.09366 ct 3 -940.628 1887.5 522.71 0
## m3 -0.6817 ct 2 -2638.652 5281.4 3916.62 0
## Models ranked by AICc(x)
## Random terms:
## cn|ct = 'continuous | categorical'
## ct = '1 | categorical'
#Inspect the conditional modes of the random intercept for the best model (m2)
library(lattice)
dotplot(ranef(m2,condVar=T))
## $categorical
Here, we refit the best model as its Bayesian equivalent in MCMCglmm, specifiying random slopes for continuous predictors and a random intercept for the number of categorical predictors.
#Load the MCMCglmm library
library(MCMCglmm)
## Loading required package: coda
## Loading required package: ape
#Make 'categorcal' a factor
a2$categorical<-factor(a2$categorical)
#Uninformative Prior
mcprior<-list(R=list(V=1,nu=2),G=list(G1=list(V=diag(2)*0.002,nu=3)))
#Fit Model and Inspect
mc1<-MCMCglmm(cbind(type1,sample-type1) ~ continuous,random=~us(1+continuous):categorical,family="multinomial2",verbose=F,data=a2,prior=mcprior,burnin=10000,nitt=110000,thin=20,pr=T)
summary(mc1)
##
## Iterations = 10001:109981
## Thinning interval = 20
## Sample size = 5000
##
## DIC: 107581.3
##
## G-structure: ~us(1 + continuous):categorical
##
## post.mean l-95% CI u-95% CI eff.samp
## (Intercept):(Intercept).categorical 2.595349 0.531370 6.10398 5000
## continuous:(Intercept).categorical -0.113878 -0.275405 -0.01609 5000
## (Intercept):continuous.categorical -0.113878 -0.275405 -0.01609 5000
## continuous:continuous.categorical 0.006657 0.001264 0.01559 5000
##
## R-structure: ~units
##
## post.mean l-95% CI u-95% CI eff.samp
## units 0.07126 0.0479 0.09608 5000
##
## Location effects: cbind(type1, sample - type1) ~ continuous
##
## post.mean l-95% CI u-95% CI eff.samp pMCMC
## (Intercept) -1.65960 -3.02805 -0.39662 4525 0.0216 *
## continuous 0.11653 0.04809 0.18236 5000 0.0044 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Some basic MCMC diagnostics, worht inspecting but not run here for neatness
posterior.mode(mc1$Sol)
HPDinterval(mc1$Sol)
colMeans(mc1$VCV)
geweke.diag(mc1$Sol)
heidel.diag(mc1$VCV)
heidel.diag(mc1$Sol)
autocorr(mc1$Sol)
We need to do some work here to extract conditional means from the model for every continuous/categorical random effect combination before we can plot them alongside the raw data
#Continuous Values to Predict from
continuousvals<-seq(0,15,1)
#Empty Matrix for Predictions, made into a list (1 element for each categorical value from 0 - 5)
predmat<-rep(list(matrix(NA,ncol=3,nrow=16)),6)
#Extract Conditional Estimates from Bayesian Model for Each Categorical Variable
#This makes a 2-column matrix of intercept and slope for each specific categorical factor level of all the draws from the MCMC chains
zeropred<-cbind((mc1$Sol[,1] + mc1$Sol[,3]),(mc1$Sol[,2]+mc1$Sol[,9]))
onepred<-cbind((mc1$Sol[,1] + mc1$Sol[,4]),(mc1$Sol[,2]+mc1$Sol[,10]))
twopred<-cbind((mc1$Sol[,1] + mc1$Sol[,5]),(mc1$Sol[,2]+mc1$Sol[,11]))
threepred<-cbind((mc1$Sol[,1] + mc1$Sol[,6]),(mc1$Sol[,2]+mc1$Sol[,12]))
fourpred<-cbind((mc1$Sol[,1] + mc1$Sol[,7]),(mc1$Sol[,2]+mc1$Sol[,13]))
fivepred<-cbind((mc1$Sol[,1] + mc1$Sol[,8]),(mc1$Sol[,2]+mc1$Sol[,14]))
#Turn Into a List
statlist<-list(zeropred,onepred,twopred,threepred,fourpred,fivepred)
length(statlist)
## [1] 6
#Loop over the Elements in the List and Multiply them by the continuous vector to generate a distribution of response values
for (k in 1:length(predmat)){
temp<-(apply(statlist[[k]],1,function(x){x[1]+x[2]*continuousvals}))
#Then calculate mean and 95% crdible intervals for every value of 'continuous' for every categorical factor level
predmat[[k]][,1]<-plogis(apply(temp,1,mean))
predmat[[k]][,2:3]<-t(plogis(apply(temp,1,function(x){quantile(x,c(0.025,0.975))})))
}
#Convert the list to a dataframe
predframe<-data.frame(do.call("rbind",predmat))
colnames(predframe)<-c("mean","l95","u95")
# head(predframe)
# nrow(predframe)
#Add Identifiers
predframe$continuous<- rep(seq(0,15,1),times=6)
predframe$categorical<- rep(seq(0,5,1),each=16)
predframe$code<-with(predframe,paste(continuous,categorical))
head(predframe)
## mean l95 u95 continuous categorical code
## 1 0.01207195 0.008348357 0.01725810 0 0 0 0
## 2 0.01551776 0.011142370 0.02144743 1 0 1 0
## 3 0.01992731 0.014789161 0.02662692 2 0 2 0
## 4 0.02555734 0.019547355 0.03296727 3 0 3 0
## 5 0.03272492 0.025824884 0.04087087 4 0 4 0
## 6 0.04181638 0.033969526 0.05077507 5 0 5 0
######## Merge for Plotting
a3<-merge(a2,predframe)
head(a3)
## continuous categorical type1 sample prop mean l95
## 1 0 1 54 1000 0.054 0.07474198 0.05947981
## 2 0 2 202 1000 0.202 0.16718642 0.13416432
## 3 0 3 339 1000 0.339 0.27793742 0.22974567
## 4 0 4 399 1000 0.399 0.36049805 0.30319301
## 5 0 5 514 1000 0.514 0.49196486 0.42884821
## 6 1 0 3 1000 0.003 0.01551776 0.01114237
## u95 code
## 1 0.09503022 0 1
## 2 0.20692908 0 2
## 3 0.33263551 0 3
## 4 0.42007342 0 4
## 5 0.55704473 0 5
## 6 0.02144743 1 0
#Library for Selecting Pallettes (RColorBrewer), plotting (ggplot2) and saving (cowplot for flexible version of 'ggsave')
library(RColorBrewer)
library(ggplot2)
library(cowplot)
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
##
## ggsave
#Get Blue Colour
blueval<-brewer.pal(5,"Blues")[2]
#Plot FIgure 4
g1<-ggplot(a3,aes(x=continuous,y=prop)) + geom_ribbon(aes(ymin=l95,ymax=u95),fill="grey70",alpha=0.5,linetype=0) + facet_grid(.~categorical)
g2<- g1 + scale_colour_brewer(palette="Dark2") + geom_line(aes(y=mean)) + ggtitle("Number of Categorical Variables") + theme_bw()+ geom_point(size=5,shape=21,fill=blueval)
g3 <- g2 + ylab("Type 1 Error Rate") + xlab("Number of Continuous Variables") + theme(axis.text=element_text(size=15),axis.title=element_text(size=15),legend.position="none",panel.grid.major = element_blank(), panel.grid.minor = element_blank())
g3
#Saving Commands
#ggsave("Type 1 Error Rate Feb17.pdf",g3,width=10,height=6)
#ggsave("Type 1 Error Rate Feb17.tiff",g3,width=10,height=6)