Type I Error Rate Calculations

These data are from Harrison et al (2017) Best Practice for Mixed Effects Models in Ecology. The following code contains all the necessary calculations to produce Figure 4 in the manuscript.

Packages Required

library(lme4)
## Loading required package: Matrix
library(MuMIn)
library(viridis)
## Loading required package: viridisLite

Processing Simulation Data

The chunk of code processes all the output files from the various simulations into one dataframe. For details on how these files were generated, see the ‘Supplemental Type I Error Rate Simulation Methodology’ Document. You will need to change the ‘path’ file path below to the location of that folder for this code to work. These data can be found in the ‘Type I Error Rate Raw Files’ folder on FigShare.

path<-'~/Dropbox/Current Work/Mixed Effects Models Review/Julian Sims/FINALRESULTS'

#Blank Matrix to Handle Data
  alldata<-data.frame(matrix(NA,0,3))

#Store Files in the Folder
  file.input<-list.files(path,pattern=".csv")

for (filename in file.input){
  currfile<-paste0(path,"/",filename)
    currdata<-read.csv(currfile)
    currcon<-as.numeric(strsplit(filename,"_")[[1]][3])
    currcat<-as.numeric(strsplit(filename,"_")[[1]][2])
    
    tempdata<-data.frame(currcon,currcat,currdata$X0.05sig)

    alldata<-rbind(alldata,tempdata)
    
}

#Name the columns
  names(alldata)<-c("Continuous","Categorical","typeI")

Process the Raw Data To Summarise Mean Type 1 error for every combination

#Sum Of Type 1 error over all sims
  a2<-with(alldata,aggregate(typeI,by=list(Continuous,Categorical),sum))
    #head(a2)

#Total Sample Size
  with(alldata,table(Continuous,Categorical)) #1000 sims per rep
##           Categorical
## Continuous    0    1    2    3    4    5
##         0     0 1000 1000 1000 1000 1000
##         1  1000 1000 1000 1000 1000 1000
##         2  1000 1000 1000 1000 1000 1000
##         3  1000 1000 1000 1000 1000 1000
##         4  1000 1000 1000 1000 1000 1000
##         5  1000 1000 1000 1000 1000 1000
##         6  1000 1000 1000 1000 1000 1000
##         7  1000 1000 1000 1000 1000 1000
##         8  1000 1000 1000 1000 1000 1000
##         9  1000 1000 1000 1000 1000 1000
##         10 1000 1000 1000 1000 1000 1000
##         11 1000 1000 1000 1000 1000 1000
##         12 1000 1000 1000 1000 1000 1000
##         13 1000 1000 1000 1000 1000 1000
##         14 1000 1000 1000 1000 1000 1000
##         15 1000 1000 1000 1000 1000 1000
#Rename Columns, add sample size  
  colnames(a2)<-c("continuous","categorical","type1")
  a2$sample<-1000
    #nrow(a2)
  
  #Proportion Type I error
    a2$prop<-with(a2,type1/sample)
    #head(a2)

Models

Here we evalusate models that might best describe the predictors of when a Type I error might occur. All models contain a random intercept for number of categorical predictors, but we’ll evalute fit for: - A model with a single slope for the number of continuous predictors (m1) - A model with a random slope for number of continuous predictors given number of categorical predictors (m2) - The null model ‘~1’ (m3)

#Load MuMIn for the 'model.sel' function
  library(MuMIn)
  library(lme4)

#Fit the models listed above
  m1<-glmer(cbind(type1,sample-type1) ~ continuous + (1|categorical),family=binomial,data=a2)
  m2<-glmer(cbind(type1,sample-type1) ~ continuous + (continuous|categorical),family=binomial,data=a2)
  m3<-glmer(cbind(type1,sample-type1) ~ 1 + (1|categorical),family=binomial,data=a2)
 
#Make an AICc table  
   model.sel(m1,m2,m3)
## Model selection table 
##    (Intrc)   cntns random df    logLik   AICc   delta weight
## m2 -1.6080 0.11070  cn|ct  5  -677.068 1364.8    0.00      1
## m1 -1.4170 0.09366     ct  3  -940.628 1887.5  522.71      0
## m3 -0.6817             ct  2 -2638.652 5281.4 3916.62      0
## Models ranked by AICc(x) 
## Random terms: 
## cn|ct = 'continuous | categorical'
## ct = '1 | categorical'
#Inspect the conditional modes of the random intercept for the best model (m2)
  library(lattice)
  dotplot(ranef(m2,condVar=T))
## $categorical

Bayesian Predictions

Here, we refit the best model as its Bayesian equivalent in MCMCglmm, specifiying random slopes for continuous predictors and a random intercept for the number of categorical predictors.

#Load the MCMCglmm library
  library(MCMCglmm)
## Loading required package: coda
## Loading required package: ape
#Make 'categorcal' a factor
  a2$categorical<-factor(a2$categorical)

#Uninformative Prior
  mcprior<-list(R=list(V=1,nu=2),G=list(G1=list(V=diag(2)*0.002,nu=3)))

#Fit Model and Inspect 
  mc1<-MCMCglmm(cbind(type1,sample-type1) ~ continuous,random=~us(1+continuous):categorical,family="multinomial2",verbose=F,data=a2,prior=mcprior,burnin=10000,nitt=110000,thin=20,pr=T)
summary(mc1)
## 
##  Iterations = 10001:109981
##  Thinning interval  = 20
##  Sample size  = 5000 
## 
##  DIC: 107581.3 
## 
##  G-structure:  ~us(1 + continuous):categorical
## 
##                                     post.mean  l-95% CI u-95% CI eff.samp
## (Intercept):(Intercept).categorical  2.595349  0.531370  6.10398     5000
## continuous:(Intercept).categorical  -0.113878 -0.275405 -0.01609     5000
## (Intercept):continuous.categorical  -0.113878 -0.275405 -0.01609     5000
## continuous:continuous.categorical    0.006657  0.001264  0.01559     5000
## 
##  R-structure:  ~units
## 
##       post.mean l-95% CI u-95% CI eff.samp
## units   0.07126   0.0479  0.09608     5000
## 
##  Location effects: cbind(type1, sample - type1) ~ continuous 
## 
##             post.mean l-95% CI u-95% CI eff.samp  pMCMC   
## (Intercept)  -1.65960 -3.02805 -0.39662     4525 0.0216 * 
## continuous    0.11653  0.04809  0.18236     5000 0.0044 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Some Diagnostics of Convergence / Fit

Some basic MCMC diagnostics, worht inspecting but not run here for neatness

posterior.mode(mc1$Sol)
HPDinterval(mc1$Sol)
colMeans(mc1$VCV)
geweke.diag(mc1$Sol)
heidel.diag(mc1$VCV)
heidel.diag(mc1$Sol)
autocorr(mc1$Sol)

Predictions from the Bayesian Model for Plotting

We need to do some work here to extract conditional means from the model for every continuous/categorical random effect combination before we can plot them alongside the raw data

#Continuous Values to Predict from
  continuousvals<-seq(0,15,1)

#Empty Matrix for Predictions, made into a list (1 element for each categorical value from 0 - 5)
  predmat<-rep(list(matrix(NA,ncol=3,nrow=16)),6)
  
#Extract Conditional Estimates from Bayesian Model for Each Categorical Variable
    #This makes a 2-column matrix of intercept and slope for each specific categorical factor level of all the draws from the MCMC chains 
  zeropred<-cbind((mc1$Sol[,1] + mc1$Sol[,3]),(mc1$Sol[,2]+mc1$Sol[,9]))
  onepred<-cbind((mc1$Sol[,1] + mc1$Sol[,4]),(mc1$Sol[,2]+mc1$Sol[,10]))
  twopred<-cbind((mc1$Sol[,1] + mc1$Sol[,5]),(mc1$Sol[,2]+mc1$Sol[,11]))
  threepred<-cbind((mc1$Sol[,1] + mc1$Sol[,6]),(mc1$Sol[,2]+mc1$Sol[,12]))
  fourpred<-cbind((mc1$Sol[,1] + mc1$Sol[,7]),(mc1$Sol[,2]+mc1$Sol[,13]))
  fivepred<-cbind((mc1$Sol[,1] + mc1$Sol[,8]),(mc1$Sol[,2]+mc1$Sol[,14]))
  
#Turn Into a List
  statlist<-list(zeropred,onepred,twopred,threepred,fourpred,fivepred)
  length(statlist)
## [1] 6
#Loop over the Elements in the List and Multiply them by the continuous vector to generate a distribution of response values
  for (k in 1:length(predmat)){
    
    temp<-(apply(statlist[[k]],1,function(x){x[1]+x[2]*continuousvals}))
    
    #Then calculate mean and 95% crdible intervals for every value of 'continuous' for every categorical factor level
      predmat[[k]][,1]<-plogis(apply(temp,1,mean))
      predmat[[k]][,2:3]<-t(plogis(apply(temp,1,function(x){quantile(x,c(0.025,0.975))})))
    
  }
  
  
  #Convert the list to a dataframe
    predframe<-data.frame(do.call("rbind",predmat))
    colnames(predframe)<-c("mean","l95","u95")
    # head(predframe)
    # nrow(predframe)
  
  #Add Identifiers
    predframe$continuous<- rep(seq(0,15,1),times=6)
    predframe$categorical<- rep(seq(0,5,1),each=16)
    predframe$code<-with(predframe,paste(continuous,categorical))
    head(predframe)
##         mean         l95        u95 continuous categorical code
## 1 0.01207195 0.008348357 0.01725810          0           0  0 0
## 2 0.01551776 0.011142370 0.02144743          1           0  1 0
## 3 0.01992731 0.014789161 0.02662692          2           0  2 0
## 4 0.02555734 0.019547355 0.03296727          3           0  3 0
## 5 0.03272492 0.025824884 0.04087087          4           0  4 0
## 6 0.04181638 0.033969526 0.05077507          5           0  5 0
  ######## Merge for Plotting
    a3<-merge(a2,predframe)
    head(a3)
##   continuous categorical type1 sample  prop       mean        l95
## 1          0           1    54   1000 0.054 0.07474198 0.05947981
## 2          0           2   202   1000 0.202 0.16718642 0.13416432
## 3          0           3   339   1000 0.339 0.27793742 0.22974567
## 4          0           4   399   1000 0.399 0.36049805 0.30319301
## 5          0           5   514   1000 0.514 0.49196486 0.42884821
## 6          1           0     3   1000 0.003 0.01551776 0.01114237
##          u95 code
## 1 0.09503022  0 1
## 2 0.20692908  0 2
## 3 0.33263551  0 3
## 4 0.42007342  0 4
## 5 0.55704473  0 5
## 6 0.02144743  1 0

Figure 4

#Library for Selecting Pallettes (RColorBrewer), plotting (ggplot2) and saving (cowplot for flexible version of 'ggsave')
  library(RColorBrewer)
  library(ggplot2)
  library(cowplot)
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
## 
##     ggsave
#Get Blue Colour
  blueval<-brewer.pal(5,"Blues")[2]

#Plot FIgure 4  
  g1<-ggplot(a3,aes(x=continuous,y=prop))  + geom_ribbon(aes(ymin=l95,ymax=u95),fill="grey70",alpha=0.5,linetype=0) + facet_grid(.~categorical)
  g2<- g1 + scale_colour_brewer(palette="Dark2") + geom_line(aes(y=mean)) + ggtitle("Number of Categorical Variables") + theme_bw()+ geom_point(size=5,shape=21,fill=blueval)
  g3 <- g2 + ylab("Type 1 Error Rate") + xlab("Number of Continuous Variables") + theme(axis.text=element_text(size=15),axis.title=element_text(size=15),legend.position="none",panel.grid.major = element_blank(), panel.grid.minor = element_blank())
  g3

#Saving Commands
#ggsave("Type 1 Error Rate Feb17.pdf",g3,width=10,height=6)
#ggsave("Type 1 Error Rate Feb17.tiff",g3,width=10,height=6)