########################################################################### # Ecological specialisation is widely associated with genetic # structure in the ant-associated butterfly family Lycaenidae # Proceedings of the Royal Society B: Biological Sciences # August 2017 # Author: Sämi Schär ########################################################################### # load packages require(adegenet) require(adephylo) require(ape) require(Biostrings) require(car) require(coefplot2) require(dplyr) require(ecodist) require(ggplot2) require(GISTools) require(grDevices) require(Hmisc) require(mapdata) require(mapplots) require(maptools) require(maps) require(MCMCglmm) require(pegas) require(phangorn) require(phyclust) require(sp) require(SDMTools) require(seqinr) ################################################################################ # 1. Create consensus sequences per species from alignment of all barcodes for phyologeny data <- read.csv(file="PATH/AllLycaenidaeAlignedCut.csv", header=TRUE,fill=TRUE) myu <- unique(data$genus_species) d5=NULL cs <- (for (i in 1:nlevels(myu)){ trd <- data[which(data$genus_species==myu[i]),] premd <- DNAStringSet(trd[,1]) premd3 <- as.matrix(premd) mat <- ifelse(premd3=="N",NA,premd3) mat <- mat[,colSums(is.na(mat))1) { md1<- consensus(matt) seq = paste(md1, collapse="") } else { seq = paste(matt,collapse="") } shor <- data.frame("species"=character(0), "seq"=character(0)) tablist <-cbind( trd[1,3] ,seq) shor[1,] <- tablist d5 = rbind(d5, tablist) # print(d5) }) write.csv(d5, "PATH.csv") ################################################################## # 2. Microevolution: pre calculations ################################################################## # 2.1 Calculate average geographic distance between samples for each species data <- read.csv(file="PATH/clean6000b.csv", header=TRUE,fill=TRUE) myu <- unique(data$genus_species) d5=NULL for (i in 1:nlevels(myu)){ trd <- data[which(data$genus_species==myu[i]),] DistMat<-spDists(matrix(c(trd$longitude, trd$latitude),length(trd$id),2), longlat=TRUE) md <- mean(DistMat) med<- median(DistMat) shor <- data.frame("species"=character(0), "divergence"=numeric(0),"median"=numeric(0)) tablist <-rbind( c(as.character(trd[1,2]),md,med)) #shor[1,] <- tablist d5 = rbind(d5, tablist) print(d5) } ############################################################################ # 2.2 Calculate maximum divergence for each species data <- read.csv(file="PATH/clean6000b.csv", header=TRUE,fill=TRUE) myu <- unique(data$genus_species) d5=NULL for (i in 1:nlevels(myu)){ trd <- data[which(data$genus_species==myu[i]),] premd <- suppressWarnings(DNAStringSet(trd[,1])) md <- max(dist.dna(as.DNAbin(premd)),pairwise.deletion=F) shor <- data.frame("species"=character(0), "divergence"=numeric(0)) tablist <-rbind( c(as.character(trd[1,2]),md)) #shor[1,] <- tablist d5 = rbind(d5, tablist) # Sys.sleep(0.1) # p <- cbind(myu[i],shor) print(d5) } ############################################################################ # 2.3 resample maximum divergence (5 specimens) for 1000 iterations, for each species dxy2=NULL for (i in 1:1000) { # LOOP */ */ */ */ */ */ */ */ */ */ */ */ */ */ */ */ */ */ */ */ d5=NULL for (i in 1:length(mysp)){ nd <- data[which(data$genus_species==mysp[i]),] fm5 <- sample(nd$barcode, 5) premd <- suppressWarnings(DNAStringSet(fm5)) premd2 <- suppressWarnings(as.DNAbin(premd)) md <- suppressWarnings(dist.dna(premd2)) md2 <- max(md) shor <- data.frame("species"=numeric(0), "maxdiv"=numeric(0)) tablist <-rbind( c(as.character(nd[1,4]),md2)) shor[1,] <- tablist d5 = rbind(d5, data.frame(shor)) #print(d5) } dxy2 = rbind(dxy2, d5) } ############################################################################ # 2.4 Calculate haplotype diversity for each species data <- read.csv(file="PATH/clean6000b.csv", header=TRUE,fill=TRUE) myu <- unique(data$genus_species) endr=NULL for (i in 1:nlevels(myu)){ trd <- data[which(data$genus_species==myu[i]),] premd <- suppressMessages(DNAStringSet(trd[,1])) premd2 <- suppressMessages(as.DNAbin(premd)) md1<- haplotype(premd2) #loop 2 d6=NULL for (j in 1:length(haploFreq(md1))) { x[j] <- (length(attr(md1, "index")[[j]])) d6= rbind(d6, x[j]) } sumd6 <- sum(d6) d7 <- d6/sumd6 d8<-d7^2 d9<-sum(d8) endr <- 1-d9 shor <- data.frame("species"=character(0), "z"=numeric(0)) tablist <-rbind( c(as.character(trd[1,3]),endr)) #shor[1,] <- tablist endr = rbind(endr, tablist) print(endr[,1]) } ############################################################################ # 2.5 Resample haplotype diversity (5 specimens) for 1000 iterations, for each species dxy=NULL for (i in 1:1000) { # LOOP */ */ */ */ */ */ */ */ */ */ */ */ */ */ */ */ */ */ */ */ d5=NULL for (i in 1:length(mysp)){ nd <- data[which(data$genus_species==mysp[i]),] fm5 <- sample(nd$barcode, 5) # Calculation premd <- suppressMessages(DNAStringSet(fm5)) premd2 <- suppressMessages(as.DNAbin(premd)) md1<- haplotype(premd2) #loop 2 d6=NULL for (j in 1:length(haploFreq(md1))) { x[j] <- (length(attr(md1, "index")[[j]])) d6= rbind(d6, x[j]) } sumd6 <- sum(d6) d7 <- d6/sumd6 d8<-d7^2 d9<-sum(d8) endr <- 1-d9 tablist <- c(mysp[i],endr,udata$degree[i],udata$plants[i]) shor <- data.frame("species"= numeric(0), "h"= numeric(0), "degree"= numeric(0), "plants"= numeric(0)) shor[1,] <- tablist d5 = rbind(d5, data.frame(shor)) #print(d5) } dxy = rbind(dxy, d5) } print(dxy) ############################################################################ # Calculate IBD r for each species data <- read.csv(file="PATH/clean6000b.csv", header=TRUE,fill=TRUE) myu <- unique(data$genus_species) d5=NULL for (i in 1:nlevels(myu)){ trd <- data[which(data$genus_species==myu[i]),] premd <- suppressWarnings(DNAStringSet(trd[,1])) premd3 <- as.DNAbin(premd) premd4 <- dist.dna(premd3) DMat<-as.matrix(premd4) GMat<- spDists(matrix(c(trd$longitude, trd$latitude),length(trd$id),2),longlat=T) GMat <- as.matrix(GMat) GMat <- as.dist(GMat) GMat <-as.matrix(GMat) if (sum(DMat)>0) { if (sum(GMat)>0) { mt0 <- mantel.rtest(as.dist(DMat), as.dist(GMat)) }} else {mt0=list(NA,NA,NA,NA,NA)} mt<-matrix(mt0) shor <- data.frame("species"=character(0), "z"=numeric(0),"p"=numeric(0)) tablist <-rbind( c(as.character(trd[1,2]),mt[2,],mt[4,])) #shor[1,] <- tablist d5 = rbind(d5, tablist) # Sys.sleep(0.1) # p <- cbind(myu[i],shor) print(d5) } ############################################################################ # Resample IBD r (5 specimens) for 1000 iterations, for each species dxy3=NULL for (i in 1:1000) { # LOOP */ */ */ */ */ */ */ */ */ */ */ */ */ */ */ */ */ */ */ */ d5=NULL for (i in 1:length(myu)){ nd <- data[which(data$genus_species==myu[i]),] fm5 <- sample_n(nd, 5) premd <- suppressWarnings(DNAStringSet(fm5$barcode)) premd3 <- as.DNAbin(premd) premd4 <- dist.dna(premd3) DMat<-as.matrix(premd4) GMat<- spDists(matrix(c(fm5$longitude, fm5$latitude),length(fm5$id),2),longlat=T) GMat <- as.matrix(GMat) if (sum(DMat)>0) { if (sum(GMat)>0) { mt0 <- mantel.rtest(as.dist(DMat), as.dist(GMat)) }} else {mt0=list(NA,NA,NA,NA,NA)} mt<-matrix(mt0) shor <- data.frame("species"=character(0), "z"=numeric(0),"p"=numeric(0)) tablist <-rbind( c(as.character(fm5[2,4]),mt[2,],mt[4,])) #shor[1,] <- tablist d5 = rbind(d5, tablist) # Sys.sleep(0.1) # p <- cbind(myu[i],shor) # print(d5) } dxy3 = rbind(dxy3, d5) } ############################################################################ # 3. Micro-evolution: models # 3.1. prepare data data=read.csv(file=file.choose(), header=T) names(data) datana<-data[!is.na(data$degree),] datana<-datana[!is.na(datana$plants),] datana<-datana[!is.na(datana$Carnivory),] datana<-datana[!is.na(datana$wingspan_mm),] ############################################################################ # 3.2. Maximum divergence, MCMCglmm, all samples mean(datana$MaxDiv,na.rm=T) sd(datana$MaxDiv,na.rm=T) min(datana$MaxDiv,na.rm=T) hist(sqrt(datana$MaxDiv)) qqnorm(sqrt(datana$MaxDiv)) phylo2 <- read.nexus(file.choose()) # import tree phylo2$edge.length<-NULL prior<-list(G=list(G1=list(V=1,nu=0.02)),R=list(V=1,nu=0.02)) inv.phylo<-inverseA(as.phylo(phylo2), nodes="ALL",scale=T) mdfinal <-MCMCglmm(sqrt(MaxDiv)~ degree + plants + ant.plant +Carnivory+ n+ AvGeoDist + wingspan_mm, random=~phylo, prior=prior, ginverse=list(phylo=inv.phylo$Ainv), family="gaussian", data=datana,nitt=1300000,burnin=300000,thin=500) summary(mdfinal) # 3.3. parasites vs mutualists dataobl <- data[which(data$degree==1),] dataobl<-dataobl[!is.na(dataobl$kind),] dataobl<-dataobl[!is.na(dataobl$wingspan_mm),] parmut <-MCMCglmm(sqrt(MaxDiv)~ kind+ AvGeoDist + n+ wingspan_mm, random=~phylo, prior=prior, ginverse=list(phylo=inv.phylo$Ainv), family="gaussian", data=dataobl,nitt=1300000,burnin=300000,thin=500) summary(parmut) # 3.4. average for 5 samples, 1000 iterations qqnorm(sqrt(datana$avmaxdiv5)) hist(sqrt(datana$avmaxdiv5)) ibm5 <-MCMCglmm(sqrt(avmaxdiv5)~ degree +plants + ant.plant +Carnivory+ AvGeoDist + wingspan_mm, random=~phylo, prior=prior, ginverse=list(phylo=inv.phylo$Ainv), family="gaussian", data=datana,nitt=1300000,burnin=300000,thin=500) summary(ibm5) # 3.5. parasites vs mutualists parmut2 <-MCMCglmm(sqrt(avmaxdiv5)~ kind+ AvGeoDist + wingspan_mm, random=~phylo, prior=prior, ginverse=list(phylo=inv.phylo$Ainv), family="gaussian", data=dataobl,nitt=1300000,burnin=300000,thin=500) summary(parmut2) ############################################################################ # 3.6. haplotype diversity, MCMCglmm mean(datana$h,na.rm=T) sd(datana$h,na.rm=T) min(datana$h,na.rm=T) hist(datana$h) mh <-MCMCglmm(h~ degree + plants + ant.plant +Carnivory+ n+ AvGeoDist + wingspan_mm, random=~phylo, prior=prior, ginverse=list(phylo=inv.phylo$Ainv), family="gaussian", data=datana,nitt=1300000,burnin=300000,thin=500) summary(mh) # 3.7. parasites vs mutualists parmuth <-MCMCglmm(h~ kind+ AvGeoDist + n+ wingspan_mm, random=~phylo, prior=prior, ginverse=list(phylo=inv.phylo$Ainv), family="gaussian", data=dataobl,nitt=1300000,burnin=300000,thin=500) summary(parmuth) # 3.8. average for 5 samples, 1000 iterations hist(datana$h5avg) mh5 <-MCMCglmm(h5avg~ degree + plants + ant.plant +Carnivory+ n+ AvGeoDist + wingspan_mm, random=~phylo, prior=prior, ginverse=list(phylo=inv.phylo$Ainv), family="gaussian", data=datana,nitt=1300000,burnin=300000,thin=500) summary(mh5) coefplot2(mh5) # 3.9. parasites vs mutualists parmut5 <-MCMCglmm(h5avg~ kind+ AvGeoDist + wingspan_mm, random=~phylo, prior=prior, ginverse=list(phylo=inv.phylo$Ainv), family="gaussian", data=dataobl,nitt=1300000,burnin=300000,thin=500) summary(parmut5) ############################################################################ # 3.10. IBD, MCMCglmm mean(datana$IBDr,na.rm=T) sd(datana$IBDr,na.rm=T) min(datana$IBDr,na.rm=T) hist(datana$IBDr) ibdmod <-MCMCglmm(IBDr~ degree + plants + ant.plant +Carnivory+ n+ AvGeoDist + wingspan_mm, random=~phylo, prior=prior, ginverse=list(phylo=inv.phylo$Ainv), family="gaussian", data=datana,nitt=1300000,burnin=300000,thin=500) summary(ibdmod) # 3.11. parasites vs mutualists parmutibd <-MCMCglmm(IBDr~ kind+ AvGeoDist + n+ wingspan_mm, random=~phylo, prior=prior, ginverse=list(phylo=inv.phylo$Ainv), family="gaussian", data=dataobl,nitt=1300000,burnin=300000,thin=500) summary(parmutibd) # 3.12. average for 5 samples, 1000 iterations hist(datana$avibd5) ibdmod5 <-MCMCglmm(avibd5~ degree + plants + ant.plant +Carnivory+ AvGeoDist + wingspan_mm, random=~phylo, prior=prior, ginverse=list(phylo=inv.phylo$Ainv), family="gaussian", data=datana,nitt=1300000,burnin=300000,thin=500) summary(ibdmod5) # 3.13. parasites vs mutualists parmutibd5 <-MCMCglmm(avibd5~ kind+ AvGeoDist + wingspan_mm, random=~phylo, prior=prior, ginverse=list(phylo=inv.phylo$Ainv), family="gaussian", data=dataobl,nitt=1300000,burnin=300000,thin=500) summary(parmutibd5) ################################################################## # correlatins between ant specialisation and body size cor.test(datana$wingspan_mm,as.numeric(datana$Ant_specialisation2)) cor.test(datana$wingspan_mm,as.numeric(datana$AvGeoDist)) ################################################################## # 4. Macroevoluton data<-read.csv(file=file.choose(),header=T) data$Ant_specialisation2<-factor(data$Ant_specialisation) levels(data$Ant_specialisation2) <- c("not_ant_specialized", "not_ant_specialized", "ant_specialized") data$Plant_specialisation2<-factor(data$Plant_specialisation) levels(data$Plant_specialisation2) <- c("not_plant_specialized", "not_plant_specialized", "not_plant_specialized", "plant_specialized") data$carnivory2<-factor(data$Plant_specialisation) levels(data$carnivory2) <- c("carnivorous", "not_carnivorous", "not_carnivorous", "not_carnivorous") nnls <- read.nexus(file=file.choose()) # read tree nnls2 <- read.nexus(file=file.choose()) # read tree nnls$edge.length<-NULL nnls2$edge.length<-NULL all.equal.phylo(nnls,nnls2,use.edge.length=FALSE, index.return = TRUE ) y<-list(nnls,nnls2) class(y) <- "multiPhylo" install.packages("phytools") library(phytools) ?multiRF multiRF(y) prior<-list(G=list(G1=list(V=1,nu=0.02)),R=list(V=1,nu=0.02)) inv.phylo2<-inverseA(as.phylo(nnls), nodes="ALL",scale=T) datana<-data[!is.na(data$Ant_specialisation),] datana<-datana[!is.na(datana$Plant_specialisation),] datana<-datana[!is.na(datana$Carnivory),] hist(log(datana$D)) shapiro.test(log(datanada2$D)) qqnorm(log(datanorm$D)) datanorm<-datana[which(log(datana$D)>-1.25),] # remove outliers datanorm<-datanorm[which(log(datanorm$D)< -0.65),] # remove outliers datanorm$carnivory2 <- factor(datanorm$Carnivory, levels = c(0, 1)) datanorm$plant22 <- factor(datanorm$Plant_specialisation, levels = c(0, 1)) datanorm$ant22 <- factor(datanorm$Ant_specialisation, levels = c(0, 1)) datanorm$antpl <- factor(datanorm$ant.plant, levels = c(0, 1)) ibm3 <-MCMCglmm(log(D)~ ant22 + plant22 + carnivory2 + antpl, random=~phylo, prior=prior, ginverse=list(phylo=inv.phylo2$Ainv), family="gaussian", data=datanorm,nitt=13000,burnin=3000,thin=500) las = 1, ylim=c(20,100)) ch<-c(1:41) mA <- x[,1] lines(ch-1,mA, lwd=1.5) arrows(ch-1, mA+se[,1], ch-1, mA-se[,1], code=3, angle=90, length=0.03) points(ch-1,mA, pch=16, cex=1.2) ############################################################################ # Figure 1 pdf(width=90, height=45, file="/home/gurto/Desktop/fig1.pdf") par(mfrow = c(2,2), xpd=TRUE, oma = c(0,0,0,0) , mar = c(0,0,0,0),bg = "white") rdata <- read.csv(file="/home/gurto/Desktop/Link to Resubmission3/fig1.csv",header=TRUE, fill=TRUE) rdata2<-rdata map("world",fill=T,col="grey50",border="grey50") antd <- rdata2[which(rdata2$degree==1),] antdbg <- rdata2[which(rdata2$degree==0),] points(antdbg$Longitude,antdbg$Latitude, pch=21,bg="white",cex=7) points(antd$Longitude,antd$Latitude, pch=21,bg=2,cex=7) legend("bottom", c("ant specialised","not ant specialised"), col=c(2,1),pch=c(16, 1),cex=7,bg="white") map("world",fill=T,col="grey50",border="grey50") plantd <- rdata2[which(rdata2$plants==1),] plantdbg <- rdata2[which(rdata2$plants==0),] points(plantdbg$Longitude,plantdbg$Latitude, pch=21,bg="white",cex=7) points(plantd$Longitude,plantd$Latitude, pch=21,bg=2,cex=7) legend("bottom", c("plant specialised","not plant specialised"), col=c(2,1),pch=c(16, 1),cex=7,bg="white") map("world",fill=T,col="grey50",border="grey50") carnd <- rdata2[which(rdata2$carnivory==1),] carnbg <- rdata2[which(rdata2$carnivory==0),] points(rdata2$Longitude,rdata2$Latitude, pch=21,bg="white",cex=7) points(carnd$Longitude,carnd$Latitude, pch=21,bg=2,cex=7) legend("bottom", c("carnivorous","not carnivorous"), col=c(2,1),pch=c(16, 1),cex=7,bg="white") map("world",fill=T,col="grey50",border="grey50") dd <- rdata2[which(rdata2$plants==1),] dd2 <- dd[which(dd$degree==1),] points(rdata2$Longitude,rdata2$Latitude, pch=21,bg="white",cex=7) points(dd2$Longitude,dd2$Latitude, pch=21,bg=2,cex=7) legend("bottom", c("ant and plant specialised","not ant and plant specialised"), col=c(2,1),pch=c(16, 1),cex=7,bg="white") dev.off() # Figure 2a write.csv(mdfinal$Sol, "/home/gurto/Desktop/sol2.csv") fig2a<-read.csv("/home/gurto/Desktop/sol2.csv") names(fig2a) mean(fig2a$wingspan_mm) sd(fig2a$wingspan_mm) f3<-read.csv("/home/gurto/Desktop/2a.csv") ch<-c(1:7) pdf(file="/home/gurto/Desktop/fig2a.pdf") plot(f3$mean,type="n",ylim=c(-2,4),bty="n", ylab="Coefficient") points(f3$mean/(f3$sd*2), pch=16, cex=1.5) arrows(ch,(f3$mean/(f3$sd*2))-(f3$sd/(f3$sd*2)),ch,(f3$mean/(f3$sd*2))+((f3$sd/(f3$sd*2))), code=0, lwd=3) arrows(ch,(f3$mean/(f3$sd*2))-(2*(f3$sd/(f3$sd*2))),ch,(f3$mean/(f3$sd*2))+(2*((f3$sd/(f3$sd*2)))), code=0, lwd=1) abline(a = NULL, b = NULL, h = 0, lty=2) dev.off() # Figure 2b write.csv(mh$Sol, "/home/gurto/Desktop/sol3.csv") fig2b<-read.csv("/home/gurto/Desktop/sol3.csv") names(fig2b) mean(fig2b$wingspan_mm) sd(fig2b$wingspan_mm) f3<-read.csv("/home/gurto/Desktop/2b.csv") ch<-c(1:7) pdf(file="/home/gurto/Desktop/fig2b.pdf") plot(f3$mean,type="n",ylim=c(-2,4),bty="n", ylab="Coefficient") points(f3$mean/(f3$sd*2), pch=16, cex=1.5) arrows(ch,(f3$mean/(f3$sd*2))-(f3$sd/(f3$sd*2)),ch,(f3$mean/(f3$sd*2))+((f3$sd/(f3$sd*2))), code=0, lwd=3) arrows(ch,(f3$mean/(f3$sd*2))-(2*(f3$sd/(f3$sd*2))),ch,(f3$mean/(f3$sd*2))+(2*((f3$sd/(f3$sd*2)))), code=0, lwd=1) abline(a = NULL, b = NULL, h = 0, lty=2) dev.off() # fig 2c write.csv(ibdmod$Sol, "/home/gurto/Desktop/sol4.csv") fig2c<-read.csv("/home/gurto/Desktop/sol4.csv") names(fig2c) mean(fig2c$wingspan_mm) sd(fig2c$wingspan_mm) f3<-read.csv("/home/gurto/Desktop/2c.csv") ch<-c(1:7) pdf(file="/home/gurto/Desktop/fig2c.pdf") plot(f3$mean,type="n",ylim=c(-2,4),bty="n", ylab="Coefficient") points(f3$mean/(f3$sd*2), pch=16, cex=1.5) arrows(ch,(f3$mean/(f3$sd*2))-(f3$sd/(f3$sd*2)),ch,(f3$mean/(f3$sd*2))+((f3$sd/(f3$sd*2))), code=0, lwd=3) arrows(ch,(f3$mean/(f3$sd*2))-(2*(f3$sd/(f3$sd*2))),ch,(f3$mean/(f3$sd*2))+(2*((f3$sd/(f3$sd*2)))), code=0, lwd=1) abline(a = NULL, b = NULL, h = 0, lty=2) dev.off() # Figure 3 write.csv(ibm2$Sol, "/home/gurto/Desktop/sol.csv") fig3<-read.csv("/home/gurto/Desktop/sol.csv") names(fig3) mean(fig3$carnivory21) sd(fig3$carnivory21) f3<-read.csv("/home/gurto/Desktop/fig3p.csv") ch<-c(1:4) pdf(file="/home/gurto/Desktop/fig3.pdf") plot(f3$mean,type="n",ylim=c(-.07,.2),bty="n", ylab="Coefficient") points(f3$mean, pch=16, cex=1.5) arrows(ch,f3$mean-f3$sd,ch,f3$mean+f3$sd, code=0, lwd=3) arrows(ch,f3$mean-(2*f3$sd),ch,f3$mean+(2*f3$sd), code=0, lwd=1) abline(a = NULL, b = NULL, h = 0, lty=2) dev.off()