R.Version()$version.string; R.Version()$platform
## [1] "R version 4.0.0 (2020-04-24)"
## [1] "x86_64-apple-darwin17.0"
set.seed(1001)#reproducibility
library(picante)
packageVersion("picante")
## [1] '1.8.1'
library(phytools)
packageVersion("phytools")
## [1] '0.7.20'
library(caper)
packageVersion("caper")
## [1] '1.0.1'
library(tidyverse)
packageVersion("tidyverse")
## [1] '1.3.0'
library(nlme)
packageVersion("nlme")
## [1] '3.1.147'
library(phylobase)
packageVersion("phylobase")
## [1] '0.8.10'
library(grid)
packageVersion("grid")
## [1] '4.0.0'
library(MCMCglmm)
packageVersion("MCMCglmm")
## [1] '2.29'
library(diversitree)
packageVersion("diversitree")
## [1] '0.9.13'
devtools::source_url("http://github.com/lizzieinvancouver/pwr/blob/master/pwr-functions.R?raw=TRUE")
devtools::source_url("https://github.com/lizzieinvancouver/pwr/blob/master/pwr-plots.R?raw=TRUE")
source("http://ib.berkeley.edu/courses/ib200b/scripts/diagnostics_v3.R")
This is the full dataset containing the information we need to run the models
data_comp <- read.csv("full_mat_final.csv", h=TRUE)
data_comp <- data_comp[,-1]
#rescalling RI to compensate for species used, but without reference to ICD
larger_zero <- data_comp$IR[data_comp$IR>0]
data_comp$IR[data_comp$IR==0] <- min(larger_zero)/max(larger_zero)
head(data_comp)
## species cd1 cd2 cd3 cd4 cd5 cd6 cd7 cd8 cd9 cd10 cd11 cd12
## 1 Acinonyx_jubatus 0 0 0 0 0 0 0 0 0 0 0 0
## 2 Acomys_cineraceus 0 0 0 0 0 0 0 0 0 0 0 0
## 3 Aepyceros_melampus 0 0 0 0 0 0 0 0 0 0 0 0
## 4 Ailuropoda_melanoleuca 1 0 1 1 0 0 0 0 0 0 0 0
## 5 Alcelaphus_buselaphus 0 0 0 0 0 0 0 0 0 0 0 0
## 6 Alces_alces 0 0 0 0 0 0 0 0 0 0 0 0
## cd13 cd14 cd15 cd16 cd17 cd18 cd19 IR Terrestrial Marine Freshwater
## 1 1 0 0 0 0 0 0 0.03000000 1 0 0
## 2 0 0 0 0 0 0 0 0.01666667 1 0 0
## 3 0 0 0 0 0 0 0 0.01666667 1 0 0
## 4 0 0 0 0 0 0 0 0.31000000 1 0 0
## 5 0 0 0 0 0 0 0 0.01666667 1 0 0
## 6 0 0 1 0 0 0 0 0.03000000 1 0 0
## Aerial Mass.g Log_Mass Island.Endemicity Diet.Plant Diet.Vertebrate
## 1 0 46700.0 4.7 Occurs on mainland 0 100
## 2 0 20.3 1.3 Occurs on mainland 0 0
## 3 0 52500.1 4.7 Occurs on mainland 100 0
## 4 0 108400.0 5.0 Occurs on mainland 90 10
## 5 0 171001.5 5.2 Occurs on mainland 100 0
## 6 0 356998.0 5.6 Occurs on mainland 100 0
## Diet.Invertebrate IUCN comu range
## 1 0 VU 1 441.849278
## 2 100 LC 1 86.780792
## 3 0 LC 1 293.986996
## 4 0 VU 1 3.290258
## 5 0 LC 1 576.008919
## 6 0 LC 1 3606.410850
dim(data_comp)#565 spp
## [1] 565 34
The tree is available at the VertLife webpage
full_nexus <- read.nexus("https://github.com/n8upham/MamPhy_v1/blob/master/_DATA/MamPhy_fullPosterior_BDvr_DNAonly_4098sp_topoFree_NDexp_MCC_v2_target.tre?raw=TRUE")
full_nexus_semAnolis <- drop.tip(full_nexus, "_Anolis_carolinensis")#remove Anolis
sp_names_full <- full_nexus_semAnolis$tip.label
split_names <- sapply(strsplit(sp_names_full, split='_'), function(x) (c(x[1], x[2])))
split_names_t <- data.frame(t(split_names))
new_names <- paste(split_names_t$X1, split_names_t$X2, sep="_")
# check if it is correct
data.frame(original=full_nexus$tip.label[-1][1:10], fixed=new_names[1:10])
## original
## 1 Ornithorhynchus_anatinus_ORNITHORHYNCHIDAE_MONOTREMATA
## 2 Zaglossus_bruijnii_TACHYGLOSSIDAE_MONOTREMATA
## 3 Tachyglossus_aculeatus_TACHYGLOSSIDAE_MONOTREMATA
## 4 Rhynchocyon_petersi_MACROSCELIDIDAE_MACROSCELIDEA
## 5 Rhynchocyon_cirnei_MACROSCELIDIDAE_MACROSCELIDEA
## 6 Rhynchocyon_udzungwensis_MACROSCELIDIDAE_MACROSCELIDEA
## 7 Elephantulus_rufescens_MACROSCELIDIDAE_MACROSCELIDEA
## 8 Elephantulus_brachyrhynchus_MACROSCELIDIDAE_MACROSCELIDEA
## 9 Elephantulus_intufi_MACROSCELIDIDAE_MACROSCELIDEA
## 10 Elephantulus_rupestris_MACROSCELIDIDAE_MACROSCELIDEA
## fixed
## 1 Ornithorhynchus_anatinus
## 2 Zaglossus_bruijnii
## 3 Tachyglossus_aculeatus
## 4 Rhynchocyon_petersi
## 5 Rhynchocyon_cirnei
## 6 Rhynchocyon_udzungwensis
## 7 Elephantulus_rufescens
## 8 Elephantulus_brachyrhynchus
## 9 Elephantulus_intufi
## 10 Elephantulus_rupestris
full_nexus_semAnolis$tip.label <- new_names
Prunning the phylogeny to contain only the species to whcih we have trait data
lista <- read.table("lista_may.txt", h=TRUE)#list of species
head(lista);dim(lista)#565 spp
## comu
## Acinonyx_jubatus 1
## Acomys_cineraceus 1
## Aepyceros_melampus 1
## Ailuropoda_melanoleuca 1
## Alcelaphus_buselaphus 1
## Alces_alces 1
## [1] 565 1
phylo_pruna <- prune.sample(t(lista), full_nexus_semAnolis)
phylo_pruna#521 spp
##
## Phylogenetic tree with 521 tips and 520 internal nodes.
##
## Tip labels:
## Dugong_dugon, Trichechus_manatus, Trichechus_inunguis, Trichechus_senegalensis, Procavia_capensis, Dendrohyrax_dorsalis, ...
##
## Rooted; includes branch lengths.
Data handling
data_comp_row <- column_to_rownames(data_comp, var = "species")
head(data_comp_row)
## cd1 cd2 cd3 cd4 cd5 cd6 cd7 cd8 cd9 cd10 cd11 cd12 cd13
## Acinonyx_jubatus 0 0 0 0 0 0 0 0 0 0 0 0 1
## Acomys_cineraceus 0 0 0 0 0 0 0 0 0 0 0 0 0
## Aepyceros_melampus 0 0 0 0 0 0 0 0 0 0 0 0 0
## Ailuropoda_melanoleuca 1 0 1 1 0 0 0 0 0 0 0 0 0
## Alcelaphus_buselaphus 0 0 0 0 0 0 0 0 0 0 0 0 0
## Alces_alces 0 0 0 0 0 0 0 0 0 0 0 0 0
## cd14 cd15 cd16 cd17 cd18 cd19 IR Terrestrial
## Acinonyx_jubatus 0 0 0 0 0 0 0.03000000 1
## Acomys_cineraceus 0 0 0 0 0 0 0.01666667 1
## Aepyceros_melampus 0 0 0 0 0 0 0.01666667 1
## Ailuropoda_melanoleuca 0 0 0 0 0 0 0.31000000 1
## Alcelaphus_buselaphus 0 0 0 0 0 0 0.01666667 1
## Alces_alces 0 1 0 0 0 0 0.03000000 1
## Marine Freshwater Aerial Mass.g Log_Mass
## Acinonyx_jubatus 0 0 0 46700.0 4.7
## Acomys_cineraceus 0 0 0 20.3 1.3
## Aepyceros_melampus 0 0 0 52500.1 4.7
## Ailuropoda_melanoleuca 0 0 0 108400.0 5.0
## Alcelaphus_buselaphus 0 0 0 171001.5 5.2
## Alces_alces 0 0 0 356998.0 5.6
## Island.Endemicity Diet.Plant Diet.Vertebrate
## Acinonyx_jubatus Occurs on mainland 0 100
## Acomys_cineraceus Occurs on mainland 0 0
## Aepyceros_melampus Occurs on mainland 100 0
## Ailuropoda_melanoleuca Occurs on mainland 90 10
## Alcelaphus_buselaphus Occurs on mainland 100 0
## Alces_alces Occurs on mainland 100 0
## Diet.Invertebrate IUCN comu range
## Acinonyx_jubatus 0 VU 1 441.849278
## Acomys_cineraceus 100 LC 1 86.780792
## Aepyceros_melampus 0 LC 1 293.986996
## Ailuropoda_melanoleuca 0 VU 1 3.290258
## Alcelaphus_buselaphus 0 LC 1 576.008919
## Alces_alces 0 LC 1 3606.410850
tidydata <- match.phylo.data(phylo_pruna, data_comp_row)
## [1] "Dropping taxa from the data because they are not present in the phylogeny:"
## [1] "Acomys_cineraceus" "Anomalurus_derbianus"
## [3] "Cabassous_centralis" "Colobus_vellerosus"
## [5] "Conepatus_humboldtii" "Crocidura_nigeriae"
## [7] "Dasyprocta_prymnolopha" "Dasyprocta_variegata"
## [9] "Dendrohyrax_arboreus" "Euroscaptor_micrura"
## [11] "Funisciurus_anerythrus" "Funisciurus_leucogenys"
## [13] "Funisciurus_substriatus" "Graphiurus_lorraineus"
## [15] "Heliosciurus_gambianus" "Hemiechinus_collaris"
## [17] "Hesperoptenus_tickelli" "Pipistrellus_imbricatus"
## [19] "Kerivoula_africana" "Leopardus_geoffroyi"
## [21] "Lepus_fagani" "Lepus_nigricollis"
## [23] "Lycalopex_culpaeus" "Lycalopex_gymnocercus"
## [25] "Lycalopex_sechurae" "Manis_crassicaudata"
## [27] "Mazama_chunyi" "Microtus_mexicanus"
## [29] "Murina_fusca" "Paraechinus_micropus"
## [31] "Piliocolobus_preussi" "Plecotus_ariel"
## [33] "Plecotus_sacrimontis" "Presbytis_hosei"
## [35] "Presbytis_natunae" "Presbytis_siamensis"
## [37] "Pteropus_faunulus" "Saguinus_fuscicollis"
## [39] "Steatomys_jacksoni" "Tadarida_latouchei"
## [41] "Tamandua_mexicana" "Tolypeutes_tricinctus"
## [43] "Viverra_civettina" "Vulpes_bengalensis"
tidydata$phy#521 spp
##
## Phylogenetic tree with 521 tips and 520 internal nodes.
##
## Tip labels:
## Dugong_dugon, Trichechus_manatus, Trichechus_inunguis, Trichechus_senegalensis, Procavia_capensis, Dendrohyrax_dorsalis, ...
##
## Rooted; includes branch lengths.
str(tidydata$data)
## 'data.frame': 521 obs. of 33 variables:
## $ cd1 : chr " 1" " 0" " 1" " 1" ...
## $ cd2 : chr "0" "0" "0" "0" ...
## $ cd3 : chr "0" "1" "1" "0" ...
## $ cd4 : chr "0" "1" "1" "0" ...
## $ cd5 : chr "1" "1" "1" "0" ...
## $ cd6 : chr "0" "1" "1" "0" ...
## $ cd7 : chr "0" "0" "1" "0" ...
## $ cd8 : chr "0" "1" "1" "0" ...
## $ cd9 : chr "0" "0" "0" "0" ...
## $ cd10 : chr "0" "0" "0" "0" ...
## $ cd11 : chr "0" "1" "1" "0" ...
## $ cd12 : chr "1" "0" "0" "0" ...
## $ cd13 : chr "0" "0" "0" "0" ...
## $ cd14 : chr "0" "0" "0" "0" ...
## $ cd15 : chr "0" "0" "0" "0" ...
## $ cd16 : chr "0" "0" "0" "0" ...
## $ cd17 : chr "0" "0" "0" "0" ...
## $ cd18 : chr "0" "1" "1" "0" ...
## $ cd19 : chr "0" "0" "0" "0" ...
## $ IR : chr "0.29000000" "0.83000000" "1.18000000" "0.03000000" ...
## $ Terrestrial : chr "0" "0" "0" "0" ...
## $ Marine : chr "1" "1" "1" "1" ...
## $ Freshwater : chr "0" "1" "1" "1" ...
## $ Aerial : chr "0" "0" "0" "0" ...
## $ Mass.g : chr " 310000.0" " 341000.0" " 480000.0" " 454000.0" ...
## $ Log_Mass : chr "5.5" "5.5" "5.7" "5.7" ...
## $ Island.Endemicity: chr "Exclusively marine" "Occurs on mainland" "Occurs on mainland" "Occurs on mainland" ...
## $ Diet.Plant : chr "100" "100" "100" " 95" ...
## $ Diet.Vertebrate : chr " 0" " 0" " 0" " 0" ...
## $ Diet.Invertebrate: chr " 0" " 0" " 0" " 5" ...
## $ IUCN : chr "VU" "VU" "VU" "VU" ...
## $ comu : chr " 1" NA NA NA ...
## $ range : chr "5.380811e+02" "1.302891e+02" "5.178365e+01" "3.829432e+01" ...
tidydata$data$IR <- as.numeric(as.character(tidydata$data$IR))
tidydata$data$range <- as.numeric(as.character(tidydata$data$range))
tidydata$data$Log_Mass <- as.numeric(as.character(tidydata$data$Log_Mass))
range(tidydata$data$IR)
## [1] 0.01666667 1.80000000
range(tidydata$data$range)
## [1] 1.416338e-02 4.631626e+04
ggplot(tidydata$data, aes(IR))+
geom_density(fill="red")
ggplot(tidydata$data, aes(log(IR)))+
geom_density(fill="red")
ggplot(tidydata$data, aes(range, IR))+
geom_point()+
stat_smooth(method = "lm")
ggplot(tidydata$data, aes(Log_Mass, IR))+
geom_point()+
stat_smooth(method = "lm")
ggplot(tidydata$data, aes(Log_Mass, IR))+
geom_point(aes(size=range, col=range))+
stat_smooth(method = "loess")
The question is: Does the RI is correlated with range size and body mass?
Our hypothesis is: small-ranges species are in contact with a few traditional communities and would be less chance to have multiple uses. We also expect that larger species provide more body parts for use and would have a larger versatility.
To answer this question we’ll run a PGLS transforming RI to log scale:
mod1 <- gls(log(IR)~range+Log_Mass, correlation = corPagel(1, tidydata$phy), data=tidydata$data)
Diagnostics
plot(mod1)
diagnostics(tidydata$data$IR, tidydata$phy)
Inference
summary(mod1)
## Generalized least squares fit by REML
## Model: log(IR) ~ range + Log_Mass
## Data: tidydata$data
## AIC BIC logLik
## 1804.499 1825.749 -897.2495
##
## Correlation Structure: corPagel
## Formula: ~1
## Parameter estimate(s):
## lambda
## 0.6409421
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) -3.382189 0.9657187 -3.502250 0.0005
## range -0.000015 0.0000198 -0.763286 0.4456
## Log_Mass 0.459889 0.0963299 4.774104 0.0000
##
## Correlation:
## (Intr) range
## range 0.080
## Log_Mass -0.323 -0.287
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.86763228 -1.20381902 -0.70401429 0.06610453 1.28832530
##
## Residual standard error: 1.932106
## Degrees of freedom: 521 total; 518 residual
Running a PWR with the same data to detect nonstationary patterns:
all_data <- phylo4d(tidydata$phy, tidydata$data)
mod_div_bw_prw <- gls(log(IR)~range, data = tidydata$data, correlation = corBrownian(phy=tidydata$phy))
phyloWR <- getEst(pwr(log(IR)~range,all_data, wfun="brownian", verbose=FALSE))
bandw <- get.opt.bw(log(IR)~range,all_data, wfun="martins", method="subplex")
bandw#0.06224979
## [1] 0.06224979
Results
tp(addData(all_data, data.frame(gest=coef(mod_div_bw_prw)[2], glb=confint(mod_div_bw_prw)[2,1], gub=confint(mod_div_bw_prw)[2,2], phyloWR))[,c("est", "lb", "ub", "gest", "glb", "gub")], show.tip.label=TRUE, pex=2, aex=3.5)
Our second question is: Does more closely related species are used to treat the same diseases?
To answer this question we’ll use Fritz’s D, which is a metric to calculate phylogenetic signal for binary traits.
But first let’s explore the data to check the distribution of the trait throughout the phylogeny:
cds <- data_comp[,2:20]
rownames(cds) <- data_comp$species
trait.plot(tidydata$phy, cds, cols = list(cd1=c("red", "blue"), cd2=c("red", "blue"), cd3=c("red", "blue"), cd4=c("red", "blue"), cd5=c("red", "blue"), cd6=c("red", "blue"), cd7=c("red", "blue"), cd8=c("red", "blue"), cd9=c("red", "blue"), cd10=c("red", "blue"), cd11=c("red", "blue"), cd12=c("red", "blue"), cd13=c("red", "blue"), cd14=c("red", "blue"), cd15=c("red", "blue"), cd16=c("red", "blue"), cd17=c("red", "blue"), cd18=c("red", "blue"), cd19=c("red", "blue")))
Now, let’s actually calculate the phylogenetic data:
data_new <- comparative.data(tidydata$phy, data_comp, names.col="species")
phylo.d(data_new, binvar = cd1)#sign
##
## Calculation of D statistic for the phylogenetic structure of a binary variable
##
## Data : data_comp
## Binary variable : cd1
## Counts of states: 0 = 367
## 1 = 106
## Phylogeny : tidydata$phy
## Number of permutations : 1000
##
## Estimated D : 0.6764789
## Probability of E(D) resulting from no (random) phylogenetic structure : 0
## Probability of E(D) resulting from Brownian phylogenetic structure : 0
phylo.d(data_new, binvar = cd2)#sign
##
## Calculation of D statistic for the phylogenetic structure of a binary variable
##
## Data : data_comp
## Binary variable : cd2
## Counts of states: 0 = 405
## 1 = 68
## Phylogeny : tidydata$phy
## Number of permutations : 1000
##
## Estimated D : 0.8212771
## Probability of E(D) resulting from no (random) phylogenetic structure : 0.006
## Probability of E(D) resulting from Brownian phylogenetic structure : 0
phylo.d(data_new, binvar = cd3)#sign diff from 0, but not from 1 => random
##
## Calculation of D statistic for the phylogenetic structure of a binary variable
##
## Data : data_comp
## Binary variable : cd3
## Counts of states: 0 = 418
## 1 = 55
## Phylogeny : tidydata$phy
## Number of permutations : 1000
##
## Estimated D : 0.9450315
## Probability of E(D) resulting from no (random) phylogenetic structure : 0.209
## Probability of E(D) resulting from Brownian phylogenetic structure : 0
phylo.d(data_new, binvar = cd4)#sign
##
## Calculation of D statistic for the phylogenetic structure of a binary variable
##
## Data : data_comp
## Binary variable : cd4
## Counts of states: 0 = 392
## 1 = 81
## Phylogeny : tidydata$phy
## Number of permutations : 1000
##
## Estimated D : 0.5986961
## Probability of E(D) resulting from no (random) phylogenetic structure : 0
## Probability of E(D) resulting from Brownian phylogenetic structure : 0
phylo.d(data_new, binvar = cd5)#sign
##
## Calculation of D statistic for the phylogenetic structure of a binary variable
##
## Data : data_comp
## Binary variable : cd5
## Counts of states: 0 = 403
## 1 = 70
## Phylogeny : tidydata$phy
## Number of permutations : 1000
##
## Estimated D : 0.4998371
## Probability of E(D) resulting from no (random) phylogenetic structure : 0
## Probability of E(D) resulting from Brownian phylogenetic structure : 0
phylo.d(data_new, binvar = cd6)#sign
##
## Calculation of D statistic for the phylogenetic structure of a binary variable
##
## Data : data_comp
## Binary variable : cd6
## Counts of states: 0 = 405
## 1 = 68
## Phylogeny : tidydata$phy
## Number of permutations : 1000
##
## Estimated D : 0.8817533
## Probability of E(D) resulting from no (random) phylogenetic structure : 0.04
## Probability of E(D) resulting from Brownian phylogenetic structure : 0
phylo.d(data_new, binvar = cd7)#sign
##
## Calculation of D statistic for the phylogenetic structure of a binary variable
##
## Data : data_comp
## Binary variable : cd7
## Counts of states: 0 = 431
## 1 = 42
## Phylogeny : tidydata$phy
## Number of permutations : 1000
##
## Estimated D : 0.8007537
## Probability of E(D) resulting from no (random) phylogenetic structure : 0.003
## Probability of E(D) resulting from Brownian phylogenetic structure : 0
phylo.d(data_new, binvar = cd8)#sign
##
## Calculation of D statistic for the phylogenetic structure of a binary variable
##
## Data : data_comp
## Binary variable : cd8
## Counts of states: 0 = 428
## 1 = 45
## Phylogeny : tidydata$phy
## Number of permutations : 1000
##
## Estimated D : 0.6491497
## Probability of E(D) resulting from no (random) phylogenetic structure : 0
## Probability of E(D) resulting from Brownian phylogenetic structure : 0
phylo.d(data_new, binvar = cd9)#sign
##
## Calculation of D statistic for the phylogenetic structure of a binary variable
##
## Data : data_comp
## Binary variable : cd9
## Counts of states: 0 = 442
## 1 = 31
## Phylogeny : tidydata$phy
## Number of permutations : 1000
##
## Estimated D : 0.5504692
## Probability of E(D) resulting from no (random) phylogenetic structure : 0
## Probability of E(D) resulting from Brownian phylogenetic structure : 0.004
phylo.d(data_new, binvar = cd10)#sign
##
## Calculation of D statistic for the phylogenetic structure of a binary variable
##
## Data : data_comp
## Binary variable : cd10
## Counts of states: 0 = 460
## 1 = 13
## Phylogeny : tidydata$phy
## Number of permutations : 1000
##
## Estimated D : 0.6493159
## Probability of E(D) resulting from no (random) phylogenetic structure : 0.014
## Probability of E(D) resulting from Brownian phylogenetic structure : 0.026
phylo.d(data_new, binvar = cd11)#sign
##
## Calculation of D statistic for the phylogenetic structure of a binary variable
##
## Data : data_comp
## Binary variable : cd11
## Counts of states: 0 = 450
## 1 = 23
## Phylogeny : tidydata$phy
## Number of permutations : 1000
##
## Estimated D : 0.6827113
## Probability of E(D) resulting from no (random) phylogenetic structure : 0.002
## Probability of E(D) resulting from Brownian phylogenetic structure : 0.004
phylo.d(data_new, binvar = cd12)#sign
##
## Calculation of D statistic for the phylogenetic structure of a binary variable
##
## Data : data_comp
## Binary variable : cd12
## Counts of states: 0 = 377
## 1 = 96
## Phylogeny : tidydata$phy
## Number of permutations : 1000
##
## Estimated D : 0.6133851
## Probability of E(D) resulting from no (random) phylogenetic structure : 0
## Probability of E(D) resulting from Brownian phylogenetic structure : 0
phylo.d(data_new, binvar = cd13)#sign
##
## Calculation of D statistic for the phylogenetic structure of a binary variable
##
## Data : data_comp
## Binary variable : cd13
## Counts of states: 0 = 446
## 1 = 27
## Phylogeny : tidydata$phy
## Number of permutations : 1000
##
## Estimated D : 0.6451055
## Probability of E(D) resulting from no (random) phylogenetic structure : 0
## Probability of E(D) resulting from Brownian phylogenetic structure : 0.001
phylo.d(data_new, binvar = cd14)#sign
##
## Calculation of D statistic for the phylogenetic structure of a binary variable
##
## Data : data_comp
## Binary variable : cd14
## Counts of states: 0 = 439
## 1 = 34
## Phylogeny : tidydata$phy
## Number of permutations : 1000
##
## Estimated D : 0.4231284
## Probability of E(D) resulting from no (random) phylogenetic structure : 0
## Probability of E(D) resulting from Brownian phylogenetic structure : 0.016
phylo.d(data_new, binvar = cd15)#sign
##
## Calculation of D statistic for the phylogenetic structure of a binary variable
##
## Data : data_comp
## Binary variable : cd15
## Counts of states: 0 = 441
## 1 = 32
## Phylogeny : tidydata$phy
## Number of permutations : 1000
##
## Estimated D : 0.7273171
## Probability of E(D) resulting from no (random) phylogenetic structure : 0.003
## Probability of E(D) resulting from Brownian phylogenetic structure : 0
phylo.d(data_new, binvar = cd16)#sign
##
## Calculation of D statistic for the phylogenetic structure of a binary variable
##
## Data : data_comp
## Binary variable : cd16
## Counts of states: 0 = 441
## 1 = 32
## Phylogeny : tidydata$phy
## Number of permutations : 1000
##
## Estimated D : 0.6282628
## Probability of E(D) resulting from no (random) phylogenetic structure : 0
## Probability of E(D) resulting from Brownian phylogenetic structure : 0
phylo.d(data_new, binvar = cd17)#sign diff from 0, but not from 1 => random
##
## Calculation of D statistic for the phylogenetic structure of a binary variable
##
## Data : data_comp
## Binary variable : cd17
## Counts of states: 0 = 463
## 1 = 10
## Phylogeny : tidydata$phy
## Number of permutations : 1000
##
## Estimated D : 0.9266426
## Probability of E(D) resulting from no (random) phylogenetic structure : 0.334
## Probability of E(D) resulting from Brownian phylogenetic structure : 0.003
phylo.d(data_new, binvar = cd18)#sign diff from random, but not Brownian
##
## Calculation of D statistic for the phylogenetic structure of a binary variable
##
## Data : data_comp
## Binary variable : cd18
## Counts of states: 0 = 464
## 1 = 9
## Phylogeny : tidydata$phy
## Number of permutations : 1000
##
## Estimated D : 0.6230311
## Probability of E(D) resulting from no (random) phylogenetic structure : 0.02
## Probability of E(D) resulting from Brownian phylogenetic structure : 0.068
phylo.d(data_new, binvar = cd19)#sign diff from 0, but not from 1 => random
##
## Calculation of D statistic for the phylogenetic structure of a binary variable
##
## Data : data_comp
## Binary variable : cd19
## Counts of states: 0 = 454
## 1 = 19
## Phylogeny : tidydata$phy
## Number of permutations : 1000
##
## Estimated D : 0.9166655
## Probability of E(D) resulting from no (random) phylogenetic structure : 0.238
## Probability of E(D) resulting from Brownian phylogenetic structure : 0
Question: Groups closely-related are more versatile?
To answer this question, we’ll calculate phylogenetic signal using Blomberg’s K
IRvec=tidydata$data$IR
names(IRvec)=rownames(tidydata$data)
phylosig(tidydata$phy, IRvec, test = TRUE)#K=0.06, P=0.002
##
## Phylogenetic signal K : 0.0629837
## P-value (based on 1000 randomizations) : 0.003
There’s phylogenetic signal in IR, species more closely related are more versatile
#Fith question
Does body mass and IR predict the threat status of species?
First, let’s manipulate the data to make it adequate to run analysis:
tidydata$data$IUCN <- as.factor(as.character(tidydata$data$IUCN))
levels(tidydata$data$IUCN)
## [1] "" "CR" "DD" "EN" "EW" "LC" "NE" "NT" "VU"
myData <- tidydata$data %>%
rownames_to_column("species")
iucn_dat <- myData %>%
filter(IUCN=="LC" | IUCN=="EN" | IUCN=="CR" | IUCN=="VU" | IUCN=="NT" )
levels(iucn_dat$IUCN)
## [1] "" "CR" "DD" "EN" "EW" "LC" "NE" "NT" "VU"
iucn_dat$IUCN <- factor(iucn_dat$IUCN, ordered = TRUE, levels = c("LC", "NT", "VU", "EN", "CR", "EW"))
levels(iucn_dat$IUCN)
## [1] "LC" "NT" "VU" "EN" "CR" "EW"
prune <- iucn_dat%>%
column_to_rownames("species")
str(prune)#ordered factor
## 'data.frame': 509 obs. of 33 variables:
## $ cd1 : chr " 1" " 0" " 1" " 1" ...
## $ cd2 : chr "0" "0" "0" "0" ...
## $ cd3 : chr "0" "1" "1" "0" ...
## $ cd4 : chr "0" "1" "1" "0" ...
## $ cd5 : chr "1" "1" "1" "0" ...
## $ cd6 : chr "0" "1" "1" "0" ...
## $ cd7 : chr "0" "0" "1" "0" ...
## $ cd8 : chr "0" "1" "1" "0" ...
## $ cd9 : chr "0" "0" "0" "0" ...
## $ cd10 : chr "0" "0" "0" "0" ...
## $ cd11 : chr "0" "1" "1" "0" ...
## $ cd12 : chr "1" "0" "0" "0" ...
## $ cd13 : chr "0" "0" "0" "0" ...
## $ cd14 : chr "0" "0" "0" "0" ...
## $ cd15 : chr "0" "0" "0" "0" ...
## $ cd16 : chr "0" "0" "0" "0" ...
## $ cd17 : chr "0" "0" "0" "0" ...
## $ cd18 : chr "0" "1" "1" "0" ...
## $ cd19 : chr "0" "0" "0" "0" ...
## $ IR : num 0.29 0.83 1.18 0.03 0.03 ...
## $ Terrestrial : chr "0" "0" "0" "0" ...
## $ Marine : chr "1" "1" "1" "1" ...
## $ Freshwater : chr "0" "1" "1" "1" ...
## $ Aerial : chr "0" "0" "0" "0" ...
## $ Mass.g : chr " 310000.0" " 341000.0" " 480000.0" " 454000.0" ...
## $ Log_Mass : num 5.5 5.5 5.7 5.7 3.5 3.5 6.6 6.5 4.7 3.7 ...
## $ Island.Endemicity: chr "Exclusively marine" "Occurs on mainland" "Occurs on mainland" "Occurs on mainland" ...
## $ Diet.Plant : chr "100" "100" "100" " 95" ...
## $ Diet.Vertebrate : chr " 0" " 0" " 0" " 0" ...
## $ Diet.Invertebrate: chr " 0" " 0" " 0" " 5" ...
## $ IUCN : Ord.factor w/ 6 levels "LC"<"NT"<"VU"<..: 3 3 3 3 1 1 3 4 1 1 ...
## $ comu : chr " 1" NA NA NA ...
## $ range : num 538.1 130.3 51.8 38.3 1329 ...
data_wo_DD <- match.phylo.data(tidydata$phy, prune)
## [1] "Dropping tips from the tree because they are not present in the data:"
## [1] "Sotalia_fluviatilis" "Tragulus_javanicus"
## [3] "Muntiacus_truongsonensis" "Muntiacus_feae"
## [5] "Elaphurus_davidianus" "Mazama_temama"
## [7] "Mazama_americana" "Tragelaphus_eurycerus"
## [9] "Bos_gaurus" "Antidorcas_marsupialis"
## [11] "Myotis_punicus" "Sciurus_carolinensis"
How many species in each category?
table(data_wo_DD$data$IUCN)
##
## CR EN LC NT VU
## 20 48 321 43 77
To answer this question we’ll use a MCMCglmm setting the IUCN status as ordinal response variable.
Prelude:
#--Creating inverse of distance matriz
sptree<-makeNodeLabel(data_wo_DD$phy, method="number", prefix="node") #rename nodes to be unique
treeAinv<-inverseA(sptree,nodes="ALL",scale=FALSE)$Ainv
#--setting up priors
prior <- list(R = list(V = 1, nu = 0.02),
G=list(G1=list(V=1, nu=0.02)))
model_IR<-MCMCglmm(IUCN~Log_Mass+IR,
random=~species, data=iucn_dat, family="ordinal",
ginverse =list(species=treeAinv), nodes="ALL",
prior=prior,
nitt=40000000,
burnin=12000000,
thin = 80, verbose=FALSE)
Diagnosis of the model:
#plot trace and density of fixed effects; should be no trend in trace
plot(model_IR$Sol)
#plot trace and density of random (additive and residual (=environmental)) variances; should be no trend in trace
plot(model_IR$VCV)
#examine autocorrelation of fixed effects
autocorr.diag(model_IR$Sol)
## (Intercept) Log_Mass IR
## Lag 0 1.000000e+00 1.000000e+00 1.0000000000
## Lag 80 2.871314e-03 4.416359e-04 -0.0011996601
## Lag 400 2.732974e-03 -1.456152e-03 0.0001515264
## Lag 800 3.150507e-05 4.587137e-04 0.0030116110
## Lag 4000 -1.009872e-03 -7.274994e-05 -0.0017969587
#examine autocorrelation of random (additive and residual) variances
autocorr.diag(model_IR$VCV)
## species units
## Lag 0 1.0000000 1.0000000
## Lag 80 0.8774875 0.8384613
## Lag 400 0.8103186 0.7776930
## Lag 800 0.7702241 0.7709167
## Lag 4000 0.6671474 0.7607278
#check effective population size for fixed effects; should be gt 1000
effectiveSize(model_IR$Sol)
## (Intercept) Log_Mass IR
## 347994.8 350000.0 350000.0
#check effective population size for random effects (additve and residual variances); should be gt 1000
effectiveSize(model_IR$VCV)
## species units
## 677.2275 136.9161
#test of convergence, p should be greater than 0.05 for good convergence
heidel.diag(model_IR$VCV)
##
## Stationarity start p-value
## test iteration
## species passed 1 0.071779
## units failed NA 0.000115
##
## Halfwidth Mean Halfwidth
## test
## species passed 5.69e+23 2.33e+22
## units <NA> NA NA
#estimates of additive and residual variances
posterior.mode(model_IR$VCV)
## species units
## 8.144093e+21 1.057129e+23
Inference
summary.MCMCglmm(model_IR)
##
## Iterations = 12000001:39999921
## Thinning interval = 80
## Sample size = 350000
##
## DIC: 0
##
## G-structure: ~species
##
## post.mean l-95% CI u-95% CI eff.samp
## species 5.685e+23 1.149e+21 1.088e+24 677.2
##
## R-structure: ~units
##
## post.mean l-95% CI u-95% CI eff.samp
## units 6.511e+24 1.595e+22 1.172e+25 136.9
##
## Location effects: IUCN ~ Log_Mass + IR
##
## post.mean l-95% CI u-95% CI eff.samp pMCMC
## (Intercept) -69.96 -193288.28 199138.64 347995 1
## Log_Mass -114.99 -196031.55 196798.93 350000 1
## IR -189.21 -195244.46 196211.97 350000 1
##
## Cutpoints:
## post.mean l-95% CI u-95% CI eff.samp
## cutpoint.traitIUCN.1 1.135e+12 1.148e+11 1.662e+12 68.604
## cutpoint.traitIUCN.2 3.593e+12 4.195e+11 4.975e+12 11.464
## cutpoint.traitIUCN.3 6.534e+12 7.552e+11 8.905e+12 4.942
Calculating phylogenetic signal using Pagel’s lambda
lambda <- model_IR$VCV[,'species']/
(model_IR$VCV[,'species']+model_IR$VCV[,'units'])
mean(lambda)
## [1] 0.08577141
posterior.mode(lambda)
## var1
## 0.06486542
HPDinterval(lambda)
## lower upper
## var1 0.02733947 0.1611762
## attr(,"Probability")
## [1] 0.95