Loading packages

R.Version()$version.string; R.Version()$platform
## [1] "R version 4.0.0 (2020-04-24)"
## [1] "x86_64-apple-darwin17.0"
set.seed(1001)#reproducibility
library(picante)
packageVersion("picante")
## [1] '1.8.1'
library(phytools)
packageVersion("phytools")
## [1] '0.7.20'
library(caper)
packageVersion("caper")
## [1] '1.0.1'
library(tidyverse)
packageVersion("tidyverse")
## [1] '1.3.0'
library(nlme)
packageVersion("nlme")
## [1] '3.1.147'
library(phylobase)
packageVersion("phylobase")
## [1] '0.8.10'
library(grid)
packageVersion("grid")
## [1] '4.0.0'
library(MCMCglmm)
packageVersion("MCMCglmm")
## [1] '2.29'
library(diversitree)
packageVersion("diversitree")
## [1] '0.9.13'
devtools::source_url("http://github.com/lizzieinvancouver/pwr/blob/master/pwr-functions.R?raw=TRUE")
devtools::source_url("https://github.com/lizzieinvancouver/pwr/blob/master/pwr-plots.R?raw=TRUE")
source("http://ib.berkeley.edu/courses/ib200b/scripts/diagnostics_v3.R")

Data imput and handling

This is the full dataset containing the information we need to run the models

data_comp <- read.csv("full_mat_final.csv", h=TRUE)
data_comp <- data_comp[,-1]

#rescalling RI to compensate for species used, but without reference to ICD

larger_zero <- data_comp$IR[data_comp$IR>0]
data_comp$IR[data_comp$IR==0] <- min(larger_zero)/max(larger_zero)

head(data_comp)
##                  species cd1 cd2 cd3 cd4 cd5 cd6 cd7 cd8 cd9 cd10 cd11 cd12
## 1       Acinonyx_jubatus   0   0   0   0   0   0   0   0   0    0    0    0
## 2      Acomys_cineraceus   0   0   0   0   0   0   0   0   0    0    0    0
## 3     Aepyceros_melampus   0   0   0   0   0   0   0   0   0    0    0    0
## 4 Ailuropoda_melanoleuca   1   0   1   1   0   0   0   0   0    0    0    0
## 5  Alcelaphus_buselaphus   0   0   0   0   0   0   0   0   0    0    0    0
## 6            Alces_alces   0   0   0   0   0   0   0   0   0    0    0    0
##   cd13 cd14 cd15 cd16 cd17 cd18 cd19         IR Terrestrial Marine Freshwater
## 1    1    0    0    0    0    0    0 0.03000000           1      0          0
## 2    0    0    0    0    0    0    0 0.01666667           1      0          0
## 3    0    0    0    0    0    0    0 0.01666667           1      0          0
## 4    0    0    0    0    0    0    0 0.31000000           1      0          0
## 5    0    0    0    0    0    0    0 0.01666667           1      0          0
## 6    0    0    1    0    0    0    0 0.03000000           1      0          0
##   Aerial   Mass.g Log_Mass  Island.Endemicity Diet.Plant Diet.Vertebrate
## 1      0  46700.0      4.7 Occurs on mainland          0             100
## 2      0     20.3      1.3 Occurs on mainland          0               0
## 3      0  52500.1      4.7 Occurs on mainland        100               0
## 4      0 108400.0      5.0 Occurs on mainland         90              10
## 5      0 171001.5      5.2 Occurs on mainland        100               0
## 6      0 356998.0      5.6 Occurs on mainland        100               0
##   Diet.Invertebrate IUCN comu       range
## 1                 0   VU    1  441.849278
## 2               100   LC    1   86.780792
## 3                 0   LC    1  293.986996
## 4                 0   VU    1    3.290258
## 5                 0   LC    1  576.008919
## 6                 0   LC    1 3606.410850
dim(data_comp)#565 spp
## [1] 565  34

Importing phylogeny

The tree is available at the VertLife webpage

full_nexus <- read.nexus("https://github.com/n8upham/MamPhy_v1/blob/master/_DATA/MamPhy_fullPosterior_BDvr_DNAonly_4098sp_topoFree_NDexp_MCC_v2_target.tre?raw=TRUE")

full_nexus_semAnolis <- drop.tip(full_nexus, "_Anolis_carolinensis")#remove Anolis

sp_names_full <- full_nexus_semAnolis$tip.label
split_names <- sapply(strsplit(sp_names_full, split='_'), function(x) (c(x[1], x[2])))
split_names_t <- data.frame(t(split_names))
new_names <- paste(split_names_t$X1, split_names_t$X2, sep="_")

# check if it is correct

data.frame(original=full_nexus$tip.label[-1][1:10], fixed=new_names[1:10])
##                                                     original
## 1     Ornithorhynchus_anatinus_ORNITHORHYNCHIDAE_MONOTREMATA
## 2              Zaglossus_bruijnii_TACHYGLOSSIDAE_MONOTREMATA
## 3          Tachyglossus_aculeatus_TACHYGLOSSIDAE_MONOTREMATA
## 4          Rhynchocyon_petersi_MACROSCELIDIDAE_MACROSCELIDEA
## 5           Rhynchocyon_cirnei_MACROSCELIDIDAE_MACROSCELIDEA
## 6     Rhynchocyon_udzungwensis_MACROSCELIDIDAE_MACROSCELIDEA
## 7       Elephantulus_rufescens_MACROSCELIDIDAE_MACROSCELIDEA
## 8  Elephantulus_brachyrhynchus_MACROSCELIDIDAE_MACROSCELIDEA
## 9          Elephantulus_intufi_MACROSCELIDIDAE_MACROSCELIDEA
## 10      Elephantulus_rupestris_MACROSCELIDIDAE_MACROSCELIDEA
##                          fixed
## 1     Ornithorhynchus_anatinus
## 2           Zaglossus_bruijnii
## 3       Tachyglossus_aculeatus
## 4          Rhynchocyon_petersi
## 5           Rhynchocyon_cirnei
## 6     Rhynchocyon_udzungwensis
## 7       Elephantulus_rufescens
## 8  Elephantulus_brachyrhynchus
## 9          Elephantulus_intufi
## 10      Elephantulus_rupestris
full_nexus_semAnolis$tip.label <- new_names

Prunning the phylogeny to contain only the species to whcih we have trait data

lista <- read.table("lista_may.txt", h=TRUE)#list of species
head(lista);dim(lista)#565 spp
##                        comu
## Acinonyx_jubatus          1
## Acomys_cineraceus         1
## Aepyceros_melampus        1
## Ailuropoda_melanoleuca    1
## Alcelaphus_buselaphus     1
## Alces_alces               1
## [1] 565   1
phylo_pruna <- prune.sample(t(lista), full_nexus_semAnolis)
phylo_pruna#521 spp
## 
## Phylogenetic tree with 521 tips and 520 internal nodes.
## 
## Tip labels:
##  Dugong_dugon, Trichechus_manatus, Trichechus_inunguis, Trichechus_senegalensis, Procavia_capensis, Dendrohyrax_dorsalis, ...
## 
## Rooted; includes branch lengths.

Data handling

data_comp_row <- column_to_rownames(data_comp, var = "species")
head(data_comp_row)
##                        cd1 cd2 cd3 cd4 cd5 cd6 cd7 cd8 cd9 cd10 cd11 cd12 cd13
## Acinonyx_jubatus         0   0   0   0   0   0   0   0   0    0    0    0    1
## Acomys_cineraceus        0   0   0   0   0   0   0   0   0    0    0    0    0
## Aepyceros_melampus       0   0   0   0   0   0   0   0   0    0    0    0    0
## Ailuropoda_melanoleuca   1   0   1   1   0   0   0   0   0    0    0    0    0
## Alcelaphus_buselaphus    0   0   0   0   0   0   0   0   0    0    0    0    0
## Alces_alces              0   0   0   0   0   0   0   0   0    0    0    0    0
##                        cd14 cd15 cd16 cd17 cd18 cd19         IR Terrestrial
## Acinonyx_jubatus          0    0    0    0    0    0 0.03000000           1
## Acomys_cineraceus         0    0    0    0    0    0 0.01666667           1
## Aepyceros_melampus        0    0    0    0    0    0 0.01666667           1
## Ailuropoda_melanoleuca    0    0    0    0    0    0 0.31000000           1
## Alcelaphus_buselaphus     0    0    0    0    0    0 0.01666667           1
## Alces_alces               0    1    0    0    0    0 0.03000000           1
##                        Marine Freshwater Aerial   Mass.g Log_Mass
## Acinonyx_jubatus            0          0      0  46700.0      4.7
## Acomys_cineraceus           0          0      0     20.3      1.3
## Aepyceros_melampus          0          0      0  52500.1      4.7
## Ailuropoda_melanoleuca      0          0      0 108400.0      5.0
## Alcelaphus_buselaphus       0          0      0 171001.5      5.2
## Alces_alces                 0          0      0 356998.0      5.6
##                         Island.Endemicity Diet.Plant Diet.Vertebrate
## Acinonyx_jubatus       Occurs on mainland          0             100
## Acomys_cineraceus      Occurs on mainland          0               0
## Aepyceros_melampus     Occurs on mainland        100               0
## Ailuropoda_melanoleuca Occurs on mainland         90              10
## Alcelaphus_buselaphus  Occurs on mainland        100               0
## Alces_alces            Occurs on mainland        100               0
##                        Diet.Invertebrate IUCN comu       range
## Acinonyx_jubatus                       0   VU    1  441.849278
## Acomys_cineraceus                    100   LC    1   86.780792
## Aepyceros_melampus                     0   LC    1  293.986996
## Ailuropoda_melanoleuca                 0   VU    1    3.290258
## Alcelaphus_buselaphus                  0   LC    1  576.008919
## Alces_alces                            0   LC    1 3606.410850
tidydata <- match.phylo.data(phylo_pruna, data_comp_row)
## [1] "Dropping taxa from the data because they are not present in the phylogeny:"
##  [1] "Acomys_cineraceus"       "Anomalurus_derbianus"   
##  [3] "Cabassous_centralis"     "Colobus_vellerosus"     
##  [5] "Conepatus_humboldtii"    "Crocidura_nigeriae"     
##  [7] "Dasyprocta_prymnolopha"  "Dasyprocta_variegata"   
##  [9] "Dendrohyrax_arboreus"    "Euroscaptor_micrura"    
## [11] "Funisciurus_anerythrus"  "Funisciurus_leucogenys" 
## [13] "Funisciurus_substriatus" "Graphiurus_lorraineus"  
## [15] "Heliosciurus_gambianus"  "Hemiechinus_collaris"   
## [17] "Hesperoptenus_tickelli"  "Pipistrellus_imbricatus"
## [19] "Kerivoula_africana"      "Leopardus_geoffroyi"    
## [21] "Lepus_fagani"            "Lepus_nigricollis"      
## [23] "Lycalopex_culpaeus"      "Lycalopex_gymnocercus"  
## [25] "Lycalopex_sechurae"      "Manis_crassicaudata"    
## [27] "Mazama_chunyi"           "Microtus_mexicanus"     
## [29] "Murina_fusca"            "Paraechinus_micropus"   
## [31] "Piliocolobus_preussi"    "Plecotus_ariel"         
## [33] "Plecotus_sacrimontis"    "Presbytis_hosei"        
## [35] "Presbytis_natunae"       "Presbytis_siamensis"    
## [37] "Pteropus_faunulus"       "Saguinus_fuscicollis"   
## [39] "Steatomys_jacksoni"      "Tadarida_latouchei"     
## [41] "Tamandua_mexicana"       "Tolypeutes_tricinctus"  
## [43] "Viverra_civettina"       "Vulpes_bengalensis"
tidydata$phy#521 spp
## 
## Phylogenetic tree with 521 tips and 520 internal nodes.
## 
## Tip labels:
##  Dugong_dugon, Trichechus_manatus, Trichechus_inunguis, Trichechus_senegalensis, Procavia_capensis, Dendrohyrax_dorsalis, ...
## 
## Rooted; includes branch lengths.
str(tidydata$data)
## 'data.frame':    521 obs. of  33 variables:
##  $ cd1              : chr  " 1" " 0" " 1" " 1" ...
##  $ cd2              : chr  "0" "0" "0" "0" ...
##  $ cd3              : chr  "0" "1" "1" "0" ...
##  $ cd4              : chr  "0" "1" "1" "0" ...
##  $ cd5              : chr  "1" "1" "1" "0" ...
##  $ cd6              : chr  "0" "1" "1" "0" ...
##  $ cd7              : chr  "0" "0" "1" "0" ...
##  $ cd8              : chr  "0" "1" "1" "0" ...
##  $ cd9              : chr  "0" "0" "0" "0" ...
##  $ cd10             : chr  "0" "0" "0" "0" ...
##  $ cd11             : chr  "0" "1" "1" "0" ...
##  $ cd12             : chr  "1" "0" "0" "0" ...
##  $ cd13             : chr  "0" "0" "0" "0" ...
##  $ cd14             : chr  "0" "0" "0" "0" ...
##  $ cd15             : chr  "0" "0" "0" "0" ...
##  $ cd16             : chr  "0" "0" "0" "0" ...
##  $ cd17             : chr  "0" "0" "0" "0" ...
##  $ cd18             : chr  "0" "1" "1" "0" ...
##  $ cd19             : chr  "0" "0" "0" "0" ...
##  $ IR               : chr  "0.29000000" "0.83000000" "1.18000000" "0.03000000" ...
##  $ Terrestrial      : chr  "0" "0" "0" "0" ...
##  $ Marine           : chr  "1" "1" "1" "1" ...
##  $ Freshwater       : chr  "0" "1" "1" "1" ...
##  $ Aerial           : chr  "0" "0" "0" "0" ...
##  $ Mass.g           : chr  "  310000.0" "  341000.0" "  480000.0" "  454000.0" ...
##  $ Log_Mass         : chr  "5.5" "5.5" "5.7" "5.7" ...
##  $ Island.Endemicity: chr  "Exclusively marine" "Occurs on mainland" "Occurs on mainland" "Occurs on mainland" ...
##  $ Diet.Plant       : chr  "100" "100" "100" " 95" ...
##  $ Diet.Vertebrate  : chr  "  0" "  0" "  0" "  0" ...
##  $ Diet.Invertebrate: chr  "  0" "  0" "  0" "  5" ...
##  $ IUCN             : chr  "VU" "VU" "VU" "VU" ...
##  $ comu             : chr  " 1" NA NA NA ...
##  $ range            : chr  "5.380811e+02" "1.302891e+02" "5.178365e+01" "3.829432e+01" ...
tidydata$data$IR <- as.numeric(as.character(tidydata$data$IR))
tidydata$data$range <- as.numeric(as.character(tidydata$data$range))
tidydata$data$Log_Mass <- as.numeric(as.character(tidydata$data$Log_Mass))

Data Exploration

range(tidydata$data$IR)
## [1] 0.01666667 1.80000000
range(tidydata$data$range)
## [1] 1.416338e-02 4.631626e+04
ggplot(tidydata$data, aes(IR))+
       geom_density(fill="red")

ggplot(tidydata$data, aes(log(IR)))+
       geom_density(fill="red")

ggplot(tidydata$data, aes(range, IR))+
       geom_point()+
       stat_smooth(method = "lm")

ggplot(tidydata$data, aes(Log_Mass, IR))+
       geom_point()+
       stat_smooth(method = "lm")

ggplot(tidydata$data, aes(Log_Mass, IR))+
       geom_point(aes(size=range, col=range))+
       stat_smooth(method = "loess")

First question

The question is: Does the RI is correlated with range size and body mass?

Our hypothesis is: small-ranges species are in contact with a few traditional communities and would be less chance to have multiple uses. We also expect that larger species provide more body parts for use and would have a larger versatility.

To answer this question we’ll run a PGLS transforming RI to log scale:

mod1 <- gls(log(IR)~range+Log_Mass, correlation = corPagel(1, tidydata$phy), data=tidydata$data)

Diagnostics

plot(mod1)

diagnostics(tidydata$data$IR, tidydata$phy)

Inference

summary(mod1)
## Generalized least squares fit by REML
##   Model: log(IR) ~ range + Log_Mass 
##   Data: tidydata$data 
##        AIC      BIC    logLik
##   1804.499 1825.749 -897.2495
## 
## Correlation Structure: corPagel
##  Formula: ~1 
##  Parameter estimate(s):
##    lambda 
## 0.6409421 
## 
## Coefficients:
##                 Value Std.Error   t-value p-value
## (Intercept) -3.382189 0.9657187 -3.502250  0.0005
## range       -0.000015 0.0000198 -0.763286  0.4456
## Log_Mass     0.459889 0.0963299  4.774104  0.0000
## 
##  Correlation: 
##          (Intr) range 
## range     0.080       
## Log_Mass -0.323 -0.287
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -1.86763228 -1.20381902 -0.70401429  0.06610453  1.28832530 
## 
## Residual standard error: 1.932106 
## Degrees of freedom: 521 total; 518 residual

Running a PWR with the same data to detect nonstationary patterns:

all_data <- phylo4d(tidydata$phy, tidydata$data)

mod_div_bw_prw <- gls(log(IR)~range, data = tidydata$data, correlation = corBrownian(phy=tidydata$phy))

phyloWR <- getEst(pwr(log(IR)~range,all_data, wfun="brownian", verbose=FALSE))

bandw <- get.opt.bw(log(IR)~range,all_data, wfun="martins", method="subplex")
bandw#0.06224979
## [1] 0.06224979

Results

tp(addData(all_data, data.frame(gest=coef(mod_div_bw_prw)[2], glb=confint(mod_div_bw_prw)[2,1], gub=confint(mod_div_bw_prw)[2,2], phyloWR))[,c("est", "lb", "ub", "gest", "glb", "gub")], show.tip.label=TRUE, pex=2, aex=3.5)

Second question

Our second question is: Does more closely related species are used to treat the same diseases?

To answer this question we’ll use Fritz’s D, which is a metric to calculate phylogenetic signal for binary traits.

But first let’s explore the data to check the distribution of the trait throughout the phylogeny:

cds <- data_comp[,2:20]
rownames(cds) <- data_comp$species

trait.plot(tidydata$phy, cds, cols = list(cd1=c("red", "blue"), cd2=c("red", "blue"), cd3=c("red", "blue"), cd4=c("red", "blue"), cd5=c("red", "blue"), cd6=c("red", "blue"), cd7=c("red", "blue"), cd8=c("red", "blue"), cd9=c("red", "blue"), cd10=c("red", "blue"), cd11=c("red", "blue"), cd12=c("red", "blue"), cd13=c("red", "blue"), cd14=c("red", "blue"), cd15=c("red", "blue"), cd16=c("red", "blue"), cd17=c("red", "blue"), cd18=c("red", "blue"), cd19=c("red", "blue")))

Now, let’s actually calculate the phylogenetic data:

data_new <- comparative.data(tidydata$phy, data_comp, names.col="species")

phylo.d(data_new, binvar = cd1)#sign
## 
## Calculation of D statistic for the phylogenetic structure of a binary variable
## 
##   Data :  data_comp
##   Binary variable :  cd1
##   Counts of states:  0 = 367
##                      1 = 106
##   Phylogeny :  tidydata$phy
##   Number of permutations :  1000
## 
## Estimated D :  0.6764789
## Probability of E(D) resulting from no (random) phylogenetic structure :  0
## Probability of E(D) resulting from Brownian phylogenetic structure    :  0
phylo.d(data_new, binvar = cd2)#sign
## 
## Calculation of D statistic for the phylogenetic structure of a binary variable
## 
##   Data :  data_comp
##   Binary variable :  cd2
##   Counts of states:  0 = 405
##                      1 = 68
##   Phylogeny :  tidydata$phy
##   Number of permutations :  1000
## 
## Estimated D :  0.8212771
## Probability of E(D) resulting from no (random) phylogenetic structure :  0.006
## Probability of E(D) resulting from Brownian phylogenetic structure    :  0
phylo.d(data_new, binvar = cd3)#sign diff from 0, but not from 1 => random
## 
## Calculation of D statistic for the phylogenetic structure of a binary variable
## 
##   Data :  data_comp
##   Binary variable :  cd3
##   Counts of states:  0 = 418
##                      1 = 55
##   Phylogeny :  tidydata$phy
##   Number of permutations :  1000
## 
## Estimated D :  0.9450315
## Probability of E(D) resulting from no (random) phylogenetic structure :  0.209
## Probability of E(D) resulting from Brownian phylogenetic structure    :  0
phylo.d(data_new, binvar = cd4)#sign
## 
## Calculation of D statistic for the phylogenetic structure of a binary variable
## 
##   Data :  data_comp
##   Binary variable :  cd4
##   Counts of states:  0 = 392
##                      1 = 81
##   Phylogeny :  tidydata$phy
##   Number of permutations :  1000
## 
## Estimated D :  0.5986961
## Probability of E(D) resulting from no (random) phylogenetic structure :  0
## Probability of E(D) resulting from Brownian phylogenetic structure    :  0
phylo.d(data_new, binvar = cd5)#sign
## 
## Calculation of D statistic for the phylogenetic structure of a binary variable
## 
##   Data :  data_comp
##   Binary variable :  cd5
##   Counts of states:  0 = 403
##                      1 = 70
##   Phylogeny :  tidydata$phy
##   Number of permutations :  1000
## 
## Estimated D :  0.4998371
## Probability of E(D) resulting from no (random) phylogenetic structure :  0
## Probability of E(D) resulting from Brownian phylogenetic structure    :  0
phylo.d(data_new, binvar = cd6)#sign
## 
## Calculation of D statistic for the phylogenetic structure of a binary variable
## 
##   Data :  data_comp
##   Binary variable :  cd6
##   Counts of states:  0 = 405
##                      1 = 68
##   Phylogeny :  tidydata$phy
##   Number of permutations :  1000
## 
## Estimated D :  0.8817533
## Probability of E(D) resulting from no (random) phylogenetic structure :  0.04
## Probability of E(D) resulting from Brownian phylogenetic structure    :  0
phylo.d(data_new, binvar = cd7)#sign
## 
## Calculation of D statistic for the phylogenetic structure of a binary variable
## 
##   Data :  data_comp
##   Binary variable :  cd7
##   Counts of states:  0 = 431
##                      1 = 42
##   Phylogeny :  tidydata$phy
##   Number of permutations :  1000
## 
## Estimated D :  0.8007537
## Probability of E(D) resulting from no (random) phylogenetic structure :  0.003
## Probability of E(D) resulting from Brownian phylogenetic structure    :  0
phylo.d(data_new, binvar = cd8)#sign
## 
## Calculation of D statistic for the phylogenetic structure of a binary variable
## 
##   Data :  data_comp
##   Binary variable :  cd8
##   Counts of states:  0 = 428
##                      1 = 45
##   Phylogeny :  tidydata$phy
##   Number of permutations :  1000
## 
## Estimated D :  0.6491497
## Probability of E(D) resulting from no (random) phylogenetic structure :  0
## Probability of E(D) resulting from Brownian phylogenetic structure    :  0
phylo.d(data_new, binvar = cd9)#sign
## 
## Calculation of D statistic for the phylogenetic structure of a binary variable
## 
##   Data :  data_comp
##   Binary variable :  cd9
##   Counts of states:  0 = 442
##                      1 = 31
##   Phylogeny :  tidydata$phy
##   Number of permutations :  1000
## 
## Estimated D :  0.5504692
## Probability of E(D) resulting from no (random) phylogenetic structure :  0
## Probability of E(D) resulting from Brownian phylogenetic structure    :  0.004
phylo.d(data_new, binvar = cd10)#sign
## 
## Calculation of D statistic for the phylogenetic structure of a binary variable
## 
##   Data :  data_comp
##   Binary variable :  cd10
##   Counts of states:  0 = 460
##                      1 = 13
##   Phylogeny :  tidydata$phy
##   Number of permutations :  1000
## 
## Estimated D :  0.6493159
## Probability of E(D) resulting from no (random) phylogenetic structure :  0.014
## Probability of E(D) resulting from Brownian phylogenetic structure    :  0.026
phylo.d(data_new, binvar = cd11)#sign
## 
## Calculation of D statistic for the phylogenetic structure of a binary variable
## 
##   Data :  data_comp
##   Binary variable :  cd11
##   Counts of states:  0 = 450
##                      1 = 23
##   Phylogeny :  tidydata$phy
##   Number of permutations :  1000
## 
## Estimated D :  0.6827113
## Probability of E(D) resulting from no (random) phylogenetic structure :  0.002
## Probability of E(D) resulting from Brownian phylogenetic structure    :  0.004
phylo.d(data_new, binvar = cd12)#sign
## 
## Calculation of D statistic for the phylogenetic structure of a binary variable
## 
##   Data :  data_comp
##   Binary variable :  cd12
##   Counts of states:  0 = 377
##                      1 = 96
##   Phylogeny :  tidydata$phy
##   Number of permutations :  1000
## 
## Estimated D :  0.6133851
## Probability of E(D) resulting from no (random) phylogenetic structure :  0
## Probability of E(D) resulting from Brownian phylogenetic structure    :  0
phylo.d(data_new, binvar = cd13)#sign
## 
## Calculation of D statistic for the phylogenetic structure of a binary variable
## 
##   Data :  data_comp
##   Binary variable :  cd13
##   Counts of states:  0 = 446
##                      1 = 27
##   Phylogeny :  tidydata$phy
##   Number of permutations :  1000
## 
## Estimated D :  0.6451055
## Probability of E(D) resulting from no (random) phylogenetic structure :  0
## Probability of E(D) resulting from Brownian phylogenetic structure    :  0.001
phylo.d(data_new, binvar = cd14)#sign
## 
## Calculation of D statistic for the phylogenetic structure of a binary variable
## 
##   Data :  data_comp
##   Binary variable :  cd14
##   Counts of states:  0 = 439
##                      1 = 34
##   Phylogeny :  tidydata$phy
##   Number of permutations :  1000
## 
## Estimated D :  0.4231284
## Probability of E(D) resulting from no (random) phylogenetic structure :  0
## Probability of E(D) resulting from Brownian phylogenetic structure    :  0.016
phylo.d(data_new, binvar = cd15)#sign
## 
## Calculation of D statistic for the phylogenetic structure of a binary variable
## 
##   Data :  data_comp
##   Binary variable :  cd15
##   Counts of states:  0 = 441
##                      1 = 32
##   Phylogeny :  tidydata$phy
##   Number of permutations :  1000
## 
## Estimated D :  0.7273171
## Probability of E(D) resulting from no (random) phylogenetic structure :  0.003
## Probability of E(D) resulting from Brownian phylogenetic structure    :  0
phylo.d(data_new, binvar = cd16)#sign
## 
## Calculation of D statistic for the phylogenetic structure of a binary variable
## 
##   Data :  data_comp
##   Binary variable :  cd16
##   Counts of states:  0 = 441
##                      1 = 32
##   Phylogeny :  tidydata$phy
##   Number of permutations :  1000
## 
## Estimated D :  0.6282628
## Probability of E(D) resulting from no (random) phylogenetic structure :  0
## Probability of E(D) resulting from Brownian phylogenetic structure    :  0
phylo.d(data_new, binvar = cd17)#sign diff from 0, but not from 1 => random 
## 
## Calculation of D statistic for the phylogenetic structure of a binary variable
## 
##   Data :  data_comp
##   Binary variable :  cd17
##   Counts of states:  0 = 463
##                      1 = 10
##   Phylogeny :  tidydata$phy
##   Number of permutations :  1000
## 
## Estimated D :  0.9266426
## Probability of E(D) resulting from no (random) phylogenetic structure :  0.334
## Probability of E(D) resulting from Brownian phylogenetic structure    :  0.003
phylo.d(data_new, binvar = cd18)#sign diff from random, but not Brownian
## 
## Calculation of D statistic for the phylogenetic structure of a binary variable
## 
##   Data :  data_comp
##   Binary variable :  cd18
##   Counts of states:  0 = 464
##                      1 = 9
##   Phylogeny :  tidydata$phy
##   Number of permutations :  1000
## 
## Estimated D :  0.6230311
## Probability of E(D) resulting from no (random) phylogenetic structure :  0.02
## Probability of E(D) resulting from Brownian phylogenetic structure    :  0.068
phylo.d(data_new, binvar = cd19)#sign diff from 0, but not from 1 => random
## 
## Calculation of D statistic for the phylogenetic structure of a binary variable
## 
##   Data :  data_comp
##   Binary variable :  cd19
##   Counts of states:  0 = 454
##                      1 = 19
##   Phylogeny :  tidydata$phy
##   Number of permutations :  1000
## 
## Estimated D :  0.9166655
## Probability of E(D) resulting from no (random) phylogenetic structure :  0.238
## Probability of E(D) resulting from Brownian phylogenetic structure    :  0

Third question

Question: Groups closely-related are more versatile?

To answer this question, we’ll calculate phylogenetic signal using Blomberg’s K

IRvec=tidydata$data$IR
names(IRvec)=rownames(tidydata$data)

phylosig(tidydata$phy, IRvec, test = TRUE)#K=0.06, P=0.002
## 
## Phylogenetic signal K : 0.0629837 
## P-value (based on 1000 randomizations) : 0.003

There’s phylogenetic signal in IR, species more closely related are more versatile

#Fith question

Does body mass and IR predict the threat status of species?

First, let’s manipulate the data to make it adequate to run analysis:

tidydata$data$IUCN <- as.factor(as.character(tidydata$data$IUCN))

levels(tidydata$data$IUCN)
## [1] ""   "CR" "DD" "EN" "EW" "LC" "NE" "NT" "VU"
myData <- tidydata$data %>% 
       rownames_to_column("species")

iucn_dat <- myData %>% 
       filter(IUCN=="LC" | IUCN=="EN"  | IUCN=="CR"  | IUCN=="VU"  | IUCN=="NT" ) 
levels(iucn_dat$IUCN)
## [1] ""   "CR" "DD" "EN" "EW" "LC" "NE" "NT" "VU"
iucn_dat$IUCN <- factor(iucn_dat$IUCN, ordered = TRUE, levels = c("LC", "NT", "VU", "EN", "CR", "EW"))
levels(iucn_dat$IUCN)
## [1] "LC" "NT" "VU" "EN" "CR" "EW"
prune <- iucn_dat%>%
       column_to_rownames("species")
str(prune)#ordered factor
## 'data.frame':    509 obs. of  33 variables:
##  $ cd1              : chr  " 1" " 0" " 1" " 1" ...
##  $ cd2              : chr  "0" "0" "0" "0" ...
##  $ cd3              : chr  "0" "1" "1" "0" ...
##  $ cd4              : chr  "0" "1" "1" "0" ...
##  $ cd5              : chr  "1" "1" "1" "0" ...
##  $ cd6              : chr  "0" "1" "1" "0" ...
##  $ cd7              : chr  "0" "0" "1" "0" ...
##  $ cd8              : chr  "0" "1" "1" "0" ...
##  $ cd9              : chr  "0" "0" "0" "0" ...
##  $ cd10             : chr  "0" "0" "0" "0" ...
##  $ cd11             : chr  "0" "1" "1" "0" ...
##  $ cd12             : chr  "1" "0" "0" "0" ...
##  $ cd13             : chr  "0" "0" "0" "0" ...
##  $ cd14             : chr  "0" "0" "0" "0" ...
##  $ cd15             : chr  "0" "0" "0" "0" ...
##  $ cd16             : chr  "0" "0" "0" "0" ...
##  $ cd17             : chr  "0" "0" "0" "0" ...
##  $ cd18             : chr  "0" "1" "1" "0" ...
##  $ cd19             : chr  "0" "0" "0" "0" ...
##  $ IR               : num  0.29 0.83 1.18 0.03 0.03 ...
##  $ Terrestrial      : chr  "0" "0" "0" "0" ...
##  $ Marine           : chr  "1" "1" "1" "1" ...
##  $ Freshwater       : chr  "0" "1" "1" "1" ...
##  $ Aerial           : chr  "0" "0" "0" "0" ...
##  $ Mass.g           : chr  "  310000.0" "  341000.0" "  480000.0" "  454000.0" ...
##  $ Log_Mass         : num  5.5 5.5 5.7 5.7 3.5 3.5 6.6 6.5 4.7 3.7 ...
##  $ Island.Endemicity: chr  "Exclusively marine" "Occurs on mainland" "Occurs on mainland" "Occurs on mainland" ...
##  $ Diet.Plant       : chr  "100" "100" "100" " 95" ...
##  $ Diet.Vertebrate  : chr  "  0" "  0" "  0" "  0" ...
##  $ Diet.Invertebrate: chr  "  0" "  0" "  0" "  5" ...
##  $ IUCN             : Ord.factor w/ 6 levels "LC"<"NT"<"VU"<..: 3 3 3 3 1 1 3 4 1 1 ...
##  $ comu             : chr  " 1" NA NA NA ...
##  $ range            : num  538.1 130.3 51.8 38.3 1329 ...
data_wo_DD <- match.phylo.data(tidydata$phy, prune)
## [1] "Dropping tips from the tree because they are not present in the data:"
##  [1] "Sotalia_fluviatilis"      "Tragulus_javanicus"      
##  [3] "Muntiacus_truongsonensis" "Muntiacus_feae"          
##  [5] "Elaphurus_davidianus"     "Mazama_temama"           
##  [7] "Mazama_americana"         "Tragelaphus_eurycerus"   
##  [9] "Bos_gaurus"               "Antidorcas_marsupialis"  
## [11] "Myotis_punicus"           "Sciurus_carolinensis"

How many species in each category?

table(data_wo_DD$data$IUCN)
## 
##  CR  EN  LC  NT  VU 
##  20  48 321  43  77

To answer this question we’ll use a MCMCglmm setting the IUCN status as ordinal response variable.

Prelude:

#--Creating inverse of distance matriz
sptree<-makeNodeLabel(data_wo_DD$phy, method="number", prefix="node") #rename nodes to be unique
treeAinv<-inverseA(sptree,nodes="ALL",scale=FALSE)$Ainv

#--setting up priors

prior <- list(R = list(V = 1, nu = 0.02),
              G=list(G1=list(V=1, nu=0.02)))
model_IR<-MCMCglmm(IUCN~Log_Mass+IR,
                   random=~species,                                              data=iucn_dat, family="ordinal", 
                   ginverse =list(species=treeAinv),                            nodes="ALL",
                   prior=prior,
                   nitt=40000000, 
                   burnin=12000000, 
                   thin = 80, verbose=FALSE)

Diagnosis of the model:

#plot trace and density of fixed effects; should be no trend in trace
plot(model_IR$Sol)

#plot trace and density of random (additive and residual (=environmental)) variances; should be no trend in trace
plot(model_IR$VCV)

#examine autocorrelation of fixed effects
autocorr.diag(model_IR$Sol)
##            (Intercept)      Log_Mass            IR
## Lag 0     1.000000e+00  1.000000e+00  1.0000000000
## Lag 80    2.871314e-03  4.416359e-04 -0.0011996601
## Lag 400   2.732974e-03 -1.456152e-03  0.0001515264
## Lag 800   3.150507e-05  4.587137e-04  0.0030116110
## Lag 4000 -1.009872e-03 -7.274994e-05 -0.0017969587
#examine autocorrelation of random (additive and residual) variances
autocorr.diag(model_IR$VCV)
##            species     units
## Lag 0    1.0000000 1.0000000
## Lag 80   0.8774875 0.8384613
## Lag 400  0.8103186 0.7776930
## Lag 800  0.7702241 0.7709167
## Lag 4000 0.6671474 0.7607278
#check effective population size for fixed effects; should be gt 1000
effectiveSize(model_IR$Sol)
## (Intercept)    Log_Mass          IR 
##    347994.8    350000.0    350000.0
#check effective population size for random effects (additve and residual variances); should be gt 1000
effectiveSize(model_IR$VCV)
##  species    units 
## 677.2275 136.9161
#test of convergence, p should be greater than 0.05 for good convergence
heidel.diag(model_IR$VCV)
##                                        
##         Stationarity start     p-value 
##         test         iteration         
## species passed        1        0.071779
## units   failed       NA        0.000115
##                                     
##         Halfwidth Mean     Halfwidth
##         test                        
## species passed    5.69e+23 2.33e+22 
## units   <NA>            NA       NA
#estimates of additive and residual variances
posterior.mode(model_IR$VCV)
##      species        units 
## 8.144093e+21 1.057129e+23

Inference

summary.MCMCglmm(model_IR)
## 
##  Iterations = 12000001:39999921
##  Thinning interval  = 80
##  Sample size  = 350000 
## 
##  DIC: 0 
## 
##  G-structure:  ~species
## 
##         post.mean  l-95% CI  u-95% CI eff.samp
## species 5.685e+23 1.149e+21 1.088e+24    677.2
## 
##  R-structure:  ~units
## 
##       post.mean  l-95% CI  u-95% CI eff.samp
## units 6.511e+24 1.595e+22 1.172e+25    136.9
## 
##  Location effects: IUCN ~ Log_Mass + IR 
## 
##              post.mean   l-95% CI   u-95% CI eff.samp pMCMC
## (Intercept)     -69.96 -193288.28  199138.64   347995     1
## Log_Mass       -114.99 -196031.55  196798.93   350000     1
## IR             -189.21 -195244.46  196211.97   350000     1
## 
##  Cutpoints: 
##                      post.mean  l-95% CI  u-95% CI eff.samp
## cutpoint.traitIUCN.1 1.135e+12 1.148e+11 1.662e+12   68.604
## cutpoint.traitIUCN.2 3.593e+12 4.195e+11 4.975e+12   11.464
## cutpoint.traitIUCN.3 6.534e+12 7.552e+11 8.905e+12    4.942

Calculating phylogenetic signal using Pagel’s lambda

lambda <- model_IR$VCV[,'species']/
        (model_IR$VCV[,'species']+model_IR$VCV[,'units'])
mean(lambda)
## [1] 0.08577141
posterior.mode(lambda)
##       var1 
## 0.06486542
HPDinterval(lambda)
##           lower     upper
## var1 0.02733947 0.1611762
## attr(,"Probability")
## [1] 0.95