R.Version()$version.string; R.Version()$platform
## [1] "R version 4.0.2 (2020-06-22)"
## [1] "x86_64-apple-darwin17.0"
library(tidyverse)
packageVersion("tidyverse")
## [1] '1.3.0'
library(psych)
packageVersion("psych")
## [1] '2.0.7'
library(phytools)
packageVersion("phytools")
## [1] '0.7.47'
library(ggtree)
packageVersion("ggtree")
## [1] '2.2.1'
library(phylosignal)
packageVersion("phylosignal")
## [1] '1.3'
library(adephylo)
packageVersion("adephylo")
## [1] '1.1.11'
library(phylobase)
packageVersion("phylobase")
## [1] '0.8.10'
library(geomorph)
packageVersion("geomorph")
## [1] '3.3.1'
library(rtrees)
packageVersion("rtrees")
## [1] '0.0.0.9000'
library(readxl)
packageVersion("readxl")
## [1] '1.3.1'
library(RRphylo)
packageVersion("RRphylo")
## [1] '2.4.7'
library(visdat)
packageVersion("visdat")
## [1] '0.5.3'
library(dispRity)
packageVersion("dispRity")
## [1] '1.4.1'
source('screestick.R', echo=TRUE)
##
## > screestick <- function(ev, las = 1, positive = TRUE,
## + evmean = FALSE, relative = FALSE) {
## + require(vegan)
## + if (is.vector(ev) == FALS .... [TRUNCATED]
This is the smaller dataset with individual measurements for the 15 species we collected in the field. We’ll have to calculate species mean to continue with the analysis.
raw_data_pant <- read_excel("Medidas_Revisadas.xlsx",
sheet = "GERAL")
raw_data_pant
## # A tibble: 3,700 x 10
## Species group Peixe Hemacia AreaHema AreaNucl rnc VolumHema VolumNucl
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Piarac… Fish 1 1 111. 16.0 14.5 792. 115.
## 2 Piarac… Fish 1 2 107. 25.5 23.7 935. 222.
## 3 Piarac… Fish 1 3 105. 21.1 20.1 1122. 225.
## 4 Piarac… Fish 1 4 121. 19.0 15.7 1102. 174.
## 5 Piarac… Fish 1 5 113. 21.9 19.4 801. 155.
## 6 Piarac… Fish 1 6 107. 18.6 17.3 772. 133.
## 7 Piarac… Fish 1 7 114. 19.3 16.9 997. 168.
## 8 Piarac… Fish 1 8 106. 20.4 19.4 718. 139.
## 9 Piarac… Fish 1 9 105. 20.3 19.3 921. 178.
## 10 Piarac… Fish 1 10 104. 20.0 19.3 876. 169.
## # … with 3,690 more rows, and 1 more variable: FatorF <dbl>
mean_species <- raw_data_pant %>%
group_by(Species, group) %>%
summarise(CellArea=mean(AreaHema), NucleusArea=mean(AreaNucl),
RNC=mean(rnc), CellVolume=mean(VolumHema),
NucleusVolume=mean(VolumNucl), ShapeFactor=mean(FatorF))
## `summarise()` regrouping output by 'Species' (override with `.groups` argument)
mean_species
## # A tibble: 15 x 8
## # Groups: Species [15]
## Species group CellArea NucleusArea RNC CellVolume NucleusVolume ShapeFactor
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Astyan… Fish 75.8 22.5 29.4 766. 223. 0.930
## 2 Brycon… Fish 83.1 13.8 16.7 577. 95.7 0.926
## 3 Bujurq… Fish 83.6 14.5 17.4 645. 112. 0.894
## 4 Cichla… Fish 86.3 15.1 17.7 621. 109. 0.943
## 5 Gymnot… Fish 69.9 14.1 20.6 618. 123. 0.944
## 6 Hyphes… Fish 90.0 14.0 15.9 720. 111. 0.840
## 7 Hypost… Fish 99.1 16.6 17.2 690. 118. 0.787
## 8 Metynn… Fish 84.9 13.9 16.5 613. 101. 0.934
## 9 Piarac… Fish 102. 17.6 17.3 786. 136. 0.779
## 10 Poecil… Fish 41.1 7.45 18.4 197. 35.6 0.949
## 11 Prochi… Fish 84.6 14.7 17.4 624. 108. 0.918
## 12 Pseudo… Fish 72.7 11.8 16.2 444. 71.9 0.790
## 13 Pygoce… Fish 107. 18.5 17.7 1301. 227. 0.931
## 14 Rhamdi… Fish 84.1 15.1 18.2 533. 95.2 0.771
## 15 Synbra… Fish 175. 27.3 15.7 1631. 254. 0.776
This is the Fish dataset from the Cell size data base. We’ll select only the columns with the variables of interest
raw_data_db <- read_excel("dados_sp_site.xls")
raw_data_db
## # A tibble: 50 x 15
## Species group CLD_mean CSD_mean NLD_mean NSD_mean CellArea Comprimento Raio
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Aluter… Fish 8.2 7.2 3.9 2.9 46.3 7.7 3.85
## 2 Anguil… Fish 11.5 7.6 3.8 3.2 68.6 9.55 4.78
## 3 Balist… Fish 9.2 6.6 4 2.7 47.7 7.9 3.95
## 4 Brevoo… Fish 9.8 7.3 4.6 3.2 56.2 8.55 4.28
## 5 Carass… Fish 12.5 8.5 5.5 4 83.4 10.5 5.25
## 6 Carcha… Shark 15.1 10.4 5.8 4.3 123. 12.8 6.38
## 7 Carcha… Shark 19 13.5 7.3 6.5 201. 16.2 8.12
## 8 Cyprin… Fish 11.9 7.4 4 3.2 69.1 9.65 4.82
## 9 Dasyat… Shark 19.7 13.8 8.1 6.9 213. 16.8 8.38
## 10 Neotry… Shark 18 14 7 5.5 198. 16 8
## # … with 40 more rows, and 6 more variables: CellVolume <dbl>, PerimHema <dbl>,
## # ShapeFactor <dbl>, NucleusArea <dbl>, RNC <dbl>, NucleusVolume <dbl>
cell_size <- raw_data_db %>%
select("Species", "group", "CellArea", "NucleusArea", "RNC", "CellVolume", "NucleusVolume", "ShapeFactor")
cell_size#65 species
## # A tibble: 50 x 8
## Species group CellArea NucleusArea RNC CellVolume NucleusVolume ShapeFactor
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Aluter… Fish 46.3 8.88 19.2 239. 45.8 0.878
## 2 Anguil… Fish 68.6 9.55 13.9 456. 63.5 0.661
## 3 Balist… Fish 47.7 8.48 17.8 258. 45.9 0.717
## 4 Brevoo… Fish 56.2 11.6 20.6 327. 67.4 0.745
## 5 Carass… Fish 83.4 17.3 20.7 606. 126. 0.68
## 6 Carcha… Shark 123. 19.6 15.9 1086. 172. 0.689
## 7 Carcha… Shark 201. 37.2 18.5 2247. 416. 0.711
## 8 Cyprin… Fish 69.1 10.0 14.5 471. 68.4 0.622
## 9 Dasyat… Shark 213. 43.9 20.6 2461. 506. 0.701
## 10 Neotry… Shark 198. 30.2 15.3 2145. 328. 0.778
## # … with 40 more rows
Now let’s combine the two datasets and create another subset only for bony fish and another one for sharks and rays:
full_db <- bind_rows(mean_species, cell_size)
fish_only <- filter(full_db, group=="Fish")
fish_only <- fish_only[,-c(2)]
fish_only
## # A tibble: 53 x 7
## # Groups: Species [53]
## Species CellArea NucleusArea RNC CellVolume NucleusVolume ShapeFactor
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Astyanax_lin… 75.8 22.5 29.4 766. 223. 0.930
## 2 Brycon_hilar… 83.1 13.8 16.7 577. 95.7 0.926
## 3 Bujurquina_v… 83.6 14.5 17.4 645. 112. 0.894
## 4 Cichlasoma_d… 86.3 15.1 17.7 621. 109. 0.943
## 5 Gymnotus_ina… 69.9 14.1 20.6 618. 123. 0.944
## 6 Hyphessobryc… 90.0 14.0 15.9 720. 111. 0.840
## 7 Hypostomus_b… 99.1 16.6 17.2 690. 118. 0.787
## 8 Metynnis_mac… 84.9 13.9 16.5 613. 101. 0.934
## 9 Piaractus_me… 102. 17.6 17.3 786. 136. 0.779
## 10 Poecilia_ret… 41.1 7.45 18.4 197. 35.6 0.949
## # … with 43 more rows
shark_only <- filter(full_db, group=="Shark")
Here, we will prune the phylogeny for the species to which we have data from the fully-sampled topology of Rabosky et al. (2018) Nature
filono <- sp_list_df(sp_list = c(fish_only$Species),
taxon = "fish")
fish_tree_db <- get_tree(sp_list = filono,
tree = tree_fish, # either
taxon = "fish", # or
scenario = "S1",
show_grafted = FALSE)
## 8 species added at genus level (*)
## 1 species added at family level (**)
## Warning: `mutate_()` is deprecated as of dplyr 0.7.0.
## Please use `mutate()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
plot.phylo(ladderize(fish_tree_db), cex=0.8)#com escala mostrando a datação
axisPhylo()
The package returns the asterisks to represent species that were imputed.
The Elasmobranch tree comes from VertLife websie
shark <- read.nexus("shark_tree.nex")
shark.tree <- shark[[1]]#random tree
plot.phylo(shark[[1]], root.edge = TRUE);axisPhylo()
Now, let’s import the species trait data from FishBase and explore its structure and percentage of missing data:
especies_traits <- read_excel("especies_traits.xls", col_types = c("text", "text", "text", "text", "numeric", "numeric", "numeric", "numeric", "numeric", "text", "numeric", "numeric"), na = "NA")
especies_traits
## # A tibble: 65 x 12
## Species group environment water_column max_size max_depth minwater_temp
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Raja_e… Shark saltwater demersal 84 330 10
## 2 Leucor… Shark saltwater demersal 54 90 1.2
## 3 Zearaj… Shark saltwater demersal 210 500 4
## 4 Neotry… Shark saltwater  reef-assoc… 70 170 NA
## 5 Dasyat… Shark saltwater demersal 300 270 NA
## 6 Squalu… Shark saltwater demersal 95 1460 7
## 7 Scylio… Shark saltwater demersal 100 780 NA
## 8 Sphyrn… Shark saltwater benthopelag… 134 12 NA
## 9 Priona… Shark saltwater pelagic-oce… 400 1000 NA
## 10 Carcha… Shark saltwater  reef-assoc… 300 100 NA
## # … with 55 more rows, and 5 more variables: max_water_temp <dbl>,
## # mean_prefered_temp <dbl>, swiming_mode <chr>, life_span <dbl>,
## # gen_time <dbl>
vis_miss(especies_traits[,-c(1,2)], sort_miss = TRUE)
Now, let’s understand the trait data
traits <- especies_traits %>%
select("Species", "group", "environment", "water_column", "max_size", "max_depth", "mean_prefered_temp")
traits
## # A tibble: 65 x 7
## Species group environment water_column max_size max_depth mean_prefered_t…
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Raja_egla… Shark saltwater demersal 84 330 23.5
## 2 Leucoraja… Shark saltwater demersal 54 90 6.8
## 3 Zearaja_c… Shark saltwater demersal 210 500 6.7
## 4 Neotrygon… Shark saltwater  reef-assoc… 70 170 27.8
## 5 Dasyatis_… Shark saltwater demersal 300 270 23.1
## 6 Squalus_a… Shark saltwater demersal 95 1460 9.9
## 7 Scyliorhi… Shark saltwater demersal 100 780 10.1
## 8 Sphyrna_t… Shark saltwater benthopelag… 134 12 21.1
## 9 Prionace_… Shark saltwater pelagic-oce… 400 1000 14.8
## 10 Carcharhi… Shark saltwater  reef-assoc… 300 100 27.4
## # … with 55 more rows
fish_traits <- filter(traits, group=="Fish")
shark_traits <- filter(traits, group=="Shark")
vis_miss(traits[,-1], sort_miss = TRUE)
First we will add species as row names to the table to enter in further analysis, and order the rows according to the order of tips in the phylogeny:
com_row_names <- column_to_rownames(fish_only, var = "Species")
com_row_names <- com_row_names[fish_tree_db$tip.label,]
head(com_row_names)
## CellArea NucleusArea RNC CellVolume
## Poecilia_reticulata 41.06884 7.45168 18.35295 197.1905
## Jenynsia_lineata 53.57625 10.59750 19.78022 321.6480
## Odontesthes_argentinensis 70.65000 11.77500 16.66667 485.4426
## Cichlasoma_dimerus 86.25276 15.14280 17.68168 620.7687
## Bujurquina_vittata 83.60116 14.49436 17.38191 644.5074
## Myxus_petardi 56.04900 6.86875 12.25490 333.1343
## NucleusVolume ShapeFactor
## Poecilia_reticulata 35.64444 0.9492176
## Jenynsia_lineata 63.62268 0.6190476
## Odontesthes_argentinensis 80.90710 0.6250000
## Cichlasoma_dimerus 108.58902 0.9430419
## Bujurquina_vittata 111.93099 0.8941498
## Myxus_petardi 40.82529 0.6862745
com_row_names_sh <- column_to_rownames(shark_only, var = "Species")
com_row_names_sh <- com_row_names_sh[shark.tree$tip.label,]
describe(com_row_names)#estatistica descritiva
## vars n mean sd median trimmed mad min max
## CellArea 1 53 76.94 27.92 74.06 73.91 24.27 33.17 174.96
## NucleusArea 2 53 13.08 4.69 11.78 12.61 4.61 6.53 27.30
## RNC 3 53 17.58 4.30 17.44 17.43 2.61 5.86 29.43
## CellVolume 4 53 569.32 321.09 523.75 520.24 256.31 143.83 1631.08
## NucleusVolume 5 53 96.03 53.54 86.00 87.73 44.94 35.64 254.25
## ShapeFactor 6 53 0.76 0.12 0.75 0.76 0.11 0.43 1.00
## range skew kurtosis se
## CellArea 141.80 1.25 2.10 3.84
## NucleusArea 20.77 0.81 0.16 0.64
## RNC 23.57 0.35 1.54 0.59
## CellVolume 1487.24 1.43 1.99 44.11
## NucleusVolume 218.61 1.28 0.98 7.35
## ShapeFactor 0.57 -0.10 -0.29 0.02
describe(com_row_names_sh[,-1])#estatistica descritiva
## vars n mean sd median trimmed mad min max
## CellArea 1 12 221.18 61.23 209.49 220.19 62.71 123.28 328.99
## NucleusArea 2 12 34.90 7.89 37.70 35.53 8.29 19.58 43.87
## RNC 3 12 16.26 3.12 16.02 16.52 2.81 9.35 20.56
## CellVolume 4 12 2687.92 1106.55 2428.55 2635.36 1084.49 1085.56 4815.89
## NucleusVolume 5 12 416.79 132.76 442.27 420.40 140.83 172.40 625.11
## ShapeFactor 6 12 0.69 0.04 0.69 0.68 0.03 0.64 0.78
## range skew kurtosis se
## CellArea 205.72 0.06 -1.14 17.68
## NucleusArea 24.30 -0.49 -1.28 2.28
## RNC 11.20 -0.59 -0.37 0.90
## CellVolume 3730.32 0.32 -1.04 319.43
## NucleusVolume 452.71 -0.27 -1.15 38.32
## ShapeFactor 0.14 0.67 0.06 0.01
GGally::ggpairs(com_row_names)
GGally::ggpairs(com_row_names_sh[,-1])
Making exploratory plots
cell_area <- setNames(com_row_names$CellArea, fish_tree_db$tip.label)
cell_vol <- setNames(com_row_names$CellVolume, fish_tree_db$tip.label)
nu_area <- setNames(com_row_names$NucleusArea, fish_tree_db$tip.label)
plotTree.wBars(fish_tree_db, cell_area, tip.labels = TRUE)
plotTree.wBars(fish_tree_db, cell_vol, tip.labels = TRUE)
plotTree.wBars(fish_tree_db, nu_area, tip.labels = TRUE)
We begin builing the phylomorphospace by conducting a phylogenetic Principal Components Analysis in the phytools package:
pca_phylo <- phyl.pca(fish_tree_db, com_row_names, mode = "corr")#utilizando a matriz de correlação, já que as variáveis não estão padronizadas
biplot.phyl.pca(pca_phylo)
fish_scores <- pca_phylo$S
fish_scores <- fish_scores[fish_tree_db$tip.label,]
pca_phylo$L[,1:3]#loadings, correlações entre variáveis originais e os PCs
## PC1 PC2 PC3
## CellArea -0.90865336 0.3225135 -0.16455854
## NucleusArea -0.88745708 -0.3891239 0.13185680
## RNC -0.01654504 -0.9503719 0.29471572
## CellVolume -0.94574441 0.2077578 -0.16448528
## NucleusVolume -0.94851382 -0.2360796 0.02486903
## ShapeFactor 0.18942824 -0.5038357 -0.84256575
phylomorphospace(fish_tree_db, fish_scores[,1:2], node.size=c(0.1, 1.2), label = "horizontal", xlab="pPC1 (57.4%)", ylab="pPC2 (25.2%)")
#com informacoes de habitat nos tips
pms <- gm.prcomp(fish_scores[,1:3],fish_tree_db, align.to.phy = FALSE)
plot(pms, phylo = TRUE, node.labels = FALSE)
screestick(diag(pca_phylo$Eval))#somente os autovalores dos 2 primeiros eixos são maiores do que o modelo BS
round(diag(pca_phylo$Eval)[1]/sum(diag(pca_phylo$Eval))*100, 1)#autovalor relativo do PC1
## PC1
## 57.4
round(diag(pca_phylo$Eval)[2]/sum(diag(pca_phylo$Eval))*100,1)
## PC2
## 25.2
round(diag(pca_phylo$Eval)[3]/sum(diag(pca_phylo$Eval))*100,1)
## PC3
## 14.5
pca_phylo_sh <- phyl.pca(shark.tree, com_row_names_sh[,-1], mode = "corr")#utilizando a matriz de correlação, já que as variáveis não estão padronizadas
biplot.phyl.pca(pca_phylo_sh)
pca_phylo_sh$L[,1:3]
## PC1 PC2 PC3
## CellArea -0.95392083 0.29344724 -0.03868929
## NucleusArea -0.87481695 -0.48326411 0.01666493
## RNC 0.05670254 -0.99700783 -0.02052290
## CellVolume -0.92833576 0.35526073 -0.10258160
## NucleusVolume -0.96397569 -0.25677881 -0.03688051
## ShapeFactor -0.15383282 0.02624024 0.98773547
shark_scores <- pca_phylo_sh$S
shark_scores <- shark_scores[shark.tree$tip.label,]
round(diag(pca_phylo_sh$Eval)[1]/sum(diag(pca_phylo_sh$Eval))*100, 1)#autovalor relativo do PC1
## PC1
## 58.2
round(diag(pca_phylo_sh$Eval)[2]/sum(diag(pca_phylo_sh$Eval))*100,1)
## PC2
## 25.1
round(diag(pca_phylo_sh$Eval)[3]/sum(diag(pca_phylo_sh$Eval))*100,1)
## PC3
## 16.5
phylomorphospace(shark.tree, shark_scores[,1:2], node.size=c(0.1, 1.2), label = "horizontal", xlab="pPC1 (58.1%)", ylab="pPC2 (25.1%)")
#same thing below but showing habitat as colour coded tips
pmsh <- gm.prcomp(shark_scores,shark.tree, align.to.phy = FALSE)
plot(pmsh, phylo = TRUE, node.labels = FALSE)
env_vec_fish <- setNames(fish_traits$environment, fish_tree_db$tip.label)
env_vec_fish <- as.factor(env_vec_fish)
water_column_fish <- setNames(fish_traits$water_column, fish_tree_db$tip.label)
water_column_fish <- as.factor(water_column_fish)
compare.evol.rates(fish_scores, fish_tree_db, env_vec_fish, print.progress = FALSE)#significativo
##
## Call:
##
##
## Observed Rate Ratio: 5.34
##
## P-value: 0.013
##
## Effect Size: 2.3227
##
## Based on 1000 random permutations
##
## The rate for group freshwater is 1.24908082592796
##
## The rate for group marine is 0.233908309854182
##
## The rate for group saltwater is 0.685714508712023
fresh_vs_salt <- fct_collapse(env_vec_fish, saltwater=c("saltwater", "marine"))#combining factor levels
compare.evol.rates(fish_scores, fish_tree_db, fresh_vs_salt, print.progress = FALSE)#signifcativo
##
## Call:
##
##
## Observed Rate Ratio: 2.0695
##
## P-value: 0.049
##
## Effect Size: 1.9168
##
## Based on 1000 random permutations
##
## The rate for group freshwater is 1.24908082592796
##
## The rate for group saltwater is 0.603567927101507
compare.evol.rates(fish_scores, fish_tree_db, water_column_fish, print.progress = FALSE)#sem diferença
##
## Call:
##
##
## Observed Rate Ratio: 5.3873
##
## P-value: 0.66
##
## Effect Size: -0.4833
##
## Based on 1000 random permutations
##
## The rate for group  reef-associated is 0.84472855705115
##
## The rate for group bathydemersal is 0.782856134324788
##
## The rate for group benthopelagic is 0.960282774243303
##
## The rate for group demersal is 1.1351654906407
##
## The rate for group pelagic is 2.10575188798387
##
## The rate for group pelagic-neritic is 0.390874756663162
##
## The rate for group pelagic-oceanic is 0.515192554478696
shape_env <- as.data.frame(cbind(fish_scores, fresh_vs_salt))
head(shape_env)
## PC1 PC2 PC3 PC4
## Poecilia_reticulata 26.361511 -11.073583 -13.3810474 -2.1276947
## Jenynsia_lineata 13.058476 -3.422254 12.4284530 -1.6663052
## Odontesthes_argentinensis 4.364692 2.992512 8.1153991 -0.5890617
## Cichlasoma_dimerus -4.321762 -10.844850 -16.0646566 2.9904991
## Bujurquina_vittata -4.224417 -8.432283 -12.6838215 0.8937531
## Myxus_petardi 19.295887 11.185915 0.3811095 -2.7357048
## PC5 PC6 fresh_vs_salt
## Poecilia_reticulata -0.20366308 0.22477491 1
## Jenynsia_lineata 1.39602713 0.07344786 1
## Odontesthes_argentinensis -0.03683982 0.01523528 1
## Cichlasoma_dimerus -1.36356929 0.16397451 1
## Bujurquina_vittata -1.25892420 0.14855955 1
## Myxus_petardi -2.13222392 0.01177806 1
lineages <- custom.subsets(fish_scores, list(freshwater=c(1, 2, 3, 4, 5, 6,12, 15,16,28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48 ,49), saltwater=c(7, 8, 9, 10, 11, 13, 14, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 50, 51, 52, 53)))
bootstrapped_data <- boot.matrix(lineages, bootstraps = 100)
convhull <- dispRity(bootstrapped_data, metric = c(convhull.volume))
test.dispRity(convhull, wilcox.test, "all", correction="BY")
## [[1]]
##
## Wilcoxon rank sum test with continuity correction
##
## data: data by subsets
## W = 10000, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
summary.dispRity(convhull)
## subsets n obs bs.median 2.5% 25% 75% 97.5%
## 1 freshwater 31 263727 39687 3862 24229 85882 204322
## 2 saltwater 22 1114 137 9 46 340 718
plot.dispRity(convhull)
sum_of_variances <- dispRity(bootstrapped_data, metric = c(sum, variances))
test.dispRity(sum_of_variances, wilcox.test, "all", correction="BY")
## [[1]]
##
## Wilcoxon rank sum test with continuity correction
##
## data: data by subsets
## W = 8276, p-value = 1.211e-15
## alternative hypothesis: true location shift is not equal to 0
test.dispRity(sum_of_variances, bhatt.coeff, "pairwise", correction="BY")#lineages do not overlap
## bhatt.coeff
## freshwater : saltwater 0.7164969
plot.dispRity(sum_of_variances)
summary(sum_of_variances)
## subsets n obs bs.median 2.5% 25% 75% 97.5%
## 1 freshwater 31 691.3 641.5 421.1 533.8 751.7 960.4
## 2 saltwater 22 437.2 414.7 207.2 298.0 555.2 843.1
#---Multivariate Phylogenetic signal com todos os 8 PCs
physignal(fish_scores, fish_tree_db)#Kmult=0.6439; P=0.017, Effect Size: 2.0907
##
## Call:
## physignal(A = fish_scores, phy = fish_tree_db)
##
##
##
## Observed Phylogenetic Signal (K): 0.6439
##
## P-value: 0.017
##
## Effect Size: 2.0907
##
## Based on 1000 random permutations
physignal(shark_scores, shark.tree)#Kmult=0.4862; P=0.265
##
## Call:
## physignal(A = shark_scores, phy = shark.tree)
##
##
##
## Observed Phylogenetic Signal (K): 0.4862
##
## P-value: 0.265
##
## Effect Size: 0.623
##
## Based on 1000 random permutations
Here, we will use RRphylo package to Estimating evolutionary rate for RBC shape for tips and nodes of the phylogeny and also search automatically and test for rate shift
taxas_evol <- RRphylo(fish_tree_db, fish_scores)
tip_pheno <- taxas_evol$rates[53:105,]
Evol_Rates <- taxas_evol$rates
shift_hemacia <- search.shift(taxas_evol, status.type = "clade", foldername = tempdir())
shift_hemacia
## $all.clades
## rate.difference p.value
## 57 0.09591497 1.000
## 60 0.11490006 1.000
## 64 0.10958120 1.000
## 65 0.12737936 0.999
## 71 0.19967966 1.000
## 72 -0.06687118 0.003
## 75 -0.07340256 0.002
## 77 -0.05176633 0.024
## 78 -0.03645691 0.134
## 81 -0.04136668 0.088
## 82 0.01150022 0.661
## 83 -0.04592446 0.165
## 88 -0.02555789 0.317
##
## $single.clades
## rate.difference p.value
## 75 -0.07340256 0.002
## 71 0.19967966 1.000
plotRates(taxas_evol, node = 72,foldername=tempdir(),export.tiff = FALSE)
## Synbranchus_marmoratus Opsanus_tau Leiopotherapon_unicolor
## 0.682325026 0.478850784 0.373291597
## Perca_fluviatilis Poecilia_reticulata Mola_mola
## 0.310022321 0.306416441 0.280503408
## Rachycentron_canadus Scolopsis_vosmeri Lethrinus_miniatus
## 0.245498804 0.243762944 0.237151508
## Lophius_piscatorius 104 86
## 0.235836853 0.219559175 0.210526219
## Aluterus_schoepfii Myxus_petardi Jenynsia_lineata
## 0.210420498 0.209589618 0.209569905
## Sphoeroides_maculatus Balistes_carolinensis Prionotus_carolinus
## 0.205348244 0.187460595 0.183284330
## Prionotus_evolans Xiphias_gladius Seriola_lalandi
## 0.173890563 0.172522268 0.167140551
## Paralichthys_lethostigma Stenotomus_chrysops Odontesthes_argentinensis
## 0.151061077 0.150989128 0.139334704
## 105 103 Echeneis_naucrates
## 0.133251166 0.128731786 0.124622191
## 98 Cichlasoma_dimerus Tautoga_onitis
## 0.123883286 0.111866515 0.077834080
## 94 95 101
## 0.077466512 0.072371757 0.058509073
## Bujurquina_vittata 97 81
## 0.054891048 0.054120800 0.048627143
## 75 91 88
## 0.047913727 0.046930294 0.041616214
## 100 92 77
## 0.028624824 0.025562803 0.024285834
## 102 83 82
## 0.022860557 0.021596758 0.021349130
## 78 85 99
## 0.020964358 0.010716674 0.010055648
## 96 89
## 0.006462900 0.006428607
plotRates(taxas_evol, node = 75,foldername=tempdir(),export.tiff = FALSE)
## Synbranchus_marmoratus Leiopotherapon_unicolor Perca_fluviatilis
## 0.682325026 0.373291597 0.310022321
## Poecilia_reticulata Mola_mola Rachycentron_canadus
## 0.306416441 0.280503408 0.245498804
## Scolopsis_vosmeri Lethrinus_miniatus Lophius_piscatorius
## 0.243762944 0.237151508 0.235836853
## 104 86 Aluterus_schoepfii
## 0.219559175 0.210526219 0.210420498
## Myxus_petardi Jenynsia_lineata Sphoeroides_maculatus
## 0.209589618 0.209569905 0.205348244
## Balistes_carolinensis Prionotus_carolinus Prionotus_evolans
## 0.187460595 0.183284330 0.173890563
## Xiphias_gladius Seriola_lalandi Paralichthys_lethostigma
## 0.172522268 0.167140551 0.151061077
## Stenotomus_chrysops Odontesthes_argentinensis 105
## 0.150989128 0.139334704 0.133251166
## 103 Echeneis_naucrates 98
## 0.128731786 0.124622191 0.123883286
## Cichlasoma_dimerus Tautoga_onitis 94
## 0.111866515 0.077834080 0.077466512
## 95 101 Bujurquina_vittata
## 0.072371757 0.058509073 0.054891048
## 97 81 91
## 0.054120800 0.048627143 0.046930294
## 88 100 92
## 0.041616214 0.028624824 0.025562803
## 77 102 83
## 0.024285834 0.022860557 0.021596758
## 82 78 85
## 0.021349130 0.020964358 0.010716674
## 99 96 89
## 0.010055648 0.006462900 0.006428607
plotRates(taxas_evol, node = 77,foldername=tempdir(),export.tiff = FALSE)
## Leiopotherapon_unicolor Perca_fluviatilis Mola_mola
## 0.37329160 0.31002232 0.28050341
## Scolopsis_vosmeri Lethrinus_miniatus Lophius_piscatorius
## 0.24376294 0.23715151 0.23583685
## Aluterus_schoepfii Sphoeroides_maculatus Balistes_carolinensis
## 0.21042050 0.20534824 0.18746060
## Prionotus_carolinus Prionotus_evolans Stenotomus_chrysops
## 0.18328433 0.17389056 0.15098913
## 105 98 Tautoga_onitis
## 0.13325117 0.12388329 0.07783408
## 94 97 81
## 0.07746651 0.05412080 0.04862714
## 88 100 102
## 0.04161621 0.02862482 0.02286056
## 85 99 96
## 0.01071667 0.01005565 0.00646290
Plotting the results
edged <- data.frame(taxas_evol$tree$edge, taxas_evol$tree$edge.length)
order <- edged[, 2][which(edged[, 2] < Ntip(fish_tree_db) + 1)]
order <- rownames(Evol_Rates)[(Ntip(fish_tree_db)):length(Evol_Rates)]
#---plotting node labels and tiplevel
### these lines produce the plot
ggtree(taxas_evol$tree, layout = "circular") +
geom_point2(aes(color=Evol_Rates), size= 2.5)+
geom_tiplab(size=2, offset=.1)+
scale_color_continuous(low='blue', high='red')+
theme(legend.position="right")
## Warning: `tbl_df()` is deprecated as of dplyr 1.0.0.
## Please use `tibble::as_tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
For sharks
taxas_evol_sh <- RRphylo(shark.tree, shark_scores)
tip_pheno_sh <- taxas_evol_sh$rates[12:23,]
Evol_Rates_sh <- taxas_evol_sh$rates
shift_hemacia_sh <- search.shift(taxas_evol_sh, status.type = "clade", foldername = tempdir())
shift_hemacia_sh
## $all.clades
## rate.difference p.value
## 14 -0.01469076 0.387
## 15 -0.01466516 0.480
## 16 0.09558896 0.813
## 17 0.02882863 0.738
## 20 0.12700188 0.983
## 21 0.17313343 0.993
## 22 0.30386536 1.000
## 23 0.36431710 0.993
##
## $single.clades
## rate.difference p.value
## 23 0.3643171 0.993
plotRates(taxas_evol_sh, node = 23,foldername=tempdir(),export.tiff = FALSE)
## Carcharhinus_brevipinna Carcharhinus_obscurus
## 0.6813788 0.3339460
edged <- data.frame(taxas_evol_sh$tree$edge, taxas_evol_sh$tree$edge.length)
order <- edged[, 2][which(edged[, 2] < Ntip(shark.tree) + 1)]
order <- rownames(Evol_Rates_sh)[(Ntip(shark.tree)):length(Evol_Rates_sh)]
#---plotting node labels and tiplevel
### these lines produce the plot
ggtree(taxas_evol_sh$tree, layout = "circular") +
geom_point2(aes(color=Evol_Rates_sh), size= 2.5)+
geom_tiplab(size=3, offset=.1)+
scale_color_continuous(low='blue', high='red')+
theme(legend.position="right")
First we run a simple multivariate PGLS assuming Brownian Motion:
all_data_fish <- geomorph.data.frame(species= fish_tree_db$tip.label, trait=fish_traits, shape=fish_scores)
col_reg <- procD.pgls(shape~trait.water_column+trait.max_size+fresh_vs_salt, phy = fish_tree_db, data = all_data_fish, print.progress = FALSE)
anova.lm.rrpp(col_reg)#nada significativo
##
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals
## Number of permutations: 1000
## Estimation method: Generalized Least-Squares (via OLS projection)
## Sums of Squares and Cross-products: Type I
## Effect sizes (Z) based on F distributions
##
## Df SS MS Rsq F Z Pr(>F)
## trait.water_column 6 24.309 4.0514 0.07791 0.6429 -0.89898 0.826
## trait.max_size 1 2.743 2.7429 0.00879 0.4353 -0.44027 0.688
## fresh_vs_salt 1 7.684 7.6841 0.02463 1.2194 0.60668 0.292
## Residuals 44 277.264 6.3015 0.88867
## Total 52 312.000
##
## Call: procD.lm(f1 = f1, iter = iter, seed = seed, RRPP = TRUE, SS.type = SS.type,
## effect.type = effect.type, int.first = int.first, Cov = Cov,
## data = data, print.progress = print.progress)
all_data_shark <- geomorph.data.frame(species= shark.tree$tip.label, trait=shark_traits, shape=shark_scores)
shark_RBC_reg <- procD.pgls(shape~trait.water_column+trait.max_size, phy = shark.tree, data = all_data_shark, print.progress = FALSE)
anova.lm.rrpp(shark_RBC_reg)
##
## Analysis of Variance, using Residual Randomization
## Permutation procedure: Randomization of null model residuals
## Number of permutations: 1000
## Estimation method: Generalized Least-Squares (via OLS projection)
## Sums of Squares and Cross-products: Type I
## Effect sizes (Z) based on F distributions
##
## Df SS MS Rsq F Z Pr(>F)
## trait.water_column 3 22.608 7.5359 0.34254 1.4053 0.62147 0.288
## trait.max_size 1 5.854 5.8538 0.08869 1.0916 0.40632 0.378
## Residuals 7 37.538 5.3626 0.56876
## Total 11 66.000
##
## Call: procD.lm(f1 = f1, iter = iter, seed = seed, RRPP = TRUE, SS.type = SS.type,
## effect.type = effect.type, int.first = int.first, Cov = Cov,
## data = data, print.progress = print.progress)