This is the 2nd script used for data analysis and figure generation for the the manuscript by Masche et al. titled “Specific gut microbiome members are associated with distinct immune markers in allogeneic hematopoietic stem cell transplantation”.
This script and associated data are provided by Anna Cäcilia Masche, Susan Holmes, and Sünje Johanna Pamp.
These data and the associated script are licensed under the Creative Commons Attribution-ShareAlike 4.0 International License. To view a copy of this license, visit http://creativecommons.org/licenses/by-sa/4.0/ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
Under the condition that appropriate credit is provided, you are free to: 1) Share, copy and redistribute the material in any medium or format 2) Adapt, remix, transform, and build upon the material for any purpose, even commercially.
To see the full license associated with attribution of this work, see the CC-By-CA license, see http://creativecommons.org/licenses/by-sa/4.0/.
The local filename is: Script2_diversity.Rmd.
library("phyloseq")
library("dplyr")
## Warning: package 'dplyr' was built under R version 3.4.1
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("ggplot2")
library("gridExtra")
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library("reshape2")
#install.packages("PMCMR")
library("PMCMR")
## Warning: package 'PMCMR' was built under R version 3.4.4
## PMCMR is superseded by PMCMRplus and will be no longer maintained. You may wish to install PMCMRplus instead.
d_30 <- readRDS(file = "M:/Documents/Publications/Masche_R_Scripts_and_Data/Data/phy_obj1_core_cleaned_vsd1.Rdata")
plot.div <- ggplot(subset(sample_data(d_30),timepoint != "NA"), aes(x = timepoint, y = InvSimpson)) + geom_boxplot(outlier.color="NA") + geom_jitter(size=3, shape = 21, width = 0.2) + theme(axis.title.y = element_text(size=12, margin=margin(0,25,0,0), face = "bold"), axis.text.y = element_text(size=16), axis.text.x = element_text(size=14, face = "bold", angle = 35, vjust = 1, hjust = 1), axis.title.x = element_blank(), legend.text = element_text(size=14, face = "bold"), legend.title = element_text(face = "bold", size = 20),panel.grid.minor = element_blank(), panel.grid.major = element_blank(), panel.background = element_blank(), panel.border = element_rect(color = "black", fill= NA)) + labs(y="Inverse Simpson") + scale_x_discrete(labels=c("pre-HSCT", "day of HSCT", "week +1", "week +2", "week +3", "week +4", "week +5")) + stat_boxplot(geom ='errorbar')
p_median <- plot.div+stat_summary(fun.y=median, geom="line", colour="black", size = 1.4, aes(group = 1)) + coord_trans(y = "log10")
print(p_median)
#subset h2:
div_fried <- as(sample_data(d_30)[,c("Patient_ID", "timepoint", "InvSimpson")], "data.frame")
#dcast to format the data as required for the Friedman test
div_fried_dcast <- dcast(div_fried, Patient_ID ~ timepoint, value.var = "InvSimpson", mean, margins =TRUE)
#turn into a matrix:
div_fried_dcast_ma <- as.matrix(div_fried_dcast)
#perfrom Friedman test.Week +5 needs to be excluded as there were too few observatons at this time point.
friedman.test(div_fried_dcast_ma[, c(2:7)], na.action = "na.omit", p.adjust.method = "BH") #no overall significant difference between timepoints
##
## Friedman rank sum test
##
## data: div_fried_dcast_ma[, c(2:7)]
## Friedman chi-squared = 8.8571, df = 5, p-value = 0.1149
#Post-hoc Conover test. Conover was chosen over Nemenyi as it allows for p-value correction for multiple testing.
#Therefore first convert the data to a numeric matrix:
div_fried_dcast_ma_num <- div_fried_dcast_ma
mode(div_fried_dcast_ma_num) <- "numeric" #convert character matrix to numeric matrix
## Warning in mde(x): NAs durch Umwandlung erzeugt
posthoc.friedman.conover.test(div_fried_dcast_ma_num[, c(2:7)],na.action = "na.omit", p.adjust.method = "BH")
##
## Pairwise comparisons using Conover's test for a two-way
## balanced complete block design
##
## data: div_fried_dcast_ma_num[, c(2:7)]
##
## f01 w0 w1 w2 w3
## w0 0.03520 - - - -
## w1 0.87249 0.04782 - - -
## w2 0.00057 0.16067 0.00089 - -
## w3 2.4e-06 0.00653 4.1e-06 0.16067 -
## w4 1.1e-14 1.4e-09 1.5e-14 1.4e-06 0.00036
##
## P value adjustment method: BH
phy_dat <- readRDS(file = "M:/Documents/Publications/Masche_R_Scripts_and_Data/Data/phy_obj1_core_cleaned.Rdata")
families_1 = tax_glom(phy_dat, "Family")#agglom. on family level
#8 most abundant families:
Family8 = names(sort(taxa_sums(families_1), TRUE)[1:8])
#transform to relative abundance:
families_rel1 = transform_sample_counts(families_1, function(x) x/sum(x))
#get the family names
fam_names1 <- as.data.frame(tax_table(families_rel1)[,"Family"])
#extract the family-agglomerated otu table
otu_families_rel1 = as.data.frame(t(otu_table(families_rel1)))
#now join with the previous data:
otu_families_rel_11 <- tibble::rownames_to_column(otu_families_rel1, var = "feces_sample")#gives a name to the row.names column
#rename the columns to family names:
colnames(otu_families_rel_11) <- fam_names1$Family[match(names(otu_families_rel_11), rownames(fam_names1))]
colnames(otu_families_rel_11)[1] <- "feces_sample"
otu_families_rel_12 <- reshape2::melt(otu_families_rel_11)
## Using feces_sample as id variables
names(otu_families_rel_12)[names(otu_families_rel_12) == 'variable'] <- 'Family'
names(otu_families_rel_12)[names(otu_families_rel_12) == 'value'] <- 'Relative_abundance'
otu_families_rel_13 <- otu_families_rel_12 %>% group_by(Family) %>% dplyr::summarize(Rel_abund_mean = mean(Relative_abundance, na.rm = TRUE))
levels(otu_families_rel_13$Family)[35]<- c("Family XI.1")
otu_families_rel_14 <- otu_families_rel_13[order(-otu_families_rel_13$Rel_abund_mean),][1:8,]
my.col_x <- c("Enterobacteriaceae" = "#a6cee3", "Lactobacillaceae" = "#1f78b4", "Streptococcaceae" = "#b2df8a", "Staphylococcaceae" = "#33a02c", "Enterococcaceae" = "#fb9a99", "Lachnospiraceae" = "#e31a1c", "Erysipelotrichaceae" = "#fdbf6f", "Ruminococcaceae" = "#ff7f00")
plot.rank <- ggplot(otu_families_rel_14, aes(y= Rel_abund_mean, x = reorder(Family, -Rel_abund_mean), fill = otu_families_rel_14$Family, levels=c("Enterobacteriaceae", "Lactobacillaceae", "Streptococcaceae", "Staphylococcaceae", "Enterococcaceae", "Lachnospiraceae", "Erysipelotrichaceae", "Ruminococcaceae"))) + geom_bar(stat = "identity") + scale_fill_manual(values = my.col_x) + theme(axis.title.y = element_text(size=12, margin=margin(0,25,0,0), face = "bold"), axis.text.y = element_text(size=16), axis.text.x = element_text(size=14, face = "bold", angle = 0, vjust = 1, hjust = 1), axis.title.x = element_text(size=12, margin=margin(25,0,0,0), face = "bold"), legend.text = element_text(size=14, face = "bold"), legend.title = element_text(face = "bold", size = 20),panel.grid.minor = element_blank(), panel.grid.major = element_blank(), panel.background = element_blank(), panel.border = element_rect(color = "black", fill= NA)) + labs(y="Average abundance", fill="Family", x = "Rank of family") + scale_x_discrete(labels=c("1", "2", "3", "4", "5", "6", "7", "8"))
plot.rank