Read in prepared workspace and load packages.
load("../data/pb_data.RData")
library(vegan)
## Loading required package: permute This is vegan 2.0-8
library(labdsv)
## Loading required package: mgcv This is mgcv 1.7-22. For overview type
## 'help("mgcv-package")'. Loading required package: MASS
##
## Attaching package: 'labdsv'
##
## The following object is masked from 'package:stats':
##
## density
library(ape)
library(xtable)
source("../data/pb_functions_for_manuscript.R")
How do samples fall into groups? I.e., do we have balance in treatments?
table(pb.map$person, pb.map$sampleType)
##
## filter petri.dish
## s1 48 24
## s2 46 23
## s3 46 24
table(pb.map$location)
##
## occ unocc
## 108 103
table(pb.map$duration)
##
## 2 4
## 105 106
table(pb.map$sampleType)
##
## filter petri.dish
## 140 71
Quick refrerence vector of names for figures and whatnot. Some genus, families, etc are empty, so search for the finest taxonomic level and use that name. cons
is in functions script sourced above.
consensus <- apply(taxo[, 1:6], 1, cons)
consensus[1:10]
## 2 6 12
## "Halomonadaceae" "Solirubrobacterales" "Rhizobiales"
## 21 24 25
## "Bacteria" "Gammaproteobacteria" "Bacteria"
## 35 73 120
## "Bacillales" "Betaproteobacteria" "Bacteria"
## 122
## "Ralstonia"
Generate a list of the 10 most abundant OTUs overall, but also tack on OTUs that are among the 10 most abundant in any particular group of samples. Then a matching version of the mapping table.
tops <- names(rev(sort(colSums(pb.1000))))[1:10]
for (i in 1:length(gr2)) {
tops.plus <- names(rev(sort(colSums(pb.1000[gr2[[i]], ]))))[1:10]
tops <- unique(c(tops, tops.plus))
}
tops <- tops[rev(order(taxo[tops, 7]))]
taxo.tops <- taxo[tops, ]
length(tops)
## [1] 36
consensus[tops]
## 234992 25506 121358
## "Methylobacterium" "Staphylococcus" "Tumebacillus"
## 201443 94008 80526
## "Stenotrophomonas" "Corynebacterium" "Corynebacterium"
## 151122 72518 136033
## "Enterobacteriaceae" "Corynebacterium" "Acinetobacter"
## 129988 172108 199152
## "Corynebacterium" "Pseudomonas" "Lactobacillus"
## 129498 134501 229364
## "Streptococcus" "Comamonas" "Micrococcus"
## 204422 57760 41465
## "Sphingomonas" "Sphingomonadaceae" "Dolosigranulum"
## 63496 235029 205036
## "Actinomycetales" "Kocuria" "Finegoldia"
## 164584 189711 92831
## "Alcaligenaceae" "Cupriavidus" "Pantoea"
## 264 133239 196388
## "Methylobacterium" "Gemella" "Leuconostoc"
## 100717 187238 58876
## "Neisseria" "Bacteria" "Aeromonas"
## 4557 200842 102650
## "Cloacibacterium" "Flavobacteriaceae" "Rhizobiales"
## 55224 169608 102857
## "Rhizobiales" "Inquilinus" "Pedobacter"
Create a reordered map and otu table.
pb.order <- data.frame(pb.1000[unlist(gr2), tops])
pb.order$gr.num <- c(rep(gr.name, c(unlist(lapply(gr2, length)))))
map.order <- pb.map[row.names(pb.order), ]
pb.order$location <- factor(map.order$location, levels = c("occ", "unocc"))
Make new trimmed OTU table and assess with boxplots. Then send the statistically interesting otus to figures and tables script.
pb.order <- pb.order[map.order$duration == "4", ]
# for (i in 1:(ncol(otus.to.barplot.tmp)-2)) {
# boxplot(otus.to.barplot.tmp[, i] ~ factor(otus.to.barplot.tmp$gr.num),
# las=2, main=consensus[gsub('X', '', names(otus.to.barplot.tmp)[i])],
# sub=i) readline('Press return for next plot') }
pb.order <- pb.order[, c(1, 2, 3, 4, 5, 6, 8, 10, 12, 18, 21, ncol(pb.order) -
1, ncol(pb.order))]
names(pb.order) <- gsub("X", "", names(pb.order))
consensus[names(pb.order)]
## 234992 25506 121358
## "Methylobacterium" "Staphylococcus" "Tumebacillus"
## 201443 94008 80526
## "Stenotrophomonas" "Corynebacterium" "Corynebacterium"
## 72518 129988 199152
## "Corynebacterium" "Corynebacterium" "Lactobacillus"
## 41465 205036 <NA>
## "Dolosigranulum" "Finegoldia" NA
## <NA>
## NA
rm(cons, Evenness, gr.name, gr2, tops.plus, makeTaxo)
Now for indicator analysis - this takes a long time and lots of memory! This uses the indval()
function in the labdsv
package. This gives output for all OTUs, the vast majority of which are unimportant to analysis. So there are a few steps to cut the output down to just the statistically salient OTUs.
Unpack list of groups and pull out just 4 hour air filter dataset.
for (i in 1:length(names(groups))) {
assign(names(groups)[i], unlist(groups[[i]]))
}
occ4f <- c(s14of, s24of, s34of, s14uf, s24uf, s34uf)
occ4f.table <- pb.1000[occ4f, ]
occ4f.table <- occ4f.table[, -which(colSums(occ4f.table) == 0)]
occ4f.map <- pb.map[occ4f, ]
Split this into two chunks - each takes a long time.
iv.location <- indval(occ4f.table, occ4f.map$location)
iv.person <- indval(occ4f.table, occ4f.map$occ.person2)
levels(occ4f.map$occ.person2)
Keep those that are significant before p-value adjustment.
p <- 0.05
these <- which(iv.location$pval <= p)
iv.location.table <- data.frame(iv.location$maxcls[these], round(iv.location$indcls[these],
4), iv.location$pval[these])
names(iv.location.table) <- c("cluster", "indicator_value", "probability")
iv.location.table$cluster[iv.location.table$cluster == "1"] <- "occupied"
iv.location.table$cluster[iv.location.table$cluster == "2"] <- "unoccupied"
these <- which(iv.person$pval <= p)
iv.person.table <- data.frame(iv.person$maxcls[these], round(iv.person$indcls[these],
4), iv.person$pval[these])
names(iv.person.table) <- c("cluster", "indicator_value", "probability")
iv.person.table$cluster[iv.person.table$cluster == "1"] <- "Subject1"
iv.person.table$cluster[iv.person.table$cluster == "2"] <- "Subject2"
iv.person.table$cluster[iv.person.table$cluster == "3"] <- "Subject3"
iv.person.table$cluster[iv.person.table$cluster == "4"] <- "unoccupied"
Order by indicator value and tack on taxonomic information.
ivL <- iv.location.table[rev(order(iv.location.table$indicator_value)), ]
ivP <- iv.person.table[rev(order(iv.person.table$indicator_value)), ]
head(ivP)
## cluster indicator_value probability
## X41465 Subject1 0.9620 0.001
## X199152 Subject3 0.8948 0.001
## X154708 Subject3 0.8193 0.001
## X63496 Subject1 0.7999 0.001
## X172036 Subject1 0.7554 0.001
## X186045 Subject3 0.7500 0.001
ivL.names <- gsub("X", "", row.names(ivL))
ivL <- data.frame(cbind(ivL[, ], taxo[ivL.names, ]))
ivP.names <- gsub("X", "", row.names(ivP))
ivP <- cbind(ivP[, ], taxo[ivP.names, ])
ivL <- ivL[, c(2, 3, 10, 1, 9, 8, 7, 6, 5, 4)]
ivP <- ivP[, c(2, 3, 10, 1, 9, 8, 7, 6, 5, 4)]
Print out for later reference.
print(xtable(head(ivL, 20), digits = 3), type = "html")
indicator_value | probability | abundance | cluster | genus | family | order | class | phylum | kingdom | |
---|---|---|---|---|---|---|---|---|---|---|
X94008 | 0.830 | 0.001 | 10134.000 | occupied | Corynebacterium | Corynebacteriaceae | Actinomycetales | Actinobacteria | Actinobacteria | Bacteria |
X72518 | 0.795 | 0.001 | 5093.000 | occupied | Corynebacterium | Corynebacteriaceae | Actinomycetales | Actinobacteria | Actinobacteria | Bacteria |
X25506 | 0.770 | 0.001 | 19463.000 | occupied | Staphylococcus | Staphylococcaceae | Bacillales | Bacilli | Firmicutes | Bacteria |
X80526 | 0.758 | 0.001 | 8760.000 | occupied | Corynebacterium | Corynebacteriaceae | Actinomycetales | Actinobacteria | Actinobacteria | Bacteria |
X201443 | 0.751 | 0.001 | 12640.000 | unoccupied | Stenotrophomonas | Xanthomonadaceae | Xanthomonadales | Gammaproteobacteria | Proteobacteria | Bacteria |
X129498 | 0.692 | 0.001 | 2393.000 | occupied | Streptococcus | Streptococcaceae | Lactobacillales | Bacilli | Firmicutes | Bacteria |
X129988 | 0.650 | 0.010 | 2893.000 | occupied | Corynebacterium | Corynebacteriaceae | Actinomycetales | Actinobacteria | Actinobacteria | Bacteria |
X151122 | 0.619 | 0.006 | 7579.000 | unoccupied | Enterobacteriaceae | Enterobacteriales | Gammaproteobacteria | Proteobacteria | Bacteria | |
X121358 | 0.604 | 0.035 | 16283.000 | unoccupied | Tumebacillus | Bacillaceae | Bacillales | Bacilli | Firmicutes | Bacteria |
X13338 | 0.556 | 0.001 | 266.000 | occupied | Anaerococcus | Incertae Sedis XI | Clostridiales | Clostridia | Firmicutes | Bacteria |
X84956 | 0.551 | 0.001 | 251.000 | occupied | Peptoniphilus | Incertae Sedis XI | Clostridiales | Clostridia | Firmicutes | Bacteria |
X172949 | 0.546 | 0.001 | 242.000 | occupied | Corynebacterium | Corynebacteriaceae | Actinomycetales | Actinobacteria | Actinobacteria | Bacteria |
X234992 | 0.544 | 0.034 | 28147.000 | unoccupied | Methylobacterium | Methylobacteriaceae | Rhizobiales | Alphaproteobacteria | Proteobacteria | Bacteria |
X205036 | 0.540 | 0.001 | 793.000 | occupied | Finegoldia | Incertae Sedis XI | Clostridiales | Clostridia | Firmicutes | Bacteria |
X134501 | 0.538 | 0.003 | 1560.000 | unoccupied | Comamonas | Comamonadaceae | Burkholderiales | Betaproteobacteria | Proteobacteria | Bacteria |
X233136 | 0.532 | 0.001 | 277.000 | unoccupied | Halomonas | Halomonadaceae | Oceanospirillales | Gammaproteobacteria | Proteobacteria | Bacteria |
X230275 | 0.519 | 0.001 | 322.000 | occupied | Anaerococcus | Incertae Sedis XI | Clostridiales | Clostridia | Firmicutes | Bacteria |
X8516 | 0.505 | 0.001 | 337.000 | occupied | Anaerococcus | Incertae Sedis XI | Clostridiales | Clostridia | Firmicutes | Bacteria |
X63496 | 0.500 | 0.001 | 1061.000 | occupied | Actinomycetales | Actinobacteria | Actinobacteria | Bacteria | ||
X199152 | 0.489 | 0.001 | 2554.000 | occupied | Lactobacillus | Lactobacillaceae | Lactobacillales | Bacilli | Firmicutes | Bacteria |
print(xtable(head(ivP, 20), digits = 3), type = "html")
indicator_value | probability | abundance | cluster | genus | family | order | class | phylum | kingdom | |
---|---|---|---|---|---|---|---|---|---|---|
X41465 | 0.962 | 0.001 | 1089.000 | Subject1 | Dolosigranulum | Carnobacteriaceae | Lactobacillales | Bacilli | Firmicutes | Bacteria |
X199152 | 0.895 | 0.001 | 2554.000 | Subject3 | Lactobacillus | Lactobacillaceae | Lactobacillales | Bacilli | Firmicutes | Bacteria |
X154708 | 0.819 | 0.001 | 64.000 | Subject3 | Corynebacterium | Corynebacteriaceae | Actinomycetales | Actinobacteria | Actinobacteria | Bacteria |
X63496 | 0.800 | 0.001 | 1061.000 | Subject1 | Actinomycetales | Actinobacteria | Actinobacteria | Bacteria | ||
X172036 | 0.755 | 0.001 | 362.000 | Subject1 | Corynebacterium | Corynebacteriaceae | Actinomycetales | Actinobacteria | Actinobacteria | Bacteria |
X186045 | 0.750 | 0.001 | 154.000 | Subject3 | Facklamia | Aerococcaceae | Lactobacillales | Bacilli | Firmicutes | Bacteria |
X8516 | 0.744 | 0.001 | 337.000 | Subject3 | Anaerococcus | Incertae Sedis XI | Clostridiales | Clostridia | Firmicutes | Bacteria |
X9535 | 0.667 | 0.001 | 133.000 | Subject1 | Corynebacterium | Corynebacteriaceae | Actinomycetales | Actinobacteria | Actinobacteria | Bacteria |
X175349 | 0.665 | 0.001 | 99.000 | Subject3 | Peptoniphilus | Incertae Sedis XI | Clostridiales | Clostridia | Firmicutes | Bacteria |
X108686 | 0.652 | 0.001 | 205.000 | Subject1 | Corynebacterium | Corynebacteriaceae | Actinomycetales | Actinobacteria | Actinobacteria | Bacteria |
X129988 | 0.640 | 0.001 | 2893.000 | Subject1 | Corynebacterium | Corynebacteriaceae | Actinomycetales | Actinobacteria | Actinobacteria | Bacteria |
X196388 | 0.589 | 0.001 | 253.000 | Subject2 | Leuconostoc | Leuconostocaceae | Lactobacillales | Bacilli | Firmicutes | Bacteria |
X205036 | 0.583 | 0.002 | 793.000 | Subject1 | Finegoldia | Incertae Sedis XI | Clostridiales | Clostridia | Firmicutes | Bacteria |
X217189 | 0.576 | 0.001 | 37.000 | Subject3 | Incertae Sedis XI | Clostridiales | Clostridia | Firmicutes | Bacteria | |
X96641 | 0.564 | 0.001 | 283.000 | Subject1 | Anaerococcus | Incertae Sedis XI | Clostridiales | Clostridia | Firmicutes | Bacteria |
X63291 | 0.557 | 0.001 | 70.000 | Subject1 | Corynebacterium | Corynebacteriaceae | Actinomycetales | Actinobacteria | Actinobacteria | Bacteria |
X201443 | 0.554 | 0.002 | 12640.000 | unoccupied | Stenotrophomonas | Xanthomonadaceae | Xanthomonadales | Gammaproteobacteria | Proteobacteria | Bacteria |
X9604 | 0.540 | 0.001 | 260.000 | Subject1 | Aerococcaceae | Lactobacillales | Bacilli | Firmicutes | Bacteria | |
X94008 | 0.538 | 0.001 | 10134.000 | Subject1 | Corynebacterium | Corynebacteriaceae | Actinomycetales | Actinobacteria | Actinobacteria | Bacteria |
X72518 | 0.532 | 0.001 | 5093.000 | Subject3 | Corynebacterium | Corynebacteriaceae | Actinomycetales | Actinobacteria | Actinobacteria | Bacteria |
Clean up and save important bits to go to figures and tables script.
rm(list = ls()[which(ls() %in% names(groups))])
rm(occ4f, occ4f.table, occ4f.map, i, iv.location, iv.person, p, these, iv.location.table,
iv.person.table, ivL.names, ivP.names)
save(ivL, ivP, pb.order, taxo.tops, tops, consensus, file = "../data/taxon_analysis.RData")