library(phyloseq)
library(dplyr)
library(microeco)
library(magrittr)
library(ggplot2)
library(file2meco)
library(GUniFrac)
library(pheatmap)
library(tidytree)
library(ggtree)
library(picante)
library(pander)
library(arules)
library(kableExtra)
library(Hmisc)
library(corrplot)
library(reshape2)
library(plyr)
library(agricolae)
library(cowplot)
library(DT)
library(here)

range01 <- function(x){(x-min(x))/(max(x)-min(x))}

set.seed(11)

theme_set(theme_bw())

Load the Data

ps <- readRDS("psCongo_V4.rds")

sample_data <- read.csv(file="Congo_metadata_V4.csv", header=TRUE, sep=",")
rownames(sample_data) <- sample_data$sampleId

scores <- read.csv(file="final_maternal_stress_scores_20220131.csv", header = TRUE, sep=",")
#scores <- scores[,-1]
names(scores)[names(scores) == "id"] <- "Individual"

samples <- subset_samples(ps, Type=="sample")
mock <- subset_samples(ps, Type=="mockcommunity")
replicates <- subset_samples(ps, X=="3" | X=="21" | X=="22" | X=="111" | X=="45" | X=="29" | X=="77" | X=="26" | X=="27" | X=="79")

dataset <- phyloseq2meco(samples)

dataset$tidy_dataset()

dataset$tax_table %<>% base::subset(Kingdom == "k__Archaea" | Kingdom == "k__Bacteria")

dataset$filter_pollution(taxa = c("mitochondria", "chloroplast"))
dataset$tidy_dataset()

test <- merge(dataset$sample_table, scores ,by="Individual") ## this removes the mock communities.
rownames(test) <- test$sampleId
dataset$sample_table <- test
dataset$tidy_dataset()

dataset$cal_abund()

dataset$cal_alphadiv(PD = FALSE)

dataset$cal_betadiv(unifrac = FALSE)

print("Distribution of samples at timepoints")
## [1] "Distribution of samples at timepoints"
barplot(table(dataset$sample_table$Time))

res <- cor(scores[,1:8])

res2 <- rcorr(as.matrix(scores[,1:8]))

res2
##                general_trauma sexual_events anxiety depression  ptsd stress
## general_trauma           1.00          0.27    0.20      -0.02  0.11   0.30
## sexual_events            0.27          1.00    0.07       0.36  0.21   0.34
## anxiety                  0.20          0.07    1.00       0.27 -0.19   0.11
## depression              -0.02          0.36    0.27       1.00  0.09   0.29
## ptsd                     0.11          0.21   -0.19       0.09  1.00   0.46
## stress                   0.30          0.34    0.11       0.29  0.46   1.00
## violence                 0.41          0.37    0.18       0.18  0.56   0.37
## pregnancy                0.27         -0.11    0.23       0.17  0.03   0.12
##                violence pregnancy
## general_trauma     0.41      0.27
## sexual_events      0.37     -0.11
## anxiety            0.18      0.23
## depression         0.18      0.17
## ptsd               0.56      0.03
## stress             0.37      0.12
## violence           1.00      0.21
## pregnancy          0.21      1.00
## 
## n= 50 
## 
## 
## P
##                general_trauma sexual_events anxiety depression ptsd   stress
## general_trauma                0.0597        0.1612  0.9000     0.4418 0.0351
## sexual_events  0.0597                       0.6434  0.0108     0.1395 0.0142
## anxiety        0.1612         0.6434                0.0582     0.1906 0.4573
## depression     0.9000         0.0108        0.0582             0.5343 0.0396
## ptsd           0.4418         0.1395        0.1906  0.5343            0.0007
## stress         0.0351         0.0142        0.4573  0.0396     0.0007       
## violence       0.0031         0.0086        0.2090  0.2107     0.0000 0.0086
## pregnancy      0.0609         0.4278        0.1036  0.2519     0.8113 0.3916
##                violence pregnancy
## general_trauma 0.0031   0.0609   
## sexual_events  0.0086   0.4278   
## anxiety        0.2090   0.1036   
## depression     0.2107   0.2519   
## ptsd           0.0000   0.8113   
## stress         0.0086   0.3916   
## violence                0.1373   
## pregnancy      0.1373
print("Correlations of Scores and Factors")
## [1] "Correlations of Scores and Factors"
corrplot(res, type = "upper", order = "hclust", 
         tl.col = "black", tl.srt = 45)

# ++++++++++++++++++++++++++++
# flattenCorrMatrix
# ++++++++++++++++++++++++++++
# cormat : matrix of the correlation coefficients
# pmat : matrix of the correlation p-values
flattenCorrMatrix <- function(cormat, pmat) {
  ut <- upper.tri(cormat)
  data.frame(
    row = rownames(cormat)[row(cormat)[ut]],
    column = rownames(cormat)[col(cormat)[ut]],
    cor  =(cormat)[ut],
    p = pmat[ut]
    )
}
scaledscores <- scores

scaledscores$general_trauma <- range01(scores$general_trauma)
scaledscores$sexual_events <- range01(scores$sexual_events)
scaledscores$stress <- range01(scores$stress)
scaledscores$ptsd <- range01(scores$ptsd)
scaledscores$violence <- range01(scores$violence)
scaledscores$depression <- range01(scores$depression)
scaledscores$anxiety <- range01(scores$anxiety)
scaledscores$pregnancy <- range01(scores$pregnancy)

scaledscores$Composite <- scaledscores$general_trauma + scaledscores$sexual_events + scaledscores$anxiety + scaledscores$depression + scaledscores$ptsd + scaledscores$stress + scaledscores$violence + scaledscores$pregnancy 

scores$Composite <- scaledscores$Composite

addscores <- merge(dataset$sample_table, scores[,c(10,11)] ,by="Individual") 
rownames(addscores) <- addscores$sampleId
dataset$sample_table <- addscores

dataset$tidy_dataset()

Stress Measures and Covariates

Removed all covariates that had only one type of response or were seriously weighted in one direction.

Removed the following: 15, 24, 25, 26, 28a, 29_Breastmilk, 29_Animal Milk, 29_Water, 29_Solidfood, 29_Other, 39_Water, 39_CoffeeTea, X39_Other, X15HIV, X32 antibiotics, X32d ibuprofen, X37Preclampsia, X37AIDS, X37Diabetes, X37 high blood pressure, X37 none of these, X38 typhoid, X38d other infection during pregnancy.

The following covariates are the remaining ones to test for associations with composite stress.

df <- dataset$sample_table

df <- df[!duplicated(df$Individual),]

df <- df[,c(1,41,43,52,61,77,79,81,87,97,117:119,131,132,134,136,141,143,145,167,168,169)]

df <- merge(df, scores, by="Individual")

## remove NAs
df <- na.omit(df)

df <- df[,-c(24:32)]

names(df[,-1])
##  [1] "X17_Antibiotics"                           
##  [2] "X18_OnMedicine"                            
##  [3] "X23_HowManyInHousehold"                    
##  [4] "X29_Formula"                               
##  [5] "Baby.weight..g."                           
##  [6] "Maternal.BMI"                              
##  [7] "X6...Age"                                  
##  [8] "X39...Soda"                                
##  [9] "X90a...If.no..what.number.child.is.this."  
## [10] "X15...UTI."                                
## [11] "X15...STD."                                
## [12] "X15...None."                               
## [13] "X32b...Take.antimalarials."                
## [14] "X32c...Take.cold.medicines."               
## [15] "X32e...Take.acetamenophen"                 
## [16] "X37b...STDs."                              
## [17] "X38a...Malaria.infection.during.pregnancy."
## [18] "X38c...UTI.during.pregnancy"               
## [19] "X38e...No.infections.during.pregnancy"     
## [20] "Med_Antibiotic"                            
## [21] "Med_Antimalarial"                          
## [22] "Med_Other"                                 
## [23] "Composite"
df[] <- lapply(df[],as.numeric)

#hist.data.frame(df[,2:21])


ggplot(melt(df[,-c(1)]),aes(x=value)) + geom_histogram() + facet_wrap(~variable, scales = "free", ncol=3)

res2<-rcorr(as.matrix(df[,-c(1,2)]))
corrmatrix <- flattenCorrMatrix(res2$r, res2$P)
corrmatrix$p <- format(corrmatrix$p, scientific = FALSE)
corrmatrix$p <- as.numeric(corrmatrix$p)
corrmatrix$p <- round(corrmatrix$p, digits = 3)
corrmatrix$cor <- round(corrmatrix$cor, digits = 3)

corrmatrix <- corrmatrix[order(-corrmatrix$cor),]

corrmatrixsig <- corrmatrix[ which(corrmatrix$p <= 0.05), ]
corrmatrixnotsig <- corrmatrix[ which(corrmatrix$p > 0.05), ]

Composite Stress and Covariates

None of the covariates are correlated to the Composite score.

res2<-rcorr(as.matrix(df[,-c(1,2)]))
corrmatrix <- flattenCorrMatrix(res2$r, res2$P)
corrmatrix$p <- format(corrmatrix$p, scientific = FALSE)
corrmatrix$p <- as.numeric(corrmatrix$p)
corrmatrix$p <- round(corrmatrix$p, digits = 3)
corrmatrix$cor <- round(corrmatrix$cor, digits = 3)

corrmatrix <- corrmatrix[order(-corrmatrix$cor),]
corrmatrixcomp <- corrmatrix[ which(corrmatrix$column == "Composite"), ]

dff <- corrmatrixcomp

datatable(dff,
            filter = "top",
            rownames = FALSE,
            width = '100%',
            options = list(scrollX = TRUE))

Covariates

I left in the information about the medications reported by mom during sick visits. The doctor’s record medicine is Med_Antimalarial, Med_Other, and Med_Antibiotics. The mother’s reported info is X32c…Take.cold.medicines, X32b…Take.antimalarials, X32e…Take.acetamenophen, and X17_Antibiotics. There is some correlation between the mother’s report and the medical report, but it is not exact. So, for the differential abundance analysis with covariates (ANCOM), I only use the medical report variables.

datatable(corrmatrixsig,
            filter = "top",
            rownames = FALSE,
            width = '100%',
            options = list(scrollX = TRUE))

Covariates and Microbial Taxa Across all Samples

These taxa exhibit significant correlations to certain covariates.

group1<-clone(dataset)

columnsofinterest <- c(41,43,52,61,77,79,81,87,97,117,118,119,131,132,134,136,141,143,145,167,168,169)

## get rid of NAs
group1$sample_table[is.na(group1$sample_table)] <- 0 

t1 <- trans_env$new(dataset = group1, add_data = group1$sample_table[,columnsofinterest])

t1$cal_cor(use_data = "all", p_adjust_method = "fdr")

datatable(t1$res_cor[t1$res_cor$AdjPvalue < 0.05,],
            filter = "top",
            rownames = FALSE,
            width = '100%',
            options = list(scrollX = TRUE))

Categorical Composite Stress Metric

Use an unsupervised Kmeans clustering approach to break each into two distinct groups.

scores <- read.csv(file="final_maternal_stress_scores_20220131.csv", header = TRUE, sep=",")
#scores <- scores[,-1]
names(scores)[names(scores) == "id"] <- "Individual"

scores <- scores[scores$Individual != "G6", ]                          # Remove row based on condition
scores <- scores[scores$Individual != "G18", ]                          # Remove row based on condition
scores <- scores[scores$Individual != "G32", ]                          # Remove row based on condition
scores <- scores[scores$Individual != "G44", ]                          # Remove row based on condition
scores <- scores[scores$Individual != "G49", ]                          # Remove row based on condition

scaledscores <- scores
scaledscores$general_trauma <- range01(scores$general_trauma)
scaledscores$sexual_events <- range01(scores$sexual_events)
scaledscores$stress <- range01(scores$stress)
scaledscores$ptsd <- range01(scores$ptsd)
scaledscores$violence <- range01(scores$violence)
scaledscores$depression <- range01(scores$depression)
scaledscores$coping <- range01(scores$coping)
scaledscores$anxiety <- range01(scores$anxiety)
scaledscores$pregnancy <- range01(scores$pregnancy)

scaledscores$Composite <- scaledscores$general_trauma + scaledscores$sexual_events + scaledscores$anxiety + scaledscores$depression + scaledscores$ptsd + scaledscores$stress + scaledscores$violence + scaledscores$pregnancy

Composite_categorical<- arules::discretize(scaledscores[,11], method="cluster", breaks = 2, labels = c("Low", "High"))
table(Composite_categorical)
## Composite_categorical
##  Low High 
##   26   21
scores$Composite_categorical <- as.factor(Composite_categorical) ## appends sample table

dataset$sample_table <- merge(dataset$sample_table, scores[,c(10,11)], by="Individual", all.x=TRUE)
row.names(dataset$sample_table) <- dataset$sample_table$sampleId

Prepare summary table

summarytable <- dataset$sample_table

summarytable <- summarytable %>% distinct(Individual, .keep_all = TRUE)

write.csv(summarytable, "summarytable.csv")
t1 <- trans_abund$new(dataset = dataset, taxrank = "Class", ntaxa = 10)

t1$plot_bar(others_color = "grey70", facet = "Time", xtext_keep = FALSE, legend_text_italic = FALSE)

t1 <- trans_abund$new(dataset = dataset, taxrank = "Class", ntaxa = 10, groupmean = "Time")
g1 <- t1$plot_bar(others_color = "grey70", legend_text_italic = FALSE)
g1 + theme_classic() + theme(axis.title.y = element_text(size = 18))

Age?

The different time points (6W, 3M, and 6M) are significantly different from one another. They are significantly different in beta diversity (Bray-Curtis), but NOT in alpha diversity (Shannon - richness).

group1 <- clone(dataset)

group1$sample_table <- subset(group1$sample_table, Time == "6W" | Time == "3M" | Time == "6M")

group1$tidy_dataset()

group1$cal_abund()
group1$cal_alphadiv(PD = FALSE)
group1$cal_betadiv(unifrac = FALSE)

group1$sample_table$Time %<>% factor(., levels = c("6W", "3M", "6M"))
t1 <- trans_abund$new(dataset = group1, taxrank = "Class", ntaxa = 10)


agebarplot <- t1$plot_bar(others_color = "grey70", facet = "Time", xtext_keep = FALSE, legend_text_italic = FALSE)
library(stringr)
agebarplot$data$Time <- str_replace(agebarplot$data$Time, "6W", "6 Weeks")
agebarplot$data$Time <- str_replace(agebarplot$data$Time, "3M", "3 Months")
agebarplot$data$Time <- str_replace(agebarplot$data$Time, "6M", "6 Months")

agebarplot$data$Time %<>% factor(., levels = c("6 Weeks", "3 Months", "6 Months"))

agebarplot <- agebarplot + theme(text = element_text(size = 12, face="bold"))

t1 <- trans_abund$new(dataset = group1, taxrank = "Class", ntaxa = 10, groupmean = "Time")
g1 <- t1$plot_bar(others_color = "grey70", legend_text_italic = FALSE)
g1 + theme_classic() + theme(axis.title.y = element_text(size = 18))

t1 <- trans_beta$new(dataset = group1, group = "Time", measure = "bray")
t1$cal_ordination(ordination = "PCoA")
t1$plot_ordination(plot_color = "Time", plot_shape = "Time", plot_type = c("point", "ellipse"))

# calculate and plot sample distances within groups
t1$cal_group_distance()
# return t1$res_group_distance
brayplot <- t1$plot_group_distance(distance_pair_stat = TRUE)
brayplot$data$Time %<>% factor(., levels = c("6W", "3M", "6M"))
brayplot <- brayplot + scale_x_discrete(labels=c("6W" = "6 Weeks", "3M" = "3 Months",
                              "6M" = "6 Months"))
brayplot

group1$sample_table$Time %<>% as.character %>% as.factor

t1 <- trans_alpha$new(dataset = group1, group = "Time")

t1$data_alpha$Time %<>%factor(., levels=c("6W", "3M", "6M"))
#t1$cal_diff(method = "KW")
#t1$res_alpha_diff

t1$cal_diff(method = "KW_dunn")
#t1$res_alpha_diff

#t1$plot_alpha(measure = "Shannon", use_boxplot = TRUE)
alphaplot <- t1$plot_alpha(pair_compare = TRUE, measure = "Shannon", group="Time")
alphaplot$data$Time %<>% factor(., levels = c("6W", "3M", "6M"))

alphaplot <- alphaplot + scale_x_discrete(labels=c("6W" = "6 Weeks", "3M" = "3 Months",
                              "6M" = "6 Months")) + theme(text = element_text(size = 12, face="bold")) + theme(panel.border = (element_rect(size=1, fill=NA)))

PCoA plot

t1 <- trans_beta$new(dataset = group1, group = "Time", measure = "bray")

t1$dataset$sample_table$Time %<>% factor(., levels = c("6W", "3M", "6M"))
t1$cal_ordination(ordination = "PCoA")
pcoaplot <- t1$plot_ordination(plot_color = "Time", plot_type = c("point", "ellipse"))

pcoa <- pcoaplot + theme_minimal() + theme(text = element_text(size = 12, face="bold"))
pcoa$data$Time %<>% factor(., levels = c("6W", "3M", "6M"))

pcoa <- pcoa + scale_color_hue(labels=c("6 Weeks", "3 Months", "6 Months")) + scale_fill_discrete(limits=c("6W", "3M", "6M"), labels=c("6 Weeks", "3 Months", "6 Months")) + scale_color_manual(values=c("#1B9E77", "#D95F02", "#7570B3"),limits=c("6W", "3M", "6M"), labels=c("6 Weeks", "3 Months", "6 Months")) + scale_fill_manual(values=c("#1B9E77", "#D95F02", "#7570B3"),limits=c("6W", "3M", "6M"), labels=c("6 Weeks", "3 Months", "6 Months"))


t1$cal_manova(manova_all = FALSE)
t1$res_manova
##     Groups measure         F         R2 p.value p.adjusted Significance
## 1 6M vs 6W    bray 1.8144503 0.02843322   0.009     0.0270            *
## 2 6M vs 3M    bray 0.8119884 0.01272470   0.792     0.7920             
## 3 6W vs 3M    bray 1.0624061 0.01434474   0.309     0.4635
t1$cal_manova(manova_set = "Time")
t1$res_manova
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = use_formula, data = metadata)
##           Df SumOfSqs      R2      F Pr(>F)  
## Time       2    1.026 0.02411 1.2228  0.095 .
## Residual  99   41.541 0.97589                
## Total    101   42.568 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Fig 1

brayplot <- brayplot + theme(text = element_text(size = 12, face="bold")) + labs(y="Bray-Curtis\nDistance")

brayplot

fig1a <- plot_grid(alphaplot + theme(axis.text.x=element_blank()) + ylim(0,5), brayplot + scale_y_continuous(breaks=c(0.5,0.75,1), limits = c(0.25,1.3)), align = "v", ncol=1, rel_heights = c(0.75,1))
fig1a

fig1 <- plot_grid(fig1a, agebarplot, align = "v", ncol=2, rel_widths = c(0.75,1), labels = c("A", "B"))
fig1 ## save 1250 x 500

fig1pcoa <- plot_grid(fig1, pcoa, align = "h", ncol=1, rel_heights = c(1, 0.75), labels = c("", "C"))

fig1pcoa ## save 1000 x 900

#fig2 <- plot_grid(sixalpha, sixbeta, threealpha, threebeta, sixmonthalpha, sixmonthbeta, labels = c("  A:  6 Weeks", "", "  B:  3 Months", "", "  C:  6 Months", ""), align = "h", ncol=2)

Antibiotic?

They are significantly different in beta diversity (Bray-Curtis), but NOT in alpha diversity (Shannon - richness).

group1 <- clone(dataset)

group1$tidy_dataset()

group1$cal_abund()
group1$cal_alphadiv(PD = FALSE)
group1$cal_betadiv(unifrac = FALSE)

t1 <- trans_abund$new(dataset = group1, taxrank = "Class", ntaxa = 10)

group1$sample_table$Med_Antibiotic <- as.factor(group1$sample_table$Med_Antibiotic)

t1$plot_bar(others_color = "grey70", facet = "Med_Antibiotic", xtext_keep = FALSE, legend_text_italic = FALSE)

t1 <- trans_abund$new(dataset = group1, taxrank = "Class", ntaxa = 10, groupmean = "Med_Antibiotic")
g1 <- t1$plot_bar(others_color = "grey70", legend_text_italic = FALSE)
g1 + theme_classic() + theme(axis.title.y = element_text(size = 18))

t1 <- trans_beta$new(dataset = group1, group = "Med_Antibiotic", measure = "bray")
t1$cal_ordination(ordination = "PCoA")
t1$plot_ordination(plot_color = "Med_Antibiotic", plot_shape = "Med_Antibiotic", plot_type = c("point", "ellipse"))

# calculate and plot sample distances within groups
t1$cal_group_distance()
# return t1$res_group_distance
t1$plot_group_distance(distance_pair_stat = TRUE)

t1 <- trans_alpha$new(dataset = group1, group = "Med_Antibiotic")
#t1$cal_diff(method = "KW")
#t1$res_alpha_diff

t1$cal_diff(method = "anova")
#t1$res_alpha_diff

t1$plot_alpha(add_letter = T, measure = "Shannon", use_boxplot = TRUE)

Medicine?

They are significantly different in beta diversity (Bray-Curtis) and in alpha diversity (Shannon - richness).

t1 <- trans_abund$new(dataset = group1, taxrank = "Class", ntaxa = 10)

group1$sample_table$Med_Other <- as.factor(group1$sample_table$Med_Other)

t1$plot_bar(others_color = "grey70", facet = "Med_Other", xtext_keep = FALSE, legend_text_italic = FALSE)

t1 <- trans_abund$new(dataset = group1, taxrank = "Class", ntaxa = 10, groupmean = "Med_Other")
g1 <- t1$plot_bar(others_color = "grey70", legend_text_italic = FALSE)
g1 + theme_classic() + theme(axis.title.y = element_text(size = 18))

t1 <- trans_beta$new(dataset = group1, group = "Med_Other", measure = "bray")
t1$cal_ordination(ordination = "PCoA")
t1$plot_ordination(plot_color = "Med_Other", plot_shape = "Med_Other", plot_type = c("point", "ellipse"))

# calculate and plot sample distances within groups
t1$cal_group_distance()
# return t1$res_group_distance
t1$plot_group_distance(distance_pair_stat = TRUE)

t1 <- trans_alpha$new(dataset = group1, group = "Med_Other")
#t1$cal_diff(method = "KW")
#t1$res_alpha_diff

t1$cal_diff(method = "anova")
#t1$res_alpha_diff

t1$plot_alpha(add_letter = T, measure = "Shannon", use_boxplot = TRUE)

Gender?

They are significantly different in beta diversity (Bray-Curtis), but NOT in alpha diversity (Shannon - richness).

t1 <- trans_abund$new(dataset = group1, taxrank = "Class", ntaxa = 10)

t1$plot_bar(others_color = "grey70", facet = "X6_BabySex", xtext_keep = FALSE, legend_text_italic = FALSE)

t1 <- trans_abund$new(dataset = group1, taxrank = "Class", ntaxa = 10, groupmean = "X6_BabySex")
g1 <- t1$plot_bar(others_color = "grey70", legend_text_italic = FALSE)
g1 + theme_classic() + theme(axis.title.y = element_text(size = 18))

t1 <- trans_beta$new(dataset = group1, group = "X6_BabySex", measure = "bray")
t1$cal_ordination(ordination = "PCoA")
t1$plot_ordination(plot_color = "X6_BabySex", plot_shape = "X6_BabySex", plot_type = c("point", "ellipse"))

# calculate and plot sample distances within groups
t1$cal_group_distance()
# return t1$res_group_distance
t1$plot_group_distance(distance_pair_stat = TRUE)

t1 <- trans_alpha$new(dataset = group1, group = "X6_BabySex")
#t1$cal_diff(method = "KW")
#t1$res_alpha_diff

t1$cal_diff(method = "anova")
#t1$res_alpha_diff

t1$plot_alpha(add_letter = T, measure = "Shannon", use_boxplot = TRUE)

6 Weeks

dataset$sample_table$Med_Other <- as.numeric(dataset$sample_table$Med_Other)

#columnsofinterest <- c(41,43,52,61,77,79,81,87,97,117,118,119,131,132,134,136,141,143,145,176,177,178,179)

# removed mother's report of child taking drugs since we have the medical record. also removed formula since only one baby took formula. removed mother drinking soda.
columnsofinterest <- c(52,77,79,81,97,117,118,119,131,132,134,136,141,143,145,167,168,169,182)


group1 <- clone(dataset)

group1$sample_table <- subset(group1$sample_table, Time == "6W")

ggplot(melt(group1$sample_table[,columnsofinterest]),aes(x=value)) + geom_histogram() + facet_wrap(~variable, scales = "free", ncol=3)

group1$tidy_dataset()

group1$cal_abund()
group1$cal_alphadiv(PD = FALSE)
group1$cal_betadiv(unifrac = FALSE)

##

t1 <- trans_abund$new(dataset = group1, taxrank = "Phylum", ntaxa = 10)

t1$plot_bar(others_color = "grey70", facet = "Composite_categorical", xtext_keep = FALSE, legend_text_italic = FALSE)

t1 <- trans_abund$new(dataset = group1, taxrank = "Class", ntaxa = 10, groupmean = "Composite_categorical")
g1 <- t1$plot_bar(others_color = "grey70", legend_text_italic = FALSE)
g1 + theme_classic() + theme(axis.title.y = element_text(size = 18))

t1 <- trans_abund$new(dataset = group1, taxrank = "Genus", ntaxa = 40)
t1$plot_heatmap(facet = "Composite_categorical", xtext_keep = FALSE, withmargin = FALSE)

##
t1 <- trans_env$new(dataset = group1, env_cols = columnsofinterest)

t1$data_env$Med_Antimalarial <- NULL

t1$cal_ordination(method="dbRDA", use_measure = "bray")
#t1$trans_rda(adjust_arrow_length = TRUE, max_perc_env = 0.5)

t1$cal_mantel(use_measure = "bray")
t1$res_mantel%>%
  kbl(escape = F, caption="Mantel Test Significance") %>%
  kable_styling("hover")
Mantel Test Significance
Variables mantel type Correlation method Correlation coefficient p.value p.adjusted Significance
X23_HowManyInHousehold mantel test pearson -0.0694929 0.818 0.9202500
Baby.weight..g. mantel test pearson -0.0982693 0.894 0.9260000
Maternal.BMI mantel test pearson 0.0648161 0.205 0.5811429
X6…Age mantel test pearson -0.0989235 0.926 0.9260000
X90a…If.no..what.number.child.is.this. mantel test pearson 0.0762299 0.142 0.5811429
X15…UTI. mantel test pearson -0.0044295 0.470 0.9016364
X15…STD. mantel test pearson -0.0264953 0.613 0.9195000
X15…None. mantel test pearson 0.0044594 0.342 0.7695000
X32b…Take.antimalarials. mantel test pearson 0.0330999 0.226 0.5811429
X32c…Take.cold.medicines. mantel test pearson -0.0253097 0.551 0.9016364
X32e…Take.acetamenophen mantel test pearson -0.0070770 0.511 0.9016364
X37b…STDs. mantel test pearson -0.0848425 0.816 0.9202500
X38a…Malaria.infection.during.pregnancy. mantel test pearson -0.0774146 0.788 0.9202500
X38c…UTI.during.pregnancy mantel test pearson -0.0493042 0.722 0.9202500
X38e…No.infections.during.pregnancy mantel test pearson 0.0583755 0.050 0.3660000
Med_Antibiotic mantel test pearson 0.1057389 0.061 0.3660000
Med_Other mantel test pearson 0.1057389 0.052 0.3660000
Composite mantel test pearson 0.0651813 0.186 0.5811429
t1 <- trans_env$new(dataset = group1, add_data = group1$sample_table[,columnsofinterest])

t1$cal_cor(use_data = "all", p_adjust_method = "fdr")

datatable(t1$res_cor[t1$res_cor$AdjPvalue < 0.05,],
            filter = "top",
            rownames = FALSE,
            width = '100%',
            options = list(scrollX = TRUE))
t1 <- trans_alpha$new(dataset = group1, group = "Composite_categorical")
t1$cal_diff(method = "anova", anova_set="Composite_categorical+Med_Antibiotic")
sixalpha <- t1$plot_alpha(add_letter = T, use_boxplot = TRUE, measure = "Shannon", pair_compare = TRUE, order_x_mean = TRUE)
sixalpha <- sixalpha + scale_color_manual(values=c("red", "blue"))
sixalpha

t1$res_diff$Shannon
##                       Df Sum Sq Mean Sq F value Pr(>F)
## Composite_categorical  1  0.095  0.0950   0.151  0.700
## Med_Antibiotic         1  0.000  0.0002   0.000  0.986
## Residuals             34 21.317  0.6270
## testing the beta diversity with anova
t1 <- trans_beta$new(dataset = group1, group = "Composite_categorical", measure = "bray")
t1$cal_ordination(ordination = "PCoA")
t1$cal_group_distance()
t1$cal_manova(manova_set = "Composite_categorical + Med_Antibiotic + Med_Other")
t1$res_manova
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = use_formula, data = metadata)
##                       Df SumOfSqs      R2      F Pr(>F)  
## Composite_categorical  1   0.5152 0.03173 1.1546  0.162  
## Med_Antibiotic         1   0.5496 0.03385 1.2318  0.014 *
## Residual              34  15.1701 0.93441                
## Total                 36  16.2348 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
t1$cal_betadisper()
t1$res_betadisper
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df   Sum Sq   Mean Sq      F N.Perm Pr(>F)  
## Groups     1 0.020654 0.0206543 3.7728    999  0.052 .
## Residuals 35 0.191610 0.0054746                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
##           Low  High
## Low           0.049
## High 0.060175
counts <- ddply(group1$sample_table, .(group1$sample_table$Composite_categorical), nrow)
names(counts) <- c("Composite?", "Number of Samples")
counts
##   Composite? Number of Samples
## 1        Low                21
## 2       High                16
t1 <- trans_beta$new(dataset = group1, group = "Composite_categorical", measure = "bray")
t1$cal_ordination(ordination = "PCoA")
t1$plot_ordination(plot_color = "Composite_categorical", plot_shape = "Composite_categorical", plot_type = c("point", "ellipse"))

t1$cal_group_distance()
sixbeta <- t1$plot_group_distance(distance_pair_stat = TRUE)
sixbeta <- sixbeta + scale_color_manual(values=c("red", "blue"))
sixbeta

group1$sample_table$Composite_categorical %<>% as.character %>% as.factor

t1 <- trans_alpha$new(dataset = group1, group = "Composite_categorical")
t1$cal_diff(method = "KW")
sixweekalpha <- t1$plot_alpha(pair_compare = TRUE, measure = "Shannon", group="Composite_categorical",  order_x_mean = TRUE) + scale_color_manual(values=c("red", "blue")) + theme(panel.border = (element_rect(size=1, fill=NA)))

sixweekalpha

3 Months

group1 <- clone(dataset)

group1$sample_table <- subset(group1$sample_table, Time == "3M")

ggplot(melt(group1$sample_table[,columnsofinterest]),aes(x=value)) + geom_histogram() + facet_wrap(~variable, scales = "free", ncol=3)

group1$tidy_dataset()
group1$cal_abund()
group1$cal_alphadiv(PD = FALSE)
group1$cal_betadiv(unifrac = FALSE)

##

t1 <- trans_abund$new(dataset = group1, taxrank = "Phylum", ntaxa = 10)

t1$plot_bar(others_color = "grey70", facet = "Composite_categorical", xtext_keep = FALSE, legend_text_italic = FALSE)

t1 <- trans_abund$new(dataset = group1, taxrank = "Class", ntaxa = 10, groupmean = "Composite_categorical")
g1 <- t1$plot_bar(others_color = "grey70", legend_text_italic = FALSE)
g1 + theme_classic() + theme(axis.title.y = element_text(size = 18))

t1 <- trans_abund$new(dataset = group1, taxrank = "Genus", ntaxa = 40)
t1$plot_heatmap(facet = "Composite_categorical", xtext_keep = FALSE, withmargin = FALSE)

##

t1 <- trans_env$new(dataset = group1, env_cols = columnsofinterest)

t1$cal_ordination(method="dbRDA", use_measure = "bray")
#t1$trans_rda(adjust_arrow_length = TRUE, max_perc_env = 0.5)

t1$cal_mantel(use_measure = "bray")
t1$res_mantel%>%
  kbl(escape = F, caption="Mantel Test Significance") %>%
  kable_styling("hover")
Mantel Test Significance
Variables mantel type Correlation method Correlation coefficient p.value p.adjusted Significance
X23_HowManyInHousehold mantel test pearson -0.0186740 0.555 0.9410588
Baby.weight..g. mantel test pearson -0.0752633 0.828 0.9410588
Maternal.BMI mantel test pearson 0.0921892 0.137 0.9410588
X6…Age mantel test pearson -0.1459723 0.993 0.9930000
X90a…If.no..what.number.child.is.this. mantel test pearson 0.1807048 0.017 0.3230000
X15…UTI. mantel test pearson -0.0257220 0.892 0.9415556
X15…STD. mantel test pearson -0.1121692 0.842 0.9410588
X15…None. mantel test pearson -0.0214779 0.601 0.9410588
X32b…Take.antimalarials. mantel test pearson -0.0015835 0.421 0.9410588
X32c…Take.cold.medicines. mantel test pearson -0.0724194 0.761 0.9410588
X32e…Take.acetamenophen mantel test pearson 0.0168251 0.191 0.9410588
X37b…STDs. mantel test pearson -0.0514647 0.692 0.9410588
X38a…Malaria.infection.during.pregnancy. mantel test pearson -0.0725666 0.756 0.9410588
X38c…UTI.during.pregnancy mantel test pearson -0.0732142 0.840 0.9410588
X38e…No.infections.during.pregnancy mantel test pearson -0.0152307 0.640 0.9410588
Med_Antibiotic mantel test pearson -0.0428810 0.674 0.9410588
Med_Antimalarial mantel test pearson -0.1424707 0.840 0.9410588
Med_Other mantel test pearson -0.0132894 0.508 0.9410588
Composite mantel test pearson 0.0675202 0.209 0.9410588
t1 <- trans_env$new(dataset = group1, add_data = group1$sample_table[,columnsofinterest])

t1$cal_cor(use_data = "all", p_adjust_method = "fdr")

datatable(t1$res_cor[t1$res_cor$AdjPvalue < 0.05,],
            filter = "top",
            rownames = FALSE,
            width = '100%',
            options = list(scrollX = TRUE))
t1 <- trans_alpha$new(dataset = group1, group = "Composite_categorical")
t1$cal_diff(method = "anova", anova_set="Composite_categorical+Med_Antibiotic")
threealpha <- t1$plot_alpha(measure = "Shannon", pair_compare = TRUE, order_x_mean = TRUE)
threealpha <- threealpha + scale_color_manual(values=c("red", "blue"))
threealpha

t1$res_diff$Shannon
##                       Df Sum Sq Mean Sq F value Pr(>F)
## Composite_categorical  1  1.117  1.1174   2.005  0.166
## Med_Antibiotic         1  0.000  0.0004   0.001  0.980
## Residuals             35 19.509  0.5574
## testing the beta diversity with anova
t1 <- trans_beta$new(dataset = group1, group = "Composite_categorical", measure = "bray")
t1$cal_ordination(ordination = "PCoA")
t1$cal_group_distance()
t1$cal_manova(manova_set = "Composite_categorical + Med_Antibiotic + Med_Antimalarial + Med_Other")
t1$res_manova
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = use_formula, data = metadata)
##                       Df SumOfSqs      R2      F Pr(>F)
## Composite_categorical  1   0.2718 0.01761 0.6384  0.987
## Med_Antibiotic         1   0.2996 0.01942 0.7037  0.931
## Med_Antimalarial       1   0.3057 0.01981 0.7179  0.793
## Med_Other              1   0.5032 0.03261 1.1818  0.242
## Residual              33  14.0509 0.91055              
## Total                 37  15.4311 1.00000
t1$cal_betadisper()
t1$res_betadisper
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df  Sum Sq   Mean Sq      F N.Perm Pr(>F)
## Groups     1 0.00009 0.0000908 0.0077    999  0.921
## Residuals 36 0.42275 0.0117431                     
## 
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
##         Low  High
## Low         0.924
## High 0.9304
row.names(scores) <- gsub("6W", "3M", row.names(scores))

counts <- ddply(group1$sample_table, .(group1$sample_table$Composite_categorical), nrow)
names(counts) <- c("Composite?", "Number of Samples")
counts
##   Composite? Number of Samples
## 1        Low                22
## 2       High                16
t1 <- trans_beta$new(dataset = group1, group = "Composite_categorical", measure = "bray")
t1$cal_ordination(ordination = "PCoA")
t1$plot_ordination(plot_color = "Composite_categorical", plot_shape = "Composite_categorical", plot_type = c("point", "ellipse"))

t1$cal_group_distance()
threebeta <- t1$plot_group_distance(distance_pair_stat = TRUE)
threebeta <- threebeta + scale_color_manual(values=c("red", "blue"))
threebeta

group1$sample_table$Composite_categorical %<>% as.character %>% as.factor

t1 <- trans_alpha$new(dataset = group1, group = "Composite_categorical")
t1$cal_diff(method = "KW")
threemonthalpha <- t1$plot_alpha(pair_compare = TRUE, measure = "Shannon", group="Composite_categorical",  order_x_mean = TRUE) + scale_color_manual(values=c("red", "blue")) + theme(panel.border = (element_rect(size=1, fill=NA)))


threemonthalpha

6 Months

group1 <- clone(dataset)

group1$sample_table <- subset(group1$sample_table, Time == "6M")

ggplot(melt(group1$sample_table[,columnsofinterest]),aes(x=value)) + geom_histogram() + facet_wrap(~variable, scales = "free", ncol=3)

group1$tidy_dataset()
group1$cal_abund()
group1$cal_alphadiv(PD = FALSE)
group1$cal_betadiv(unifrac = FALSE)

##

t1 <- trans_abund$new(dataset = group1, taxrank = "Phylum", ntaxa = 10)

t1$plot_bar(others_color = "grey70", facet = "Composite_categorical", xtext_keep = FALSE, legend_text_italic = FALSE)

t1 <- trans_abund$new(dataset = group1, taxrank = "Class", ntaxa = 10, groupmean = "Composite_categorical")
g1 <- t1$plot_bar(others_color = "grey70", legend_text_italic = FALSE)
g1 + theme_classic() + theme(axis.title.y = element_text(size = 18))

t1 <- trans_abund$new(dataset = group1, taxrank = "Genus", ntaxa = 40)
t1$plot_heatmap(facet = "Composite_categorical", xtext_keep = FALSE, withmargin = FALSE)

##
t1 <- trans_env$new(dataset = group1, env_cols = columnsofinterest)

t1$data_env$Med_Antimalarial <- NULL
t1$data_env$X15...STD. <- NULL

#gg <- t1$cal_autocor(group = "Composite_categorical")


t1$cal_ordination(method = "dbRDA", use_measure = "bray")
#t1$trans_ordination(adjust_arrow_length = TRUE, max_perc_env = 1.5)

t1$cal_mantel(use_measure = "bray")
t1$res_mantel%>%
  kbl(escape = F, caption="Mantel Test Significance") %>%
  kable_styling("hover")
Mantel Test Significance
Variables mantel type Correlation method Correlation coefficient p.value p.adjusted Significance
X23_HowManyInHousehold mantel test pearson 0.0306121 0.320 0.6233333
Baby.weight..g. mantel test pearson -0.1266868 0.898 0.9660000
Maternal.BMI mantel test pearson 0.0484765 0.297 0.6233333
X6…Age mantel test pearson 0.0589518 0.242 0.6233333
X90a…If.no..what.number.child.is.this. mantel test pearson 0.2693789 0.011 0.1870000
X15…UTI. mantel test pearson -0.0217770 0.646 0.9463333
X15…None. mantel test pearson 0.0101331 0.330 0.6233333
X32b…Take.antimalarials. mantel test pearson -0.1527291 0.937 0.9660000
X32c…Take.cold.medicines. mantel test pearson 0.2317943 0.051 0.4335000
X32e…Take.acetamenophen mantel test pearson 0.0082852 0.310 0.6233333
X37b…STDs. mantel test pearson -0.0803152 0.668 0.9463333
X38a…Malaria.infection.during.pregnancy. mantel test pearson -0.0317055 0.571 0.9463333
X38c…UTI.during.pregnancy mantel test pearson -0.1084931 0.914 0.9660000
X38e…No.infections.during.pregnancy mantel test pearson -0.0399995 0.964 0.9660000
Med_Antibiotic mantel test pearson -0.1626871 0.966 0.9660000
Med_Other mantel test pearson 0.2404512 0.106 0.6006667
Composite mantel test pearson 0.0620567 0.261 0.6233333
t1 <- trans_env$new(dataset = group1, env_cols = columnsofinterest)

t1$cal_cor(use_data = "all", p_adjust_method = "fdr")

datatable(t1$res_cor[t1$res_cor$AdjPvalue < 0.05,],
            filter = "top",
            rownames = FALSE,
            width = '100%',
            options = list(scrollX = TRUE))
t1 <- trans_alpha$new(dataset = group1, group = "Composite_categorical")
t1$cal_diff(method = "anova", anova_set="Composite_categorical+Med_Antibiotic")
sixmonthalpha <- t1$plot_alpha(measure = "Shannon", group="Composite_categorical", shape="Composite_categorical", pair_compare = TRUE, order_x_mean = TRUE)
sixmonthalpha <- sixmonthalpha + scale_color_manual(values=c("red", "blue"))
sixmonthalpha

t1$res_diff$Shannon
##                       Df Sum Sq Mean Sq F value Pr(>F)  
## Composite_categorical  1  1.375  1.3747   3.595 0.0701 .
## Med_Antibiotic         1  0.145  0.1449   0.379 0.5440  
## Residuals             24  9.178  0.3824                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## testing the beta diversity with anova
t1 <- trans_beta$new(dataset = group1, group = "Composite_categorical", measure = "bray")
t1$cal_ordination(ordination = "PCoA")
t1$cal_group_distance()
t1$cal_manova(manova_set = "Composite_categorical + Med_Antibiotic")
t1$res_manova
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = use_formula, data = metadata)
##                       Df SumOfSqs      R2      F Pr(>F)
## Composite_categorical  1   0.4755 0.04815 1.2743  0.134
## Med_Antibiotic         1   0.4449 0.04505 1.1923  0.173
## Residual              24   8.9551 0.90680              
## Total                 26   9.8755 1.00000
t1$cal_betadisper()
t1$res_betadisper
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df  Sum Sq   Mean Sq     F N.Perm Pr(>F)
## Groups     1 0.00596 0.0059553 0.366    999  0.561
## Residuals 25 0.40683 0.0162731                    
## 
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
##          Low  High
## Low          0.554
## High 0.55067
row.names(scores) <- gsub("6W", "6M", row.names(scores))
group1$sample_table$Composite_categorical %<>% as.character %>% as.factor

t1 <- trans_alpha$new(dataset = group1, group = "Composite_categorical")
t1$cal_diff(method = "KW")
sixmonthalpha <- t1$plot_alpha(pair_compare = TRUE, measure = "Shannon", group="Composite_categorical",  order_x_mean = TRUE) + scale_color_manual(values=c("red", "blue")) + theme(panel.border = (element_rect(size=1, fill=NA))) + ggtitle("C: 6 Months")


sixmonthalpha

t1 <- trans_beta$new(dataset = group1, group = "Composite_categorical", measure = "bray")
t1$cal_ordination(ordination = "PCoA")
t1$plot_ordination(plot_color = "Composite_categorical", plot_shape = "Composite_categorical", plot_type = c("point", "ellipse"))

t1$cal_group_distance()
sixmonthbeta <- t1$plot_group_distance(distance_pair_stat = TRUE)
sixmonthbeta <- sixmonthbeta + scale_color_manual(values=c("red", "blue"))+ theme(text = element_text(size = 12, face="bold"))
sixmonthbeta

fig2 <- plot_grid(sixalpha, sixbeta, threealpha, threebeta, sixmonthalpha, sixmonthbeta, labels = c("  A:  6 Weeks", "", "  B:  3 Months", "", "  C:  6 Months", ""), align = "h", ncol=2)
fig2 <- plot_grid(sixweekalpha + theme(plot.margin = unit(c(0, 0, 0, 0), "cm")) + scale_y_continuous(limits=c(0,5)) + theme(panel.border = (element_rect(size=1, fill=NA))) + ggtitle("A: 6 Weeks") + theme(axis.text.x=element_blank())+ theme(text = element_text(size = 12, face="bold")), threemonthalpha  + theme(plot.margin = unit(c(0, 0, 0, 0), "cm")) + theme(axis.title.y = element_blank()) + scale_y_continuous(limits=c(0,5)) + theme(panel.border = (element_rect(size=1, fill=NA))) + ggtitle("B: 3 Months") + theme(axis.text.x=element_blank())+ theme(text = element_text(size = 12, face="bold")), sixmonthalpha  + theme(plot.margin = unit(c(0, 0, 0, 0), "cm")) + theme(axis.title.y = element_blank()) + scale_y_continuous(limits=c(0,5)) + theme(panel.border = (element_rect(size=1, fill=NA))) + ggtitle("C: 6 Months") + theme(axis.text.x=element_blank())+ theme(text = element_text(size = 12, face="bold")), sixbeta  + theme(plot.margin = unit(c(0, 0, 0, 0), "cm")) + theme(text = element_text(size = 12, face="bold"))+ theme(panel.border = (element_rect(size=1, fill=NA))) + scale_y_continuous(limits=c(0.2,1.1)) + labs(y="Bray-Curtis\nDistance"), threebeta+ theme(plot.margin = unit(c(0, 0, 0, 0), "cm")) + theme(text = element_text(size = 12, face="bold")) + theme(axis.title.y = element_blank()) + theme(panel.border = (element_rect(size=1, fill=NA))) + scale_y_continuous(limits=c(0.2,1.1)), sixmonthbeta + theme(plot.margin = unit(c(0, 0, 0, 0), "cm")) + theme(text = element_text(size = 12, face="bold")) + theme(axis.title.y = element_blank()) + theme(panel.border = (element_rect(size=1, fill=NA))) + scale_y_continuous(limits=c(0.2,1.1)), align = "v", ncol=3)

fig2

## save 700 x 400

Mock Community

MC represents what the mock community should look like. MC1, MC2, and MC3 were the same mock community sequenced three different times. Zymo Research D6306. Lot 195709

Genus were well represented between what we would expect from the mock community, although approximately 0.06% of MC3 was identified as Bifidobacterium

At the species level:

  • Bacillus subtilis was identified as Bacillus intestinalis. When I did a blast on the sequence, it came up more likely as subtilis.

  • 1 to 2% of Boydii was identified in all mock communities. When I did a blast on the sequence, it came up more likely as E. coli.

  • 3 to 7% of Flexneri was identified in mock communities. When I did a blast on it, it came up as more likely E. coli.

  • The unidentified at the species level are primarily from L. fermentum, from the blast analysis.

It seems to be common to have Shigella species (boydii and flexneri) mistaken for e. coli, since they are all very closely related. See https://www.sciencedirect.com/topics/biochemistry-genetics-and-molecular-biology/shigella-boydii and https://journals.asm.org/doi/full/10.1128/JCM.01790-16.

See https://github.com/benjjneb/dada2/issues/1416 concerning L. fermentum. https://github.com/benjjneb/dada2/issues/1441

dataset <- phyloseq2meco(mock)

genusstandards <- read.csv("C:/Congo/ZymoStandardsGenus.csv")
speciesstandards <- read.csv("C:/Congo/ZymoStandardsSpecies.csv")

dataset$tidy_dataset()

dataset$tax_table %<>% base::subset(Kingdom == "k__Archaea" | Kingdom == "k__Bacteria")

dataset$filter_pollution(taxa = c("mitochondria", "chloroplast"))
dataset$tidy_dataset()

dataset$cal_abund()
#dataset$save_abund(dirpath = "C:/Congo/taxa_abund")

t1 <- trans_abund$new(dataset = dataset, taxrank = "Genus", ntaxa = 8)

## add in standards
genusstandards$X.1 <- NA
genusstandards$sampleId <- "MC"
t1$data_abund <- bind_rows(t1$data_abund, genusstandards)

t1$plot_bar(others_color = "grey70", xtext_keep = FALSE, facet="sampleId", legend_text_italic = FALSE)

ggplot(t1$data_abund, aes(x=Taxonomy, y=Abundance, color=Sample)) +
    geom_point(size=2) + theme_bw() + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + labs(x="Genus Level Taxonomy", y="Relative Abundance (%)")

t1 <- trans_abund$new(dataset = dataset, taxrank = "Species", ntaxa = 15)

## add in standards
t1$data_abund <- bind_rows(t1$data_abund, speciesstandards)

ggplot(t1$data_abund, aes(x=Taxonomy, y=Abundance, color=Sample)) +
    geom_point(size=2) + theme_bw() + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + labs(x="Species Level Taxonomy", y="Relative Abundance (%)")

datatable(dataset$tax_table,
            filter = "top",
            rownames = FALSE,
            width = '100%',
            options = list(scrollX = TRUE))

Replicates

G7 and G20 were extraction replicates

G19, G4, and G46 were sequence replicates

dataset <- phyloseq2meco(replicates)

dataset$tidy_dataset()

dataset$tax_table %<>% base::subset(Kingdom == "k__Archaea" | Kingdom == "k__Bacteria")

dataset$filter_pollution(taxa = c("mitochondria", "chloroplast"))
dataset$tidy_dataset()

dataset$cal_abund()

t1 <- trans_abund$new(dataset = dataset, taxrank = "Genus", ntaxa = 10)
t1$plot_bar(others_color = "grey70", facet="Individual", xtext_keep = FALSE, legend_text_italic = FALSE)

Check L. gasseri with NCBI BLAST

Checked L. gasseri with a NCBI BLAST search to verify classification. There were three different sequences. The first and third came up as 99.9% similar to L. gasseri and the second came up as 100%.

dataset <- phyloseq2meco(samples)
## 68 taxa are removed from the otu_table, as the abundance is 0 ...
dataset$tax_table %<>% base::subset(Species == "s__gasseri")
dataset$tidy_dataset()

rownames(dataset$tax_table)
## [1] "GACGAACGCTGGCGGCGTGCCTAATACATGCAAGTCGAGCGAGCTTGCCTAGATGAATTTGGTGCTTGCACCAAATGAAACTAGATACAAGCGAGCGGCGGACGGGTGAGTAACACGTGGGTAACCTGCCCAAGAGACTGGGATAACACCTGGAAACAGATGCTAATACCGGATAACAACACTAGACGCATGTCTAGAGTTTAAAAGATGGTTCTGCTATCACTCTTGGATGGACCTGCGGTGCATTAGCTAGTTGGTAAGGTAACGGCTTACCAAGGCAATGATGCATAGCCGAGTTGAGAGACTGATCGGCCACATTGGGACTGAGACACGGCCCAAACTCCTACGGGAGGCAGCAGTAGGGAATCTTCCACAATGGACGCAAGTCTGATGGAGCAACGCCGCGTGAGTGAAGAAGGGTTTCGGCTCGTAAAGCTCTGTTGGTAGTGAAGAAAGATAGAGGTAGTAACTGGCCTTTATTTGACGGTAATTACTTAGAAAGTCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGTGCAGGCGGTTCAATAAGTCTGATGTGAAAGCCTTCGGCTCAACCGGAGAATTGCATCAGAAACTGTTGAACTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGCAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGGATTAGATACCCTGGTAGTCCATGCCGTAAACGATGAGTGCTAAGTGTTGGGAGGTTTCCGCCTCTCAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGACCGCAAGGTTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCAGTGCAAACCTAAGAGATTAGGAGTTCCCTTCGGGGACGCTAAGACAGGTGGTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGTCATTAGTTGCCATCATTAAGTTGGGCACTCTAATGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAGTCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGACGGTACAACGAGAAGCGAACCTGCGAAGGCAAGCGGATCTCTGAAAGCCGTTCTCAGTTCGGACTGTAGGCTGCAACTCGCCTACACGAAGCTGGAATCGCTAGTAATCGCGGATCAGCACGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATGAGAGTCTGTAACACCCAAAGCCGGTGGGATAACCTTTATAGGAGTCAGCCGTCTAAGGTAGGACAGATGATTAGGGTG"
## [2] "GACGAACGCTGGCGGCGTGCCTAATACATGCAAGTCGAGCGAGCTTGCCTAGATGAATTTGGTGCTTGCACCAAATGAAACTAGATACAAGCGAGCGGCGGACGGGTGAGTAACACGTGGGTAACCTGCCCAAGAGACTGGGATAACACCTGGAAACAGATGCTAATACCGGATAACAACACTAGACGCATGTCTAGAGTTTAAAAGATGGTTCTGCTATCACTCTTGGATGGACCTGCGGTGCATTAGCTAGTTGGTAAGGTAACGGCTTACCAAGGCAATGATGCATAGCCGAGTTGAGAGACTGATCGGCCACATTGGGACTGAGACACGGCCCAAACTCCTACGGGAGGCAGCAGTAGGGAATCTTCCACAATGGACGCAAGTCTGATGGAGCAACGCCGCGTGAGTGAAGAAGGGTTTCGGCTCGTAAAGCTCTGTTGGTAGTGAAGAAAGATAGAGGTAGTAACTGGCCTTTATTTGACGGTAATTACTTAGAAAGTCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGTGCAGGCGGTTCAATAAGTCTGATGTGAAAGCCTTCGGCTCAACCGGAGAATTGCATCAGAAACTGTTGAACTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGCAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGGATTAGATACCCTGGTAGTCCATGCCGTAAACGATGAGTGCTAAGTGTTGGGAGGTTTCCGCCTCTCAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGACCGCAAGGTTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCAGTGCAAACCTAAGAGATTAGGAGTTCCCTTCGGGGACGCTGAGACAGGTGGTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGTCATTAGTTGCCATCATTAAGTTGGGCACTCTAATGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAGTCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGACGGTACAACGAGAAGCGAACCTGCGAAGGCAAGCGGATCTCTGAAAGCCGTTCTCAGTTCGGACTGTAGGCTGCAACTCGCCTACACGAAGCTGGAATCGCTAGTAATCGCGGATCAGCACGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATGAGAGTCTGTAACACCCAAAGCCGGTGGGATAACCTTTATAGGAGTCAGCCGTCTAAGGTAGGACAGATGATTAGGGTG"
## [3] "GACGAACGCTGGCGGCGTGCCTAATACATGCAAGTCGAGCGAGCTTGCCTAGATGAATTTGGTGCTTGCACCAAATGAAACTAGATACAAGCGAGCGGCGGACGGGTGAGTAACACGTGGGTAACCTGCCCAAGAGACTGGGATAACACCTGGAAACAGATGCTAATACCGGATAACAACACTAGACGCATGTCTAGAGTTTAAAAGATGGTTCTGCTATCACTCTTGGATGGACCTGCGGTGCATTAGCTAGTTGGTAAGGTAACGGCTTACCAAGGCAATGATGCATAGCCGAGTTGAGAGACTGATCGGCCACATTGGGACTGAGACACGGCCCAAACTCCTACGGGAGGCAGCAGTAGGGAATCTTCCACAATGGACGCAAGTCTGATGGAGCAACGCCGCGTGAGTGAAGAAGGGTTTCGGCTCGTAAAGCTCTGTTGGTAGTGAAGAAAGATAGAGGTAGTAACTAGCCTTTATTTGACGGTAATTACTTAGAAAGTCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGTGCAGGCGGTTCAATAAGTCTGATGTGAAAGCCTTCGGCTCAACCGGAGAATTGCATCAGAAACTGTTGAACTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGCAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGGATTAGATACCCTGGTAGTCCATGCCGTAAACGATGAGTGCTAAGTGTTGGGAGGTTTCCGCCTCTCAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGACCGCAAGGTTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCAGTGCAAACCTAAGAGATTAGGAGTTCCCTTCGGGGACGCTGAGACAGGTGGTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGTCATTAGTTGCCATCATTAAGTTGGGCACTCTAATGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAGTCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGACGGTACAACGAGAAGCGAACCTGCGAAGGCAAGCGGATCTCTGAAAGCCGTTCTCAGTTCGGACTGTAGGCTGCAACTCGCCTACACGAAGCTGGAATCGCTAGTAATCGCGGATCAGCACGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATGAGAGTCTGTAACACCCAAAGCCGGTGGGATAACCTTTATAGGAGTCAGCCGTCTAAGGTAGGACAGATGATTAGGGTG"
#https://www.biostars.org/p/16880/
#compare strings

comp.very.fast <- function(x,y){x == y }

comp.very.fast(rownames(dataset$tax_table[1,]), rownames(dataset$tax_table[2,]))
## [1] FALSE

Check B. pseudocatenulatum with NCBI BLAST

Checked B. pseudocatenulatum with a NCBI BLAST search. 9 different sequences. Also confirmed to be accurately classified.

dataset <- phyloseq2meco(samples)
## 68 taxa are removed from the otu_table, as the abundance is 0 ...
dataset$tax_table %<>% base::subset(Species == "s__pseudocatenulatum")
dataset$tidy_dataset()

rownames(dataset$tax_table)
## [1] "GATGAACGCTGGCGGCGTGCTTAACACATGCAAGTCGAACGGGATCCATCAGGCTTTGCTTGGTGGTGAGAGTGGCGAACGGGTGAGTAATGCGTGACCGACCTGCCCCATACACCGGAATAGCTCCTGGAAACGGGTGGTAATGCCGGATGCTCCGACTCCTCGCATGGGGTGTCGGGAAAGATTTCATCGGTATGGGATGGGGTCGCGTCCTATCAGGTAGTCGGCGGGGTAACGGCCCACCGAGCCTACGACGGGTAGCCGGCCTGAGAGGGCGACCGGCCACATTGGGACTGAGATACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGCGGGATGACGGCCTTCGGGTTGTAAACCGCTTTTGATCGGGAGCAAGCCTTCGGGTGAGTGTACCTTTCGAATAAGCACCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGCTCGTAGGCGGTTCGTCGCGTCCGGTGTGAAAGTCCATCGCTTAACGGTGGATCTGCGCCGGGTACGGGCGGGCTGGAGTGCGGTAGGGGAGACTGGAATTCCCGGTGTAACGGTGGAATGTGTAGATATCGGGAAGAACACCAATGGCGAAGGCAGGTCTCTGGGCCGTTACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGGTGGATGCTGGATGTGGGGCCCGTTCCACGGGTTCCGTGTCGGAGCTAACGCGTTAAGCATCCCGCCTGGGGAGTACGGCCGCAAGGCTAAAACTCAAAGAAATTGACGGGGGCCCGCACAAGCGGCGGAGCATGCGGATTAATTCGATGCAACGCGAAGAACCTTACCTGGGCTTGACATGTTCCCGACCGCGGCAGAGATGTCGTTTCCCTTCGGGGCGGGTTCACAGGTGGTGCATGGTCGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTCGCCCTGTGTTGCCAGCACGTCATGGTGGGAACTCACGGGGGACCGCCGGGGTCAACTCGGAGGAAGGTGGGGATGACGTCAGATCATCATGCCCCTTACGTCCAGGGCTTCACGCATGCTACAATGGCCGGTACAACGGGATGCGACACGGCGACGTGGAGCGGATCCCTGAAAACCGGTCTCAGTTCGGATTGGAGTCTGCAACCCGACTCCATGAAGGCGGAGTCGCTAGTAATCGCGGATCAGCAACGCCGCGGTGAATGCGTTCCCGGGCCTTGTACACACCGCCCGTCAAGTCATGAAAGTGGGTAGCACCCGAAGCCGGTGGCCTAACCCTTTGTGGATGGAGCCGTCTAAGGTGAGACTCGTGATTGGGACT" 
## [2] "GATGAACGCTGGCGGCGTGCTTAACACATGCAAGTCGAACGGGATCCATCAGGCTTTGCTTGGTGGTGAGAGTGGCGAACGGGTGAGTAATGCGTGACCGACCTGCCCCATACACCGGAATAGCTCCTGGAAACGGGTGGTAATGCCGGATGCTCCGACTCCTCGCATGGGGTGTCGGGAAAGATTTCATCGGTATGGGATGGGGTCGCGTCCTATCAGGTAGTCGGCGGGGTAACGGCCCACCGAGCCTACGACGGGTAGCCGGCCTGAGAGGGCGACCGGCCACATTGGGACTGAGATACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGCGGGATGACGGCCTTCGGGTTGTAAACCGCTTTTGATCGGGAGCAAGCCTTCGGGTGAGTGTACCTTTCGAATAAGCACCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGCTCGTAGGCGGTTCGTCGCGTCCGGTGTGAAAGTCCATCGCTTAACGGTGGATCTGCGCCGGGTACGGGCGGGCTGGAGTGCGGTAGGGGAGACTGGAATTCCCGGTGTAACGGTGGAATGTGTAGATATCGGGAAGAACACCAATGGCGAAGGCAGGTCTCTGGGCCGTTACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGGTGGATGCTGGATGTGGGGCCCGTTCCACGGGTTCCGTGTCGGAGCTAACGCGTTAAGCATCCCGCCTGGGGAGTACGGCCGCAAGGCTAAAACTCAAAGAAATTGACGGGGGCCCGCACAAGCGGCGGAGCATGCGGATTAATTCGATGCAACGCGAAGAACCTTACCTGGGCTTGACATGTTCCCGACAGCCGTAGAGATATGGCCTCCCTTCGGGGCGGGTTCACAGGTGGTGCATGGTCGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTCGCCCTGTGTTGCCAGCACGTCATGGTGGGAACTCACGGGGGACCGCCGGGGTCAACTCGGAGGAAGGTGGGGATGACGTCAGATCATCATGCCCCTTACGTCCAGGGCTTCACGCATGCTACAATGGCCGGTACAACGGGATGCGACACGGCGACGTGGAGCGGATCCCTGAAAACCGGTCTCAGTTCGGATTGGAGTCTGCAACCCGACTCCATGAAGGCGGAGTCGCTAGTAATCGCGGATCAGCAACGCCGCGGTGAATGCGTTCCCGGGCCTTGTACACACCGCCCGTCAAGTCATGAAAGTGGGTAGCACCCGAAGCCGGTGGCCTAACCCTTTGTGGATGGAGCCGTCTAAGGTGAGACTCGTGATTGGGACT" 
## [3] "GATGAACGCTGGCGGCGTGCTTAACACATGCAAGTCGAACGGGATCCATCAGGCTTTGCTTGGTGGTGAGAGTGGCGAACGGGTGAGTAATGCGTGACCGACCTGCCCCATACACCGGAATAGCTCCTGGAAACGGGTGGTAATGCCGGATGCTCCGACTCCTCGCATGGGGTGTCGGGAAAGATTCATCGGTATGGGATGGGGTCGCGTCCTATCAGGTAGTCGGCGGGGTAACGGCCCACCGAGCCTACGACGGGTAGCCGGCCTGAGAGGGCGACCGGCCACATTGGGACTGAGATACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGCGGGATGACGGCCTTCGGGTTGTAAACCGCTTTTGATCGGGAGCAAGCCTTCGGGTGAGTGTACCTTTCGAATAAGCACCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGCTCGTAGGCGGTTCGTCGCGTCCGGTGTGAAAGTCCATCGCTTAACGGTGGATCTGCGCCGGGTACGGGCGGGCTGGAGTGCGGTAGGGGAGACTGGAATTCCCGGTGTAACGGTGGAATGTGTAGATATCGGGAAGAACACCAATGGCGAAGGCAGGTCTCTGGGCCGTTACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGGTGGATGCTGGATGTGGGGCCCGTTCCACGGGTTCCGTGTCGGAGCTAACGCGTTAAGCATCCCGCCTGGGGAGTACGGCCGCAAGGCTAAAACTCAAAGAAATTGACGGGGGCCCGCACAAGCGGCGGAGCATGCGGATTAATTCGATGCAACGCGAAGAACCTTACCTGGGCTTGACATGTTCCCGACAGCCGTAGAGATATGGCCTCCCTTCGGGGCGGGTTCACAGGTGGTGCATGGTCGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTCGCCCTGTGTTGCCAGCACGTCATGGTGGGAACTCACGGGGGACCGCCGGGGTCAACTCGGAGGAAGGTGGGGATGACGTCAGATCATCATGCCCCTTACGTCCAGGGCTTCACGCATGCTACAATGGCCGGTACAACGGGATGCGACACGGCGACGTGGAGCGGATCCCTGAAAACCGGTCTCAGTTCGGATTGGAGTCTGCAACCCGACTCCATGAAGGCGGAGTCGCTAGTAATCGCGGATCAGCAACGCCGCGGTGAATGCGTTCCCGGGCCTTGTACACACCGCCCGTCAAGTCATGAAAGTGGGTAGCACCCGAAGCCGGTGGCCTAACCCTTTGTGGATGGAGCCGTCTAAGGTGAGACTCGTGATTGGGACT"  
## [4] "GATGAACGCTGGCGGCGTGCTTAACACATGCAAGTCGAACGGGATCCATCAGGCTTTGCTTGGTGGTGAGAGTGGCGAACGGGTGAGTAATGCGTGACCGACCTGCCCCATACACCGGAATAGCTCCTGGAAACGGGTGGTAATGCCGGATGCTCCGACTCCTCGCATGGGGTGTCGGGAAAGATTATATCGGTATGGGATGGGGTCGCGTCCTATCAGGTAGTCGGCGGGGTAACGGCCCACCGAGCCTACGACGGGTAGCCGGCCTGAGAGGGCGACCGGCCACATTGGGACTGAGATACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGCGGGATGACGGCCTTCGGGTTGTAAACCGCTTTTGATCGGGAGCAAGCCTTCGGGTGAGTGTACCTTTCGAATAAGCACCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGCTCGTAGGCGGTTCGTCGCGTCCGGTGTGAAAGTCCATCGCTTAACGGTGGATCTGCGCCGGGTACGGGCGGGCTGGAGTGCGGTAGGGGAGACTGGAATTCCCGGTGTAACGGTGGAATGTGTAGATATCGGGAAGAACACCAATGGCGAAGGCAGGTCTCTGGGCCGTTACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGGTGGATGCTGGATGTGGGGCCCGTTCCACGGGTTCCGTGTCGGAGCTAACGCGTTAAGCATCCCGCCTGGGGAGTACGGCCGCAAGGCTAAAACTCAAAGAAATTGACGGGGGCCCGCACAAGCGGCGGAGCATGCGGATTAATTCGATGCAACGCGAAGAACCTTACCTGGGCTTGACATGTTCCCGACCGCGGCAGAGATGTCGTTTCCCTTCGGGGCGGGTTCACAGGTGGTGCATGGTCGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTCGCCCTGTGTTGCCAGCACGTCATGGTGGGAACTCACGGGGGACCGCCGGGGTCAACTCGGAGGAAGGTGGGGATGACGTCAGATCATCATGCCCCTTACGTCCAGGGCTTCACGCATGCTACAATGGCCGGTACAACGGGATGCGACACGGCGACGTGGAGCGGATCCCTGAAAACCGGTCTCAGTTCGGATTGGAGTCTGCAACCCGACTCCATGAAGGCGGAGTCGCTAGTAATCGCGGATCAGCAACGCCGCGGTGAATGCGTTCCCGGGCCTTGTACACACCGCCCGTCAAGTCATGAAAGTGGGTAGCACCCGAAGCCGGTGGCCTAACCCTTTGTGGATGGAGCCGTCTAAGGTGAGACTCGTGATTGGGACT" 
## [5] "GATGAACGCTGGCGGCGTGCTTAACACATGCAAGTCGAACGGGATCCATCAGGCTTTGCTTGGTGGTGAGAGTGGCGAACGGGTGAGTAATGCGTGACCGACCTGCCCCATACACCGGAATAGCTCCTGGAAACGGGTGGTAATGCCGGATGCTCCGACTCCTCGCGTGGGGTGTCGGGAAAGATTTCATCGGTATGGGATGGGGTCGCGTCCTATCAGGTAGTCGGCGGGGTAACGGCCCACCGAGCCTACGACGGGTAGCCGGCCTGAGAGGGCGACCGGCCACATTGGGACTGAGATACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGCGGGATGACGGCCTTCGGGTTGTAAACCGCTTTTGATCGGGAGCAAGCCTTCGGGTGAGTGTACCTTTCGAATAAGCACCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGCTCGTAGGCGGTTCGTCGCGTCCGGTGTGAAAGTCCATCGCTTAACGGTGGATCTGCGCCGGGTACGGGCGGGCTGGAGTGCGGTAGGGGAGACTGGAATTCCCGGTGTAACGGTGGAATGTGTAGATATCGGGAAGAACACCAATGGCGAAGGCAGGTCTCTGGGCCGTTACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGGTGGATGCTGGATGTGGGGCCCGTTCCACGGGTTCCGTGTCGGAGCTAACGCGTTAAGCATCCCGCCTGGGGAGTACGGCCGCAAGGCTAAAACTCAAAGAAATTGACGGGGGCCCGCACAAGCGGCGGAGCATGCGGATTAATTCGATGCAACGCGAAGAACCTTACCTGGGCTTGACATGTTCCCGACAGCCGTAGAGATATGGCCTCCCTTCGGGGCGGGTTCACAGGTGGTGCATGGTCGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTCGCCCTGTGTTGCCAGCACGTCATGGTGGGAACTCACGGGGGACCGCCGGGGTCAACTCGGAGGAAGGTGGGGATGACGTCAGATCATCATGCCCCTTACGTCCAGGGCTTCACGCATGCTACAATGGCCGGTACAACGGGATGCGACACGGCGACGTGGAGCGGATCCCTGAAAACCGGTCTCAGTTCGGATTGGAGTCTGCAACCCGACTCCATGAAGGCGGAGTCGCTAGTAATCGCGGATCAGCAACGCCGCGGTGAATGCGTTCCCGGGCCTTGTACACACCGCCCGTCAAGTCATGAAAGTGGGTAGCACCCGAAGCCGGTGGCCTAACCCTTTGTGGATGGAGCCGTCTAAGGTGAGACTCGTGATTGGGACT" 
## [6] "GATGAACGCTGGCGGCGTGCTTAACACATGCAAGTCGAACGGGATCCATCAGGCTTTGCTTGGTGGTGAGAGTGGCGAACGGGTGAGTAATGCGTGACCGACCTGCCCCATACACCGGAATAGCTCCTGGAAACGGGTGGTAATGCCGGATGCTCCGACTCCTCGCATGGGGTGTCGGGAAAGATTTCATCGGTATGGGATGGGGTCGCGTCCTATCAGGTAGTCGGCGGGGTAACGGCCCACCGAGCCTACGACGGGTAGCCGGCCTGAGAGGGCGACCGGCCACATTGGGACTGAGATACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGCGGGATGACGGCCTTCGGGTTGTAAACCGCTTTTGATCGGGAGCAAGCCTTCGGGTGAGTGTACCTTTCGAATAAGCACCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGCTCGTAGGCGGTTCGTCGCGTCCGGTGTGAAAGTCCATCGCTTAACGGTGGATCTGCGCCGGGTACGGGCGGGCTGGAGTGCGGTAGGGGAGACTGGAATTCCCGGTGTAACGGTGGAATGTGTAGATATCGGGAAGAACACCAATGGCGAAGGCAGGTCTCTGGGCCGTTACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGGTGGATGCTGGATGTGGGGCCCGTTCCACGGGTTCCGTGTCGGAGCTAACGCGTTAAGCATCCCGCCTGGGGAGTACGGCCGCAAGGCTAAAACTCAAAGAAATTGACGGGGGCCCGCACAAGCGGCGGAGCATGCGGATTAATTCGATGCAACGCGAAGAACCTTACCTGGGCTTGACATGTTCCCGACAGCCGGCAGAGATGTCGTCTCCCTTCGGGGCGGGTTCACAGGTGGTGCATGGTCGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTCGCCCTGTGTTGCCAGCACGTCATGGTGGGAACTCACGGGGGACCGCCGGGGTCAACTCGGAGGAAGGTGGGGATGACGTCAGATCATCATGCCCCTTACGTCCAGGGCTTCACGCATGCTACAATGGCCGGTACAACGGGATGCGACACGGCGACGTGGAGCGGATCCCTGAAAACCGGTCTCAGTTCGGATTGGAGTCTGCAACCCGACTCCATGAAGGCGGAGTCGCTAGTAATCGCGGATCAGCAACGCCGCGGTGAATGCGTTCCCGGGCCTTGTACACACCGCCCGTCAAGTCATGAAAGTGGGTAGCACCCGAAGCCGGTGGCCTAACCCTTTGTGGATGGAGCCGTCTAAGGTGAGACTCGTGATTGGGACT"
## [7] "GATGAACGCTGGCGGCGTGCTTAACACATGCAAGTCGAACGGGATCCATCAGGCTTTGCTTGGTGGTGAGAGTGGCGAACGGGTGAGTAATGCGTGACCGACCTGCCCCATACACCGGAATAGCTCCTGGAAACGGGTGGTAATGCCGGATGCTCCGACTCCTCGCATGGGGTGTCGGGAAAGATTTCATCGGTATGGGATGGGGTCGCGTCCTATCAGGTAGTCGGCGGGGTAACGGCCCACCGAGCCTACGACGGGTAGCCGGCCTGAGAGGGCGACCGGCCACATTGGGACTGAGATACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGCGGGATGACGGCCTTCGGGTTGTAAACCGCTTTTGATCGGGAGCAAGCCTTCGGGTGAGTGTACCTTTCGAATAAGCACCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGCTCGTAGGCGGTTCGTCGCGTCCGGTGTGAAAGTCCATCGCTTAACGGTGGATCTGCGCCGGGTACGGGCGGGCTGGAGTGCGGTAGGGGAGACTGGAATTCCCGGTGTAACGGTGGAATGTGTAGATATCGGGAAGAACACCAATGGCGAAGGCAGGTCTCTGGGCCGTTACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGGTGGATGCTGGATGTGGGGCCCGTTCCACGGGTTCCGTGTCGGAGCTAACGCGTTAAGCATCCCGCCTGGGGAGTACGGCCGCAAGGCTAAAACTCAAAGAAATTGACGGGGGCCCGCACAAGCGGCGGAGCATGCGGATTAATTCGATGCAACGCGAAGAACCTTACCTGGGCTTGACATGTTCCCGACCGCGTCAGAGATATGGCGTTCCCTTCGGGGCGGGTTCACAGGTGGTGCATGGTCGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTCGCCCTGTGTTGCCAGCACGTCATGGTGGGAACTCACGGGGGACCGCCGGGGTCAACTCGGAGGAAGGTGGGGATGACGTCAGATCATCATGCCCCTTACGTCCAGGGCTTCACGCATGCTACAATGGCCGGTACAACGGGATGCGACACGGCGACGTGGAGCGGATCCCTGAAAACCGGTCTCAGTTCGGATTGGAGTCTGCAACCCGACTCCATGAAGGCGGAGTCGCTAGTAATCGCGGATCAGCAACGCCGCGGTGAATGCGTTCCCGGGCCTTGTACACACCGCCCGTCAAGTCATGAAAGTGGGTAGCACCCGAAGCCGGTGGCCTAACCCTTTGTGGATGGAGCCGTCTAAGGTGAGACTCGTGATTGGGACT"
## [8] "GATGAACGCTGGCGGCGTGCTTAACACATGCAAGTCGAACGGGATCCATCAGGCTTTGCTTGGTGGTGAGAGTGGCGAACGGGTGAGTAATGCGTGACCGACCTGCCCCATACACCGGAATAGCTCCTGGAAACGGGTGGTAATGCCGGATGCTCCGACTCCTCGCATGGGGTGTCGGGAAAGATTTCATCGGTATGGGATGGGGTCGCGTCCTATCAGGTAGTCGGCGGGGTAACGGCCCACCGAGCCTACGACGGGTAGCCGGCCTGAGAGGGCGACCGGCCACATTGGGACTGAGATACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGCGGGATGACGGCCTTCGGGTTGTAAACCGCTTTTGATCGGGAGCAAGCCTTCGGGTGAGTGTACCTTTCGAATAAGCACCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGCTCGTAGGCGGTTCGTCGCGTCCGGTGTGAAAGTCCATCGCTTAACGGTGGATCTGCGCCGGGTACGGGCGGGCTGGAGTGCGGTAGGGGAGACTGGAATTCCCGGTGTAACGGTGGAATGTGTAGATATCGGGAAGAACACCAATGGCGAAGGCAGGTCTCTGGGCCGTTACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGGTGGATGCTGGATGTGGGGCCCGTTCCACGGGTTCCGTGTCGGAGCTAACGCGTTAAGCATCCCGCCTGGGGAGTACGGCCGCAAGGCTAAAACTCAAAGAAATTGACGGGGGCCCGCACAAGCGGCGGAGCATGCGGATTAATTCGATGCAACGCGAAGAACCTTACCTGGGCTTGACATGTTCCCGACAGCCGGAGAGATGTCCGTTCCCTTCGGGGCGGGTTCACAGGTGGTGCATGGTCGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTCGCCCTGTGTTGCCAGCACGTCATGGTGGGAACTCACGGGGGACCGCCGGGGTCAACTCGGAGGAAGGTGGGGATGACGTCAGATCATCATGCCCCTTACGTCCAGGGCTTCACGCATGCTACAATGGCCGGTACAACGGGATGCGACACGGCGACGTGGAGCGGATCCCTGAAAACCGGTCTCAGTTCGGATTGGAGTCTGCAACCCGACTCCATGAAGGCGGAGTCGCTAGTAATCGCGGATCAGCAACGCCGCGGTGAATGCGTTCCCGGGCCTTGTACACACCGCCCGTCAAGTCATGAAAGTGGGTAGCACCCGAAGCCGGTGGCCTAACCCTTTGTGGATGGAGCCGTCTAAGGTGAGACTCGTGATTGGGACT" 
## [9] "GATGAACGCTGGCGGCGTGCTTAACACATGCAAGTCGAACGGGATCCATCGGGCTTTGCTTGGTGGTGAGAGTGGCGAACGGGTGAGTAATGCGTGACCGACCTGCCCCGTACACCGGAATAGCTCCTGGAAACGGGTGGTAATGCCGGATGCTCCGACTCCTCGCATGGGGTGTCGGGAAAGATTGTATCGGTATGGGATGGGGTCGCGTCCTATCAGGTAGTCGGCGGGGTAACGGCCCACCGAGCCTACGACGGGTAGCCGGCCTGAGAGGGCGACCGGCCACATTGGGACTGAGATACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGCGGGATGACGGCCTTCGGGTTGTAAACCGCTTTTGATCGGGAGCAAGCCTTCGGGTGAGTGTACCTTTCGAATAAGCACCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGCTCGTAGGCGGTTCGTCGCGTCCGGTGTGAAAGTCCATCGCTTAACGGTGGATCCGCGCCGGGTACGGGCGGGCTTGAGTGCGGTAGGGGAGACTGGAATTCCCGGTGTAACGGTGGAATGTGTAGATATCGGGAAGAACACCAATGGCGAAGGCAGGTCTCTGGGCCGTTACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGGTGGATGCTGGATGTGGGGCCCGTTCCACGGGTTCCGTGTCGGAGCTAACGCGTTAAGCATCCCGCCTGGGGAGTACGGCCGCAAGGCTAAAACTCAAAGAAATTGACGGGGGCCCGCACAAGCGGCGGAGCATGCGGATTAATTCGATGCAACGCGAAGAACCTTACCTGGGCTTGACATGTTCCCGACGGCCGTAGAGATACGGTTTCCCTTCGGGGCGGGTTCACAGGTGGTGCATGGTCGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTCGCCCTGTGTTGCCAGCGCGTCATGGCGGGAACTCACGGGGGACCGCCGGGGTCAACTCGGAGGAAGGTGGGGATGACGTCAGATCATCATGCCCCTTACGTCCAGGGCTTCACGCATGCTACAATGGCCGGTACAACGGGATGCGACACGGCGACGTGGAGCGGATCCCTGAAAACCGGTCTCAGTTCGGATTGGAGTCTGCAACCCGACTCCATGAAGGCGGAGTCGCTAGTAATCGCGGATCAGCAACGCCGCGGTGAATGCGTTCCCGGGCCTTGTACACACCGCCCGTCAAGTCATGAAAGTGGGTAGCACCCGAAGCCGGTGGCCTAACCCCTTGTGGGATGGAGCCGTCTAAGGTGAGACTCGTGATTGGGACT"
sessionInfo()
## R version 4.1.3 (2022-03-10)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19044)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] stringr_1.4.1    here_1.0.1       DT_0.24          cowplot_1.1.1   
##  [5] agricolae_1.3-5  plyr_1.8.7       reshape2_1.4.4   corrplot_0.92   
##  [9] Hmisc_4.7-1      Formula_1.2-4    survival_3.4-0   kableExtra_1.3.4
## [13] arules_1.7-4     Matrix_1.4-1     pander_0.6.5     picante_1.8.2   
## [17] nlme_3.1-159     vegan_2.6-2      lattice_0.20-45  permute_0.9-7   
## [21] ape_5.6-2        ggtree_3.2.1     tidytree_0.4.0   pheatmap_1.0.12 
## [25] GUniFrac_1.6     file2meco_0.4.0  ggplot2_3.3.6    magrittr_2.0.3  
## [29] microeco_0.12.1  dplyr_1.0.10     phyloseq_1.38.0 
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.2             questionr_0.7.7        tidyselect_1.1.2      
##   [4] htmlwidgets_1.5.4      dunn.test_1.3.5        grid_4.1.3            
##   [7] combinat_0.0-8         modeest_2.4.0          munsell_0.5.0         
##  [10] codetools_0.2-18       interp_1.1-3           statmod_1.4.37        
##  [13] miniUI_0.1.1.1         withr_2.5.0            colorspace_2.0-3      
##  [16] Biobase_2.53.0         highr_0.9              knitr_1.40            
##  [19] AlgDesign_1.2.1        rstudioapi_0.14        stats4_4.1.3          
##  [22] ggsignif_0.6.3         labeling_0.4.2         GenomeInfoDbData_1.2.7
##  [25] farver_2.1.1           rhdf5_2.37.4           fBasics_4021.92       
##  [28] rprojroot_2.0.3        vctrs_0.4.1            treeio_1.18.1         
##  [31] generics_0.1.3         xfun_0.32              R6_2.5.1              
##  [34] GenomeInfoDb_1.30.1    clue_0.3-61            bitops_1.0-7          
##  [37] rhdf5filters_1.5.0     cachem_1.0.6           gridGraphics_0.5-1    
##  [40] assertthat_0.2.1       promises_1.2.0.1       scales_1.2.1          
##  [43] nnet_7.3-17            gtable_0.3.1           spatial_7.3-15        
##  [46] timeDate_4021.104      rlang_1.0.6            FSA_0.9.3             
##  [49] systemfonts_1.0.4      splines_4.1.3          rstatix_0.7.0         
##  [52] lazyeval_0.2.2         broom_1.0.1            checkmate_2.1.0       
##  [55] abind_1.4-5            yaml_2.3.5             crosstalk_1.2.0       
##  [58] backports_1.4.1        httpuv_1.6.5           tools_4.1.3           
##  [61] ggplotify_0.1.0        ellipsis_0.3.2         jquerylib_0.1.4       
##  [64] biomformat_1.22.0      RColorBrewer_1.1-3     BiocGenerics_0.40.0   
##  [67] stabledist_0.7-1       Rcpp_1.0.9             base64enc_0.1-3       
##  [70] zlibbioc_1.39.0        purrr_0.3.4            RCurl_1.98-1.8        
##  [73] ggpubr_0.4.0           rpart_4.1.16           deldir_1.0-6          
##  [76] statip_0.2.3           S4Vectors_0.31.5       haven_2.5.1           
##  [79] ggrepel_0.9.1          cluster_2.1.4          data.table_1.14.2     
##  [82] timeSeries_4021.104    matrixStats_0.62.0     hms_1.1.2             
##  [85] patchwork_1.1.2.9000   mime_0.12              evaluate_0.16         
##  [88] xtable_1.8-4           klaR_1.7-1             jpeg_0.1-9            
##  [91] IRanges_2.27.2         gridExtra_2.3          compiler_4.1.3        
##  [94] tibble_3.1.8           crayon_1.5.1           htmltools_0.5.3       
##  [97] ggfun_0.0.7            mgcv_1.8-40            later_1.3.0           
## [100] tidyr_1.2.0            aplot_0.1.6            DBI_1.1.3             
## [103] rmutil_1.1.9           MASS_7.3-58.1          ade4_1.7-19           
## [106] car_3.1-0              cli_3.3.0              parallel_4.1.3        
## [109] igraph_1.3.4           forcats_0.5.2          pkgconfig_2.0.3       
## [112] foreign_0.8-82         xml2_1.3.3             foreach_1.5.2         
## [115] svglite_2.1.0          bslib_0.4.0            multtest_2.49.0       
## [118] webshot_0.5.3          XVector_0.33.0         rvest_1.0.3           
## [121] yulab.utils_0.0.5      digest_0.6.29          Biostrings_2.61.2     
## [124] rmarkdown_2.16         htmlTable_2.4.1        shiny_1.7.2           
## [127] lifecycle_1.0.3        jsonlite_1.8.0         Rhdf5lib_1.15.2       
## [130] carData_3.0-5          viridisLite_0.4.1      fansi_1.0.3           
## [133] labelled_2.9.1         pillar_1.8.1           fastmap_1.1.0         
## [136] httr_1.4.4             glue_1.6.2             png_0.1-7             
## [139] iterators_1.0.14       stringi_1.7.8          sass_0.4.2            
## [142] stable_1.1.6           latticeExtra_0.6-30