This is R code used for the data analysis for the manuscript: “Idiosyncratic effects of coinfection on the association between systemic pathogens and the gut microbiota of a wild rodent, the bank vole (Myodes glareolus)”.

The data have been pre-processed using QIIME2 v.2019.10, the description of the data processing is available as electronic supplementary material, “Brila-et-al_Qiime2-code”.

Preparation

Load necessary libraries

# load libraries
library(ANCOMBC) # v.1.6.2 ← differential abundance analysis
library(bestNormalize) # v.1.8.3 ← Shannon's diversity index transformation
library(emmeans) # v.1.8.0 ← pairwise comparisons in α-diversity models
library(ggConvexHull) # v.0.1.0 ← convex hulls in ordination plots
library(ggplot2) # v.3.3.6 ← plotting
library(ggvenn) # v.0.1.9 ← Venn diagram
library(lme4) # v.1.1-30 ← LMMs
library(lmerTest) # v.3.1-3 ← LMMs
library(magrittr) # v.2.0.3 ← awesome pipes
library(MuMIn) # v.1.47.1 ← R² values for α-diversity models
library(pairwiseAdonis) #v.0.4  ← pairwise adonis
library(partR2) # v.0.9.1 ← R² for individual variables in LMMs
library(patchwork) #  v.1.1.2 ← patching together plots
library(performance) # v.0.9.2 ← model diagnostics
library(phyloseq) # v.1.40.0 ← most of microbiome data manipulation
library(picante) # v.1.8.2 ← Faith's phylogenetic diversity index
library(qiime2R) # v.0.99.6 ← data import from QIIME2
library(rbiom) # v.1.0.3 ← for UniFrac distance calculations
library(rcompanion) # v.2.4.18 ← for Cramér's V calculations
library(tidyverse) # v.1.3.2 ← for most data manipulations
library(vegan) # v.2.6-2 ← for ß-diversity analysis
library(xlsx) # v.0.6.5 ← for export straight to MS Excel

# set ggplot theme
theme_set(theme_minimal())

# write down all package versions
sessionInfo()

Data preparation

Vole and pathogen data:

# read in
VoleData <- read.table("vole_meta_200.txt", sep="\t", header=TRUE, stringsAsFactors = TRUE)
PathData <- read.table("pathogens_R_10032021.txt", sep="\t", header=TRUE, stringsAsFactors = TRUE)

# merge
VolePathData <- merge(VoleData, PathData, by= "animal_id")

# transform
VolePathData$inf_count <- as.factor(VolePathData$inf_count)
VolePathData$coinf_stat <- as.factor(VolePathData$coinf_stat)
VolePathData$animal_id <- as.factor(VolePathData$animal_id)
VolePathData %<>% mutate(reprod = as.character(gravid))
VolePathData$reprod %<>% replace_na("male") %>% as.factor()

Classify animals into groups based on coinfection status per pathogen:

# perhaps there's a shorter way but this works
VolePathData %<>%
  mutate(
    puu_coinf = case_when(
      puu == "negative" & coinf_stat == "0" ~ "negative",
      puu == "negative" & coinf_stat == "1" ~ "negative",
      puu == "negative" & coinf_stat == "2" ~ "negative",
      puu == "positive" & coinf_stat == "1" ~ "single",
      puu == "positive" & coinf_stat == "2" ~ "coinfected"),
    ana_coinf = case_when(
      ana == "negative" & coinf_stat == "0" ~ "negative",
      ana == "negative" & coinf_stat == "1" ~ "negative",
      ana == "negative" & coinf_stat == "2" ~ "negative",
      ana == "positive" & coinf_stat == "1" ~ "single",
      ana == "positive" & coinf_stat == "2" ~ "coinfected"),
    bab_coinf = case_when(
      bab == "negative" & coinf_stat == "0" ~ "negative",
      bab == "negative" & coinf_stat == "1" ~ "negative",
      bab == "negative" & coinf_stat == "2" ~ "negative",
      bab == "positive" & coinf_stat == "1" ~ "single",
      bab == "positive" & coinf_stat == "2" ~ "coinfected"),
    bor_coinf = case_when(
      bor == "negative" & coinf_stat == "0" ~ "negative",
      bor == "negative" & coinf_stat == "1" ~ "negative",
      bor == "negative" & coinf_stat == "2" ~ "negative",
      bor == "positive" & coinf_stat == "1" ~ "single",
      bor == "positive" & coinf_stat == "2" ~ "coinfected") )

# re-level and set as factors
VolePathData$ana_coinf <- factor(VolePathData$ana_coinf, levels=c("negative", "single", "coinfected"))
VolePathData$bab_coinf <- factor(VolePathData$bab_coinf, levels=c("negative", "single", "coinfected"))
VolePathData$bor_coinf <- factor(VolePathData$bor_coinf, levels=c("negative", "single", "coinfected"))
VolePathData$puu_coinf <- factor(VolePathData$puu_coinf, levels=c("negative", "single", "coinfected"))

Microbiome data:

# read in
tax <- read_csv("qiime_data/tax_unrare_gut.csv")
asvs<- read_qza("qiime_data/gut_feat_table_rarefied.qza")
phy_tree <- read_tree("qiime_data/tree_gut.nwk")

# make a phyloseq object
ps <- phyloseq(
 otu_table(asvs$data, taxa_are_rows = T), 
 phy_tree(phy_tree), 
 tax_table(as.data.frame(tax) %>% dplyr::select(-Confidence) %>% column_to_rownames("OTUID") %>% as.matrix()),
 sample_data(VolePathData %>% as.data.frame() %>% column_to_rownames("SampleID_G")) )

# total number of reads
sum(sample_sums(ps))

Prune data so you have metadata data frame with only animals with microbiome data:

VolePathData_micro <- sample_data(ps) %>% as_tibble(rownames = "SampleID_G")

Infection and pathogen prevalence

Calculate the number of animals infected with any pathogen and save to file:

write.table(
  x = as_tibble(plyr::count(VolePathData_micro, c('location', 'infected')) %>% 
                  pivot_wider(names_from = infected, values_from = freq)),
  file = "results/revised/inf-count_loc.txt",
  sep = "\t",
  row.names = FALSE)

write.table(
  x = as_tibble(plyr::count(VolePathData_micro, c('sex', 'infected'))),
  file = "results/revised/inf-count_sex.txt",
  sep = "\t",
  row.names = FALSE)

write.table(
  x = as_tibble(plyr::count(VolePathData_micro, c('poll_group', 'infected'))),
  file = "results/revised/inf-count_poll_group.txt",
  sep = "\t",
  row.names = FALSE)

write.table(
  x = as_tibble(plyr::count(VolePathData_micro, c('study_area', 'infected')) %>% 
                  pivot_wider(names_from = infected, values_from = freq)),
  file = "results/revised/inf-count_studar.txt",
  sep = "\t",
  row.names = FALSE)

Calculate the number of animals infected with specific pathogens split by environmental and host traits:

# location
write.table(
  x = bind_rows(
    VolePathData_micro %>%
      group_by(location, puu, .drop = FALSE) %>%
      dplyr::count(),
    VolePathData_micro %>%
      group_by(location, ana, .drop = FALSE) %>%
      dplyr::count(),
    VolePathData_micro %>%
      group_by(location, bab, .drop = FALSE) %>%
      dplyr::count(),
    VolePathData_micro %>%
      group_by(location, bor, .drop = FALSE) %>%
      dplyr::count()) %>%
  pivot_longer(c(ana, puu, bab, bor), names_to = "pathogen", values_to = "status") %>% 
  drop_na(status),
  file = "results/revised/path_location.txt",
  sep = "\t",
  row.names = FALSE)

# sex
write.table(
  x = bind_rows(
    VolePathData_micro %>%
      group_by(sex, puu, .drop = FALSE) %>%
      dplyr::count(),
    VolePathData_micro %>%
      group_by(sex, ana, .drop = FALSE) %>%
      dplyr::count(),
    VolePathData_micro %>%
      group_by(sex, bab, .drop = FALSE) %>%
      dplyr::count(),
    VolePathData_micro %>%
      group_by(sex, bor, .drop = FALSE) %>%
      dplyr::count()) %>%
  pivot_longer(c(ana, puu, bab, bor), names_to = "pathogen", values_to = "status") %>% 
  drop_na(status),
  file = "results/revised/path_sex.txt",
  sep = "\t",
  row.names = FALSE)

# pollution group
write.table(
  x = bind_rows(
    VolePathData_micro %>%
      group_by(poll_group, puu, .drop = FALSE) %>%
      dplyr::count(),
    VolePathData_micro %>%
      group_by(poll_group, ana, .drop = FALSE) %>%
      dplyr::count(),
    VolePathData_micro %>%
      group_by(poll_group, bab, .drop = FALSE) %>%
      dplyr::count(),
    VolePathData_micro %>%
      group_by(poll_group, bor, .drop = FALSE) %>%
      dplyr::count()) %>%
  pivot_longer(c(ana, puu, bab, bor), names_to = "pathogen", values_to = "status") %>% 
  drop_na(status),
  file = "results/revised/path_poll_group.txt",
  sep = "\t",
  row.names = FALSE)

# study area
write.table(
  x = bind_rows(
    VolePathData_micro %>%
      group_by(study_area, puu, .drop = FALSE) %>%
      dplyr::count(),
    VolePathData_micro %>%
      group_by(study_area, ana, .drop = FALSE) %>%
      dplyr::count(),
    VolePathData_micro %>%
      group_by(study_area, bab, .drop = FALSE) %>%
      dplyr::count(),
    VolePathData_micro %>%
      group_by(study_area, bor, .drop = FALSE) %>%
      dplyr::count()) %>%
  pivot_longer(c(ana, puu, bab, bor), names_to = "pathogen", values_to = "status") %>% 
  drop_na(status),
  file = "results/revised/path_studar.txt",
  sep = "\t",
  row.names = FALSE)

Calculate the prevalence and 95% CI

# ifelse function
func1 <- function(x) {
  ifelse(x == "positive" | x == "yes" , "1", "0")
}

# re-code
prevalence <- VolePathData_micro %>%
  mutate(across(
    .cols = matches(c("ana", "bab", "bor", "puu", "infected")),
    .fns = func1,
    .names = "{.col}"  ))

# gives errors if doing simultaneously with last command, so just run separately
prevalence %<>%
    mutate(across(
    .cols = matches(c("ana", "bab", "bor", "puu", "infected")),
    .fns = as.numeric,
    .names = "{.col}"))

Code for functions written with help from https://stackoverflow.com/questions/52908192/how-to-correctly-use-group-by-and-summarise-in-a-for-loop-in-r/52908326

# a function to calculate the prevalence, margin of error, CI and generate a report column
func2 <- function(df, .groupvar, .checkvar) {
  .groupvar <- sym(.groupvar)
  .checkvar <- sym(.checkvar)

  output_df <- df %>%
    dplyr::group_by(!! .groupvar) %>% 
    dplyr::summarise(prev = round(sum(!! .checkvar/n(), na.rm = TRUE),2),
                     marg_err = round(1.96 * sqrt(prev*(1-prev)/n()),2),
                     CI_low = round(prev - marg_err, 2),
                     CI_high = round(prev + marg_err,2),
                     report = paste(prev, ' (', CI_low, '-',CI_high, ')', sep="")
                           )
    return(output_df)
}

# define variables of interest
vars <- c("ana", "bab",  "bor", "puu", "infected")

# test
vars %>%
  purrr::set_names() %>% # so it uses purrr not magrittr
  map_df(~ func2(prevalence, "location", .x), .id = 'Variable')

# manually crosscheck value for Anaplasma in Harjavalta
prevalence %>% tabyl(location, ana) # yes, prevalence is 0.11
1.96 * sqrt(0.1149425*(1-0.1149425)/87) # yes, marginal error is 0.07

# calculate for all variables of interest, write to table
vars %>%
  purrr::set_names() %>%
  map_df( ~ func2(prevalence, "location", .x), .id = 'Variable') %>% 
  write.table(., file = "results/revised/prevalence/prev_loc.txt", sep="\t", row.names = FALSE)

vars %>%
  purrr::set_names() %>%
  map_df( ~ func2(prevalence, "study_area", .x), .id = 'Variable') %>% 
  write.table(., file = "results/revised/prevalence/prev_studar.txt", sep="\t", row.names = FALSE)

vars %>%
  purrr::set_names() %>%
  map_df( ~ func2(prevalence, "sex", .x), .id = 'Variable') %>% 
  write.table(., file = "results/revised/prevalence/prev_sex.txt", sep="\t", row.names = FALSE)

vars %>%
  purrr::set_names() %>%
  map_df( ~ func2(prevalence, "poll_group", .x), .id = 'Variable') %>% 
  write.table(., file = "results/revised/prevalence/prev_poll.txt", sep="\t", row.names = FALSE)

# for the all animals without split into metadata groups
func3 <- function(df,.checkvar) {
  .checkvar <- sym(.checkvar)

  output_df <- df %>% 
    dplyr::summarise(prev = round(sum(!! .checkvar/n(), na.rm = TRUE),2),
                     marg_err = round(1.96 * sqrt(prev*(1-prev)/n()),2),
                     CI_low = round(prev - marg_err, 2),
                     CI_high = round(prev + marg_err,2),
                     report = paste(prev, ' (', CI_low, '-',CI_high, ')', sep="")
                           )
    return(output_df)
}

# run function and save as a table
vars %>%
  purrr::set_names() %>%
  map_df(~ func3(prevalence, .x), .id = 'Variable') %>% 
    write.table(., file = "results/revised/prevalence/prev_all.txt", sep="\t", row.names = FALSE)

Data exploration

Data were explored using exploratory plotting using ggplot2. The code is not included here for brevity.

Correlation between pollution group and infection status and coinfection status:

# infection status
chisq.test(VolePathData_micro$infected, VolePathData_micro$poll_group, simulate.p.value = TRUE)
cramerV(VolePathData_micro$infected, VolePathData_micro$poll_group, bias.correct = TRUE)

# coinfection status
chisq.test(VolePathData_micro$coinf_stat, VolePathData_micro$poll_group, simulate.p.value = TRUE)
cramerV(VolePathData_micro$coinf_stat, VolePathData_micro$poll_group, bias.correct = TRUE)

Results:

Infection status: Pearson’s Chi-squared test with simulated p-value: Chi-squared =2.56, p=0.15, Cramér’s V = 0.09
Coinfection status: Pearson’s Chi-squared test with simulated p-value: Chi-squared =3.58, p=0.02, Cramér’s V = 0.18 → low association

Note: coinfection status correlates with pollution group, but as the correlation is weak and both are important explanatory variables, they are included in full models (after checking for effects of interactions in the models).

Correlation between pollution group and individual pathogens:

# Anaplasma
chisq.test(VolePathData_micro$ana, VolePathData_micro$poll_group,simulate.p.value = TRUE)
cramerV(VolePathData_micro$ana, VolePathData_micro$poll_group, bias.correct = TRUE)

# Babesia
chisq.test(VolePathData_micro$bab, VolePathData_micro$poll_group,simulate.p.value = TRUE)
cramerV(VolePathData_micro$bab, VolePathData_micro$poll_group, bias.correct = TRUE)

# Borrelia
chisq.test(VolePathData_micro$bor, VolePathData_micro$poll_group,simulate.p.value = TRUE)
cramerV(VolePathData_micro$bor, VolePathData_micro$poll_group, bias.correct = TRUE)

# Puumala
chisq.test(VolePathData_micro$puu, VolePathData_micro$poll_group,simulate.p.value = TRUE)
cramerV(VolePathData_micro$puu, VolePathData_micro$poll_group, bias.correct = TRUE)

Results:

A. phagocytophilum: Pearson’s Chi-squared test with simulated p-value: Chi-squared = 1.43, p = 0.3, Cramér’s V = 0.05
B. microti: Pearson’s Chi-squared test with simulated p-value: Chi-squared = 2.6, p = 0.14, Cramér’s V = 0.09
B.burgdorferi s.l.: Pearson’s Chi-squared test with simulated p-value: Chi-squared = 0.16, p = 0.25, Cramér’s V = 0.05
Puumala orthohantavirus: Pearson’s Chi-squared test with simulated p-value: Chi-squared = 5.05, p = 0.03, Cramér’s V = 0.15

Note: only Puumala virus infection status correlates with pollution group, but as the correlation is weak and both are important explanatory variables, they are included in full models.

Correlation between pollution group and pathogen coinfection status:

# Anaplasma
chisq.test(VolePathData_micro$ana_coinf, VolePathData_micro$poll_group,simulate.p.value = TRUE)
cramerV(VolePathData_micro$ana_coinf, VolePathData_micro$poll_group, bias.correct = TRUE)

# Babesia
chisq.test(VolePathData_micro$bab_coinf, VolePathData_micro$poll_group,simulate.p.value = TRUE)
cramerV(VolePathData_micro$bab_coinf, VolePathData_micro$poll_group, bias.correct = TRUE)

# Borrelia
chisq.test(VolePathData_micro$bor_coinf, VolePathData_micro$poll_group,simulate.p.value = TRUE)
cramerV(VolePathData_micro$bor_coinf, VolePathData_micro$poll_group, bias.correct = TRUE)

# Puumala
chisq.test(VolePathData_micro$puu_coinf, VolePathData_micro$poll_group,simulate.p.value = TRUE)
cramerV(VolePathData_micro$puu_coinf, VolePathData_micro$poll_group, bias.correct = TRUE)

Results:

A. phagocytophilum: Pearson’s Chi-squared test with simulated p-value: Chi-squared = 2.8, p = 0.3, Cramér’s V = 0.06
B. microti: Pearson’s Chi-squared test with simulated p-value: Chi-squared = 5.9, p = 0.06, Cramér’s V = 0.14
B.burgdorferi s.l.: Pearson’s Chi-squared test with simulated p-value: Chi-squared = 7.8, p = 0.02, Cramér’s V = 0.17
Puumala orthohantavirus: Pearson’s Chi-squared test with simulated p-value: Chi-squared = 5.06, p = 0.08, Cramér’s V = 0.12

Note: only B.burgdorferi s.l. infection status significantly correlates with pollution group, but as the correlation is weak and both are important explanatory variables, they are included in full models.

Correlation between pollution group and location:

chisq.test(VolePathData_micro$location, VolePathData_micro$poll_group,simulate.p.value = TRUE)
cramerV(VolePathData_micro$location, VolePathData_micro$poll_group, bias.correct = TRUE)

Result: Pearson’s Chi-squared test with simulated p-value: Chi-squared = 9.64, p = 0.005, Cramér’s V = 0.21

Note: location shows weak correlation with pollution group. Location is however, included in the full models to account for possible differences in the microbiota of bank voles due to distinct differences latitude and as such putative vole population genetic or diet differences.

Alpha diversity

Calculate alpha diversity indices

AlphaDiv<- data.frame(
 "Shannon"= estimate_richness(ps, measures = "Shannon"),
 "Observed" = estimate_richness(ps, measures = "Observed"),
 "PD" = picante::pd(samp = t(as.data.frame(phyloseq::otu_table(ps))), tree = phyloseq::phy_tree(ps))[, 1])

# merge with metadata
AlphaDiv %<>% rownames_to_column(var = "SampleID_G")
AlphaDivMeta<- merge(AlphaDiv, VolePathData_micro)

Re-level so that the intercept is an uninfected animal from pollution group “low”:

AlphaDivMeta$poll_group <- relevel(AlphaDivMeta$poll_group, ref = 'low')

Alpha diversity metric distribution:

ggplot(AlphaDivMeta, aes(x = Shannon)) +
  geom_histogram(aes(y = ..density..),
                 colour = "black",
                 fill = "white") +
  stat_function(fun = dnorm, args = list(
    mean = mean(AlphaDivMeta$Shannon),
    sd = sd(AlphaDivMeta$Shannon)  ))

ggplot(AlphaDivMeta, aes(x = PD)) +
  geom_histogram(aes(y = ..density..),
                 colour = "black",
                 fill = "white") +
  stat_function(fun = dnorm, args = list(
    mean = mean(AlphaDivMeta$PD),
    sd = sd(AlphaDivMeta$PD)  ))

ggplot(AlphaDivMeta, aes(x = Observed)) +
  geom_histogram(aes(y = ..density..),
                 colour = "black",
                 fill = "white") +
  stat_function(fun = dnorm, args = list(
    mean = mean(AlphaDivMeta$Observed),
    sd = sd(AlphaDivMeta$Observed)  ))

Notes:

Exploratory plotting to examine need for polynomial terms or interactions done using ggplot2; not shown here for brevity
Model diagnostics run the same way for all linear mixed models but are shown for the first model only, for brevity

Analysis

Coinfection status

Shannon’s diversity index

As the model diagnostics for first run model were poor, and distribution Shannon’s diversity index is right-skewed, transform the response variable:

# find best transformation
bestNormalize(AlphaDivMeta$Shannon, standardize = FALSE) # bestNormalize has showed that Yeo-Johnson Transformation is the best approach

# transform
AlphaDivMeta$Shannon_tr <- yeojohnson(AlphaDivMeta$Shannon, standardize = FALSE)$x.t

# look at the response variable histogram now
ggplot(AlphaDivMeta, aes(x = Shannon_tr)) +
  geom_histogram(aes(y = ..density..),
                 colour = "black",
                 fill = "white") +
  stat_function(fun = dnorm, args = list(
    mean = mean(AlphaDivMeta$Shannon_tr),
    sd = sd(AlphaDivMeta$Shannon_tr)  ))

Run the full model:

# fit full model
m_Shan_coinf <- lmerTest::lmer(Shannon_tr ~ coinf_stat + poll_group + location + reprod + (1|study_area), data = AlphaDivMeta, REML = TRUE)

# model diagnostics
check_autocorrelation(m_Shan_coinf)

check_collinearity(m_Shan_coinf)
check_collinearity(m_Shan_coinf) %>% plot()

check_heteroscedasticity(m_Shan_coinf)
check_heteroscedasticity(m_Shan_coinf) %>% plot()

check_normality(m_Shan_coinf)
check_normality(m_Shan_coinf) %>% plot()

check_outliers(m_Shan_coinf)

# model summary
summary(m_Shan_coinf)

# calculate R² values
r2_m_Shan_coinf<- round(r.squaredGLMM(m_Shan_coinf),digits=3)
r2_m_Shan_coinf <- t(r2_m_Shan_coinf)

# calculate individual R² values
set.seed(42)
r2_m_Shan_coinf_ind <- partR2(
  m_Shan_coinf,
  data = AlphaDivMeta,
  partvars = c("coinf_stat", "poll_group", "location", "reprod"),
  R2_type = "marginal",
  nboot = 100,
  CI = 0.95,
  max_level = 1)$R2 %>% 
 as_tibble() %>% 
 mutate_if(is.numeric, round, digits=3)

Compare between the coinfection groups:

set.seed(42)
m_Shan_coinf_pairwise <-
 emmeans(m_Shan_coinf,
         list(pairwise ~ coinf_stat),
         adjust = "none",
         lmer.df = "satterthwaite")$`pairwise differences of coinf_stat` %>% 
 as_tibble() %>% 
 add_column(adj.p.value =
   emmeans(
     m_Shan_coinf,
     list(pairwise ~ coinf_stat),
     adjust = "fdr",
     lmer.df = "satterthwaite")$`pairwise differences of coinf_stat` %>% 
 as_tibble() %>% 
 pull(p.value)) %>%
 mutate_if(is.numeric, round, digits=3)

Export the data:

# export model summary
write.table(
  x = as_tibble(round(coef(
    summary(m_Shan_coinf)), digits = 3), rownames = NA) %>%
    tibble::rownames_to_column(., "Predictor") %>%
    rename("SE" = 3,
           "t_value" = 5,
           "p_value" = 6  ),
  file = "results/revised/alpha/m_sum_Shan_coinf.txt",
  sep = "\t",
  row.names = FALSE)

# export comparison 
write.table(m_Shan_coinf_pairwise, file = "results/revised/alpha/m_Shan_coinf_pairwise.txt", sep="\t", row.names = FALSE)

# export R2
write.table(r2_m_Shan_coinf, file = "results/revised/alpha/r2_m_Shan_coinf.txt", sep = "\t")
write.table(r2_m_Shan_coinf_ind, file = "results/revised/alpha/r2_m_Shan_coinf_ind.txt", sep = "\t")

Faith’s phylogenetic diversity

Run the full model:

# fit full model
m_PD_coinf <- lmerTest::lmer(PD ~ coinf_stat + poll_group + location + reprod + (1|study_area), data = AlphaDivMeta, REML = TRUE)

# model summary
summary(m_PD_coinf)

# calculate R² values
r2_m_PD_coinf<- round(r.squaredGLMM(m_PD_coinf),digits=3)
r2_m_PD_coinf <- t(r2_m_PD_coinf)

# calculate individual R² values
set.seed(42)
r2_m_PD_coinf_ind <- partR2(
  m_PD_coinf,
  data = AlphaDivMeta,
  partvars = c("coinf_stat", "poll_group", "location", "reprod"),
  R2_type = "marginal",
  nboot = 100,
  CI = 0.95,
  max_level = 1)$R2 %>% 
 as_tibble() %>% 
 mutate_if(is.numeric, round, digits=3)

Compare between the coinfection groups:

set.seed(42)
m_PD_coinf_pairwise <-
 emmeans(m_PD_coinf,
         list(pairwise ~ coinf_stat),
         adjust = "none",
         lmer.df = "satterthwaite")$`pairwise differences of coinf_stat` %>% 
 as_tibble() %>% 
 add_column(adj.p.value =
   emmeans(
     m_PD_coinf,
     list(pairwise ~ coinf_stat),
     adjust = "fdr",
     lmer.df = "satterthwaite")$`pairwise differences of coinf_stat` %>% 
 as_tibble() %>% 
 pull(p.value)) %>%
 mutate_if(is.numeric, round, digits=3)

Export the data:

# export model summary
write.table(
  x = as_tibble(round(coef(
    summary(m_PD_coinf)), digits = 3), rownames = NA) %>%
    tibble::rownames_to_column(., "Predictor") %>%
    rename("SE" = 3,
           "t_value" = 5,
           "p_value" = 6  ),
  file = "results/revised/alpha/m_sum_PD_coinf.txt",
  sep = "\t",
  row.names = FALSE)

# export comparison 
write.table(m_PD_coinf_pairwise, file = "results/revised/alpha/m_PD_coinf_pairwise.txt", sep="\t", row.names = FALSE)

# export R2
write.table(r2_m_PD_coinf, file = "results/revised/alpha/r2_m_PD_coinf.txt", sep = "\t")
write.table(r2_m_PD_coinf_ind, file = "results/revised/alpha/r2_m_PD_coinf_ind.txt", sep = "\t")

ASV richness

Run the full model:

# fit full model
m_Obs_coinf <- lmerTest::lmer(Observed ~ coinf_stat + poll_group + location + reprod + (1|study_area), data = AlphaDivMeta, REML = TRUE)

# model summary
summary(m_Obs_coinf)

# calculate R² values
r2_m_Obs_coinf<- round(r.squaredGLMM(m_Obs_coinf),digits=3)
r2_m_Obs_coinf <- t(r2_m_Obs_coinf)

# calculate individual R² values
set.seed(42)
r2_m_Obs_coinf_ind <- partR2(
  m_Obs_coinf,
  data = AlphaDivMeta,
  partvars = c("coinf_stat", "poll_group", "location", "reprod"),
  R2_type = "marginal",
  nboot = 100,
  CI = 0.95,
  max_level = 1)$R2 %>% 
 as_tibble() %>% 
 mutate_if(is.numeric, round, digits=3)

Compare between the coinfection groups:

set.seed(42)
m_Obs_coinf_pairwise <-
 emmeans(m_Obs_coinf,
         list(pairwise ~ coinf_stat),
         adjust = "none",
         lmer.df = "satterthwaite")$`pairwise differences of coinf_stat` %>% 
 as_tibble() %>% 
 add_column(adj.p.value =
   emmeans(
     m_Obs_coinf,
     list(pairwise ~ coinf_stat),
     adjust = "fdr",
     lmer.df = "satterthwaite")$`pairwise differences of coinf_stat` %>% 
 as_tibble() %>% 
 pull(p.value)) %>%
 mutate_if(is.numeric, round, digits=3)

Export the data:

# export model summary
write.table(
  x = as_tibble(round(coef(
    summary(m_Obs_coinf)), digits = 3), rownames = NA) %>%
    tibble::rownames_to_column(., "Predictor") %>%
    rename("SE" = 3,
           "t_value" = 5,
           "p_value" = 6  ),
  file = "results/revised/alpha/m_sum_Obs_coinf.txt",
  sep = "\t",
  row.names = FALSE)

# export comparison 
write.table(m_Obs_coinf_pairwise, file = "results/revised/alpha/m_Obs_coinf_pairwise.txt", sep="\t", row.names = FALSE)

# export R2
write.table(r2_m_Obs_coinf, file = "results/revised/alpha/r2_m_Obs_coinf.txt", sep = "\t")
write.table(r2_m_Obs_coinf_ind, file = "results/revised/alpha/r2_m_Obs_coinf_ind.txt", sep = "\t")

Pathogens irrespective of coinfection status

Shannon’s diversity index

Run the full model:

# fit full model
m_Shan_path <- lmerTest::lmer(Shannon_tr ~ ana + bab + bor + puu + poll_group + location + reprod + (1|study_area), data = AlphaDivMeta, REML = TRUE)

# model summary
tab_model(m_Shan_path, df.method = "satterthwaite")

# calculate R²values
r2_m_Shan_path <- round(r.squaredGLMM(m_Shan_path), digits=3)
r2_m_Shan_path <- t(r2_m_Shan_path)

# calculate individual R² values
set.seed(42)
r2_m_Shan_path_ind <- partR2(
  m_Shan_path,
  data = AlphaDivMeta,
  partvars = c("ana", "bab", "bor", "puu", "poll_group", "location", "reprod"),
  R2_type = "marginal",
  nboot = 100,
  CI = 0.95,
  max_level = 1)$R2 %>% 
 as_tibble() %>% 
 mutate_if(is.numeric, round, digits=3)

Export the model results:

# export model summary
write.table(
  x = as_tibble(round(coef(
    summary(m_Shan_path)), digits = 3), rownames = NA) %>%
    tibble::rownames_to_column(., "Predictor") %>%
    rename("SE" = 3,
           "t_value" = 5,
           "p_value" = 6  ),
  file = "results/revised/alpha/m_sum_Shan_path.txt",
  sep = "\t",
  row.names = FALSE)

# export R2
write.table(r2_m_Shan_path, 
            file = "results/revised/alpha/r2_m_Shan_path.txt", sep = "\t")
write.table(r2_m_Shan_path_ind, 
            file = "results/revised/alpha/r2_m_Shan_path_ind.txt", sep = "\t")

Faith’s phylogenetic diversity

# fit full model
m_PD_path <- lmerTest::lmer(PD ~ ana + bab + bor + puu + poll_group + location + reprod + (1|study_area), data = AlphaDivMeta, REML = TRUE)

# model summary
summary(m_PD_path)

# calculate R²values
r2_m_PD_path<- round(r.squaredGLMM(m_PD_path),digits=3)
r2_m_PD_path <- t(r2_m_PD_path)

# calculate individual R² values
set.seed(42)
r2_m_PD_path_ind <- partR2(
  m_PD_path,
  data = AlphaDivMeta,
  partvars = c("ana", "bab", "bor", "puu", "poll_group", "location", "reprod"),
  R2_type = "marginal",
  nboot = 100,
  CI = 0.95,
  max_level = 1)$R2 %>% 
 as_tibble() %>% 
 mutate_if(is.numeric, round, digits=3)

Export the model results:

# export model summary
write.table(
  x = as_tibble(round(coef(
    summary(m_PD_path)), digits = 3), rownames = NA) %>%
    tibble::rownames_to_column(., "Predictor") %>%
    rename("SE" = 3,
           "t_value" = 5,
           "p_value" = 6  ),
  file = "results/revised/alpha/m_sum_PD_path.txt",
  sep = "\t",
  row.names = FALSE)

# export R2
write.table(r2_m_PD_path, 
            file = "results/revised/alpha/r2_m_PD_path.txt", sep = "\t")
write.table(r2_m_PD_path_ind, 
            file = "results/revised/alpha/r2_m_PD_path_ind.txt", sep = "\t")

ASV richness

# fit full model
m_Obs_path <- lmerTest::lmer(Observed ~ ana + bab + bor + puu + poll_group + location + reprod + (1|study_area), data = AlphaDivMeta, REML = TRUE)

# model summary
summary(m_Obs_path)

# calculate R²values
r2_m_Obs_path<- round(r.squaredGLMM(m_Obs_path),digits=3)
r2_m_Obs_path <- t(r2_m_Obs_path)

# calculate individual R² values
set.seed(42)
r2_m_Obs_path_ind <- partR2(
  m_Obs_path,
  data = AlphaDivMeta,
  partvars = c("ana", "bab", "bor", "puu", "poll_group", "location", "reprod"),
  R2_type = "marginal",
  nboot = 100,
  CI = 0.95,
  max_level = 1)$R2 %>% 
 as_tibble() %>% 
 mutate_if(is.numeric, round, digits=3)

Export the model results:

# export model summary
write.table(
  x = as_tibble(round(coef(
    summary(m_Obs_path)), digits = 3), rownames = NA) %>%
    tibble::rownames_to_column(., "Predictor") %>%
    rename("SE" = 3,
           "t_value" = 5,
           "p_value" = 6  ),
  file = "results/revised/alpha/m_sum_Obs_path.txt",
  sep = "\t",
  row.names = FALSE)

# export R2
write.table(r2_m_Obs_path, 
            file = "results/revised/alpha/r2_m_Obs_path.txt", sep = "\t")
write.table(r2_m_Obs_path_ind, 
            file = "results/revised/alpha/r2_m_Obs_path_ind.txt", sep = "\t")

Pathogens considering coinfection status

Shannon’s diversity index

Anaplasma

Run the full model:

# fit full model
m_Shan_coinf_ana <- lmerTest::lmer(Shannon_tr ~ ana_coinf + poll_group + location + reprod + (1|study_area), data = AlphaDivMeta, REML = TRUE)

# model summary
summary(m_Shan_coinf_ana)

# calculate R²values
r2_m_Shan_coinf_ana <- round(r.squaredGLMM(m_Shan_coinf_ana), digits = 3)
r2_m_Shan_coinf_ana <- t(r2_m_Shan_coinf_ana)

# calculate individual R² values
set.seed(42)
r2_m_Shan_coinf_ana_ind <- partR2(
  m_Shan_coinf_ana,
  data = AlphaDivMeta,
  partvars = c("ana_coinf", "poll_group", "location", "reprod"),
  R2_type = "marginal",
  nboot = 100,
  CI = 0.95,
  max_level = 1)$R2 %>% 
 as_tibble() %>% 
 mutate_if(is.numeric, round, digits=3)

Compare between the coinfection groups:

set.seed(42)
m_Shan_coinf_ana_pairwise <-
 emmeans(m_Shan_coinf_ana,
         list(pairwise ~ ana_coinf),
         adjust = "none",
         lmer.df = "satterthwaite")$`pairwise differences of ana_coinf` %>% 
 as_tibble() %>% 
 add_column(adj.p.value =
   emmeans(
     m_Shan_coinf_ana,
     list(pairwise ~ ana_coinf),
     adjust = "fdr",
     lmer.df = "satterthwaite")$`pairwise differences of ana_coinf` %>% 
 as_tibble() %>% 
 pull(p.value)) %>%
 mutate_if(is.numeric, round, digits=3)

Export the data:

# export model summary
write.table(
  x = as_tibble(round(coef(
    summary(m_Shan_coinf_ana)), digits = 3), rownames = NA) %>%
    tibble::rownames_to_column(., "Predictor") %>%
    rename("SE" = 3,
           "t_value" = 5,
           "p_value" = 6),
  file = "results/revised/alpha/m_sum_Shan_coinf_ana.txt",
  sep = "\t",
  row.names = FALSE)

# export comparison 
write.table(m_Shan_coinf_ana_pairwise, file = "results/revised/alpha/m_Shan_coinf_ana_pairwise.txt", sep="\t", row.names = FALSE)

# export R2
write.table(r2_m_Shan_coinf_ana, file = "results/revised/alpha/r2_m_Shan_coinf_ana.txt", sep = "\t")
write.table(r2_m_Shan_coinf_ana_ind, file = "results/revised/alpha/r2_m_Shan_coinf_ana_ind.txt", sep = "\t")

Babesia

Run the full model:

# fit full model
m_Shan_coinf_bab <- lmerTest::lmer(Shannon_tr ~ bab_coinf + poll_group + location + reprod + (1|study_area), data = AlphaDivMeta, REML = TRUE)

# model summary
summary(m_Shan_coinf_bab)

# calculate R²values
r2_m_Shan_coinf_bab<- round(r.squaredGLMM(m_Shan_coinf_bab),digits=3)
r2_m_Shan_coinf_bab <- t(r2_m_Shan_coinf_bab)

# calculate individual R² values
set.seed(42)
r2_m_Shan_coinf_bab_ind <- partR2(
  m_Shan_coinf_bab,
  data = AlphaDivMeta,
  partvars = c("bab_coinf", "poll_group", "location", "reprod"),
  R2_type = "marginal",
  nboot = 100,
  CI = 0.95,
  max_level = 1)$R2 %>% 
 as_tibble() %>% 
 mutate_if(is.numeric, round, digits=3)

Compare between the coinfection groups:

set.seed(42)
m_Shan_coinf_bab_pairwise <-
 emmeans(m_Shan_coinf_bab,
         list(pairwise ~ bab_coinf),
         adjust = "none",
         lmer.df = "satterthwaite")$`pairwise differences of bab_coinf` %>% 
 as_tibble() %>% 
 add_column(adj.p.value =
   emmeans(
     m_Shan_coinf_bab,
     list(pairwise ~ bab_coinf),
     adjust = "fdr",
     lmer.df = "satterthwaite")$`pairwise differences of bab_coinf` %>% 
 as_tibble() %>% 
 pull(p.value)) %>%
 mutate_if(is.numeric, round, digits=3)

Export the data:

# export model summary
write.table(
  x = as_tibble(round(coef(
    summary(m_Shan_coinf_bab)), digits = 3), rownames = NA) %>%
    tibble::rownames_to_column(., "Predictor") %>%
    rename("SE" = 3,
           "t_value" = 5,
           "p_value" = 6),
  file = "results/revised/alpha/m_sum_Shan_coinf_bab.txt",
  sep = "\t",
  row.names = FALSE)

# export comparison 
write.table(m_Shan_coinf_bab_pairwise, file = "results/revised/alpha/m_Shan_coinf_bab_pairwise.txt", sep="\t", row.names = FALSE)

# export R2
write.table(r2_m_Shan_coinf_bab, file = "results/revised/alpha/r2_m_Shan_coinf_bab.txt", sep = "\t")
write.table(r2_m_Shan_coinf_bab_ind, file = "results/revised/alpha/r2_m_Shan_coinf_bab_ind.txt", sep = "\t")

Borrelia

Run the full model:

# fit full model
m_Shan_coinf_bor <- lmerTest::lmer(Shannon_tr ~ bor_coinf + poll_group + location + reprod + (1|study_area), data = AlphaDivMeta, REML = TRUE)

# model summary
summary(m_Shan_coinf_bor)

# calculate R²values
r2_m_Shan_coinf_bor<- round(r.squaredGLMM(m_Shan_coinf_bor),digits=3)
r2_m_Shan_coinf_bor <- t(r2_m_Shan_coinf_bor)

# calculate individual R² values
set.seed(42)
r2_m_Shan_coinf_bor_ind <- partR2(
  m_Shan_coinf_bor,
  data = AlphaDivMeta,
  partvars = c("bor_coinf", "poll_group", "location", "reprod"),
  R2_type = "marginal",
  nboot = 100,
  CI = 0.95,
  max_level = 1)$R2 %>% 
 as_tibble() %>% 
 mutate_if(is.numeric, round, digits=3)

Compare between the coinfection groups:

set.seed(42)
m_Shan_coinf_bor_pairwise <-
 emmeans(m_Shan_coinf_bor,
         list(pairwise ~ bor_coinf),
         adjust = "none",
         lmer.df = "satterthwaite")$`pairwise differences of bor_coinf` %>% 
 as_tibble() %>% 
 add_column(adj.p.value =
   emmeans(
     m_Shan_coinf_bor,
     list(pairwise ~ bor_coinf),
     adjust = "fdr",
     lmer.df = "satterthwaite")$`pairwise differences of bor_coinf` %>% 
 as_tibble() %>% 
 pull(p.value)) %>%
 mutate_if(is.numeric, round, digits=3)

Export the data:

# export model summary
write.table(
  x = as_tibble(round(coef(
    summary(m_Shan_coinf_bor)), digits = 3), rownames = NA) %>%
    tibble::rownames_to_column(., "Predictor") %>%
    rename("SE" = 3,
           "t_value" = 5,
           "p_value" = 6),
  file = "results/revised/alpha/m_sum_Shan_coinf_bor.txt",
  sep = "\t",
  row.names = FALSE)

# export comparison 
write.table(m_Shan_coinf_bor_pairwise, file = "results/revised/alpha/m_Shan_coinf_bor_pairwise.txt", sep="\t", row.names = FALSE)

# export R2
write.table(r2_m_Shan_coinf_bor, file = "results/revised/alpha/r2_m_Shan_coinf_bor.txt", sep = "\t")
write.table(r2_m_Shan_coinf_bor_ind, file = "results/revised/alpha/r2_m_Shan_coinf_bor_ind.txt", sep = "\t")

Puumala

Run the full model:

# fit full model
m_Shan_coinf_puu <- lmerTest::lmer(Shannon_tr ~ puu_coinf + poll_group + location + reprod + (1|study_area), data = AlphaDivMeta, REML = TRUE)

# model summary
summary(m_Shan_coinf_puu)

# calculate R²values
r2_m_Shan_coinf_puu<- round(r.squaredGLMM(m_Shan_coinf_puu),digits=3)
r2_m_Shan_coinf_puu <- t(r2_m_Shan_coinf_puu)

# calculate individual R² values
set.seed(42)
r2_m_Shan_coinf_puu_ind <- partR2(
  m_Shan_coinf_puu,
  data = AlphaDivMeta,
  partvars = c("puu_coinf", "poll_group", "location", "reprod"),
  R2_type = "marginal",
  nboot = 100,
  CI = 0.95,
  max_level = 1)$R2 %>% 
 as_tibble() %>% 
 mutate_if(is.numeric, round, digits=3)

Compare between the coinfection groups:

set.seed(42)
m_Shan_coinf_puu_pairwise <-
 emmeans(m_Shan_coinf_puu,
         list(pairwise ~ puu_coinf),
         adjust = "none",
         lmer.df = "satterthwaite")$`pairwise differences of puu_coinf` %>% 
 as_tibble() %>% 
 add_column(adj.p.value =
   emmeans(
     m_Shan_coinf_puu,
     list(pairwise ~ puu_coinf),
     adjust = "fdr",
     lmer.df = "satterthwaite")$`pairwise differences of puu_coinf` %>% 
 as_tibble() %>% 
 pull(p.value)) %>%
 mutate_if(is.numeric, round, digits=3)

Export the data:

# export model summary
write.table(
  x = as_tibble(round(coef(
    summary(m_Shan_coinf_puu)), digits = 3), rownames = NA) %>%
    tibble::rownames_to_column(., "Predictor") %>%
    rename("SE" = 3,
           "t_value" = 5,
           "p_value" = 6),
  file = "results/revised/alpha/m_sum_Shan_coinf_puu.txt",
  sep = "\t",
  row.names = FALSE)

# export comparison 
write.table(m_Shan_coinf_puu_pairwise, file = "results/revised/alpha/m_Shan_coinf_puu_pairwise.txt", sep="\t", row.names = FALSE)

# export R2
write.table(r2_m_Shan_coinf_puu, file = "results/revised/alpha/r2_m_Shan_coinf_puu.txt", sep = "\t")
write.table(r2_m_Shan_coinf_puu_ind, file = "results/revised/alpha/r2_m_Shan_coinf_puu_ind.txt", sep = "\t")

ASV richness

Anaplasma

Run the full model:

# fit full model
m_Obs_coinf_ana <- lmerTest::lmer(Observed ~ ana_coinf + poll_group + location + reprod + (1|study_area), data = AlphaDivMeta, REML = TRUE)

# model summary
summary(m_Obs_coinf_ana)

# calculate R²values
r2_m_Obs_coinf_ana<- round(r.squaredGLMM(m_Obs_coinf_ana),digits=3)
r2_m_Obs_coinf_ana <- t(r2_m_Obs_coinf_ana)

# calculate individual R² values
set.seed(42)
r2_m_Obs_coinf_ana_ind <- partR2(
  m_Obs_coinf_ana,
  data = AlphaDivMeta,
  partvars = c("ana_coinf", "poll_group", "location", "reprod"),
  R2_type = "marginal",
  nboot = 100,
  CI = 0.95,
  max_level = 1)$R2 %>% 
 as_tibble() %>% 
 mutate_if(is.numeric, round, digits=3)

Compare between the coinfection groups:

set.seed(42)
m_Obs_coinf_ana_pairwise <-
 emmeans(m_Obs_coinf_ana,
         list(pairwise ~ ana_coinf),
         adjust = "none",
         lmer.df = "satterthwaite")$`pairwise differences of ana_coinf` %>% 
 as_tibble() %>% 
 add_column(adj.p.value =
   emmeans(
     m_Obs_coinf_ana,
     list(pairwise ~ ana_coinf),
     adjust = "fdr",
     lmer.df = "satterthwaite")$`pairwise differences of ana_coinf` %>% 
 as_tibble() %>% 
 pull(p.value)) %>%
 mutate_if(is.numeric, round, digits=3)

Export the data:

# export model summary
write.table(
  x = as_tibble(round(coef(
    summary(m_Obs_coinf_ana)), digits = 3), rownames = NA) %>%
    tibble::rownames_to_column(., "Predictor") %>%
    rename("SE" = 3,
           "t_value" = 5,
           "p_value" = 6),
  file = "results/revised/alpha/m_sum_Obs_coinf_ana.txt",
  sep = "\t",
  row.names = FALSE)

# export comparison 
write.table(m_Obs_coinf_ana_pairwise, file = "results/revised/alpha/m_Obs_coinf_ana_pairwise.txt", sep="\t", row.names = FALSE)

# export R2
write.table(r2_m_Obs_coinf_ana, file = "results/revised/alpha/r2_m_Obs_coinf_ana.txt", sep = "\t")
write.table(r2_m_Obs_coinf_ana_ind, file = "results/revised/alpha/r2_m_Obs_coinf_ana_ind.txt", sep = "\t")

Babesia

Run the full model:

# fit full model
m_Obs_coinf_bab <- lmerTest::lmer(Observed ~ bab_coinf + poll_group + location + reprod + (1|study_area), data = AlphaDivMeta, REML = TRUE)

# model summary
summary(m_Obs_coinf_bab)

# calculate R²values
r2_m_Obs_coinf_bab<- round(r.squaredGLMM(m_Obs_coinf_bab),digits=3)
r2_m_Obs_coinf_bab <- t(r2_m_Obs_coinf_bab)

# calculate individual R² values
set.seed(42)
r2_m_Obs_coinf_bab_ind <- partR2(
  m_Obs_coinf_bab,
  data = AlphaDivMeta,
  partvars = c("bab_coinf", "poll_group", "location", "reprod"),
  R2_type = "marginal",
  nboot = 100,
  CI = 0.95,
  max_level = 1)$R2 %>% 
 as_tibble() %>% 
 mutate_if(is.numeric, round, digits=3)

Compare between the coinfection groups:

set.seed(42)
m_Obs_coinf_bab_pairwise <-
 emmeans(m_Obs_coinf_bab,
         list(pairwise ~ bab_coinf),
         adjust = "none",
         lmer.df = "satterthwaite")$`pairwise differences of bab_coinf` %>% 
 as_tibble() %>% 
 add_column(adj.p.value =
   emmeans(
     m_Obs_coinf_bab,
     list(pairwise ~ bab_coinf),
     adjust = "fdr",
     lmer.df = "satterthwaite")$`pairwise differences of bab_coinf` %>% 
 as_tibble() %>% 
 pull(p.value)) %>%
 mutate_if(is.numeric, round, digits=3)

Export the data:

# export model summary
write.table(
  x = as_tibble(round(coef(
    summary(m_Obs_coinf_bab)), digits = 3), rownames = NA) %>%
    tibble::rownames_to_column(., "Predictor") %>%
    rename("SE" = 3,
           "t_value" = 5,
           "p_value" = 6),
  file = "results/revised/alpha/m_sum_Obs_coinf_bab.txt",
  sep = "\t",
  row.names = FALSE)

# export comparison 
write.table(m_Obs_coinf_bab_pairwise, file = "results/revised/alpha/m_Obs_coinf_bab_pairwise.txt", sep="\t", row.names = FALSE)

# export R2
write.table(r2_m_Obs_coinf_bab, file = "results/revised/alpha/r2_m_Obs_coinf_bab.txt", sep = "\t")
write.table(r2_m_Obs_coinf_bab_ind, file = "results/revised/alpha/r2_m_Obs_coinf_bab_ind.txt", sep = "\t")

Borrelia

Run the full model:

# fit full model
m_Obs_coinf_bor <- lmerTest::lmer(Observed ~ bor_coinf + poll_group + location + reprod + (1|study_area), data = AlphaDivMeta, REML = TRUE)

# model summary
summary(m_Obs_coinf_bor)

# calculate R²values
r2_m_Obs_coinf_bor<- round(r.squaredGLMM(m_Obs_coinf_bor),digits=3)
r2_m_Obs_coinf_bor <- t(r2_m_Obs_coinf_bor)

# calculate individual R² values
set.seed(42)
r2_m_Obs_coinf_bor_ind <- partR2(
  m_Obs_coinf_bor,
  data = AlphaDivMeta,
  partvars = c("bor_coinf", "poll_group", "location", "reprod"),
  R2_type = "marginal",
  nboot = 100,
  CI = 0.95,
  max_level = 1)$R2 %>% 
 as_tibble() %>% 
 mutate_if(is.numeric, round, digits=3)

Compare between the coinfection groups:

set.seed(42)
m_Obs_coinf_bor_pairwise <-
 emmeans(m_Obs_coinf_bor,
         list(pairwise ~ bor_coinf),
         adjust = "none",
         lmer.df = "satterthwaite")$`pairwise differences of bor_coinf` %>% 
 as_tibble() %>% 
 add_column(adj.p.value =
   emmeans(
     m_Obs_coinf_bor,
     list(pairwise ~ bor_coinf),
     adjust = "fdr",
     lmer.df = "satterthwaite")$`pairwise differences of bor_coinf` %>% 
 as_tibble() %>% 
 pull(p.value)) %>%
 mutate_if(is.numeric, round, digits=3)

Export the data:

# export model summary
write.table(
  x = as_tibble(round(coef(
    summary(m_Obs_coinf_bor)), digits = 3), rownames = NA) %>%
    tibble::rownames_to_column(., "Predictor") %>%
    rename("SE" = 3,
           "t_value" = 5,
           "p_value" = 6),
  file = "results/revised/alpha/m_sum_Obs_coinf_bor.txt",
  sep = "\t",
  row.names = FALSE)

# export comparison 
write.table(m_Obs_coinf_bor_pairwise, file = "results/revised/alpha/m_Obs_coinf_bor_pairwise.txt", sep="\t", row.names = FALSE)

# export R2
write.table(r2_m_Obs_coinf_bor, file = "results/revised/alpha/r2_m_Obs_coinf_bor.txt", sep = "\t")
write.table(r2_m_Obs_coinf_bor_ind, file = "results/revised/alpha/r2_m_Obs_coinf_bor_ind.txt", sep = "\t")

Puumala

Run the full model:

# fit full model
m_Obs_coinf_puu <- lmerTest::lmer(Observed ~ puu_coinf + poll_group + location + reprod + (1|study_area), data = AlphaDivMeta, REML = TRUE)

# model summary
summary(m_Obs_coinf_puu)

# calculate R²values
r2_m_Obs_coinf_puu<- round(r.squaredGLMM(m_Obs_coinf_puu),digits=3)
r2_m_Obs_coinf_puu <- t(r2_m_Obs_coinf_puu)

# calculate individual R² values
set.seed(42)
r2_m_Obs_coinf_puu_ind <- partR2(
  m_Obs_coinf_puu,
  data = AlphaDivMeta,
  partvars = c("puu_coinf", "poll_group", "location", "reprod"),
  R2_type = "marginal",
  nboot = 100,
  CI = 0.95,
  max_level = 1)$R2 %>% 
 as_tibble() %>% 
 mutate_if(is.numeric, round, digits=3)

Compare between the coinfection groups:

set.seed(42)
m_Obs_coinf_puu_pairwise <-
 emmeans(m_Obs_coinf_puu,
         list(pairwise ~ puu_coinf),
         adjust = "none",
         lmer.df = "satterthwaite")$`pairwise differences of puu_coinf` %>% 
 as_tibble() %>% 
 add_column(adj.p.value =
   emmeans(
     m_Obs_coinf_puu,
     list(pairwise ~ puu_coinf),
     adjust = "fdr",
     lmer.df = "satterthwaite")$`pairwise differences of puu_coinf` %>% 
 as_tibble() %>% 
 pull(p.value)) %>%
 mutate_if(is.numeric, round, digits=3)

Export the data:

# export model summary
write.table(
  x = as_tibble(round(coef(
    summary(m_Obs_coinf_puu)), digits = 3), rownames = NA) %>%
    tibble::rownames_to_column(., "Predictor") %>%
    rename("SE" = 3,
           "t_value" = 5,
           "p_value" = 6),
  file = "results/revised/alpha/m_sum_Obs_coinf_puu.txt",
  sep = "\t",
  row.names = FALSE)

# export comparison 
write.table(m_Obs_coinf_puu_pairwise, file = "results/revised/alpha/m_Obs_coinf_puu_pairwise.txt", sep="\t", row.names = FALSE)

# export R2
write.table(r2_m_Obs_coinf_puu, file = "results/revised/alpha/r2_m_Obs_coinf_puu.txt", sep = "\t")
write.table(r2_m_Obs_coinf_puu_ind, file = "results/revised/alpha/r2_m_Obs_coinf_puu_ind.txt", sep = "\t")

Faith’s phylogenetic diversity

Anaplasma

Run the full model:

# fit full model
m_PD_coinf_ana <- lmerTest::lmer(PD ~ ana_coinf + poll_group + location + reprod + (1|study_area), data = AlphaDivMeta, REML = TRUE)

# model summary
summary(m_PD_coinf_ana)

# calculate R²values
r2_m_PD_coinf_ana<- round(r.squaredGLMM(m_PD_coinf_ana),digits=3)
r2_m_PD_coinf_ana <- t(r2_m_PD_coinf_ana)

# calculate individual R² values
set.seed(42)
r2_m_PD_coinf_ana_ind <- partR2(
  m_PD_coinf_ana,
  data = AlphaDivMeta,
  partvars = c("ana_coinf", "poll_group", "location", "reprod"),
  R2_type = "marginal",
  nboot = 100,
  CI = 0.95,
  max_level = 1)$R2 %>% 
 as_tibble() %>% 
 mutate_if(is.numeric, round, digits=3)

Compare between the coinfection groups:

set.seed(42)
m_PD_coinf_ana_pairwise <-
 emmeans(m_PD_coinf_ana,
         list(pairwise ~ ana_coinf),
         adjust = "none",
         lmer.df = "satterthwaite")$`pairwise differences of ana_coinf` %>% 
 as_tibble() %>% 
 add_column(adj.p.value =
   emmeans(
     m_PD_coinf_ana,
     list(pairwise ~ ana_coinf),
     adjust = "fdr",
     lmer.df = "satterthwaite")$`pairwise differences of ana_coinf` %>% 
 as_tibble() %>% 
 pull(p.value)) %>%
 mutate_if(is.numeric, round, digits=3)

Export the data:

# export model summary
write.table(
  x = as_tibble(round(coef(
    summary(m_PD_coinf_ana)), digits = 3), rownames = NA) %>%
    tibble::rownames_to_column(., "Predictor") %>%
    rename("SE" = 3,
           "t_value" = 5,
           "p_value" = 6),
  file = "results/revised/alpha/m_sum_PD_coinf_ana.txt",
  sep = "\t",
  row.names = FALSE)

# export comparison 
write.table(m_PD_coinf_ana_pairwise, file = "results/revised/alpha/m_PD_coinf_ana_pairwise.txt", sep="\t", row.names = FALSE)

# export R2
write.table(r2_m_PD_coinf_ana, file = "results/revised/alpha/r2_m_PD_coinf_ana.txt", sep = "\t")
write.table(r2_m_PD_coinf_ana_ind, file = "results/revised/alpha/r2_m_PD_coinf_ana_ind.txt", sep = "\t")

Babesia

Run the full model:

# fit full model
m_PD_coinf_bab <- lmerTest::lmer(PD ~ bab_coinf + poll_group + location + reprod + (1|study_area), data = AlphaDivMeta, REML = TRUE)

# model summary
summary(m_PD_coinf_bab)

# calculate R²values
r2_m_PD_coinf_bab<- round(r.squaredGLMM(m_PD_coinf_bab),digits=3)
r2_m_PD_coinf_bab <- t(r2_m_PD_coinf_bab)

# calculate individual R² values
set.seed(42)
r2_m_PD_coinf_bab_ind <- partR2(
  m_PD_coinf_bab,
  data = AlphaDivMeta,
  partvars = c("bab_coinf", "poll_group", "location", "reprod"),
  R2_type = "marginal",
  nboot = 100,
  CI = 0.95,
  max_level = 1)$R2 %>% 
 as_tibble() %>% 
 mutate_if(is.numeric, round, digits=3)

Compare between the coinfection groups:

set.seed(42)
m_PD_coinf_bab_pairwise <-
 emmeans(m_PD_coinf_bab,
         list(pairwise ~ bab_coinf),
         adjust = "none",
         lmer.df = "satterthwaite")$`pairwise differences of bab_coinf` %>% 
 as_tibble() %>% 
 add_column(adj.p.value =
   emmeans(
     m_PD_coinf_bab,
     list(pairwise ~ bab_coinf),
     adjust = "fdr",
     lmer.df = "satterthwaite")$`pairwise differences of bab_coinf` %>% 
 as_tibble() %>% 
 pull(p.value)) %>%
 mutate_if(is.numeric, round, digits=3)

Export the data:

# export model summary
write.table(
  x = as_tibble(round(coef(
    summary(m_PD_coinf_bab)), digits = 3), rownames = NA) %>%
    tibble::rownames_to_column(., "Predictor") %>%
    rename("SE" = 3,
           "t_value" = 5,
           "p_value" = 6),
  file = "results/revised/alpha/m_sum_PD_coinf_bab.txt",
  sep = "\t",
  row.names = FALSE)

# export comparison 
write.table(m_PD_coinf_bab_pairwise, file = "results/revised/alpha/m_PD_coinf_bab_pairwise.txt", sep="\t", row.names = FALSE)

# export R2
write.table(r2_m_PD_coinf_bab, file = "results/revised/alpha/r2_m_PD_coinf_bab.txt", sep = "\t")
write.table(r2_m_PD_coinf_bab_ind, file = "results/revised/alpha/r2_m_PD_coinf_bab_ind.txt", sep = "\t")

Borrelia

Run the full model:

# fit full model
m_PD_coinf_bor <- lmerTest::lmer(PD ~ bor_coinf + poll_group + location + reprod + (1|study_area), data = AlphaDivMeta, REML = TRUE)

# model summary
summary(m_PD_coinf_bor)

# calculate R²values
r2_m_PD_coinf_bor<- round(r.squaredGLMM(m_PD_coinf_bor),digits=3)
r2_m_PD_coinf_bor <- t(r2_m_PD_coinf_bor)

# calculate individual R² values
set.seed(42)
r2_m_PD_coinf_bor_ind <- partR2(
  m_PD_coinf_bor,
  data = AlphaDivMeta,
  partvars = c("bor_coinf", "poll_group", "location", "reprod"),
  R2_type = "marginal",
  nboot = 100,
  CI = 0.95,
  max_level = 1)$R2 %>% 
 as_tibble() %>% 
 mutate_if(is.numeric, round, digits=3)

Compare between the coinfection groups:

set.seed(42)
m_PD_coinf_bor_pairwise <-
 emmeans(m_PD_coinf_bor,
         list(pairwise ~ bor_coinf),
         adjust = "none",
         lmer.df = "satterthwaite")$`pairwise differences of bor_coinf` %>% 
 as_tibble() %>% 
 add_column(adj.p.value =
   emmeans(
     m_PD_coinf_bor,
     list(pairwise ~ bor_coinf),
     adjust = "fdr",
     lmer.df = "satterthwaite")$`pairwise differences of bor_coinf` %>% 
 as_tibble() %>% 
 pull(p.value)) %>%
 mutate_if(is.numeric, round, digits=3)

Export the data:

# export model summary
write.table(
  x = as_tibble(round(coef(
    summary(m_PD_coinf_bor)), digits = 3), rownames = NA) %>%
    tibble::rownames_to_column(., "Predictor") %>%
    rename("SE" = 3,
           "t_value" = 5,
           "p_value" = 6),
  file = "results/revised/alpha/m_sum_PD_coinf_bor.txt",
  sep = "\t",
  row.names = FALSE)

# export comparison 
write.table(m_PD_coinf_bor_pairwise, file = "results/revised/alpha/m_PD_coinf_bor_pairwise.txt", sep="\t", row.names = FALSE)

# export R2
write.table(r2_m_PD_coinf_bor, file = "results/revised/alpha/r2_m_PD_coinf_bor.txt", sep = "\t")
write.table(r2_m_PD_coinf_bor_ind, file = "results/revised/alpha/r2_m_PD_coinf_bor_ind.txt", sep = "\t")

Puumala

Run the full model:

# fit full model
m_PD_coinf_puu <- lmerTest::lmer(PD ~ puu_coinf + poll_group + location + reprod + (1|study_area), data = AlphaDivMeta, REML = TRUE)

# model summary
summary(m_PD_coinf_puu)

# calculate R²values
r2_m_PD_coinf_puu<- round(r.squaredGLMM(m_PD_coinf_puu),digits=3)
r2_m_PD_coinf_puu <- t(r2_m_PD_coinf_puu)

# calculate individual R² values
set.seed(42)
r2_m_PD_coinf_puu_ind <- partR2(
  m_PD_coinf_puu,
  data = AlphaDivMeta,
  partvars = c("puu_coinf", "poll_group", "location", "reprod"),
  R2_type = "marginal",
  nboot = 100,
  CI = 0.95,
  max_level = 1)$R2 %>% 
 as_tibble() %>% 
 mutate_if(is.numeric, round, digits=3)

Compare between the coinfection groups:

set.seed(42)
m_PD_coinf_puu_pairwise <-
 emmeans(m_PD_coinf_puu,
         list(pairwise ~ puu_coinf),
         adjust = "none",
         lmer.df = "satterthwaite")$`pairwise differences of puu_coinf` %>% 
 as_tibble() %>% 
 add_column(adj.p.value =
   emmeans(
     m_PD_coinf_puu,
     list(pairwise ~ puu_coinf),
     adjust = "fdr",
     lmer.df = "satterthwaite")$`pairwise differences of puu_coinf` %>% 
 as_tibble() %>% 
 pull(p.value)) %>%
 mutate_if(is.numeric, round, digits=3)

Export the data:

# export model summary
write.table(
  x = as_tibble(round(coef(
    summary(m_PD_coinf_puu)), digits = 3), rownames = NA) %>%
    tibble::rownames_to_column(., "Predictor") %>%
    rename("SE" = 3,
           "t_value" = 5,
           "p_value" = 6),
  file = "results/revised/alpha/m_sum_PD_coinf_puu.txt",
  sep = "\t",
  row.names = FALSE)

# export comparison 
write.table(m_PD_coinf_puu_pairwise, file = "results/revised/alpha/m_PD_coinf_puu_pairwise.txt", sep="\t", row.names = FALSE)

# export R2
write.table(r2_m_PD_coinf_puu, file = "results/revised/alpha/r2_m_PD_coinf_puu.txt", sep = "\t")
write.table(r2_m_PD_coinf_puu_ind, file = "results/revised/alpha/r2_m_PD_coinf_puu_ind.txt", sep = "\t")

Beta diversity

Calculate distances and ordinate using PCoA

# Bray - Curtis
d_bc <- distance(ps, method = "bray")
ordin_bc <- ordinate(ps,method = "PCoA", distance = d_bc)

# Unweighted UniFrac
d_uf <- rbiom::unifrac(biom=as.matrix(phyloseq::otu_table(ps)), weighted=FALSE, tree=phy_tree(ps))
ordin_uf<- ordinate(ps,method = "PCoA", distance = d_uf)

# Jaccard
d_jacc <- distance(ps, method = "jaccard", binary=TRUE)
ordin_jacc<- ordinate(ps,method = "PCoA", distance = d_jacc)

# Weighted UniFrac
d_wuf <- rbiom::unifrac(biom=as.matrix(phyloseq::otu_table(ps)), weighted=TRUE, tree=phy_tree(ps))
ordin_wuf<- ordinate(ps,method = "PCoA", distance = d_wuf)

Note: potential clustering due to sample processing was checked using exploratory plotting. No significant effects of trapping or dissection date or DNA extraction batch were detected.

Coinfection status

Jaccard

Permanova:

set.seed(42)
adonis2_jacc_coinf<- adonis2(
  d_jacc ~ coinf_stat + poll_group + location + reprod, data = VolePathData_micro, by="margin")%>% 
  rownames_to_column(., "Variable") %>% 
  as_tibble() %>% 
  rename("F" = 5,
         "p" = 6) %>%
 mutate_if(is.numeric, round, digits=3)

Pairwise comparisons:

set.seed(42)
adonis_jacc_coinf_pair <- pairwise.adonis(
  d_jacc, VolePathData_micro$coinf_stat, perm = 999, p.adjust.m = "fdr") %>%
  as_tibble() %>% 
  rename(
    "Coinfection status comparison" = 1,
    "SS" = 3,
    "F" = 4,
    "R²" = 5,
    "p-value" = 6,
    "q-value" = 7) %>%
  select(-c(2,8)) %>% 
  mutate_if(is.numeric, round, digits=3)

Compare dispersion:

set.seed(42)
betadisper(
  d_jacc, group =  VolePathData_micro$coinf_stat, bias.adjust = TRUE) %>% 
  permutest()

set.seed(42)
disp_jacc_coinf <- betadisper(
  d_jacc, group =  VolePathData_micro$coinf_stat, bias.adjust = TRUE) %>% 
  permutest(pairwise = TRUE)

disp_jacc_coinf <- as.data.frame(
  disp_jacc_coinf$pairwise) %>%
  rownames_to_column(., var="comparison") %>% 
  add_column(variable="coinf_stat") %>%
  mutate_if(is.numeric, round, digits=4)

Export results:

# adonis2
write.table(adonis2_jacc_coinf, file = "results/revised/beta/coinf_jacc_perm.txt", sep="\t", row.names = FALSE)
write.table(adonis_jacc_coinf_pair, file = "results/revised/beta/coinf_jacc_perm_pairw.txt", sep="\t", row.names = FALSE)

# dispersion
write.table(disp_jacc_coinf, file = "results/revised/beta/coinf_jacc_disp.txt", sep="\t", row.names = FALSE)

UniFrac

Permanova:

set.seed(42)
adonis2_uf_coinf<- adonis2(
  d_uf ~ coinf_stat + poll_group + location + reprod, data = VolePathData_micro, by="margin")%>% 
  rownames_to_column(., "Variable") %>% 
  as_tibble() %>% 
  rename("F" = 5,
         "p" = 6) %>%
 mutate_if(is.numeric, round, digits=3)

Pairwise comparisons:

set.seed(42)
adonis_uf_coinf_pair <- pairwise.adonis(
  d_uf, VolePathData_micro$coinf_stat, perm = 999, p.adjust.m = "fdr") %>%
  as_tibble() %>% 
  rename(
    "Coinfection status comparison" = 1,
    "SS" = 3,
    "F" = 4,
    "R²" = 5,
    "p-value" = 6,
    "q-value" = 7) %>%
  select(-c(2,8)) %>% 
  mutate_if(is.numeric, round, digits=3)

Compare dispersion:

set.seed(42)
betadisper(
  d_uf, group =  VolePathData_micro$coinf_stat, bias.adjust = TRUE) %>% 
  permutest()

set.seed(42)
disp_uf_coinf <- betadisper(
  d_uf, group =  VolePathData_micro$coinf_stat, bias.adjust = TRUE) %>% 
  permutest(pairwise = TRUE)

disp_uf_coinf <- as.data.frame(
  disp_uf_coinf$pairwise) %>%
  rownames_to_column(., var="comparison") %>% 
  add_column(variable="coinf_stat") %>%
  mutate_if(is.numeric, round, digits=4)

Export results:

# adonis2
write.table(adonis2_uf_coinf, file = "results/revised/beta/coinf_UF_perm.txt", sep="\t", row.names = FALSE)
write.table(adonis_uf_coinf_pair, file = "results/revised/beta/coinf_UF_perm_pairw.txt", sep="\t", row.names = FALSE)

# dispersion
write.table(disp_uf_coinf, file = "results/revised/beta/coinf_UF_disp.txt", sep="\t", row.names = FALSE)

Bray-Curtis

Permanova:

set.seed(42)
adonis2_bc_coinf<- adonis2(
  d_bc ~ coinf_stat + poll_group + location + reprod, data = VolePathData_micro, by="margin")%>% 
  rownames_to_column(., "Variable") %>% 
  as_tibble() %>% 
  rename("F" = 5,
         "p" = 6)%>%
 mutate_if(is.numeric, round, digits=3)

Pairwise comparisons:

set.seed(42)
adonis_bc_coinf_pair <- pairwise.adonis(
  d_bc, VolePathData_micro$coinf_stat, perm = 999, p.adjust.m = "fdr") %>%
  as_tibble() %>% 
  rename(
    "Coinfection status comparison" = 1,
    "SS" = 3,
    "F" = 4,
    "R²" = 5,
    "p-value" = 6,
    "q-value" = 7) %>%
  select(-c(2,8)) %>% 
  mutate_if(is.numeric, round, digits=3)

Compare dispersion:

set.seed(42)
betadisper(
  d_bc, group =  VolePathData_micro$coinf_stat, bias.adjust = TRUE) %>% 
  permutest()

set.seed(42)
disp_bc_coinf <- betadisper(
  d_bc, group =  VolePathData_micro$coinf_stat, bias.adjust = TRUE) %>% 
  permutest(pairwise = TRUE)

disp_bc_coinf <- as.data.frame(
  disp_bc_coinf$pairwise) %>%
  rownames_to_column(., var="comparison") %>% 
  add_column(variable="coinf_stat") %>%
  mutate_if(is.numeric, round, digits=4)

Export results:

# adonis2
write.table(adonis2_bc_coinf, file = "results/revised/beta/coinf_BC_perm.txt", sep="\t", row.names = FALSE)
write.table(adonis_bc_coinf_pair, file = "results/revised/beta/coinf_BC_perm_pairw.txt", sep="\t", row.names = FALSE)

# dispersion
write.table(disp_bc_coinf, file = "results/revised/beta/coinf_BC_disp.txt", sep="\t", row.names = FALSE)

Weighted UniFrac

Permanova:

set.seed(42)
adonis2_wuf_coinf<- adonis2(
  d_wuf ~ coinf_stat + poll_group + location + reprod, data = VolePathData_micro, by="margin")%>% 
  rownames_to_column(., "Variable") %>% 
  as_tibble() %>% 
  rename("F" = 5,
         "p" = 6) %>%
 mutate_if(is.numeric, round, digits=3)

Pairwise comparisons:

set.seed(42)
adonis_wuf_coinf_pair <- pairwise.adonis(
  d_wuf, VolePathData_micro$coinf_stat, perm = 999, p.adjust.m = "fdr") %>%
  as_tibble() %>% 
  rename(
    "Coinfection status comparison" = 1,
    "SS" = 3,
    "F" = 4,
    "R²" = 5,
    "p-value" = 6,
    "q-value" = 7) %>%
  select(-c(2,8)) %>% 
  mutate_if(is.numeric, round, digits=3)

Compare dispersion:

set.seed(42)
betadisper(
  d_wuf, group =  VolePathData_micro$coinf_stat, bias.adjust = TRUE) %>% 
  permutest()

set.seed(42)
disp_wuf_coinf <- betadisper(
  d_wuf, group =  VolePathData_micro$coinf_stat, bias.adjust = TRUE) %>% 
  permutest(pairwise = TRUE)

disp_wuf_coinf <- as.data.frame(
  disp_wuf_coinf$pairwise) %>%
  rownames_to_column(., var="comparison") %>% 
  add_column(variable="coinf_stat") %>%
  mutate_if(is.numeric, round, digits=4)

Export results:

# adonis2
write.table(adonis2_wuf_coinf, file = "results/revised/beta/coinf_wUF_perm.txt", sep="\t", row.names = FALSE)
write.table(adonis_wuf_coinf_pair, file = "results/revised/beta/coinf_wUF_perm_pairw.txt", sep="\t", row.names = FALSE)

# dispersion
write.table(disp_wuf_coinf, file = "results/revised/beta/coinf_wUF_disp.txt", sep="\t", row.names = FALSE)

Pathogens irrespective of coinfection status

Jaccard

Permanova:

set.seed(42)
adonis2_jacc_path<- adonis2(
  d_jacc ~ ana + bab + bor + puu + poll_group + reprod + location, data = VolePathData_micro, by="margin")%>% 
  rownames_to_column(., "Variable") %>% 
  as_tibble() %>% 
  rename("F" = 5,
         "p" = 6) %>%
 mutate_if(is.numeric, round, digits=3)

Compare dispersion

# Anaplasma
set.seed(42)

disp_jacc_ana <- betadisper(d_jacc, group =  VolePathData_micro$ana, bias.adjust = TRUE) %>%
  permutest()

disp_jacc_ana <- as_tibble(disp_jacc_ana$tab[1, ]) %>%
  add_column(variable = "anaplasma") %>%
  mutate_if(is.numeric, round, digits = 4)

# Babesia
set.seed(42)
disp_jacc_bab <- betadisper(d_jacc, group =  VolePathData_micro$bab, bias.adjust = TRUE) %>%
  permutest()

disp_jacc_bab <- as_tibble(disp_jacc_bab$tab[1, ]) %>%
  add_column(variable = "babesia") %>%
  mutate_if(is.numeric, round, digits = 4)

# Borrelia
set.seed(42)
disp_jacc_bor <- betadisper(d_jacc, group =  VolePathData_micro$bor, bias.adjust = TRUE) %>%
  permutest

disp_jacc_bor <- as_tibble(disp_jacc_bor$tab[1, ]) %>%
  add_column(variable = "borrelia") %>%
  mutate_if(is.numeric, round, digits = 4)

# Puumala
set.seed(42)

disp_jacc_puu <- betadisper(d_jacc, group =  VolePathData_micro$puu, bias.adjust = TRUE) %>%
  permutest

disp_jacc_puu <- as_tibble(disp_jacc_puu$tab[1, ]) %>%
  add_column(variable = "puumala") %>%
  mutate_if(is.numeric, round, digits = 4)

# group
disp_jacc_path <- 
  bind_rows(disp_jacc_ana,disp_jacc_bab,disp_jacc_bor,disp_jacc_puu)  %>% 
  as_tibble() %>%
  rename("df" = 1,
         "SS" = 2,
         "F" = 4,
         "p" = 6) %>%
  select(-c(3,5)) %>% 
  mutate_if(is.numeric, round, digits = 3)

disp_jacc_path

Export results:

# adonis2
write.table(adonis2_jacc_path, file = "results/revised/beta/path_jacc_perm.txt", sep="\t", row.names = FALSE)

# dispersion
write.table(disp_jacc_path, file = "results/revised/beta/path_jacc_disp.txt", sep="\t", row.names = FALSE)

UniFrac

Permanova:

set.seed(42)
adonis2_uf_path<- adonis2(
  d_uf ~ ana + bab + bor + puu + poll_group + reprod + location, data = VolePathData_micro, by="margin")%>% 
  rownames_to_column(., "Variable") %>% 
  as_tibble() %>% 
  rename("F" = 5,
         "p" = 6) %>%
 mutate_if(is.numeric, round, digits=3)

Compare dispersion:

# Anaplasma
set.seed(42)

disp_uf_ana <- betadisper(d_uf, group =  VolePathData_micro$ana, bias.adjust = TRUE) %>%
  permutest()

disp_uf_ana <- as_tibble(disp_uf_ana$tab[1, ]) %>%
  add_column(variable = "anaplasma") %>%
  mutate_if(is.numeric, round, digits = 4)

# Babesia
set.seed(42)

disp_uf_bab <- betadisper(d_uf, group =  VolePathData_micro$bab, bias.adjust = TRUE) %>%
  permutest()

disp_uf_bab <- as_tibble(disp_uf_bab$tab[1, ]) %>%
  add_column(variable = "babesia") %>%
  mutate_if(is.numeric, round, digits = 4)

# Borrelia
set.seed(42)

disp_uf_bor <- betadisper(d_uf, group =  VolePathData_micro$bor, bias.adjust = TRUE) %>%
  permutest

disp_uf_bor <- as_tibble(disp_uf_bor$tab[1, ]) %>%
  add_column(variable = "borrelia") %>%
  mutate_if(is.numeric, round, digits = 4)

# Puumala
set.seed(42)

disp_uf_puu <- betadisper(d_uf, group =  VolePathData_micro$puu, bias.adjust = TRUE) %>%
  permutest

disp_uf_puu <- as_tibble(disp_uf_puu$tab[1, ]) %>%
  add_column(variable = "puumala") %>%
  mutate_if(is.numeric, round, digits = 4)

# group
disp_uf_path <- 
  bind_rows(disp_uf_ana,disp_uf_bab,disp_uf_bor,disp_uf_puu) %>% 
  as_tibble() %>%
  rename("df" = 1,
         "SS" = 2,
         "F" = 4,
         "p" = 6) %>%
  select(-c(3,5)) %>% 
  mutate_if(is.numeric, round, digits = 3)

disp_uf_path

Export results:

# adonis2
write.table(adonis2_uf_path, file = "results/revised/beta/path_UF_perm.txt", sep="\t", row.names = FALSE)

# dispersion
write.table(disp_uf_path, file = "results/revised/beta/path_UF_disp.txt", sep="\t", row.names = FALSE)

Bray-Curtis

Permanova:

set.seed(42)
adonis2_bc_path <- adonis2(
  d_bc ~ ana + bab + bor + puu + poll_group + reprod + location,
  data = VolePathData_micro,
  by = "margin") %>%
  rownames_to_column(., "Variable") %>%
  as_tibble() %>%
  rename("F" = 5,
         "p" = 6) %>%
  mutate_if(is.numeric, round, digits = 3)

Compare dispersion:

# Anaplasma
set.seed(42)

disp_bc_ana <- betadisper(d_bc, group =  VolePathData_micro$ana, bias.adjust = TRUE) %>%
  permutest()

disp_bc_ana <- as_tibble(disp_bc_ana$tab[1, ]) %>%
  add_column(variable = "anaplasma") %>%
  mutate_if(is.numeric, round, digits = 4)

# Babesia
set.seed(42)

disp_bc_bab <- betadisper(d_bc, group =  VolePathData_micro$bab, bias.adjust = TRUE) %>%
  permutest()

disp_bc_bab <- as_tibble(disp_bc_bab$tab[1, ]) %>%
  add_column(variable = "babesia") %>%
  mutate_if(is.numeric, round, digits = 4)

# Borrelia
set.seed(42)

disp_bc_bor <- betadisper(d_bc, group =  VolePathData_micro$bor, bias.adjust = TRUE) %>%
  permutest

disp_bc_bor <- as_tibble(disp_bc_bor$tab[1, ]) %>%
  add_column(variable = "borrelia") %>%
  mutate_if(is.numeric, round, digits = 4)

# Puumala
set.seed(42)

disp_bc_puu <- betadisper(d_bc, group =  VolePathData_micro$puu, bias.adjust = TRUE) %>%
  permutest

disp_bc_puu <- as_tibble(disp_bc_puu$tab[1, ]) %>%
  add_column(variable = "puumala") %>%
  mutate_if(is.numeric, round, digits = 4)

# group into one file
disp_bc_path <- 
  bind_rows(disp_bc_ana,disp_bc_bab,disp_bc_bor,disp_bc_puu) %>% 
  as_tibble() %>%
  rename("df" = 1,
         "SS" = 2,
         "F" = 4,
         "p" = 6) %>%
  select(-c(3,5)) %>% 
  mutate_if(is.numeric, round, digits = 3)

disp_bc_path

Export results:

# adonis2
write.table(adonis2_bc_path, file = "results/revised/beta/path_BC_perm.txt", sep="\t", row.names = FALSE)

# dispersion
write.table(disp_bc_path, file = "results/revised/beta/path_BC_disp.txt", sep="\t", row.names = FALSE)

Weighted UniFrac

Permanova:

set.seed(42)
adonis2_wuf_path<- adonis2(
  d_wuf ~ ana + bab + bor + puu + poll_group + reprod + location, data = VolePathData_micro, by="margin")%>% 
  rownames_to_column(., "Variable") %>% 
  as_tibble() %>% 
  rename("F" = 5,
         "p" = 6) %>%
 mutate_if(is.numeric, round, digits=3)

Compare dispersion:

# Anaplasma
set.seed(42)
disp_wuf_ana <- betadisper(d_wuf, group =  VolePathData_micro$ana, bias.adjust = TRUE) %>%
  permutest()

disp_wuf_ana <- as_tibble(disp_wuf_ana$tab[1, ]) %>%
  add_column(variable = "anaplasma") %>%
  mutate_if(is.numeric, round, digits = 4)

# Babesia
set.seed(42)

disp_wuf_bab <- betadisper(d_wuf, group =  VolePathData_micro$bab, bias.adjust = TRUE) %>%
  permutest()

disp_wuf_bab <- as_tibble(disp_wuf_bab$tab[1, ]) %>%
  add_column(variable = "babesia") %>%
  mutate_if(is.numeric, round, digits = 4)

# Borrelia
set.seed(42)

disp_wuf_bor <- betadisper(d_wuf, group =  VolePathData_micro$bor, bias.adjust = TRUE) %>%
  permutest

disp_wuf_bor <- as_tibble(disp_wuf_bor$tab[1, ]) %>%
  add_column(variable = "borrelia") %>%
  mutate_if(is.numeric, round, digits = 4)

# Puumala
set.seed(42)

disp_wuf_puu <- betadisper(d_wuf, group =  VolePathData_micro$puu, bias.adjust = TRUE) %>%
  permutest

disp_wuf_puu <- as_tibble(disp_wuf_puu$tab[1, ]) %>%
  add_column(variable = "puumala") %>%
  mutate_if(is.numeric, round, digits = 4)

# group
disp_wuf_path <- 
  bind_rows(disp_wuf_ana,disp_wuf_bab,disp_wuf_bor,disp_wuf_puu) %>% 
  as_tibble() %>%
  rename("df" = 1,
         "SS" = 2,
         "F" = 4,
         "p" = 6) %>%
  select(-c(3,5)) %>% 
  mutate_if(is.numeric, round, digits = 3)

disp_wuf_path

Export results:

# adonis2
write.table(adonis2_wuf_path, file = "results/revised/beta/path_wUF_perm.txt", sep="\t", row.names = FALSE)

# dispersion
write.table(disp_wuf_path, file = "results/revised/beta/path_wUF_disp.txt", sep="\t", row.names = FALSE)

Pathogens considering coinfection status

Jaccard

Anaplasma

# permanova - global
set.seed(42)
adonis2_jacc_coinf_ana<- adonis2(d_jacc ~ ana_coinf + poll_group + reprod + location, data = VolePathData_micro, by="margin")%>% 
  rownames_to_column(., "Variable") %>% 
  as_tibble() %>% 
  rename("F" = 5,
         "p" = 6) %>%
 mutate_if(is.numeric, round, digits=3)

# permanova - pairwise
set.seed(42)
adonis2_jacc_coinf_ana_pairw <- pairwise.adonis(
  d_jacc, VolePathData_micro$ana_coinf, perm = 999, p.adjust.m = "fdr") %>%
  as_tibble() %>% 
  rename(
    "Coinfection status comparison" = 1,
    "SS" = 3,
    "F" = 4,
    "R²" = 5,
    "p-value" = 6,
    "q-value" = 7) %>%
  select(-c(2,8)) %>% 
  mutate_if(is.numeric, round, digits=3)

# dispersion
set.seed(42)
disp_jacc_coinf_ana <- betadisper(d_jacc, group =  VolePathData_micro$ana_coinf, bias.adjust = TRUE) %>%
  permutest(pairwise = TRUE)

disp_jacc_coinf_ana <- as.data.frame(
  disp_jacc_coinf_ana$pairwise) %>%
  rownames_to_column(., var="comparison") %>% 
  add_column(variable="anaplasma") %>%
  mutate_if(is.numeric, round, digits=4)

Babesia

# permanova - global
set.seed(42)
adonis2_jacc_coinf_bab <- adonis2(d_jacc ~ bab_coinf + poll_group + reprod + location, data = VolePathData_micro, by="margin")%>% 
  rownames_to_column(., "Variable") %>% 
  as_tibble() %>% 
  rename("F" = 5,
         "p" = 6) %>%
 mutate_if(is.numeric, round, digits=3)

# permanova - pairwise
set.seed(42)
adonis2_jacc_coinf_bab_pairw <- pairwise.adonis(
  d_jacc, VolePathData_micro$bab_coinf, perm = 999, p.adjust.m = "fdr") %>%
  as_tibble() %>% 
  rename(
    "Coinfection status comparison" = 1,
    "SS" = 3,
    "F" = 4,
    "R²" = 5,
    "p-value" = 6,
    "q-value" = 7) %>%
  select(-c(2,8)) %>% 
  mutate_if(is.numeric, round, digits=3)

# dispersion
set.seed(42)
disp_jacc_coinf_bab <- betadisper(d_jacc, group =  VolePathData_micro$bab_coinf, bias.adjust = TRUE) %>%
  permutest(pairwise = TRUE)

disp_jacc_coinf_bab <- as.data.frame(
  disp_jacc_coinf_bab$pairwise) %>%
  rownames_to_column(., var="comparison") %>% 
  add_column(variable="Babesia") %>%
  mutate_if(is.numeric, round, digits=4)

Borrelia

# permanova - global
set.seed(42)
adonis2_jacc_coinf_bor<- adonis2(d_jacc ~ bor_coinf + poll_group + reprod + location, data = VolePathData_micro, by="margin")%>% 
  rownames_to_column(., "Variable") %>% 
  as_tibble() %>% 
  rename("F" = 5,
         "p" = 6) %>%
 mutate_if(is.numeric, round, digits=3)

# permanova - pairwise
set.seed(42)
adonis2_jacc_coinf_bor_pairw <- pairwise.adonis(
  d_jacc, VolePathData_micro$bor_coinf, perm = 999, p.adjust.m = "fdr") %>%
  as_tibble() %>% 
  rename(
    "Coinfection status comparison" = 1,
    "SS" = 3,
    "F" = 4,
    "R²" = 5,
    "p-value" = 6,
    "q-value" = 7) %>%
  select(-c(2,8)) %>% 
  mutate_if(is.numeric, round, digits=3)

# dispersion
set.seed(42)
disp_jacc_coinf_bor <- betadisper(d_jacc, group =  VolePathData_micro$bor_coinf, bias.adjust = TRUE) %>%
  permutest(pairwise = TRUE)

disp_jacc_coinf_bor <- as.data.frame(
  disp_jacc_coinf_bor$pairwise) %>%
  rownames_to_column(., var="comparison") %>% 
  add_column(variable="Borrelia") %>%
  mutate_if(is.numeric, round, digits=4)

Puumala

# permanova - global
set.seed(42)
adonis2_jacc_coinf_puu<- adonis2(d_jacc ~ puu_coinf + poll_group + reprod + location, data = VolePathData_micro, by="margin")%>% 
  rownames_to_column(., "Variable") %>% 
  as_tibble() %>% 
  rename("F" = 5,
         "p" = 6) %>%
 mutate_if(is.numeric, round, digits=3)

# permanova - pairwise
set.seed(42)
adonis2_jacc_coinf_puu_pairw <- pairwise.adonis(
  d_jacc, VolePathData_micro$puu_coinf, perm = 999, p.adjust.m = "fdr") %>%
  as_tibble() %>% 
  rename(
    "Coinfection status comparison" = 1,
    "SS" = 3,
    "F" = 4,
    "R²" = 5,
    "p-value" = 6,
    "q-value" = 7) %>%
  select(-c(2,8)) %>% 
  mutate_if(is.numeric, round, digits=3)

# dispersion
set.seed(42)
disp_jacc_coinf_puu <- betadisper(d_jacc, group =  VolePathData_micro$puu_coinf, bias.adjust = TRUE) %>%
  permutest(pairwise = TRUE)

disp_jacc_coinf_puu <- as.data.frame(
  disp_jacc_coinf_puu$pairwise) %>%
  rownames_to_column(., var="comparison") %>% 
  add_column(variable="Puumala") %>%
  mutate_if(is.numeric, round, digits=4)

Export results:

# merge dispersion results
disp_jacc_coinf_path <- 
  bind_rows(disp_jacc_coinf_ana, disp_jacc_coinf_bab, disp_jacc_coinf_bor, disp_jacc_coinf_puu)  %>% 
  as_tibble() %>% 
  mutate_if(is.numeric, round, digits = 3)

# adonis2
write.table(adonis2_jacc_coinf_ana, file = "results/revised/beta/path_coinf_jacc_ana_perm.txt", sep="\t", row.names = FALSE)
write.table(adonis2_jacc_coinf_ana_pairw, file = "results/revised/beta/path_coinf_jacc_ana_perm_pairw.txt", sep="\t", row.names = FALSE)

write.table(adonis2_jacc_coinf_bab, file = "results/revised/beta/path_coinf_jacc_bab_perm.txt", sep="\t", row.names = FALSE)
write.table(adonis2_jacc_coinf_bab_pairw, file = "results/revised/beta/path_coinf_jacc_bab_perm_pairw.txt", sep="\t", row.names = FALSE)

write.table(adonis2_jacc_coinf_bor, file = "results/revised/beta/path_coinf_jacc_bor_perm.txt", sep="\t", row.names = FALSE)
write.table(adonis2_jacc_coinf_bor_pairw, file = "results/revised/beta/path_coinf_jacc_bor_perm_pairw.txt", sep="\t", row.names = FALSE)

write.table(adonis2_jacc_coinf_puu, file = "results/revised/beta/path_coinf_jacc_puu_perm.txt", sep="\t", row.names = FALSE)
write.table(adonis2_jacc_coinf_puu_pairw, file = "results/revised/beta/path_coinf_jacc_puu_perm_pairw.txt", sep="\t", row.names = FALSE)

# dispersion
write.table(disp_jacc_coinf_path, file = "results/revised/beta/path_coinf_jacc_disp.txt", sep="\t", row.names = FALSE)

UniFrac

Anaplasma

# permanova
set.seed(42)
adonis2_uf_coinf_ana<- adonis2(d_uf ~ ana_coinf + poll_group + reprod + location, data = VolePathData_micro, by="margin")%>% 
  rownames_to_column(., "Variable") %>% 
  as_tibble() %>% 
  rename("F" = 5,
         "p" = 6) %>%
 mutate_if(is.numeric, round, digits=3)

# permanova - pairwise
set.seed(42)
adonis2_uf_coinf_ana_pairw <- pairwise.adonis(
  d_uf, VolePathData_micro$ana_coinf, perm = 999, p.adjust.m = "fdr") %>%
  as_tibble() %>% 
  rename(
    "Coinfection status comparison" = 1,
    "SS" = 3,
    "F" = 4,
    "R²" = 5,
    "p-value" = 6,
    "q-value" = 7) %>%
  select(-c(2,8)) %>% 
  mutate_if(is.numeric, round, digits=3)

# dispersion
set.seed(42)
disp_uf_coinf_ana <- betadisper(d_uf, group =  VolePathData_micro$ana_coinf, bias.adjust = TRUE) %>%
  permutest(pairwise = TRUE)

disp_uf_coinf_ana <- as.data.frame(
  disp_uf_coinf_ana$pairwise) %>%
  rownames_to_column(., var="comparison") %>% 
  add_column(variable="anaplasma") %>%
  mutate_if(is.numeric, round, digits=4)

Babesia

# permanova
set.seed(42)
adonis2_uf_coinf_bab<- adonis2(d_uf ~ bab_coinf + poll_group + reprod + location, data = VolePathData_micro, by="margin")%>% 
  rownames_to_column(., "Variable") %>% 
  as_tibble() %>% 
  rename("F" = 5,
         "p" = 6) %>%
 mutate_if(is.numeric, round, digits=3)

# permanova - pairwise
set.seed(42)
adonis2_uf_coinf_bab_pairw <- pairwise.adonis(
  d_uf, VolePathData_micro$bab_coinf, perm = 999, p.adjust.m = "fdr") %>%
  as_tibble() %>% 
  rename(
    "Coinfection status comparison" = 1,
    "SS" = 3,
    "F" = 4,
    "R²" = 5,
    "p-value" = 6,
    "q-value" = 7) %>%
  select(-c(2,8)) %>% 
  mutate_if(is.numeric, round, digits=3)

# dispersion
set.seed(42)
disp_uf_coinf_bab <- betadisper(d_uf, group =  VolePathData_micro$bab_coinf, bias.adjust = TRUE) %>%
  permutest(pairwise = TRUE)

disp_uf_coinf_bab <- as.data.frame(
  disp_uf_coinf_bab$pairwise) %>%
  rownames_to_column(., var="comparison") %>% 
  add_column(variable="Babesia") %>%
  mutate_if(is.numeric, round, digits=4)

Borrelia

# permanova
set.seed(42)
adonis2_uf_coinf_bor<- adonis2(d_uf ~ bor_coinf + poll_group + reprod + location, data = VolePathData_micro, by="margin")%>% 
  rownames_to_column(., "Variable") %>% 
  as_tibble() %>% 
  rename("F" = 5,
         "p" = 6) %>%
 mutate_if(is.numeric, round, digits=3)

# permanova - pairwise
set.seed(42)
adonis2_uf_coinf_bor_pairw <- pairwise.adonis(
  d_uf, VolePathData_micro$bor_coinf, perm = 999, p.adjust.m = "fdr") %>%
  as_tibble() %>% 
  rename(
    "Coinfection status comparison" = 1,
    "SS" = 3,
    "F" = 4,
    "R²" = 5,
    "p-value" = 6,
    "q-value" = 7) %>%
  select(-c(2,8)) %>% 
  mutate_if(is.numeric, round, digits=3)

# dispersion
set.seed(42)
disp_uf_coinf_bor <- betadisper(d_uf, group =  VolePathData_micro$bor_coinf, bias.adjust = TRUE) %>%
  permutest(pairwise = TRUE)

disp_uf_coinf_bor <- as.data.frame(
  disp_uf_coinf_bor$pairwise) %>%
  rownames_to_column(., var="comparison") %>% 
  add_column(variable="Borrelia") %>%
  mutate_if(is.numeric, round, digits=4)

Puumala

# permanova
set.seed(42)
adonis2_uf_coinf_puu<- adonis2(d_uf ~ puu_coinf + poll_group + reprod + location, data = VolePathData_micro, by="margin")%>% 
  rownames_to_column(., "Variable") %>% 
  as_tibble() %>% 
  rename("F" = 5,
         "p" = 6) %>%
 mutate_if(is.numeric, round, digits=3)

# permanova - pairwise
set.seed(42)
adonis2_uf_coinf_puu_pairw <- pairwise.adonis(
  d_uf, VolePathData_micro$puu_coinf, perm = 999, p.adjust.m = "fdr") %>%
  as_tibble() %>% 
  rename(
    "Coinfection status comparison" = 1,
    "SS" = 3,
    "F" = 4,
    "R²" = 5,
    "p-value" = 6,
    "q-value" = 7) %>%
  select(-c(2,8)) %>% 
  mutate_if(is.numeric, round, digits=3)

# dispersion
set.seed(42)
disp_uf_coinf_puu <- betadisper(d_uf, group =  VolePathData_micro$puu_coinf, bias.adjust = TRUE) %>%
  permutest(pairwise = TRUE)

disp_uf_coinf_puu <- as.data.frame(
  disp_uf_coinf_puu$pairwise) %>%
  rownames_to_column(., var="comparison") %>% 
  add_column(variable="Puumala") %>%
  mutate_if(is.numeric, round, digits=4)

Export results:

# merge dispersion results
disp_uf_coinf_path <- 
  bind_rows(disp_uf_coinf_ana, disp_uf_coinf_bab, disp_uf_coinf_bor, disp_uf_coinf_puu)  %>% 
  as_tibble() %>% 
  mutate_if(is.numeric, round, digits = 3)

# adonis2
write.table(adonis2_uf_coinf_ana, file = "results/revised/beta/path_coinf_uf_ana_perm.txt", sep="\t", row.names = FALSE)
write.table(adonis2_uf_coinf_ana_pairw, file = "results/revised/beta/path_coinf_uf_ana_perm_pairw.txt", sep="\t", row.names = FALSE)

write.table(adonis2_uf_coinf_bab, file = "results/revised/beta/path_coinf_uf_bab_perm.txt", sep="\t", row.names = FALSE)
write.table(adonis2_uf_coinf_bab_pairw, file = "results/revised/beta/path_coinf_uf_bab_perm_pairw.txt", sep="\t", row.names = FALSE)

write.table(adonis2_uf_coinf_bor, file = "results/revised/beta/path_coinf_uf_bor_perm.txt", sep="\t", row.names = FALSE)
write.table(adonis2_uf_coinf_bor_pairw, file = "results/revised/beta/path_coinf_uf_bor_perm_pairw.txt", sep="\t", row.names = FALSE)

write.table(adonis2_uf_coinf_puu, file = "results/revised/beta/path_coinf_uf_puu_perm.txt", sep="\t", row.names = FALSE)
write.table(adonis2_uf_coinf_puu_pairw, file = "results/revised/beta/path_coinf_uf_puu_perm_pairw.txt", sep="\t", row.names = FALSE)

# dispersion
write.table(disp_uf_coinf_path, file = "results/revised/beta/path_coinf_uf_disp.txt", sep="\t", row.names = FALSE)

Bray-Curtis

Anaplasma

# permanova
set.seed(42)
adonis2_bc_coinf_ana<- adonis2(d_bc ~ ana_coinf + poll_group + reprod + location, data = VolePathData_micro, by="margin")%>% 
  rownames_to_column(., "Variable") %>% 
  as_tibble() %>% 
  rename("F" = 5,
         "p" = 6) %>%
 mutate_if(is.numeric, round, digits=3)

# permanova - pairwise
set.seed(42)
adonis2_bc_coinf_ana_pairw <- pairwise.adonis(
  d_bc, VolePathData_micro$ana_coinf, perm = 999, p.adjust.m = "fdr") %>%
  as_tibble() %>% 
  rename(
    "Coinfection status comparison" = 1,
    "SS" = 3,
    "F" = 4,
    "R²" = 5,
    "p-value" = 6,
    "q-value" = 7) %>%
  select(-c(2,8)) %>% 
  mutate_if(is.numeric, round, digits=3)

# dispersion
set.seed(42)
disp_bc_coinf_ana <- betadisper(d_bc, group =  VolePathData_micro$ana_coinf, bias.adjust = TRUE) %>%
  permutest(pairwise = TRUE)

disp_bc_coinf_ana <- as.data.frame(
  disp_bc_coinf_ana$pairwise) %>%
  rownames_to_column(., var="comparison") %>% 
  add_column(variable="anaplasma") %>%
  mutate_if(is.numeric, round, digits=4)

Babesia

# permanova
set.seed(42)
adonis2_bc_coinf_bab<- adonis2(d_bc ~ bab_coinf + poll_group + reprod + location, data = VolePathData_micro, by="margin")%>% 
  rownames_to_column(., "Variable") %>% 
  as_tibble() %>% 
  rename("F" = 5,
         "p" = 6) %>%
 mutate_if(is.numeric, round, digits=3)

# permanova - pairwise
set.seed(42)
adonis2_bc_coinf_bab_pairw <- pairwise.adonis(
  d_bc, VolePathData_micro$bab_coinf, perm = 999, p.adjust.m = "fdr") %>%
  as_tibble() %>% 
  rename(
    "Coinfection status comparison" = 1,
    "SS" = 3,
    "F" = 4,
    "R²" = 5,
    "p-value" = 6,
    "q-value" = 7) %>%
  select(-c(2,8)) %>% 
  mutate_if(is.numeric, round, digits=3)

# dispersion
set.seed(42)
disp_bc_coinf_bab <- betadisper(d_bc, group =  VolePathData_micro$bab_coinf, bias.adjust = TRUE) %>%
  permutest(pairwise = TRUE)

disp_bc_coinf_bab <- as.data.frame(
  disp_bc_coinf_bab$pairwise) %>%
  rownames_to_column(., var="comparison") %>% 
  add_column(variable="Babesia") %>%
  mutate_if(is.numeric, round, digits=4)

Borrelia

# permanova
set.seed(42)
adonis2_bc_coinf_bor<- adonis2(d_bc ~ bor_coinf + poll_group + reprod + location, data = VolePathData_micro, by="margin")%>% 
  rownames_to_column(., "Variable") %>% 
  as_tibble() %>% 
  rename("F" = 5,
         "p" = 6) %>%
 mutate_if(is.numeric, round, digits=3)

# permanova - pairwise
set.seed(42)
adonis2_bc_coinf_bor_pairw <- pairwise.adonis(
  d_bc, VolePathData_micro$bor_coinf, perm = 999, p.adjust.m = "fdr") %>%
  as_tibble() %>% 
  rename(
    "Coinfection status comparison" = 1,
    "SS" = 3,
    "F" = 4,
    "R²" = 5,
    "p-value" = 6,
    "q-value" = 7) %>%
  select(-c(2,8)) %>% 
  mutate_if(is.numeric, round, digits=3)

# dispersion
set.seed(42)
disp_bc_coinf_bor <- betadisper(d_bc, group =  VolePathData_micro$bor_coinf, bias.adjust = TRUE) %>%
  permutest(pairwise = TRUE)

disp_bc_coinf_bor <- as.data.frame(
  disp_bc_coinf_bor$pairwise) %>%
  rownames_to_column(., var="comparison") %>% 
  add_column(variable="Borrelia") %>%
  mutate_if(is.numeric, round, digits=4)

Puumala

# permanova
set.seed(42)
adonis2_bc_coinf_puu<- adonis2(d_bc ~ puu_coinf + poll_group + reprod + location, data = VolePathData_micro, by="margin")%>% 
  rownames_to_column(., "Variable") %>% 
  as_tibble() %>% 
  rename("F" = 5,
         "p" = 6) %>%
 mutate_if(is.numeric, round, digits=3)

# permanova - pairwise
set.seed(42)
adonis2_bc_coinf_puu_pairw <- pairwise.adonis(
  d_bc, VolePathData_micro$puu_coinf, perm = 999, p.adjust.m = "fdr") %>%
  as_tibble() %>% 
  rename(
    "Coinfection status comparison" = 1,
    "SS" = 3,
    "F" = 4,
    "R²" = 5,
    "p-value" = 6,
    "q-value" = 7) %>%
  select(-c(2,8)) %>% 
  mutate_if(is.numeric, round, digits=3)

# dispersion
set.seed(42)
disp_bc_coinf_puu <- betadisper(d_bc, group =  VolePathData_micro$puu_coinf, bias.adjust = TRUE) %>%
  permutest(pairwise = TRUE)

disp_bc_coinf_puu <- as.data.frame(
  disp_bc_coinf_puu$pairwise) %>%
  rownames_to_column(., var="comparison") %>% 
  add_column(variable="Puumala") %>%
  mutate_if(is.numeric, round, digits=4)

Export results:

# merge dispersion results
disp_bc_coinf_path <- 
  bind_rows(disp_bc_coinf_ana, disp_bc_coinf_bab, disp_bc_coinf_bor, disp_bc_coinf_puu)  %>% 
  as_tibble() %>% 
  mutate_if(is.numeric, round, digits = 3)

# adonis2
write.table(adonis2_bc_coinf_ana, file = "results/revised/beta/path_coinf_bc_ana_perm.txt", sep="\t", row.names = FALSE)
write.table(adonis2_bc_coinf_ana_pairw, file = "results/revised/beta/path_coinf_bc_ana_perm_pairw.txt", sep="\t", row.names = FALSE)

write.table(adonis2_bc_coinf_bab, file = "results/revised/beta/path_coinf_bc_bab_perm.txt", sep="\t", row.names = FALSE)
write.table(adonis2_bc_coinf_bab_pairw, file = "results/revised/beta/path_coinf_bc_bab_perm_pairw.txt", sep="\t", row.names = FALSE)

write.table(adonis2_bc_coinf_bor, file = "results/revised/beta/path_coinf_bc_bor_perm.txt", sep="\t", row.names = FALSE)
write.table(adonis2_bc_coinf_bor_pairw, file = "results/revised/beta/path_coinf_bc_bor_perm_pairw.txt", sep="\t", row.names = FALSE)

write.table(adonis2_bc_coinf_puu, file = "results/revised/beta/path_coinf_bc_puu_perm.txt", sep="\t", row.names = FALSE)
write.table(adonis2_bc_coinf_puu_pairw, file = "results/revised/beta/path_coinf_bc_puu_perm_pairw.txt", sep="\t", row.names = FALSE)

# dispersion
write.table(disp_bc_coinf_path, file = "results/revised/beta/path_coinf_bc_disp.txt", sep="\t", row.names = FALSE)

Weighted UniFrac

Anaplasma

# permanova
set.seed(42)
adonis2_wuf_coinf_ana<- adonis2(d_wuf ~ ana_coinf + poll_group + reprod + location, data = VolePathData_micro, by="margin")%>% 
  rownames_to_column(., "Variable") %>% 
  as_tibble() %>% 
  rename("F" = 5,
         "p" = 6) %>%
 mutate_if(is.numeric, round, digits=3)

# permanova - pairwise
set.seed(42)
adonis2_wuf_coinf_ana_pairw <- pairwise.adonis(
  d_wuf, VolePathData_micro$ana_coinf, perm = 999, p.adjust.m = "fdr") %>%
  as_tibble() %>% 
  rename(
    "Coinfection status comparison" = 1,
    "SS" = 3,
    "F" = 4,
    "R²" = 5,
    "p-value" = 6,
    "q-value" = 7) %>%
  select(-c(2,8)) %>% 
  mutate_if(is.numeric, round, digits=3)

# dispersion
set.seed(42)
disp_wuf_coinf_ana <- betadisper(d_wuf, group =  VolePathData_micro$ana_coinf, bias.adjust = TRUE) %>%
  permutest(pairwise = TRUE)

disp_wuf_coinf_ana <- as.data.frame(
  disp_wuf_coinf_ana$pairwise) %>%
  rownames_to_column(., var="comparison") %>% 
  add_column(variable="anaplasma") %>%
  mutate_if(is.numeric, round, digits=4)

Babesia

# permanova
set.seed(42)
adonis2_wuf_coinf_bab<- adonis2(d_wuf ~ bab_coinf + poll_group + reprod + location, data = VolePathData_micro, by="margin")%>% 
  rownames_to_column(., "Variable") %>% 
  as_tibble() %>% 
  rename("F" = 5,
         "p" = 6) %>%
 mutate_if(is.numeric, round, digits=3)

# permanova - pairwise
set.seed(42)
adonis2_wuf_coinf_bab_pairw <- pairwise.adonis(
  d_wuf, VolePathData_micro$bab_coinf, perm = 999, p.adjust.m = "fdr") %>%
  as_tibble() %>% 
  rename(
    "Coinfection status comparison" = 1,
    "SS" = 3,
    "F" = 4,
    "R²" = 5,
    "p-value" = 6,
    "q-value" = 7) %>%
  select(-c(2,8)) %>% 
  mutate_if(is.numeric, round, digits=3)

# dispersion
set.seed(42)
disp_wuf_coinf_bab <- betadisper(d_wuf, group =  VolePathData_micro$bab_coinf, bias.adjust = TRUE) %>%
  permutest(pairwise = TRUE)

disp_wuf_coinf_bab <- as.data.frame(
  disp_wuf_coinf_bab$pairwise) %>%
  rownames_to_column(., var="comparison") %>% 
  add_column(variable="Babesia") %>%
  mutate_if(is.numeric, round, digits=4)

Borrelia

# permanova
set.seed(42)
adonis2_wuf_coinf_bor<- adonis2(d_wuf ~ bor_coinf + poll_group + reprod + location, data = VolePathData_micro, by="margin")%>% 
  rownames_to_column(., "Variable") %>% 
  as_tibble() %>% 
  rename("F" = 5,
         "p" = 6) %>%
 mutate_if(is.numeric, round, digits=3)

# permanova - pairwise
set.seed(42)
adonis2_wuf_coinf_bor_pairw <- pairwise.adonis(
  d_wuf, VolePathData_micro$bor_coinf, perm = 999, p.adjust.m = "fdr") %>%
  as_tibble() %>% 
  rename(
    "Coinfection status comparison" = 1,
    "SS" = 3,
    "F" = 4,
    "R²" = 5,
    "p-value" = 6,
    "q-value" = 7) %>%
  select(-c(2,8)) %>% 
  mutate_if(is.numeric, round, digits=3)

# dispersion
set.seed(42)
disp_wuf_coinf_bor <- betadisper(d_wuf, group =  VolePathData_micro$bor_coinf, bias.adjust = TRUE) %>%
  permutest(pairwise = TRUE)

disp_wuf_coinf_bor <- as.data.frame(
  disp_wuf_coinf_bor$pairwise) %>%
  rownames_to_column(., var="comparison") %>% 
  add_column(variable="Borrelia") %>%
  mutate_if(is.numeric, round, digits=4)

Puumala

# permanova
set.seed(42)
adonis2_wuf_coinf_puu<- adonis2(d_wuf ~ puu_coinf + poll_group + reprod + location, data = VolePathData_micro, by="margin")%>% 
  rownames_to_column(., "Variable") %>% 
  as_tibble() %>% 
  rename("F" = 5,
         "p" = 6) %>%
 mutate_if(is.numeric, round, digits=3)

# permanova - pairwise
set.seed(42)
adonis2_wuf_coinf_puu_pairw <- pairwise.adonis(
  d_wuf, VolePathData_micro$puu_coinf, perm = 999, p.adjust.m = "fdr") %>%
  as_tibble() %>% 
  rename(
    "Coinfection status comparison" = 1,
    "SS" = 3,
    "F" = 4,
    "R²" = 5,
    "p-value" = 6,
    "q-value" = 7) %>%
  select(-c(2,8)) %>% 
  mutate_if(is.numeric, round, digits=3)

# dispersion
set.seed(42)
disp_wuf_coinf_puu <- betadisper(d_wuf, group =  VolePathData_micro$puu_coinf, bias.adjust = TRUE) %>%
  permutest(pairwise = TRUE)

disp_wuf_coinf_puu <- as.data.frame(
  disp_wuf_coinf_puu$pairwise) %>%
  rownames_to_column(., var="comparison") %>% 
  add_column(variable="Puumala") %>%
  mutate_if(is.numeric, round, digits=4)

Export results:

# merge dispersion results
disp_wuf_coinf_path <- 
  bind_rows(disp_wuf_coinf_ana, disp_wuf_coinf_bab, disp_wuf_coinf_bor, disp_wuf_coinf_puu)  %>% 
  as_tibble() %>% 
  mutate_if(is.numeric, round, digits = 3)

# adonis2
write.table(adonis2_wuf_coinf_ana, file = "results/revised/beta/path_coinf_wuf_ana_perm.txt", sep="\t", row.names = FALSE)
write.table(adonis2_wuf_coinf_ana_pairw, file = "results/revised/beta/path_coinf_wuf_ana_perm_pairw.txt", sep="\t", row.names = FALSE)

write.table(adonis2_wuf_coinf_bab, file = "results/revised/beta/path_coinf_wuf_bab_perm.txt", sep="\t", row.names = FALSE)
write.table(adonis2_wuf_coinf_bab_pairw, file = "results/revised/beta/path_coinf_wuf_bab_perm_pairw.txt", sep="\t", row.names = FALSE)

write.table(adonis2_wuf_coinf_bor, file = "results/revised/beta/path_coinf_wuf_bor_perm.txt", sep="\t", row.names = FALSE)
write.table(adonis2_wuf_coinf_bor_pairw, file = "results/revised/beta/path_coinf_wuf_bor_perm_pairw.txt", sep="\t", row.names = FALSE)

write.table(adonis2_wuf_coinf_puu, file = "results/revised/beta/path_coinf_wuf_puu_perm.txt", sep="\t", row.names = FALSE)
write.table(adonis2_wuf_coinf_puu_pairw, file = "results/revised/beta/path_coinf_wuf_puu_perm_pairw.txt", sep="\t", row.names = FALSE)

# dispersion
write.table(disp_wuf_coinf_path, file = "results/revised/beta/path_coinf_wuf_disp.txt", sep="\t", row.names = FALSE)

Dispersion of other variables

Jaccard

# poll_group
set.seed(42)

disp_jacc_poll_group <- betadisper(
  d_jacc, group =  VolePathData_micro$poll_group, bias.adjust = TRUE) %>% 
  permutest()

disp_jacc_poll_group <- as_tibble(
  disp_jacc_poll_group$tab[1,]) %>% 
  add_column(variable="pollution group") %>%
 mutate_if(is.numeric, round, digits=4)

# reprod
set.seed(42)

disp_jacc_reprod <- betadisper(
  d_jacc, group =  VolePathData_micro$reprod, bias.adjust = TRUE) %>% 
  permutest()

disp_jacc_reprod <- as_tibble(
  disp_jacc_reprod$tab[1,]) %>% 
  add_column(variable="reprod") %>%
 mutate_if(is.numeric, round, digits=4)

# location
set.seed(42)

disp_jacc_location <- betadisper(
  d_jacc, group =  VolePathData_micro$location, bias.adjust = TRUE) %>% 
  permutest()

disp_jacc_location <- as_tibble(
  disp_jacc_location$tab[1,]) %>% 
  add_column(variable="location") %>%
 mutate_if(is.numeric, round, digits=4)

## see which has higher dispersion - Kemi-Tornio
betadisper(d_jacc, group =  VolePathData_micro$location, bias.adjust = TRUE)

# merge into one tibble
disp_jacc_all <- bind_rows(disp_jacc_poll_group, disp_jacc_reprod, disp_jacc_location)

# export results
write.table(disp_jacc_all, file = "results/revised/beta/inf_jacc_disp.txt", sep="\t", row.names = FALSE)

UniFrac

# poll_group
set.seed(42)

disp_uf_poll_group <- betadisper(
  d_uf, group =  VolePathData_micro$poll_group, bias.adjust = TRUE) %>% 
  permutest()

disp_uf_poll_group <- as_tibble(
  disp_uf_poll_group$tab[1,]) %>% 
  add_column(variable="pollution group") %>%
 mutate_if(is.numeric, round, digits=4)

# reprod
set.seed(42)

disp_uf_reprod <- betadisper(
  d_uf, group =  VolePathData_micro$reprod, bias.adjust = TRUE) %>% 
  permutest()

disp_uf_reprod <- as_tibble(
  disp_uf_reprod$tab[1,]) %>% 
  add_column(variable="sex") %>%
 mutate_if(is.numeric, round, digits=4)

# location
set.seed(42)

disp_uf_location <- betadisper(
  d_uf, group =  VolePathData_micro$location, bias.adjust = TRUE) %>% 
  permutest()

disp_uf_location <- as_tibble(
  disp_uf_location$tab[1,]) %>% 
  add_column(variable="location") %>%
 mutate_if(is.numeric, round, digits=4)

## see which has higher dispersion - Kemi-Tornio
betadisper(d_uf, group =  VolePathData_micro$location, bias.adjust = TRUE)

# merge into one tibble
disp_uf_all <- bind_rows(disp_uf_inf, disp_uf_poll_group, disp_uf_reprod, disp_uf_location)

# export results
write.table(disp_uf_all, file = "results/revised/beta/inf_UF_disp.txt", sep="\t", row.names = FALSE)

Bray-Curtis

# poll_group
set.seed(42)

disp_bc_poll_group <- betadisper(
  d_bc, group =  VolePathData_micro$poll_group, bias.adjust = TRUE) %>% 
  permutest()

disp_bc_poll_group <- as_tibble(
  disp_bc_poll_group$tab[1,]) %>% 
  add_column(variable="pollution group") %>%
 mutate_if(is.numeric, round, digits=4)

# sex
set.seed(42)

disp_bc_reprod <- betadisper(
  d_bc, group =  VolePathData_micro$reprod, bias.adjust = TRUE) %>% 
  permutest()

disp_bc_reprod <- as_tibble(
  disp_bc_reprod$tab[1,]) %>% 
  add_column(variable="reprod") %>%
 mutate_if(is.numeric, round, digits=4)

# location
set.seed(42)

disp_bc_location <- betadisper(
  d_bc, group =  VolePathData_micro$location, bias.adjust = TRUE) %>% 
  permutest()

disp_bc_location<- as_tibble(
  disp_bc_location$tab[1,]) %>% 
  add_column(variable="location") %>%
 mutate_if(is.numeric, round, digits=4)

## see which has higher dispersion - Kemi-Tornio
betadisper(d_bc, group =  VolePathData_micro$location, bias.adjust = TRUE)

# merge into one tibble
disp_bc_all <- bind_rows(disp_bc_inf, disp_bc_poll_group, disp_bc_reprod, disp_bc_location)
disp_bc_all

# export results
write.table(disp_bc_all, file = "results/revised/beta/inf_BC_disp.txt", sep="\t", row.names = FALSE)

Weighted UniFrac

# poll_group
set.seed(42)

disp_wuf_poll_group <- betadisper(
  d_wuf, group =  VolePathData_micro$poll_group, bias.adjust = TRUE) %>% 
  permutest()

disp_wuf_poll_group <- as_tibble(
  disp_wuf_poll_group$tab[1,]) %>% 
  add_column(variable="pollution group") %>%
 mutate_if(is.numeric, round, digits=4)

# reprod
set.seed(42)

disp_wuf_reprod <- betadisper(
  d_wuf, group =  VolePathData_micro$reprod, bias.adjust = TRUE) %>% 
  permutest()

disp_wuf_reprod <- as_tibble(
  disp_wuf_reprod$tab[1,]) %>% 
  add_column(variable="reprod") %>%
 mutate_if(is.numeric, round, digits=4)

# location
set.seed(42)

disp_wuf_location <- betadisper(
  d_wuf, group =  VolePathData_micro$location, bias.adjust = TRUE) %>% 
  permutest()

disp_wuf_location <- as_tibble(
  disp_wuf_location$tab[1,]) %>% 
  add_column(variable="location") %>%
 mutate_if(is.numeric, round, digits=4)

## see which has higher dispersion - Kemi-Tornio
betadisper(d_wuf, group =  VolePathData_micro$location, bias.adjust = TRUE)

# merge into one tibble
disp_wuf_all <- bind_rows(disp_wuf_inf, disp_wuf_poll_group, disp_wuf_reprod, disp_wuf_location)

# export results
write.table(disp_wuf_all, file = "results/revised/beta/inf_wUF_disp.txt", sep="\t", row.names = FALSE)

Rare ASV analysis

Extract ASV table and add ASV classification

# transform to relative abundance
ps_relat <- transform_sample_counts(ps, function(x) x / sum(x))

asvs_abund_all <- as(otu_table(ps_relat), "matrix") %>%
  as_tibble(rownames = "ASVID") %>%
  mutate(MeanAbun = rowMeans(across(where(is.numeric)))) %>%
  mutate(AbundGroup = case_when(
      MeanAbun >= 0.0005  ~ 'abundant',
      MeanAbun < 0.0005 & MeanAbun > 0.00001  ~ 'intermediate',
      MeanAbun <= 0.00001  ~ 'rare'))

# examine the numbers per abundance group
asvs_abund_all %>% tabyl(AbundGroup)

# create an ASV classification file
asv_class <- asvs_abund_all %>% dplyr::select(ASVID, AbundGroup)

# write a table of ASV IDs and classification
write.table(asv_class, file = "results/revised/asv_classification.txt", sep = "\t")

Create a single data frame with ASV abundance and metadata for comparison:

# pivot longer so you can add individual sample metadata
asv_abundgroup_meta_long <- asvs_abund_all %>%
  # ↓ rm column
  dplyr::select(-MeanAbun) %>%
  pivot_longer(
    cols = -c(AbundGroup, ASVID),
    names_to = "SampleID_G",
    values_to = "relative_abundance") %>%
  # ↓ add in whether ASV is present in the sample
  mutate(in_sample = ifelse(relative_abundance == 0, "no", "yes")) %>% 
  # ↓ add in metadata columns
  full_join (
    x = .,
    y = VolePathData_micro %>% 
      dplyr::select(SampleID_G, coinf_stat, ana, bab, bor, puu, ana_coinf, bab_coinf, bor_coinf, puu_coinf),
    by = "SampleID_G")

# make dataframe for ESM
asv_abundgroup_meta <-
  asv_abundgroup_meta_long %>%
  # ↓ keep only those rows where ASV is present in a given sample
  filter(in_sample == "yes") %>%
  group_by(ASVID) %>% 
  # ↓ add column where you collapse per ASV the columns of interest
  mutate(coinf_comb = paste0(coinf_stat, collapse = ", "),
         ana_comb = paste0(ana, collapse = ", "),
         bab_comb = paste0(bab, collapse = ", "),
         bor_comb = paste0(bor, collapse = ", "),
         puu_comb = paste0(puu, collapse = ", "),
         ana_coinf_comb = paste0(ana_coinf, collapse = ", "),
         bab_coinf_comb = paste0(bab_coinf, collapse = ", "),
         bor_coinf_comb = paste0(bor_coinf, collapse = ", "),
         puu_coinf_comb = paste0(puu_coinf, collapse = ", ")) %>% 
  # ↓ keep only needed columns
  select(ASVID, AbundGroup, coinf_comb:puu_coinf_comb) %>% 
  # ↓ collapse to distinct ASVs
  distinct(ASVID, .keep_all = TRUE) %>% 
  # replace all logical values with yes and no
  mutate(across(where(is_logical), ~ ifelse(. == TRUE,"yes", "no"))) %>% 
  # add in taxonomy
  left_join(
    x = .,
    y = tax %>% 
      rename(ASVID = OTUID),
    by = "ASVID") %>% 
  # remove all the extra columns
  select(-c(Domain, Confidence)) %>% 
  # move taxonomy forward in R so you don't kill laptop
  relocate(Phylum:Species, .after = ASVID) %>%
  mutate(across(where(is_character), as_factor))

Save for ESM:

write.table(asv_abundgroup_meta, file = "results/revised/asv_abundgroup_meta.txt", sep = "\t", row.names = FALSE)

Differential abundance analysis using ANCOM-BC

Notes:

using ANCOM-BC
this method currently does not do pairwise comparisons, so for coinfection, models run 2x with different reference
“it is recommended to set neg_lb = TRUE when the sample size per group is relatively large (e.g. > 30)”
adding column “found in 1 sample” to sanity-check that those are not differentially abundant

Get in unrarefied ASV table and remove the 1 sample that was under rarefaction threshold for comparable data

# get unrarefied table
asvs_unrarefied<- read_qza("gut_feat_table_filt10_taxafilt.qza")

# make a phyloseq object
ps_unrare <- phyloseq(
 otu_table(asvs_unrarefied$data, taxa_are_rows = T), 
 phy_tree(phy_tree), 
 tax_table(as.data.frame(tax) %>% dplyr::select(-Confidence) %>% column_to_rownames("OTUID") %>% as.matrix()),
 sample_data(VolePathData %>% as.data.frame() %>% column_to_rownames("SampleID_G")) )

# drop the 1 sample so that the sample size remains the same
ps_unrare <- subset_samples(ps_unrare, animal_id != "241")
ps_unrare <- prune_taxa(taxa_sums(ps_unrare) > 0, ps_unrare)

# get out the asv table (that has dropped the 1 sample) - 
asvs_unrare_192 <- ps_unrare@otu_table@.Data %>% 
  as_tibble(rownames = "ASVID") %>% # need to do because otherwise rowSums is acting up
  column_to_rownames("ASVID")

# add variable that counts in how many samples ASV appears
asvs_unrare_192$nsamp <- rowSums(asvs_unrare_192 != 0)

# make a list of ASV names - then that can be added as a column in ANCOM-BC results
asvs_1sample <- asvs_unrare_192 %>%
  filter(nsamp <2) %>% 
  rownames_to_column(var = "ASVID") %>% 
  pull(ASVID)

# try adding as a column in ASV table to see if it works
asvs_unrare_192 %>%
  rownames_to_column(var = "ASVID") %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample, "yes", "no")) %>% # ← use this in ancom result table
  tabyl(in1sample)

Coinfection status

ASV level

Coinfection status 0 vs 1 and 2

DA_coinf_012 <- ancombc(
  phyloseq = ps_unrare, 
  formula = "coinf_stat + poll_group + location + reprod",
  prv_cut = 0, # already filtered in QIIME2
  group = "coinf_stat", 
  struc_zero = TRUE, 
  neg_lb = TRUE)

Combine the result DFs

# get out the results as a more structured list
DA_coinf012_res <- DA_coinf012$res

# set column names for a) all result tables and b) for structural zero table
col_names = c("coinf_stat_1", "coinf_stat_2", "poll_low", "loc_KT", "non-gravid", "gravid")
col_names2 = c("ASVID", "srt0_coinf_stat_0", "srt0_coinf_stat_1", "srt0_coinf_stat_2")

# merge everything
DA_coinf012_merged <- full_join(
  DA_coinf012_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_coinf012$zero_ind %>%
    as_tibble(rownames = "ASVID") %>%
    rename_with( ~ col_names2),
  by = "ASVID"  ) %>%
  full_join(
    .,
    DA_coinf012_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_coinf012_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_coinf012_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_coinf012_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_coinf012_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID") %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample, "yes", "no"))

# straight to XLSX
write.xlsx2(DA_coinf012_merged, file = "results/revised/diff_abund/DA_coinf_asv.xlsx", sheetName = "DA_coinf_0-12", append = FALSE, row.names = FALSE) # write.xlsx2 is faster than write.xlsx

Coinfection status 1 vs 2

Subset phyloseq object to exclude non-infected animals and prune to exclude ASVs not in these samples

DA_coinf_12 <- ancombc(
    phyloseq = subset_samples(ps_unrare, coinf_stat != "0") %>%
      prune_taxa(taxa_sums(.) > 0, .),
    formula = "coinf_stat + poll_group + location + reprod",
    prv_cut = 0,# already filtered in QIIME2
    struc_zero = FALSE, # already detected in the previous one
    neg_lb = FALSE,
    global = FALSE )

# get out the results as a more structured list
DA_coinf12_res <- DA_coinf_12$res

# set column names for all result tables
col_names = c("coinf_stat_2", "poll_low", "loc_KT", "non-gravid", "gravid")

# merge everything
DA_coinf12_merged <- full_join(
  DA_coinf12_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_coinf12_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_coinf12_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_coinf12_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_coinf12_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_coinf12_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample, "yes", "no"))

# straight to XLSX
write.xlsx2(DA_coinf12_merged, 
            file = "results/revised/diff_abund/DA_coinf_asv.xlsx", 
            sheetName = "DA_coinf_1-2", 
            append = TRUE, # to include in an already existing file
            row.names = FALSE)

Export in a single table

With taxonomy and cleaned-up values

DA_coinf_asv_all <- full_join(
  x = DA_coinf012_merged %>% 
    rename_with(~ paste0(.x, "_012"), -ASVID),
  y = DA_coinf12_merged %>% 
    rename_with(~ paste0(.x, "_12"), -ASVID),
  by = "ASVID") %>% 
  left_join(
    x = .,
    y = tax %>% 
      rename(ASVID = OTUID) %>% 
      dplyr::select(-c(Confidence, Domain)),
    by = "ASVID") %>% 
  # replace all logical values with yes and no
  mutate(across(where(is_logical), ~ ifelse(. == TRUE,"yes", "no"))) %>% 
  # round all numeric values to 5 digits 
  mutate(across(where(is_numeric), round, 5)) %>% 
  # re-arrange columns so I don't have to drag them in Excel and kill the laptop
  relocate(in1sample_012, Phylum, Class, Order, Family, Genus, Species, .after = ASVID) 

write.xlsx2(DA_coinf_asv_all,
            file = "results/revised/diff_abund/DA_coinf_asv.xlsx", 
            sheetName = "DA_coinf_orig", 
            append = FALSE, # to include in an already existing file
            row.names = FALSE)

Genus level

Collapse phyloseq object and taxonomy to genus level

ps_unrare_gen <- 
  tax_glom(ps_unrare, "Genus")

tax_gen <- 
  tax_table(ps_unrare_gen)@.Data %>% 
  as_tibble(rownames = "ASVID") %>% 
  dplyr::select(-Species) %>% 
  unite("Fam_Gen", c(Family, Genus), sep = "_", remove = FALSE)

Are there any in only 1 sample:

# get out the asv table (that has dropped the 1 sample) - 
asvs_unrare_gen_192 <- ps_unrare_gen@otu_table@.Data %>% 
  as_tibble(rownames = "ASVID") %>% # need to do because otherwise rowSums is acting up
  column_to_rownames("ASVID")

# add variable that counts in how many samples ASV appears
asvs_unrare_gen_192$nsamp <- rowSums(asvs_unrare_gen_192 != 0)

# object with which genera ASVs in 1 sample
asvs_1sample_gen <- asvs_unrare_gen_192 %>%
  filter(nsamp <2) %>% 
  rownames_to_column(var = "ASVID") %>% 
  pull(ASVID)

asvs_unrare_gen_192 %>%
  rownames_to_column(var = "ASVID") %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample_gen, "yes", "no")) %>% # ← use this in ancom result table
  tabyl(in1sample)

Coinfection status 0 vs 1 and 2

DA_coinf_012_gen <- ancombc(
  phyloseq = ps_unrare_gen, 
  formula = "coinf_stat + poll_group + location + reprod",
  prv_cut = 0,
  group = "coinf_stat", 
  struc_zero = TRUE, 
  neg_lb = TRUE)

# get out the results as a more structured list
DA_coinf_012_gen_res <- DA_coinf_012_gen$res

# set column names for a) all result tables and b) for structural zero table
col_names = c("coinf_stat_1", "coinf_stat_2", "poll_low", "loc_KT", "non-gravid", "gravid")
col_names2 = c("ASVID", "srt0_coinf_stat_0", "srt0_coinf_stat_1", "srt0_coinf_stat_2")

# save everything as a table
DA_coinf012_gen_merged <- full_join(
  DA_coinf_012_gen_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_coinf_012_gen$zero_ind %>%
    as_tibble(rownames = "ASVID") %>%
    rename_with( ~ col_names2),
  by = "ASVID"  ) %>%
  full_join(
    .,
    DA_coinf_012_gen_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_coinf_012_gen_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_coinf_012_gen_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_coinf_012_gen_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_coinf_012_gen_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample_gen, "yes", "no"))
  
# straight to XLSX for these
write.xlsx2(DA_coinf012_gen_merged, 
            file = "results/revised/diff_abund/DA_all_gen.xlsx", 
            sheetName = "DA_coinf_0-12", 
            append = FALSE, 
            row.names = FALSE)

Coinfection status 1 vs 2

DA_coinf_12_gen <- ancombc(
    phyloseq = subset_samples(ps_unrare_gen, coinf_stat != "0") %>%
      prune_taxa(taxa_sums(.) > 0, .),
    formula = "coinf_stat + poll_group + location + reprod",
    prv_cut = 0,
    struc_zero = FALSE,
    neg_lb = FALSE,
    global = FALSE)

# get out the results as a more structured list
DA_coinf_12_gen_res <- DA_coinf_12_gen$res

# set column names for all result tables
col_names = c("coinf_stat_2", "poll_low", "loc_KT", "non-gravid", "gravid")

# merge everything
DA_coinf_12_gen_merged <- full_join(
  DA_coinf_12_gen_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_coinf_12_gen_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_coinf_12_gen_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_coinf_12_gen_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_coinf_12_gen_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_coinf_12_gen_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample_gen, "yes", "no"))

# straight to XLSX
write.xlsx2(DA_coinf_12_gen_merged, 
            file = "results/revised/diff_abund/DA_all_gen.xlsx", 
            sheetName = "DA_coinf_1-2", 
            append = TRUE, # to include in an already existing file
            row.names = FALSE)

Export in a single table

With taxonomy and cleaned-up values

DA_coinf_gen_all <- 
full_join(
  x = DA_coinf012_gen_merged %>% 
    rename_with(~ paste0(.x, "_012"), -ASVID),
  y = DA_coinf_12_gen_merged %>% 
    rename_with(~ paste0(.x, "_12"), -ASVID),
  by = "ASVID") %>% 
  left_join(
    x = .,
    y = tax_gen %>% 
      dplyr::select(-Domain),
    by = "ASVID") %>% 
  # replace all logical values with yes and no
  mutate(across(where(is_logical), ~ ifelse(. == TRUE,"yes", "no"))) %>% 
  # round all numeric values to 5 digits 
  mutate(across(where(is_numeric), round, 5)) %>% 
  # re-arrange columns so I don't have to drag them in Excel and kill the laptop
  relocate(in1sample_012, Phylum, Class, Order, Family, Genus, .after = ASVID)

write.xlsx2(.,
              file = "results/revised/diff_abund/DA_coinf_gen.xlsx", 
              sheetName = "DA_coinf_orig", 
              append = FALSE, # to include in an already existing file
              row.names = FALSE)

Pathogens irrespective of coinfection status

ASV level

Anaplasma

Note: run separately for each pathogen, because the order of the variables in formula for ANCOM-BC matters, and because you need to detect structural zeros for each group, furthermore, as a trial I tested running ana + poll_group + location + reprod against ana + bab + bor + puu + poll_group + location + reprod and the results for Anaplasma are the same!

Run ANCOM-BC:

DA_ana <- ancombc(
  phyloseq = ps_unrare, 
  formula = "ana + poll_group + location + reprod",
  prv_cut = 0, # already filtered in QIIME2
  group = "ana", 
  struc_zero = TRUE, 
  neg_lb = TRUE)

# get out the results as a more structured list
DA_ana_res <- DA_ana$res

# set column names for a) all result tables and b) for structural zero table
col_names = c("ana_pos", "poll_low", "loc_KT", "non-gravid", "gravid")
col_names2 = c("ASVID", "srt0_ana_neg", "srt0_ana_pos")

# save everything as a tibble
DA_ana_res_merged <- full_join(
  DA_ana_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_ana$zero_ind %>%
    as_tibble(rownames = "ASVID") %>%
    rename_with( ~ col_names2),
  by = "ASVID"  ) %>%
  full_join(
    .,
    DA_ana_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_ana_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_ana_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_ana_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_ana_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample, "yes", "no"))

# straight to XLSX
write.xlsx2(DA_ana_res_merged, 
            file = "results/revised/diff_abund/DA_path_asv.xlsx", 
            sheetName = "DA_ana", 
            append = FALSE, 
            row.names = FALSE)

Babesia

Run ANCOM-BC:

DA_bab <- ancombc(
  phyloseq = ps_unrare, 
  formula = "bab + poll_group + location + reprod",
  prv_cut = 0, # already filtered in QIIME2
  group = "bab", 
  struc_zero = TRUE, 
  neg_lb = TRUE)

# get out the results as a more structured list
DA_bab_res <- DA_bab$res

# set column names for a) all result tables and b) for structural zero table
col_names = c("bab_pos", "poll_low", "loc_KT", "non-gravid", "gravid")
col_names2 = c("ASVID", "srt0_bab_neg", "srt0_bab_pos")

# save everything as a tibble
DA_bab_res_merged <- full_join(
  DA_bab_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_bab$zero_ind %>%
    as_tibble(rownames = "ASVID") %>%
    rename_with( ~ col_names2),
  by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bab_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bab_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bab_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bab_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bab_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample, "yes", "no"))

# straight to XLSX
write.xlsx2(DA_bab_res_merged, 
            file = "results/revised/diff_abund/DA_path_asv.xlsx", 
            sheetName = "DA_bab", 
            append = TRUE, 
            row.names = FALSE)

Borrelia

Run ANCOM-BC:

DA_bor <- ancombc(
  phyloseq = ps_unrare, 
  formula = "bor + poll_group + location + reprod",
  prv_cut = 0, # already filtered in QIIME2
  group = "bor", 
  struc_zero = TRUE, 
  neg_lb = TRUE)

# get out the results as a more structured list
DA_bor_res <- DA_bor$res

# set column names for a) all result tables and b) for structural zero table
col_names = c("bor_pos", "poll_low", "loc_KT", "non-gravid", "gravid")
col_names2 = c("ASVID", "srt0_bor_neg", "srt0_bor_pos")

# save everything as a tibble
DA_bor_res_merged <- full_join(
  DA_bor_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_bor$zero_ind %>%
    as_tibble(rownames = "ASVID") %>%
    rename_with( ~ col_names2),
  by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bor_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bor_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bor_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bor_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bor_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample, "yes", "no"))

# straight to XLSX
write.xlsx2(DA_bor_res_merged, 
            file = "results/revised/diff_abund/DA_path_asv.xlsx", 
            sheetName = "DA_bor", 
            append = TRUE, 
            row.names = FALSE)

Puumala

Run ANCOM-BC:

DA_puu <- ancombc(
  phyloseq = ps_unrare, 
  formula = "puu + poll_group + location + reprod",
  prv_cut = 0, # already filtered in QIIME2
  group = "puu", 
  struc_zero = TRUE, 
  neg_lb = TRUE)

# get out the results as a more structured list
DA_puu_res <- DA_puu$res

# set column names for a) all result tables and b) for structural zero table
col_names = c("puu_pos", "poll_low", "loc_KT", "non-gravid", "gravid")
col_names2 = c("ASVID", "srt0_puu_neg", "srt0_puu_pos")

# save everything as a tibble
DA_puu_res_merged <- full_join(
  DA_puu_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_puu$zero_ind %>%
    as_tibble(rownames = "ASVID") %>%
    rename_with( ~ col_names2),
  by = "ASVID"  ) %>%
  full_join(
    .,
    DA_puu_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_puu_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_puu_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_puu_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_puu_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample, "yes", "no"))

# straight to XLSX
write.xlsx2(DA_puu_res_merged, 
            file = "results/revised/diff_abund/DA_path_asv.xlsx", 
            sheetName = "DA_puu", 
            append = TRUE, 
            row.names = FALSE)

Genus level

Anaplasma

Run ANCOM-BC:

DA_ana_gen <- ancombc(
  phyloseq = ps_unrare_gen, 
  formula = "ana + poll_group + location + reprod",
  prv_cut = 0, # already filtered in QIIME2
  group = "ana", 
  struc_zero = TRUE, 
  neg_lb = TRUE)

# get out the results as a more structured list
DA_ana_gen_res <- DA_ana_gen$res

# set column names for a) all result tables and b) for structural zero table
col_names = c("ana_pos", "poll_low", "loc_KT", "non-gravid", "gravid")
col_names2 = c("ASVID", "srt0_ana_neg", "srt0_ana_pos")

# save everything as a tibble
DA_ana_gen_res_merged <- full_join(
  DA_ana_gen_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_ana_gen$zero_ind %>%
    as_tibble(rownames = "ASVID") %>%
    rename_with( ~ col_names2),
  by = "ASVID"  ) %>%
  full_join(
    .,
    DA_ana_gen_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_ana_gen_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_ana_gen_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_ana_gen_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_ana_gen_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample_gen, "yes", "no"))

# straight to XLSX
write.xlsx2(DA_ana_gen_res_merged, 
            file = "results/revised/diff_abund/DA_path_gen.xlsx", 
            sheetName = "DA_ana", 
            append = FALSE, 
            row.names = FALSE)

Babesia

Run ANCOM-BC:

DA_bab_gen <- ancombc(
  phyloseq = ps_unrare_gen, 
  formula = "bab + poll_group + location + reprod",
  prv_cut = 0, # already filtered in QIIME2
  group = "bab", 
  struc_zero = TRUE, 
  neg_lb = TRUE)

# get out the results as a more structured list
DA_bab_gen_res <- DA_bab_gen$res

# set column names for a) all result tables and b) for structural zero table
col_names = c("bab_pos", "poll_low", "loc_KT", "non-gravid", "gravid")
col_names2 = c("ASVID", "srt0_bab_neg", "srt0_bab_pos")

# save everything as a tibble
DA_bab_gen_res_merged <- full_join(
  DA_bab_gen_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_bab_gen$zero_ind %>%
    as_tibble(rownames = "ASVID") %>%
    rename_with( ~ col_names2),
  by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bab_gen_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bab_gen_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bab_gen_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bab_gen_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bab_gen_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample_gen, "yes", "no"))

# straight to XLSX
write.xlsx2(DA_bab_gen_res_merged, 
            file = "results/revised/diff_abund/DA_path_gen.xlsx", 
            sheetName = "DA_bab", 
            append = TRUE, 
            row.names = FALSE)

Borrelia

Run ANCOM-BC:

DA_bor_gen <- ancombc(
  phyloseq = ps_unrare_gen, 
  formula = "bor + poll_group + location + reprod",
  prv_cut = 0, # already filtered in QIIME2
  group = "bor", 
  struc_zero = TRUE, 
  neg_lb = TRUE)

# get out the results as a more structured list
DA_bor_gen_res <- DA_bor_gen$res

# set column names for a) all result tables and b) for structural zero table
col_names = c("bor_pos", "poll_low", "loc_KT", "non-gravid", "gravid")
col_names2 = c("ASVID", "srt0_bor_neg", "srt0_bor_pos")

# save everything as a tibble
DA_bor_gen_res_merged <- full_join(
  DA_bor_gen_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_bor_gen$zero_ind %>%
    as_tibble(rownames = "ASVID") %>%
    rename_with( ~ col_names2),
  by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bor_gen_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bor_gen_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bor_gen_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bor_gen_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bor_gen_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample_gen, "yes", "no"))

# straight to XLSX
write.xlsx2(DA_bor_gen_res_merged, 
            file = "results/revised/diff_abund/DA_path_gen.xlsx", 
            sheetName = "DA_bor", 
            append = TRUE, 
            row.names = FALSE)

Puumala

Run ANCOM-BC:

DA_puu_gen <- ancombc(
  phyloseq = ps_unrare_gen, 
  formula = "puu + poll_group + location + reprod",
  prv_cut = 0, # already filtered in QIIME2
  group = "puu", 
  struc_zero = TRUE, 
  neg_lb = TRUE)

# get out the results as a more structured list
DA_puu_gen_res <- DA_puu_gen$res

# set column names for a) all result tables and b) for structural zero table
col_names = c("puu_pos", "poll_low", "loc_KT", "non-gravid", "gravid")
col_names2 = c("ASVID", "srt0_puu_neg", "srt0_puu_pos")

# save everything as a tibble
DA_puu_gen_res_merged <- full_join(
  DA_puu_gen_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_puu_gen$zero_ind %>%
    as_tibble(rownames = "ASVID") %>%
    rename_with( ~ col_names2),
  by = "ASVID"  ) %>%
  full_join(
    .,
    DA_puu_gen_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_puu_gen_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_puu_gen_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_puu_gen_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_puu_gen_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample_gen, "yes", "no"))

# straight to XLSX
write.xlsx2(DA_puu_gen_res_merged, 
            file = "results/revised/diff_abund/DA_path_gen.xlsx", 
            sheetName = "DA_puu", 
            append = TRUE, 
            row.names = FALSE)

Pathogens considering coinfection status

ASV level

Anaplasma

Status 0 vs 1 and 2

Run ANCOM-BC:

DA_ana_coinf_012 <- ancombc(
  phyloseq = ps_unrare, 
  formula = "ana_coinf + poll_group + location + reprod",
  prv_cut = 0, # already filtered in QIIME2
  group = "ana_coinf", 
  struc_zero = TRUE, 
  neg_lb = TRUE)

# get out the results as a more structured list
DA_ana_coinf_012_res <- DA_ana_coinf_012$res

# set column names for a) all result tables and b) for structural zero table
col_names = c("ana_single", "ana_coinf", "poll_low", "loc_KT", "non-gravid", "gravid")
col_names2 = c("ASVID", "srt0_ana_neg", "srt0_ana_single", "srt0_ana_coinf")

# save everything as a tibble
DA_ana_coinf_012_res_merged <- full_join(
  DA_ana_coinf_012_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_ana_coinf_012$zero_ind %>%
    as_tibble(rownames = "ASVID") %>%
    rename_with( ~ col_names2),
  by = "ASVID"  ) %>%
  full_join(
    .,
    DA_ana_coinf_012_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_ana_coinf_012_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_ana_coinf_012_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_ana_coinf_012_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_ana_coinf_012_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample, "yes", "no"))

# straight to XLSX
write.xlsx2(DA_ana_coinf_012_res_merged, 
            file = "results/revised/diff_abund/DA_path-coinf_asv.xlsx", 
            sheetName = "DA_ana_coinf_012", 
            append = FALSE, 
            row.names = FALSE)

Status 1 vs 2

Run ANCOM-BC

DA_ana_coinf_12 <- ancombc(
    phyloseq = subset_samples(ps_unrare, ana_coinf != "negative") %>%
      prune_taxa(taxa_sums(.) > 0, .),
    formula = "ana_coinf + poll_group + location + reprod",
    prv_cut = 0,# already filtered in QIIME2
    struc_zero = FALSE, # already detected in the previous one
    neg_lb = FALSE,
    global = FALSE )

Get out data:

# get out the results as a more structured list
DA_ana_coinf_12_res <- DA_ana_coinf_12$res

# set column names for a) all result tables and b) for structural zero table
col_names = c("ana_coinf", "poll_low", "loc_KT", "non-gravid", "gravid")

# save everything as a tibble
DA_ana_coinf_12_res_merged <- full_join(
  DA_ana_coinf_12_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_ana_coinf_12_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID" ) %>%
  full_join(
    .,
    DA_ana_coinf_12_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_ana_coinf_12_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_ana_coinf_12_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_ana_coinf_12_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample, "yes", "no"))

# straight to XLSX
write.xlsx2(DA_ana_coinf_12_res_merged, 
            file = "results/revised/diff_abund/DA_path-coinf_asv.xlsx", 
            sheetName = "DA_ana_coinf_12", 
            append = FALSE, 
            row.names = FALSE)

Babesia

Status 0 vs 1 and 2

Run ANCOM-BC:

DA_bab_coinf_012 <- ancombc(
  phyloseq = ps_unrare, 
  formula = "bab_coinf + poll_group + location + reprod",
  prv_cut = 0, # already filtered in QIIME2
  group = "bab_coinf", 
  struc_zero = TRUE, 
  neg_lb = TRUE)

# get out the results as a more structured list
DA_bab_coinf_012_res <- DA_bab_coinf_012$res

# set column names for a) all result tables and b) for structural zero table
col_names = c("bab_single", "bab_coinf", "poll_low", "loc_KT", "non-gravid", "gravid")
col_names2 = c("ASVID", "srt0_bab_neg", "srt0_bab_single", "srt0_bab_coinf")

# save everything as a tibble
DA_bab_coinf_012_res_merged <- full_join(
  DA_bab_coinf_012_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_bab_coinf_012$zero_ind %>%
    as_tibble(rownames = "ASVID") %>%
    rename_with( ~ col_names2),
  by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bab_coinf_012_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bab_coinf_012_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bab_coinf_012_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bab_coinf_012_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bab_coinf_012_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample, "yes", "no"))

# straight to XLSX
write.xlsx2(DA_bab_coinf_012_res_merged, 
            file = "results/revised/diff_abund/DA_path-coinf_asv.xlsx", 
            sheetName = "DA_bab_coinf_012", 
            append = TRUE, 
            row.names = FALSE)

Status 1 vs 2

Run ANCOM-BC

DA_bab_coinf_12 <- ancombc(
    phyloseq = subset_samples(ps_unrare, bab_coinf != "negative") %>%
      prune_taxa(taxa_sums(.) > 0, .),
    formula = "bab_coinf + poll_group + location + reprod",
    prv_cut = 0,# already filtered in QIIME2
    struc_zero = FALSE, # already detected in the previous one
    neg_lb = FALSE,
    global = FALSE )

Get out data:

# get out the results as a more structured list
DA_bab_coinf_12_res <- DA_bab_coinf_12$res

# set column names for a) all result tables and b) for structural zero table
col_names = c("bab_coinf", "poll_low", "loc_KT", "non-gravid", "gravid")

# save everything as a tibble
DA_bab_coinf_12_res_merged <- full_join(
  DA_bab_coinf_12_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_bab_coinf_12_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID" ) %>%
  full_join(
    .,
    DA_bab_coinf_12_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bab_coinf_12_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bab_coinf_12_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bab_coinf_12_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample, "yes", "no"))

# straight to XLSX
write.xlsx2(DA_bab_coinf_12_res_merged, 
            file = "results/revised/diff_abund/DA_path-coinf_asv.xlsx", 
            sheetName = "DA_bab_coinf_12", 
            append = FALSE, 
            row.names = FALSE)

Borrelia

Status 0 vs 1 and 2

Run ANCOM-BC:

DA_bor_coinf_012 <- ancombc(
  phyloseq = ps_unrare, 
  formula = "bor_coinf + poll_group + location + reprod",
  prv_cut = 0, # already filtered in QIIME2
  group = "bor_coinf", 
  struc_zero = TRUE, 
  neg_lb = TRUE)

# get out the results as a more structured list
DA_bor_coinf_012_res <- DA_bor_coinf_012$res

# set column names for a) all result tables and b) for structural zero table
col_names = c("bor_single", "bor_coinf", "poll_low", "loc_KT", "non-gravid", "gravid")
col_names2 = c("ASVID", "srt0_bor_neg", "srt0_bor_single", "srt0_bor_coinf")

# save everything as a tibble
DA_bor_coinf_012_res_merged <- full_join(
  DA_bor_coinf_012_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_bor_coinf_012$zero_ind %>%
    as_tibble(rownames = "ASVID") %>%
    rename_with( ~ col_names2),
  by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bor_coinf_012_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bor_coinf_012_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bor_coinf_012_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bor_coinf_012_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bor_coinf_012_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample, "yes", "no"))

# straight to XLSX
write.xlsx2(DA_bor_coinf_012_res_merged, 
            file = "results/revised/diff_abund/DA_path-coinf_asv.xlsx", 
            sheetName = "DA_bor_coinf_012", 
            append = TRUE, 
            row.names = FALSE)

Status 1 vs 2

Run ANCOM-BC

DA_bor_coinf_12 <- ancombc(
    phyloseq = subset_samples(ps_unrare, bor_coinf != "negative") %>%
      prune_taxa(taxa_sums(.) > 0, .),
    formula = "bor_coinf + poll_group + location + reprod",
    prv_cut = 0,# already filtered in QIIME2
    struc_zero = FALSE, # already detected in the previous one
    neg_lb = FALSE,
    global = FALSE )

Get out data:

# get out the results as a more structured list
DA_bor_coinf_12_res <- DA_bor_coinf_12$res

# set column names for a) all result tables and b) for structural zero table
col_names = c("bor_coinf", "poll_low", "loc_KT", "non-gravid", "gravid")

# save everything as a tibble
DA_bor_coinf_12_res_merged <- full_join(
  DA_bor_coinf_12_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_bor_coinf_12_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID" ) %>%
  full_join(
    .,
    DA_bor_coinf_12_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bor_coinf_12_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bor_coinf_12_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bor_coinf_12_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample, "yes", "no"))

# straight to XLSX
write.xlsx2(DA_bor_coinf_12_res_merged, 
            file = "results/revised/diff_abund/DA_path-coinf_asv.xlsx", 
            sheetName = "DA_bor_coinf_12", 
            append = FALSE, 
            row.names = FALSE)

Puumala

Status 0 vs 1 and 2

Run ANCOM-BC:

DA_puu_coinf_012 <- ancombc(
  phyloseq = ps_unrare, 
  formula = "puu_coinf + poll_group + location + reprod",
  prv_cut = 0, # already filtered in QIIME2
  group = "puu_coinf", 
  struc_zero = TRUE, 
  neg_lb = TRUE)

# get out the results as a more structured list
DA_puu_coinf_012_res <- DA_puu_coinf_012$res

# set column names for a) all result tables and b) for structural zero table
col_names = c("puu_single", "puu_coinf", "poll_low", "loc_KT", "non-gravid", "gravid")
col_names2 = c("ASVID", "srt0_puu_neg", "srt0_puu_single", "srt0_puu_coinf")

# save everything as a tibble
DA_puu_coinf_012_res_merged <- full_join(
  DA_puu_coinf_012_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_puu_coinf_012$zero_ind %>%
    as_tibble(rownames = "ASVID") %>%
    rename_with( ~ col_names2),
  by = "ASVID"  ) %>%
  full_join(
    .,
    DA_puu_coinf_012_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_puu_coinf_012_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_puu_coinf_012_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_puu_coinf_012_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_puu_coinf_012_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample, "yes", "no"))

# straight to XLSX
write.xlsx2(DA_puu_coinf_012_res_merged, 
            file = "results/revised/diff_abund/DA_path-coinf_asv.xlsx", 
            sheetName = "DA_puu_coinf_012", 
            append = TRUE, 
            row.names = FALSE)

Status 1 vs 2

Run ANCOM-BC

DA_puu_coinf_12 <- ancombc(
    phyloseq = subset_samples(ps_unrare, puu_coinf != "negative") %>%
      prune_taxa(taxa_sums(.) > 0, .),
    formula = "puu_coinf + poll_group + location + reprod",
    prv_cut = 0,# already filtered in QIIME2
    struc_zero = FALSE, # already detected in the previous one
    neg_lb = FALSE,
    global = FALSE )

Get out data:

# get out the results as a more structured list
DA_puu_coinf_12_res <- DA_puu_coinf_12$res

# set column names for all result tables
col_names = c("puu_coinf", "poll_low", "loc_KT", "non-gravid")

# save everything as a tibble
DA_puu_coinf_12_res_merged <- full_join(
  DA_puu_coinf_12_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_puu_coinf_12_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID" ) %>%
  full_join(
    .,
    DA_puu_coinf_12_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_puu_coinf_12_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_puu_coinf_12_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_puu_coinf_12_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample, "yes", "no"))

# straight to XLSX
write.xlsx2(DA_puu_coinf_12_res_merged, 
            file = "results/revised/diff_abund/DA_path-coinf_asv.xlsx", 
            sheetName = "DA_puu_coinf_12", 
            append = FALSE, 
            row.names = FALSE)

Genus level

Anaplasma

*##### Status 0 vs 1 and 2

Run ANCOM-BC:

DA_ana_coinf_gen_012 <- ancombc(
  phyloseq = ps_unrare_gen, 
  formula = "ana_coinf + poll_group + location + reprod",
  prv_cut = 0, # already filtered in QIIME2
  group = "ana_coinf", 
  struc_zero = TRUE, 
  neg_lb = TRUE)

# get out the results as a more structured list
DA_ana_coinf_gen_012_res <- DA_ana_coinf_gen_012$res

# set column names for a) all result tables and b) for structural zero table
col_names = c("ana_single", "ana_coinf", "poll_low", "loc_KT", "non-gravid", "gravid")
col_names2 = c("ASVID", "srt0_ana_neg", "srt0_ana_single", "srt0_ana_coinf")

# save everything as a tibble
DA_ana_coinf_gen_012_res_merged <- full_join(
  DA_ana_coinf_gen_012_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_ana_coinf_gen_012$zero_ind %>%
    as_tibble(rownames = "ASVID") %>%
    rename_with( ~ col_names2),
  by = "ASVID"  ) %>%
  full_join(
    .,
    DA_ana_coinf_gen_012_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_ana_coinf_gen_012_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_ana_coinf_gen_012_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_ana_coinf_gen_012_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_ana_coinf_gen_012_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample_gen, "yes", "no"))

# straight to XLSX
write.xlsx2(DA_ana_coinf_gen_012_res_merged, 
            file = "results/revised/diff_abund/DA_path-coinf_gen.xlsx", 
            sheetName = "DA_ana_coinf_gen_012", 
            append = FALSE, 
            row.names = FALSE)

Status 1 vs 2

Run ANCOM-BC

DA_ana_coinf_gen_12 <- ancombc(
    phyloseq = subset_samples(ps_unrare_gen, ana_coinf != "negative") %>%
      prune_taxa(taxa_sums(.) > 0, .),
    formula = "ana_coinf + poll_group + location + reprod",
    prv_cut = 0,# already filtered in QIIME2
    struc_zero = FALSE, # already detected in the previous one
    neg_lb = FALSE,
    global = FALSE)

Get out data:

# get out the results as a more structured list
DA_ana_coinf_gen_12_res <- DA_ana_coinf_gen_12$res

# set column names for a) all result tables and b) for structural zero table
col_names = c("ana_coinf", "poll_low", "loc_KT", "non-gravid", "gravid")

# save everything as a tibble
DA_ana_coinf_gen_12_res_merged <- full_join(
  DA_ana_coinf_gen_12_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_ana_coinf_gen_12_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID" ) %>%
  full_join(
    .,
    DA_ana_coinf_gen_12_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_ana_coinf_gen_12_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_ana_coinf_gen_12_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_ana_coinf_gen_12_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample_gen, "yes", "no"))

# straight to XLSX
write.xlsx2(DA_ana_coinf_gen_12_res_merged, 
            file = "results/revised/diff_abund/DA_path-coinf_gen.xlsx", 
            sheetName = "DA_ana_coinf_gen_12", 
            append = FALSE, 
            row.names = FALSE)

Babesia

Status 0 vs 1 and 2

Run ANCOM-BC:

DA_bab_coinf_gen_012 <- ancombc(
  phyloseq = ps_unrare_gen, 
  formula = "bab_coinf + poll_group + location + reprod",
  prv_cut = 0, # already filtered in QIIME2
  group = "bab_coinf", 
  struc_zero = TRUE, 
  neg_lb = TRUE)

# get out the results as a more structured list
DA_bab_coinf_gen_012_res <- DA_bab_coinf_gen_012$res

# set column names for a) all result tables and b) for structural zero table
col_names = c("bab_single", "bab_coinf", "poll_low", "loc_KT", "non-gravid", "gravid")
col_names2 = c("ASVID", "srt0_bab_neg", "srt0_bab_single", "srt0_bab_coinf")

# save everything as a tibble
DA_bab_coinf_gen_012_res_merged <- full_join(
  DA_bab_coinf_gen_012_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_bab_coinf_gen_012$zero_ind %>%
    as_tibble(rownames = "ASVID") %>%
    rename_with( ~ col_names2),
  by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bab_coinf_gen_012_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bab_coinf_gen_012_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bab_coinf_gen_012_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bab_coinf_gen_012_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bab_coinf_gen_012_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample_gen, "yes", "no"))

# straight to XLSX
write.xlsx2(DA_bab_coinf_gen_012_res_merged, 
            file = "results/revised/diff_abund/DA_path-coinf_gen.xlsx", 
            sheetName = "DA_bab_coinf_gen_012", 
            append = TRUE, 
            row.names = FALSE)

Status 1 vs 2

Run ANCOM-BC

DA_bab_coinf_gen_12 <- ancombc(
    phyloseq = subset_samples(ps_unrare_gen, bab_coinf != "negative") %>%
      prune_taxa(taxa_sums(.) > 0, .),
    formula = "bab_coinf + poll_group + location + reprod",
    prv_cut = 0,# already filtered in QIIME2
    struc_zero = FALSE, # already detected in the previous one
    neg_lb = FALSE,
    global = FALSE)

Get out data:

# get out the results as a more structured list
DA_bab_coinf_gen_12_res <- DA_bab_coinf_gen_12$res

# set column names for a) all result tables and b) for structural zero table
col_names = c("bab_coinf", "poll_low", "loc_KT", "non-gravid", "gravid")

# save everything as a tibble
DA_bab_coinf_gen_12_res_merged <- full_join(
  DA_bab_coinf_gen_12_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_bab_coinf_gen_12_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID" ) %>%
  full_join(
    .,
    DA_bab_coinf_gen_12_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bab_coinf_gen_12_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bab_coinf_gen_12_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bab_coinf_gen_12_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample_gen, "yes", "no"))

# straight to XLSX
write.xlsx2(DA_bab_coinf_gen_12_res_merged, 
            file = "results/revised/diff_abund/DA_path-coinf_gen.xlsx", 
            sheetName = "DA_bab_coinf_gen_12", 
            append = FALSE, 
            row.names = FALSE)

Borrelia

Status 0 vs 1 and 2

Run ANCOM-BC:

DA_bor_coinf_gen_012 <- ancombc(
  phyloseq = ps_unrare_gen, 
  formula = "bor_coinf + poll_group + location + reprod",
  prv_cut = 0, # already filtered in QIIME2
  group = "bor_coinf", 
  struc_zero = TRUE, 
  neg_lb = TRUE)

# get out the results as a more structured list
DA_bor_coinf_gen_012_res <- DA_bor_coinf_gen_012$res

# set column names for a) all result tables and b) for structural zero table
col_names = c("bor_single", "bor_coinf", "poll_low", "loc_KT", "non-gravid", "gravid")
col_names2 = c("ASVID", "srt0_bor_neg", "srt0_bor_single", "srt0_bor_coinf")

# save everything as a tibble
DA_bor_coinf_gen_012_res_merged <- full_join(
  DA_bor_coinf_gen_012_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_bor_coinf_gen_012$zero_ind %>%
    as_tibble(rownames = "ASVID") %>%
    rename_with( ~ col_names2),
  by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bor_coinf_gen_012_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bor_coinf_gen_012_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bor_coinf_gen_012_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bor_coinf_gen_012_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bor_coinf_gen_012_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample_gen, "yes", "no"))

# straight to XLSX
write.xlsx2(DA_bor_coinf_gen_012_res_merged, 
            file = "results/revised/diff_abund/DA_path-coinf_gen.xlsx", 
            sheetName = "DA_bor_coinf_gen_012", 
            append = TRUE, 
            row.names = FALSE)

Status 1 vs 2

Run ANCOM-BC

DA_bor_coinf_gen_12 <- ancombc(
    phyloseq = subset_samples(ps_unrare_gen, bor_coinf != "negative") %>%
      prune_taxa(taxa_sums(.) > 0, .),
    formula = "bor_coinf + poll_group + location + reprod",
    prv_cut = 0,# already filtered in QIIME2
    struc_zero = FALSE, # already detected in the previous one
    neg_lb = FALSE,
    global = FALSE )

Get out data:

# get out the results as a more structured list
DA_bor_coinf_gen_12_res <- DA_bor_coinf_gen_12$res

# set column names for a) all result tables and b) for structural zero table
col_names = c("bor_coinf", "poll_low", "loc_KT", "non-gravid", "gravid")

# save everything as a tibble
DA_bor_coinf_gen_12_res_merged <- full_join(
  DA_bor_coinf_gen_12_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_bor_coinf_gen_12_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID" ) %>%
  full_join(
    .,
    DA_bor_coinf_gen_12_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bor_coinf_gen_12_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bor_coinf_gen_12_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_bor_coinf_gen_12_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample_gen, "yes", "no"))

# straight to XLSX
write.xlsx2(DA_bor_coinf_gen_12_res_merged, 
            file = "results/revised/diff_abund/DA_path-coinf_gen.xlsx", 
            sheetName = "DA_bor_coinf_gen_12", 
            append = FALSE, 
            row.names = FALSE)

Puumala

Status 0 vs 1 and 2

Run ANCOM-BC:

DA_puu_coinf_gen_012 <- ancombc(
  phyloseq = ps_unrare_gen, 
  formula = "puu_coinf + poll_group + location + reprod",
  prv_cut = 0, # already filtered in QIIME2
  group = "puu_coinf", 
  struc_zero = TRUE, 
  neg_lb = TRUE)

# get out the results as a more structured list
DA_puu_coinf_gen_012_res <- DA_puu_coinf_gen_012$res

# set column names for a) all result tables and b) for structural zero table
col_names = c("puu_single", "puu_coinf", "poll_low", "loc_KT", "non-gravid", "gravid")
col_names2 = c("ASVID", "srt0_puu_neg", "srt0_puu_single", "srt0_puu_coinf")

# save everything as a tibble
DA_puu_coinf_gen_012_res_merged <- full_join(
  DA_puu_coinf_gen_012_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_puu_coinf_gen_012$zero_ind %>%
    as_tibble(rownames = "ASVID") %>%
    rename_with( ~ col_names2),
  by = "ASVID"  ) %>%
  full_join(
    .,
    DA_puu_coinf_gen_012_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_puu_coinf_gen_012_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_puu_coinf_gen_012_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_puu_coinf_gen_012_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_puu_coinf_gen_012_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  )  %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample_gen, "yes", "no"))

# straight to XLSX
write.xlsx2(DA_puu_coinf_gen_012_res_merged, 
            file = "results/revised/diff_abund/DA_path-coinf_gen.xlsx", 
            sheetName = "DA_puu_coinf_gen_012", 
            append = TRUE, 
            row.names = FALSE)

Status 1 vs 2

Run ANCOM-BC

# run 2 min
DA_puu_coinf_gen_12 <- ancombc(
    phyloseq = subset_samples(ps_unrare_gen, puu_coinf != "negative") %>%
      prune_taxa(taxa_sums(.) > 0, .),
    formula = "puu_coinf + poll_group + location + reprod",
    prv_cut = 0,# already filtered in QIIME2
    struc_zero = FALSE, # already detected in the previous one
    neg_lb = FALSE,
    global = FALSE )

Get out data:

# get out the results as a more structured list
DA_puu_coinf_gen_12_res <- DA_puu_coinf_gen_12$res

# set column names for all result tables
col_names = c("puu_coinf", "poll_low", "loc_KT", "non-gravid")

# save everything as a tibble
DA_puu_coinf_gen_12_res_merged <- full_join(
  DA_puu_coinf_gen_12_res$diff_abn %>%
    rename_with( ~ col_names) %>%
    rename_with( ~ paste0(.x, "_diff_abn")) %>%
    rownames_to_column("ASVID"),
  DA_puu_coinf_gen_12_res$lfc %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_LFC")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID" ) %>%
  full_join(
    .,
    DA_puu_coinf_gen_12_res$se %>%
      rename_with(~ col_names) %>%
      rename_with(~ paste0(.x, "_SE")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_puu_coinf_gen_12_res$p %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_p")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_puu_coinf_gen_12_res$q %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_q")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>%
  full_join(
    .,
    DA_puu_coinf_gen_12_res$W %>%
      rename_with( ~ col_names) %>%
      rename_with( ~ paste0(.x, "_W")) %>%
      rownames_to_column("ASVID"),
    by = "ASVID"  ) %>% 
  mutate(in1sample = ifelse(.$ASVID %in% asvs_1sample_gen, "yes", "no"))

# straight to XLSX
write.xlsx2(DA_puu_coinf_gen_12_res_merged, 
            file = "results/revised/diff_abund/DA_path-coinf_gen.xlsx", 
            sheetName = "DA_puu_coinf_gen_12", 
            append = FALSE, 
            row.names = FALSE)

Export DA results grouped per pathogen

Anaplasma

ASV level

right_join(
  x = tax %>% 
      rename(ASVID = OTUID) %>% 
      dplyr::select(-c(Confidence, Domain)),
  y = DA_ana_res_merged,
  by = "ASVID") %>% 
  full_join(
    x = .,
    y = DA_ana_coinf_012_res_merged %>% 
      rename_with(~ paste0(.x, "_012"), -ASVID),
    by = "ASVID") %>% 
  full_join(
    x = .,
    y = DA_ana_coinf_12_res_merged %>% 
      rename_with(~ paste0(.x, "_12"), -ASVID),
    by = "ASVID") %>% 
  # replace all logical values with yes and no
  mutate(across(where(is_logical), ~ ifelse(. == TRUE,"yes", "no"))) %>% 
  # round all numeric values to 5 digits 
  mutate(across(where(is_numeric), round, 5)) %>% 
  # re-arrange columns so I don't have to drag them in Excel and kill the laptop
  relocate(in1sample_012, .after = ASVID) %>% 
  write.table(., file = "results/revised/diff_abund/DA_ana_asv.tsv", row.names = FALSE, sep = "\t")

Genus level:

right_join(
  x = tax_gen %>% 
      dplyr::select(-c(Domain, Fam_Gen)),
  y = DA_ana_gen_res_merged,
  by = "ASVID") %>% 
  full_join(
    x = .,
    y = DA_ana_coinf_gen_012_res_merged %>% 
      dplyr::select(-c(ana_single_DA, ana_coinf_DA, in1sample)) %>% 
      rename_with(~ paste0(.x, "_012"), -ASVID),
    by = "ASVID") %>% 
  full_join(
    x = .,
    y = DA_ana_coinf_gen_12_res_merged %>% 
      dplyr::select(-in1sample) %>% 
      rename_with(~ paste0(.x, "_12"), -ASVID),
    by = "ASVID") %>% 
  # replace all logical values with yes and no
  mutate(across(where(is_logical), ~ ifelse(. == TRUE,"yes", "no"))) %>% 
  # round all numeric values to 5 digits 
  mutate(across(where(is_numeric), round, 5)) %>% 
  # re-arrange columns so I don't have to drag them in Excel and kill the laptop
  relocate(in1sample, .after = ASVID) %>% 
  write.table(., file = "results/revised/diff_abund/DA_ana_gen.tsv", row.names = FALSE, sep = "\t")

Babesia

ASV level:

right_join(
  x = tax %>% 
      rename(ASVID = OTUID) %>% 
      dplyr::select(-c(Confidence, Domain)),
  y = DA_bab_res_merged,
  by = "ASVID") %>% 
  full_join(
    x = .,
    y = DA_bab_coinf_012_res_merged %>% 
      dplyr::select(-c(bab_single_DA, bab_coinf_DA)) %>% 
      rename_with(~ paste0(.x, "_012"), -ASVID),
    by = "ASVID") %>% 
  full_join(
    x = .,
    y = DA_bab_coinf_12_res_merged %>% 
      rename_with(~ paste0(.x, "_12"), -ASVID),
    by = "ASVID") %>% 
  # replace all logical values with yes and no
  mutate(across(where(is_logical), ~ ifelse(. == TRUE,"yes", "no"))) %>% 
  # round all numeric values to 5 digits 
  mutate(across(where(is_numeric), round, 5)) %>% 
  # re-arrange columns so I don't have to drag them in Excel and kill the laptop
  relocate(in1sample_012, .after = ASVID) %>% 
  write.table(., file = "results/revised/diff_abund/DA_bab_asv.tsv", row.names = FALSE, sep = "\t")

Genus level:

right_join(
  x = tax_gen %>% 
      dplyr::select(-c(Domain, Fam_Gen)),
  y = DA_bab_gen_res_merged,
  by = "ASVID") %>% 
  full_join(
    x = .,
    y = DA_bab_coinf_gen_012_res_merged %>% 
      dplyr::select(-c(bab_single_DA, bab_coinf_DA, in1sample)) %>% 
      rename_with(~ paste0(.x, "_012"), -ASVID),
    by = "ASVID") %>% 
  full_join(
    x = .,
    y = DA_bab_coinf_gen_12_res_merged %>% 
      dplyr::select(-in1sample) %>% 
      rename_with(~ paste0(.x, "_12"), -ASVID),
    by = "ASVID") %>% 
  # replace all logical values with yes and no
  mutate(across(where(is_logical), ~ ifelse(. == TRUE,"yes", "no"))) %>% 
  # round all numeric values to 5 digits 
  mutate(across(where(is_numeric), round, 5)) %>% 
  # re-arrange columns so I don't have to drag them in Excel and kill the laptop
  relocate(in1sample, .after = ASVID) %>% 
  write.table(., file = "results/revised/diff_abund/DA_bab_gen.tsv", row.names = FALSE, sep = "\t")

Borrelia

ASV level:

right_join(
  x = tax %>% 
      rename(ASVID = OTUID) %>% 
      dplyr::select(-c(Confidence, Domain)),
  y = DA_bor_res_merged,
  by = "ASVID") %>% 
  full_join(
    x = .,
    y = DA_bor_coinf_012_res_merged %>% 
      dplyr::select(-c(bor_single_DA, bor_coinf_DA)) %>% 
      rename_with(~ paste0(.x, "_012"), -ASVID),
    by = "ASVID") %>% 
  full_join(
    x = .,
    y = DA_bor_coinf_12_res_merged %>% 
      rename_with(~ paste0(.x, "_12"), -ASVID),
    by = "ASVID") %>% 
  # replace all logical values with yes and no
  mutate(across(where(is_logical), ~ ifelse(. == TRUE,"yes", "no"))) %>% 
  # round all numeric values to 5 digits 
  mutate(across(where(is_numeric), round, 5)) %>% 
  # re-arrange columns so I don't have to drag them in Excel and kill the laptop
  relocate(in1sample_012, .after = ASVID) %>% 
  write.table(., file = "results/revised/diff_abund/DA_bor_asv.tsv", row.names = FALSE, sep = "\t")

Genus level:

right_join(
  x = tax_gen %>% 
      dplyr::select(-c(Domain, Fam_Gen)),
  y = DA_bor_gen_res_merged,
  by = "ASVID") %>% 
  full_join(
    x = .,
    y = DA_bor_coinf_gen_012_res_merged %>% 
      dplyr::select(-c(bor_single_DA, bor_coinf_DA, in1sample)) %>% 
      rename_with(~ paste0(.x, "_012"), -ASVID),
    by = "ASVID") %>% 
  full_join(
    x = .,
    y = DA_bor_coinf_gen_12_res_merged %>% 
      dplyr::select(-in1sample) %>% 
      rename_with(~ paste0(.x, "_12"), -ASVID),
    by = "ASVID") %>% 
  # replace all logical values with yes and no
  mutate(across(where(is_logical), ~ ifelse(. == TRUE,"yes", "no"))) %>% 
  # round all numeric values to 5 digits 
  mutate(across(where(is_numeric), round, 5)) %>% 
  # re-arrange columns so I don't have to drag them in Excel and kill the laptop
  relocate(in1sample, .after = ASVID) %>% 
  write.table(., file = "results/revised/diff_abund/DA_bor_gen.tsv", row.names = FALSE, sep = "\t")

Puumala

ASV level:

right_join(
  x = tax %>% 
      rename(ASVID = OTUID) %>% 
      dplyr::select(-c(Confidence, Domain)),
  y = DA_puu_res_merged,
  by = "ASVID") %>% 
  full_join(
    x = .,
    y = DA_puu_coinf_012_res_merged %>% 
      dplyr::select(-c(puu_single_DA, puu_coinf_DA)) %>% 
      rename_with(~ paste0(.x, "_012"), -ASVID),
    by = "ASVID") %>% 
  full_join(
    x = .,
    y = DA_puu_coinf_12_res_merged %>% 
      rename_with(~ paste0(.x, "_12"), -ASVID),
    by = "ASVID") %>% 
  # replace all logical values with yes and no
  mutate(across(where(is_logical), ~ ifelse(. == TRUE,"yes", "no"))) %>% 
  # round all numeric values to 5 digits 
  mutate(across(where(is_numeric), round, 5)) %>% 
  # re-arrange columns so I don't have to drag them in Excel and kill the laptop
  relocate(in1sample_012, .after = ASVID) %>% 
  write.table(., file = "results/revised/diff_abund/DA_puu_asv.tsv", row.names = FALSE, sep = "\t")

Genus level:

right_join(
  x = tax_gen %>% 
      dplyr::select(-c(Domain, Fam_Gen)),
  y = DA_puu_gen_res_merged,
  by = "ASVID") %>% 
  full_join(
    x = .,
    y = DA_puu_coinf_gen_012_res_merged %>% 
      dplyr::select(-c(puu_single_DA, puu_coinf_DA, in1sample)) %>% 
      rename_with(~ paste0(.x, "_012"), -ASVID),
    by = "ASVID") %>% 
  full_join(
    x = .,
    y = DA_puu_coinf_gen_12_res_merged %>% 
      dplyr::select(-in1sample) %>% 
      rename_with(~ paste0(.x, "_12"), -ASVID),
    by = "ASVID") %>% 
  # replace all logical values with yes and no
  mutate(across(where(is_logical), ~ ifelse(. == TRUE,"yes", "no"))) %>% 
  # round all numeric values to 5 digits 
  mutate(across(where(is_numeric), round, 5)) %>% 
  # re-arrange columns so I don't have to drag them in Excel and kill the laptop
  relocate(in1sample, .after = ASVID) %>% 
  write.table(., file = "results/revised/diff_abund/DA_puu_gen.tsv", row.names = FALSE, sep = "\t")

Table 2 for MS

See how many are DA for each pathogens split by +/- LFC, coinfection status, and taxonomy level

ASV level

Negative vs positive:

# make a dataframe with all DA ASVs for pathogens
DA_path_merged <- full_join(
  x = DA_ana_res_merged %>%
    filter(ana_DA == "yes") %>%
    dplyr::select(c(ASVID, ana_DA, srt0_ana_neg, srt0_ana_pos, ana_pos_LFC, ana_pos_W)),
  y = DA_bab_res_merged %>%
    filter(bab_DA == "yes") %>%
    dplyr::select(c(ASVID, bab_DA, srt0_bab_neg, srt0_bab_pos, bab_pos_LFC, bab_pos_W)),
  by = "ASVID") %>% 
  full_join(
    x = .,
    y = DA_bor_res_merged %>%
      filter(bor_DA == "yes") %>%
      dplyr::select(c(ASVID, bor_DA, srt0_bor_neg, srt0_bor_pos, bor_pos_LFC, bor_pos_W)),
    by = "ASVID") %>% 
  full_join(
    x = .,
    y = DA_puu_res_merged %>%
      filter(puu_DA == "yes") %>%
      dplyr::select(c(ASVID, puu_DA, srt0_puu_neg, srt0_puu_pos, puu_pos_LFC, puu_pos_W)),
    by = "ASVID") %>% 
  left_join(
    x = .,
    y = tax %>% 
      rename(ASVID = OTUID) %>% 
      dplyr::select(-Confidence),
    by = "ASVID")

DA_path_merged %>% filter(ana_DA == "yes" & ana_pos_LFC>0) %>% count()
DA_path_merged %>% filter(ana_DA == "yes" & ana_pos_LFC<0) %>% count()

DA_path_merged %>% filter(bab_DA == "yes" & bab_pos_LFC>0) %>% count()
DA_path_merged %>% filter(bab_DA == "yes" & bab_pos_LFC<0) %>% count()

DA_path_merged %>% filter(bor_DA == "yes" & bor_pos_LFC>0) %>% count()
DA_path_merged %>% filter(bor_DA == "yes" & bor_pos_LFC<0) %>% count()

DA_path_merged %>% filter(puu_DA == "yes" & puu_pos_LFC>0) %>% count()
DA_path_merged %>% filter(puu_DA == "yes" & puu_pos_LFC<0) %>% count()

Coinfection considered:

# N vs S
DA_ana_coinf_012_res_merged %>% filter(ana_single_diff_abn == TRUE & ana_single_LFC>0) %>% count()
DA_ana_coinf_012_res_merged %>% filter(ana_single_diff_abn == TRUE & ana_single_LFC<0) %>% count()

DA_bab_coinf_012_res_merged %>% filter(bab_single_diff_abn == TRUE & bab_single_LFC>0) %>% count()
DA_bab_coinf_012_res_merged %>% filter(bab_single_diff_abn == TRUE & bab_single_LFC<0) %>% count()

DA_bor_coinf_012_res_merged %>% filter(bor_single_diff_abn == TRUE & bor_single_LFC>0) %>% count()
DA_bor_coinf_012_res_merged %>% filter(bor_single_diff_abn == TRUE & bor_single_LFC<0) %>% count()

DA_puu_coinf_012_res_merged %>% filter(puu_single_diff_abn == TRUE & puu_single_LFC>0) %>% count()
DA_puu_coinf_012_res_merged %>% filter(puu_single_diff_abn == TRUE & puu_single_LFC<0) %>% count()

# N vs C
DA_ana_coinf_012_res_merged %>% filter(ana_coinf_diff_abn == TRUE & ana_coinf_LFC>0) %>% count()
DA_ana_coinf_012_res_merged %>% filter(ana_coinf_diff_abn == TRUE & ana_coinf_LFC<0) %>% count()

DA_bab_coinf_012_res_merged %>% filter(bab_coinf_diff_abn == TRUE & bab_coinf_LFC>0) %>% count()
DA_bab_coinf_012_res_merged %>% filter(bab_coinf_diff_abn == TRUE & bab_coinf_LFC<0) %>% count()

DA_bor_coinf_012_res_merged %>% filter(bor_coinf_diff_abn == TRUE & bor_coinf_LFC>0) %>% count()
DA_bor_coinf_012_res_merged %>% filter(bor_coinf_diff_abn == TRUE & bor_coinf_LFC<0) %>% count()

DA_puu_coinf_012_res_merged %>% filter(puu_coinf_diff_abn == TRUE & puu_coinf_LFC>0) %>% count()
DA_puu_coinf_012_res_merged %>% filter(puu_coinf_diff_abn == TRUE & puu_coinf_LFC<0) %>% count()

# S vs C
DA_bab_coinf_12_res_merged %>% filter(bab_coinf_diff_abn == TRUE & bab_coinf_LFC>0) %>% count()
DA_bab_coinf_12_res_merged %>% filter(bab_coinf_diff_abn == TRUE & bab_coinf_LFC<0) %>% count()

DA_bor_coinf_12_res_merged %>% filter(bor_coinf_diff_abn == TRUE & bor_coinf_LFC>0) %>% count()
DA_bor_coinf_12_res_merged %>% filter(bor_coinf_diff_abn == TRUE & bor_coinf_LFC<0) %>% count()

DA_puu_coinf_12_res_merged %>% filter(puu_coinf_diff_abn == TRUE & puu_coinf_LFC>0) %>% count()
DA_puu_coinf_12_res_merged %>% filter(puu_coinf_diff_abn == TRUE & puu_coinf_LFC<0) %>% count()

Same direction of association for for N vs S and N vs C

DA_ana_coinf_012_res_merged %>%
  filter(ana_single_diff_abn == TRUE & ana_coinf_diff_abn == TRUE) %>% count()

DA_ana_coinf_012_res_merged %>%
  filter(ana_single_diff_abn == TRUE &
           ana_single_LFC > 0 &
           ana_coinf_diff_abn == TRUE & ana_coinf_LFC > 0) %>% count()

DA_ana_coinf_012_res_merged %>%
  filter(ana_single_diff_abn == TRUE &
           ana_single_LFC < 0 &
           ana_coinf_diff_abn == TRUE & ana_coinf_LFC < 0) %>% count()

DA_bab_coinf_012_res_merged %>%
  filter(bab_single_diff_abn == TRUE & bab_coinf_diff_abn == TRUE) %>% count()

DA_bab_coinf_012_res_merged %>%
  filter(bab_single_diff_abn == TRUE &
           bab_single_LFC > 0 &
           bab_coinf_diff_abn == TRUE & bab_coinf_LFC > 0) %>% count()

DA_bab_coinf_012_res_merged %>%
  filter(bab_single_diff_abn == TRUE &
           bab_single_LFC < 0 &
           bab_coinf_diff_abn == TRUE & bab_coinf_LFC < 0) %>% count()

DA_bor_coinf_012_res_merged %>%
  filter(bor_single_diff_abn == TRUE & bor_coinf_diff_abn == TRUE) %>% count()

DA_bor_coinf_012_res_merged %>%
  filter(bor_single_diff_abn == TRUE &
           bor_single_LFC > 0 &
           bor_coinf_diff_abn == TRUE & bor_coinf_LFC > 0) %>% count()

DA_bor_coinf_012_res_merged %>%
  filter(bor_single_diff_abn == TRUE &
           bor_single_LFC < 0 &
           bor_coinf_diff_abn == TRUE & bor_coinf_LFC < 0) %>% count()

DA_puu_coinf_012_res_merged %>%
  filter(puu_single_diff_abn == TRUE & puu_coinf_diff_abn == TRUE) %>% count()

DA_puu_coinf_012_res_merged %>%
  filter(puu_single_diff_abn == TRUE &
           puu_single_LFC > 0 &
           puu_coinf_diff_abn == TRUE & puu_coinf_LFC > 0) %>% count()

DA_puu_coinf_012_res_merged %>%
  filter(puu_single_diff_abn == TRUE &
           puu_single_LFC < 0 &
           puu_coinf_diff_abn == TRUE & puu_coinf_LFC < 0) %>% count()

Genus level

Negative vs positive

# change TRUE in DA for yes and no
DA_ana_gen_res_merged$ana_DA<- ifelse(DA_ana_gen_res_merged$ana_pos_diff_abn == TRUE, "yes", "no")
DA_bab_gen_res_merged$bab_DA <- ifelse(DA_bab_gen_res_merged$bab_pos_diff_abn == TRUE, "yes", "no")
DA_bor_gen_res_merged$bor_DA <- ifelse(DA_bor_gen_res_merged$bor_pos_diff_abn == TRUE, "yes", "no")
DA_puu_gen_res_merged$puu_DA <- ifelse(DA_puu_gen_res_merged$puu_pos_diff_abn == TRUE, "yes", "no")

# make a dataframe with all DA genera for pathogens 
DA_path_gen_merged <- full_join(
  x = DA_ana_gen_res_merged %>%
    filter(ana_DA == "yes") %>%
    dplyr::select(c(ASVID, ana_DA, srt0_ana_neg, srt0_ana_pos, ana_pos_LFC, ana_pos_W)),
  y = DA_bab_gen_res_merged %>%
    filter(bab_DA == "yes") %>%
    dplyr::select(c(ASVID, bab_DA, srt0_bab_neg, srt0_bab_pos, bab_pos_LFC, bab_pos_W)),
  by = "ASVID") %>% 
  full_join(
    x = .,
    y = DA_bor_gen_res_merged %>%
      filter(bor_DA == "yes") %>%
      dplyr::select(c(ASVID, bor_DA, srt0_bor_neg, srt0_bor_pos, bor_pos_LFC, bor_pos_W)),
    by = "ASVID") %>% 
  full_join(
    x = .,
    y = DA_puu_gen_res_merged %>%
      filter(puu_DA == "yes") %>%
      dplyr::select(c(ASVID, puu_DA, srt0_puu_neg, srt0_puu_pos, puu_pos_LFC, puu_pos_W)),
    by = "ASVID") %>% 
  left_join(
    x = .,
    y = tax_gen,
    by = "ASVID")

DA_path_gen_merged %>% filter(ana_DA == "yes" & ana_pos_LFC>0) %>% count()
DA_path_gen_merged %>% filter(ana_DA == "yes" & ana_pos_LFC<0) %>% count()

DA_path_gen_merged %>% filter(bab_DA == "yes" & bab_pos_LFC>0) %>% count()
DA_path_gen_merged %>% filter(bab_DA == "yes" & bab_pos_LFC<0) %>% count()

DA_path_gen_merged %>% filter(bor_DA == "yes" & bor_pos_LFC>0) %>% count()
DA_path_gen_merged %>% filter(bor_DA == "yes" & bor_pos_LFC<0) %>% count()

DA_path_gen_merged %>% filter(puu_DA == "yes" & puu_pos_LFC>0) %>% count()
DA_path_gen_merged %>% filter(puu_DA == "yes" & puu_pos_LFC<0) %>% count()

Coinfection status considered

# N vs S
DA_ana_coinf_gen_012_res_merged %>% filter(ana_single_diff_abn == TRUE & ana_single_LFC>0) %>% count()
DA_ana_coinf_gen_012_res_merged %>% filter(ana_single_diff_abn == TRUE & ana_single_LFC<0) %>% count()

DA_bab_coinf_gen_012_res_merged %>% filter(bab_single_diff_abn == TRUE & bab_single_LFC>0) %>% count()
DA_bab_coinf_gen_012_res_merged %>% filter(bab_single_diff_abn == TRUE & bab_single_LFC<0) %>% count()

DA_bor_coinf_gen_012_res_merged %>% filter(bor_single_diff_abn == TRUE & bor_single_LFC>0) %>% count()
DA_bor_coinf_gen_012_res_merged %>% filter(bor_single_diff_abn == TRUE & bor_single_LFC<0) %>% count()

DA_puu_coinf_gen_012_res_merged %>% filter(puu_single_diff_abn == TRUE & puu_single_LFC>0) %>% count()
DA_puu_coinf_gen_012_res_merged %>% filter(puu_single_diff_abn == TRUE & puu_single_LFC<0) %>% count()

# N vs C
DA_ana_coinf_gen_012_res_merged %>% filter(ana_coinf_diff_abn == TRUE & ana_coinf_LFC>0) %>% count()
DA_ana_coinf_gen_012_res_merged %>% filter(ana_coinf_diff_abn == TRUE & ana_coinf_LFC<0) %>% count()

DA_bab_coinf_gen_012_res_merged %>% filter(bab_coinf_diff_abn == TRUE & bab_coinf_LFC>0) %>% count()
DA_bab_coinf_gen_012_res_merged %>% filter(bab_coinf_diff_abn == TRUE & bab_coinf_LFC<0) %>% count()

DA_bor_coinf_gen_012_res_merged %>% filter(bor_coinf_diff_abn == TRUE & bor_coinf_LFC>0) %>% count()
DA_bor_coinf_gen_012_res_merged %>% filter(bor_coinf_diff_abn == TRUE & bor_coinf_LFC<0) %>% count()

DA_puu_coinf_gen_012_res_merged %>% filter(puu_coinf_diff_abn == TRUE & puu_coinf_LFC>0) %>% count()
DA_puu_coinf_gen_012_res_merged %>% filter(puu_coinf_diff_abn == TRUE & puu_coinf_LFC<0) %>% count()

# S vs C
DA_ana_coinf_gen_12_res_merged %>% filter(ana_coinf_diff_abn == TRUE & ana_coinf_LFC>0) %>% count()
DA_ana_coinf_gen_12_res_merged %>% filter(ana_coinf_diff_abn == TRUE & ana_coinf_LFC<0) %>% count()

DA_puu_coinf_gen_12_res_merged %>% filter(puu_coinf_diff_abn == TRUE & puu_coinf_LFC>0) %>% count()
DA_puu_coinf_gen_12_res_merged %>% filter(puu_coinf_diff_abn == TRUE & puu_coinf_LFC<0) %>% count()

Same direction of association for N vs S and N vs C

DA_ana_coinf_gen_012_res_merged %>%
  filter(ana_single_diff_abn == TRUE & ana_coinf_diff_abn == TRUE) %>% count()

DA_ana_coinf_gen_012_res_merged %>%
  filter(ana_single_diff_abn == TRUE &
           ana_single_LFC > 0 &
           ana_coinf_diff_abn == TRUE & ana_coinf_LFC > 0) %>% count()

DA_ana_coinf_gen_012_res_merged %>%
  filter(ana_single_diff_abn == TRUE &
           ana_single_LFC < 0 &
           ana_coinf_diff_abn == TRUE & ana_coinf_LFC < 0) %>% count()

DA_bab_coinf_gen_012_res_merged %>%
  filter(bab_single_diff_abn == TRUE & bab_coinf_diff_abn == TRUE) %>% count()

DA_bab_coinf_gen_012_res_merged %>%
  filter(bab_single_diff_abn == TRUE &
           bab_single_LFC > 0 &
           bab_coinf_diff_abn == TRUE & bab_coinf_LFC > 0) %>% count()

DA_bab_coinf_gen_012_res_merged %>%
  filter(bab_single_diff_abn == TRUE &
           bab_single_LFC < 0 &
           bab_coinf_diff_abn == TRUE & bab_coinf_LFC < 0) %>% count()

DA_bor_coinf_gen_012_res_merged %>%
  filter(bor_single_diff_abn == TRUE & bor_coinf_diff_abn == TRUE) %>% count()

DA_bor_coinf_gen_012_res_merged %>%
  filter(bor_single_diff_abn == TRUE &
           bor_single_LFC > 0 &
           bor_coinf_diff_abn == TRUE & bor_coinf_LFC > 0) %>% count()

DA_bor_coinf_gen_012_res_merged %>%
  filter(bor_single_diff_abn == TRUE &
           bor_single_LFC < 0 &
           bor_coinf_diff_abn == TRUE & bor_coinf_LFC < 0) %>% count()

DA_puu_coinf_gen_012_res_merged %>%
  filter(puu_single_diff_abn == TRUE & puu_coinf_diff_abn == TRUE) %>% count()

DA_puu_coinf_gen_012_res_merged %>%
  filter(puu_single_diff_abn == TRUE &
           puu_single_LFC > 0 &
           puu_coinf_diff_abn == TRUE & puu_coinf_LFC > 0) %>% count()

DA_puu_coinf_gen_012_res_merged %>%
  filter(puu_single_diff_abn == TRUE &
           puu_single_LFC < 0 &
           puu_coinf_diff_abn == TRUE & puu_coinf_LFC < 0) %>% count()

Figures

Note: Final edits (on exported SVG files) for better arrangement, text adjustments and highlighting done using Inkscape

Define specific colors and shapes:

col_inf <- c("yes" = "#002642", "no" = "#FFAE23")
col_coinf <- c("0" = "#FFAE23", "1" = "#ee4266", "2"="#540d6e")
col_path <- c("positive" = "#C72614", "negative" = "#FFAE23")
col_coinf_path <- c("negative" = "#FFAE23", "single" = "#ee4266", "coinfected"="#540d6e")

Figure 1

# create a string of animal id's that have the specific pathogens
ana_pos <- subset(VolePathData_micro, ana == "positive") %>% {as.character(.$animal_id)}
bab_pos <- subset(VolePathData_micro, bab == "positive") %>% {as.character(.$animal_id)}
bor_pos <- subset(VolePathData_micro, bor == "positive") %>% {as.character(.$animal_id)}
puu_pos <- subset(VolePathData_micro, puu == "positive") %>% {as.character(.$animal_id)}

# make a list
venn_list <- list(
  Anaplasma  = ana_pos,
  Babesia = bab_pos,
  Borrelia = bor_pos,
  Puumala = puu_pos)

# make the Venn diagram
ggvenn(
  venn_list,
  fill_color = c("#540d6e","#3bceac", "#FFAE23", "#ee4266"),
  fill_alpha = 0.6,
  stroke_color = NA,
  stroke_size = 0.5,
  stroke_alpha = 0.8,
  set_name_size = 6,
  text_size = 2,
  show_percentage = FALSE)

Figure 2

Figure 2A

Prepare data frames:

# all points
ordin_jacc_df_ana <-
  left_join(
    x = as_tibble(ordin_jacc$vectors, rownames = "SampleID_G") %>%
      full_join(.,
                AlphaDivMeta_plot %>%
                  dplyr::select(c(SampleID_G, ana)),
                by = "SampleID_G"),
    y = as_tibble(ordin_jacc$vectors, rownames = "SampleID_G") %>%
      full_join(.,
                AlphaDivMeta_plot %>%
                  dplyr::select(c(SampleID_G, ana)),
                by = "SampleID_G") %>%
      group_by(ana) %>%
      summarise_if(is.numeric, mean),
    by = "ana",
    suffix = c("_ind", "_centr")  )

# get just centroids so ggplot2 doesn't plot 50 points on top of each other
ordin_jacc_centr_ana <- 
  as_tibble(ordin_jacc$vectors, rownames = "SampleID_G") %>%
  left_join(.,
            VolePathData_micro %>%
              dplyr::select(c(SampleID_G, ana)),
            by = "SampleID_G") %>%
  group_by(ana) %>%
  summarise_if(is.numeric, mean)

Create the plots:

# get axis values
round(ordin_jacc$values$Relative_eig[1:10]*100, digits = 2)

# ordination
p_ana_ordin <- 
ggplot(ordin_jacc_df_ana) +
  geom_convexhull(aes(x = Axis.1_ind, 
                      y = Axis.2_ind,
                      fill = ana), 
                  alpha = 0.05, 
                  col = NA) +
  geom_point(data = ordin_jacc_centr_ana,  
             aes(x = Axis.1,
                 y = Axis.2,
                 col = ana, 
                 fill = ana),
             size = 3.5, 
             alpha = 0.4, 
             shape = 16) +
  geom_point(aes(x = Axis.1_ind, 
                 y = Axis.2_ind, 
                 col = ana), 
             size = 1.5, 
             alpha = 1,
             shape = 16) +
  scale_color_manual(values = col_path) + 
  scale_fill_manual(values = col_path) +
  labs(x = "PCoA 1 (5.68%)",
       y = "PCoA 2 (3.83%)") +
  scale_y_continuous(limits = c(- 0.25, 0.26)) + # adjust the y axis so other plot doesn't hide anything
  theme(axis.title.x = element_text(size = 12, margin = margin(t = 12), colour = "black"),
        axis.text.x  = element_text(size = 11, colour = "black"),
        axis.title.y = element_text(size = 12, margin = margin(r = 12), colour = "black"),
        axis.text.y  = element_text(size = 11, colour = "black"),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5),
        plot.margin = unit(c(1, 0.5, 1, 0.5), "cm"),
        panel.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_rect(colour = "lightgrey", fill = NA, size = 0.7) )

# dispersion
p_ana_disp <- 
ggplot(disp_jacc_ana_plot, aes(x = group, y = dist_centr, colour = group, fill = group)) +
  geom_boxplot(width = 0.45,
               outlier.colour = NA,
               alpha = 0.08,
               lwd = 0.1,
               fill = NA) +
  geom_point(shape = 16,
             size = 0.3,
             alpha = 0.5,
             position = position_jitter(seed = 42, width = 0.2)) +
  scale_color_manual(values = col_path) +
  scale_fill_manual(values = col_path) +
  labs(x = NULL, 
       y = NULL,
       title = NULL) +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_text(size = 5, colour = "black"),
        axis.title.y = element_blank(),
        legend.position = "none",
        panel.background = element_blank(),
        panel.grid.major = element_line(size = 0.2, color = "gray94"),
        panel.grid.minor = element_blank(),
        plot.margin = margin(t = 5, r = 5, b = 5, l = 5, unit = "pt"),
        plot.background = element_rect(color = "lightgrey", size = 0.5)) +
  stat_summary(fun = mean, geom = "point", size = 0.7, inherit.aes = TRUE, shape = 16) +
  coord_cartesian(xlim = c(1.25, 1.7))

Figure 2B

Prepare data frames:

# all data points
ordin_jacc_df_ana_coinf <-
  left_join(
    x = as_tibble(ordin_jacc$vectors, rownames = "SampleID_G") %>%
      full_join(.,
                AlphaDivMeta_plot %>%
                  dplyr::select(c(SampleID_G, ana_coinf_plot, ana_coinf)),
                by = "SampleID_G"),
    y = as_tibble(ordin_jacc$vectors, rownames = "SampleID_G") %>%
      full_join(.,
                AlphaDivMeta_plot %>%
                  dplyr::select(c(SampleID_G, ana_coinf_plot, ana_coinf)),
                by = "SampleID_G") %>%
      group_by(ana_coinf) %>%
      summarise_if(is.numeric, mean),
    by = "ana_coinf",
    suffix = c("_ind", "_centr")  )

# get just centroid so it doesn't plot 50 points on top of each other
ordin_jacc_centr_ana_coinf <- 
  as_tibble(ordin_jacc$vectors, rownames = "SampleID_G") %>%
  left_join(.,
            VolePathData_micro %>%
              dplyr::select(c(SampleID_G, ana_coinf)),
            by = "SampleID_G") %>%
  group_by(ana_coinf) %>%
  summarise_if(is.numeric, mean)

Make the plots:

# ordination
p_ana_coinf_ordin <- 
  ggplot(ordin_jacc_df_ana_coinf) +
  geom_convexhull(aes(Axis.1_ind, 
                      y = Axis.2_ind,
                      fill = ana_coinf), 
                  alpha = 0.06, 
                  col = NA) +
  geom_point(data = ordin_jacc_centr_ana_coinf,  
             aes(x = Axis.1,
                 y = Axis.2,
                 col = ana_coinf, 
                 fill = ana_coinf),
             size = 3.5, 
             alpha = 0.4, 
             shape = 16) +
    geom_point(aes(x = Axis.1_ind, 
                 y = Axis.2_ind, 
                 col = ana_coinf, 
                 fill = ana_coinf, 
                 shape = ana_coinf_plot), 
             size = 1.5, 
             alpha = 1) +
  scale_color_manual(values = col_coinf_path) + 
  scale_fill_manual(values = col_coinf_path) +
  scale_shape_manual(values = shapes_coinf) +
  labs(x = "PCoA 1 (5.68%)",
       y = "PCoA 2 (3.83%)",
       title = NULL,
       color = "Infection status",
       fill = "Infection status",
       shape = "Infection status") +
  scale_y_continuous(limits = c(- 0.25, 0.26)) + # adjust the y axis so other plot doesn't hide anything
  theme(axis.title.x = element_text(size = 12, margin = margin(t = 12), colour = "black"),
        axis.text.x  = element_text(size = 11, colour = "black"),
        axis.title.y = element_text(size = 12, margin = margin(r = 12), colour = "black"),
        axis.text.y  = element_text(size = 11, colour = "black"),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5),
        plot.margin = unit(c(1, 0.5, 1, 0.5), "cm"),
        panel.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_rect(colour = "lightgrey", fill = NA, size = 0.7) )

# dispersion
p_ana_coinf_disp <- 
  ggplot(disp_jacc_ana_coinf_plot, aes(x = group, y = dist_centr, colour = group, fill = group)) +
  geom_boxplot(width = 0.45,
               outlier.colour = NA,
               alpha = 0.08,
               lwd = 0.1,
               fill = NA) +
  geom_point(shape = 16,
             size = 0.3,
             alpha = 0.5,
             position = position_jitter(seed = 42, width = 0.2)) +
  scale_color_manual(values = col_coinf_path) +
  scale_fill_manual(values = col_coinf_path) +
  labs(x = NULL, 
       y = NULL,
       title = NULL) +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_text(size = 5, colour = "black"),
        axis.title.y = element_blank(),
        legend.position = "none",
        panel.background = element_blank(),
        panel.grid.major = element_line(size = 0.2, color = "gray94"),
        panel.grid.minor = element_blank(),
        plot.margin = margin(t = 5, r = 5, b = 5, l = 5, unit = "pt"),
        plot.background = element_rect(color = "lightgrey", size = 0.5)) +
  stat_summary(fun = mean, geom = "point", size = 0.7, inherit.aes = TRUE, shape = 16) +
  coord_cartesian(xlim = c(1.25, 2.7))

Merge to get the final figure:

(p_ana_ordin + inset_element(p_ana_disp, left = 0.01, bottom = 0.01, right = 0.28, top = 0.3, align_to = "panel")) +
(p_ana_coinf_ordin + inset_element(p_ana_coinf_disp, left = 0.01, bottom = 0.01, right = 0.28, top = 0.3, align_to = "panel"))

Figure 3

Figure 3A

Prepare data frames:

# all data points
ordin_jacc_df_bor <-
  left_join(
    x = as_tibble(ordin_jacc$vectors, rownames = "SampleID_G") %>%
      full_join(.,
                AlphaDivMeta_plot %>%
                  dplyr::select(c(SampleID_G, bor)),
                by = "SampleID_G"),
    y = as_tibble(ordin_jacc$vectors, rownames = "SampleID_G") %>%
      full_join(.,
                AlphaDivMeta_plot %>%
                  dplyr::select(c(SampleID_G, bor)),
                by = "SampleID_G") %>%
      group_by(bor) %>%
      summarise_if(is.numeric, mean),
    by = "bor",
    suffix = c("_ind", "_centr")  )

# get just centroid so it doesn't plot 50 points on top of each other
ordin_jacc_centr_bor <- 
  as_tibble(ordin_jacc$vectors, rownames = "SampleID_G") %>%
  left_join(.,
            VolePathData_micro %>%
              dplyr::select(c(SampleID_G, bor)),
            by = "SampleID_G") %>%
  group_by(bor) %>%
  summarise_if(is.numeric, mean)

# ordination
p_bor_ordin <- 
ggplot(ordin_jacc_df_bor) +
  geom_convexhull(aes(x = Axis.2_ind, 
                      y = Axis.3_ind,
                      fill = bor), 
                  alpha = 0.05, 
                  col = NA) +
  geom_point(data = ordin_jacc_centr_bor,  
             aes(x = Axis.2,
                 y = Axis.3,
                 col = bor, 
                 fill = bor),
             size = 3.5, 
             alpha = 0.4, 
             shape = 16) +
  geom_point(aes(x = Axis.2_ind, 
                 y = Axis.3_ind, 
                 col = bor), 
             size = 1.5, 
             alpha = 1,
             shape = 16) +
  scale_color_manual(values = col_path) + 
  scale_fill_manual(values = col_path) +
  labs(x = "PCoA 2 (3.83%)",
       y = "PCoA 3 (2.10%)") +
  scale_y_continuous(limits = c(- 0.38, 0.17)) + # adjust the y axis so other plot doesn't hide anything
  theme(axis.title.x = element_text(size = 12, margin = margin(t = 12), colour = "black"),
        axis.text.x  = element_text(size = 11, colour = "black"),
        axis.title.y = element_text(size = 12, margin = margin(r = 12), colour = "black"),
        axis.text.y  = element_text(size = 11, colour = "black"),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5),
        plot.margin = unit(c(1, 0.5, 1, 0.5), "cm"),
        panel.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_rect(colour = "lightgrey", fill = NA, size = 0.7) )

# dispersion
p_bor_disp <- 
ggplot(disp_jacc_bor_plot, aes(x = group, y = dist_centr, colour = group, fill = group)) +
  geom_boxplot(width = 0.45,
               outlier.colour = NA,
               alpha = 0.08,
               lwd = 0.1,
               fill = NA) +
  geom_point(shape = 16,
             size = 0.3,
             alpha = 0.5,
             position = position_jitter(seed = 42, width = 0.2)) +
  scale_color_manual(values = col_path) +
  scale_fill_manual(values = col_path) +
  labs(x = NULL, 
       y = NULL,
       title = NULL) +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_text(size = 5, colour = "black"),
        axis.title.y = element_blank(),
        legend.position = "none",
        panel.background = element_blank(),
        panel.grid.major = element_line(size = 0.2, color = "gray94"),
        panel.grid.minor = element_blank(),
        plot.margin = margin(t = 5, r = 5, b = 5, l = 5, unit = "pt"),
        plot.background = element_rect(color = "lightgrey", size = 0.5)) +
  stat_summary(fun = mean, geom = "point", size = 0.7, inherit.aes = TRUE, shape = 16) +
  coord_cartesian(xlim = c(1.25, 1.7))

Figure 3B:

# all data points
ordin_jacc_df_bor_coinf <-
  left_join(
    x = as_tibble(ordin_jacc$vectors, rownames = "SampleID_G") %>%
      full_join(.,
                AlphaDivMeta_plot %>%
                  dplyr::select(c(SampleID_G, bor_coinf_plot, bor_coinf)),
                by = "SampleID_G"),
    y = as_tibble(ordin_jacc$vectors, rownames = "SampleID_G") %>%
      full_join(.,
                AlphaDivMeta_plot %>%
                  dplyr::select(c(SampleID_G, bor_coinf_plot, bor_coinf)),
                by = "SampleID_G") %>%
      group_by(bor_coinf) %>%
      summarise_if(is.numeric, mean),
    by = "bor_coinf",
    suffix = c("_ind", "_centr")  )

# get just centroid so it doesn't plot 50 points on top of each other
ordin_jacc_centr_bor_coinf <- 
  as_tibble(ordin_jacc$vectors, rownames = "SampleID_G") %>%
  left_join(.,
            VolePathData_micro %>%
              dplyr::select(c(SampleID_G, bor_coinf)),
            by = "SampleID_G") %>%
  group_by(bor_coinf) %>%
  summarise_if(is.numeric, mean)

# ordination
p_bor_coinf_ordin <- 
  ggplot(ordin_jacc_df_bor_coinf) +
  geom_convexhull(aes(Axis.2_ind, 
                      y = Axis.3_ind,
                      fill = bor_coinf), 
                  alpha = 0.06, 
                  col = NA) +
  geom_point(data = ordin_jacc_centr_bor_coinf,  
             aes(x = Axis.2,
                 y = Axis.3,
                 col = bor_coinf, 
                 fill = bor_coinf),
             size = 3.5, 
             alpha = 0.4, 
             shape = 16) +
    geom_point(aes(x = Axis.2_ind, 
                 y = Axis.3_ind, 
                 col = bor_coinf, 
                 fill = bor_coinf, 
                 shape = bor_coinf_plot), 
             size = 1.5, 
             alpha = 1) +
  scale_color_manual(values = col_coinf_path) + 
  scale_fill_manual(values = col_coinf_path) +
  scale_shape_manual(values = shapes_coinf) +
  labs(x = "PCoA 2 (3.83%)",
       y = "PCoA 3 (2.10%)",
       title = NULL,
       color = "Infection status",
       fill = "Infection status",
       shape = "Infection status") +
  scale_y_continuous(limits = c(- 0.38, 0.17)) + # adjust the y axis so other plot doesn't hide anything
  theme(axis.title.x = element_text(size = 12, margin = margin(t = 12), colour = "black"),
        axis.text.x  = element_text(size = 11, colour = "black"),
        axis.title.y = element_text(size = 12, margin = margin(r = 12), colour = "black"),
        axis.text.y  = element_text(size = 11, colour = "black"),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5),
        plot.margin = unit(c(1, 0.5, 1, 0.5), "cm"),
        panel.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_rect(colour = "lightgrey", fill = NA, size = 0.7) )

# dispersion
p_bor_coinf_disp <- 
  ggplot(disp_jacc_bor_coinf_plot, aes(x = group, y = dist_centr, colour = group, fill = group)) +
  geom_boxplot(width = 0.45,
               outlier.colour = NA,
               alpha = 0.08,
               lwd = 0.1,
               fill = NA) +
  geom_point(shape = 16,
             size = 0.3,
             alpha = 0.5,
             position = position_jitter(seed = 42, width = 0.2)) +
  scale_color_manual(values = col_coinf_path) +
  scale_fill_manual(values = col_coinf_path) +
  labs(x = NULL, 
       y = NULL,
       title = NULL) +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_text(size = 5, colour = "black"),
        axis.title.y = element_blank(),
        legend.position = "none",
        panel.background = element_blank(),
        panel.grid.major = element_line(size = 0.2, color = "gray94"),
        panel.grid.minor = element_blank(),
        plot.margin = margin(t = 5, r = 5, b = 5, l = 5, unit = "pt"),
        plot.background = element_rect(color = "lightgrey", size = 0.5)) +
  stat_summary(fun = mean, geom = "point", size = 0.7, inherit.aes = TRUE, shape = 16) +
  coord_cartesian(xlim = c(1.25, 2.7))

Merge for the final figure

(p_bor_ordin + inset_element(p_bor_disp, left = 0.01, bottom = 0.01, right = 0.28, top = 0.3, align_to = "panel")) +
(p_bor_coinf_ordin + inset_element(p_bor_coinf_disp, left = 0.01, bottom = 0.01, right = 0.28, top = 0.3, align_to = "panel"))

Figure S1

Get the data, constrained ordination using distance-based redundancy analysis

# run dbrda
dbrda_uf_coinf <- dbrda(d_uf ~ coinf_stat + poll_group + reprod + location, VolePathData_micro)

# get information on % variance explained by each of constrained axes
summary(dbrda_uf_coinf)$concont

# DF - individual points
meta_dbrda_uf_coinf <-
  as_tibble(scores(dbrda_uf_coinf, choices = 1:6, scaling = "symmetric")$sites,
            rownames = "SampleID_G") %>%
  full_join(
    ., 
    VolePathData_micro,
    by = "SampleID_G")

# DF - centroids
meta_dbrda_uf_coinf_centr <-
  as_tibble(scores(dbrda_uf_coinf, choices = 1:6, scaling = "symmetric")$centroids %>% 
      as_tibble(rownames = "coinf_stat") %>%
      mutate(
        coinf_stat = str_replace(coinf_stat, "coinf_stat", "")) %>% 
        slice(1:3))

Plot:

# plot the constrained ordination
p_coinf_dbrda <-
  ggplot(meta_dbrda_uf_coinf, aes(x = dbRDA1, y = dbRDA3)) +  
  geom_convexhull(aes(fill = coinf_stat), 
                  alpha = 0.06, 
                  col = NA) +
  geom_point(data = meta_dbrda_uf_coinf_centr,
             aes(x = dbRDA1, 
                 y = dbRDA3,
                 col = coinf_stat,
                 fill = coinf_stat),
             size = 3.5, 
             alpha = 0.4, 
             shape = 16) +
  geom_point(aes(col = coinf_stat), 
             size = 1.5, 
             alpha = 1, 
             shape = 16) +
  scale_color_manual(values = alpha(col_coinf, 0.4), labels = c("Negative", "Single infection", "Coinfected")) + 
  scale_fill_manual(values = alpha(col_coinf, 0.4), labels = c("Negative", "Single infection", "Coinfected")) +
  labs(x = "dbRDA 1 (38.1%)",
       y = "dbRDA 3 (13.6%)",
       fill = "Coinfection status",
       col = "Coinfection status") +
  scale_y_continuous(limits = c(-1.05, 0.65)) + # adjust the y axis so other plot doesn't hide anything
  theme(axis.title.x = element_text(size = 12, margin = margin(t = 12), colour = "black"),
        axis.text.x  = element_text(size = 11, colour = "black"),
        axis.title.y = element_text(size = 12, margin = margin(r = 12), colour = "black"),
        axis.text.y  = element_text(size = 11, colour = "black"),
        legend.position = "bottom",
        plot.title = element_text(hjust = 0.5),
        plot.margin = unit(c(1, 0.5, 1, 0.5), "cm"),
        panel.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_rect(colour = "lightgrey", fill = NA, size = 0.7) )

# dispersion
p_coinf_disp <- 
  ggplot(disp_uf_coinf_plot, aes(x = group, y = dist_centr, colour = group, fill = group)) +
  geom_boxplot(width = 0.45,
               outlier.colour = NA,
               alpha = 0.08,
               lwd = 0.1,
               fill = NA) +
  geom_point(shape = 16,
             size = 0.3,
             alpha = 0.5,
             position = position_jitter(seed = 42, width = 0.2)) +
  scale_color_manual(values = col_coinf) +
  scale_fill_manual(values = col_coinf) +
  labs(x = NULL, 
       y = NULL,
       title = NULL) +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_text(size = 5, colour = "black"),
        axis.title.y = element_blank(),
        legend.position = "none",
        panel.background = element_blank(),
        panel.grid.major = element_line(size = 0.2, color = "gray94"),
        panel.grid.minor = element_blank(),
        plot.margin = margin(t = 5, r = 5, b = 5, l = 5, unit = "pt"),
        plot.background = element_rect(color = "lightgrey", size = 0.5)) +
  stat_summary(fun = mean, geom = "point", size = 0.7, inherit.aes = TRUE, shape = 16) +
  coord_cartesian(xlim = c(1.25, 2.7))

# merge
p_coinf_dbrda + inset_element(p_coinf_disp, left = 0.01, bottom = 0.01, right = 0.28, top = 0.3, align_to = "panel")

Figure S2

Figure S2A

Prepare data frames:

# Make a data frame
ordin_jacc_df_bab <-
  left_join(
    x = as_tibble(ordin_jacc$vectors, rownames = "SampleID_G") %>%
      full_join(.,
                AlphaDivMeta_plot %>%
                  dplyr::select(c(SampleID_G, bab)),
                by = "SampleID_G"),
    y = as_tibble(ordin_jacc$vectors, rownames = "SampleID_G") %>%
      full_join(.,
                AlphaDivMeta_plot %>%
                  dplyr::select(c(SampleID_G, bab)),
                by = "SampleID_G") %>%
      group_by(bab) %>%
      summarise_if(is.numeric, mean),
    by = "bab",
    suffix = c("_ind", "_centr")  )

# get just centroid so it doesn't plot 50 points on top of each other
ordin_jacc_centr_bab <- 
  as_tibble(ordin_jacc$vectors, rownames = "SampleID_G") %>%
  left_join(.,
            VolePathData_micro %>%
              dplyr::select(c(SampleID_G, bab)),
            by = "SampleID_G") %>%
  group_by(bab) %>%
  summarise_if(is.numeric, mean)

Plots:

# ordination
p_bab_ordin <- 
ggplot(ordin_jacc_df_bab) +
  geom_convexhull(aes(x = Axis.1_ind, 
                      y = Axis.2_ind,
                      fill = bab), 
                  alpha = 0.05, 
                  col = NA) +
  geom_point(data = ordin_jacc_centr_bab,  
             aes(x = Axis.1,
                 y = Axis.2,
                 col = bab, 
                 fill = bab),
             size = 3.5, 
             alpha = 0.4, 
             shape = 16) +
  geom_point(aes(x = Axis.1_ind, 
                 y = Axis.2_ind, 
                 col = bab), 
             size = 1.5, 
             alpha = 1,
             shape = 16) +
  scale_color_manual(values = col_path) + 
  scale_fill_manual(values = col_path) +
  labs(x = "PCoA 1 (5.68%)",
       y = "PCoA 2 (3.83%)") +
  scale_y_continuous(limits = c(- 0.25, 0.26)) + # adjust the y axis so other plot doesn't hide anything
  theme(axis.title.x = element_text(size = 12, margin = margin(t = 12), colour = "black"),
        axis.text.x  = element_text(size = 11, colour = "black"),
        axis.title.y = element_text(size = 12, margin = margin(r = 12), colour = "black"),
        axis.text.y  = element_text(size = 11, colour = "black"),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5),
        plot.margin = unit(c(1, 0.5, 1, 0.5), "cm"),
        panel.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_rect(colour = "lightgrey", fill = NA, size = 0.7) )

# dispersion
p_bab_disp <- 
ggplot(disp_jacc_bab_plot, aes(x = group, y = dist_centr, colour = group, fill = group)) +
  geom_boxplot(width = 0.45,
               outlier.colour = NA,
               alpha = 0.08,
               lwd = 0.1,
               fill = NA) +
  geom_point(shape = 16,
             size = 0.3,
             alpha = 0.5,
             position = position_jitter(seed = 42, width = 0.2)) +
  scale_color_manual(values = col_path) +
  scale_fill_manual(values = col_path) +
  labs(x = NULL, 
       y = NULL,
       title = NULL) +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_text(size = 5, colour = "black"),
        axis.title.y = element_blank(),
        legend.position = "none",
        panel.background = element_blank(),
        panel.grid.major = element_line(size = 0.2, color = "gray94"),
        panel.grid.minor = element_blank(),
        plot.margin = margin(t = 5, r = 5, b = 5, l = 5, unit = "pt"),
        plot.background = element_rect(color = "lightgrey", size = 0.5)) +
  stat_summary(fun = mean, geom = "point", size = 0.7, inherit.aes = TRUE, shape = 16) +
  coord_cartesian(xlim = c(1.25, 1.7))

Figure S2B

# Make a data frame
ordin_jacc_df_bab_coinf <-
  left_join(
    x = as_tibble(ordin_jacc$vectors, rownames = "SampleID_G") %>%
      full_join(.,
                AlphaDivMeta_plot %>%
                  dplyr::select(c(SampleID_G, bab_coinf_plot, bab_coinf)),
                by = "SampleID_G"),
    y = as_tibble(ordin_jacc$vectors, rownames = "SampleID_G") %>%
      full_join(.,
                AlphaDivMeta_plot %>%
                  dplyr::select(c(SampleID_G, bab_coinf_plot, bab_coinf)),
                by = "SampleID_G") %>%
      group_by(bab_coinf) %>%
      summarise_if(is.numeric, mean),
    by = "bab_coinf",
    suffix = c("_ind", "_centr")  )

# get just centroids
ordin_jacc_centr_bab_coinf <- 
  as_tibble(ordin_jacc$vectors, rownames = "SampleID_G") %>%
  left_join(.,
            VolePathData_micro %>%
              dplyr::select(c(SampleID_G, bab_coinf)),
            by = "SampleID_G") %>%
  group_by(bab_coinf) %>%
  summarise_if(is.numeric, mean)

# ordination
p_bab_coinf_ordin <- 
  ggplot(ordin_jacc_df_bab_coinf) +
  geom_convexhull(aes(Axis.1_ind, 
                      y = Axis.2_ind,
                      fill = bab_coinf), 
                  alpha = 0.06, 
                  col = NA) +
  geom_point(data = ordin_jacc_centr_bab_coinf,  
             aes(x = Axis.1,
                 y = Axis.2,
                 col = bab_coinf, 
                 fill = bab_coinf),
             size = 3.5, 
             alpha = 0.4, 
             shape = 16) +
    geom_point(aes(x = Axis.1_ind, 
                 y = Axis.2_ind, 
                 col = bab_coinf, 
                 fill = bab_coinf, 
                 shape = bab_coinf_plot), 
             size = 1.5, 
             alpha = 1) +
  scale_color_manual(values = col_coinf_path) + 
  scale_fill_manual(values = col_coinf_path) +
  scale_shape_manual(values = shapes_coinf) +
  labs(x = "PCoA 1 (5.68%)",
       y = "PCoA 2 (3.83%)",
       title = NULL,
       color = "Infection status",
       fill = "Infection status",
       shape = "Infection status") +
  scale_y_continuous(limits = c(- 0.25, 0.26)) + # adjust the y axis so other plot doesn't hide anything
  theme(axis.title.x = element_text(size = 12, margin = margin(t = 12), colour = "black"),
        axis.text.x  = element_text(size = 11, colour = "black"),
        axis.title.y = element_text(size = 12, margin = margin(r = 12), colour = "black"),
        axis.text.y  = element_text(size = 11, colour = "black"),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5),
        plot.margin = unit(c(1, 0.5, 1, 0.5), "cm"),
        panel.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_rect(colour = "lightgrey", fill = NA, size = 0.7) )

# dispersion
p_bab_coinf_disp <- 
  ggplot(disp_jacc_bab_coinf_plot, aes(x = group, y = dist_centr, colour = group, fill = group)) +
  geom_boxplot(width = 0.45,
               outlier.colour = NA,
               alpha = 0.08,
               lwd = 0.1,
               fill = NA) +
  geom_point(shape = 16,
             size = 0.3,
             alpha = 0.5,
             position = position_jitter(seed = 42, width = 0.2)) +
  scale_color_manual(values = col_coinf_path) +
  scale_fill_manual(values = col_coinf_path) +
  labs(x = NULL, 
       y = NULL,
       title = NULL) +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_text(size = 5, colour = "black"),
        axis.title.y = element_blank(),
        legend.position = "none",
        panel.background = element_blank(),
        panel.grid.major = element_line(size = 0.2, color = "gray94"),
        panel.grid.minor = element_blank(),
        plot.margin = margin(t = 5, r = 5, b = 5, l = 5, unit = "pt"),
        plot.background = element_rect(color = "lightgrey", size = 0.5)) +
  stat_summary(fun = mean, geom = "point", size = 0.7, inherit.aes = TRUE, shape = 16) +
  coord_cartesian(xlim = c(1.25, 2.7))

Merge to get final figure:

(p_bab_ordin + inset_element(p_bab_disp, left = 0.01, bottom = 0.01, right = 0.28, top = 0.3, align_to = "panel")) +
(p_bab_coinf_ordin + inset_element(p_bab_coinf_disp, left = 0.01, bottom = 0.01, right = 0.28, top = 0.3, align_to = "panel"))

Figure S3

Modified heat-map for DA genera for all pathogens together.

Prepare data:

# make a DF from N-P results
DA_path_gen_NP <- 
  full_join(
    x = DA_ana_gen_res_merged %>% 
      mutate(DA_ana = ana_pos_diff_abn * ana_pos_LFC) %>% 
      dplyr::select(ASVID, DA_ana), 
    y = DA_bab_gen_res_merged %>%
      mutate(DA_bab = bab_pos_diff_abn * bab_pos_LFC) %>% 
      dplyr::select(ASVID, DA_bab),
    by = "ASVID") %>% 
  full_join(
    x = .,
    y = DA_bor_gen_res_merged %>%
      mutate(DA_bor = bor_pos_diff_abn * bor_pos_LFC) %>% 
      dplyr::select(ASVID, DA_bor),
    by = "ASVID") %>% 
  full_join(
    x = .,
    y = DA_puu_gen_res_merged %>%
      mutate(DA_puu = puu_pos_diff_abn * puu_pos_LFC) %>% 
      dplyr::select(ASVID, DA_puu),
    by = "ASVID")

# make the big plotting data frame
DA_path_gen_all_plot <-
  full_join(
    x = DA_ana_coinf_gen_012_res_merged %>%
      mutate(DA_ana_01 = ana_single_diff_abn * ana_single_LFC,
             DA_ana_02 = ana_coinf_diff_abn * ana_coinf_LFC) %>%
      dplyr::select(ASVID, DA_ana_01, DA_ana_02),
    y = DA_bab_coinf_gen_012_res_merged %>%
      mutate(DA_bab_01 = bab_single_diff_abn * bab_single_LFC,
             DA_bab_02 = bab_coinf_diff_abn * bab_coinf_LFC) %>%
      dplyr::select(ASVID, DA_bab_01, DA_bab_02),
    by = "ASVID") %>% 
  full_join(
    x = .,
    y = DA_bor_coinf_gen_012_res_merged %>%
      mutate(DA_bor_01 = bor_single_diff_abn * bor_single_LFC,
             DA_bor_02 = bor_coinf_diff_abn * bor_coinf_LFC) %>%
      dplyr::select(ASVID, DA_bor_01, DA_bor_02),
    by = "ASVID") %>% 
  full_join(
    x = .,
    y = DA_puu_coinf_gen_012_res_merged %>%
      mutate(DA_puu_01 = puu_single_diff_abn * puu_single_LFC,
             DA_puu_02 = puu_coinf_diff_abn * puu_coinf_LFC) %>%
      dplyr::select(ASVID, DA_puu_01, DA_puu_02),
    by = "ASVID") %>% 
  # add in the few ASVs diff abundant between 1 and 2 (in anaplasma and puumala)
  full_join(
    x = .,
    y = DA_ana_coinf_gen_12_res_merged %>%
      mutate(DA_ana_12 = ana_coinf_diff_abn * ana_coinf_LFC) %>%
      dplyr::select(ASVID, DA_ana_12),
    by = "ASVID") %>% 
  full_join(
    x = .,
    y = DA_puu_coinf_gen_12_res_merged %>%
      mutate(DA_puu_12 = puu_coinf_diff_abn * puu_coinf_LFC) %>%
      dplyr::select(ASVID, DA_puu_12),
    by = "ASVID") %>%
  full_join(
    x = .,
    y = DA_path_gen_NP,
    by = "ASVID") %>% 
  # remove ASVs not differently abundant for either of the groups
  filter(if_any(-ASVID, ~ .!=0)) %>%  
  # replace zeros with NA's
  mutate(across(everything(), ~ replace(., . == 0, NA))) %>%   
  # add in taxonomy
  left_join( 
    x = .,
    y = tax_gen,
    by = "ASVID") %>% 
  # pivot longer for plotting
  pivot_longer(cols = DA_ana_01:DA_puu, names_to = "pathogen_status", values_to = "LFC") %>% 
  mutate(dir = ifelse(LFC > 0, "positive", "negative"))

# add n of occurrences in any group
DA_path_gen_all_plot %<>%
  left_join(
    x = DA_path_gen_all_plot,
    y = DA_path_gen_all_plot %>%
      group_by(ASVID) %>%
      summarize(n = sum(is.na(LFC))) %>%
      mutate(n = 14 - n),
    by = "ASVID")

# replace NAs for colours
DA_path_gen_all_plot$dir %<>% replace_na("NA")

# replace the unidentified values with abbreviations or the order
DA_path_gen_all_plot %<>%
  mutate(
    Fam_Gen_new = case_when(
      Fam_Gen == "Clostridiales vadinBB60 group_gut metagenome" ~ "Clostridiales vadinBB60 group_unident.1",
      Fam_Gen == "Clostridiales vadinBB60 group_uncultured Clostridiales bacterium" ~ "Clostridiales vadinBB60 group_unident.2",
      Fam_Gen == "Clostridiales vadinBB60 group_uncultured prokaryote" ~ "Clostridiales vadinBB60 group_unident.3",
      Fam_Gen == "Clostridiales vadinBB60 group_uncultured rumen bacterium" ~ "Clostridiales vadinBB60 group_unident.4",
      Fam_Gen == "Coriobacteriales Incertae Sedis_uncultured" ~ "Coriobacteriales Incertae Sedis_unident.",
      Fam_Gen == "Desulfovibrionaceae_uncultured" ~ "Desulfovibrionaceae_unident.",
      Fam_Gen == "Eggerthellaceae_uncultured" ~ "Eggerthellaceae_unident.",
      Fam_Gen == "Erysipelotrichaceae_uncultured" ~ "Erysipelotrichaceae_unident.",
      Fam_Gen == "gut metagenome_gut metagenome" ~ "Mollicutes RF39_unident.1",
      Fam_Gen == "Puniceicoccaceae_uncultured" ~ "Puniceicoccaceae_unident.",
      Fam_Gen == "uncultured bacterium_uncultured bacterium" ~ "Mollicutes RF39_unident.2",
      Fam_Gen == "uncultured cyanobacterium_uncultured cyanobacterium" ~ "Gastranaerophilales_unident.",
      Fam_Gen == "uncultured Firmicutes bacterium_uncultured Firmicutes bacterium" ~ "Mollicutes RF39_unident.3",
      Fam_Gen == "uncultured Lachnospiraceae bacterium_uncultured Lachnospiraceae bacterium" ~ "Mollicutes RF39_unident.4",
      Fam_Gen == "uncultured Mollicutes bacterium_uncultured Mollicutes bacterium" ~ "Mollicutes RF39_unident.5",
      Fam_Gen == "uncultured Paenibacillaceae bacterium_uncultured Paenibacillaceae bacterium" ~ "Mollicutes RF39_unident.6",
      Fam_Gen == "uncultured prokaryote_uncultured prokaryote" ~ "Mollicutes RF39_unident.7",
      Fam_Gen == "uncultured rumen bacterium_uncultured rumen bacterium" ~ "Mollicutes RF39_unident.8",
      Fam_Gen == "uncultured_Alphaproteobacteria bacterium canine oral taxon 081" ~ "Rhodospirillales_uncult._Alphaproteobacteria bacterium canine oral taxon 081.",
      Fam_Gen == "uncultured_gut metagenome" ~ "Rhodospirillales_unident.1",
      Fam_Gen == "uncultured_uncultured alpha proteobacterium" ~ "Rickettsiales_unident.1",
      Fam_Gen == "uncultured_uncultured rumen bacterium" ~ "Rickettsiales_unident.2",
      Fam_Gen == "uncultured_unidentified rumen bacterium RF32" ~ "Rhodospirillales_unident.2",
      Fam_Gen == "unidentified_unidentified" ~ "Mollicutes RF39_unident.9",
      TRUE ~ as.character(Fam_Gen)))

Make the heat-map:

# set the colours for heat map
cols_heat <- c("positive" = "red", "negative" = "blue", "NA" = "white")

ggplot(DA_path_gen_all_plot, 
       aes(x = pathogen_status, 
           y = fct_reorder(Fam_Gen_new, n), 
           fill = dir, 
           alpha = abs(LFC))) +
  geom_tile(col = "grey94") +
  scale_fill_manual(values = cols_heat) +
  labs(x = NULL, 
       y = NULL) +
  scale_x_discrete(position = "top",
                   labels = c("DA_ana" = "Ap \n N-P",
                              "DA_ana_01" = "Ap \n N-S",
                              "DA_ana_02" = "Ap \n N-C",
                              "DA_ana_12" = "Ap \n S-C",
                              "DA_bab" = "Bm \n N-P",
                              "DA_bab_01" = "Bm \n N-S",
                              "DA_bab_02" = "Bm \n N-C",
                              "DA_bor" = "Bbsl \n N-P",
                              "DA_bor_01" = "Bbsl \n N-S",
                              "DA_bor_02" = "Bbsl \n N-C",
                              "DA_puu" = "PUUV \n N-P",
                              "DA_puu_01" = "PUUV \n N-S",
                              "DA_puu_02" = "PUUV \n N-C",
                              "DA_puu_12" = "PUUV \n S-C"),
                   expand = c(0,0)) +
  theme(axis.text.x = element_text(size = 9, colour = "black"),
        axis.text.y = element_text(size = 9, colour = "black"),
        axis.title.y = element_blank(),
        legend.position = "none",
        panel.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.margin = margin(t = 5, r = 5, b = 5, l = 5, unit = "pt"),
        plot.background = element_blank()) +
  geom_text(aes(label = round(LFC, 3)), size = 2.5, col = "black", alpha = 1, vjust = 0.5, hjust = 0.5)

Figure S4

Make data frames:

set.seed(42)
disp_jacc_puu_coinf_plot <- betadisper(d_jacc, group = AlphaDivMeta_plot$puu_coinf, bias.adjust = TRUE)

disp_jacc_puu_coinf_plot <- data.frame(
  dist_centr = disp_jacc_puu_coinf_plot$distances,
  group = disp_jacc_puu_coinf_plot$group)

set.seed(42)
disp_uf_puu_coinf_plot <- betadisper(d_uf, group = AlphaDivMeta_plot$puu_coinf, bias.adjust = TRUE)

disp_uf_puu_coinf_plot <- data.frame(
  dist_centr = disp_uf_puu_coinf_plot$distances,
  group = disp_uf_puu_coinf_plot$group)

set.seed(42)
disp_bc_puu_coinf_plot <- betadisper(d_bc, group = AlphaDivMeta_plot$puu_coinf, bias.adjust = TRUE)

disp_bc_puu_coinf_plot <- data.frame(
  dist_centr = disp_bc_puu_coinf_plot$distances,
  group = disp_bc_puu_coinf_plot$group)

set.seed(42)
disp_wuf_puu_coinf_plot <- betadisper(d_wuf, group = AlphaDivMeta_plot$puu_coinf, bias.adjust = TRUE)

disp_wuf_puu_coinf_plot <- data.frame(
  dist_centr = disp_wuf_puu_coinf_plot$distances,
  group = disp_wuf_puu_coinf_plot$group)

Plot:

p_puu_coinf_disp_jacc <- 
  ggplot(disp_jacc_puu_coinf_plot, aes(x = group, y = dist_centr, colour = group, fill = group)) +
  geom_boxplot(width = 0.5,
               outlier.colour = NA,
               alpha = 0.15,
               lwd = 0.1) +
  geom_point(shape = 16,
             size = 1.5,
             alpha = 0.8,
             position = position_jitter(seed = 42, width = 0.23)) +
  scale_color_manual(values = col_coinf_path) +
  scale_fill_manual(values = col_coinf_path) +
  labs(x = NULL, 
       y = NULL,
       title = NULL) +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_text(size = 11, colour = "black"),
        axis.title.y = element_blank(),
        legend.position = "none",
        panel.background = element_blank(),
        panel.grid.major = element_line(size = 0.2, color = "gray94"),
        panel.grid.minor = element_blank(),
        plot.margin = margin(t = 5, r = 5, b = 5, l = 5, unit = "pt"),
        plot.background = element_rect(color = "lightgrey", size = 0.5)) +
  stat_summary(fun = mean, geom = "point", size = 3, inherit.aes = TRUE, shape = 16)

p_puu_coinf_disp_uf <- 
  ggplot(disp_uf_puu_coinf_plot, aes(x = group, y = dist_centr, colour = group, fill = group)) +  
  geom_boxplot(width = 0.5,
               outlier.colour = NA,
               alpha = 0.15,
               lwd = 0.1) +
  geom_point(shape = 16,
             size = 1.5,
             alpha = 0.8,
             position = position_jitter(seed = 42, width = 0.23)) +
  scale_color_manual(values = col_coinf_path) +
  scale_fill_manual(values = col_coinf_path) +
  labs(x = NULL, 
       y = NULL,
       title = NULL) +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_text(size = 11, colour = "black"),
        axis.title.y = element_blank(),
        legend.position = "none",
        panel.background = element_blank(),
        panel.grid.major = element_line(size = 0.2, color = "gray94"),
        panel.grid.minor = element_blank(),
        plot.margin = margin(t = 5, r = 5, b = 5, l = 5, unit = "pt"),
        plot.background = element_rect(color = "lightgrey", size = 0.5)) +
  stat_summary(fun = mean, geom = "point", size = 3, inherit.aes = TRUE, shape = 16)

p_puu_coinf_disp_bc <- 
  ggplot(disp_bc_puu_coinf_plot, aes(x = group, y = dist_centr, colour = group, fill = group)) +
  geom_boxplot(width = 0.5,
               outlier.colour = NA,
               alpha = 0.15,
               lwd = 0.1) +
  geom_point(shape = 16,
             size = 1.5,
             alpha = 0.8,
             position = position_jitter(seed = 42, width = 0.23)) +
  scale_color_manual(values = col_coinf_path) +
  scale_fill_manual(values = col_coinf_path) +
  labs(x = NULL, 
       y = NULL,
       title = NULL) +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_text(size = 11, colour = "black"),
        axis.title.y = element_blank(),
        legend.position = "none",
        panel.background = element_blank(),
        panel.grid.major = element_line(size = 0.2, color = "gray94"),
        panel.grid.minor = element_blank(),
        plot.margin = margin(t = 5, r = 5, b = 5, l = 5, unit = "pt"),
        plot.background = element_rect(color = "lightgrey", size = 0.5)) +
  stat_summary(fun = mean, geom = "point", size = 3, inherit.aes = TRUE, shape = 16)

p_puu_coinf_disp_wuf <- 
  ggplot(disp_wuf_puu_coinf_plot, aes(x = group, y = dist_centr, colour = group, fill = group)) +
  geom_boxplot(width = 0.5,
               outlier.colour = NA,
               alpha = 0.15,
               lwd = 0.1) +
  geom_point(shape = 16,
             size = 1.5,
             alpha = 0.8,
             position = position_jitter(seed = 42, width = 0.23)) +
  scale_color_manual(values = col_coinf_path) +
  scale_fill_manual(values = col_coinf_path) +
  labs(x = NULL, 
       y = NULL,
       title = NULL) +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_text(size = 11, colour = "black"),
        axis.title.y = element_blank(),
        legend.position = "none",
        panel.background = element_blank(),
        panel.grid.major = element_line(size = 0.2, color = "gray94"),
        panel.grid.minor = element_blank(),
        plot.margin = margin(t = 5, r = 5, b = 5, l = 5, unit = "pt"),
        plot.background = element_rect(color = "lightgrey", size = 0.5)) +
  stat_summary(fun = mean, geom = "point", size = 3, inherit.aes = TRUE, shape = 16)

Merge for the final version:

(p_puu_coinf_disp_jacc + p_puu_coinf_disp_uf) / (p_puu_coinf_disp_bc + p_puu_coinf_disp_wuf) + plot_annotation(tag_levels = 'A')

R code used for statistical analysis and figures

Ilze Brila

2022-11-04

Preparation

Load necessary libraries

Data preparation

Infection and pathogen prevalence

Data exploration

Alpha diversity

Calculate alpha diversity indices

Analysis

Coinfection status

Shannon’s diversity index

Faith’s phylogenetic diversity

ASV richness

Pathogens irrespective of coinfection status

Shannon’s diversity index

Faith’s phylogenetic diversity

ASV richness

Pathogens considering coinfection status

Shannon’s diversity index

Anaplasma

Babesia

Borrelia

Puumala

ASV richness

Anaplasma

Babesia

Borrelia

Puumala

Faith’s phylogenetic diversity

Anaplasma

Babesia

Borrelia

Puumala

Beta diversity

Calculate distances and ordinate using PCoA

Coinfection status

Jaccard

UniFrac

Bray-Curtis

Weighted UniFrac

Pathogens irrespective of coinfection status

Jaccard

UniFrac

Bray-Curtis

Weighted UniFrac

Pathogens considering coinfection status

Jaccard

Anaplasma

Babesia

Borrelia

Puumala

UniFrac

Anaplasma

Babesia

Borrelia

Puumala

Bray-Curtis

Anaplasma

Babesia

Borrelia

Puumala

Weighted UniFrac

Anaplasma

Babesia

Borrelia

Puumala

Dispersion of other variables

Rare ASV analysis

Differential abundance analysis using ANCOM-BC

Coinfection status

ASV level

Coinfection status 0 vs 1 and 2

Coinfection status 1 vs 2

Genus level

Coinfection status 0 vs 1 and 2

Coinfection status 1 vs 2

Pathogens irrespective of coinfection status

ASV level