Supplemental Methods

Figure 1: Phylogeny

Load the ggplot2 library for graphics. It is also used for the later figures.

library(ggplot2)

Load phylogenetic abundances. Data created using calc_phylum_bp.py script from PhymmBL and RDP classified Metaxa data.

# Phylogenetic abundance table (tab-separated). read.delim() is read.csv()
# with sep = "\t" as its default separator.
phy_data <- read.delim(
    file = "/data/supplemental_data_code/DatasetS6_phylogeny_comparison_bp.tsv"
)

Plot stacked bar chart.

library(grid)

# Stacked bar chart of the bp fraction assigned to each phylum, one bar per
# program (position = "fill" rescales every bar to a total of 1).
# The bars are drawn twice: the first layer supplies the fill colours and the
# legend; the second overlays dark outlines and is excluded from the legend.
# NOTE(review): the `order` aesthetic and the `show_guide` argument follow the
# ggplot2 0.9.3.1 API listed under Session Info; newer ggplot2 releases are
# expected to want `show.legend` instead and to ignore `order` - confirm
# before upgrading.
ggplot(phy_data, aes(x = Program, y = bp, fill = Phylum, order = -as.numeric(Phylum))) +
    geom_bar(position = "fill", stat = "identity") +
    geom_bar(position = "fill", colour = "#262626", show_guide = FALSE, stat = "identity") +
    ylab("bp Fraction") +
    xlab("Program") +
    theme(
        panel.border = element_blank(),
        plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "in"),
        # Font sizes are numeric point sizes rather than character strings.
        axis.title.x = element_text(size = 16),
        axis.title.y = element_text(size = 16),
        axis.text.x = element_text(colour = "black", size = 16),
        legend.title = element_text(size = 16)
    )

plot of chunk Fig1_Phylogeny

Figure 2: Subsystems Bar Chart

Load subsystems abundance data. Data is from MG-RAST, with abundance counts converted to relative abundance.

# Subsystems abundance table (tab-separated). read.delim() is read.csv() with
# sep = "\t" as its default separator.
subsystems_data <- read.delim(
    file = "/data/supplemental_data_code/DatasetS1_subsystems.tsv"
)

Reorder subsystems from most abundant to least.

# Factor version of level.1 whose levels are ordered by percent (reorder()
# uses the mean of percent within each level), so the plot axis follows
# abundance rather than alphabetical order.
subsystems_data$reorderL1 <- with(subsystems_data, reorder(level.1, percent))

Plot the subsystems abundance.

# Horizontal bar chart of percent per level-1 subsystem; coord_flip() turns
# the bars sideways so the subsystem names sit on the vertical axis.
ggplot(data = subsystems_data, aes(x = reorderL1, y = percent)) +
    geom_bar(stat = "identity", fill = "#06a4ff") +
    coord_flip() +
    ylab("Percent") +
    xlab("Subsystems Level 1") +
    theme(
        legend.position = "none",
        axis.text.x = element_text(colour = "black"),
        axis.text.y = element_text(colour = "black", face = "bold"),
        axis.ticks.y = element_blank()
    )

plot of chunk Fig2_Subsystems

Figure 3: PCA

Load Resistance to Antibiotics and Toxic Compounds Subsystem relative abundance data. Raw data are from MG-RAST.

# Comma-separated table with one row per metagenome.
# NOTE(review): assumes Metagenome values are unique per row - confirm.
RATCdata <- read.csv(file = "/data/supplemental_data_code/DatasetS7_RATC.csv")

Perform PCA. Only the columns containing abundance data (columns 4 to 34) are selected.

# PCA on columns 4 through 34 of RATCdata, using prcomp()'s default settings.
pc <- prcomp(x = RATCdata[, 4:34])

Extract principal component values, and merge with main dataset.

# Put the principal component scores (pc$x) in a data frame and tag each row
# with the metagenome identifier from the source table.
pc2 <- data.frame(pc$x, Metagenome = RATCdata$Metagenome)

# Join the scores back onto the original table, matching on Metagenome.
pc3 <- merge(x = RATCdata, y = pc2, by = "Metagenome")

Plot.

# Scatter plot of the first two principal components, coloured by Type, with
# each point labelled by its Description (label anchored at its lower-left
# corner via hjust = 0, vjust = 0).
ggplot(pc3, aes(x = PC1, y = PC2, colour = Type, label = Description)) +
    geom_point(size = 3) +
    geom_text(hjust = 0, vjust = 0)

plot of chunk Fig3_PCA

Session Info

# Print the R version, platform, locale and attached/loaded package versions.
sessionInfo()
## R version 3.0.0 (2013-04-03)
## Platform: x86_64-unknown-linux-gnu (64-bit)
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=C                 LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] ggplot2_0.9.3.1 knitr_1.2      
## 
## loaded via a namespace (and not attached):
##  [1] colorspace_1.2-2   dichromat_2.0-0    digest_0.6.3      
##  [4] evaluate_0.4.3     formatR_0.7        gtable_0.1.2      
##  [7] labeling_0.1       MASS_7.3-26        munsell_0.4       
## [10] plyr_1.8           proto_0.3-10       RColorBrewer_1.0-5
## [13] reshape2_1.2.2     scales_0.2.3       stringr_0.6.2     
## [16] tools_3.0.0