Sylvain Monteux, Frida Keuper, Sébastien Fontaine, Konstantin Gavazov, Sara Hallin, Jaanis Juhanson, Eveline J. Krab, Sandrine Revaillot, Erik Verbruggen, Josefine Walz, James T. Weedon, Ellen Dorrepaal
Nature Geoscience (2020), https://dx.doi.org/10.1038/s41561-020-00662-4
This Jupyter notebook was ran with the following software:
In this notebook, qiime is installed on a conda env called "qiime", and is called by "conda activate qiime", feel free to replace that with whatever you call it, or to remove that line if qiime is installed without conda. Please refer to the first section of the 16S/MonteuxKeuper_16S.ipynb file to create the conda environment.
# Concatenate read files
gunzip ../reads/ITS/*.gz
cat ../reads/ITS/* > reads.fastq
gzip ../reads/ITS/*.fastq
# Strict quality filtering (0.05) and length truncation
vsearch --fastq_filter reads.fastq --fastq_trunclen 200 --fastq_maxee 0.05 --fastaout shortreads_005.fasta --threads 3
# Primer trimming with R script using ShortRead library (see head scripts/ShortRead_trim.R for installation through BioConductor)
Rscript scripts/ShortRead_trim.R
awk -f scripts/relabel.awk fnp_reads.fa > relab_reads.fa
vsearch --derep_fulllength fnp_reads.fa --output seqs_sorted.fasta --minuniquesize 2 --sizeout --threads 3
# Create OTUs
vsearch -cluster_size seqs_sorted.fasta --consout otus.fa --id 0.97 --relabel 'OTU_' --sizein --threads 3
# Check for chimeras based on the UNITE database
vsearch --uchime_ref otus.fa --nonchimeras nonchimeras.fa --db scripts/uchime_sh_refs_dynamic_develop_985_01.01.2016.ITS1.fasta --relabel 'OTU_' --uchimeout results.uchime --xsize --threads 3
# Cluster OTUs
vsearch -usearch_global relab_reads.fa -db nonchimeras.fa --biomout its_map.biom --id 0.97 --threads 3
# This notebook uses a python2 virtual environment where QIIME is installed through conda
# depending on your configuration you might need to activate your conda environment with the following line, where "qiime" should be the name of your environment
conda activate qiime
# Remove OTUs present in less than 10% of samples, to reduce importance of rare OTUs in differential abundance testing
# While not necessary for other steps than differential abundance, we prefer using a single consistent dataset
filter_otus_from_otu_table.py -i its_map.biom -o its_map_filtered.biom -s 3
filter_fasta.py -f nonchimeras.fa -o final_otus.fa -b its_map_filtered.biom
# Assign taxonomy to the OTUs, asks for much memory
assign_taxonomy.py -i final_otus.fa -o rdp_tax/ -m rdp -t scripts/97_otu_taxonomy.txt -r scripts/97_otus.fasta --rdp_max_memory 12000
# Main non-rarefied OTU table
biom add-metadata -i its_map_filtered.biom -o its_map_f_wtax.biom --observation-metadata-fp rdp_tax/final_otus_tax_assignments.txt --sc-separated taxonomy --observation-header OTUID,taxonomy
biom summarize-table -i its_map_f_wtax.biom -o summary_its_map.txt
alpha_rarefaction.py -i its_map_f_wtax.biom -o alpha_rar -m MonteuxKeuper_map_ITS_alpharar.txt -p scripts/alpha_rar.par -n 10 -e 12000 -O 2 -a -f
# Need to keep empty OTUs (-k) for downstream averaging in R
multiple_rarefactions_even_depth.py -i its_map_f_wtax.biom -o inoc_rar5000 -d 5000 -n 100 -k
# Formatting to prepare tables for import in R
bash scripts/convert_table.sh
bash scripts/clean_tsv_from_biom_rarefied_tables.sh
# Makes a consensus out of the 100 rarefaction tables
Rscript scripts/make_consensus_from_rarefactions.R
biom convert -i inoc_rar5000_tsv/clean/consensus100_rarefaction5000.otu -o consensus_100_rar5000.biom --to-json
# Bray-curtis beta diversity summary for downstream use in R
beta_diversity.py -i consensus_100_rar5000.biom -o inoc5000_bray/ -m 'bray_curtis'
# Alpha diversity summary for downstream use in R
alpha_diversity.py -i consensus_100_rar5000.biom -o its_alpha_div.txt -m ace,chao1,chao1_ci,observed_otus
# Put taxonomy in the consensus rarefied table and convert to tsv for downstream use in R
biom add-metadata -i consensus_100_rar5000.biom -o Yedoma_ITS_5k_wtax.biom --observation-metadata-fp rdp_tax/final_otus_tax_assignments.txt --sc-separated taxonomy --observation-header OTUID,taxonomy
biom convert -i Yedoma_ITS_5k_wtax.biom -o Yedoma_ITS_5k_wtax.otu --to-tsv --table-type "OTU table"
# Taxa summary for downstream use in R
summarize_taxa.py -i Yedoma_ITS_5k_wtax.biom -o taxa_sum_Yedo_ITS/
# Changes formatting of the non-rarefied table for sample names to match with those in the mapping file (R exports adds an X to sample names)
biom convert -i its_map_f_wtax.biom -o its_map_f_wtax.otu --table-type "OTU table" --to-tsv
awk -f scripts/sub_cleaner.awk its_map_f_wtax.otu > its_map_f_wtax2.otu
Rscript scripts/change_sample_names.R
biom convert -i Yedoma_ITS_wtax.otu -o Yedoma_ITS_wtax.biom --table-type "OTU table" --to-json
# Add taxonomy to the non-rarefied table for easier DESeq processing
biom add-metadata -i Yedoma_ITS_wtax.biom -o Yedoma_ITS_wtax.biom --observation-metadata-fp rdp_tax/final_otus_tax_assignments.txt --sc-separated taxonomy --observation-header OTUID,taxonomy
# Make subsets per harvest to look at differential abundance between treatments at each harvest
filter_samples_from_otu_table.py -i Yedoma_ITS_wtax.biom -o Harvest0_ggtax.biom -m MonteuxKeuper_map_ITS.txt -s 'Harvest:0'
filter_samples_from_otu_table.py -i Yedoma_ITS_wtax.biom -o Harvest1_ggtax.biom -m MonteuxKeuper_map_ITS.txt -s 'Harvest:1'
filter_samples_from_otu_table.py -i Yedoma_ITS_wtax.biom -o Harvest4_ggtax.biom -m MonteuxKeuper_map_ITS.txt -s 'Harvest:4'
# DESeq2 has many dependencies: biocLite("DESeq2")
# "biom" package is deprecated and might require manual installation:
# library(devtools)
# install_github("biom","joey711")
differential_abundance.py -i Yedoma_ITS_wtax.biom -o diff_abund_YvI_ITS.txt -m MonteuxKeuper_map_ITS.txt -c DESEQ -x b -y a -a DESeq2_nbinom -d
differential_abundance.py -i Harvest0_ggtax.biom -o diff_abund_YvI_ITS_H0.txt -m MonteuxKeuper_map_ITS.txt -c Treatment -x Y -y I -a DESeq2_nbinom -d
differential_abundance.py -i Harvest1_ggtax.biom -o diff_abund_YvI_ITS_H1.txt -m MonteuxKeuper_map_ITS.txt -c Treatment -x Y -y I -a DESeq2_nbinom -d
differential_abundance.py -i Harvest4_ggtax.biom -o diff_abund_YvI_ITS_H4.txt -m MonteuxKeuper_map_ITS.txt -c Treatment -x Y -y I -a DESeq2_nbinom -d
# mkdir ../Rnew # This should have been created as the last step of the 16S pipeline, else uncomment
cp inoc5000_bray/bray_curtis_consensus_100_rar5000.txt ../Downstream/Rnew/
awk -f scripts/cleaner.awk taxa_sum_Yedo_ITS/Yedoma_ITS_5k_wtax_L3.txt > ../Downstream/Rnew/Yedoma_ITS_5k_wtax_L3.txt
awk -f scripts/cleaner.awk Yedoma_ITS_5k_wtax.otu > ../Downstream/Rnew/Yedoma_ITS_5k_wtax.otu
cp alpha_rar/alpha_div_collated/chao1.txt ../Downstream/Rnew/itschao1.txt
cp diff_abund_YvI_ITS.txt ../Downstream/Rnew/
cp diff_abund_YvI_ITS_H0.txt ../Downstream/Rnew/
cp diff_abund_YvI_ITS_H1.txt ../Downstream/Rnew/
cp diff_abund_YvI_ITS_H4.txt ../Downstream/Rnew/
cp its_alpha_div.txt ../Downstream/Rnew/