This file starts with the FASTQ files from the PacBio run and runs through the DADA2 workflow to produce a phyloseq object at the end. Two cells were used, with every sample run once on each cell. FASTQ files from the different cells are then combined after error learning for the individual runs.
## https://benjjneb.github.io/LRASManuscript/LRASms_fecal.html
library(dada2)
library(Biostrings)
library(ShortRead)
library(ggplot2)
library(reshape2)
library(gridExtra)
library(phyloseq)
library(gtools)
## Cell 1 setup: locate the input FASTQ files, define the primers and restore
## any cached results from a previous run.
path1 <- "/blue/mulligan/duttonc/Congo/Cell1"
## Anchor the pattern so only names ending in .fastq (optionally .gz) are
## listed; an unanchored "fastq" also matches names such as "x.fastq.md5".
fns1 <- list.files(path1, pattern = "\\.fastq(\\.gz)?$", full.names = TRUE)
## Forward and reverse primer sequences handed to removePrimers() below.
F27 <- "AGRGTTYGATYMTGGCTCAG"
R1492 <- "RGYTACCTTGTTACGACTT"
## Alias for dada2's non-exported rc() helper.
rc <- dada2:::rc
theme_set(theme_bw())
## Restore cached cell 1 objects when the cache file exists. There is
## deliberately no empty `else {}`: that form makes the expression evaluate to
## a visible NULL, which gets printed when no cache is present.
cell1_cache <- "/blue/mulligan/duttonc/Congo/output/cell1_output.Rdata"
if (file.exists(cell1_cache)) {
  load(cell1_cache)
}
## Remove primers (cell 1): trimmed reads are written to a "noprimers"
## subdirectory of path1, one output file per input file.
nops1 <- file.path(path1, "noprimers", basename(fns1))
if (!exists("prim1")) {
  prim1 <- removePrimers(
    fns1,
    nops1,
    primer.fwd = F27,
    primer.rev = dada2:::rc(R1492),
    orient = TRUE
  )
} else {
  print("cached file loaded: prim1")
}
## [1] "cached file loaded: prim1"
## Filter (cell 1): first collect, for each primer-trimmed file, the lengths
## of the sequences returned by getSequences().
if (!exists("lens.fn1")) {
  lens.fn1 <- lapply(nops1, function(trimmed) {
    nchar(getSequences(trimmed))
  })
} else {
  print("cached file loaded: lens.fn1")
}
## [1] "cached file loaded: lens.fn1"
## Pool the per-file lengths into a single vector and plot their histogram.
lens <- unlist(lens.fn1)
hist(lens, breaks = 1000)
## Quality/length filtering (cell 1): filtered reads go to
## <path1>/noprimers/filtered, keeping the original file names.
filts1 <- file.path(path1, "noprimers", "filtered", basename(fns1))
if (!exists("track1")) {
  track1 <- filterAndTrim(
    nops1,
    filts1,
    minQ = 3,
    minLen = 1000,
    maxLen = 1600,
    maxN = 0,
    rm.phix = FALSE,
    maxEE = 2
  )
} else {
  print("cached file loaded: track1")
}
## [1] "cached file loaded: track1"
## Display the filterAndTrim() result for cell 1 (reads.in / reads.out per file).
track1
## reads.in reads.out
## G1_6M-16S_For_bc1002-16S_Rev_bc1045.fastq 2585 2478
## G1_6W-16S_For_bc1006-16S_Rev_bc1045.fastq 4009 3853
## G10_3M-16S_For_bc1020-16S_Rev_bc1035.fastq 2201 2143
## G10_6M-16S_For_bc1022-16S_Rev_bc1033.fastq 2044 1943
## G10_6W-16S_For_bc1005-16S_Rev_bc1060.fastq 7191 6932
## G11_3M-16S_For_bc1012-16S_Rev_bc1035.fastq 2392 2321
## G11_6M-16S_For_bc1015-16S_Rev_bc1033.fastq 1755 1689
## G11_6W-16S_For_bc1012-16S_Rev_bc1057.fastq 5772 5595
## G12_3M-16S_For_bc1005-16S_Rev_bc1054.fastq 4878 4729
## G12_6W-16S_For_bc1006-16S_Rev_bc1033.fastq 6199 5984
## G12_MV1-16S_For_bc1024-16S_Rev_bc1056.fastq 4305 4128
## G12_PT1-16S_For_bc1002-16S_Rev_bc1044.fastq 2337 2105
## G13_3M_2-16S_For_bc1008-16S_Rev_bc1060.fastq 6238 6051
## G13_3M-16S_For_bc1024-16S_Rev_bc1059.fastq 11267 10870
## G13_6M-16S_For_bc1011-16S_Rev_bc1035.fastq 4325 4165
## G13_6W-16S_For_bc1020-16S_Rev_bc1054.fastq 4921 4727
## G14_3M-16S_For_bc1005-16S_Rev_bc1033.fastq 3211 3109
## G14_6M-16S_For_bc1015-16S_Rev_bc1075.fastq 2966 2873
## G14_6W-16S_For_bc1012-16S_Rev_bc1033.fastq 1852 1794
## G14_PT-16S_For_bc1002-16S_Rev_bc1057.fastq 4919 4759
## G15_3M-16S_For_bc1022-16S_Rev_bc1035.fastq 2848 2765
## G15_6W-16S_For_bc1024-16S_Rev_bc1035.fastq 3774 3663
## G15_MV1-16S_For_bc1024-16S_Rev_bc1033.fastq 3879 3749
## G16_6M-16S_For_bc1005-16S_Rev_bc1056.fastq 4285 4112
## G17_3M-16S_For_bc1015-16S_Rev_bc1065.fastq 3433 3320
## G17_6M-16S_For_bc1012-16S_Rev_bc1044.fastq 1945 1890
## G17_6W-16S_For_bc1005-16S_Rev_bc1045.fastq 3173 3054
## G19_3M-16S_For_bc1004-16S_Rev_bc1045.fastq 2485 2412
## G19_6W-16S_For_bc1010-16S_Rev_bc1035.fastq 1940 1875
## G19_6WR-16S_For_bc1022-16S_Rev_bc1062.fastq 5398 5218
## G19_MV3-16S_For_bc1005-16S_Rev_bc1075.fastq 5880 5718
## G2_3M-16S_For_bc1004-16S_Rev_bc1035.fastq 3112 2971
## G2_6M-16S_For_bc1011-16S_Rev_bc1044.fastq 3441 3243
## G2_6W-16S_For_bc1020-16S_Rev_bc1065.fastq 4094 3937
## G2_PT-16S_For_bc1024-16S_Rev_bc1065.fastq 14814 14360
## G20_6W-16S_For_bc1024-16S_Rev_bc1054.fastq 6238 6046
## G20_MV1-16S_For_bc1005-16S_Rev_bc1059.fastq 9035 8750
## G20_PT1-16S_For_bc1020-16S_Rev_bc1045.fastq 3340 3243
## G20_PT2-16S_For_bc1008-16S_Rev_bc1045.fastq 2637 2554
## G20_PT2R-16S_For_bc1007-16S_Rev_bc1045.fastq 3588 3460
## G21_3M-16S_For_bc1005-16S_Rev_bc1044.fastq 2304 2223
## G21_6W-16S_For_bc1007-16S_Rev_bc1057.fastq 10668 10283
## G22_3M-16S_For_bc1020-16S_Rev_bc1075.fastq 1981 1915
## G22_6W-16S_For_bc1006-16S_Rev_bc1044.fastq 4081 3958
## G22_MV-16S_For_bc1003-16S_Rev_bc1044.fastq 2954 2827
## G23_3M-16S_For_bc1008-16S_Rev_bc1035.fastq 1996 1912
## G24_3M-16S_For_bc1020-16S_Rev_bc1056.fastq 4708 4564
## G24_6M-16S_For_bc1007-16S_Rev_bc1075.fastq 8984 8629
## G24_6W-16S_For_bc1008-16S_Rev_bc1062.fastq 7179 6981
## G25_3M-16S_For_bc1012-16S_Rev_bc1054.fastq 4775 4629
## G25_6W-16S_For_bc1008-16S_Rev_bc1065.fastq 6931 6736
## G26_3M-16S_For_bc1024-16S_Rev_bc1060.fastq 10584 10267
## G26_6M-16S_For_bc1020-16S_Rev_bc1060.fastq 8743 8466
## G26_6W-16S_For_bc1006-16S_Rev_bc1035.fastq 3663 3511
## G27_6M-16S_For_bc1002-16S_Rev_bc1056.fastq 1888 1781
## G27_6W-16S_For_bc1008-16S_Rev_bc1057.fastq 5437 5224
## G28_6M-16S_For_bc1022-16S_Rev_bc1060.fastq 9963 9665
## G29_3M-16S_For_bc1005-16S_Rev_bc1065.fastq 3186 3103
## G29_6W-16S_For_bc1008-16S_Rev_bc1054.fastq 4180 4056
## G3_3M-16S_For_bc1015-16S_Rev_bc1060.fastq 3551 3395
## G3_6M-16S_For_bc1009-16S_Rev_bc1044.fastq 2306 2239
## G3_6W-16S_For_bc1012-16S_Rev_bc1059.fastq 9842 9530
## G30_3M-16S_For_bc1009-16S_Rev_bc1035.fastq 1718 1637
## G30_6M-16S_For_bc1015-16S_Rev_bc1059.fastq 4717 4493
## G30_6W-16S_For_bc1010-16S_Rev_bc1044.fastq 1511 1458
## G31_3M-16S_For_bc1007-16S_Rev_bc1044.fastq 2642 2511
## G31_6M-16S_For_bc1003-16S_Rev_bc1045.fastq 4076 3918
## G33_3M-16S_For_bc1012-16S_Rev_bc1075.fastq 3028 2889
## G33_6W-16S_For_bc1022-16S_Rev_bc1059.fastq 10059 9667
## G34_3M-16S_For_bc1015-16S_Rev_bc1054.fastq 4262 4156
## G34_6W-16S_For_bc1012-16S_Rev_bc1065.fastq 3370 3227
## G35_3M-16S_For_bc1002-16S_Rev_bc1059.fastq 4649 4437
## G35_6M-16S_For_bc1012-16S_Rev_bc1045.fastq 3361 3249
## G35_6W-16S_For_bc1020-16S_Rev_bc1057.fastq 6420 6233
## G36_3M-16S_For_bc1024-16S_Rev_bc1075.fastq 536 515
## G36_6M-16S_For_bc1002-16S_Rev_bc1035.fastq 3362 3259
## G37_6W-16S_For_bc1008-16S_Rev_bc1044.fastq 2305 2237
## G37_PT1-16S_For_bc1007-16S_Rev_bc1035.fastq 1944 1867
## G38_3M-16S_For_bc1008-16S_Rev_bc1059.fastq 6740 6535
## G38_6W-16S_For_bc1002-16S_Rev_bc1054.fastq 3050 2965
## G39_6M-16S_For_bc1012-16S_Rev_bc1062.fastq 3746 3627
## G4_3M-16S_For_bc1015-16S_Rev_bc1062.fastq 4391 4247
## G4_3MR-16S_For_bc1015-16S_Rev_bc1045.fastq 2633 2538
## G4_6W-16S_For_bc1009-16S_Rev_bc1033.fastq 4481 4315
## G40_3M-16S_For_bc1010-16S_Rev_bc1045.fastq 1863 1789
## G40_6M-16S_For_bc1002-16S_Rev_bc1033.fastq 2770 2664
## G40_6W-16S_For_bc1007-16S_Rev_bc1060.fastq 11413 10945
## G41_3M-16S_For_bc1022-16S_Rev_bc1056.fastq 4243 4082
## G42_3M-16S_For_bc1002-16S_Rev_bc1062.fastq 2850 2751
## G42_6M-16S_For_bc1005-16S_Rev_bc1035.fastq 3372 3276
## G43_6M-16S_For_bc1010-16S_Rev_bc1033.fastq 3618 3517
## G43_6W-16S_For_bc1008-16S_Rev_bc1075.fastq 2519 2421
## G45_3M-16S_For_bc1024-16S_Rev_bc1045.fastq 4727 4577
## G45_6W-16S_For_bc1008-16S_Rev_bc1056.fastq 4132 3985
## G45_MV-16S_For_bc1024-16S_Rev_bc1057.fastq 11545 11144
## G46_3M-16S_For_bc1015-16S_Rev_bc1044.fastq 2364 2282
## G46_3MR-16S_For_bc1020-16S_Rev_bc1044.fastq 2928 2837
## G46_6M-16S_For_bc1022-16S_Rev_bc1065.fastq 4054 3918
## G46_6W-16S_For_bc1007-16S_Rev_bc1062.fastq 13383 12918
## G47_3M-16S_For_bc1005-16S_Rev_bc1062.fastq 10701 10390
## G47_6W-16S_For_bc1011-16S_Rev_bc1045.fastq 2757 2648
## G48_3M-16S_For_bc1015-16S_Rev_bc1057.fastq 8984 8574
## G48_6M-16S_For_bc1004-16S_Rev_bc1033.fastq 1762 1703
## G48_6W-16S_For_bc1012-16S_Rev_bc1060.fastq 8682 8396
## G5_3M-16S_For_bc1007-16S_Rev_bc1054.fastq 4257 4022
## G5_MV1-16S_For_bc1003-16S_Rev_bc1033.fastq 3311 3181
## G50_3M-16S_For_bc1020-16S_Rev_bc1062.fastq 4013 3790
## G50_6M-16S_For_bc1007-16S_Rev_bc1059.fastq 7721 7411
## G50_6W-16S_For_bc1004-16S_Rev_bc1044.fastq 2450 2373
## G51_6W-16S_For_bc1024-16S_Rev_bc1044.fastq 3741 3602
## G52_3M-16S_For_bc1024-16S_Rev_bc1062.fastq 9802 9473
## G52_6M-16S_For_bc1022-16S_Rev_bc1057.fastq 6344 6166
## G52_6W-16S_For_bc1020-16S_Rev_bc1059.fastq 9859 9527
## G7_3M-16S_For_bc1008-16S_Rev_bc1033.fastq 2167 2086
## G7_3MR-16S_For_bc1015-16S_Rev_bc1056.fastq 3020 2873
## G7_6M-16S_For_bc1007-16S_Rev_bc1033.fastq 2584 2447
## G7_MV2-16S_For_bc1022-16S_Rev_bc1044.fastq 3241 3142
## G7_PT1-16S_For_bc1005-16S_Rev_bc1057.fastq 6692 6490
## G8_3M-16S_For_bc1002-16S_Rev_bc1060.fastq 3476 3351
## G8_6W-16S_For_bc1007-16S_Rev_bc1065.fastq 9434 9151
## G9_3M-16S_For_bc1012-16S_Rev_bc1056.fastq 4618 4467
## G9_6M-16S_For_bc1022-16S_Rev_bc1075.fastq 3450 3347
## G9_6W-16S_For_bc1011-16S_Rev_bc1033.fastq 4344 4199
## MC1-16S_For_bc1009-16S_Rev_bc1045.fastq 1779 1718
## MC2-16S_For_bc1022-16S_Rev_bc1045.fastq 3520 3435
## MC3-16S_For_bc1020-16S_Rev_bc1033.fastq 3283 3216
## dada2 (cell 1): dereplicate the filtered reads.
if (!exists("drp1")) {
  drp1 <- derepFastq(filts1, verbose = TRUE)
} else {
  print("cached file loaded: drp1")
}
## [1] "cached file loaded: drp1"
## Learn errors (cell 1): fit the error model on the dereplicated reads using
## the PacBio error-estimation function.
if (!exists("err1")) {
  err1 <- learnErrors(
    drp1,
    errorEstimationFunction = PacBioErrfun,
    BAND_SIZE = 32,
    multithread = TRUE
  )
} else {
  print("cached file loaded: err1")
}
## [1] "cached file loaded: err1"
## Plot errors: draw the error model stored in err1 (cell 1) for visual inspection.
plotErrors(err1)
## Denoise (cell 1): run dada() on the dereplicated reads with the error
## model learned for this cell.
if (!exists("dd1")) {
  dd1 <- dada(
    drp1,
    err = err1,
    BAND_SIZE = 32,
    multithread = TRUE
  )
} else {
  print("cached file loaded: dd1")
}
## [1] "cached file loaded: dd1"
## Read tracking for cell 1: one row per sample, one column per pipeline stage
## (input reads, after primer removal, after filtering, after denoising).
cbind(
  ccs = prim1[, 1],
  primers = prim1[, 2],
  filtered = track1[, 2],
  ## vapply() guarantees exactly one numeric total per sample; sapply() would
  ## silently return a list for empty or irregular input.
  denoised = vapply(dd1, function(res) sum(res$denoised), numeric(1))
)
## ccs primers filtered denoised
## G1_6M-16S_For_bc1002-16S_Rev_bc1045.fastq 2973 2585 2478 2294
## G1_6W-16S_For_bc1006-16S_Rev_bc1045.fastq 4581 4009 3853 3771
## G10_3M-16S_For_bc1020-16S_Rev_bc1035.fastq 2276 2201 2143 2083
## G10_6M-16S_For_bc1022-16S_Rev_bc1033.fastq 2460 2044 1943 1795
## G10_6W-16S_For_bc1005-16S_Rev_bc1060.fastq 7981 7191 6932 6728
## G11_3M-16S_For_bc1012-16S_Rev_bc1035.fastq 2718 2392 2321 2282
## G11_6M-16S_For_bc1015-16S_Rev_bc1033.fastq 2016 1755 1689 1607
## G11_6W-16S_For_bc1012-16S_Rev_bc1057.fastq 6301 5772 5595 5527
## G12_3M-16S_For_bc1005-16S_Rev_bc1054.fastq 5211 4878 4729 4641
## G12_6W-16S_For_bc1006-16S_Rev_bc1033.fastq 7124 6199 5984 5917
## G12_MV1-16S_For_bc1024-16S_Rev_bc1056.fastq 4466 4305 4128 3987
## G12_PT1-16S_For_bc1002-16S_Rev_bc1044.fastq 2723 2337 2105 2021
## G13_3M_2-16S_For_bc1008-16S_Rev_bc1060.fastq 6944 6238 6051 5911
## G13_3M-16S_For_bc1024-16S_Rev_bc1059.fastq 11681 11267 10870 10687
## G13_6M-16S_For_bc1011-16S_Rev_bc1035.fastq 4655 4325 4165 3986
## G13_6W-16S_For_bc1020-16S_Rev_bc1054.fastq 5111 4921 4727 4682
## G14_3M-16S_For_bc1005-16S_Rev_bc1033.fastq 3402 3211 3109 3082
## G14_6M-16S_For_bc1015-16S_Rev_bc1075.fastq 3439 2966 2873 2810
## G14_6W-16S_For_bc1012-16S_Rev_bc1033.fastq 2117 1852 1794 1696
## G14_PT-16S_For_bc1002-16S_Rev_bc1057.fastq 5578 4919 4759 4562
## G15_3M-16S_For_bc1022-16S_Rev_bc1035.fastq 3388 2848 2765 2708
## G15_6W-16S_For_bc1024-16S_Rev_bc1035.fastq 3921 3774 3663 3631
## G15_MV1-16S_For_bc1024-16S_Rev_bc1033.fastq 4044 3879 3749 3688
## G16_6M-16S_For_bc1005-16S_Rev_bc1056.fastq 4731 4285 4112 4008
## G17_3M-16S_For_bc1015-16S_Rev_bc1065.fastq 3913 3433 3320 3228
## G17_6M-16S_For_bc1012-16S_Rev_bc1044.fastq 2328 1945 1890 1825
## G17_6W-16S_For_bc1005-16S_Rev_bc1045.fastq 3494 3173 3054 3019
## G19_3M-16S_For_bc1004-16S_Rev_bc1045.fastq 2740 2485 2412 2369
## G19_6W-16S_For_bc1010-16S_Rev_bc1035.fastq 2181 1940 1875 1844
## G19_6WR-16S_For_bc1022-16S_Rev_bc1062.fastq 6349 5398 5218 5079
## G19_MV3-16S_For_bc1005-16S_Rev_bc1075.fastq 6099 5880 5718 5600
## G2_3M-16S_For_bc1004-16S_Rev_bc1035.fastq 3552 3112 2971 2874
## G2_6M-16S_For_bc1011-16S_Rev_bc1044.fastq 3736 3441 3243 3029
## G2_6W-16S_For_bc1020-16S_Rev_bc1065.fastq 4182 4094 3937 3780
## G2_PT-16S_For_bc1024-16S_Rev_bc1065.fastq 15227 14814 14360 14248
## G20_6W-16S_For_bc1024-16S_Rev_bc1054.fastq 6450 6238 6046 5977
## G20_MV1-16S_For_bc1005-16S_Rev_bc1059.fastq 9746 9035 8750 8680
## G20_PT1-16S_For_bc1020-16S_Rev_bc1045.fastq 3415 3340 3243 3204
## G20_PT2-16S_For_bc1008-16S_Rev_bc1045.fastq 2832 2637 2554 2533
## G20_PT2R-16S_For_bc1007-16S_Rev_bc1045.fastq 3728 3588 3460 3422
## G21_3M-16S_For_bc1005-16S_Rev_bc1044.fastq 2592 2304 2223 2184
## G21_6W-16S_For_bc1007-16S_Rev_bc1057.fastq 11101 10668 10283 10162
## G22_3M-16S_For_bc1020-16S_Rev_bc1075.fastq 2053 1981 1915 1868
## G22_6W-16S_For_bc1006-16S_Rev_bc1044.fastq 4798 4081 3958 3866
## G22_MV-16S_For_bc1003-16S_Rev_bc1044.fastq 3273 2954 2827 2740
## G23_3M-16S_For_bc1008-16S_Rev_bc1035.fastq 2235 1996 1912 1884
## G24_3M-16S_For_bc1020-16S_Rev_bc1056.fastq 4896 4708 4564 4531
## G24_6M-16S_For_bc1007-16S_Rev_bc1075.fastq 9423 8984 8629 8539
## G24_6W-16S_For_bc1008-16S_Rev_bc1062.fastq 7769 7179 6981 6963
## G25_3M-16S_For_bc1012-16S_Rev_bc1054.fastq 5418 4775 4629 4585
## G25_6W-16S_For_bc1008-16S_Rev_bc1065.fastq 7435 6931 6736 6675
## G26_3M-16S_For_bc1024-16S_Rev_bc1060.fastq 10974 10584 10267 10116
## G26_6M-16S_For_bc1020-16S_Rev_bc1060.fastq 9072 8743 8466 8398
## G26_6W-16S_For_bc1006-16S_Rev_bc1035.fastq 4238 3663 3511 3302
## G27_6M-16S_For_bc1002-16S_Rev_bc1056.fastq 2234 1888 1781 1681
## G27_6W-16S_For_bc1008-16S_Rev_bc1057.fastq 6043 5437 5224 5203
## G28_6M-16S_For_bc1022-16S_Rev_bc1060.fastq 11243 9963 9665 9530
## G29_3M-16S_For_bc1005-16S_Rev_bc1065.fastq 3308 3186 3103 3050
## G29_6W-16S_For_bc1008-16S_Rev_bc1054.fastq 4558 4180 4056 3988
## G3_3M-16S_For_bc1015-16S_Rev_bc1060.fastq 4009 3551 3395 3192
## G3_6M-16S_For_bc1009-16S_Rev_bc1044.fastq 2541 2306 2239 2156
## G3_6W-16S_For_bc1012-16S_Rev_bc1059.fastq 10796 9842 9530 9411
## G30_3M-16S_For_bc1009-16S_Rev_bc1035.fastq 1860 1718 1637 1589
## G30_6M-16S_For_bc1015-16S_Rev_bc1059.fastq 5453 4717 4493 4384
## G30_6W-16S_For_bc1010-16S_Rev_bc1044.fastq 1743 1511 1458 1421
## G31_3M-16S_For_bc1007-16S_Rev_bc1044.fastq 2818 2642 2511 2387
## G31_6M-16S_For_bc1003-16S_Rev_bc1045.fastq 4379 4076 3918 3800
## G33_3M-16S_For_bc1012-16S_Rev_bc1075.fastq 3545 3028 2889 2783
## G33_6W-16S_For_bc1022-16S_Rev_bc1059.fastq 11852 10059 9667 9605
## G34_3M-16S_For_bc1015-16S_Rev_bc1054.fastq 4728 4262 4156 4116
## G34_6W-16S_For_bc1012-16S_Rev_bc1065.fastq 3983 3370 3227 3158
## G35_3M-16S_For_bc1002-16S_Rev_bc1059.fastq 5566 4649 4437 4387
## G35_6M-16S_For_bc1012-16S_Rev_bc1045.fastq 3599 3361 3249 3190
## G35_6W-16S_For_bc1020-16S_Rev_bc1057.fastq 6620 6420 6233 6179
## G36_3M-16S_For_bc1024-16S_Rev_bc1075.fastq 562 536 515 485
## G36_6M-16S_For_bc1002-16S_Rev_bc1035.fastq 3882 3362 3259 3184
## G37_6W-16S_For_bc1008-16S_Rev_bc1044.fastq 2604 2305 2237 2165
## G37_PT1-16S_For_bc1007-16S_Rev_bc1035.fastq 2040 1944 1867 1742
## G38_3M-16S_For_bc1008-16S_Rev_bc1059.fastq 7508 6740 6535 6406
## G38_6W-16S_For_bc1002-16S_Rev_bc1054.fastq 3400 3050 2965 2850
## G39_6M-16S_For_bc1012-16S_Rev_bc1062.fastq 4307 3746 3627 3455
## G4_3M-16S_For_bc1015-16S_Rev_bc1062.fastq 4891 4391 4247 4108
## G4_3MR-16S_For_bc1015-16S_Rev_bc1045.fastq 2931 2633 2538 2420
## G4_6W-16S_For_bc1009-16S_Rev_bc1033.fastq 4840 4481 4315 4180
## G40_3M-16S_For_bc1010-16S_Rev_bc1045.fastq 2113 1863 1789 1738
## G40_6M-16S_For_bc1002-16S_Rev_bc1033.fastq 3274 2770 2664 2479
## G40_6W-16S_For_bc1007-16S_Rev_bc1060.fastq 11920 11413 10945 10744
## G41_3M-16S_For_bc1022-16S_Rev_bc1056.fastq 5022 4243 4082 3980
## G42_3M-16S_For_bc1002-16S_Rev_bc1062.fastq 3144 2850 2751 2684
## G42_6M-16S_For_bc1005-16S_Rev_bc1035.fastq 3547 3372 3276 3190
## G43_6M-16S_For_bc1010-16S_Rev_bc1033.fastq 4103 3618 3517 3363
## G43_6W-16S_For_bc1008-16S_Rev_bc1075.fastq 2949 2519 2421 2307
## G45_3M-16S_For_bc1024-16S_Rev_bc1045.fastq 4839 4727 4577 4489
## G45_6W-16S_For_bc1008-16S_Rev_bc1056.fastq 4627 4132 3985 3948
## G45_MV-16S_For_bc1024-16S_Rev_bc1057.fastq 11880 11545 11144 11056
## G46_3M-16S_For_bc1015-16S_Rev_bc1044.fastq 2711 2364 2282 2205
## G46_3MR-16S_For_bc1020-16S_Rev_bc1044.fastq 3083 2928 2837 2795
## G46_6M-16S_For_bc1022-16S_Rev_bc1065.fastq 4556 4054 3918 3713
## G46_6W-16S_For_bc1007-16S_Rev_bc1062.fastq 13821 13383 12918 12856
## G47_3M-16S_For_bc1005-16S_Rev_bc1062.fastq 11193 10701 10390 10302
## G47_6W-16S_For_bc1011-16S_Rev_bc1045.fastq 2901 2757 2648 2536
## G48_3M-16S_For_bc1015-16S_Rev_bc1057.fastq 10175 8984 8574 8380
## G48_6M-16S_For_bc1004-16S_Rev_bc1033.fastq 1974 1762 1703 1594
## G48_6W-16S_For_bc1012-16S_Rev_bc1060.fastq 9915 8682 8396 8336
## G5_3M-16S_For_bc1007-16S_Rev_bc1054.fastq 4446 4257 4022 3831
## G5_MV1-16S_For_bc1003-16S_Rev_bc1033.fastq 3565 3311 3181 3067
## G50_3M-16S_For_bc1020-16S_Rev_bc1062.fastq 4101 4013 3790 3580
## G50_6M-16S_For_bc1007-16S_Rev_bc1059.fastq 8095 7721 7411 7054
## G50_6W-16S_For_bc1004-16S_Rev_bc1044.fastq 2816 2450 2373 2316
## G51_6W-16S_For_bc1024-16S_Rev_bc1044.fastq 3932 3741 3602 3520
## G52_3M-16S_For_bc1024-16S_Rev_bc1062.fastq 10081 9802 9473 9451
## G52_6M-16S_For_bc1022-16S_Rev_bc1057.fastq 7219 6344 6166 5907
## G52_6W-16S_For_bc1020-16S_Rev_bc1059.fastq 10244 9859 9527 9453
## G7_3M-16S_For_bc1008-16S_Rev_bc1033.fastq 2353 2167 2086 2061
## G7_3MR-16S_For_bc1015-16S_Rev_bc1056.fastq 3404 3020 2873 2809
## G7_6M-16S_For_bc1007-16S_Rev_bc1033.fastq 2687 2584 2447 2334
## G7_MV2-16S_For_bc1022-16S_Rev_bc1044.fastq 3774 3241 3142 3079
## G7_PT1-16S_For_bc1005-16S_Rev_bc1057.fastq 6986 6692 6490 6389
## G8_3M-16S_For_bc1002-16S_Rev_bc1060.fastq 4014 3476 3351 3216
## G8_6W-16S_For_bc1007-16S_Rev_bc1065.fastq 9803 9434 9151 9023
## G9_3M-16S_For_bc1012-16S_Rev_bc1056.fastq 5309 4618 4467 4425
## G9_6M-16S_For_bc1022-16S_Rev_bc1075.fastq 3852 3450 3347 3274
## G9_6W-16S_For_bc1011-16S_Rev_bc1033.fastq 4709 4344 4199 4099
## MC1-16S_For_bc1009-16S_Rev_bc1045.fastq 1890 1779 1718 1669
## MC2-16S_For_bc1022-16S_Rev_bc1045.fastq 3929 3520 3435 3406
## MC3-16S_For_bc1020-16S_Rev_bc1033.fastq 3394 3283 3216 3195
## Sequence table (cell 1): build the sample-by-sequence table from the
## denoised results and cache every intermediate cell 1 object on disk.
if (!exists("st1")) {
  st1 <- makeSequenceTable(dd1)
  ## A bare `dim(st1)` inside these braces is evaluated and discarded, because
  ## the block's value is the invisible result of save(); print it explicitly.
  print(dim(st1))
  cell1_cache <- "/blue/mulligan/duttonc/Congo/output/cell1_output.Rdata"
  ## Create the output directory if needed so save() cannot fail after the
  ## long-running steps above have completed.
  dir.create(dirname(cell1_cache), recursive = TRUE, showWarnings = FALSE)
  save(lens.fn1, dd1, drp1, err1, prim1, track1, st1, file = cell1_cache)
} else {
  print("cached file loaded: st1")
}
## [1] "cached file loaded: st1"
## CELL 2
## Restore cached cell 2 objects when the cache file exists. There is
## deliberately no empty `else {}`: that form makes the expression evaluate to
## a visible NULL, which gets printed when no cache is present.
cell2_cache <- "/blue/mulligan/duttonc/Congo/output/cell2_output.Rdata"
if (file.exists(cell2_cache)) {
  load(cell2_cache)
}
path2 <- "/blue/mulligan/duttonc/Congo/Cell2"
## Anchor the pattern so only names ending in .fastq (optionally .gz) are
## listed; an unanchored "fastq" also matches names such as "x.fastq.md5".
fns2 <- list.files(path2, pattern = "\\.fastq(\\.gz)?$", full.names = TRUE)
## Remove primers (cell 2): trimmed reads are written to a "noprimers"
## subdirectory of path2, one output file per input file.
nops2 <- file.path(path2, "noprimers", basename(fns2))
if (!exists("prim2")) {
  prim2 <- removePrimers(
    fns2,
    nops2,
    primer.fwd = F27,
    primer.rev = dada2:::rc(R1492),
    orient = TRUE
  )
} else {
  print("cached file loaded: prim2")
}
## [1] "cached file loaded: prim2"
## Filter (cell 2): first collect, for each primer-trimmed file, the lengths
## of the sequences returned by getSequences().
if (!exists("lens.fn2")) {
  lens.fn2 <- lapply(nops2, function(trimmed) {
    nchar(getSequences(trimmed))
  })
} else {
  print("cached file loaded: lens.fn2")
}
## [1] "cached file loaded: lens.fn2"
## Pool the per-file lengths into a single vector and plot their histogram.
lens <- unlist(lens.fn2)
hist(lens, breaks = 1000)
## Quality/length filtering (cell 2): filtered reads go to
## <path2>/noprimers/filtered, keeping the original file names.
filts2 <- file.path(path2, "noprimers", "filtered", basename(fns2))
if (!exists("track2")) {
  track2 <- filterAndTrim(
    nops2,
    filts2,
    minQ = 3,
    minLen = 1000,
    maxLen = 1600,
    maxN = 0,
    rm.phix = FALSE,
    maxEE = 2
  )
} else {
  print("cached file loaded: track2")
}
## [1] "cached file loaded: track2"
## Display the filterAndTrim() result for cell 2 (reads.in / reads.out per file).
track2
## reads.in reads.out
## G1_6M-16S_For_bc1002-16S_Rev_bc1045.fastq 8573 8193
## G1_6W-16S_For_bc1006-16S_Rev_bc1045.fastq 13098 12556
## G10_3M-16S_For_bc1020-16S_Rev_bc1035.fastq 7449 7199
## G10_6M-16S_For_bc1022-16S_Rev_bc1033.fastq 6644 6358
## G10_6W-16S_For_bc1005-16S_Rev_bc1060.fastq 24270 23350
## G11_3M-16S_For_bc1012-16S_Rev_bc1035.fastq 7724 7469
## G11_6M-16S_For_bc1015-16S_Rev_bc1033.fastq 5374 5162
## G11_6W-16S_For_bc1012-16S_Rev_bc1057.fastq 18849 18281
## G12_3M-16S_For_bc1005-16S_Rev_bc1054.fastq 15761 15239
## G12_6W-16S_For_bc1006-16S_Rev_bc1033.fastq 19580 18906
## G12_MV1-16S_For_bc1024-16S_Rev_bc1056.fastq 14336 13723
## G12_PT1-16S_For_bc1002-16S_Rev_bc1044.fastq 7902 7069
## G13_3M_2-16S_For_bc1008-16S_Rev_bc1060.fastq 20751 20060
## G13_3M-16S_For_bc1024-16S_Rev_bc1059.fastq 36468 35328
## G13_6M-16S_For_bc1011-16S_Rev_bc1035.fastq 14776 14195
## G13_6W-16S_For_bc1020-16S_Rev_bc1054.fastq 15984 15327
## G14_3M-16S_For_bc1005-16S_Rev_bc1033.fastq 10283 9994
## G14_6M-16S_For_bc1015-16S_Rev_bc1075.fastq 9512 9209
## G14_6W-16S_For_bc1012-16S_Rev_bc1033.fastq 6117 5901
## G14_PT-16S_For_bc1002-16S_Rev_bc1057.fastq 16040 15508
## G15_3M-16S_For_bc1022-16S_Rev_bc1035.fastq 9524 9223
## G15_6W-16S_For_bc1024-16S_Rev_bc1035.fastq 12369 11982
## G15_MV1-16S_For_bc1024-16S_Rev_bc1033.fastq 12637 12239
## G16_6M-16S_For_bc1005-16S_Rev_bc1056.fastq 14144 13654
## G17_3M-16S_For_bc1015-16S_Rev_bc1065.fastq 11185 10819
## G17_6M-16S_For_bc1012-16S_Rev_bc1044.fastq 6461 6245
## G17_6W-16S_For_bc1005-16S_Rev_bc1045.fastq 10192 9804
## G19_3M-16S_For_bc1004-16S_Rev_bc1045.fastq 8370 8062
## G19_6W-16S_For_bc1010-16S_Rev_bc1035.fastq 6468 6276
## G19_6WR-16S_For_bc1022-16S_Rev_bc1062.fastq 17768 17149
## G19_MV3-16S_For_bc1005-16S_Rev_bc1075.fastq 19458 18893
## G2_3M-16S_For_bc1004-16S_Rev_bc1035.fastq 10049 9592
## G2_6M-16S_For_bc1011-16S_Rev_bc1044.fastq 11187 10634
## G2_6W-16S_For_bc1020-16S_Rev_bc1065.fastq 13688 13162
## G2_PT-16S_For_bc1024-16S_Rev_bc1065.fastq 48406 47058
## G20_6W-16S_For_bc1024-16S_Rev_bc1054.fastq 21082 20482
## G20_MV1-16S_For_bc1005-16S_Rev_bc1059.fastq 30639 29605
## G20_PT1-16S_For_bc1020-16S_Rev_bc1045.fastq 10940 10636
## G20_PT2-16S_For_bc1008-16S_Rev_bc1045.fastq 9184 8883
## G20_PT2R-16S_For_bc1007-16S_Rev_bc1045.fastq 12712 12231
## G21_3M-16S_For_bc1005-16S_Rev_bc1044.fastq 7586 7297
## G21_6W-16S_For_bc1007-16S_Rev_bc1057.fastq 34339 32955
## G22_3M-16S_For_bc1020-16S_Rev_bc1075.fastq 6207 5962
## G22_6W-16S_For_bc1006-16S_Rev_bc1044.fastq 12863 12421
## G22_MV-16S_For_bc1003-16S_Rev_bc1044.fastq 9481 9111
## G23_3M-16S_For_bc1008-16S_Rev_bc1035.fastq 6806 6610
## G24_3M-16S_For_bc1020-16S_Rev_bc1056.fastq 15702 15194
## G24_6M-16S_For_bc1007-16S_Rev_bc1075.fastq 29319 28069
## G24_6W-16S_For_bc1008-16S_Rev_bc1062.fastq 23589 22893
## G25_3M-16S_For_bc1012-16S_Rev_bc1054.fastq 14942 14470
## G25_6W-16S_For_bc1008-16S_Rev_bc1065.fastq 22253 21637
## G26_3M-16S_For_bc1024-16S_Rev_bc1060.fastq 35333 34289
## G26_6M-16S_For_bc1020-16S_Rev_bc1060.fastq 28500 27598
## G26_6W-16S_For_bc1006-16S_Rev_bc1035.fastq 11800 11357
## G27_6M-16S_For_bc1002-16S_Rev_bc1056.fastq 6288 5954
## G27_6W-16S_For_bc1008-16S_Rev_bc1057.fastq 17186 16559
## G28_6M-16S_For_bc1022-16S_Rev_bc1060.fastq 32341 31348
## G29_3M-16S_For_bc1005-16S_Rev_bc1065.fastq 10780 10478
## G29_6W-16S_For_bc1008-16S_Rev_bc1054.fastq 14314 13880
## G3_3M-16S_For_bc1015-16S_Rev_bc1060.fastq 11538 11038
## G3_6M-16S_For_bc1009-16S_Rev_bc1044.fastq 7708 7472
## G3_6W-16S_For_bc1012-16S_Rev_bc1059.fastq 30970 29990
## G30_3M-16S_For_bc1009-16S_Rev_bc1035.fastq 5396 5198
## G30_6M-16S_For_bc1015-16S_Rev_bc1059.fastq 15107 14399
## G30_6W-16S_For_bc1010-16S_Rev_bc1044.fastq 5063 4850
## G31_3M-16S_For_bc1007-16S_Rev_bc1044.fastq 8166 7808
## G31_6M-16S_For_bc1003-16S_Rev_bc1045.fastq 12766 12305
## G33_3M-16S_For_bc1012-16S_Rev_bc1075.fastq 9720 9250
## G33_6W-16S_For_bc1022-16S_Rev_bc1059.fastq 32018 30731
## G34_3M-16S_For_bc1015-16S_Rev_bc1054.fastq 13918 13460
## G34_6W-16S_For_bc1012-16S_Rev_bc1065.fastq 11252 10793
## G35_3M-16S_For_bc1002-16S_Rev_bc1059.fastq 14462 13816
## G35_6M-16S_For_bc1012-16S_Rev_bc1045.fastq 11166 10840
## G35_6W-16S_For_bc1020-16S_Rev_bc1057.fastq 20704 20052
## G36_3M-16S_For_bc1024-16S_Rev_bc1075.fastq 1742 1689
## G36_6M-16S_For_bc1002-16S_Rev_bc1035.fastq 10974 10593
## G37_6W-16S_For_bc1008-16S_Rev_bc1044.fastq 7774 7529
## G37_PT1-16S_For_bc1007-16S_Rev_bc1035.fastq 6586 6268
## G38_3M-16S_For_bc1008-16S_Rev_bc1059.fastq 22719 22104
## G38_6W-16S_For_bc1002-16S_Rev_bc1054.fastq 10531 10161
## G39_6M-16S_For_bc1012-16S_Rev_bc1062.fastq 12035 11607
## G4_3M-16S_For_bc1015-16S_Rev_bc1062.fastq 14319 13792
## G4_3MR-16S_For_bc1015-16S_Rev_bc1045.fastq 8382 8118
## G4_6W-16S_For_bc1009-16S_Rev_bc1033.fastq 15042 14464
## G40_3M-16S_For_bc1010-16S_Rev_bc1045.fastq 6145 5895
## G40_6M-16S_For_bc1002-16S_Rev_bc1033.fastq 9032 8590
## G40_6W-16S_For_bc1007-16S_Rev_bc1060.fastq 38493 36832
## G41_3M-16S_For_bc1022-16S_Rev_bc1056.fastq 13574 13092
## G42_3M-16S_For_bc1002-16S_Rev_bc1062.fastq 8966 8701
## G42_6M-16S_For_bc1005-16S_Rev_bc1035.fastq 10643 10319
## G43_6M-16S_For_bc1010-16S_Rev_bc1033.fastq 12052 11712
## G43_6W-16S_For_bc1008-16S_Rev_bc1075.fastq 8199 7906
## G45_3M-16S_For_bc1024-16S_Rev_bc1045.fastq 14892 14414
## G45_6W-16S_For_bc1008-16S_Rev_bc1056.fastq 13874 13409
## G45_MV-16S_For_bc1024-16S_Rev_bc1057.fastq 36903 35692
## G46_3M-16S_For_bc1015-16S_Rev_bc1044.fastq 8049 7787
## G46_3MR-16S_For_bc1020-16S_Rev_bc1044.fastq 9794 9508
## G46_6M-16S_For_bc1022-16S_Rev_bc1065.fastq 13687 13214
## G46_6W-16S_For_bc1007-16S_Rev_bc1062.fastq 43273 41786
## G47_3M-16S_For_bc1005-16S_Rev_bc1062.fastq 35224 34188
## G47_6W-16S_For_bc1011-16S_Rev_bc1045.fastq 8952 8553
## G48_3M-16S_For_bc1015-16S_Rev_bc1057.fastq 28975 27619
## G48_6M-16S_For_bc1004-16S_Rev_bc1033.fastq 5670 5458
## G48_6W-16S_For_bc1012-16S_Rev_bc1060.fastq 27476 26595
## G5_3M-16S_For_bc1007-16S_Rev_bc1054.fastq 13617 12959
## G5_MV1-16S_For_bc1003-16S_Rev_bc1033.fastq 11188 10758
## G50_3M-16S_For_bc1020-16S_Rev_bc1062.fastq 12819 12087
## G50_6M-16S_For_bc1007-16S_Rev_bc1059.fastq 25294 24303
## G50_6W-16S_For_bc1004-16S_Rev_bc1044.fastq 8042 7739
## G51_6W-16S_For_bc1024-16S_Rev_bc1044.fastq 12019 11606
## G52_3M-16S_For_bc1024-16S_Rev_bc1062.fastq 31224 30147
## G52_6M-16S_For_bc1022-16S_Rev_bc1057.fastq 20150 19490
## G52_6W-16S_For_bc1020-16S_Rev_bc1059.fastq 32570 31452
## G7_3M-16S_For_bc1008-16S_Rev_bc1033.fastq 7209 6948
## G7_3MR-16S_For_bc1015-16S_Rev_bc1056.fastq 9982 9473
## G7_6M-16S_For_bc1007-16S_Rev_bc1033.fastq 8280 7868
## G7_MV2-16S_For_bc1022-16S_Rev_bc1044.fastq 10865 10543
## G7_PT1-16S_For_bc1005-16S_Rev_bc1057.fastq 22347 21708
## G8_3M-16S_For_bc1002-16S_Rev_bc1060.fastq 11882 11480
## G8_6W-16S_For_bc1007-16S_Rev_bc1065.fastq 31661 30722
## G9_3M-16S_For_bc1012-16S_Rev_bc1056.fastq 14554 14101
## G9_6M-16S_For_bc1022-16S_Rev_bc1075.fastq 10922 10574
## G9_6W-16S_For_bc1011-16S_Rev_bc1033.fastq 14336 13857
## MC1-16S_For_bc1009-16S_Rev_bc1045.fastq 5825 5670
## MC2-16S_For_bc1022-16S_Rev_bc1045.fastq 11742 11456
## MC3-16S_For_bc1020-16S_Rev_bc1033.fastq 10879 10593
## dada2 (cell 2): dereplicate the filtered reads.
if (!exists("drp2")) {
  drp2 <- derepFastq(filts2, verbose = TRUE)
} else {
  print("cached file loaded: drp2")
}
## [1] "cached file loaded: drp2"
## Learn errors (cell 2): fit the error model on the dereplicated reads using
## the PacBio error-estimation function.
if (!exists("err2")) {
  err2 <- learnErrors(
    drp2,
    errorEstimationFunction = PacBioErrfun,
    BAND_SIZE = 32,
    multithread = TRUE
  )
} else {
  print("cached file loaded: err2")
}
## [1] "cached file loaded: err2"
## Plot errors: draw the error model stored in err2 (cell 2) for visual inspection.
plotErrors(err2)
## Denoise (cell 2): run dada() on the dereplicated reads with the error
## model learned for this cell.
if (!exists("dd2")) {
  dd2 <- dada(
    drp2,
    err = err2,
    BAND_SIZE = 32,
    multithread = TRUE
  )
} else {
  print("cached file loaded: dd2")
}
## [1] "cached file loaded: dd2"
## Read tracking for cell 2: one row per sample, one column per pipeline stage
## (input reads, after primer removal, after filtering, after denoising).
cbind(
  ccs = prim2[, 1],
  primers = prim2[, 2],
  filtered = track2[, 2],
  ## vapply() guarantees exactly one numeric total per sample; sapply() would
  ## silently return a list for empty or irregular input.
  denoised = vapply(dd2, function(res) sum(res$denoised), numeric(1))
)
## ccs primers filtered denoised
## G1_6M-16S_For_bc1002-16S_Rev_bc1045.fastq 9956 8573 8193 7677
## G1_6W-16S_For_bc1006-16S_Rev_bc1045.fastq 15171 13098 12556 12312
## G10_3M-16S_For_bc1020-16S_Rev_bc1035.fastq 7690 7449 7199 7048
## G10_6M-16S_For_bc1022-16S_Rev_bc1033.fastq 8208 6644 6358 6119
## G10_6W-16S_For_bc1005-16S_Rev_bc1060.fastq 26955 24270 23350 22855
## G11_3M-16S_For_bc1012-16S_Rev_bc1035.fastq 8927 7724 7469 7390
## G11_6M-16S_For_bc1015-16S_Rev_bc1033.fastq 6308 5374 5162 4931
## G11_6W-16S_For_bc1012-16S_Rev_bc1057.fastq 20465 18849 18281 18134
## G12_3M-16S_For_bc1005-16S_Rev_bc1054.fastq 16845 15761 15239 15085
## G12_6W-16S_For_bc1006-16S_Rev_bc1033.fastq 22636 19580 18906 18564
## G12_MV1-16S_For_bc1024-16S_Rev_bc1056.fastq 14875 14336 13723 13388
## G12_PT1-16S_For_bc1002-16S_Rev_bc1044.fastq 9296 7902 7069 6798
## G13_3M_2-16S_For_bc1008-16S_Rev_bc1060.fastq 23067 20751 20060 19703
## G13_3M-16S_For_bc1024-16S_Rev_bc1059.fastq 38024 36468 35328 34848
## G13_6M-16S_For_bc1011-16S_Rev_bc1035.fastq 15874 14776 14195 13665
## G13_6W-16S_For_bc1020-16S_Rev_bc1054.fastq 16652 15984 15327 15210
## G14_3M-16S_For_bc1005-16S_Rev_bc1033.fastq 10963 10283 9994 9931
## G14_6M-16S_For_bc1015-16S_Rev_bc1075.fastq 11209 9512 9209 9049
## G14_6W-16S_For_bc1012-16S_Rev_bc1033.fastq 7114 6117 5901 5708
## G14_PT-16S_For_bc1002-16S_Rev_bc1057.fastq 18353 16040 15508 15084
## G15_3M-16S_For_bc1022-16S_Rev_bc1035.fastq 11263 9524 9223 9087
## G15_6W-16S_For_bc1024-16S_Rev_bc1035.fastq 12819 12369 11982 11895
## G15_MV1-16S_For_bc1024-16S_Rev_bc1033.fastq 13149 12637 12239 12125
## G16_6M-16S_For_bc1005-16S_Rev_bc1056.fastq 15686 14144 13654 13244
## G17_3M-16S_For_bc1015-16S_Rev_bc1065.fastq 12748 11185 10819 10629
## G17_6M-16S_For_bc1012-16S_Rev_bc1044.fastq 7852 6461 6245 6122
## G17_6W-16S_For_bc1005-16S_Rev_bc1045.fastq 11204 10192 9804 9713
## G19_3M-16S_For_bc1004-16S_Rev_bc1045.fastq 9260 8370 8062 7918
## G19_6W-16S_For_bc1010-16S_Rev_bc1035.fastq 7318 6468 6276 6167
## G19_6WR-16S_For_bc1022-16S_Rev_bc1062.fastq 20927 17768 17149 16882
## G19_MV3-16S_For_bc1005-16S_Rev_bc1075.fastq 20300 19458 18893 18693
## G2_3M-16S_For_bc1004-16S_Rev_bc1035.fastq 11529 10049 9592 9335
## G2_6M-16S_For_bc1011-16S_Rev_bc1044.fastq 12265 11187 10634 10229
## G2_6W-16S_For_bc1020-16S_Rev_bc1065.fastq 14000 13688 13162 12753
## G2_PT-16S_For_bc1024-16S_Rev_bc1065.fastq 49738 48406 47058 46837
## G20_6W-16S_For_bc1024-16S_Rev_bc1054.fastq 21802 21082 20482 20279
## G20_MV1-16S_For_bc1005-16S_Rev_bc1059.fastq 33009 30639 29605 29366
## G20_PT1-16S_For_bc1020-16S_Rev_bc1045.fastq 11211 10940 10636 10548
## G20_PT2-16S_For_bc1008-16S_Rev_bc1045.fastq 9842 9184 8883 8796
## G20_PT2R-16S_For_bc1007-16S_Rev_bc1045.fastq 13178 12712 12231 12101
## G21_3M-16S_For_bc1005-16S_Rev_bc1044.fastq 8592 7586 7297 7189
## G21_6W-16S_For_bc1007-16S_Rev_bc1057.fastq 35874 34339 32955 32573
## G22_3M-16S_For_bc1020-16S_Rev_bc1075.fastq 6466 6207 5962 5862
## G22_6W-16S_For_bc1006-16S_Rev_bc1044.fastq 15463 12863 12421 12204
## G22_MV-16S_For_bc1003-16S_Rev_bc1044.fastq 10515 9481 9111 8890
## G23_3M-16S_For_bc1008-16S_Rev_bc1035.fastq 7621 6806 6610 6548
## G24_3M-16S_For_bc1020-16S_Rev_bc1056.fastq 16376 15702 15194 15103
## G24_6M-16S_For_bc1007-16S_Rev_bc1075.fastq 30912 29319 28069 27861
## G24_6W-16S_For_bc1008-16S_Rev_bc1062.fastq 25686 23589 22893 22832
## G25_3M-16S_For_bc1012-16S_Rev_bc1054.fastq 17087 14942 14470 14385
## G25_6W-16S_For_bc1008-16S_Rev_bc1065.fastq 23858 22253 21637 21435
## G26_3M-16S_For_bc1024-16S_Rev_bc1060.fastq 36646 35333 34289 33868
## G26_6M-16S_For_bc1020-16S_Rev_bc1060.fastq 29700 28500 27598 27420
## G26_6W-16S_For_bc1006-16S_Rev_bc1035.fastq 13644 11800 11357 10833
## G27_6M-16S_For_bc1002-16S_Rev_bc1056.fastq 7346 6288 5954 5677
## G27_6W-16S_For_bc1008-16S_Rev_bc1057.fastq 19118 17186 16559 16477
## G28_6M-16S_For_bc1022-16S_Rev_bc1060.fastq 36575 32341 31348 31166
## G29_3M-16S_For_bc1005-16S_Rev_bc1065.fastq 11152 10780 10478 10419
## G29_6W-16S_For_bc1008-16S_Rev_bc1054.fastq 15797 14314 13880 13754
## G3_3M-16S_For_bc1015-16S_Rev_bc1060.fastq 13290 11538 11038 10508
## G3_6M-16S_For_bc1009-16S_Rev_bc1044.fastq 8601 7708 7472 7243
## G3_6W-16S_For_bc1012-16S_Rev_bc1059.fastq 34126 30970 29990 29814
## G30_3M-16S_For_bc1009-16S_Rev_bc1035.fastq 5893 5396 5198 5094
## G30_6M-16S_For_bc1015-16S_Rev_bc1059.fastq 17542 15107 14399 13828
## G30_6W-16S_For_bc1010-16S_Rev_bc1044.fastq 5950 5063 4850 4753
## G31_3M-16S_For_bc1007-16S_Rev_bc1044.fastq 8788 8166 7808 7512
## G31_6M-16S_For_bc1003-16S_Rev_bc1045.fastq 13890 12766 12305 12019
## G33_3M-16S_For_bc1012-16S_Rev_bc1075.fastq 11590 9720 9250 9054
## G33_6W-16S_For_bc1022-16S_Rev_bc1059.fastq 37586 32018 30731 30517
## G34_3M-16S_For_bc1015-16S_Rev_bc1054.fastq 15656 13918 13460 12827
## G34_6W-16S_For_bc1012-16S_Rev_bc1065.fastq 13309 11252 10793 10584
## G35_3M-16S_For_bc1002-16S_Rev_bc1059.fastq 17162 14462 13816 13724
## G35_6M-16S_For_bc1012-16S_Rev_bc1045.fastq 11984 11166 10840 10709
## G35_6W-16S_For_bc1020-16S_Rev_bc1057.fastq 21383 20704 20052 19967
## G36_3M-16S_For_bc1024-16S_Rev_bc1075.fastq 1846 1742 1689 1633
## G36_6M-16S_For_bc1002-16S_Rev_bc1035.fastq 12792 10974 10593 10414
## G37_6W-16S_For_bc1008-16S_Rev_bc1044.fastq 8773 7774 7529 7354
## G37_PT1-16S_For_bc1007-16S_Rev_bc1035.fastq 6874 6586 6268 5959
## G38_3M-16S_For_bc1008-16S_Rev_bc1059.fastq 25388 22719 22104 21842
## G38_6W-16S_For_bc1002-16S_Rev_bc1054.fastq 11917 10531 10161 9885
## G39_6M-16S_For_bc1012-16S_Rev_bc1062.fastq 13919 12035 11607 11185
## G4_3M-16S_For_bc1015-16S_Rev_bc1062.fastq 16094 14319 13792 13454
## G4_3MR-16S_For_bc1015-16S_Rev_bc1045.fastq 9347 8382 8118 7891
## G4_6W-16S_For_bc1009-16S_Rev_bc1033.fastq 16346 15042 14464 14173
## G40_3M-16S_For_bc1010-16S_Rev_bc1045.fastq 7039 6145 5895 5780
## G40_6M-16S_For_bc1002-16S_Rev_bc1033.fastq 10648 9032 8590 8139
## G40_6W-16S_For_bc1007-16S_Rev_bc1060.fastq 40598 38493 36832 36338
## G41_3M-16S_For_bc1022-16S_Rev_bc1056.fastq 16163 13574 13092 12899
## G42_3M-16S_For_bc1002-16S_Rev_bc1062.fastq 9886 8966 8701 8609
## G42_6M-16S_For_bc1005-16S_Rev_bc1035.fastq 11208 10643 10319 10176
## G43_6M-16S_For_bc1010-16S_Rev_bc1033.fastq 13646 12052 11712 11401
## G43_6W-16S_For_bc1008-16S_Rev_bc1075.fastq 9625 8199 7906 7657
## G45_3M-16S_For_bc1024-16S_Rev_bc1045.fastq 15322 14892 14414 14329
## G45_6W-16S_For_bc1008-16S_Rev_bc1056.fastq 15539 13874 13409 13309
## G45_MV-16S_For_bc1024-16S_Rev_bc1057.fastq 38060 36903 35692 35491
## G46_3M-16S_For_bc1015-16S_Rev_bc1044.fastq 9243 8049 7787 7603
## G46_3MR-16S_For_bc1020-16S_Rev_bc1044.fastq 10302 9794 9508 9447
## G46_6M-16S_For_bc1022-16S_Rev_bc1065.fastq 15449 13687 13214 12726
## G46_6W-16S_For_bc1007-16S_Rev_bc1062.fastq 44832 43273 41786 40924
## G47_3M-16S_For_bc1005-16S_Rev_bc1062.fastq 36984 35224 34188 33964
## G47_6W-16S_For_bc1011-16S_Rev_bc1045.fastq 9555 8952 8553 8411
## G48_3M-16S_For_bc1015-16S_Rev_bc1057.fastq 32941 28975 27619 27135
## G48_6M-16S_For_bc1004-16S_Rev_bc1033.fastq 6578 5670 5458 5223
## G48_6W-16S_For_bc1012-16S_Rev_bc1060.fastq 31592 27476 26595 26499
## G5_3M-16S_For_bc1007-16S_Rev_bc1054.fastq 14313 13617 12959 12465
## G5_MV1-16S_For_bc1003-16S_Rev_bc1033.fastq 12120 11188 10758 10440
## G50_3M-16S_For_bc1020-16S_Rev_bc1062.fastq 13138 12819 12087 11412
## G50_6M-16S_For_bc1007-16S_Rev_bc1059.fastq 26594 25294 24303 23350
## G50_6W-16S_For_bc1004-16S_Rev_bc1044.fastq 9464 8042 7739 7530
## G51_6W-16S_For_bc1024-16S_Rev_bc1044.fastq 12695 12019 11606 11501
## G52_3M-16S_For_bc1024-16S_Rev_bc1062.fastq 32075 31224 30147 30120
## G52_6M-16S_For_bc1022-16S_Rev_bc1057.fastq 23149 20150 19490 18902
## G52_6W-16S_For_bc1020-16S_Rev_bc1059.fastq 33901 32570 31452 31312
## G7_3M-16S_For_bc1008-16S_Rev_bc1033.fastq 7801 7209 6948 6878
## G7_3MR-16S_For_bc1015-16S_Rev_bc1056.fastq 11519 9982 9473 9301
## G7_6M-16S_For_bc1007-16S_Rev_bc1033.fastq 8624 8280 7868 7576
## G7_MV2-16S_For_bc1022-16S_Rev_bc1044.fastq 12752 10865 10543 10456
## G7_PT1-16S_For_bc1005-16S_Rev_bc1057.fastq 23346 22347 21708 21499
## G8_3M-16S_For_bc1002-16S_Rev_bc1060.fastq 13648 11882 11480 11189
## G8_6W-16S_For_bc1007-16S_Rev_bc1065.fastq 32853 31661 30722 30378
## G9_3M-16S_For_bc1012-16S_Rev_bc1056.fastq 16918 14554 14101 14024
## G9_6M-16S_For_bc1022-16S_Rev_bc1075.fastq 12430 10922 10574 10410
## G9_6W-16S_For_bc1011-16S_Rev_bc1033.fastq 15472 14336 13857 13650
## MC1-16S_For_bc1009-16S_Rev_bc1045.fastq 6280 5825 5670 5572
## MC2-16S_For_bc1022-16S_Rev_bc1045.fastq 13094 11742 11456 11359
## MC3-16S_For_bc1020-16S_Rev_bc1033.fastq 11274 10879 10593 10533
## Sequence table
## Build the Cell 2 sequence table unless an object named `st2` already exists
## in the session; after building, save the Cell 2 intermediates to disk.
if (!exists("st2")) {
  st2 <- makeSequenceTable(dd2)
  dim(st2)  # value is not printed from inside this block
  save(
    lens.fn2, dd2, drp2, err2, prim2, track2, st2,
    file = "/blue/mulligan/duttonc/Congo/output/cell2_output.Rdata"
  )
} else {
  print("cached file loaded: st2")
}
## [1] "cached file loaded: st2"
## The sequencing facility mislabelled one sample: every row name containing
## "G13_3M_2" is rewritten to "G37_3M" in both per-cell sequence tables.
rownames(st1) <- gsub(pattern = "G13_3M_2", replacement = "G37_3M", x = rownames(st1))
rownames(st2) <- gsub(pattern = "G13_3M_2", replacement = "G37_3M", x = rownames(st2))
## Merge the two cells into a single table, summing the counts of samples
## that are present in both tables.
stcomb <- mergeSequenceTables(table1 = st1, table2 = st2, repeats = "sum")
## Print the merged sample names; 126 samples are expected.
rownames(stcomb)
## [1] "G1_6M-16S_For_bc1002-16S_Rev_bc1045.fastq"
## [2] "G1_6W-16S_For_bc1006-16S_Rev_bc1045.fastq"
## [3] "G10_3M-16S_For_bc1020-16S_Rev_bc1035.fastq"
## [4] "G10_6M-16S_For_bc1022-16S_Rev_bc1033.fastq"
## [5] "G10_6W-16S_For_bc1005-16S_Rev_bc1060.fastq"
## [6] "G11_3M-16S_For_bc1012-16S_Rev_bc1035.fastq"
## [7] "G11_6M-16S_For_bc1015-16S_Rev_bc1033.fastq"
## [8] "G11_6W-16S_For_bc1012-16S_Rev_bc1057.fastq"
## [9] "G12_3M-16S_For_bc1005-16S_Rev_bc1054.fastq"
## [10] "G12_6W-16S_For_bc1006-16S_Rev_bc1033.fastq"
## [11] "G12_MV1-16S_For_bc1024-16S_Rev_bc1056.fastq"
## [12] "G12_PT1-16S_For_bc1002-16S_Rev_bc1044.fastq"
## [13] "G37_3M-16S_For_bc1008-16S_Rev_bc1060.fastq"
## [14] "G13_3M-16S_For_bc1024-16S_Rev_bc1059.fastq"
## [15] "G13_6M-16S_For_bc1011-16S_Rev_bc1035.fastq"
## [16] "G13_6W-16S_For_bc1020-16S_Rev_bc1054.fastq"
## [17] "G14_3M-16S_For_bc1005-16S_Rev_bc1033.fastq"
## [18] "G14_6M-16S_For_bc1015-16S_Rev_bc1075.fastq"
## [19] "G14_6W-16S_For_bc1012-16S_Rev_bc1033.fastq"
## [20] "G14_PT-16S_For_bc1002-16S_Rev_bc1057.fastq"
## [21] "G15_3M-16S_For_bc1022-16S_Rev_bc1035.fastq"
## [22] "G15_6W-16S_For_bc1024-16S_Rev_bc1035.fastq"
## [23] "G15_MV1-16S_For_bc1024-16S_Rev_bc1033.fastq"
## [24] "G16_6M-16S_For_bc1005-16S_Rev_bc1056.fastq"
## [25] "G17_3M-16S_For_bc1015-16S_Rev_bc1065.fastq"
## [26] "G17_6M-16S_For_bc1012-16S_Rev_bc1044.fastq"
## [27] "G17_6W-16S_For_bc1005-16S_Rev_bc1045.fastq"
## [28] "G19_3M-16S_For_bc1004-16S_Rev_bc1045.fastq"
## [29] "G19_6W-16S_For_bc1010-16S_Rev_bc1035.fastq"
## [30] "G19_6WR-16S_For_bc1022-16S_Rev_bc1062.fastq"
## [31] "G19_MV3-16S_For_bc1005-16S_Rev_bc1075.fastq"
## [32] "G2_3M-16S_For_bc1004-16S_Rev_bc1035.fastq"
## [33] "G2_6M-16S_For_bc1011-16S_Rev_bc1044.fastq"
## [34] "G2_6W-16S_For_bc1020-16S_Rev_bc1065.fastq"
## [35] "G2_PT-16S_For_bc1024-16S_Rev_bc1065.fastq"
## [36] "G20_6W-16S_For_bc1024-16S_Rev_bc1054.fastq"
## [37] "G20_MV1-16S_For_bc1005-16S_Rev_bc1059.fastq"
## [38] "G20_PT1-16S_For_bc1020-16S_Rev_bc1045.fastq"
## [39] "G20_PT2-16S_For_bc1008-16S_Rev_bc1045.fastq"
## [40] "G20_PT2R-16S_For_bc1007-16S_Rev_bc1045.fastq"
## [41] "G21_3M-16S_For_bc1005-16S_Rev_bc1044.fastq"
## [42] "G21_6W-16S_For_bc1007-16S_Rev_bc1057.fastq"
## [43] "G22_3M-16S_For_bc1020-16S_Rev_bc1075.fastq"
## [44] "G22_6W-16S_For_bc1006-16S_Rev_bc1044.fastq"
## [45] "G22_MV-16S_For_bc1003-16S_Rev_bc1044.fastq"
## [46] "G23_3M-16S_For_bc1008-16S_Rev_bc1035.fastq"
## [47] "G24_3M-16S_For_bc1020-16S_Rev_bc1056.fastq"
## [48] "G24_6M-16S_For_bc1007-16S_Rev_bc1075.fastq"
## [49] "G24_6W-16S_For_bc1008-16S_Rev_bc1062.fastq"
## [50] "G25_3M-16S_For_bc1012-16S_Rev_bc1054.fastq"
## [51] "G25_6W-16S_For_bc1008-16S_Rev_bc1065.fastq"
## [52] "G26_3M-16S_For_bc1024-16S_Rev_bc1060.fastq"
## [53] "G26_6M-16S_For_bc1020-16S_Rev_bc1060.fastq"
## [54] "G26_6W-16S_For_bc1006-16S_Rev_bc1035.fastq"
## [55] "G27_6M-16S_For_bc1002-16S_Rev_bc1056.fastq"
## [56] "G27_6W-16S_For_bc1008-16S_Rev_bc1057.fastq"
## [57] "G28_6M-16S_For_bc1022-16S_Rev_bc1060.fastq"
## [58] "G29_3M-16S_For_bc1005-16S_Rev_bc1065.fastq"
## [59] "G29_6W-16S_For_bc1008-16S_Rev_bc1054.fastq"
## [60] "G3_3M-16S_For_bc1015-16S_Rev_bc1060.fastq"
## [61] "G3_6M-16S_For_bc1009-16S_Rev_bc1044.fastq"
## [62] "G3_6W-16S_For_bc1012-16S_Rev_bc1059.fastq"
## [63] "G30_3M-16S_For_bc1009-16S_Rev_bc1035.fastq"
## [64] "G30_6M-16S_For_bc1015-16S_Rev_bc1059.fastq"
## [65] "G30_6W-16S_For_bc1010-16S_Rev_bc1044.fastq"
## [66] "G31_3M-16S_For_bc1007-16S_Rev_bc1044.fastq"
## [67] "G31_6M-16S_For_bc1003-16S_Rev_bc1045.fastq"
## [68] "G33_3M-16S_For_bc1012-16S_Rev_bc1075.fastq"
## [69] "G33_6W-16S_For_bc1022-16S_Rev_bc1059.fastq"
## [70] "G34_3M-16S_For_bc1015-16S_Rev_bc1054.fastq"
## [71] "G34_6W-16S_For_bc1012-16S_Rev_bc1065.fastq"
## [72] "G35_3M-16S_For_bc1002-16S_Rev_bc1059.fastq"
## [73] "G35_6M-16S_For_bc1012-16S_Rev_bc1045.fastq"
## [74] "G35_6W-16S_For_bc1020-16S_Rev_bc1057.fastq"
## [75] "G36_3M-16S_For_bc1024-16S_Rev_bc1075.fastq"
## [76] "G36_6M-16S_For_bc1002-16S_Rev_bc1035.fastq"
## [77] "G37_6W-16S_For_bc1008-16S_Rev_bc1044.fastq"
## [78] "G37_PT1-16S_For_bc1007-16S_Rev_bc1035.fastq"
## [79] "G38_3M-16S_For_bc1008-16S_Rev_bc1059.fastq"
## [80] "G38_6W-16S_For_bc1002-16S_Rev_bc1054.fastq"
## [81] "G39_6M-16S_For_bc1012-16S_Rev_bc1062.fastq"
## [82] "G4_3M-16S_For_bc1015-16S_Rev_bc1062.fastq"
## [83] "G4_3MR-16S_For_bc1015-16S_Rev_bc1045.fastq"
## [84] "G4_6W-16S_For_bc1009-16S_Rev_bc1033.fastq"
## [85] "G40_3M-16S_For_bc1010-16S_Rev_bc1045.fastq"
## [86] "G40_6M-16S_For_bc1002-16S_Rev_bc1033.fastq"
## [87] "G40_6W-16S_For_bc1007-16S_Rev_bc1060.fastq"
## [88] "G41_3M-16S_For_bc1022-16S_Rev_bc1056.fastq"
## [89] "G42_3M-16S_For_bc1002-16S_Rev_bc1062.fastq"
## [90] "G42_6M-16S_For_bc1005-16S_Rev_bc1035.fastq"
## [91] "G43_6M-16S_For_bc1010-16S_Rev_bc1033.fastq"
## [92] "G43_6W-16S_For_bc1008-16S_Rev_bc1075.fastq"
## [93] "G45_3M-16S_For_bc1024-16S_Rev_bc1045.fastq"
## [94] "G45_6W-16S_For_bc1008-16S_Rev_bc1056.fastq"
## [95] "G45_MV-16S_For_bc1024-16S_Rev_bc1057.fastq"
## [96] "G46_3M-16S_For_bc1015-16S_Rev_bc1044.fastq"
## [97] "G46_3MR-16S_For_bc1020-16S_Rev_bc1044.fastq"
## [98] "G46_6M-16S_For_bc1022-16S_Rev_bc1065.fastq"
## [99] "G46_6W-16S_For_bc1007-16S_Rev_bc1062.fastq"
## [100] "G47_3M-16S_For_bc1005-16S_Rev_bc1062.fastq"
## [101] "G47_6W-16S_For_bc1011-16S_Rev_bc1045.fastq"
## [102] "G48_3M-16S_For_bc1015-16S_Rev_bc1057.fastq"
## [103] "G48_6M-16S_For_bc1004-16S_Rev_bc1033.fastq"
## [104] "G48_6W-16S_For_bc1012-16S_Rev_bc1060.fastq"
## [105] "G5_3M-16S_For_bc1007-16S_Rev_bc1054.fastq"
## [106] "G5_MV1-16S_For_bc1003-16S_Rev_bc1033.fastq"
## [107] "G50_3M-16S_For_bc1020-16S_Rev_bc1062.fastq"
## [108] "G50_6M-16S_For_bc1007-16S_Rev_bc1059.fastq"
## [109] "G50_6W-16S_For_bc1004-16S_Rev_bc1044.fastq"
## [110] "G51_6W-16S_For_bc1024-16S_Rev_bc1044.fastq"
## [111] "G52_3M-16S_For_bc1024-16S_Rev_bc1062.fastq"
## [112] "G52_6M-16S_For_bc1022-16S_Rev_bc1057.fastq"
## [113] "G52_6W-16S_For_bc1020-16S_Rev_bc1059.fastq"
## [114] "G7_3M-16S_For_bc1008-16S_Rev_bc1033.fastq"
## [115] "G7_3MR-16S_For_bc1015-16S_Rev_bc1056.fastq"
## [116] "G7_6M-16S_For_bc1007-16S_Rev_bc1033.fastq"
## [117] "G7_MV2-16S_For_bc1022-16S_Rev_bc1044.fastq"
## [118] "G7_PT1-16S_For_bc1005-16S_Rev_bc1057.fastq"
## [119] "G8_3M-16S_For_bc1002-16S_Rev_bc1060.fastq"
## [120] "G8_6W-16S_For_bc1007-16S_Rev_bc1065.fastq"
## [121] "G9_3M-16S_For_bc1012-16S_Rev_bc1056.fastq"
## [122] "G9_6M-16S_For_bc1022-16S_Rev_bc1075.fastq"
## [123] "G9_6W-16S_For_bc1011-16S_Rev_bc1033.fastq"
## [124] "MC1-16S_For_bc1009-16S_Rev_bc1045.fastq"
## [125] "MC2-16S_For_bc1022-16S_Rev_bc1045.fastq"
## [126] "MC3-16S_For_bc1020-16S_Rev_bc1033.fastq"
## Assign taxonomy, download appropriate reference database from here - https://benjjneb.github.io/dada2/training.html
## Please note, assignTaxonomy with the species training set is recommended over the other options.
## https://github.com/benjjneb/dada2/issues/1319#issuecomment-820659005
## CELLS COMBINED (Cell 1 + Cell 2)
## Restore previously saved objects for the combined table when the cache file
## is present. There is deliberately no `else` branch: an empty `else {}` makes
## the whole expression evaluate to a visible NULL, which gets printed on a
## cache miss.
if (file.exists("/blue/mulligan/duttonc/Congo/output/cellcombined_output.Rdata")) {
  load("/blue/mulligan/duttonc/Congo/output/cellcombined_output.Rdata")
}
## Assign taxonomy with the SILVA training set named below, unless an object
## named `tax` already exists in the session.
if (!exists("tax")) {
  ## Slowest part
  tax <- assignTaxonomy(
    stcomb,
    "/blue/mulligan/duttonc/Congo/tax/silva_nr99_v138.1_wSpecies_train_set.fa.gz",
    multithread = TRUE
  )
  ## Reformat to be compatible with other data sources
  tax[, "Genus"] <- gsub("Escherichia-Shigella", "Escherichia", tax[, "Genus"])
} else {
  print("cached file loaded: tax")
}
## [1] "cached file loaded: tax"
head(unname(tax))
## [,1] [,2] [,3] [,4]
## [1,] "Bacteria" "Actinobacteriota" "Actinobacteria" "Bifidobacteriales"
## [2,] "Bacteria" "Actinobacteriota" "Actinobacteria" "Bifidobacteriales"
## [3,] "Bacteria" "Firmicutes" "Bacilli" "Lactobacillales"
## [4,] "Bacteria" "Actinobacteriota" "Actinobacteria" "Bifidobacteriales"
## [5,] "Bacteria" "Actinobacteriota" "Actinobacteria" "Bifidobacteriales"
## [6,] "Bacteria" "Actinobacteriota" "Actinobacteria" "Bifidobacteriales"
## [,5] [,6] [,7]
## [1,] "Bifidobacteriaceae" "Bifidobacterium" "longum"
## [2,] "Bifidobacteriaceae" "Bifidobacterium" "longum"
## [3,] "Streptococcaceae" "Streptococcus" "salivarius"
## [4,] "Bifidobacteriaceae" "Bifidobacterium" "breve"
## [5,] "Bifidobacteriaceae" "Bifidobacterium" "longum"
## [6,] "Bifidobacteriaceae" "Bifidobacterium" "longum"
## Flag bimeric (chimeric) sequences in the combined table, unless an object
## named `bim` already exists in the session.
if (!exists("bim")) {
  bim <- isBimeraDenovo(
    stcomb,
    minFoldParentOverAbundance = 3.5,
    multithread = TRUE
  )
} else {
  print("cached file loaded: bim")
}
## [1] "cached file loaded: bim"
What proportion are chimeras?
## Check Chimeras
## `bim` already exists at this point (it is either restored from the cache or
## computed by the guarded isBimeraDenovo call earlier in this script), so it
## is not recomputed here; recomputing would defeat the cache.
table(bim)
## bim
## FALSE TRUE
## 3449 884
## Fraction of all reads that belong to sequences flagged as bimeras.
sum(stcomb[, bim]) / sum(stcomb)
## [1] 0.03357118
## Extract Sample Names
## The sample ID is the part of each Cell 1 file name before the first "-".
## basename() replaces the previous hard-coded path position (7th "/" field),
## so the IDs no longer depend on where the fastq directory lives.
sample.names <- sub("-.*$", "", basename(fns1))
## Row names are assigned by position below, so first confirm that the rows of
## stcomb are in the same order as fns1. Both sides are normalised for the
## known G13_3M_2 / G37_3M relabelling before they are compared.
stopifnot(identical(
  gsub("G13_3M_2", "G37_3M", sample.names),
  gsub("G13_3M_2", "G37_3M", sub("-.*$", "", rownames(stcomb)))
))
rownames(stcomb) <- sample.names
sample.names
## [1] "G1_6M" "G1_6W" "G10_3M" "G10_6M" "G10_6W" "G11_3M"
## [7] "G11_6M" "G11_6W" "G12_3M" "G12_6W" "G12_MV1" "G12_PT1"
## [13] "G13_3M_2" "G13_3M" "G13_6M" "G13_6W" "G14_3M" "G14_6M"
## [19] "G14_6W" "G14_PT" "G15_3M" "G15_6W" "G15_MV1" "G16_6M"
## [25] "G17_3M" "G17_6M" "G17_6W" "G19_3M" "G19_6W" "G19_6WR"
## [31] "G19_MV3" "G2_3M" "G2_6M" "G2_6W" "G2_PT" "G20_6W"
## [37] "G20_MV1" "G20_PT1" "G20_PT2" "G20_PT2R" "G21_3M" "G21_6W"
## [43] "G22_3M" "G22_6W" "G22_MV" "G23_3M" "G24_3M" "G24_6M"
## [49] "G24_6W" "G25_3M" "G25_6W" "G26_3M" "G26_6M" "G26_6W"
## [55] "G27_6M" "G27_6W" "G28_6M" "G29_3M" "G29_6W" "G3_3M"
## [61] "G3_6M" "G3_6W" "G30_3M" "G30_6M" "G30_6W" "G31_3M"
## [67] "G31_6M" "G33_3M" "G33_6W" "G34_3M" "G34_6W" "G35_3M"
## [73] "G35_6M" "G35_6W" "G36_3M" "G36_6M" "G37_6W" "G37_PT1"
## [79] "G38_3M" "G38_6W" "G39_6M" "G4_3M" "G4_3MR" "G4_6W"
## [85] "G40_3M" "G40_6M" "G40_6W" "G41_3M" "G42_3M" "G42_6M"
## [91] "G43_6M" "G43_6W" "G45_3M" "G45_6W" "G45_MV" "G46_3M"
## [97] "G46_3MR" "G46_6M" "G46_6W" "G47_3M" "G47_6W" "G48_3M"
## [103] "G48_6M" "G48_6W" "G5_3M" "G5_MV1" "G50_3M" "G50_6M"
## [109] "G50_6W" "G51_6W" "G52_3M" "G52_6M" "G52_6W" "G7_3M"
## [115] "G7_3MR" "G7_6M" "G7_MV2" "G7_PT1" "G8_3M" "G8_6W"
## [121] "G9_3M" "G9_6M" "G9_6W" "MC1" "MC2" "MC3"
## remove chimeras
## Skipped when an object named `seqtab` already exists in the session;
## otherwise the filtered table is built and saved together with `bim` and
## `tax`.
## NOTE(review): this call does not pass minFoldParentOverAbundance, whereas
## the isBimeraDenovo calls earlier in this script pass 3.5 — confirm that the
## difference is intended.
if (!exists("seqtab")) {
  seqtab <- removeBimeraDenovo(stcomb, method = "consensus", multithread = TRUE)
  save(
    bim, seqtab, tax,
    file = "/blue/mulligan/duttonc/Congo/output/cellcombined_output.Rdata"
  )
} else {
  print("cached file loaded: seqtab")
}
## [1] "cached file loaded: seqtab"
## Construct the phyloseq object.
## OTU table from the chimera-filtered sequence table (samples are rows).
otuforsamples <- otu_table(seqtab, taxa_are_rows = FALSE)
## fix the G13_3M_2 samples to G37_3M.
rownames(otuforsamples) <- gsub(
  pattern = "G13_3M_2",
  replacement = "G37_3M",
  x = rownames(otuforsamples)
)
## Import the sample metadata and key its rows by the sampleId column.
sample_data <- read.csv("/blue/mulligan/duttonc/Congo/Congo_metadata_V4.csv")
rownames(sample_data) <- sample_data$sampleId
dataforsamples <- sample_data(sample_data)
## Assemble the object, write it to disk, and print its summary.
psCongo <- phyloseq(otuforsamples, dataforsamples, tax_table(tax))
saveRDS(psCongo, file = "/blue/mulligan/duttonc/Congo/output/psCongo_V4.rds")
psCongo
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 3832 taxa and 126 samples ]
## sample_data() Sample Data: [ 126 samples by 172 sample variables ]
## tax_table() Taxonomy Table: [ 3832 taxa by 7 taxonomic ranks ]
## Print the R version, platform, and attached/loaded package versions.
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.1 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] gtools_3.9.3 phyloseq_1.40.0
## [3] gridExtra_2.3 reshape2_1.4.4
## [5] ggplot2_3.3.6 ShortRead_1.54.0
## [7] GenomicAlignments_1.32.1 SummarizedExperiment_1.26.1
## [9] Biobase_2.56.0 MatrixGenerics_1.8.1
## [11] matrixStats_0.62.0 Rsamtools_2.12.0
## [13] GenomicRanges_1.48.0 BiocParallel_1.30.3
## [15] Biostrings_2.64.1 GenomeInfoDb_1.32.4
## [17] XVector_0.36.0 IRanges_2.30.1
## [19] S4Vectors_0.34.0 BiocGenerics_0.42.0
## [21] dada2_1.24.0 Rcpp_1.0.9
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-159 bitops_1.0-7 RColorBrewer_1.1-3
## [4] tools_4.2.1 bslib_0.4.0 vegan_2.6-2
## [7] utf8_1.2.2 R6_2.5.1 mgcv_1.8-40
## [10] DBI_1.1.3 colorspace_2.0-3 permute_0.9-7
## [13] rhdf5filters_1.8.0 ade4_1.7-19 withr_2.5.0
## [16] tidyselect_1.2.0 compiler_4.2.1 cli_3.4.1
## [19] DelayedArray_0.22.0 labeling_0.4.2 sass_0.4.2
## [22] scales_1.2.1 stringr_1.4.1 digest_0.6.30
## [25] rmarkdown_2.16 jpeg_0.1-9 pkgconfig_2.0.3
## [28] htmltools_0.5.3 highr_0.9 fastmap_1.1.0
## [31] rlang_1.0.6 rstudioapi_0.14 farver_2.1.1
## [34] jquerylib_0.1.4 generics_0.1.3 hwriter_1.3.2.1
## [37] jsonlite_1.8.2 dplyr_1.0.10 RCurl_1.98-1.9
## [40] magrittr_2.0.3 GenomeInfoDbData_1.2.8 biomformat_1.24.0
## [43] interp_1.1-3 Matrix_1.5-1 munsell_0.5.0
## [46] Rhdf5lib_1.18.2 fansi_1.0.3 ape_5.6-2
## [49] lifecycle_1.0.3 stringi_1.7.8 yaml_2.3.5
## [52] MASS_7.3-58.1 zlibbioc_1.42.0 rhdf5_2.40.0
## [55] plyr_1.8.7 grid_4.2.1 parallel_4.2.1
## [58] crayon_1.5.2 deldir_1.0-6 lattice_0.20-45
## [61] splines_4.2.1 multtest_2.52.0 knitr_1.40
## [64] pillar_1.8.1 igraph_1.3.5 codetools_0.2-18
## [67] glue_1.6.2 evaluate_0.16 latticeExtra_0.6-30
## [70] data.table_1.14.4 RcppParallel_5.1.5 png_0.1-7
## [73] vctrs_0.5.0 foreach_1.5.2 gtable_0.3.1
## [76] assertthat_0.2.1 cachem_1.0.6 xfun_0.33
## [79] survival_3.4-0 tibble_3.1.8 iterators_1.0.14
## [82] cluster_2.1.4