Beagle Code: java -Xmx4g -jar Beagle/beagle.22Apr16.1cf.jar gtgl=mySequence/VCF_Sequences.vcf out=mySequence/VCF_imputed.vcf gprobs=T window=200 overlap=50 ########################################################################################################################################## TASSEL 5.0 GBS Pipeline V2 Code: #gzip -cd |head # Code used to find out the flow cell name for the key file /run_pipeline.pl -GBSSeqToTagDBPlugin -e PstI -i /mySequence -db /db/maizeTags.db -k /keys/16IL009.txt -kmerLength 64 -minKmerL 20 -deleteOldData TRUE -c 1 -batchSize 1 -endPlugin | tee /logfiles/logfile1GBStoTag.txt /run_pipeline.pl -TagExportToFastqPlugin -db /db/maizeTags.db -o /mergedTags/maizeUnalignedReads.fa.gz -c 1 -endPlugin | tee /logfiles/logfile2TagExport.txt cd /bowtie2/ ./bowtie2-build /ref/Zea_mays.AGPv3.31.dna_rm.genome.fa /build/maizeBuild ./bowtie2 --very-sensitive -U /mergedTags/maizeUnalignedReads.fa.gz -x /build/maizeBuild -S /mergedTags/maizeAlign.sam cd .. /run_pipeline.pl -SAMToGBSdbPlugin -i /mergedTags/maizeAlign.sam -db /db/maizeTags.db -aProp 0.0 -aLen 0 -endPlugin | tee /logfiles/logfile4Sam.txt /run_pipeline.pl -DiscoverySNPCallerPluginV2 -db /db/maizeTags.db -sC 1 -eC 10 -mnLCov 0.1 -mnMAF 0.05 -endPlugin | tee /logfiles/logfile5Disc.txt /run_pipeline.pl -SNPQualityProfilerPlugin -db /db/maizeTags.db -statFile "outputStatsALL.txt" -endPlugin | tee /logfiles/logfile6_SNPQualityProfiler.txt /run_pipeline.pl -ProductionSNPCallerPluginV2 -db /db/maizeTags.db -e PstI -i /mySequence -k /keys/16IL009.txt -o /HDF5/MAF05_Sequences.h5 -endPlugin | tee /logfiles/logfile7Production.txt ########################################################################################################################################## *SAS CODE; proc reg data=acylPop; model TA = in1 C1 R1 acylMutant/ clb spec; run; proc mixed data = acylPop method = type3; class in1 C1 R1 acylMutant; model = /solution; *fill in compound to analyze; random in1 c1 r1 acylMutant/ s; run; ########################################################################################################################################## #R Code genotypes = read.table("", header=T, row.names=1) #data converted to 0,0.5,1 for minor/het/major genotypes = genotypes[order(row.names(genotypes)), ] #orders the genotypes by row name genotypes = 2*genotypes #eliminates decimals phenotypes = read.table("file path to AT_Phenotypes.txt>", header=T) #phenotype file phenotypes = phenotypes[order(phenotypes[,1]), ] #orders the genotypes by row name SNPtable = read.table("") #position, chr, and site number of SNPs ####################### QTL ANLAYSIS ####################### pval = c() #saves the p-values for every SNP position for (i in 1:ncol(genotypes)){ #model for QTL analysis pval[i] = anova(lm(as.numeric(phenotypes[,__])~genotypes[,i],na.action =na.omit))[1,5] } paste("chr",SNPtable[which.min(pval),3]," Position",SNPtable[which.min(pval),4]) #prints results on console pval2 = c() #stepwise QTL analysis for (i in 1:ncol(genotypes)){ pval2[i] = anova(lm(as.numeric(phenotypes[,__])~genotypes[,which.min(pval)] + genotypes[,i],na.action =na.omit))[2,5] } paste("chr",SNPtable[which.min(pval2),3]," Position",SNPtable[which.min(pval2),4]) pval3 = c() for (i in 1:ncol(genotypes)){ pval3[i] = anova(lm(as.numeric(phenotypes[,__])~genotypes[,which.min(pval2)] + genotypes[,which.min(pval)] + genotypes[,i],na.action =na.omit))[3,5] } paste("chr",SNPtable[which.min(pval3),3]," Position",SNPtable[which.min(pval3),4]) ####################### Simulation Code ####################### Sys.time() reps = 100 #input reps n = c(20,40,60,80,100) #sample size #acylation results poweracyl1 = matrix(NA, nrow = reps, ncol = length(n)) #-log10 Acyl pval poweracyl2 = matrix(NA, nrow = reps, ncol = length(n)) #site most sig SNP poweracyl3 = matrix(NA, nrow = reps, ncol = length(n)) #chr name most sig snp power4 = matrix(NA, nrow = reps, ncol = length(n)) #no. samples #R1 results power5 = matrix(NA, nrow = reps, ncol = length(n)) #-log10 R1 pval power6 = matrix(NA, nrow = reps, ncol = length(n)) #site most sig SNP power7 = matrix(NA, nrow = reps, ncol = length(n)) #chr name most sig snp power8 = matrix(NA, nrow = reps, ncol = length(n)) #no. samples randAcyl = c() #makes vector for pvalues in Acylation simulation randR1 = c() #makes vector for pvalues in R1 simulation for (j in 1:length(n)){ #sample size intervals for (x in 1:reps){ #reps if(reps%%5==0){print(reps)} #what stage is it running? randSample = sample(1:128,size = n[j], replace = F) #subsets data randPheno = phenotypes[randSample,] randGeno = genotypes[randSample,] if (sum(randPheno[,2])==0){ #sometimes acyl has no mutants, kills code poweracyl1[x,j] = NA poweracyl2[x,j] = NA poweracyl3[x,j] = NA power4[x,j] = NA } else{ for (i in 1:ncol(randGeno)){ #model for acylation randAcyl[i] = summary(lm(randPheno[,16]~ randGeno[,i], na.action=na.omit))$coefficients[8] } for (i in 1:ncol(randGeno)){ #model for R1 randR1[i] = summary(lm(randPheno[,3] ~ randGeno[,i], na.action=na.omit))$coefficients[8] } poweracyl1[x,j] = -log10(min(randAcyl[!is.na(randAcyl)])) poweracyl2[x,j] = as.character(SNPtable[which.min(randAcyl[!is.na(randAcyl)]),2]) poweracyl3[x,j] = as.character(SNPtable[which.min(randAcyl[!is.na(randAcyl)]),3]) power4[x,j] = sum(randPheno[,2]) power5[x,j] = -log10(min(randR1[!is.na(randR1)])) power6[x,j] = as.character(SNPtable[which.min(randR1[!is.na(randR1)]),2]) power7[x,j] = as.character(SNPtable[which.min(randR1[!is.na(randR1)]),3]) power8[x,j] = sum(randPheno[,3]) } } } powerAcyl = cbind(poweracyl1[,1],poweracyl2[,1],poweracyl3[,1],power4[,1], poweracyl1[,2],poweracyl2[,2],poweracyl3[,2],power4[,2], poweracyl1[,3],poweracyl2[,3],poweracyl3[,3],power4[,3], poweracyl1[,4],poweracyl2[,4],poweracyl3[,4],power4[,4], poweracyl1[,5],poweracyl2[,5],poweracyl3[,5],power4[,5]) powerR1 = cbind(power5[,1],power6[,1],power7[,1],power8[,1], power5[,2],power6[,2],power7[,2],power8[,2], power5[,3],power6[,3],power7[,3],power8[,3], power5[,4],power6[,4],power7[,4],power8[,4], power5[,5],power6[,5],power7[,5],power8[,5]) write.csv(powerR1,file = "powerR1.csv") write.csv(powerAcyl,file = "powerAcyl.csv") write.csv(poweracyl1,file = "poweracyl1Acyl.csv") Sys.time() ####################### Figure Code ####################### pdf(file = "Output.pdf", width = 16, height = 8) #save into a pdf install.packages("qqman") #installs manhattan plot package library("qqman") #citation("qqman") palette(c("darkblue","chocolate1")) manplot = cbind(SNPtable[,3], SNPtable[,4], ) colnames(manplot) = c("CHR","BP","P") manplot = as.data.frame(manplot) manhattan(manplot, chr = "CHR", bp = "BP", p = "P", col = c("darkblue","chocolate1"), suggestiveline = F, genomewideline = F, main = " Locus",cex=1, cex.main = 2, cex.axis=1.5, cex.lab=1.5, ylab = "") title(ylab = expression(paste("-log",""[10], "(",italic("p"),")")), cex.lab = 1.5, line = 2.5) #the -log pval is hidden in the margin, so i had to scoot it. Expression() adds italics and subscripts hist(phenotypes[,15],breaks=15,col="red", main="Anthocyanin Content",xlab = "mg/kg", cex.axis = 2,cex.lab = 2, cex.main = 2) #histogram AC hist(phenotypes[,16], breaks = 15, col = "red", main = "Percentage of Acylation", xlab = "Acylation (%)",cex.axis = 2,cex.lab = 2, cex.main = 2) #histogram acylation % boxlabel =ifelse(genotypes[,which.min(Acylpvalues)]==2,"2: Wild Type", ifelse(genotypes[,which.min(Acylpvalues)]==1,"1: Heterozygous","0: Mutant")) #boxplot important SNP boxplot(phenotypes[,16]~boxlabel, col = c("blue", "red", "green"), main = "Percentage of Acylation vs. Genotype", ylab = "Percentage of Acylation", xlab = "Genotype") dev.off()