Parameters used to assemble Symmetrospora coprosmae UCD350

Skewer v0.2.2
skewer -m pe -q 15 -n

SPAdes v3.11.1
spades.py

Pipeline for variant analysis using SAMtools v1.1.19, BWA mem v0.7.12-r1039, PicardTools and Genome Analysis Toolkit v4.0.1.

#Generate index files for reference assemblies
bwa index ASSEMBLY.fasta
samtools faidx ASSEMBLY.fasta
picard-tools CreateSequenceDictionary REFERENCE=ASSEMBLY.fasta OUTPUT=ASSEMBLY.dict

#Align reads
bwa mem -M -Y -R "@RG\tID:RGID\tSM:SAMPLE_NAME\tPL:illumina\tLB:lib1\tPU:unit1" ASSEMBLY.fasta READS1.fastq READS2.fastq > SAMPLE.sam
samtools view -S -b SAMPLE.sam > SAMPLE.bam
samtools sort SAMPLE.bam -o SAMPLE.sorted
samtools index SAMPLE.sorted

#Call variants
picard-tools MarkDuplicates INPUT=SAMPLE.sorted OUTPUT=SAMPLE_dedup.bam METRICS_FILE=SAMPLE_duplicates_metrics.txt VALIDATION_STRINGENCY=LENIENT
picard-tools BuildBamIndex INPUT=SAMPLE_dedup.bam VALIDATION_STRINGENCY=LENIENT
gatk HaplotypeCaller --reference ASSEMBLY.fasta --input SAMPLE_dedup.bam --output SAMPLE.vcf

#Index VCFs
gatk IndexFeatureFile -F SAMPLE.vcf

#Run cluster filter
gatk VariantFiltration -R ASSEMBLY.fasta -V SAMPLE.vcf -O SAMPLE_ClusterFilter.vcf --cluster-size 5 --cluster-window-size 20

#Run GATK recommended filters
gatk VariantFiltration -R ASSEMBLY.fasta -V SAMPLE_ClusterFilter.vcf -O SAMPLE_ClusterFilter.filterGATK.vcf --filter-expression "QD < 2.0" --filter-name "GATKRecomFilterQD" --filter-expression "MQ < 40.0" --filter-name "GATKRecomFilterMQ" --filter-expression "FS > 60.0" --filter-name "GATKRecomFilterFS" --filter-expression "SOR > 3.0" --filter-name "GATKRecomFilterSOR" --filter-expression "MQRankSum < -12.5" --filter-name "GATKRecomFilterMQRankSum" --filter-expression "ReadPosRankSum < -8.0" --filter-name "GATKRecomFilterReadPosRankSum"

#Select SNPs
gatk SelectVariants -V SAMPLE_ClusterFilter.filterGATK.vcf -select-type SNP -O SAMPLE_ClusterFilter.filterGATK.snps.vcf

#Select het SNPs
gatk SelectVariants -V \
SAMPLE_ClusterFilter.filterGATK.snps.vcf -select 'vc.getGenotype("SAMPLE").isHet()' -O SAMPLE_ClusterFilter.filterGATK.snps.het.vcf

#Select INDELs
gatk SelectVariants -V SAMPLE_ClusterFilter.filterGATK.vcf -select-type INDEL -O SAMPLE_ClusterFilter.filterGATK.indels.vcf

#Select het indels
gatk SelectVariants -V SAMPLE_ClusterFilter.filterGATK.indels.vcf -select 'vc.getGenotype("SAMPLE").isHet()' -O SAMPLE_ClusterFilter.filterGATK.indels.het.vcf

General description of GATK filtering

gatk VariantFiltration \
-R ASSEMBLY.fasta \
-V SAMPLE.vcf \
-O SAMPLE_ClusterFilter.vcf \
--cluster-size 5 \
--cluster-window-size 20

gatk VariantFiltration \
-R ASSEMBLY.fasta \
-V SAMPLE_ClusterFilter.vcf \
-O SAMPLE_ClusterFilter.filterGATK.vcf \
--filter-expression "QD < 2.0" \
--filter-name "GATKRecomFilterQD" \
--filter-expression "MQ < 40.0" \
--filter-name "GATKRecomFilterMQ" \
--filter-expression "FS > 60.0" \
--filter-name "GATKRecomFilterFS" \
--filter-expression "SOR > 3.0" \
--filter-name "GATKRecomFilterSOR" \
--filter-expression "MQRankSum < -12.5" \
--filter-name "GATKRecomFilterMQRankSum" \
--filter-expression "ReadPosRankSum < -8.0" \
--filter-name "GATKRecomFilterReadPosRankSum"