全外显子组生信分析流程

#!/bin/bash#purpose:statisticanalysisofmappingreads#sevenvbam_dir=""cd$bam_dir#bam文件所在位置ls*.sorted.dedup.bam>bam.list#samtoolsflagstatforiin$(catbam.list)doprintf$i>>bam.mappedreadssamtoolsflagstat$i|sed-n'5p'>>bam.mappedreadsdone#samtoolsidxstatsforiin$(catbam.list)doexporttotal_reads=$(samtoolsidxstats$bam_dir/$i|awk-F't''{s =$3}END{prints}')echo$inumber_of_reads$total_readsdone

#calthelengthanddepthofoverlappedregionsforsamplein$(catbam_list)doperl-alne'{$len=$F[2]-$F[1] 1;$tmp =$len}END{print$tmp}'$sample.intersect>>intersect.lengthperl-alne'{$len=$F[3]*($F[2]-$F[1] 1);$tmp =$len}END{print$tmp}'$sample.intersect>>intersect.depthperl-alne'{$len=$F[2]-$F[1] 1;$tmp =$len}END{print$tmp}'$probes_bed>>bed_lengthdonepastebam.listintersect.lengthintersect.depthbed_length>intersect.cov_depth#intersect.cov_depth:sampleIDintersect.lengthintersect.depthbed_length#coverage=intersect.length/bed_length#depth=intersect.depth/intersect.length

out_dir=${result_path}/3_picard_output

awk'{print$2/$4}'intersect.cov_depth#depthawk'{print$3/$2}'intersect.cov_depth#coverage

picard=/home/chenfan/mnt/sdc/RNAseq/picard.jar

#!/bin/bash#purpose:bam2bedGraph总计每种碱基的depth,并选取bedtools求overlappedregion。#setenvbam_dir=""cd$bam_dir#bam文件所在地方ls*.sorted.dedup.recal.bam>bam.listprobes_bed="truseq-exome-targeted-regions-manifest-v1-2.bed2"forsamplein$(catbam.list)dobedtoolsgenomecov-bg-ibam$bam_dir/$sample>$sample.bam2bed.bedGraph#generate$sample.bam2bed.bedGraphfile#chrstartenddepth#zero-basedstartandaone-basedendcat$sample.bam2bed.bedGraph|awk'{print$1"t"$2 1"t"$3"t"$4}'>$sample.bedGraphbedtoolsintersect-a$sample.bedGraph-b$probes_bed>$sample.intersect#chroverlapped_startoverlapped_enddepthecho$sampleokdone

#-------------------------------SplitNCigarReads---------------------------------------

#-------------------------------Indel Realignment

echo "------------------------------Indel Realignment -----------------------------------------"

in_dir=${out_dir}

out_dir=${result_path}/5_indelrealign_output

cd ${out_dir}

java -jar $gatk

-T RealignerTargetCreator

-nt 32

-R $genome

-I ${in_dir}/split_markdup_sort_addRG_align.bam

-o ${out_dir}/realign_interval.list

-known $knownindel

java -jar $gatk

-T IndelRealigner

-R $genome

-I ${in_dir}/split_markdup_sort_addRG_align.bam

-known $knownindel

-o ${out_dir}/realign_split_markdup_sort_addRG_align.bam

-targetIntervals ${out_dir}/realign_interval.list

#-------------------------------Base Recalibration---------------------------------------

echo "------------------------------Base Recalibration-----------------------------------------"

in_dir=${out_dir}

out_dir=${result_path}/6_bqsr_output

cd ${out_dir}

java -jar $gatk

-T BaseRecalibrator

-nct 32

-R $genome

-I ${in_dir}/realign_split_markdup_sort_addRG_align.bam

-knownSites $knownsnp

-knownSites $knownindel

-o ${out_dir}/recal_data.table

java -jar $gatk

-T PrintReads

-nct 32

-R $genome

-I ${in_dir}/realign_split_markdup_sort_addRG_align.bam

-BQSR ${out_dir}/recal_data.table

-o ${out_dir}/bqsr_realign_split_markdup_sort_addRG_align.bam

#-------------------------------Variant calling---------------------------------------

echo "------------------------------Variant calling-----------------------------------------"

in_dir=${out_dir}

out_dir=${result_path}/7_callvariants_output

cd ${out_dir}

java -jar $gatk

-T HaplotypeCaller

-nct 32

-R $genome

-ploidy 1

-I ${in_dir}/bqsr_realign_split_markdup_sort_addRG_align.bam

-dontUseSoftClippedBases

-stand_call_conf 20.0

-o ${out_dir}/Hap1_rnaseq.vcf

#-------------------------------Variant filtering---------------------------------------

echo "------------------------------Variant filtering-----------------------------------------"

in_dir=${out_dir}

out_dir=${result_path}/8_filtervariants_output

cd ${out_dir}

java -jar $gatk

-T VariantFiltration

-R $genome

-V ${in_dir}/Hap1_rnaseq.vcf

-window 35

-cluster 3

--filterName FS -filter "FS>30.0"

--filterName QD -filter "QD<2.0"

-o ${out_dir}/filtered_Hap1_rnaseq.vcf

java -jar $gatk

-T SelectVariants

-ef

-R $genome

-V ${out_dir}/filtered_Hap1_rnaseq.vcf

-o ${out_dir}/ef_Hap1_C20_rnaseq.vcf

STAR --genomeDir ${out_dir}/1index --readFilesIn ${in_dir}/1.fastq  ${in_dir}/2.fastq --runThreadN 30

#------------------------------Picard add read groups,sort,mark duplicates,and create index-----------------------------------------

in_dir=${out_dir}

STAR --genomeDir ${out_dir}/2index --readFilesIn ${in_dir}/1.fastq ${in_dir}/2.fastq  --runThreadN 30

echo "------------------------------SplitNCigarReads-----------------------------------------"

STAR --runMode genomeGenerate --genomeDir ${out_dir}/1index --genomeFastaFiles $genome --sjdbGTFfile $refgene --runThreadN 30

java -jar $gatk -T SplitNCigarReads -R $genome -I ${in_dir}/markdup_sort_addRG_align.bam -o ${out_dir}/split_markdup_sort_addRG_align.bam -rf ReassignOneMappingQuality -RMQF 255 -RMQT 60 -U ALLOW_N_CIGAR_READS

java -jar $picard AddOrReplaceReadGroups I=${in_dir}/2pass/Aligned.out.sam O=${out_dir}/sort_addRG_align.bam SO=coordinate RGID=hap1 RGLB=rnaseq RGPL=illumina RGPU=runname RGSM=C20

本文由金沙js333娱乐场官网发布于www.js333.com,转载请注明出处:全外显子组生信分析流程

TAG标签: 流程
Ctrl+D 将本页面保存为书签,全面了解最新资讯,方便快捷。