597772d765cd83c500fb244b7ca513f8bed652f7 kuhn Tue Apr 25 07:56:26 2017 -0700 added bedtools -split flag to ensure reads are not counted where they span an intron. improved sort so chromStart is properly sorted. Refs #19300 diff --git src/utils/qa/bamToBedGraph.csh src/utils/qa/bamToBedGraph.csh index fc9552d..b507085 100755 --- src/utils/qa/bamToBedGraph.csh +++ src/utils/qa/bamToBedGraph.csh @@ -21,32 +21,33 @@ echo echo " convert a BAM file into a bedGraph to assess coverage" echo " requires bedtools" echo echo " usage: db in.bam out.bedGraph [splitByStrand]" echo " where splitByStrand makes two output files" echo " output is sorted for input into bedGraphToBigWig" echo exit endif set db=$argv[1] set bamFile=$argv[2] set bedgraphFile=$argv[3] -bedtools bamtobed -i $bamFile | awk '{print "chr"$1, "\t"$2, "\t"$3, "\t"$4, "\t"$5, "\t"$6}' \ - | sed s/chrMT/chrM/ | sort > $bamFile.bed +bedtools bamtobed -split -i $bamFile \ + | awk '{print "chr"$1, "\t"$2, "\t"$3, "\t"$4, "\t"$5, "\t"$6}' \ + | sed s/chrMT/chrM/ | sort -k1,1 -k2,2n > $bamFile.bed if ( $#argv == 4 ) then if ( $argv[4] == "splitByStrand" ) then awk '{if ($6 == "+") print $1, "\t"$2, "\t"$3, "\t"$4, "\t"$5, "\t"$6}' $bamFile.bed \ | bedItemOverlapCount $db stdin | sort -k1,1 -k2,2n > $bedgraphFile.plus awk '{if ($6 == "-") print $1, "\t"$2, "\t"$3, "\t"$4, "\t"$5, "\t"$6}' $bamFile.bed \ | bedItemOverlapCount $db stdin | sort -k1,1 -k2,2n > $bedgraphFile.minus else echo echo ' 4th argument can only be: "splitByStrand"' $0 exit endif else cat $bamFile.bed | bedItemOverlapCount $db stdin | sort -k1,1 -k2,2n > $bedgraphFile endif