src/utils/qa/checkCoverage.csh 1.9
1.9 2009/05/13 01:38:46 kuhn
refactored handling of single chrom. removed extra steps in txStart-assignment pipe. put all rm statements together using else clause. thanks, brooke
Index: src/utils/qa/checkCoverage.csh
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/utils/qa/checkCoverage.csh,v
retrieving revision 1.8
retrieving revision 1.9
diff -b -B -U 1000000 -r1.8 -r1.9
--- src/utils/qa/checkCoverage.csh 11 May 2009 05:29:21 -0000 1.8
+++ src/utils/qa/checkCoverage.csh 13 May 2009 01:38:46 -0000 1.9
@@ -1,163 +1,160 @@
#!/bin/tcsh
####################
# 09-20-07 Bob Kuhn
#
# find all the places where data missing from a track.
#
####################
set db=""
set table=""
set limit=""
set whereLimit=""
set split=""
set chr=""
set chromList=""
set start=""
set end=""
if ( $#argv < 2 || $#argv > 3 ) then
echo
- echo " find all the places where data missing from a track."
+ echo " find all non-gap places where data missing from a track."
echo
echo " usage: database table [chrom]"
echo
echo ' where "chrom" limits to single chrom'
echo
exit
else
set db=$argv[1]
set table=$argv[2]
endif
if ( $#argv == 3 ) then
+ # limit to a chrom
set limit='-chrom='$argv[3]
set whereLimit='WHERE chrom = "'$argv[3]'"'
+ set chromList=${argv[3]}_
+else
+ set chromList=`hgsql -Ne "SELECT chrom FROM chromInfo" $db | sed -r "s/(.*)/\1_/"`
endif
if ( "$HOST" != "hgwdev" ) then
echo "\n error: you must run this script on dev!\n"
exit 1
endif
set split=`getSplit.csh $db $table`
if ( $status ) then
exit 1
endif
if ( $split != "unsplit" ) then
set split=${split}_
else
set split=""
endif
# echo "split /${split}/"
# echo "splittable $split$table"
# find the correct names for starts and ends
set chr=`getChromFieldName.csh $db $split$table`
if ( $status ) then
echo "\n error. not a positional table? quitting.\n"
exit 1
endif
# echo "chr $chr"
# echo "split /${split}/"
# echo "split_table ${split}$table"
if ( $chr == "chrom" ) then
set start=`hgsql -Ne "DESC $split$table" $db | awk '{print $1}' \
- | egrep "txStart|chromStart" | head -1 | awk '{print $1}'`
+ | egrep "txStart|chromStart"`
set end=`hgsql -Ne "DESC $split$table" $db | awk '{print $1}' \
- | egrep "txEnd|chromEnd" | head -1 | awk '{print $1}'`
+ | egrep "txEnd|chromEnd"`
else
if ( $chr == "tName" ) then
set start="tStart"
set end="tEnd"
else
if ( $chr == "genoName" ) then
set start="genoStart"
set end="genoEnd"
endif
endif
endif
# make simple BED3 file from track with introns filled in
rm -f blockBedFile.bed
if ( $split != "" ) then
- set chromList=`hgsql -Ne "SELECT chrom FROM chromInfo" $db | sed -r "s/(.*)/\1_/"`
- if ( $limit != "" ) then
- set chromList=$argv[3]
- endif
foreach chrom ( $chromList )
hgsql -Ne "SELECT $chr, $start, $end FROM $chrom$table" $db >> blockBedFile.bed
end
else
hgsql -Ne "SELECT $chr, $start, $end FROM $table $whereLimit" $db > blockBedFile.bed
endif
# echo $db
# echo $table
# echo $chr $start $end
# make negative of track and of gap track
featureBits $db blockBedFile.bed -not $limit -bed=$table.not.bed \
>& /dev/null
featureBits $db gap -not $limit -bed=gap.not.bed \
>& /dev/null
# then intersect them
featureBits $db $table.not.bed gap.not.bed $limit \
-bed=$table.holes.bed >& /dev/null
# save coords and size and sort for biggest
cat $table.holes.bed | awk '{print $1, $2, $3, $3-$2}' | sort -k4,4nr \
> $table.holes.sort
if ( -z $table.holes.bed ) then
echo
echo "there are no non-gap regions not covered by $table."
echo "or there may be no gap table."
echo
- rm -f blockBedFile.bed
- rm -f gap.not.bed
- rm -f $table.not.bed
- exit 0
-endif
-
-# report
-echo
-echo "non-gap holes in track are in this file: "
-echo " $table.holes.bed"
-echo
-echo "largest missing non-gap regions are here:"
+else
+ # report
+ echo
+ echo "non-gap holes in track are in this file: "
+ echo " $table.holes.bed"
+ echo
+ echo "largest missing non-gap regions are here:"
-echo
-echo "chrom chromStart chromEnd size" \
+ echo
+ echo "chrom chromStart chromEnd size" \
| awk '{ printf("%13s %14s %14s %12s \n", $1, $2, $3, $4) }'
-echo "----- ---------- -------- ----" \
+ echo "----- ---------- -------- ----" \
| awk '{ printf("%13s %14s %14s %12s \n", $1, $2, $3, $4) }'
-head -10 $table.holes.sort \
+ head -10 $table.holes.sort \
| awk '{ printf("%13s %14s %14s %12s \n", $1, $2, $3, $4) }'
-echo
+ echo
-echo
-# make links for three biggest
-set url1="http://genome-test.cse.ucsc.edu/cgi-bin/hgTracks?db=$db&position="
-set url2="&$table=pack&gap=dense"
-set bigThree=`head -3 $table.holes.sort \
+ echo
+ # make links for three biggest
+ set url1="http://genome-test.cse.ucsc.edu/cgi-bin/hgTracks?db=$db&position="
+ set url2="&$table=pack&gap=dense"
+ set bigThree=`head -3 $table.holes.sort \
| awk '{print $1 ":" $2-300 "-" $3+300}'`
-echo "links to the three biggest voids in ${table}:"
-echo " (with 300 bp padding on each end)"
-foreach item ( $bigThree )
+ echo "links to the three biggest voids in ${table}:"
+ echo " (with 300 bp padding on each end)"
+ foreach item ( $bigThree )
echo "$url1$item$url2"
-end
-echo
+ end
+ echo
+endif
# clean up
rm -f blockBedFile.bed
rm -f gap.not.bed
rm -f $table.not.bed
+rm -f $table.holes.bed
+rm -f $table.holes.sort
-exit
-
+exit 0