src/hg/makeDb/doc/hg19.txt 1.63
1.63 2009/11/21 06:02:16 hiram
Proper calculation using lodToBedScore for 46-way mostConserved tracks
Index: src/hg/makeDb/doc/hg19.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hg19.txt,v
retrieving revision 1.62
retrieving revision 1.63
diff -b -B -U 4 -r1.62 -r1.63
--- src/hg/makeDb/doc/hg19.txt 18 Nov 2009 21:12:55 -0000 1.62
+++ src/hg/makeDb/doc/hg19.txt 21 Nov 2009 06:02:16 -0000 1.63
@@ -5697,11 +5697,12 @@
do
ls -d bed/${C}.[0-9][0-9] 2> /dev/null | while read D
do
cat ${D}/${C}*.bed
- done | awk 'BEGIN{ ID=1 }{printf "%s\t%d\t%d\t%s.%d\t%d\t%s\n", "'${C}'", $2, $3, "'${C}'", ID, $5, $6; ++ID}'
-done > mostConserved.bed
- # ~ 1 minute
+ done | sort -k1,1 -k2,2n \
+ | awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", "'${C}'", $2, $3, $5, $5;}'
+done > tmpMostConserved.bed
+/cluster/bin/scripts/lodToBedScore tmpMostConserved.bed > mostConserved.bed
# load into database
ssh hgwdev
cd /hive/data/genomes/hg19/bed/multiz46way/cons/all
@@ -5830,18 +5831,19 @@
# Average job time: 156s 2.60m 0.04h 0.00d
# Longest finished job: 402s 6.70m 0.11h 0.00d
# Submission to last job: 2322s 38.70m 0.65h 0.03d
- # create Most Conserved track
cd /hive/data/genomes/hg19/bed/multiz46way/cons/primates
+ # create Most Conserved track
cut -f1 ../../../../chrom.sizes | while read C
do
ls -d bed/${C}.[0-9][0-9] 2> /dev/null | while read D
do
cat ${D}/${C}*.bed
- done | awk 'BEGIN{ ID=1 }{printf "%s\t%d\t%d\t%s.%d\t%d\t%s\n", "'${C}'", $2, $3, "'${C}'", ID, $5, $6; ++ID}'
-done > mostConserved.bed
- # ~ 1 minute
+ done | sort -k1,1 -k2,2n \
+ | awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", "'${C}'", $2, $3, $5, $5;}'
+done > tmpMostConserved.bed
+/cluster/bin/scripts/lodToBedScore tmpMostConserved.bed > mostConserved.bed
featureBits hg19 mostConserved.bed
# 146285948 bases of 2897316137 (5.049%) in intersection
@@ -5978,24 +5980,24 @@
# Longest finished job: 417s 6.95m 0.12h 0.00d
# Submission to last job: 1878s 31.30m 0.52h 0.02d
# create Most Conserved track
- ../all/bedCat.sh > mostConserved.bed
cut -f1 ../../../../chrom.sizes | while read C
do
ls -d bed/${C}.[0-9][0-9] 2> /dev/null | while read D
do
cat ${D}/${C}*.bed
- done | awk 'BEGIN{ ID=1 }{printf "%s\t%d\t%d\t%s.%d\t%d\t%s\n", "'${C}'", $2, $3, "'${C}'", ID, $5, $6; ++ID}'
-done > mostConserved.bed
- # ~ 1 minute
+ done | sort -k1,1 -k2,2n \
+ | awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", "'${C}'", $2, $3, $5, $5;}'
+done > tmpMostConserved.bed
+/cluster/bin/scripts/lodToBedScore tmpMostConserved.bed > mostConserved.bed
# load into database
ssh hgwdev
cd /hive/data/genomes/hg19/bed/multiz46way/cons/placental
time nice -n +19 hgLoadBed hg19 phastConsElements46wayPlacental \
mostConserved.bed
- # Loaded 4805978 elements of size 6
+ # Loaded 3743478 elements of size 6
# real 1m15.952s
# verify coverage
featureBits hg19 phastConsElements46wayPlacental
# 118211444 bases of 2897316137 (4.080%) in intersection