src/hg/makeDb/doc/hg19.txt 1.63

1.63 2009/11/21 06:02:16 hiram
Proper score calculation using lodToBedScore for 46-way mostConserved tracks
Index: src/hg/makeDb/doc/hg19.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hg19.txt,v
retrieving revision 1.62
retrieving revision 1.63
diff -b -B -U 4 -r1.62 -r1.63
--- src/hg/makeDb/doc/hg19.txt	18 Nov 2009 21:12:55 -0000	1.62
+++ src/hg/makeDb/doc/hg19.txt	21 Nov 2009 06:02:16 -0000	1.63
@@ -5697,11 +5697,12 @@
 do
     ls -d bed/${C}.[0-9][0-9] 2> /dev/null | while read D
     do
         cat ${D}/${C}*.bed
-    done | awk 'BEGIN{ ID=1 }{printf "%s\t%d\t%d\t%s.%d\t%d\t%s\n", "'${C}'", $2, $3, "'${C}'", ID, $5, $6; ++ID}'
-done > mostConserved.bed
-    #	~ 1 minute
+    done | sort -k1,1 -k2,2n \
+    | awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", "'${C}'", $2, $3, $5, $5;}'
+done > tmpMostConserved.bed
+/cluster/bin/scripts/lodToBedScore tmpMostConserved.bed > mostConserved.bed
 
     # load into database
     ssh hgwdev
     cd /hive/data/genomes/hg19/bed/multiz46way/cons/all
@@ -5830,18 +5831,19 @@
 # Average job time:                 156s       2.60m     0.04h    0.00d
 # Longest finished job:             402s       6.70m     0.11h    0.00d
 # Submission to last job:          2322s      38.70m     0.65h    0.03d
 
-    # create Most Conserved track
     cd /hive/data/genomes/hg19/bed/multiz46way/cons/primates
+    # create Most Conserved track
     cut -f1 ../../../../chrom.sizes | while read C
 do
     ls -d bed/${C}.[0-9][0-9] 2> /dev/null | while read D
     do
         cat ${D}/${C}*.bed
-    done | awk 'BEGIN{ ID=1 }{printf "%s\t%d\t%d\t%s.%d\t%d\t%s\n", "'${C}'", $2, $3, "'${C}'", ID, $5, $6; ++ID}'
-done > mostConserved.bed
-    #	~ 1 minute
+    done | sort -k1,1 -k2,2n \
+    | awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", "'${C}'", $2, $3, $5, $5;}'
+done > tmpMostConserved.bed
+/cluster/bin/scripts/lodToBedScore tmpMostConserved.bed > mostConserved.bed
 
     featureBits hg19 mostConserved.bed
     #	146285948 bases of 2897316137 (5.049%) in intersection
 
@@ -5978,24 +5980,24 @@
 # Longest finished job:             417s       6.95m     0.12h    0.00d
 # Submission to last job:          1878s      31.30m     0.52h    0.02d
 
     # create Most Conserved track
-    ../all/bedCat.sh > mostConserved.bed
     cut -f1 ../../../../chrom.sizes | while read C
 do
     ls -d bed/${C}.[0-9][0-9] 2> /dev/null | while read D
     do
         cat ${D}/${C}*.bed
-    done | awk 'BEGIN{ ID=1 }{printf "%s\t%d\t%d\t%s.%d\t%d\t%s\n", "'${C}'", $2, $3, "'${C}'", ID, $5, $6; ++ID}'
-done > mostConserved.bed
-    #	~ 1 minute
+    done | sort -k1,1 -k2,2n \
+    | awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", "'${C}'", $2, $3, $5, $5;}'
+done > tmpMostConserved.bed
+/cluster/bin/scripts/lodToBedScore tmpMostConserved.bed > mostConserved.bed
 
     # load into database
     ssh hgwdev
     cd /hive/data/genomes/hg19/bed/multiz46way/cons/placental
     time nice -n +19 hgLoadBed hg19 phastConsElements46wayPlacental \
 	mostConserved.bed
-    #	Loaded 4805978 elements of size 6
+    #	Loaded 3743478 elements of size 6
     #	real    1m15.952s
     # verify coverage
     featureBits hg19 phastConsElements46wayPlacental
     #	118211444 bases of 2897316137 (4.080%) in intersection