src/hg/makeDb/doc/hg18.txt 1.356
1.356 2009/03/12 18:24:04 angie
Fixed coords of hgdpHzyBantu and reloaded.
Index: src/hg/makeDb/doc/hg18.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hg18.txt,v
retrieving revision 1.355
retrieving revision 1.356
diff -b -B -U 4 -r1.355 -r1.356
--- src/hg/makeDb/doc/hg18.txt 11 Mar 2009 18:31:44 -0000 1.355
+++ src/hg/makeDb/doc/hg18.txt 12 Mar 2009 18:24:04 -0000 1.356
@@ -26841,16 +26841,15 @@
#Loaded 660280 elements of size 7
#############################################################################
-# HGDP HETEROZYGOSITY (DONE 2/12/09 angie, except for Bantu 3/02/09)
+# HGDP HETEROZYGOSITY (DONE 2/12/09 angie, except for Bantu 3/12/09)
mkdir /hive/data/genomes/hg18/bed/hgdpHzy
cd /hive/data/genomes/hg18/bed/hgdpHzy
foreach continent (african americas easia european mideast oceania sasia)
wget --timestamping http://hgdp.uchicago.edu/data/hzy/$continent.gff.gz
end
wget --timestamping http://hgdp.uchicago.edu/data/hzy/allbantu.hzy.gff.gz
-#*** waiting to hear back from Joe about whether that's the right file
foreach continent (african allbantu americas easia european mideast oceania sasia)
set bedGraph = `echo $continent \
| sed -re 's/can$/ca/; s/pean$/pe/; s/asia/Asia/; s/allbantu/bantu/; \
s/(.*)/hgdpHzy\u\1.bedGraph/'`
@@ -26858,17 +26857,32 @@
zcat $continent.gff.gz \
| awk 'BEGIN{OFS="\t";} {print $1, ($4-1), $5, $6;}' \
> $bedGraph
end
+
+ # 3/12/09: All of the original files' coords were intervals between SNPs,
+ # but the Bantu file had SNP coordinates, and one more line per chrom than
+ # the others. So (after getting OK from Joe) I am going to transform the
+ # Bantu SNP coords to intervals like the others.
+ perl -we 'while (<>) { \
+ chomp; ($c, $s, undef, $h) = split; \
+ if (defined $lastC) { \
+ if ($lastC eq $c) { \
+ print "$c\t$lastS\t$s\t$lastH\n"; \
+ } # Chrom changed: print nothing, so no interval starts at the last SNP on each chrom (it is discarded) \
+ } \
+ ($lastC, $lastS, $lastH) = ($c, $s, $h); \
+ }' \
+ hgdpHzyBantu.bedGraph > tmp
+ mv tmp hgdpHzyBantu.bedGraph
+
# Using bedGraph, not wig, because there are only 640k datapoints and
# some are over the 10Mbase wiggle item size limit.
foreach f (*.bedGraph)
hgLoadBed hg18 $f:r $f -bedGraph=4
end
# All have same size:
#Loaded 640676 elements of size 4
- # except for Bantu:
-#Loaded 640698 elements of size 4
#############################################################################
# HGDP FST (DONE 2/12/09 angie)