src/hg/makeDb/doc/encodeHg18.txt 1.34
1.34 2010/05/06 18:23:10 hartera
Removed documentation about CRG Mapability track as this is from the whole genome ENCODE and this document is for the pilot project.
Index: src/hg/makeDb/doc/encodeHg18.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/encodeHg18.txt,v
retrieving revision 1.33
retrieving revision 1.34
diff -b -B -U 1000000 -r1.33 -r1.34
--- src/hg/makeDb/doc/encodeHg18.txt 8 Apr 2010 04:39:11 -0000 1.33
+++ src/hg/makeDb/doc/encodeHg18.txt 6 May 2010 18:23:10 -0000 1.34
@@ -1,1256 +1,1246 @@
#!/bin/csh -f
# Guard: this file is documentation (a command log), not a runnable script.
# The unconditional exit stops anyone who executes it by accident.
# NOTE(review): the leading '#' of the shebang was lost in the diff; restored.
exit
#############################################################################
# This is the make doc for hg18 ENCODE
#############################################################################
# Changes to ENCODE groups (2007-07-31 kate)
# Reorganize the browser 'grp' table: relabel two groups, merge two, drop two.
# Change labels for Transcripts and Chrom
hgsql hg18 -e "UPDATE grp SET label='ENCODE Transcription' where name='encodeTxLevels'"
hgsql hg18 -e "UPDATE grp SET label='ENCODE Chromatin Structure' where name='encodeChrom'"
# Merge CompGeno and Var groups (few tracks)
hgsql hg18 -e "UPDATE grp SET label='ENCODE Comparative Genomics and Variation' where name='encodeCompGeno'"
hgsql hg18 -e "DELETE FROM grp where name='encodeVariation'"
# Retire obsolete group
hgsql hg18 -e "DELETE FROM grp where name='encode'"
#############################################################################
# Create encodeRegions table
# Lift the hg17 ENCODE pilot regions to hg18 coordinates and load them.
ssh hgwdev
cd /cluster/data/encode
mkdir convertHg18
# Symlinks so the conversion dirs can be referenced by assembly name.
ln -s convertHg18 hg18
ln -s convertHg17 hg17
cd hg18
# Dump hg17 regions as BED on stdin and liftOver to hg18.
hgsql hg17 -N -e "SELECT * FROM encodeRegions ORDER BY name" | \
liftOver stdin /gbdb/hg17/liftOver/hg17ToHg18.over.chain.gz \
encodeRegions.bed encodeRegions.unmapped
hgLoadBed hg18 encodeRegions encodeRegions.bed
cp encodeRegions.bed ~/browser/ENCODE/build35_regions.bed
# cvs add, install in /usr/local/apache/htdocs/ENCODE
##########################################################################
# DOWNLOADS (2007-09-21 kate)
# Set up the goldenPath download area for hg18 ENCODE: README, database
# dumps, data files, and region sequences (plain and repeat-masked).
ssh hgwdev
cd /usr/local/apache/htdocs/goldenPath/hg18
mkdir -p encode
cd encode
# release terms
cp ../../hg17/encode/README.txt .
# annotation database
# request admin set up automated database dump
mkdir database
# auxiliary data files
mkdir datafiles
# sequences
cd /cluster/data/encode/convertHg18
hgsql hg18 -N -e \
"SELECT name, chrom, chromStart, chromEnd FROM encodeRegions ORDER BY name">regions.txt
# Extract region FASTA on the big-memory host from local nib files.
ssh kolossus
cd /cluster/data/encode/convertHg18
mkdir regions
cd regions
/cluster/data/encode/bin/scripts/encodeSequences.pl -upper \
../regions.txt /iscratch/i/hg18/nib > hg18.fa
/cluster/data/encode/bin/scripts/encodeSequences.pl -masked \
../regions.txt /iscratch/i/hg18/nib > hg18.msk.fa
faSize -detailed hg18.fa > hg18_count.txt
gzip *.fa
md5sum *.fa.gz > md5sum.txt
# copy regions/README.txt from hg17 and edit
ssh hgwdev
cd /usr/local/apache/htdocs/goldenPath/hg18/encode
ln -s /cluster/data/encode/convertHg18/regions .
cp ../../hg17/encode/regions/README.txt regions
# edit README
##############################################################################
# Lifting rampage (Andy)
# Bulk-lift hg17 ENCODE tables to hg18, working through the 554 tables by
# table type: plain beds first, then "bed N +", then bedGraph.
ssh hgwdev
bash
cd /cluster/data/encode/convertHg18
/cluster/data/encode/bin/scripts/listEncodeTables.csh hg17 > hg17.tables
wc -l hg17.tables
#554 hg17.tables (dang)
# start with easy beds i.e. the ones like "bed <num> ."
grep "bed.*\." hg17.tables > easyBeds.tables
grep -v "bed.*\." hg17.tables > remaining.tables
wc -l easyBeds.tables
#127 easyBeds.tables
mkdir easyBeds
for fields in 3 4 5 6 9 12; do
for table in `grep "bed $fields" easyBeds.tables | cut -f1`; do
hgsql hg18 -e "drop table $table"
/cluster/data/encode/bin/scripts/convertBedTable.csh hg17 hg18 \
$table $fields >> easyBeds.script.log
mv $table.* easyBeds/
done
done
# still got 4 like "bed ."
egrep -v "bed (3|4|5|6|9|12)" easyBeds.tables
#encodeYaleChIPSTAT1HeLaBingRenSites encodeChip bed .
#encodeYaleChIPSTAT1HeLaMaskLess36mer36bpSite encodeChip bed .
#encodeYaleChIPSTAT1HeLaMaskLess50mer38bpSite encodeChip bed .
#encodeYaleChIPSTAT1HeLaMaskLess50mer50bpSite encodeChip bed .
# these are all bed 3
for table in `egrep -v "bed (3|4|5|6|9|12)" easyBeds.tables | cut -f1`; do
/cluster/data/encode/bin/scripts/convertBedTable.csh hg17 hg18 \
$table 3 >> easyBeds.script.log
mv $table.* easyBeds/
done
# ok now there's ones like "bed <num> +"
wc -l remaining.tables
#427 remaining.tables
grep '\+' remaining.tables > plusBed.tables
grep -v '\+' remaining.tables > tmp; mv tmp remaining.tables
wc -l remaining.tables plusBed.tables
# 383 remaining.tables
# 44 plusBed.tables
# 427 total
mkdir plusBeds
for fields in 4 5 6 9 12; do
for table in `grep "bed $fields" plusBed.tables | cut -f1`; do
/cluster/data/encode/bin/scripts/convertBedTable.csh hg17 hg18 \
$table $fields >> plusBeds.script.log
mv $table.* plusBeds/
done
done
# how about bedGraph ones?
grep bedGraph remaining.tables > bedGraph.tables
grep -v bedGraph remaining.tables > tmp; mv tmp remaining.tables
wc -l bedGraph.tables remaining.tables
# 186 bedGraph.tables
# 197 remaining.tables
# 383 total
mkdir bedGraph
# bedGraph tables are lifted as 4-column beds.
for table in `cut -f1 bedGraph.tables`; do
/cluster/data/encode/bin/scripts/convertBedTable.csh hg17 hg18 \
$table 4 >> bedGraph.script.log
mv $table.* bedGraph/
done
######################################
# Continue lifting rampage (ting, 06-07-2007)
#
# Examining remaining.tables
# All GIS tables are bed 12, so lift these
grep Gis remaining.tables > Gis.tables
grep -v Gis remaining.tables > tmp; mv tmp remaining.tables
wc -l Gis.tables remaining.tables
# 7 Gis.tables
# 190 remaining.tables
# 197 total
mkdir bedGis
# doGis.csh wraps the per-table bed-12 conversion for the GIS tables.
doGis.csh
# 7 tables lifted.
# 190 remaining.
####################################################
# More lifting (Andy)
# Continue with genePred tables, odd bed variants, then the wiggle tables
# (split by whether their source data lives in hg16 or hg17).
ssh hgwdev
bash
cd /cluster/data/encode/convertHg18
# genePred tables
grep genePred remaining.tables > genePred.tables
grep -v genePred remaining.tables > tmp; mv tmp remaining.tables
wc -l genePred.tables remaining.tables
# 68 genePred.tables
# 122 remaining.tables
# 190 total
mkdir genePred
for table in `cut -f1 genePred.tables`; do
/cluster/data/encode/bin/scripts/convertGenePredTable.csh hg17 hg18 $table >> genePred.scripts.log;
mv $table.* genePred/
done
# ERRORS, uh oh
# fixed /cluster/data/encode/bin/scripts/convertGenePredTable.csh
# binned hg17 tables weren't working right.
# Re-run just the tables that logged a conversion error.
grep error genePred.scripts.log | sed 's/^.*converting\ \(.*\)\.txt.*$/\1/' > genePredBins.tables
for table in `cat genePredBins.tables`; do
/cluster/data/encode/bin/scripts/convertGenePredTable.csh hg17 hg18 $table >> genePredBins.scripts.log;
mv $table.* genePred/
done
# missed bed tables. There's a few like "bed5FloatScore" and "bed 3", etc.
# these can be treated as normal beds
grep bed remaining.tables | cut -f1,3 | sed 's/bed5FloatScore/bed 5/' > \
bedOther.tables
grep -v bed remaining.tables > tmp
wc -l bedOther.tables tmp
# 14 bedOther.tables
# 108 tmp
# 122 total
mkdir bedOther
for fields in 3 4 5; do
for table in `grep "bed $fields" bedOther.tables | cut -f1`; do
/cluster/data/encode/bin/scripts/convertBedTable.csh hg17 hg18 \
$table $fields >> bedOther.script.log
mv $table.* bedOther/
done
done
# wiggle
# first tally up which ones are in which DBs. The older ones can go hg16->hg18 instead
# of hg17->hg18. Make three sets of tables and do set operations:
hgsql hg16 -e 'show tables' > hg16.all.tables
hgsql hg17 -e 'show tables' > hg17.all.tables
grep -v wigMaf remaining.tables | cut -f1 > wig.tables
grep wigMaf remaining.tables > tmp; mv tmp remaining.tables
wc -l wig.tables remaining.tables
# 119 wig.tables
# 3 remaining.tables
# 122 total
# OOPS I forgot to subtract the 14 tables from last one.
# How many of the wiggle tables are in hg17? I hope all 119
grep -Fw -f wig.tables hg17.all.tables | wc -l
# 105
# good. Ok how about hg16?
grep -Fw -f wig.tables hg16.all.tables | wc -l
# 61
# I guess then hg17 should have 44 newer ones.
grep -Fw -f wig.tables hg16.all.tables > hg16.wig.tables
grep -Fwv -f hg16.wig.tables wig.tables > hg17.wig.tables
wc -l *wig.tables
# 61 hg16.wig.tables
# 44 hg17.wig.tables
# 105 wig.tables
# Awesome. These two sets shouldn't intersect at all:
grep -Fw -f hg16.wig.tables hg17.wig.tables | wc -l
# 0
# Great. Now lets move on. Let's use hgWiggle on each of these tables to
# fetch the old data. Then we'll convert that to bed 4, lift that, then
# run wigEncode on the lifted data.
mkdir ../hg18.wib
mkdir -p /gbdb/hg18/encode/wib
mkdir fromHg16.wig
for table in `cat hg16.wig.tables`; do
hgWiggle -db=hg16 $table \
| grep -v "^#" | awk -f varStepToBed.awk > $table.old.wig
liftOver -bedPlus=3 -tab $table.old.wig /gbdb/hg16/liftOver/hg16ToHg18.over.chain.gz \
$table.new.wig $table.unmapped
sort -k1,1 -k2,2n $table.new.wig > tmp.wig; mv tmp.wig $table.new.wig
wigEncode $table.new.wig $table.wig $table.wib 2>> wigFromHg16.log
mv $table.wib ../hg18.wib/
ln -s /cluster/data/encode/hg18.wib/${table}.wib /gbdb/hg18/encode/wib/${table}.wib
hgLoadWiggle -pathPrefix=/gbdb/hg18/encode/wib hg18 $table $table.wig
mv $table.*wig $table.unmapped fromHg16.wig/
done
# Same pipeline for the hg17-sourced wiggles, using the hg17 chain.
mkdir fromHg17.wig
for table in `cat hg17.wig.tables`; do
hgWiggle -db=hg17 $table \
| grep -v "^#" | awk -f varStepToBed.awk > $table.old.wig
liftOver -bedPlus=3 -tab $table.old.wig /gbdb/hg17/liftOver/hg17ToHg18.over.chain.gz \
$table.new.wig $table.unmapped
sort -k1,1 -k2,2n $table.new.wig > tmp.wig; mv tmp.wig $table.new.wig
wigEncode $table.new.wig $table.wig $table.wib 2>> wigFromHg17.log
mv $table.wib ../hg18.wib/
ln -s /cluster/data/encode/hg18.wib/${table}.wib /gbdb/hg18/encode/wib/${table}.wib
hgLoadWiggle -pathPrefix=/gbdb/hg18/encode/wib hg18 $table $table.wig
mv $table.*wig $table.unmapped fromHg17.wig/
done
##########################################################
# Wig lifting
# we need to find all the old wiggle data and lift that.
# Start with hg16
ssh hgwdev
bash
cd /cluster/data/encode/convertHg18
# find those hg16 tables
# Two-column manifest written via here-doc: <tableName> <labSubmissionFile>.
# (Here-doc body is data -- must stay exactly as submitted.)
cat > affyChipChip.hg16.wig.tables << "EOF"
encodeAffyChIpHl60PvalBrg1Hr00 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_Brg1_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalBrg1Hr02 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_Brg1_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalBrg1Hr08 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_Brg1_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalBrg1Hr32 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_Brg1_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalCebpeHr00 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_CEBPe_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalCebpeHr02 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_CEBPe_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalCebpeHr08 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_CEBPe_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalCebpeHr32 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_CEBPe_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalCtcfHr00 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_CTCF_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalCtcfHr02 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_CTCF_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalCtcfHr08 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_CTCF_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalCtcfHr32 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_CTCF_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalH3K27me3Hr00 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_H3K27T_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalH3K27me3Hr02 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_H3K27T_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalH3K27me3Hr08 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_H3K27T_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalH3K27me3Hr32 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_H3K27T_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalH4Kac4Hr00 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_HisH4_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalH4Kac4Hr02 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_HisH4_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalH4Kac4Hr08 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_HisH4_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalH4Kac4Hr32 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_HisH4_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalP300Hr00 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_P300_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalP300Hr02 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_P300_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalP300Hr08 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_P300_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalP300Hr32 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_P300_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalPu1Hr00 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_PU1_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalPu1Hr02 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_PU1_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalPu1Hr08 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_PU1_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalPu1Hr32 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_PU1_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalRaraHr00 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_RARecA_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalRaraHr02 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_RARecA_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalRaraHr08 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_RARecA_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalRaraHr32 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_RARecA_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalRnapHr00 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_Pol2_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalRnapHr02 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_Pol2_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalRnapHr08 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_Pol2_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalRnapHr32 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_Pol2_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalSirt1Hr00 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_SIRT1_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalSirt1Hr02 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_SIRT1_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalSirt1Hr08 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_SIRT1_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalSirt1Hr32 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_SIRT1_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalTfiibHr32 Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_TFIIB-R_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
EOF
mkdir -p wigs/hg16
cd wigs/hg16
# For each (table, file) pair: decompress, drop the track header line,
# varStep -> bed, shift start to 1-based, lift hg16->hg18, sort, re-encode.
cat ../../affyChipChip.hg16.wig.tables | while read -a line; do
chain=/gbdb/hg16/liftOver/hg16ToHg18.over.chain.gz
table=${line[0]};
oldWig=$table.hg16.wig
newWig=$table.hg18.wig
bad=$table.hg18.unmapped
wib=$table.wib
wigTable=$table.tab
file=/cluster/data/encode/${line[1]};
echo $table
bzcat $file | tail +2 | awk -f ../../varStepToBed.awk | \
awk 'BEGIN{OFS="\t"}{print $1, $2+1, $3, $4;}' > $oldWig;
liftOver -bedPlus=3 $oldWig $chain $newWig $bad
bedSort $newWig tmp
mv tmp $newWig
wigEncode $newWig $wigTable $wib
done
# One more for BU Orchid
awk -f ../../varStepToBed.awk ../../../BU/orchid/2005-06-09/t0 > encodeBu_ORChID1.hg16.wig
liftOver -bedPlus=3 encodeBu_ORChID1.hg16.wig /gbdb/hg16/liftOver/hg16ToHg18.over.chain.gz encodeBu_ORChID1.hg18.wig encodeBu_ORChID1.hg18.unmapped
bedSort encodeBu_ORChID1.hg18.wig tmp; mv tmp encodeBu_ORChID1.hg18.wig
wigEncode encodeBu_ORChID1.hg18.wig encodeBu_ORChID1.tab encodeBu_ORChID1.wib
# NOTE: this track was replaced with newer data -- the lift was
# never used.
# Encode hapmap coverage
# Lift the Sanger hapmap coverage bedGraphs (hg16) and wig-encode them.
# Table name is derived from the file name by stripping dir and extension.
for graph in ../../../sanger/coverage/encode*.bedGraph; do
table=${graph%.bedGraph}
table=${table#*coverage\/}
liftOver -bedPlus=3 $graph /gbdb/hg16/liftOver/hg16ToHg18.over.chain.gz \
$table.hg18.wig $table.hg18.unmapped
bedSort $table.hg18.wig tmp; mv tmp $table.hg18.wig
wigEncode $table.hg18.wig $table.tab $table.wib
done
# hg17 tables
cd ../
mkdir hg17
cd hg17
# Manifest fed to the loop through a here-doc on the done-line:
# varStep -> bed, sort, lift hg17->hg18, sort, wigEncode.
cat | while read -a line; do
table=${line[0]};
file=/cluster/data/encode/${line[1]};
chain=/gbdb/hg17/liftOver/hg17ToHg18.over.chain.gz;
awk -f ../../varStepToBed.awk $file > $table.hg17.wig;
bedSort $table.hg17.wig tmp; mv tmp $table.hg17.wig
liftOver -bedPlus=3 $table.hg17.wig $chain $table.hg18.wig $table.hg18.unmapped;
bedSort $table.hg18.wig tmp; mv tmp $table.hg18.wig;
wigEncode $table.hg18.wig $table.tab $table.wib
done << "EOF"
encodeAffyChIpHl60PvalStrictH3K9K14DHr00 Affy/2005-10-03/lab/CHIP/wig/H3K9K14D/00/EC_AS_HL60_DN_RA_H3K9K14D_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
encodeAffyChIpHl60PvalStrictH3K9K14DHr02 Affy/2005-10-03/lab/CHIP/wig/H3K9K14D/02/EC_AS_HL60_DN_RA_H3K9K14D_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
encodeAffyChIpHl60PvalStrictH3K9K14DHr08 Affy/2005-10-03/lab/CHIP/wig/H3K9K14D/08/EC_AS_HL60_DN_RA_H3K9K14D_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
encodeAffyChIpHl60PvalStrictH3K9K14DHr32 Affy/2005-10-03/lab/CHIP/wig/H3K9K14D/32/EC_AS_HL60_DN_RA_H3K9K14D_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
encodeAffyChIpHl60PvalStrictHisH4Hr00 Affy/2005-10-03/lab/CHIP/wig/HisH4/00/EC_AS_HL60_DN_RA_HisH4_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
encodeAffyChIpHl60PvalStrictHisH4Hr02 Affy/2005-10-03/lab/CHIP/wig/HisH4/02/EC_AS_HL60_DN_RA_HisH4_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
encodeAffyChIpHl60PvalStrictHisH4Hr08 Affy/2005-10-03/lab/CHIP/wig/HisH4/08/EC_AS_HL60_DN_RA_HisH4_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
encodeAffyChIpHl60PvalStrictHisH4Hr32 Affy/2005-10-03/lab/CHIP/wig/HisH4/32/EC_AS_HL60_DN_RA_HisH4_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
encodeAffyChIpHl60PvalStrictp63_ActD Affy/2005-10-03/lab/CHIP/wig/p63_ActD/EC_AS_ME180_ActD_p63_C01_EC_AS_ME180_CombInput_PlMinActD_B1_B2_B3.pval.median.wig
encodeAffyChIpHl60PvalStrictp63_mActD Affy/2005-10-03/lab/CHIP/wig/p63_mActD/EC_AS_ME180_Ctrl_p63_C01_EC_AS_ME180_CombInput_PlMinActD_B1_B2_B3.pval.median.wig
encodeAffyChIpHl60PvalStrictPol2Hr00 Affy/2005-10-03/lab/CHIP/wig/Pol2/00/EC_AS_HL60_DN_RA_Pol2_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
encodeAffyChIpHl60PvalStrictPol2Hr02 Affy/2005-10-03/lab/CHIP/wig/Pol2/02/EC_AS_HL60_DN_RA_Pol2_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
encodeAffyChIpHl60PvalStrictPol2Hr08 Affy/2005-10-03/lab/CHIP/wig/Pol2/08/EC_AS_HL60_DN_RA_Pol2_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
encodeAffyChIpHl60PvalStrictPol2Hr32 Affy/2005-10-03/lab/CHIP/wig/Pol2/32/EC_AS_HL60_DN_RA_Pol2_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
encodeAffyChIpHl60SignalStrictH3K9K14DHr00 Affy/2005-10-03/lab/CHIP/wig/H3K9K14D/00/EC_AS_HL60_DN_RA_H3K9K14D_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
encodeAffyChIpHl60SignalStrictH3K9K14DHr02 Affy/2005-10-03/lab/CHIP/wig/H3K9K14D/02/EC_AS_HL60_DN_RA_H3K9K14D_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
encodeAffyChIpHl60SignalStrictH3K9K14DHr08 Affy/2005-10-03/lab/CHIP/wig/H3K9K14D/08/EC_AS_HL60_DN_RA_H3K9K14D_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
encodeAffyChIpHl60SignalStrictH3K9K14DHr32 Affy/2005-10-03/lab/CHIP/wig/H3K9K14D/32/EC_AS_HL60_DN_RA_H3K9K14D_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
encodeAffyChIpHl60SignalStrictHisH4Hr00 Affy/2005-10-03/lab/CHIP/wig/HisH4/00/EC_AS_HL60_DN_RA_HisH4_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
encodeAffyChIpHl60SignalStrictHisH4Hr02 Affy/2005-10-03/lab/CHIP/wig/HisH4/02/EC_AS_HL60_DN_RA_HisH4_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
encodeAffyChIpHl60SignalStrictHisH4Hr08 Affy/2005-10-03/lab/CHIP/wig/HisH4/08/EC_AS_HL60_DN_RA_HisH4_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
encodeAffyChIpHl60SignalStrictHisH4Hr32 Affy/2005-10-03/lab/CHIP/wig/HisH4/32/EC_AS_HL60_DN_RA_HisH4_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
encodeAffyChIpHl60SignalStrictp63_ActD Affy/2005-10-03/lab/CHIP/wig/p63_ActD/EC_AS_ME180_ActD_p63_C01_EC_AS_ME180_CombInput_PlMinActD_B1_B2_B3.sig.median.wig
encodeAffyChIpHl60SignalStrictp63_mActD Affy/2005-10-03/lab/CHIP/wig/p63_mActD/EC_AS_ME180_Ctrl_p63_C01_EC_AS_ME180_CombInput_PlMinActD_B1_B2_B3.sig.median.wig
encodeAffyChIpHl60SignalStrictPol2Hr00 Affy/2005-10-03/lab/CHIP/wig/Pol2/00/EC_AS_HL60_DN_RA_Pol2_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
encodeAffyChIpHl60SignalStrictPol2Hr02 Affy/2005-10-03/lab/CHIP/wig/Pol2/02/EC_AS_HL60_DN_RA_Pol2_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
encodeAffyChIpHl60SignalStrictPol2Hr08 Affy/2005-10-03/lab/CHIP/wig/Pol2/08/EC_AS_HL60_DN_RA_Pol2_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
encodeAffyChIpHl60SignalStrictPol2Hr32 Affy/2005-10-03/lab/CHIP/wig/Pol2/32/EC_AS_HL60_DN_RA_Pol2_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
encodeAffyRnaGm06990Signal Affy/2005-10-03/lab/RNA/wig/GM06990/EC_AS_GM06990_RCyP+_C01vsNULL.sig.wig
encodeAffyRnaHeLaSignal Affy/2005-11-22/lab/Affy_HeLa/wig/EC_AS_HeLa_RCyP+_C01vsNULL.sig.wig
encodeAffyRnaHl60SignalHr00 Affy/2005-10-03/lab/RNA/wig/HL60/00/EC_AS_HL60_RWP+_RA_00hr_C01vsNULL.sig.wig
encodeAffyRnaHl60SignalHr02 Affy/2005-10-03/lab/RNA/wig/HL60/02/EC_AS_HL60_RWP+_RA_02hr_C01vsNULL.sig.wig
encodeAffyRnaHl60SignalHr08 Affy/2005-10-03/lab/RNA/wig/HL60/08/EC_AS_HL60_RWP+_RA_08hr_C01vsNULL.sig.wig
encodeAffyRnaHl60SignalHr32 Affy/2005-10-03/lab/RNA/wig/HL60/32/EC_AS_HL60_RWP+_RA_32hr_C01vsNULL.sig.wig
encodeUvaDnaRepTr50 UVa/2005-10-15/lab/smoothedtr50.hg17.wig
EOF
# Uppsala hg17 is already in bed format
# Same lift/encode pipeline, but without the varStepToBed conversion step.
cat | while read -a line; do
table=${line[0]};
file=/cluster/data/encode/${line[1]};
chain=/gbdb/hg17/liftOver/hg17ToHg18.over.chain.gz;
liftOver -bedPlus=3 $file $chain $table.hg18.wig $table.hg18.unmapped;
bedSort $table.hg18.wig tmp; mv tmp $table.hg18.wig;
wigEncode $table.hg18.wig $table.tab $table.wib
done << "EOF"
encodeUppsalaChipH3acBut0h Uppsala/2006-05-29/lab/encodeUppsalaChipH3acBut0h.wig.txt
encodeUppsalaChipH3acBut12h Uppsala/2006-05-29/lab/encodeUppsalaChipH3acBut12h.wig.txt
encodeUppsalaChipH4acBut0h Uppsala/2006-05-29/lab/encodeUppsalaChipH4acBut0h.wig.txt
encodeUppsalaChipH4acBut12h Uppsala/2006-05-29/lab/encodeUppsalaChipH4acBut12h.wig.txt
EOF
# Yale transcript-map wiggles (bed-format input), same lift/encode pipeline.
cat | while read -a line; do
table=${line[0]};
file=/cluster/data/encode/${line[1]};
chain=/gbdb/hg17/liftOver/hg17ToHg18.over.chain.gz;
liftOver -bedPlus=3 $file $chain $table.hg18.wig $table.hg18.unmapped;
bedSort $table.hg18.wig tmp; mv tmp $table.hg18.wig;
wigEncode $table.hg18.wig $table.tab $table.wib
done << "EOF"
encodeYaleAffyNeutRNATransMap yale/rna/2005-10-14/encodeYaleAffyNeutRNATransMap.trim
encodeYaleAffyNB4RARNATransMap yale/rna/2005-10-14/lab/encode_Yale_Affy_NB4_RA_RNA_Transcript_Map_ncbi35.wig
encodeYaleAffyNB4TPARNATransMap yale/rna/2005-10-14/lab/encode_Yale_Affy_NB4_TPA_RNA_Transcript_Map_ncbi35.wig
encodeYaleAffyNB4UntrRNATransMap yale/rna/2005-10-14/lab/encode_Yale_Affy_NB4_CTRL_RNA_Transcript_Map_ncbi35.wig
encodeYaleAffyPlacRNATransMap yale/rna/2005-10-14/lab/encode_Yale_Affy_Placenta_RNA_Transcript_Map_ncbi35.wig
EOF
# ERRORS ... the first one worked, the others need trimming.
# Retry the four failing tables with trimOverlap.pl inserted after bedSort
# to remove overlapping records that wigEncode rejects.
cat | while read -a line; do
table=${line[0]};
file=/cluster/data/encode/${line[1]};
chain=/gbdb/hg17/liftOver/hg17ToHg18.over.chain.gz;
liftOver -bedPlus=3 $file $chain $table.hg18.wig $table.hg18.unmapped;
bedSort $table.hg18.wig stdout | /cluster/data/encode/bin/scripts/trimOverlap.pl > tmp;
mv tmp $table.hg18.wig;
wigEncode $table.hg18.wig $table.tab $table.wib;
done << "EOF"
encodeYaleAffyNB4RARNATransMap yale/rna/2005-10-14/lab/encode_Yale_Affy_NB4_RA_RNA_Transcript_Map_ncbi35.wig
encodeYaleAffyNB4TPARNATransMap yale/rna/2005-10-14/lab/encode_Yale_Affy_NB4_TPA_RNA_Transcript_Map_ncbi35.wig
encodeYaleAffyNB4UntrRNATransMap yale/rna/2005-10-14/lab/encode_Yale_Affy_NB4_CTRL_RNA_Transcript_Map_ncbi35.wig
encodeYaleAffyPlacRNATransMap yale/rna/2005-10-14/lab/encode_Yale_Affy_Placenta_RNA_Transcript_Map_ncbi35.wig
EOF
# Forgot an hg16 one
# UCSD NimbleGen ChIP signal: varStep -> bed, lift hg16->hg18, trim
# overlaps, then wigEncode.
table=encodeUcsdNgChipSignal
file=/cluster/data/encode/UCSD/nimblegen/2005-05-31/encodeUcsdNgChipSignal.varStep
chain=/gbdb/hg16/liftOver/hg16ToHg18.over.chain.gz
awk -f ../../varStepToBed.awk $file > $table.hg16.wig
liftOver -bedPlus=3 $table.hg16.wig $chain $table.hg18.wig $table.hg18.unmapped
bedSort $table.hg18.wig stdout | /cluster/data/encode/bin/scripts/trimOverlap.pl > tmp
mv tmp $table.hg18.wig
wigEncode $table.hg18.wig $table.tab $table.wib
##########################################################################
# Boston University ORChID track - (2007-06-29 ting)
# data developer contact: Steve Parker parker@bu.edu
# This is a new dataset to replace the old one, for the same track.
# On hg17 the track name is encodeBu_ORChID1, was commented as "non-standard table name"
# I took this chance to rename it as encodeBUORChID on hg18.
ssh hgwdev
cd /cluster/data/encode/BU
mkdir -p orchid/2007-06-29/lab
# FIXED(review): was "cd -p orchid/2007-06-29/lab" -- 'cd' takes no -p
# flag here; the -p was copied from the mkdir line above.
cd orchid/2007-06-29/lab
wget --timestamping "http://dna.bu.edu/parker/.data/orchid_hg18_encode.wig.gz"
cd ..
mkdir wib
# The file orchid_hg18_encode.wig.gz from data provider contains 0-based coordinates,
# thus wigEncode choked on it -- specifically, at chr16, position 0 (ENm008).
# I compared this new data to the old dataset (2005-09-08) and made sure that this
# is the case. I saved the original file to 'original.wig.gz', and added 1 to all
# positions in orchid_hg18_encode.wig.gz
wigEncode lab/orchid_hg18_encode.wig.gz encodeBUORChID.wig \
wib/encodeBUORChID.wib
# Converted lab/orchid_hg18_encode.wig.gz, upper limit 1.64, lower limit -0.98
# load
set dir = /gbdb/hg18/encode/BU/2007-06-29
mkdir -p $dir
hgLoadWiggle -pathPrefix=$dir hg18 encodeBUORChID encodeBUORChID.wig
mkdir -p $dir/wib
ln -s `pwd`/wib/encodeBUORChID.wib $dir/wib
# create encodeBUORChID.html at trackDb/human/hg18/
#############################################################################
# Stanford NRSF ChIP-seq (DONE, Heather, July 2007)
# Lift enriched and control beds hg17->hg18 and load as tab-separated beds.
ssh hgwdev
cd /cluster/data/encode/stanford/2007-03-14
liftOver fix.bed /gbdb/hg17/liftOver/hg17ToHg18.over.chain.gz hg18.bed core.unmapped
liftOver control_fix.bed /gbdb/hg17/liftOver/hg17ToHg18.over.chain.gz hg18.control.bed control.unmapped
hgLoadBed hg18 encodeStanfordNRSFEnriched hg18.bed -tab
hgLoadBed hg18 encodeStanfordNRSFControl hg18.control.bed -tab
############################################################################
# Yale ENCODE Lifting
# Look up each Yale table's trackDb type, normalize the bed variants to
# plain "bed N", then convert with the standard bed converter.
ssh hgwdev
cd /cluster/data/encode/convertHg18
for table in `cat yale.lst`; do
echo select tableName,type from trackDb where tableName=\"$table\" \
| hgsql hg17 | tail +2 >> yale.tables
done
sed -e 's/bed5FloatScoreWithFdr/bed 5/' \
-e 's/bedGraph\ 4/bed 4/' -e 's/bed5FloatScore/bed 5/' \
< yale.tables > tmp.tables
mv tmp.tables yale.tables
mkdir yale
for fields in 4 5; do
for table in `grep "bed $fields" yale.tables | cut -f1`; do
/cluster/data/encode/bin/scripts/convertBedTable.csh hg17 hg18 \
$table $fields >> yale.script.log
mv $table.* yale/
done
done
###########################################################################
# Pseudogenes Class table copied from hg17 (2007-08-01 kate)
# This table is copied unchanged.
# (Positionless class table, so no lifting is needed -- dump and reload.)
ssh hgwdev
cd /cluster/data/encode/convertHg18
mkdir pseudogene
cd pseudogene
hgsqldump --all --tab=. hg17 encodePseudogeneClass
hgsql hg18 < encodePseudogeneClass.sql
echo "LOAD DATA LOCAL INFILE 'encodePseudogeneClass.txt' \
into table encodePseudogeneClass" | hgsql hg18
###########################################################################
# Affy EC chrom21/chrom22 (Andy DONE 2007-07-20)
# Unpack the Affy submission (already in hg18 coordinates), then load beds
# and wiggles using rename manifests (orig-file -> table-name).
ssh hgwdev
bash
cd /cluster/data/encode/Affy
mkdir -p 2007-07-12/lab
cd 2007-07-12/
mkdir -p processed/{bed,wigTable,wib,download}
cd lab/
cp /var/ftp/encode/encode_ext_RNA_hg18_chr21-22.tar.gz .
# FIXED(review): tarball was copied to the current dir above, so the
# "encode/" path prefix on the tar/rm lines was wrong.
tar xfz encode_ext_RNA_hg18_chr21-22.tar.gz
rm encode_ext_RNA_hg18_chr21-22.tar.gz
cd ../
find lab -name '*.bed' > renamesBed.txt
find lab -name '*.wig' > renamesWig.txt
# Make 2nd column for table name
# Load each bed, dropping the submitted track header line (tail +2).
cat renamesBed.txt | while read -a line; do
tail +2 ${line[0]} > processed/bed/${line[1]}.bed
hgLoadBed hg18 ${line[1]} processed/bed/${line[1]}.bed
done
rm bed.tab
# Wiggles: strip header, wigEncode, symlink the .wib into /gbdb, load,
# and keep a gzipped copy for the download area.
cat renamesWig.txt | while read -a line; do
table=${line[1]}
origFile=${line[0]}
tail +2 $origFile > processed/download/${table}.wig
wigEncode processed/download/${table}.wig processed/wigTable/${table}.tab \
processed/wib/${table}.wib 2>> processed/wigEncode.log
pushd /gbdb/hg18/encode/wib
ln -s /cluster/data/encode/Affy/2007-07-12/processed/wib/${table}.wib
popd
hgLoadWiggle -pathPrefix=/gbdb/hg18/encode/wib hg18 $table processed/wigTable/${table}.tab
gzip processed/download/${table}.wig
done
cd /usr/local/apache/htdocs/goldenPath/hg18/encode
ln -s /cluster/data/encode/Affy/2007-07-12/processed/download/*.gz .
###########################################################################
# Yale Pol II Chip (Chip-seq) (DONE Andy 11-07-2007)
cd /cluster/data/encode/yale
mkdir -p 2007-07-17/lab
cd 2007-07-17/lab
unzip Yale_jul17_v2.zip
set table = wgEncodeYaleChipSeqPol2HelaSites
hgLoadBed hg18 $table lab/PolII/PolII_hg18-sites.bed
#Reading PolII_hg18-sites.bed
#Loaded 87253 elements of size 4
#Sorted
#Creating table definition for encodeYalePolIISites
#Saving bed.tab
#start -142, end 1144 out of range in findBin (max is 512M)
# CONTACTED submitter to ask about negative coordinate.
# For now, leave out chrM
sed '/^chrM/d' lab/PolII/PolII_hg18-sites.bed | hgLoadBed hg18 $table stdin
# NOTE: max score=1779, min score=7
# data distribution
awk '{print $4}' pol2.bed | sort -n | textHistogram stdin -binSize=100 ;
0 ************************************************************ 83113
100 ** 3300
200 545
300 144
400 58
500 37
600 20
700 6
800 6
900 0
1000 2
1100 2
1200 0
1300 0
1400 0
1500 0
1600 0
1700 1
bedSort PolII_hg18-signal.wig tmp.wig
v tmp.wig PolII_hg18-signal.wig
../../../bin/scripts/trimOverlap.pl < PolII_hg18-signal.wig > tmp.wig
mv tmp.wig PolII_hg18-signal.wig
mv encodeYalePolIISignal.wib /cluster/data/encode/hg18.wib/
ln -s /cluster/data/encode/hg18.wib/encodeYalePolIISignal.wib /gbdb/hg18/encode/wib/
hgLoadWiggle -pathPrefix=/gbdb/hg18/encode/wib hg18 encodeYalePolIISignal encodeYalePolIISignal.wig
#Connected to database hg18 for track encodeYalePolIISignal
#Creating wiggle table definition in hg18.encodeYalePolIISignal
#Saving wiggle.tab
#WARNING: Exceeded chr18_random size 4406 > 4262. dropping 145 data point(s)
# hmmm... that's not a good warning. I wonder if these guys got the genome wrong.
hgLoadBed hg18 encodeYalePolIISites PolII_hg18-sites.bed
#Reading PolII_hg18-sites.bed
#Loaded 87253 elements of size 4
#Sorted
#Creating table definition for encodeYalePolIISites
#Saving bed.tab
#start -142, end 1144 out of range in findBin (max is 512M)
# MORE ERRORS. Clearly this submission wasn't quite meant to be just yet.
# to be continued...
# continued... made a "resub" dir and copied the resubmitted zipfile there.
cd /cluster/data/encode/yale/2007-07-17/resub
unzip Yale_jul17_v2.zip
cd PolII/
trimObBedLines PolII_hg18-signal.wig > ../../processed/wgEncodeYalePolIISignal.wigBed
pushd ../../processed/
wigEncode wgEncodeYalePolIISignal.wigBed wgEncodeYalePolIISignal.wig wgEncodeYalePolIISignal.wib
gzip wgEncodeYalePolIISignal.wigBed
cd ../../../hg18.wib
ln -s ../yale/2007-07-17/processed/wgEncodeYalePolIISignal.wib
cd /gbdb/hg18/encode/wib
ln -s /cluster/data/encode/hg18.wib/wgEncodeYalePolIISignal.wib
cd /usr/local/apache/htdocs/goldenPath/hg18/encode/wig
ln -s /cluster/data/encode/yale/2007-07-17/processed/wgEncodeYalePolIISignal.wigBed.gz
popd
hgLoadWiggle -pathPrefix=/gbdb/hg18/encode/wib hg18 wgEncodeYalePolIISignal wgEncodeYalePolIISignal.wig
#########################################################################
# YALE STAT1 (more ChIP-seq) (DONE, Andy 2007-11-20)
cd /cluster/data/encode/yale
mkdir 2007-08-08
cd 2007-08-08/
cp /var/ftp/encode/Yale_aug8.zip .
unzip Yale_aug8.zip
cd STAT1/
trimObBedLines hg18 STAT1_hg18-signal.wig wgEncodeYaleStat1Signal.wigBed
trimObBedLines hg18 STAT1_hg18-sites.bed wgEncodeYaleStat1Sites.bed
gzip wgEncodeYaleStat1Signal.wigBed
wigEncode wgEncodeYaleStat1Signal.wigBed.gz wgEncodeYaleStat1Signal.{wig,wib}
cd ../
mkdir lab processed
mv readme_aug8.txt STAT1 lab/
rm Yale_aug8.zip
mv lab/STAT1/wgEncodeYaleStat1Si* processed/
pushd ../../hg18.wib/
ln -s ../yale/2007-08-08/processed/wgEncodeYaleStat1Signal.wib
cd /gbdb/hg18/encode/wib
ln -s /cluster/data/encode/hg18.wib/wgEncodeYaleStat1Signal.wib
popd
cd processed/
hgLoadWiggle -pathPrefix=/gbdb/hg18/encode/wib hg18 wgEncodeYaleStat1Signal wgEncodeYaleStat1Signal.wig
hgLoadBed hg18 wgEncodeYaleStat1Sites wgEncodeYaleStat1Sites.bed
##########################################################################
# Genome Institute of Singapore PET data (2007-08-30 ting)
# Submitted 8/22 by Atif Shahab and Chia-lin Wei
# Three new PET datasets on human embryonic stem cell hES3.
# One polyA-RNA dataset, and two ChIP-PET datasets of H3K4me3 and H3K27me3.
# Build them as subtracks into existing GIS tracks: GIS-RNA-PET and GIS-CHIP-PET.
ssh hgwdev
cd /cluster/data/encode/GIS/
mkdir 2007-08-22
cd 2007-08-22
mkdir lab
cd lab
cp /var/ftp/encode/gis.tar.gz ./
gunzip gis.tar.gz
tar -xvf gis.tar
# obtained 3 data files: H3K27me3.bed H3K4me3.bed polyA.bed
# These are mapped on hg17, first lift.
cd /cluster/data/encode/GIS/2007-08-22
liftOver lab/polyA.bed ../../convertHg18/hg17ToHg18.over.chain.gz \
polyA-hg18.bed polyA-unmapped.bed
# 426301 lifted, 34 unmapped
liftOver lab/H3K4me3.bed ../../convertHg18/hg17ToHg18.over.chain.gz \
H3K4me3-hg18.bed H3K4me3-unmapped.bed
# 679752 lifted, 13 unmapped
liftOver lab/H3K27me3.bed ../../convertHg18/hg17ToHg18.over.chain.gz \
H3K27me3-hg18.bed H3K27me3-unmapped.bed
# 992509 lifted, 25 unmapped
# GIS data are not scored. Based on Angie and Kate's previous work,
# scored BED can be made from item name. Use scoreGisBed.pl to do so.
scoreGisBed.pl polyA-hg18.bed 2 encodeGisRnaPetHes3.bed
scoreGisBed.pl H3K4me3-hg18.bed 1 encodeGisChipPetHes3H3K4me3.bed
scoreGisBed.pl H3K27me3-hg18.bed 1 encodeGisChipPetHes3H3K27me3.bed
# load on hg18
hgLoadBed hg18 encodeGisRnaPetHes3 encodeGisRnaPetHes3.bed
# Loaded 426301 elements of size 12
hgLoadBed hg18 encodeGisChipPetHes3H3K4me3 encodeGisChipPetHes3H3K4me3.bed
# Loaded 679752 elements of size 12
hgLoadBed hg18 encodeGisChipPetHes3H3K27me3 encodeGisChipPetHes3H3K27me3.bed
# Loaded 992509 elements of size 12
# modified trackDb.encodeTxLevel.ra, trackDb.encodeChip.ra,
# encodeGisChipPetAll.html, encodeGisRnaPet.html
###########
# Promote UCSD genome-wide Chip tracks:
# UCSD TAF1 IMR90 Chip/chip to Regulation group
# (2007-09-14 kate)
# See hg18.txt
######################################################
# Add strand information for encodeGencodeRace data - ting 09-27-2007
# ENCODE 5RACE data do not contain strand information. This
# information is very important, and can be derived from
# available GENCODE and 5RACE data.
# There are two relatively simple strategies to derive strand
# information. However, there are several exceptions to either
# strategy. Therefore I will combine these two strategies in
# this one script.
# Strategy 1: a RACE primer should extend from 3' end of a transcript
# towards 5' end. Therefore, if any RACE frag from
# this primer extends towards the right of the primer
# location, it means the gene goes from right to left,
# i.e. on - strand. Therefore, the primer should be
# on the + strand, and the corresponding RACEfrag should
# be on the - strand (same as gene). By the same token,
# if a RACEfrag extends toward left, it indicates that
# the primer is on - strand, while the gene and RACEfrag
# are on + strand.
# The only case that such relationship can not be determined
# is when the RACEfrag contains only one exon, and the
# primer locates in that exon. It is not sure if the
# RACEfrag extends to the right or left.
# This strategy leaves 3 primers undetermined.
#
# Strategy 2: RACE primers should be designed based on GENCODE
# exons. Therefore, the orientation of the primer can be
# determined by its overlapping GENCODE exon. In this case,
# the primer is on the opposite strand of the GENCODE exon,
# and any RACEfrag from this primer should be on the opposite
# strand of the primer.
# There exist several exceptions, where the primer is
# located outside of exons. It is probably ok if instead
# look at the nearest exon if it doesn't overlap with any.
# This strategy leaves 37 primers undetermined.
# Combining 1 and 2 all primers are determined for their orientation.
#
# Instead of working on the original gff files, I decide to work on
# data files after hg18 migration. These files are genePred formatted.
# Working folder is
# /cluster/store6/encode/GencodeRACEfrags/2007-04-11/strand
ssh hgwdev
cd /cluster/data/encode/GencodeRACEfrags/latest/
mkdir strand
cd strand
cp /cluster/data/encode/convertHg18/genePred/*Race*.tab ./
cp /cluster/data/encode/convertHg18/genePred/encodeGencodeGeneKnownMar07.tab ./
./addRacePrimerStrand.pl encodeGencodeRaceFragsPrimer.tab encodeGencodeGeneKnownMar07.tab
csh load.csh >&! load.log
# encodeGencodeRaceFragsBrain
# Reading encodeGencodeRaceFragsBrain.tab
# 269 gene predictions
# encodeGencodeRaceFragsColon
# Reading encodeGencodeRaceFragsColon.tab
# 269 gene predictions
# encodeGencodeRaceFragsGM06990
# Reading encodeGencodeRaceFragsGM06990.tab
# 236 gene predictions
# encodeGencodeRaceFragsHL60
# Reading encodeGencodeRaceFragsHL60.tab
# 236 gene predictions
# encodeGencodeRaceFragsHeart
# Reading encodeGencodeRaceFragsHeart.tab
# 261 gene predictions
# encodeGencodeRaceFragsHela
# Reading encodeGencodeRaceFragsHela.tab
# 168 gene predictions
# encodeGencodeRaceFragsKidney
# Reading encodeGencodeRaceFragsKidney.tab
# 293 gene predictions
# encodeGencodeRaceFragsLiver
# Reading encodeGencodeRaceFragsLiver.tab
# 243 gene predictions
# encodeGencodeRaceFragsLung
# Reading encodeGencodeRaceFragsLung.tab
# 290 gene predictions
# encodeGencodeRaceFragsMuscle
# Reading encodeGencodeRaceFragsMuscle.tab
# 238 gene predictions
# encodeGencodeRaceFragsPlacenta
# Reading encodeGencodeRaceFragsPlacenta.tab
# 275 gene predictions
# encodeGencodeRaceFragsPrimer
# Reading encodeGencodeRaceFragsPrimer.tab
# 365 gene predictions
# encodeGencodeRaceFragsSmallIntest
# Reading encodeGencodeRaceFragsSmallIntest.tab
# 277 gene predictions
# encodeGencodeRaceFragsSpleen
# Reading encodeGencodeRaceFragsSpleen.tab
# 275 gene predictions
# encodeGencodeRaceFragsStomach
# Reading encodeGencodeRaceFragsStomach.tab
# 300 gene predictions
# encodeGencodeRaceFragsTestis
# Reading encodeGencodeRaceFragsTestis.tab
# 292 gene predictions
# Strand information is added for primers and all RACEfrags.
######################################################
# LIFT NHGRI DIPs from hg17 (2007-10-22 kate)
ssh hgwdev
cd /cluster/data/encode/NHGRI/mullikin/hg17
hgsql hg18 < encodeIndels.sql
zcat encodeIndels.bed.gz | tail +2 | \
liftOver -bedPlus=8 stdin /gbdb/hg17/liftOver/hg17ToHg18.over.chain.gz \
encodeIndels.hg18.bed encodeIndels.hg18.unmapped
# lost 670 items (of 11452 total)
# This is high -- nearly 6%, and losses were in all regions,
# not just chrX.
hgLoadBed hg18 encodeIndels -tab -sqlTable=encodeIndels.sql \
encodeIndels.hg18.bed
# change group name to merge in variation
hgsql hg18 -e "update grp set name='encodeCompAndVar' where name='encodeCompGeno'"
#########################################################
# 2007-11-08 (ASZ)
# These wig files were shown to not match their corresponding database table
# Dropped them from the hgdownload server:
#/goldenPath/hg18/encode/wig/encodeUppsalaChipH3acBut0h.wigBed.gz
#/goldenPath/hg18/encode/wig/encodeUppsalaChipH3acBut12h.wigBed.gz
#/goldenPath/hg18/encode/wig/encodeUppsalaChipH4acBut0h.wigBed.gz
#/goldenPath/hg18/encode/wig/encodeUppsalaChipH4acBut12h.wigBed.gz
#/goldenPath/hg18/encode/wig/encodeYaleAffyNB4RARNATransMap.wigBed.gz
#/goldenPath/hg18/encode/wig/encodeYaleAffyNB4TPARNATransMap.wigBed.gz
#/goldenPath/hg18/encode/wig/encodeYaleAffyNB4UntrRNATransMap.wigBed.gz
#/goldenPath/hg18/encode/wig/encodeYaleAffyNeutRNATransMap.wigBed.gz
#/goldenPath/hg18/encode/wig/encodeYaleAffyPlacRNATransMap.wigBed.gz
#########################################################
# Yale RACE (2007-11-15 galt)
#
cd /cluster/data/encode/yale/
mkdir race
cd race
mkdir 2007-11-15
ln -s 2007-11-15/ latest
cd latest
mkdir lab
cd lab
wget http://homes.gersteinlab.org/people/jiangdu/race_seq/race_desc.html
wget http://homes.gersteinlab.org/people/jiangdu/race_seq/conserved_transcripts-til-20070402.bed
tail +5 conserved_transcripts-til-20070402.bed | gawk '{print$1}' | sort -u | head
chr11
chr21
chr22
cp race_desc.html ${HOME}/kent/src/hg/makeDb/trackDb/human/hg18/encodeYaleRace.html
cvs add ${HOME}/kent/src/hg/makeDb/trackDb/human/hg18/encodeYaleRace.html
tail +5 conserved_transcripts-til-20070402.bed | hgLoadBed hg18 encodeYaleRace stdin
vi trackDb.encodeTxLevels.ra
---
track encodeYaleRace
superTrack encodeYaleRnaSuper dense
shortLabel Yale RACE
longLabel Yale RACE 420 primarily novel TARs in ENCODE regions
group encodeTxLevels
priority 32.0
chromosomes chr11,chr21,chr22
visibility hide
type bed 12 .
dataVersion ENCODE Nov 2007
origAssembly hg18
---
vi ${HOME}/kent/src/hg/makeDb/trackDb/human/hg18/encodeYaleRnaSuper.html
#edit to add the new RACE track to the Credits section
#############################################################################
# TBA alignments from Margulies lab, NHGRI (2008-2-20 kate)
# Submitted by Gayle McEwen (mceweng@mail.nih.gov), from their DEC-07 freeze
# Requested doc update (README & track description) from Elliott on 2/20
# Conservation scores: (BinCons and ChaiCons provided 3/24/08
ssh kkstore03
cd /cluster/data/encode/TBA
mkdir -p DEC-07/2008-01-10/lab
cd DEC-07/2008-01-10/lab
wget -nd ftp://kronos.nhgri.nih.gov/pub/outgoing/elliott/encode/freeze/DEC-2007/tba-DEC-2007.tar.gz
tar xvfz tba-DEC-2007.tar.gz
cd ..
mkdir maf
cat > getMafs.csh << 'EOF'
foreach f (lab/tba/*/*.maf.gz)
set r = $f:t:r:r:e
echo $r
gunzip -c $f | \
sed -e 's/^s human\./s hg18./' \
-e 's/^s mouse\./s mm9./' \
-e 's/^s cow\./s bosTau3./' \
-e 's/^s dog\./s canFam2./' \
-e 's/^s chicken\./s galGal3./' \
-e 's/^s monodelphis\./s monDom4./' \
-e 's/^s chimp\./s panTro2./' \
-e 's/^s macaque\./s rheMac2./' \
-e 's/^s orangutan\./s ponAbe2./' \
-e 's/^s rat\./s rn4./' \
> maf/$r.maf
end
'EOF'
csh getMafs.csh >&! getMafs.log &
# Score too small messages -- can be ignored (the score isn't meaningful)
# Add gap annotation
# prepare bed files with gap info
ssh kkstore03
cd /cluster/data/encode/TBA
cd DEC-07/2008-01-10
mkdir anno
cd anno
mkdir maf run
cd run
cat > species.lst << 'EOF'
hg18
bosTau3
canFam2
galGal3
monDom4
panTro2
rheMac2
ponAbe2
mm9
rn4
'EOF'
cat > doNBed.csh << 'EOF'
foreach db (`cat species.lst`)
echo -n "$db "
set cdir = /cluster/data/$db
if (! -e $cdir/$db.N.bed) then
echo "creating N.bed"
twoBitInfo -nBed $cdir/$db.2bit $cdir/$db.N.bed
else
echo ""
endif
end
'EOF'
csh doNBed.csh >&! doNBed.log &
rm -f nBeds
foreach db (`grep -v hg18 species.lst`)
echo "$db "
ln -s /cluster/data/$db/$db.N.bed $db.bed
echo $db.bed >> nBeds
end
cat > doAnno.csh << 'EOF'
foreach f (../../maf/*.maf)
set b = $f:t
echo $f
nice mafAddIRows -nBeds=nBeds $f \
/cluster/data/hg18/hg18.2bit ../maf/$b
end
'EOF'
#<< happy emacs
csh doAnno.csh >&! doAnno.log &
# Load MAF table with annotated mafs. Also load summary table.
ssh hgwdev
set mdir = /cluster/data/encode/TBA/DEC-07/2008-01-10/anno/maf
cd $mdir
set gdir = /gbdb/hg18/encode/TBA/DEC-07/2008-01-10/maf
rm -f $gdir/*.maf
mkdir -p $gdir
ln -s $mdir/*.maf $gdir
hgLoadMaf -pathPrefix=$gdir -WARN hg18 encodeTbaAlignDec07 >&! load.log &
cat *.maf | hgLoadMafSummary hg18 encodeTbaSummaryDec07 stdin
cd ..
# Reannotate with newer mafAddIRows having distinctive rows for
# tandem dups (by request of JK)
# 2008-10-23 kate
# again (another fix to mafAddIRows) 2008-10-27 kate
ssh kolossus
cd /cluster/data/encode/TBA
cd DEC-07/2008-01-10
cd anno/run
# edit doAnno.csh to use new version
csh doAnno.csh >&! doAnno.log &
ssh hgwdev
set mdir = /cluster/data/encode/TBA/DEC-07/2008-01-10/anno/maf
cd $mdir
set gdir = /gbdb/hg18/encode/TBA/DEC-07/2008-01-10/maf
hgLoadMaf -pathPrefix=$gdir -WARN hg18 encodeTbaAlignDec07 >&! load.log &
# Ignore 'score too small' errors
cat *.maf | hgLoadMafSummary hg18 encodeTbaSummaryDec07 stdin
#Created 141213 summary blocks from 8144409 components and 389847 mafs from stdin
#Loading into hg18 table encodeTbaSummaryDec07...
# Gene frames
ssh hgwdev
cd /cluster/data/encode/TBA/DEC-07/2008-01-10
mkdir frames
cd frames
# Pick gene tables, according to the following criteria:
# KG if present, else refGene if >10000 entries, else ensGene (unless dog),
# else mgcGenes, else mrnas if > 10000 else none. In all cases
# except none, add in refGene.
# NOTE: shortcut by using sources from hg18 multiz framing
# (added braney 2008-03-01) use geneCode for hg18, no
# genes from ponAbe2
hg18: encodeGencodeGeneKnownMar07
bosTau3: mrna
canFam2: mrna
galGal3: mrna
monDom4: ensGene
panTro2: refGene
rheMac2: ensGene
rn4: knownGene
mm9: knownGene
# get the genes for all genomes
# mRNAs with CDS. single select to get cds+psl, then split that up and
# create genePred
# using mrna table as genes
cat > getGenes.csh << 'EOF'
rm -fr genes
mkdir -p genes
set mrnaDbs = "bosTau3 canFam2 galGal3"
foreach queryDb ($mrnaDbs)
set tmpExt = `mktemp temp.XXXXXX`
set tmpMrnaCds = ${queryDb}.mrna-cds.${tmpExt}
set tmpMrna = ${queryDb}.mrna.${tmpExt}
set tmpCds = ${queryDb}.cds.${tmpExt}
echo $queryDb
hgsql -N -e 'select all_mrna.qName,cds.name,all_mrna.* \
from all_mrna,gbCdnaInfo,cds \
where (all_mrna.qName = gbCdnaInfo.acc) and \
(gbCdnaInfo.cds != 0) and (gbCdnaInfo.cds = cds.id)' \
$queryDb > ${tmpMrnaCds}
cut -f 1-2 ${tmpMrnaCds} > ${tmpCds}
cut -f 4-100 ${tmpMrnaCds} > ${tmpMrna}
mrnaToGene -cdsFile=${tmpCds} -smallInsertSize=8 -quiet ${tmpMrna} stdout | \
genePredSingleCover stdin stdout | gzip -2c > /scratch/tmp/$queryDb.tmp.gz
rm ${tmpMrnaCds} ${tmpMrna} ${tmpCds}
mv /scratch/tmp/$queryDb.tmp.gz genes/$queryDb.gp.gz
rm -f $tmpExt
end
# using encodeGencodeGeneKnownMar07 for hg18
# using knownGene for rn4 mm9
# using refGene for panTro2
# using ensGene for monDom4, rheMac2
# genePreds; (must keep only the first 10 columns for knownGene)
#set geneDbs = "hg18 mm9 rn4 panTro2 monDom4 rheMac2 ponAbe2"
# NOTE: next time include ponAbe2, using ensGene
set geneDbs = "hg18 mm9 rn4 panTro2 monDom4 rheMac2"
foreach queryDb ($geneDbs)
if ($queryDb == "monDom4" || $queryDb == "rheMac2") then
set geneTbl = ensGene
else if ($queryDb == "panTro2") then
set geneTbl = refGene
else if ($queryDb == "rn4" || $queryDb == "mm9") then
set geneTbl = knownGene
else if ($queryDb == "hg18") then
set geneTbl = encodeGencodeGeneKnownMar07
endif
hgsql -N -e "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds from $geneTbl" ${queryDb} \
| genePredSingleCover stdin stdout | gzip -2c \
> /scratch/tmp/$queryDb.tmp.gz
mv /scratch/tmp/$queryDb.tmp.gz genes/$queryDb.gp.gz
end
'EOF'
csh getGenes.csh >&! getGenes.log &
ssh kkstore03
cd /cluster/data/encode/TBA/DEC-07/2008-01-10/frames
(cat ../maf/*.maf | nice genePredToMafFrames hg18 stdin stdout bosTau3 genes/bosTau3.gp.gz canFam2 genes/canFam2.gp.gz galGal3 genes/galGal3.gp.gz hg18 genes/hg18.gp.gz panTro2 genes/panTro2.gp.gz rheMac2 genes/rheMac2.gp.gz mm9 genes/mm9.gp.gz rn4 genes/rn4.gp.gz monDom4 genes/monDom4.gp.gz | nice gzip > mafFrames.gz) >& frames.log &
ssh hgwdev
cd /cluster/data/encode/TBA/DEC-07/2008-01-10/frames
nice hgLoadMafFrames hg18 encodeTbaFramesDec07 mafFrames.gz >& loadFrames.log &
# Post downloads
ssh kkstore03
cd /cluster/data/encode/TBA/DEC-07/2008-01-10/
mkdir downloads
cd anno/maf
# redo to include re-annotated mafs (with 'T' lines for tandem dups)
# 2008-11-06 kate
tar cvfz ../../downloads/encodeTba.maf.tgz *.maf
# Obtain sequence freeze
ssh kkstore03
cd /cluster/data/encode/MSA
mkdir -p DEC-07/lab
cd DEC-07/lab
wget -nd ftp://kronos.nhgri.nih.gov/pub/outgoing/elliott/encode/freeze/DEC-2007/DEC-2007.tar.gz
# Received README.txt for sequence freeze
# and encodeTbaAlign.html update from Gayle McEwen, 6/12/08
cp encodeTbaAlign_DEC-2007.html ~/kent/src/hg/makeDb/trackDb/human/hg18/encodeTbaAlignDec07.html
# checkin to CVS
cd ..
mkdir downloads
cd downloads
ln -s /cluster/data/encode/MSA/DEC-07/lab/DEC-2007.tar.gz .
ln -s /cluster/data/encode/MSA/DEC-07/lab/seq sequences
# Received species tree from Gayle 8/08
cp ../lab/conserved.mod tree_4d.tba.nh
# edit to remove phastCons-specific header
# edit tree to remove species not in this dataset: gorilla, lemur, black_lemur, sheep,
# muntjak_indian, ajbat, cpbat, eehedgehog, wallaby, dunnart, torgoise, xenopus
# tetraodon, fugu, zebrafish, pig
tail +2 sequences/metadata.txt | awk '{print $1}' | sort | uniq > species.txt
echo `cat species.txt|sed 's/$/,/'` | sed 's/ //g' > speciesList.txt
/cluster/bin/phast/tree_doctor --prune-all-but `cat speciesList.txt` ../lab/tree_Dec2007.nh | sed 's/:0.000000//g' > species36.nh
# Create tree image with phyloGif -- use 700 height, preserve underscores
# encode_36way.gif
cp encode_36way.gif ~/browser/images/phylo/
# checkin to CVS
cp ../lab/README_DEC-2007.txt README.txt
# fix typo -- it's the Dec not Sep freeze
# edit DIRECTORY structure section a bit to reflect this downloads organization
# post for download
ssh hgwdev
cd /usr/local/apache/htdocs/goldenPath/hg18/encode
mkdir -p MSA/DEC-2007
cd MSA/DEC-2007
ln -s /cluster/data/encode/MSA/DEC-07/downloads/{README.txt,DEC-2007.tar.gz} .
ln -s /cluster/data/encode/MSA/DEC-07/downloads/{tree_4d.tba.nh,species36.nh} .
cp ~/browser/images/phylo/encode_36way.gif .
mkdir -p alignments/TBA/
cd alignments/TBA
ln -s /cluster/data/encode/TBA/DEC-07/2008-01-10/downloads/encodeTba.maf.tgz encodeTbaDec07.maf.tgz
# Conservation
mkdir -p ChaiCons/2008-03-24/lab BinCons/2008-03-24/lab
# copy files from Gayle McEwan email
# binCons files are formatted <region> start end name score,
# where score is always 1000
# Lift these to hg18 coordinates, and remove score field.
echo "select chromStart, name, chromEnd-chromStart, chrom from encodeRegions" | hgsql -N hg18 | sed 's/$/\t30000000/' > /cluster/data/encode/MSA/encodeRegions.lft
liftUp ChaiCons.bed /cluster/data/encode/MSA/encodeRegions.lft warn lab/CHAI.bed
wc -l ChaiCons.bed lab/CHAI.bed
#208916 ChaiCons.bed
#208916 lab/CHAI.bed
awk '{printf "%s\t%d\t%d\tchai.%d\n", $1, $2, $3, NR}' ChaiCons.bed > ChaiCons.bed4
hgLoadBed hg18 encodeTbaChaiConsDec07 ChaiCons.bed4
# Loaded 208916 elements of size 3
cd ../../BinCons/2008-03-24
liftUp -type=.bed stdout /cluster/data/encode/MSA/encodeRegions.lft warn lab/BINCONS.bed |\
sed 's/1000$//' > BinCons.bed
wc -l BinCons.bed lab/BINCONS.bed
# 117793 BinCons.bed
# 117836 lab/BINCONS.bed
# difference due to blank lines in source file:
grep '^$' lab/* | wc -l
# 43
hgLoadBed hg18 encodeTbaBinConsDec07 BinCons.bed
#############################################################################
# encodeGencodeGeneKnownMar07 (2010-04-07 markd)
# Was discovered to be corrupted on hgwdev and all servers
#
cd /cluster/data/encode/convertHg18/genePred
genePredCheck -db=hg18 encodeGencodeGeneKnownMar07.tab
checked: 2991 failed: 0
hgLoadGenePred -genePredExt hg18 encodeGencodeGeneKnownMar07 encodeGencodeGeneKnownMar07.tab
genePredCheck -db=hg18 encodeGencodeGeneKnownMar07