src/hg/makeDb/doc/encodeHg18.txt 1.34

1.34 2010/05/06 18:23:10 hartera
Removed documentation about CRG Mapability track as this is from the whole genome ENCODE and this document is for the pilot project.
Index: src/hg/makeDb/doc/encodeHg18.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/encodeHg18.txt,v
retrieving revision 1.33
retrieving revision 1.34
diff -b -B -U 1000000 -r1.33 -r1.34
--- src/hg/makeDb/doc/encodeHg18.txt	8 Apr 2010 04:39:11 -0000	1.33
+++ src/hg/makeDb/doc/encodeHg18.txt	6 May 2010 18:23:10 -0000	1.34
@@ -1,1256 +1,1246 @@
 #!/bin/csh -f
 exit
 
 #############################################################################
 # This is the make doc for hg18 ENCODE
 
 #############################################################################
 # Changes to ENCODE groups (2007-07-31 kate)
 
 # Change labels for Transcripts and Chrom 
 hgsql hg18 -e "UPDATE grp SET label='ENCODE Transcription' where name='encodeTxLevels'"
 hgsql hg18 -e "UPDATE grp SET label='ENCODE Chromatin Structure' where name='encodeChrom'"
 
 # Merge CompGeno and Var groups (few tracks)
 hgsql hg18 -e "UPDATE grp SET label='ENCODE Comparative Genomics and Variation' where name='encodeCompGeno'"
 hgsql hg18 -e "DELETE FROM grp where name='encodeVariation'"
 
 # Retire obsolete group
 hgsql hg18 -e "DELETE FROM grp where name='encode'"
 
 #############################################################################
 # Create encodeRegions table
 
     ssh hgwdev
     cd /cluster/data/encode
     mkdir convertHg18
     ln -s convertHg18 hg18
     ln -s convertHg17 hg17
     cd hg18
     hgsql hg17 -N -e "SELECT * FROM encodeRegions ORDER BY name" | \
         liftOver stdin /gbdb/hg17/liftOver/hg17ToHg18.over.chain.gz \
                 encodeRegions.bed encodeRegions.unmapped
     hgLoadBed hg18 encodeRegions encodeRegions.bed
     cp encodeRegions.bed ~/browser/ENCODE/build35_regions.bed
     # cvs add, install in /usr/local/apache/htdocs/ENCODE
 
 ##########################################################################
 # DOWNLOADS (2007-09-21 kate)
 
     ssh hgwdev
     cd /usr/local/apache/htdocs/goldenPath/hg18
     mkdir -p encode
     cd encode
     # release terms
     cp ../../hg17/encode/README.txt .
     # annotation database
     # request admin set up automated database dump
     mkdir database
     # auxiliary data files
     mkdir datafiles 
     # sequences
     cd /cluster/data/encode/convertHg18
     hgsql hg18 -N -e \
       "SELECT name, chrom, chromStart, chromEnd FROM encodeRegions ORDER BY name">regions.txt 
 
     ssh kolossus
     cd /cluster/data/encode/convertHg18
     mkdir regions
     cd regions
     /cluster/data/encode/bin/scripts/encodeSequences.pl -upper \
         ../regions.txt /iscratch/i/hg18/nib  > hg18.fa
     /cluster/data/encode/bin/scripts/encodeSequences.pl -masked \
         ../regions.txt /iscratch/i/hg18/nib  > hg18.msk.fa
     faSize -detailed hg18.fa > hg18_count.txt
     gzip *.fa
     md5sum *.fa.gz > md5sum.txt
     # copy regions/README.txt from hg17 and edit
 
     ssh hgwdev
     cd /usr/local/apache/htdocs/goldenPath/hg18/encode
     ln -s /cluster/data/encode/convertHg18/regions .
     cp ../../hg17/encode/regions/README.txt regions
     # edit README
 
 ##############################################################################
 # Lifting rampage (Andy)
 
 ssh hgwdev
 bash
 cd /cluster/data/encode/convertHg18
 /cluster/data/encode/bin/scripts/listEncodeTables.csh hg17 > hg17.tables
 wc -l hg17.tables
 #554 hg17.tables (dang)
 
 # start with easy beds i.e. the ones like "bed <num> ."
 
 grep "bed.*\." hg17.tables > easyBeds.tables
 grep -v "bed.*\." hg17.tables > remaining.tables
 wc -l easyBeds.tables
 #127 easyBeds.tables
 mkdir easyBeds
 for fields in 3 4 5 6 9 12; do 
     for table in `grep "bed $fields" easyBeds.tables | cut -f1`; do 
        hgsql hg18 -e "drop table $table"
        /cluster/data/encode/bin/scripts/convertBedTable.csh hg17 hg18 \
          $table $fields >> easyBeds.script.log 
        mv $table.* easyBeds/
     done
 done
 # still got 4 like "bed ." 
 egrep -v "bed (3|4|5|6|9|12)" easyBeds.tables
 #encodeYaleChIPSTAT1HeLaBingRenSites     encodeChip      bed .
 #encodeYaleChIPSTAT1HeLaMaskLess36mer36bpSite    encodeChip      bed .
 #encodeYaleChIPSTAT1HeLaMaskLess50mer38bpSite    encodeChip      bed .
 #encodeYaleChIPSTAT1HeLaMaskLess50mer50bpSite    encodeChip      bed .
 # these are all bed 3
 for table in `egrep -v "bed (3|4|5|6|9|12)" easyBeds.tables | cut -f1`; do
        /cluster/data/encode/bin/scripts/convertBedTable.csh hg17 hg18 \
          $table 3 >> easyBeds.script.log
        mv $table.* easyBeds/
 done
 
 # ok now there's ones like "bed <num> +"
 wc -l remaining.tables
 #427 remaining.tables
 grep '\+' remaining.tables > plusBed.tables
 grep -v '\+' remaining.tables > tmp; mv tmp remaining.tables 
 wc -l remaining.tables plusBed.tables 
 #  383 remaining.tables
 #   44 plusBed.tables
 #  427 total
 mkdir plusBeds
 for fields in 4 5 6 9 12; do
     for table in `grep "bed $fields" plusBed.tables | cut -f1`; do
        /cluster/data/encode/bin/scripts/convertBedTable.csh hg17 hg18 \
          $table $fields >> plusBeds.script.log
        mv $table.* plusBeds/
     done
 done
 
 # how about bedGraph ones?
 grep bedGraph remaining.tables > bedGraph.tables
 grep -v bedGraph remaining.tables > tmp; mv tmp remaining.tables 
 wc -l bedGraph.tables remaining.tables 
 #  186 bedGraph.tables
 #  197 remaining.tables
 #  383 total
 mkdir bedGraph
 for table in `cut -f1 bedGraph.tables`; do
        /cluster/data/encode/bin/scripts/convertBedTable.csh hg17 hg18 \
          $table 4 >> bedGraph.script.log
        mv $table.* bedGraph/
 done
 
 ######################################
 # Continue lifting rampage (ting, 06-07-2007)
 #
 
 # Examining remaining.tables
 # All GIS tables are bed 12, so lift these
 		grep Gis remaining.tables > Gis.tables
 		grep -v Gis remaining.tables > tmp; mv tmp remaining.tables
 		wc -l Gis.tables remaining.tables
 		#    7 Gis.tables
 		#  190 remaining.tables
 		#  197 total
 		mkdir bedGis
 		doGis.csh
 		# 7 tables lifted.
 
 # 190 remaining.
 
 ####################################################
 # More lifting (Andy)
 
 ssh hgwdev
 bash
 cd /cluster/data/encode/convertHg18
 
 # genePred tables
 grep genePred remaining.tables > genePred.tables
 grep -v genePred remaining.tables > tmp; mv tmp remaining.tables
 wc -l genePred.tables remaining.tables 
 #  68 genePred.tables
 # 122 remaining.tables
 # 190 total
 mkdir genePred
 for table in `cut -f1 genePred.tables`; do
     /cluster/data/encode/bin/scripts/convertGenePredTable.csh hg17 hg18 $table >> genePred.scripts.log;
     mv $table.* genePred/
 done
 # ERRORS, uh oh
 # fixed /cluster/data/encode/bin/scripts/convertGenePredTable.csh
 # binned hg17 tables weren't working right.
 grep error genePred.scripts.log | sed 's/^.*converting\ \(.*\)\.txt.*$/\1/' > genePredBins.tables
 for table in `cat genePredBins.tables`; do
     /cluster/data/encode/bin/scripts/convertGenePredTable.csh hg17 hg18 $table >> genePredBins.scripts.log;
     mv $table.* genePred/
 done
 
 # missed bed tables.  There's a few like "bed5FloatScore" and "bed 3", etc.
 # these can be treated as normal beds
 grep bed remaining.tables | cut -f1,3 | sed 's/bed5FloatScore/bed 5/' > \
   bedOther.tables
 grep -v bed remaining.tables > tmp
 wc -l bedOther.tables tmp
 #  14 bedOther.tables
 # 108 tmp
 # 122 total
 mkdir bedOther
 for fields in 3 4 5; do 
     for table in `grep "bed $fields" bedOther.tables | cut -f1`; do 
        /cluster/data/encode/bin/scripts/convertBedTable.csh hg17 hg18 \
          $table $fields >> bedOther.script.log 
        mv $table.* bedOther/
     done
 done
 
 # wiggle
 # first tally up which ones are in which DBs.  The older ones can go hg16->hg18 instead
 # of hg17->hg18.  Make three sets of tables and do set operations:
 hgsql hg16 -e 'show tables' > hg16.all.tables
 hgsql hg17 -e 'show tables' > hg17.all.tables
 grep -v wigMaf remaining.tables | cut -f1 > wig.tables
 grep wigMaf remaining.tables > tmp; mv tmp remaining.tables
 wc -l wig.tables remaining.tables 
 # 119 wig.tables
 #   3 remaining.tables
 # 122 total
 #  OOPS I forgot to subtract the 14 tables from last one.
 
 # How many of the wiggle tables are in hg17?  I hope all 119
 grep -Fw -f wig.tables hg17.all.tables | wc -l
 # 105
 # good.  Ok how about hg16?
 grep -Fw -f wig.tables hg16.all.tables | wc -l
 # 61
 # I guess then hg17 should have 44 newer ones.
 grep -Fw -f wig.tables hg16.all.tables > hg16.wig.tables
 grep -Fwv -f hg16.wig.tables wig.tables > hg17.wig.tables
 wc -l *wig.tables
 #  61 hg16.wig.tables
 #  44 hg17.wig.tables
 # 105 wig.tables
 # Awesome.  These two sets shouldn't intersect at all:
 grep -Fw -f hg16.wig.tables hg17.wig.tables | wc -l
 # 0
 # Great.  Now lets move on.  Let's use hgWiggle on each of these tables to 
 # fetch the old data.  Then we'll convert that to bed 4, lift that, then 
 # run wigEncode on the lifted data.
 
 mkdir ../hg18.wib
 mkdir -p /gbdb/hg18/encode/wib
 
 mkdir fromHg16.wig
 for table in `cat hg16.wig.tables`; do 
     hgWiggle -db=hg16 $table \
        | grep -v "^#" | awk -f varStepToBed.awk > $table.old.wig
     liftOver -bedPlus=3 -tab $table.old.wig /gbdb/hg16/liftOver/hg16ToHg18.over.chain.gz \
       $table.new.wig $table.unmapped
     sort -k1,1 -k2,2n $table.new.wig > tmp.wig; mv tmp.wig $table.new.wig
     wigEncode $table.new.wig $table.wig $table.wib 2>> wigFromHg16.log
     mv $table.wib ../hg18.wib/
     ln -s /cluster/data/encode/hg18.wib/${table}.wib /gbdb/hg18/encode/wib/${table}.wib
     hgLoadWiggle -pathPrefix=/gbdb/hg18/encode/wib hg18 $table $table.wig
     mv $table.*wig $table.unmapped fromHg16.wig/
 done
 
 mkdir fromHg17.wig
 for table in `cat hg17.wig.tables`; do
     hgWiggle -db=hg17 $table \
        | grep -v "^#" | awk -f varStepToBed.awk > $table.old.wig
     liftOver -bedPlus=3 -tab $table.old.wig /gbdb/hg17/liftOver/hg17ToHg18.over.chain.gz \
       $table.new.wig $table.unmapped
     sort -k1,1 -k2,2n $table.new.wig > tmp.wig; mv tmp.wig $table.new.wig
     wigEncode $table.new.wig $table.wig $table.wib 2>> wigFromHg17.log
     mv $table.wib ../hg18.wib/
     ln -s /cluster/data/encode/hg18.wib/${table}.wib /gbdb/hg18/encode/wib/${table}.wib
     hgLoadWiggle -pathPrefix=/gbdb/hg18/encode/wib hg18 $table $table.wig
     mv $table.*wig $table.unmapped fromHg17.wig/
 done
 
 ##########################################################
 # Wig lifting 
 # we need to find all the old wiggle data and lift that.  
 # Start with hg16
 
 ssh hgwdev
 bash 
 cd /cluster/data/encode/convertHg18
 
 # find those hg16 tables
 
 cat > affyChipChip.hg16.wig.tables << "EOF"
 encodeAffyChIpHl60PvalBrg1Hr00	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_Brg1_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalBrg1Hr02	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_Brg1_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalBrg1Hr08	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_Brg1_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalBrg1Hr32	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_Brg1_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalCebpeHr00	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_CEBPe_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalCebpeHr02	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_CEBPe_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalCebpeHr08	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_CEBPe_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalCebpeHr32	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_CEBPe_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalCtcfHr00	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_CTCF_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalCtcfHr02	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_CTCF_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalCtcfHr08	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_CTCF_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalCtcfHr32	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_CTCF_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalH3K27me3Hr00	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_H3K27T_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalH3K27me3Hr02	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_H3K27T_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalH3K27me3Hr08	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_H3K27T_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalH3K27me3Hr32	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_H3K27T_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalH4Kac4Hr00	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_HisH4_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalH4Kac4Hr02	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_HisH4_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalH4Kac4Hr08	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_HisH4_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalH4Kac4Hr32	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_HisH4_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalP300Hr00	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_P300_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalP300Hr02	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_P300_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalP300Hr08	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_P300_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalP300Hr32	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_P300_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalPu1Hr00	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_PU1_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalPu1Hr02	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_PU1_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalPu1Hr08	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_PU1_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalPu1Hr32	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_PU1_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalRaraHr00	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_RARecA_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalRaraHr02	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_RARecA_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalRaraHr08	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_RARecA_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalRaraHr32	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_RARecA_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalRnapHr00	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_Pol2_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalRnapHr02	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_Pol2_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalRnapHr08	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_Pol2_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalRnapHr32	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_Pol2_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalSirt1Hr00	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_SIRT1_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalSirt1Hr02	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_SIRT1_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalSirt1Hr08	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_SIRT1_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalSirt1Hr32	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_SIRT1_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 encodeAffyChIpHl60PvalTfiibHr32	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_TFIIB-R_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
 EOF
 
 mkdir -p wigs/hg16
 cd wigs/hg16
 cat ../../affyChipChip.hg16.wig.tables | while read -a line; do 
    chain=/gbdb/hg16/liftOver/hg16ToHg18.over.chain.gz
    table=${line[0]};
    oldWig=$table.hg16.wig
    newWig=$table.hg18.wig
    bad=$table.hg18.unmapped
    wib=$table.wib
    wigTable=$table.tab
    file=/cluster/data/encode/${line[1]}; 
 
    echo $table
    bzcat $file | tail +2 | awk -f ../../varStepToBed.awk | \
      awk 'BEGIN{OFS="\t"}{print $1, $2+1, $3, $4;}' > $oldWig;
    liftOver -bedPlus=3 $oldWig $chain $newWig $bad
    bedSort $newWig tmp
    mv tmp $newWig
    wigEncode $newWig $wigTable $wib
 done
 
 # One more for BU Orchid
 
 awk -f ../../varStepToBed.awk ../../../BU/orchid/2005-06-09/t0 > encodeBu_ORChID1.hg16.wig
 liftOver -bedPlus=3 encodeBu_ORChID1.hg16.wig /gbdb/hg16/liftOver/hg16ToHg18.over.chain.gz encodeBu_ORChID1.hg18.wig encodeBu_ORChID1.hg18.unmapped
 bedSort encodeBu_ORChID1.hg18.wig tmp; mv tmp encodeBu_ORChID1.hg18.wig
 wigEncode encodeBu_ORChID1.hg18.wig encodeBu_ORChID1.tab encodeBu_ORChID1.wib
 # NOTE: this track was replaced with newer data -- the lift was
 # never used.
 
 # Encode hapmap coverage
 
 for graph in ../../../sanger/coverage/encode*.bedGraph; do
      table=${graph%.bedGraph}
      table=${table#*coverage\/}
      liftOver -bedPlus=3 $graph /gbdb/hg16/liftOver/hg16ToHg18.over.chain.gz \
        $table.hg18.wig $table.hg18.unmapped
      bedSort $table.hg18.wig tmp; mv tmp $table.hg18.wig
      wigEncode $table.hg18.wig $table.tab $table.wib
 done
 
 # hg17 tables
 cd ../
 mkdir hg17
 cd hg17
 cat | while read -a line; do
     table=${line[0]};
     file=/cluster/data/encode/${line[1]};
     chain=/gbdb/hg17/liftOver/hg17ToHg18.over.chain.gz;
     awk -f ../../varStepToBed.awk $file > $table.hg17.wig;
     bedSort $table.hg17.wig tmp; mv tmp $table.hg17.wig
     liftOver -bedPlus=3 $table.hg17.wig $chain $table.hg18.wig $table.hg18.unmapped;
     bedSort $table.hg18.wig tmp; mv tmp $table.hg18.wig;
     wigEncode $table.hg18.wig $table.tab $table.wib
 done << "EOF"
 encodeAffyChIpHl60PvalStrictH3K9K14DHr00	Affy/2005-10-03/lab/CHIP/wig/H3K9K14D/00/EC_AS_HL60_DN_RA_H3K9K14D_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
 encodeAffyChIpHl60PvalStrictH3K9K14DHr02	Affy/2005-10-03/lab/CHIP/wig/H3K9K14D/02/EC_AS_HL60_DN_RA_H3K9K14D_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
 encodeAffyChIpHl60PvalStrictH3K9K14DHr08	Affy/2005-10-03/lab/CHIP/wig/H3K9K14D/08/EC_AS_HL60_DN_RA_H3K9K14D_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
 encodeAffyChIpHl60PvalStrictH3K9K14DHr32	Affy/2005-10-03/lab/CHIP/wig/H3K9K14D/32/EC_AS_HL60_DN_RA_H3K9K14D_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
 encodeAffyChIpHl60PvalStrictHisH4Hr00	Affy/2005-10-03/lab/CHIP/wig/HisH4/00/EC_AS_HL60_DN_RA_HisH4_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
 encodeAffyChIpHl60PvalStrictHisH4Hr02	Affy/2005-10-03/lab/CHIP/wig/HisH4/02/EC_AS_HL60_DN_RA_HisH4_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
 encodeAffyChIpHl60PvalStrictHisH4Hr08	Affy/2005-10-03/lab/CHIP/wig/HisH4/08/EC_AS_HL60_DN_RA_HisH4_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
 encodeAffyChIpHl60PvalStrictHisH4Hr32	Affy/2005-10-03/lab/CHIP/wig/HisH4/32/EC_AS_HL60_DN_RA_HisH4_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
 encodeAffyChIpHl60PvalStrictp63_ActD	Affy/2005-10-03/lab/CHIP/wig/p63_ActD/EC_AS_ME180_ActD_p63_C01_EC_AS_ME180_CombInput_PlMinActD_B1_B2_B3.pval.median.wig
 encodeAffyChIpHl60PvalStrictp63_mActD	Affy/2005-10-03/lab/CHIP/wig/p63_mActD/EC_AS_ME180_Ctrl_p63_C01_EC_AS_ME180_CombInput_PlMinActD_B1_B2_B3.pval.median.wig
 encodeAffyChIpHl60PvalStrictPol2Hr00	Affy/2005-10-03/lab/CHIP/wig/Pol2/00/EC_AS_HL60_DN_RA_Pol2_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
 encodeAffyChIpHl60PvalStrictPol2Hr02	Affy/2005-10-03/lab/CHIP/wig/Pol2/02/EC_AS_HL60_DN_RA_Pol2_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
 encodeAffyChIpHl60PvalStrictPol2Hr08	Affy/2005-10-03/lab/CHIP/wig/Pol2/08/EC_AS_HL60_DN_RA_Pol2_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
 encodeAffyChIpHl60PvalStrictPol2Hr32	Affy/2005-10-03/lab/CHIP/wig/Pol2/32/EC_AS_HL60_DN_RA_Pol2_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
 encodeAffyChIpHl60SignalStrictH3K9K14DHr00	Affy/2005-10-03/lab/CHIP/wig/H3K9K14D/00/EC_AS_HL60_DN_RA_H3K9K14D_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
 encodeAffyChIpHl60SignalStrictH3K9K14DHr02	Affy/2005-10-03/lab/CHIP/wig/H3K9K14D/02/EC_AS_HL60_DN_RA_H3K9K14D_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
 encodeAffyChIpHl60SignalStrictH3K9K14DHr08	Affy/2005-10-03/lab/CHIP/wig/H3K9K14D/08/EC_AS_HL60_DN_RA_H3K9K14D_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
 encodeAffyChIpHl60SignalStrictH3K9K14DHr32	Affy/2005-10-03/lab/CHIP/wig/H3K9K14D/32/EC_AS_HL60_DN_RA_H3K9K14D_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
 encodeAffyChIpHl60SignalStrictHisH4Hr00	Affy/2005-10-03/lab/CHIP/wig/HisH4/00/EC_AS_HL60_DN_RA_HisH4_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
 encodeAffyChIpHl60SignalStrictHisH4Hr02	Affy/2005-10-03/lab/CHIP/wig/HisH4/02/EC_AS_HL60_DN_RA_HisH4_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
 encodeAffyChIpHl60SignalStrictHisH4Hr08	Affy/2005-10-03/lab/CHIP/wig/HisH4/08/EC_AS_HL60_DN_RA_HisH4_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
 encodeAffyChIpHl60SignalStrictHisH4Hr32	Affy/2005-10-03/lab/CHIP/wig/HisH4/32/EC_AS_HL60_DN_RA_HisH4_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
 encodeAffyChIpHl60SignalStrictp63_ActD	Affy/2005-10-03/lab/CHIP/wig/p63_ActD/EC_AS_ME180_ActD_p63_C01_EC_AS_ME180_CombInput_PlMinActD_B1_B2_B3.sig.median.wig
 encodeAffyChIpHl60SignalStrictp63_mActD	Affy/2005-10-03/lab/CHIP/wig/p63_mActD/EC_AS_ME180_Ctrl_p63_C01_EC_AS_ME180_CombInput_PlMinActD_B1_B2_B3.sig.median.wig
 encodeAffyChIpHl60SignalStrictPol2Hr00	Affy/2005-10-03/lab/CHIP/wig/Pol2/00/EC_AS_HL60_DN_RA_Pol2_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
 encodeAffyChIpHl60SignalStrictPol2Hr02	Affy/2005-10-03/lab/CHIP/wig/Pol2/02/EC_AS_HL60_DN_RA_Pol2_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
 encodeAffyChIpHl60SignalStrictPol2Hr08	Affy/2005-10-03/lab/CHIP/wig/Pol2/08/EC_AS_HL60_DN_RA_Pol2_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
 encodeAffyChIpHl60SignalStrictPol2Hr32	Affy/2005-10-03/lab/CHIP/wig/Pol2/32/EC_AS_HL60_DN_RA_Pol2_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
 encodeAffyRnaGm06990Signal	Affy/2005-10-03/lab/RNA/wig/GM06990/EC_AS_GM06990_RCyP+_C01vsNULL.sig.wig
 encodeAffyRnaHeLaSignal	Affy/2005-11-22/lab/Affy_HeLa/wig/EC_AS_HeLa_RCyP+_C01vsNULL.sig.wig
 encodeAffyRnaHl60SignalHr00	Affy/2005-10-03/lab/RNA/wig/HL60/00/EC_AS_HL60_RWP+_RA_00hr_C01vsNULL.sig.wig        
 encodeAffyRnaHl60SignalHr02	Affy/2005-10-03/lab/RNA/wig/HL60/02/EC_AS_HL60_RWP+_RA_02hr_C01vsNULL.sig.wig
 encodeAffyRnaHl60SignalHr08	Affy/2005-10-03/lab/RNA/wig/HL60/08/EC_AS_HL60_RWP+_RA_08hr_C01vsNULL.sig.wig
 encodeAffyRnaHl60SignalHr32	Affy/2005-10-03/lab/RNA/wig/HL60/32/EC_AS_HL60_RWP+_RA_32hr_C01vsNULL.sig.wig
 encodeUvaDnaRepTr50	UVa/2005-10-15/lab/smoothedtr50.hg17.wig
 EOF
 
 # Uppsala hg17 is already in bed format
 
 cat | while read -a line; do
     table=${line[0]};
     file=/cluster/data/encode/${line[1]};
     chain=/gbdb/hg17/liftOver/hg17ToHg18.over.chain.gz;
     liftOver -bedPlus=3 $file $chain $table.hg18.wig $table.hg18.unmapped;
     bedSort $table.hg18.wig tmp; mv tmp $table.hg18.wig;
     wigEncode $table.hg18.wig $table.tab $table.wib
 done << "EOF"
 encodeUppsalaChipH3acBut0h      Uppsala/2006-05-29/lab/encodeUppsalaChipH3acBut0h.wig.txt
 encodeUppsalaChipH3acBut12h     Uppsala/2006-05-29/lab/encodeUppsalaChipH3acBut12h.wig.txt
 encodeUppsalaChipH4acBut0h      Uppsala/2006-05-29/lab/encodeUppsalaChipH4acBut0h.wig.txt
 encodeUppsalaChipH4acBut12h     Uppsala/2006-05-29/lab/encodeUppsalaChipH4acBut12h.wig.txt
 EOF
 
 cat | while read -a line; do
     table=${line[0]};
     file=/cluster/data/encode/${line[1]};
     chain=/gbdb/hg17/liftOver/hg17ToHg18.over.chain.gz;
     liftOver -bedPlus=3 $file $chain $table.hg18.wig $table.hg18.unmapped;
     bedSort $table.hg18.wig tmp; mv tmp $table.hg18.wig;
     wigEncode $table.hg18.wig $table.tab $table.wib
 done << "EOF"
 encodeYaleAffyNeutRNATransMap   yale/rna/2005-10-14/encodeYaleAffyNeutRNATransMap.trim
 encodeYaleAffyNB4RARNATransMap  yale/rna/2005-10-14/lab/encode_Yale_Affy_NB4_RA_RNA_Transcript_Map_ncbi35.wig
 encodeYaleAffyNB4TPARNATransMap yale/rna/2005-10-14/lab/encode_Yale_Affy_NB4_TPA_RNA_Transcript_Map_ncbi35.wig
 encodeYaleAffyNB4UntrRNATransMap        yale/rna/2005-10-14/lab/encode_Yale_Affy_NB4_CTRL_RNA_Transcript_Map_ncbi35.wig
 encodeYaleAffyPlacRNATransMap   yale/rna/2005-10-14/lab/encode_Yale_Affy_Placenta_RNA_Transcript_Map_ncbi35.wig
 EOF
 
 # ERRORS ... the first one worked, the others need trimming.
 
 cat | while read -a line; do
     table=${line[0]};
     file=/cluster/data/encode/${line[1]};
     chain=/gbdb/hg17/liftOver/hg17ToHg18.over.chain.gz;
     liftOver -bedPlus=3 $file $chain $table.hg18.wig $table.hg18.unmapped;
     bedSort $table.hg18.wig stdout | /cluster/data/encode/bin/scripts/trimOverlap.pl > tmp;
     mv tmp $table.hg18.wig;
     wigEncode $table.hg18.wig $table.tab $table.wib;
 done << "EOF"
 encodeYaleAffyNB4RARNATransMap  yale/rna/2005-10-14/lab/encode_Yale_Affy_NB4_RA_RNA_Transcript_Map_ncbi35.wig
 encodeYaleAffyNB4TPARNATransMap yale/rna/2005-10-14/lab/encode_Yale_Affy_NB4_TPA_RNA_Transcript_Map_ncbi35.wig
 encodeYaleAffyNB4UntrRNATransMap        yale/rna/2005-10-14/lab/encode_Yale_Affy_NB4_CTRL_RNA_Transcript_Map_ncbi35.wig
 encodeYaleAffyPlacRNATransMap   yale/rna/2005-10-14/lab/encode_Yale_Affy_Placenta_RNA_Transcript_Map_ncbi35.wig
 EOF
 
 # Forgot an hg16 one
 
 table=encodeUcsdNgChipSignal
 file=/cluster/data/encode/UCSD/nimblegen/2005-05-31/encodeUcsdNgChipSignal.varStep
 chain=/gbdb/hg16/liftOver/hg16ToHg18.over.chain.gz
 awk -f ../../varStepToBed.awk $file > $table.hg16.wig
 liftOver -bedPlus=3 $table.hg16.wig $chain $table.hg18.wig $table.hg18.unmapped
 bedSort $table.hg18.wig stdout | /cluster/data/encode/bin/scripts/trimOverlap.pl > tmp
 mv tmp $table.hg18.wig
 wigEncode $table.hg18.wig $table.tab $table.wib
 
 ##########################################################################
 # Boston University ORChID track - (2007-06-29 ting)
 #	data developer contact:  Steve Parker parker@bu.edu
 # This is a new dataset to replace the old one, for the same track.
 # On hg17 the track name is encodeBu_ORChID1, was commented as "non-standard table name"
 # I took this chance to rename it as encodeBUORChID on hg18.
 
     ssh hgwdev
     cd /cluster/data/encode/BU
     mkdir -p orchid/2007-06-29/lab
     cd orchid/2007-06-29/lab
     wget --timestamping "http://dna.bu.edu/parker/.data/orchid_hg18_encode.wig.gz"
     cd ..
     mkdir wib
 # The file orchid_hg18_encode.wig.gz from data provider contains 0-based coordinates,
 # thus wigEncode choked on it -- specifically, at chr16, position 0 (ENm008). 
 # I compared this new data to the old dataset (2005-09-08) and made sure that this
 # is the case. I saved the original file to 'original.wig.gz', and added 1 to all
 # positions in orchid_hg18_encode.wig.gz
     wigEncode lab/orchid_hg18_encode.wig.gz encodeBUORChID.wig \
     	wib/encodeBUORChID.wib 
 #   Converted lab/orchid_hg18_encode.wig.gz, upper limit 1.64, lower limit -0.98
 
 # load
     set dir = /gbdb/hg18/encode/BU/2007-06-29
     mkdir -p $dir
     hgLoadWiggle -pathPrefix=$dir hg18 encodeBUORChID encodeBUORChID.wig
     mkdir -p $dir/wib
     ln -s `pwd`/wib/encodeBUORChID.wib $dir/wib
 
 # create encodeBUORChID.html at trackDb/human/hg18/
     
 
 #############################################################################
 # Stanford NRSF ChIP-seq (DONE, Heather, July 2007)
 
 ssh hgwdev
 cd /cluster/data/encode/stanford/2007-03-14
 liftOver fix.bed /gbdb/hg17/liftOver/hg17ToHg18.over.chain.gz hg18.bed core.unmapped
 liftOver control_fix.bed /gbdb/hg17/liftOver/hg17ToHg18.over.chain.gz hg18.control.bed control.unmapped
 hgLoadBed hg18 encodeStanfordNRSFEnriched hg18.bed -tab
 hgLoadBed hg18 encodeStanfordNRSFControl hg18.control.bed -tab
 
 ############################################################################
 # Yale ENCODE Lifting
 
 ssh hgwdev
 cd /cluster/data/encode/convertHg18
 for table in `cat yale.lst`; do 
    echo select tableName,type from trackDb where tableName=\"$table\" \
      | hgsql hg17 | tail +2 >> yale.tables 
 done
 sed -e 's/bed5FloatScoreWithFdr/bed 5/' \
     -e 's/bedGraph\ 4/bed 4/' -e 's/bed5FloatScore/bed 5/' \
     < yale.tables > tmp.tables
 mv tmp.tables yale.tables
 mkdir yale
 for fields in 4 5; do
     for table in `grep "bed $fields" yale.tables | cut -f1`; do
        /cluster/data/encode/bin/scripts/convertBedTable.csh hg17 hg18 \
          $table $fields >> yale.script.log
        mv $table.* yale/
     done
 done
 
 ###########################################################################
 # Pseudogenes Class table copied from hg17  (2007-08-01 kate)
 # This table is copied unchanged.
 
     ssh hgwdev
     cd /cluster/data/encode/convertHg18
     mkdir pseudogene
     cd pseudogene
     hgsqldump --all --tab=. hg17 encodePseudogeneClass
     hgsql hg18 < encodePseudogeneClass.sql
     echo "LOAD DATA LOCAL INFILE 'encodePseudogeneClass.txt' \
                 into table encodePseudogeneClass" | hgsql hg18
 
 ###########################################################################
 # Affy EC chrom21/chrom22 (Andy DONE 2007-07-20)
 
 ssh hgwdev
 bash
 cd /cluster/data/encode/Affy
 mkdir -p 2007-07-12/lab
 cd 2007-07-12/
 mkdir -p processed/{bed,wigTable,wib,download}
 cd lab/
 cp /var/ftp/encode/encode_ext_RNA_hg18_chr21-22.tar.gz .
 tar xfz encode_ext_RNA_hg18_chr21-22.tar.gz
 rm encode_ext_RNA_hg18_chr21-22.tar.gz
 cd ../
 find lab -name '*.bed' > renamesBed.txt
 find lab -name '*.wig' > renamesWig.txt
 # Make 2nd column for table name
 cat renamesBed.txt | while read -a line; do
     tail +2 ${line[0]} > processed/bed/${line[1]}.bed
     hgLoadBed hg18 ${line[1]} processed/bed/${line[1]}.bed
 done
 rm bed.tab
 cat renamesWig.txt | while read -a line; do
     table=${line[1]}
     origFile=${line[0]}
     tail +2 $origFile > processed/download/${table}.wig
     wigEncode processed/download/${table}.wig processed/wigTable/${table}.tab \
         processed/wib/${table}.wib 2>> processed/wigEncode.log
     pushd /gbdb/hg18/encode/wib
     ln -s /cluster/data/encode/Affy/2007-07-12/processed/wib/${table}.wib
     popd
     hgLoadWiggle -pathPrefix=/gbdb/hg18/encode/wib hg18 $table processed/wigTable/${table}.tab
     gzip processed/download/${table}.wig
 done
 cd /usr/local/apache/htdocs/goldenPath/hg18/encode
 ln -s /cluster/data/encode/Affy/2007-07-12/processed/download/*.gz .
 
 ###########################################################################
 # Yale Pol II Chip (Chip-seq) (DONE Andy 11-07-2007)
 
 cd /cluster/data/encode/yale
 mkdir -p 2007-07-17/lab
 cd 2007-07-17/lab
 unzip Yale_jul17_v2.zip
 
 set table = wgEncodeYaleChipSeqPol2HelaSites
 hgLoadBed hg18 $table lab/PolII/PolII_hg18-sites.bed
 #Reading PolII_hg18-sites.bed
 #Loaded 87253 elements of size 4
 #Sorted
 #Creating table definition for encodeYalePolIISites
 #Saving bed.tab
 #start -142, end 1144 out of range in findBin (max is 512M)
 # CONTACTED submitter to ask about negative coordinate.
 # For now, leave out chrM
 sed '/^chrM/d' lab/PolII/PolII_hg18-sites.bed | hgLoadBed hg18 $table stdin
 
 # NOTE: max score=1779, min score=7
 # data distribution
  awk '{print $4}' pol2.bed | sort -n | textHistogram stdin -binSize=100 ;
    0 ************************************************************ 83113
    100 ** 3300
    200  545
    300  144
    400  58
    500  37
    600  20
    700  6
    800  6
    900  0
    1000  2
    1100  2
    1200  0
    1300  0
    1400  0
    1500  0
    1600  0
    1700  1
 
 
 bedSort PolII_hg18-signal.wig tmp.wig
 # (fixed: this line previously read "v tmp.wig ..." -- dropped 'm')
 mv tmp.wig PolII_hg18-signal.wig 
 ../../../bin/scripts/trimOverlap.pl < PolII_hg18-signal.wig > tmp.wig
 mv tmp.wig PolII_hg18-signal.wig 
 mv encodeYalePolIISignal.wib /cluster/data/encode/hg18.wib/
 ln -s /cluster/data/encode/hg18.wib/encodeYalePolIISignal.wib /gbdb/hg18/encode/wib/
 hgLoadWiggle -pathPrefix=/gbdb/hg18/encode/wib hg18 encodeYalePolIISignal encodeYalePolIISignal.wig 
 #Connected to database hg18 for track encodeYalePolIISignal
 #Creating wiggle table definition in hg18.encodeYalePolIISignal
 #Saving wiggle.tab
 #WARNING: Exceeded chr18_random size 4406 > 4262. dropping 145 data point(s)
 # hmmm... that's not a good warning.  I wonder if these guys got the genome wrong.
 hgLoadBed hg18 encodeYalePolIISites PolII_hg18-sites.bed 
 #Reading PolII_hg18-sites.bed
 #Loaded 87253 elements of size 4
 #Sorted
 #Creating table definition for encodeYalePolIISites
 #Saving bed.tab
 #start -142, end 1144 out of range in findBin (max is 512M)
 
 # MORE ERRORS.  Clearly this submission wasn't quite meant to be just yet.
 # to be continued...
 
 # continued... made a "resub" dir and copied the resubmitted zipfile there.
 # (fixed path: was "/cluste/data/2007-07-17/resub"; the resub dir lives
 # under the yale submission dir created at the top of this section)
 cd /cluster/data/encode/yale/2007-07-17/resub   
 unzip Yale_jul17_v2.zip
 cd PolII/
 trimObBedLines PolII_hg18-signal.wig > ../../processed/wgEncodeYalePolIISignal.wigBed
 pushd ../../processed/
 wigEncode wgEncodeYalePolIISignal.wigBed wgEncodeYalePolIISignal.wig wgEncodeYalePolIISignal.wib
 gzip wgEncodeYalePolIISignal.wigBed
 cd ../../../hg18.wib
 ln -s ../yale/2007-07-17/processed/wgEncodeYalePolIISignal.wib
 cd /gbdb/hg18/encode/wib
 ln -s /cluster/data/encode/hg18.wib/wgEncodeYalePolIISignal.wib 
 cd /usr/local/apache/htdocs/goldenPath/hg18/encode/wig
 ln -s /cluster/data/encode/yale/2007-07-17/processed/wgEncodeYalePolIISignal.wigBed.gz 
 popd
 hgLoadWiggle -pathPrefix=/gbdb/hg18/encode/wib hg18 wgEncodeYalePolIISignal wgEncodeYalePolIISignal.wig
 
 #########################################################################
 # YALE STAT1 (more ChIP-seq) (DONE, Andy 2007-11-20)
 
 cd /cluster/data/encode/yale
 mkdir 2007-08-08
 cd 2007-08-08/
 cp /var/ftp/encode/Yale_aug8.zip .
 unzip Yale_aug8.zip
 cd STAT1/
 # NOTE(review): trimObBedLines is invoked here as "trimObBedLines <db> <in> <out>",
 # but the Pol II section above used "trimObBedLines <in> > <out>" --
 # confirm which calling convention the script actually takes.
 trimObBedLines hg18 STAT1_hg18-signal.wig wgEncodeYaleStat1Signal.wigBed
 trimObBedLines hg18 STAT1_hg18-sites.bed wgEncodeYaleStat1Sites.bed
 gzip wgEncodeYaleStat1Signal.wigBed
 # (wigEncode is fed the gzipped wigBed here; presumably it reads .gz
 #  input directly -- TODO confirm)
 wigEncode wgEncodeYaleStat1Signal.wigBed.gz wgEncodeYaleStat1Signal.{wig,wib}
 cd ../
 # Tidy the submission dir, symlink the wib into /gbdb, then load tables.
 mkdir lab processed
 mv readme_aug8.txt STAT1 lab/
 rm Yale_aug8.zip 
 mv lab/STAT1/wgEncodeYaleStat1Si* processed/
 pushd ../../hg18.wib/
 ln -s ../yale/2007-08-08/processed/wgEncodeYaleStat1Signal.wib
 cd /gbdb/hg18/encode/wib
 ln -s /cluster/data/encode/hg18.wib/wgEncodeYaleStat1Signal.wib 
 popd
 cd processed/
 hgLoadWiggle -pathPrefix=/gbdb/hg18/encode/wib hg18 wgEncodeYaleStat1Signal wgEncodeYaleStat1Signal.wig
 hgLoadBed hg18 wgEncodeYaleStat1Sites wgEncodeYaleStat1Sites.bed 
 
 
 ##########################################################################
 # Genome Institute of Singapore PET data (2007-08-30 ting)
 # Submitted 8/22 by Atif Shahab and Chia-lin Wei
 # Three new PET datasets on human embryonic stem cell hES3.
 # One polyA-RNA dataset, and two ChIP-PET datasets of H3K4me3 and H3K27me3.
 # Build them as subtracks into existing GIS tracks: GIS-RNA-PET and GIS-CHIP-PET.
 
     ssh hgwdev
     cd /cluster/data/encode/GIS/
     mkdir 2007-08-22
     cd 2007-08-22
     mkdir lab
     cd lab
     cp /var/ftp/encode/gis.tar.gz ./
     gunzip gis.tar.gz
     tar -xvf gis.tar
     
     # obtained 3 data files: H3K27me3.bed  H3K4me3.bed  polyA.bed
     # These are mapped on hg17, first lift.
     cd /cluster/data/encode/GIS/2007-08-22
     liftOver lab/polyA.bed ../../convertHg18/hg17ToHg18.over.chain.gz \
     	polyA-hg18.bed polyA-unmapped.bed
     # 426301 lifted, 34 unmapped
     
     # (fixed: lifted outputs are named *-hg18.bed so the scoreGisBed.pl
     # calls below find their inputs; these two liftOver commands
     # previously wrote H3K4me3.bed / H3K27me3.bed)
     liftOver lab/H3K4me3.bed ../../convertHg18/hg17ToHg18.over.chain.gz \
     	H3K4me3-hg18.bed H3K4me3-unmapped.bed
     # 679752 lifted, 13 unmapped
     
     liftOver lab/H3K27me3.bed ../../convertHg18/hg17ToHg18.over.chain.gz \
     	H3K27me3-hg18.bed H3K27me3-unmapped.bed
     # 992509 lifted, 25 unmapped
     
     # GIS data are not scored. Based on Angie and Kate's previous work,
     # scored BED can be made from item name. Use scoreGisBed.pl to do so.
     scoreGisBed.pl polyA-hg18.bed 2 encodeGisRnaPetHes3.bed
     scoreGisBed.pl H3K4me3-hg18.bed 1 encodeGisChipPetHes3H3K4me3.bed
     scoreGisBed.pl H3K27me3-hg18.bed 1 encodeGisChipPetHes3H3K27me3.bed
     
     # load on hg18
     hgLoadBed hg18 encodeGisRnaPetHes3 encodeGisRnaPetHes3.bed
     # Loaded 426301 elements of size 12
     
     hgLoadBed hg18 encodeGisChipPetHes3H3K4me3 encodeGisChipPetHes3H3K4me3.bed
     # Loaded 679752 elements of size 12
     
     hgLoadBed hg18 encodeGisChipPetHes3H3K27me3 encodeGisChipPetHes3H3K27me3.bed 
     # Loaded 992509 elements of size 12
     
     # modified trackDb.encodeTxLevel.ra, trackDb.encodeChip.ra,
     #          encodeGisChipPetAll.html, encodeGisRnaPet.html
 
 ###########
 # Promote UCSD genome-wide Chip tracks:
 # UCSD TAF1 IMR90 Chip/chip to Regulation group
 # (2007-09-14 kate)
 # See hg18.txt
 
 ######################################################
 # Add strand information for encodeGencodeRace data - ting 09-27-2007
 # ENCODE 5RACE data do not contain strand information. This
 # information is very important, and can be derived from 
 # available GENCODE and 5RACE data.
 # There are two relatively simple strategies to derive strand
 # information. However, there are several exceptions to either
 # strategy. Therefore I will combine these two strategies in
 # this one script. 
 # Strategy 1: a RACE primer should extend from 3' end of a transcript
 #             towards 5' end. Therefore, if any RACE frag from
 #             this primer extends towards the right of the primer
 #             location, it means the gene goes from right to left,
 #             i.e. on - strand. Therefore, the primer should be
 #             on the + strand, and the corresponding RACEfrag should
 #             be on the - strand (same as gene). By the same token,
 #             if a RACEfrag extends toward left, it indicates that
 #             the primer is on - strand, while the gene and RACEfrag
 #             are on + strand.
 #             The only case that such relationship can not be determined
 #             is when the RACEfrag contains only one exon, and the
 #             primer locates in that exon. It is not sure if the 
 #             RACEfrag extends to the right or left. 
 #             This strategy leaves 3 primers undetermined.
 # 
 # Strategy 2: RACE primers should be designed based on GENCODE
 #             exons. Therefore, the orientation of the primer can be 
 #             determined by its overlapping GENCODE exon. In this case,
 #             the primer is on the opposite strand of the GENCODE exon,
 #             and any RACEfrag from this primer should be on the opposite
 #             strand of the primer. 
 #             There exist several exceptions, where the primer is 
 #             located outside of exons. It is probably ok if instead 
 #             look at the nearest exon if it doesn't overlap with any.
 #             This strategy leaves 37 primers undetermined.
 # Combining 1 and 2 all primers are determined for their orientation.
 # 
 # Instead of working on the original gff files, I decide to work on
 # data files after hg18 migration. These files are genePred formatted.
 # Working folder is 
 # /cluster/store6/encode/GencodeRACEfrags/2007-04-11/strand
 
   # (fixed: host was misspelled "hdwdev")
   ssh hgwdev
   cd /cluster/data/encode/GencodeRACEfrags/latest/
   mkdir strand
   cd strand
   cp /cluster/data/encode/convertHg18/genePred/*Race*.tab ./
   cp /cluster/data/encode/convertHg18/genePred/encodeGencodeGeneKnownMar07.tab ./
   
   ./addRacePrimerStrand.pl encodeGencodeRaceFragsPrimer.tab encodeGencodeGeneKnownMar07.tab
   # NOTE(review): csh clobber-override redirect is usually written ">&!"
   # without spaces -- confirm "> & !" as transcribed actually parses.
   csh load.csh > & ! load.log
   
   # encodeGencodeRaceFragsBrain
   # Reading encodeGencodeRaceFragsBrain.tab
   # 269 gene predictions
   # encodeGencodeRaceFragsColon
   # Reading encodeGencodeRaceFragsColon.tab
   # 269 gene predictions
   # encodeGencodeRaceFragsGM06990
   # Reading encodeGencodeRaceFragsGM06990.tab
   # 236 gene predictions
   # encodeGencodeRaceFragsHL60
   # Reading encodeGencodeRaceFragsHL60.tab
   # 236 gene predictions
   # encodeGencodeRaceFragsHeart
   # Reading encodeGencodeRaceFragsHeart.tab
   # 261 gene predictions
   # encodeGencodeRaceFragsHela
   # Reading encodeGencodeRaceFragsHela.tab
   # 168 gene predictions
   # encodeGencodeRaceFragsKidney
   # Reading encodeGencodeRaceFragsKidney.tab
   # 293 gene predictions
   # encodeGencodeRaceFragsLiver
   # Reading encodeGencodeRaceFragsLiver.tab
   # 243 gene predictions
   # encodeGencodeRaceFragsLung
   # Reading encodeGencodeRaceFragsLung.tab
   # 290 gene predictions
   # encodeGencodeRaceFragsMuscle
   # Reading encodeGencodeRaceFragsMuscle.tab
   # 238 gene predictions
   # encodeGencodeRaceFragsPlacenta
   # Reading encodeGencodeRaceFragsPlacenta.tab
   # 275 gene predictions
   # encodeGencodeRaceFragsPrimer
   # Reading encodeGencodeRaceFragsPrimer.tab
   # 365 gene predictions
   # encodeGencodeRaceFragsSmallIntest
   # Reading encodeGencodeRaceFragsSmallIntest.tab
   # 277 gene predictions
   # encodeGencodeRaceFragsSpleen
   # Reading encodeGencodeRaceFragsSpleen.tab
   # 275 gene predictions
   # encodeGencodeRaceFragsStomach
   # Reading encodeGencodeRaceFragsStomach.tab
   # 300 gene predictions
   # encodeGencodeRaceFragsTestis
   # Reading encodeGencodeRaceFragsTestis.tab
   # 292 gene predictions
 
   # Strand information is added for primers and all RACEfrags.
 
 ######################################################
 # LIFT NHGRI DIPs from hg17 (2007-10-22 kate)
 
     # (fixed: was "sh hgwdev")
     ssh hgwdev
     cd /cluster/data/encode/NHGRI/mullikin/hg17
     hgsql hg18 < encodeIndels.sql
     # Strip the one-line header, then lift; first 8 columns are BED-like.
     zcat encodeIndels.bed.gz | tail +2 | \
         liftOver -bedPlus=8 stdin /gbdb/hg17/liftOver/hg17ToHg18.over.chain.gz \
                 encodeIndels.hg18.bed encodeIndels.hg18.unmapped
         # lost 670 items (of 11452 total)
         # This is high -- nearly 6%, and losses were in all regions,
         # not just chrX.
     hgLoadBed hg18 encodeIndels -tab -sqlTable=encodeIndels.sql \
                 encodeIndels.hg18.bed
 
     # change group name to merge in variation
     hgsql hg18 -e "update grp set name='encodeCompAndVar' where name='encodeCompGeno'"
 
 
 #########################################################
 # 2007-11-08 (ASZ)
 # These wig files were shown to not match their corresponding database table 
 # Dropped them from the hgdownload server:
 #/goldenPath/hg18/encode/wig/encodeUppsalaChipH3acBut0h.wigBed.gz
 #/goldenPath/hg18/encode/wig/encodeUppsalaChipH3acBut12h.wigBed.gz 
 #/goldenPath/hg18/encode/wig/encodeUppsalaChipH4acBut0h.wigBed.gz
 #/goldenPath/hg18/encode/wig/encodeUppsalaChipH4acBut12h.wigBed.gz
 #/goldenPath/hg18/encode/wig/encodeYaleAffyNB4RARNATransMap.wigBed.gz
 #/goldenPath/hg18/encode/wig/encodeYaleAffyNB4TPARNATransMap.wigBed.gz
 #/goldenPath/hg18/encode/wig/encodeYaleAffyNB4UntrRNATransMap.wigBed.gz
 #/goldenPath/hg18/encode/wig/encodeYaleAffyNeutRNATransMap.wigBed.gz
 #/goldenPath/hg18/encode/wig/encodeYaleAffyPlacRNATransMap.wigBed.gz
 
 #########################################################
 # Yale RACE (2007-11-15 galt)
 #
 cd /cluster/data/encode/yale/
 mkdir race
 cd race
 mkdir 2007-11-15
 ln -s 2007-11-15/ latest
 cd latest
 mkdir lab
 cd lab
 
 wget http://homes.gersteinlab.org/people/jiangdu/race_seq/race_desc.html
 wget http://homes.gersteinlab.org/people/jiangdu/race_seq/conserved_transcripts-til-20070402.bed
 
 # Check which chromosomes are present; tail +5 skips the first 4 lines
 # (presumably header/track lines -- the next three lines are pasted output).
 tail +5 conserved_transcripts-til-20070402.bed | gawk '{print$1}' | sort -u | head
 chr11
 chr21
 chr22
 
 cp race_desc.html ${HOME}/kent/src/hg/makeDb/trackDb/human/hg18/encodeYaleRace.html
 cvs add ${HOME}/kent/src/hg/makeDb/trackDb/human/hg18/encodeYaleRace.html
 
 # Load the data, again skipping the 4 header lines.
 tail +5 conserved_transcripts-til-20070402.bed | hgLoadBed hg18 encodeYaleRace stdin
 
 # trackDb stanza added (shown between the --- markers):
 vi trackDb.encodeTxLevels.ra
 ---
 track encodeYaleRace
 superTrack encodeYaleRnaSuper dense
 shortLabel Yale RACE
 longLabel Yale RACE 420 primarily novel TARs in ENCODE regions
 group encodeTxLevels
 priority 32.0
 chromosomes chr11,chr21,chr22
 visibility hide
 type bed 12 .
 dataVersion ENCODE Nov 2007 
 origAssembly hg18
 ---
 
 vi ${HOME}/kent/src/hg/makeDb/trackDb/human/hg18/encodeYaleRnaSuper.html
 #edit to add the new RACE track to the Credits section
 
 
 #############################################################################
 # TBA alignments from Margulies lab, NHGRI (2008-2-20 kate)
 #  Submitted by Gayle McEwen (mceweng@mail.nih.gov), from their DEC-07 freeze
 #  Requested doc update (README & track description) from Elliott on 2/20
 # Conservation  scores: (BinCons and ChaiCons provided 3/24/08
 
     ssh kkstore03
     cd /cluster/data/encode/TBA
     mkdir -p DEC-07/2008-01-10/lab
     cd DEC-07/2008-01-10/lab
     wget -nd ftp://kronos.nhgri.nih.gov/pub/outgoing/elliott/encode/freeze/DEC-2007/tba-DEC-2007.tar.gz
     tar xvfz tba-DEC-2007.tar.gz
     cd ..
 
     mkdir maf
 # Rewrite species names in the submitted MAFs to UCSC database names.
 # The 'EOF' delimiter is quoted, so $f/$r below are written into the
 # script literally rather than being expanded here.
 cat > getMafs.csh << 'EOF'
     foreach f (lab/tba/*/*.maf.gz)
         set r = $f:t:r:r:e
         echo $r
         gunzip -c $f | \
             sed -e 's/^s human\./s hg18./' \
                 -e 's/^s mouse\./s mm9./' \
                 -e 's/^s cow\./s bosTau3./' \
                 -e 's/^s dog\./s canFam2./' \
                 -e 's/^s chicken\./s galGal3./' \
                 -e 's/^s monodelphis\./s monDom4./' \
                 -e 's/^s chimp\./s panTro2./' \
                 -e 's/^s macaque\./s rheMac2./' \
                 -e 's/^s orangutan\./s ponAbe2./' \
                 -e 's/^s rat\./s rn4./' \
                         > maf/$r.maf
     end
 'EOF'
     csh getMafs.csh >&! getMafs.log &
 
     # Score too small messages -- can be ignored (the score isn't meaningful)
 
     # Add gap annotation
     # prepare bed files with gap info
     ssh kkstore03
     cd /cluster/data/encode/TBA
     cd DEC-07/2008-01-10
     mkdir anno
     cd anno
     mkdir maf run
     cd run
 
     # NOTE(review): the heredoc body is indented, so species.lst entries
     # carry leading whitespace; the backtick `cat species.lst` loops below
     # word-split, which tolerates that -- but confirm if the file is
     # reused by anything stricter.
     cat > species.lst << 'EOF'
         hg18
         bosTau3
         canFam2
         galGal3
         monDom4
         panTro2
         rheMac2
         ponAbe2
         mm9
         rn4
 'EOF'
 
 # Ensure every assembly has an N-gap bed (<db>.N.bed), creating it from
 # the 2bit file if missing.
 cat > doNBed.csh << 'EOF'
     foreach db (`cat species.lst`)
         echo -n "$db "
         set cdir = /cluster/data/$db
         if (! -e $cdir/$db.N.bed) then
             echo "creating N.bed"
             twoBitInfo -nBed $cdir/$db.2bit $cdir/$db.N.bed
         else
             echo ""
         endif
     end
 'EOF'
     csh doNBed.csh >&! doNBed.log &
 
     # Build the nBeds list of non-human gap beds for mafAddIRows.
     rm -f nBeds
     foreach db (`grep -v hg18 species.lst`)
         echo "$db "
         ln -s  /cluster/data/$db/$db.N.bed $db.bed
         echo $db.bed  >> nBeds
     end
     
 # Annotate the mafs with gap (iRow) information for all species.
 cat > doAnno.csh << 'EOF'
     foreach f (../../maf/*.maf)
         set b = $f:t
         echo $f
         nice mafAddIRows -nBeds=nBeds $f \
                 /cluster/data/hg18/hg18.2bit ../maf/$b
     end
 'EOF'
 #<< happy emacs
     csh doAnno.csh >&! doAnno.log &
 
     # Load MAF table with annotated mafs.  Also load summary table.
     ssh hgwdev
     set mdir = /cluster/data/encode/TBA/DEC-07/2008-01-10/anno/maf
     cd $mdir
     set gdir = /gbdb/hg18/encode/TBA/DEC-07/2008-01-10/maf
     # NOTE(review): $gdir is cleaned before it is created -- the rm
     # precedes mkdir -p; harmless only if the dir already exists.
     rm -f $gdir/*.maf
     mkdir -p $gdir
     ln -s $mdir/*.maf $gdir
     hgLoadMaf -pathPrefix=$gdir -WARN hg18 encodeTbaAlignDec07 >&! load.log &
     cat *.maf | hgLoadMafSummary hg18 encodeTbaSummaryDec07 stdin 
     cd ..
 
     # Reannotate with newer mafAddIRows having distinctive rows for
     # tandem dups (by request of JK)
     # 2008-10-23 kate
     # again (another fix to mafAddIRows) 2008-10-27 kate
     # Re-run the annotation with the updated mafAddIRows, then reload the
     # alignment and summary tables.
     ssh kolossus
     cd /cluster/data/encode/TBA
     cd DEC-07/2008-01-10
     cd anno/run
     # edit doAnno.csh to use new version
     csh doAnno.csh >&! doAnno.log &
     ssh hgwdev
     set mdir = /cluster/data/encode/TBA/DEC-07/2008-01-10/anno/maf
     cd $mdir
     set gdir = /gbdb/hg18/encode/TBA/DEC-07/2008-01-10/maf
     hgLoadMaf -pathPrefix=$gdir -WARN hg18 encodeTbaAlignDec07 >&! load.log &
     # Ignore 'score too small' errors
     cat *.maf | hgLoadMafSummary hg18 encodeTbaSummaryDec07 stdin 
     #Created 141213 summary blocks from 8144409 components and 389847 mafs from stdin
     #Loading into hg18 table encodeTbaSummaryDec07...
 
     # Gene frames
     ssh hgwdev
     cd /cluster/data/encode/TBA/DEC-07/2008-01-10
     mkdir frames
     cd frames
 
     # Pick gene tables, according to the following criteria:
     # KG if present, else refGene if >10000 entries, else ensGene (unless dog),
     # else mgcGenes, else mrnas if > 10000 else none.   In all cases 
     # except none, add in refGene.
     # NOTE: shortcut by using sources from hg18 multiz framing
     # (added braney 2008-03-01)  use geneCode for hg18, no
     #                            genes from ponAbe2
 
 hg18: encodeGencodeGeneKnownMar07 
 bosTau3: mrna
 canFam2: mrna
 galGal3: mrna
 monDom4: ensGene
 panTro2: refGene
 rheMac2: ensGene
 rn4: knownGene
 mm9: knownGene
 
     # get the genes for all genomes
     # mRNAs with CDS.  single select to get cds+psl, then split that up and
     # create genePred
     # using mrna table as genes
 # Build single-coverage genePreds for every species: from mRNA alignments
 # for the mrnaDbs, from native gene tables for the rest.
 cat > getGenes.csh << 'EOF'
     rm -fr genes
     mkdir -p genes
     set mrnaDbs = "bosTau3 canFam2 galGal3"
     foreach queryDb ($mrnaDbs)
       set tmpExt = `mktemp temp.XXXXXX`
       set tmpMrnaCds = ${queryDb}.mrna-cds.${tmpExt}
       set tmpMrna = ${queryDb}.mrna.${tmpExt}
       set tmpCds = ${queryDb}.cds.${tmpExt}
       echo $queryDb
       # single select to get cds+psl together, then split the columns
       hgsql -N -e 'select all_mrna.qName,cds.name,all_mrna.* \
                    from all_mrna,gbCdnaInfo,cds \
                    where (all_mrna.qName = gbCdnaInfo.acc) and \
                      (gbCdnaInfo.cds != 0) and (gbCdnaInfo.cds = cds.id)' \
        $queryDb > ${tmpMrnaCds}
       cut -f 1-2  ${tmpMrnaCds} > ${tmpCds}
       cut -f 4-100  ${tmpMrnaCds} > ${tmpMrna}
       mrnaToGene -cdsFile=${tmpCds} -smallInsertSize=8 -quiet ${tmpMrna} stdout | \
         genePredSingleCover stdin stdout | gzip -2c > /scratch/tmp/$queryDb.tmp.gz
       rm ${tmpMrnaCds} ${tmpMrna} ${tmpCds}
       mv /scratch/tmp/$queryDb.tmp.gz genes/$queryDb.gp.gz
       rm -f $tmpExt
     end
     # using encodeGencodeGeneKnownMar07 for hg18
     # using knownGene for rn4 mm9 
     # using refGene for panTro2
     # using ensGene for monDom4, rheMac2
     # genePreds; (must keep only the first 10 columns for knownGene)
     #set geneDbs = "hg18 mm9 rn4 panTro2 monDom4 rheMac2 ponAbe2"
     #  NOTE: next time include ponAbe2, using ensGene
     set geneDbs = "hg18 mm9 rn4 panTro2 monDom4 rheMac2"
     foreach queryDb ($geneDbs)
       if ($queryDb == "monDom4" || $queryDb == "rheMac2") then
         set geneTbl = ensGene
       else if ($queryDb == "panTro2") then
         set geneTbl = refGene
       else if ($queryDb == "rn4" || $queryDb == "mm9") then
         set geneTbl = knownGene
       else if ($queryDb == "hg18") then
         # NOTE(review): the comments above (and the table listing earlier
         # in this section) say encodeGencodeGeneKnownMar07, but this sets
         # encodeGencodeGeneMar07 (no "Known") -- confirm which table was
         # actually used.
         set geneTbl = encodeGencodeGeneMar07 
       endif
       hgsql -N -e "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds from $geneTbl" ${queryDb} \
       | genePredSingleCover stdin stdout | gzip -2c \
         > /scratch/tmp/$queryDb.tmp.gz
       mv /scratch/tmp/$queryDb.tmp.gz genes/$queryDb.gp.gz
     end
 'EOF'
     csh getGenes.csh >&! getGenes.log &
 
     ssh kkstore03
     cd /cluster/data/encode/TBA/DEC-07/2008-01-10/frames
     (cat  ../maf/*.maf | nice genePredToMafFrames hg18 stdin stdout bosTau3 genes/bosTau3.gp.gz canFam2 genes/canFam2.gp.gz galGal3 genes/galGal3.gp.gz hg18 genes/hg18.gp.gz panTro2 genes/panTro2.gp.gz rheMac2 genes/rheMac2.gp.gz mm9 genes/mm9.gp.gz rn4 genes/rn4.gp.gz monDom4 genes/monDom4.gp.gz | nice gzip > mafFrames.gz) >& frames.log &
 
     ssh hgwdev
     cd /cluster/data/encode/TBA/DEC-07/2008-01-10/frames
     nice hgLoadMafFrames hg18 encodeTbaFramesDec07 mafFrames.gz >& loadFrames.log &
 
     # Post downloads
     ssh kkstore03
     cd /cluster/data/encode/TBA/DEC-07/2008-01-10/
     mkdir downloads
     cd anno/maf
     # redo to include re-annotated mafs (with 'T' lines for tandem dups)
     # 2008-11-06 kate
     tar cvfz ../../downloads/encodeTba.maf.tgz  *.maf
 
     # Obtain sequence freeze
     ssh kkstore03
     cd /cluster/data/encode/MSA
     mkdir -p DEC-07/lab
     cd DEC-07/lab
     wget -nd ftp://kronos.nhgri.nih.gov/pub/outgoing/elliott/encode/freeze/DEC-2007/DEC-2007.tar.gz
 
     # Received README.txt for sequence freeze
     # and encodeTbaAlign.html update from Gayle McEwen, 6/12/08
     cp encodeTbaAlign_DEC-2007.html ~/kent/src/hg/makeDb/trackDb/human/hg18/encodeTbaAlignDec07.html
     # checkin to CVS
 
     cd ..
     mkdir downloads
     cd downloads
     ln -s /cluster/data/encode/MSA/DEC-07/lab/DEC-2007.tar.gz  .
     ln -s /cluster/data/encode/MSA/DEC-07/lab/seq sequences
 
     # Received species tree from Gayle 8/08
     cp ../lab/conserved.mod tree_4d.tba.nh
     # edit to remove phastCons-specific header
     # edit tree to remove species not in this dataset: gorilla, lemur, black_lemur, sheep, 
     #   muntjak_indian, ajbat, cpbat, eehedgehog, wallaby, dunnart, tortoise, xenopus
     #   tetraodon, fugu, zebrafish, pig
     tail +2 seq/metadata.txt | awk '{print $1}' | sort | uniq > species.txt
     # Join species names into one comma-separated list for tree_doctor.
     echo `cat species.txt|sed 's/$/,/'` | sed 's/ //g' > speciesList.txt
     /cluster/bin/phast/tree_doctor --prune-all-but `cat speciesList.txt` ../lab/tree_Dec2007.nh | sed 's/:0.000000//g' > species36.nh
     # Create tree image with phyloGif -- use 700 height, preserve underscores
     # encode_36way.gif
     cp encode_36way.gif ~/browser/images/phylo/
     # checkin to CVS
     cp ../lab/README_DEC-2007.txt README.txt
     # fix typo -- it's the Dec not Sep freeze
     # edit DIRECTORY structure section a bit to reflect this downloads organization
 
     # post for download 
     ssh hgwdev
     cd /usr/local/apache/htdocs/goldenPath/hg18/encode
     mkdir -p MSA/DEC-2007
     cd MSA/DEC-2007
     ln -s /cluster/data/encode/MSA/DEC-07/downloads/{README.txt,DEC-2007.tar.gz} .
     ln -s /cluster/data/encode/MSA/DEC-07/downloads/{tree_4d.tba.nh,species36.nh} .
     cp ~/browser/images/phylo/encode_36way.gif .
     mkdir -p alignments/TBA/
     cd alignments/TBA
     ln -s /cluster/data/encode/TBA/DEC-07/2008-01-10/downloads/encodeTba.maf.tgz encodeTbaDec07.maf.tgz
 
     # Conservation
     mkdir -p ChaiCons/2008-03-24/lab BinCons/2008-03-24/lab
     # copy files from Gayle McEwen email
 
     # binCons files are formatted <region> start end name score,
     # where score is always 1000
     # Lift these to hg18 coordinates, and remove score field.
 
     echo "select chromStart, name, chromEnd-chromStart, chrom from encodeRegions" | hgsql -N hg18 | sed 's/$/\t30000000/' > /cluster/data/encode/MSA/encodeRegions.lft
 
     liftUp ChaiCons.bed /cluster/data/encode/MSA/encodeRegions.lft warn lab/CHAI.bed
     # (fixed: the two references below previously said "ChaiCons.bed3";
     # liftUp wrote ChaiCons.bed, as the pasted wc output confirms)
     wc -l ChaiCons.bed lab/CHAI.bed
      #208916 ChaiCons.bed
      #208916 lab/CHAI.bed
     awk '{printf "%s\t%d\t%d\tchai.%d\n", $1, $2, $3, NR}' ChaiCons.bed > ChaiCons.bed4
     hgLoadBed hg18 encodeTbaChaiConsDec07 ChaiCons.bed4
     # Loaded 208916 elements of size 3
     # NOTE(review): pasted output says "size 3" though the awk above emits
     # 4 columns -- possibly from an earlier run; confirm.
 
     cd ../../BinCons/2008-03-24
     liftUp -type=.bed stdout /cluster/data/encode/MSA/encodeRegions.lft warn lab/BINCONS.bed |\
         sed 's/1000$//' > BinCons.bed
 
     wc -l BinCons.bed lab/BINCONS.bed
     # 117793 BinCons.bed
     # 117836 lab/BINCONS.bed
     # difference due to blank lines in source file:
     grep '^$' lab/* | wc -l
     # 43
     hgLoadBed hg18 encodeTbaBinConsDec07 BinCons.bed
     
     
 #############################################################################
-# CRG MAPABILITY (2010-01-19 - 2010-01-28, hartera, DONE)
-# See kent/src/hg/makeDb/doc/hg18.txt for documentation on the addition of 
-# CRG alignability subtracks, to the existing ENCODE Mapability track, for 
-# sequence k-mers of 36,40,50,75 and 100 nucleotides. 
-# The data was provided by an ENCODE lab (Guigo lab at CRG) but the data
-# production was only partially funded by ENCODE so no metadata was added
-# to trackDb for this track.   
-
-
-#############################################################################
 # encodeGencodeGeneKnownMar07  (2010-04-07 markd)
 # Was discovered to be corrupted on hgwdev and all servers
 # 
 
     cd /cluster/data/encode/convertHg18/genePred
     # Verify the saved genePred tab file, reload the table from it, then
     # re-verify the loaded table.  (The "checked: 2991 failed: 0" line is
     # pasted program output.)
     genePredCheck -db=hg18 encodeGencodeGeneKnownMar07.tab 
     checked: 2991 failed: 0
     hgLoadGenePred -genePredExt hg18 encodeGencodeGeneKnownMar07 encodeGencodeGeneKnownMar07.tab
     genePredCheck -db=hg18 encodeGencodeGeneKnownMar07