src/hg/makeDb/doc/hg19.txt 1.3
1.3 2009/03/13 19:14:27 hiram
cpgIslands done, genbank run done, blatServers assigned
Index: src/hg/makeDb/doc/hg19.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hg19.txt,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 4 -r1.2 -r1.3
--- src/hg/makeDb/doc/hg19.txt 6 Mar 2009 23:27:05 -0000 1.2
+++ src/hg/makeDb/doc/hg19.txt 13 Mar 2009 19:14:27 -0000 1.3
@@ -70,9 +70,9 @@
"s/^GL000252.1/chr6_dbb_hap3/" -e \
"s/^GL000253.1/chr6_mann_hap4/" -e \
"s/^GL000254.1/chr6_mcf_hap5/" -e \
"s/^GL000255.1/chr6_qbl_hap6/" -e \
-"s/^GL000256.1/chr6_ssto_hap6/" -e \
+"s/^GL000256.1/chr6_ssto_hap7/" -e \
"s/^GL000257.1/chr4_ctg9_hap1/" -e \
"s/^GL000258.1/chr17_ctg5_hap1/"
done > scaffolds.agp
@@ -284,4 +284,153 @@
# ask admin to sync this directory: /hive/data/staging/data/hg19/
# to the kluster nodes /scratch/data/hg19/
############################################################################
+# running cpgIsland business (DONE - 2009-03-06 - Hiram)
+ mkdir /hive/data/genomes/hg19/bed/cpgIsland
+ cd /hive/data/genomes/hg19/bed/cpgIsland
+ cvs -d /projects/compbio/cvsroot checkout -P hg3rdParty/cpgIslands
+ cd hg3rdParty/cpgIslands
+ # the next two lines (sed and mv) comment out the "extern char* malloc"
+ # declaration in cpg_lh.c; skip them if the source compiles cleanly
+ # some day (there were some other fixups too, adding include lines)
+ sed -e "s#\(extern char\* malloc\)#// \1#" cpg_lh.c > tmp.c
+ mv tmp.c cpg_lh.c
+ make
+ cd ../../
+ ln -s hg3rdParty/cpgIslands/cpglh.exe
+ mkdir -p hardMaskedFa
+ cut -f1 ../../chrom.sizes | while read C
+do
+ echo ${C}
+ twoBitToFa ../../hg19.2bit:$C stdout \
+ | maskOutFa stdin hard hardMaskedFa/${C}.fa
+done
+
+ cut -f1 ../../chrom.sizes > chr.list
+ cat << '_EOF_' > template
+#LOOP
+./runOne $(root1) {check out line results/$(root1).cpg}
+#ENDLOOP
+'_EOF_'
+ # << happy emacs
+
+ cat << '_EOF_' > runOne
+#!/bin/csh -fe
+./cpglh.exe hardMaskedFa/$1.fa > /scratch/tmp/$1.$$
+mv /scratch/tmp/$1.$$ $2
+'_EOF_'
+ # << happy emacs
+
+ gensub2 chr.list single template jobList
+ para create jobList
+ para try
+ para check ... etc
+ para time
+# Completed: 93 of 93 jobs
+# CPU time in finished jobs: 172s 2.86m 0.05h 0.00d 0.000 y
+# IO & Wait Time: 1748s 29.14m 0.49h 0.02d 0.000 y
+# Average job time: 21s 0.34m 0.01h 0.00d
+# Longest finished job: 34s 0.57m 0.01h 0.00d
+# Submission to last job: 83s 1.38m 0.02h 0.00d
+
+ # Transform cpglh output to bed +
+ catDir results | awk '{
+$2 = $2 - 1;
+width = $3 - $2;
+printf("%s\t%d\t%s\t%s %s\t%s\t%s\t%0.0f\t%0.1f\t%s\t%s\n",
+ $1, $2, $3, $5,$6, width,
+ $6, width*$7*0.01, 100.0*2*$6/width, $7, $9);
+}' > cpgIsland.bed
+
+ cd /hive/data/genomes/hg19/bed/cpgIsland
+ hgLoadBed hg19 cpgIslandExt -tab \
+ -sqlTable=$HOME/kent/src/hg/lib/cpgIslandExt.sql cpgIsland.bed
+
+# Reading cpgIsland.bed
+# Loaded 28226 elements of size 10
+# Sorted
+# Saving bed.tab
+# Loading hg18
+
+############################################################################
+# create lift file on unBridged gaps for genbank splits (2009-03-09 - Hiram)
+ mkdir /hive/data/genomes/hg19/bed/gap
+ cd /hive/data/genomes/hg19/bed/gap
+ gapToLift hg19 hg19.unBridged.lift -bedFile=unBridged.lift.bed
+ cp -p hg19.unBridged.lift ../../jkStuff
+ cp -p hg19.unBridged.lift /hive/data/staging/data/hg19
+
+############################################################################
+# AUTO UPDATE GENBANK RUN (DONE - 2009-03-07,13 - Hiram)
+ # align with latest genbank process.
+ cd ~/kent/src/hg/makeDb/genbank
+ cvsup
+ # edit etc/genbank.conf to add the following hg19 section just after hg18
+
+# hg19 - GRCh37 - Genome Reference Consortium Human Reference 37
+# Assembly Accession: GCA_000001405.1
+hg19.serverGenome = /hive/data/genomes/hg19/hg19.2bit
+hg19.clusterGenome = /scratch/data/hg19/hg19.2bit
+hg19.ooc = /scratch/data/hg19/11.ooc
+hg19.lift = /scratch/data/hg19/hg19.unBridged.lift
+# hg19.hapRegions = /hive/data/genomes/hg19/bed/haplotypePos/haplotypePos.psl
+hg19.refseq.mrna.native.pslCDnaFilter = ${finished.refseq.mrna.native.pslCDnaFilter}
+hg19.refseq.mrna.xeno.pslCDnaFilter = ${finished.refseq.mrna.xeno.pslCDnaFilter}
+hg19.genbank.mrna.native.pslCDnaFilter = ${finished.genbank.mrna.native.pslCDnaFilter}
+hg19.genbank.mrna.xeno.pslCDnaFilter = ${finished.genbank.mrna.xeno.pslCDnaFilter}
+hg19.genbank.est.native.pslCDnaFilter = ${finished.genbank.est.native.pslCDnaFilter}
+hg19.genbank.est.xeno.pslCDnaFilter = ${finished.genbank.est.xeno.pslCDnaFilter}
+hg19.genbank.est.xeno.load = yes
+hg19.refseq.mrna.xeno.load = yes
+hg19.refseq.mrna.xeno.loadDesc = yes
+hg19.mgc = yes
+hg19.orfeome = yes
+hg19.downloadDir = hg19
+# hg19.ccds.ncbiBuild = 36.3
+# hg19.upstreamGeneTbl = refGene
+# hg19.upstreamMaf = multiz28way
+# /hive/data/genomes/hg19/bed/multiz28way/species.lst multiz44way
+# /hive/data/genomes/hg19/bed/multiz44way/species.list
+hg19.genbank.mrna.blatTargetDb = yes
+
+ cvs ci -m "Added hg19." etc/genbank.conf
+ # update /cluster/data/genbank/:
+ make etc-update
+
+ ssh genbank
+ screen # use a screen to manage this job
+ cd /cluster/data/genbank
+ time nice -n +19 bin/gbAlignStep -initial hg19 &
+ # logFile: var/build/logs/2009.03.10-20:28:44.hg19.initalign.log
+ # real 2761m13.680s
+ # that ran on the swarm with little interference and no problems
+
+ # load database when finished
+ ssh hgwdev
+ screen # use screen to manage this long running command
+ cd /cluster/data/genbank
+ time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad hg19 &
+ # logFile: var/dbload/hgwdev/logs/2009.03.12-21:10:02.dbload.log
+ # real 369m11.941s
+
+ # enable daily alignment and update of hgwdev (DONE - 2009-02-24 - Hiram)
+ cd ~/kent/src/hg/makeDb/genbank
+ cvsup
+ # add hg19 to these two files:
+ # etc/align.dbs
+ # etc/hgwdev.dbs
+ cvs ci -m "Added hg19 - Human - GRCh37" etc/align.dbs etc/hgwdev.dbs
+ make etc-update
+
+#########################################################################
+# BLATSERVERS ENTRY (DONE - 2009-03-09 - Hiram)
+# After getting a blat server assigned by the Blat Server Gods,
+ ssh hgwdev
+
+ hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
+ VALUES ("hg19", "blat13", "17778", "1", "0"); \
+ INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
+ VALUES ("hg19", "blat13", "17779", "0", "1");' \
+ hgcentraltest
+ # test it with some sequence
+
+############################################################################