src/hg/makeDb/doc/hg19.txt 1.41
1.41 2009/10/01 10:01:27 kent
Doing stuff to get gnfAtlas2 data in.
Index: src/hg/makeDb/doc/hg19.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hg19.txt,v
retrieving revision 1.40
retrieving revision 1.41
diff -b -B -U 4 -r1.40 -r1.41
--- src/hg/makeDb/doc/hg19.txt 30 Sep 2009 19:30:56 -0000 1.40
+++ src/hg/makeDb/doc/hg19.txt 1 Oct 2009 10:01:27 -0000 1.41
@@ -6262,9 +6262,9 @@
cd /cluster/data/hg19/bed
mkdir allenBrain
cd allenBrain
-# Remap the probe alignments from mm7 to hg18
+# Remap the probe alignments from mm9 to hg19
zcat /gbdb/mm9/liftOver/mm9ToHg19.over.chain.gz \
| pslMap -chainMapFile -swapMap \
/cluster/data/mm9/bed/allenBrain/allenBrainAli.psl stdin stdout \
@@ -6283,4 +6283,105 @@
# Make mapping between known genes and allenBrain
hgMapToGene hg19 allenBrainAli -type=psl knownGene knownToAllenBrain
+############################################################################
+# AFFY U133AB (Done - 2009-09-30 - Jim)
+ # Align probes
+ ssh swarm
+ cd /cluster/data/hg19/bed
+ mkdir -p affyProbes/affyU133/run
+ cd affyProbes/affyU133/run
+ mkdir psl
+ ls -1 /scratch/data/hg19/nib/*.nib > genome.lst
+ ls -1 /hive/data/outside/affyProbes/HG-U133AB_all.fa > mrna.lst
+
+ cat << '_EOF_' > gsub
+#LOOP
+/cluster/bin/x86_64/blat -fine -ooc=/scratch/data/hg19/11.ooc $(path1) $(path2) {check out line+ psl/$(root1)_$(root2).psl}
+#ENDLOOP
+'_EOF_'
+ # << this line makes emacs coloring happy
+
+ gensub2 genome.lst mrna.lst gsub jobList
+ para create jobList
+ para try
+ para check
+ para push
+ para time
+#Completed: 93 of 93 jobs
+#CPU time in finished jobs: 21246s 354.09m 5.90h 0.25d 0.001 y
+#IO & Wait Time: 349s 5.82m 0.10h 0.00d 0.000 y
+#Average job time: 232s 3.87m 0.06h 0.00d
+#Longest finished job: 1650s 27.50m 0.46h 0.02d
+#Submission to last job: 1685s 28.08m 0.47h 0.02d
+
+
+ # Sort the raw alignments and apply the best-in-genome filter
+ # to create affyU133.psl.
+ pslSort dirs raw.psl tmp psl
+ pslReps -minCover=0.3 -minAli=0.95 -nearTop=0.005 raw.psl ../affyU133.psl /dev/null
+ rm -r raw.psl psl
+
+ # Load probes and alignments into database.
+ ssh hgwdev
+ cd /cluster/data/hg19/bed/affyProbes/affyU133
+ hgLoadPsl hg19 affyU133.psl
+ hgLoadSeq hg19 /gbdb/hgFixed/affyProbes/HG-U133AB_all.fa
+
+##########################################################################
+# GNF ATLAS 2 (In progress - 2009-09-30 - Jim)
+ # Align probes from GNF1H chip.
+ ssh swarm
+ cd /cluster/data/hg19/bed
+ mkdir -p geneAtlas2/run/psl
+ cd geneAtlas2/run
+ mkdir psl
+ ls -1 /scratch/data/hg19/nib/*.nib > genome.lst
+ ls -1 /hive/data/outside/gnf/human/atlas2/gnf1h.fa > mrna.lst
+ cat << '_EOF_' > gsub
+#LOOP
+/cluster/bin/x86_64/blat -fine -ooc=/scratch/data/hg19/11.ooc $(path1) $(path2) {check out line+ psl/$(root1)_$(root2).psl}
+#ENDLOOP
+'_EOF_'
+ # << this line makes emacs coloring happy
+
+ gensub2 genome.lst mrna.lst gsub jobList
+ para create jobList
+ para try
+ para check
+ para push
+ para time
+#Completed: 93 of 93 jobs
+#CPU time in finished jobs: 3299s 54.98m 0.92h 0.04d 0.000 y
+#IO & Wait Time: 330s 5.50m 0.09h 0.00d 0.000 y
+#Average job time: 39s 0.65m 0.01h 0.00d
+#Longest finished job: 370s 6.17m 0.10h 0.00d
+#Submission to last job: 477s 7.95m 0.13h 0.01d
+
+
+ # Sort the raw alignments and apply the best-in-genome filter
+ # to create affyGnf1h.psl.
+ pslSort dirs raw.psl tmp psl
+ pslReps -minCover=0.3 -minAli=0.95 -nearTop=0.005 raw.psl ../affyGnf1h.psl /dev/null
+ rm -r raw.psl psl
+
+ # Load probes and alignments from GNF1H into database.
+ ssh hgwdev
+ cd /hive/data/genomes/hg19/bed/geneAtlas2
+ hgLoadPsl hg19 affyGnf1h.psl
+ hgLoadSeq hg19 /gbdb/hgFixed/affyProbes/gnf1h.fa
+
+ grep -v U133B ../affyProbes/affyU133/affyU133.psl \
+ | sed -e "s/exemplar://; s/consensus://; s/U133A://" \
+ | sed -e "s/;//" > affyU133A.psl
+
+ hgMapMicroarray gnfAtlas2.bed hgFixed.gnfHumanAtlas2MedianRatio \
+ affyU133A.psl affyGnf1h.psl
+
+ # Loaded 44696 rows of expression data from hgFixed.gnfHumanAtlas2MedianRatio
+ # Mapped 33186, multiply-mapped 3171, missed 48, unmapped 11510
+
+ hgLoadBed hg19 gnfAtlas2 gnfAtlas2.bed
+ # Loaded 36357 elements of size 15
+
+