src/hg/makeDb/doc/hg19.txt 1.106
1.106 2010/05/12 21:55:45 chinhli
Add Affy_U133Plus2 track
Index: src/hg/makeDb/doc/hg19.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/hg19.txt,v
retrieving revision 1.105
retrieving revision 1.106
diff -b -B -U 4 -r1.105 -r1.106
--- src/hg/makeDb/doc/hg19.txt 11 May 2010 23:43:07 -0000 1.105
+++ src/hg/makeDb/doc/hg19.txt 12 May 2010 21:55:45 -0000 1.106
@@ -9767,8 +9767,9 @@
# Download GFF file of latest miRNA annotations from miRBase at the
# ftp://mirbase.org/pub/mirbase/CURRENT/. This is Release 14.0
# (September, 2009)
+ # 04-27-2010 Get the newest miRNA release 15
wget --timestamping \
ftp://mirbase.org/pub/mirbase/CURRENT/genomes/hsa.gff
# Re-format, need to add "chr" to the beginning of each line.
sed -e 's/^/chr/' hsa.gff > hsMirBaseFormat.gff
@@ -9795,8 +9796,9 @@
else if ($0 !~ /#/ && $7 == "-") \
print $1, $4-1, $5, $9, 480, $7, 0, 0, "miRNA";}' \
hsMirBaseFormatIdOnly.gff > hsMirBaseFormatIdOnly.bed
+XXXX 04-27 stop pending new data for snoRNA
# 2010-04-21
# Down load the current snoRNABase coordinates (version 3, based on hg19)
# from
# http://www-snorna.biotoul.fr/coordinates.php
@@ -9844,6 +9846,62 @@
featureBits hg19 wgRna
# 107878 bases of 2897316137 (0.004%) in intersection
+#############################################################################
+# AFFY U133Plus2 (working 2010-05-12 Chin)
+ # Align probes
+ ssh swarm
+ cd /hive/data/genomes/hg19/bed
+ mkdir -p affyProbes/affyU133Plus2/run
+ cd affyProbes/affyU133Plus2/run
+ mkdir psl
+ ls -1 /scratch/data/hg19/nib/*.nib > genome.lst
+ ls -1 /hive/data/outside/affyProbes/U133Plus2_all.fa > mrna.lst
+
+ cat << '_EOF_' > gsub
+#LOOP
+/cluster/bin/x86_64/blat -fine -ooc=/scratch/data/hg19/11.ooc $(path1) $(path2) {check out line+ psl/$(root1)_$(root2).psl}
+#ENDLOOP
+'_EOF_'
+ # << this line makes emacs coloring happy
+
+ gensub2 genome.lst mrna.lst gsub jobList
+ para create jobList
+ para try
+ para check
+ para push
+ para time
+# Completed: 93 of 93 jobs
+# CPU time in finished jobs: 32443s 540.71m 9.01h 0.38d 0.001 y
+# IO & Wait Time: 3416s 56.94m 0.95h 0.04d 0.000 y
+# Average job time: 386s 6.43m 0.11h 0.00d
+# Longest finished job: 2736s 45.60m 0.76h 0.03d
+# Submission to last job: 5872s 97.87m 1.63h 0.07d
+# Estimated complete: 0s 0.00m 0.00h 0.00d
+
+ # Sort the alignments and apply the best-in-genome filter
+ # to create affyU133Plus2.psl.
+ pslSort dirs raw.psl tmp psl
+ pslReps -minCover=0.3 -minAli=0.95 -nearTop=0.005 raw.psl ../affyU133Plus2.psl /dev/null
+ # Processing raw.psl to ../affyU133Plus2.psl and /dev/null
+ # .....Processed 691720 alignments
+ rm -r raw.psl psl
+
+ # Load probes and alignments into database.
+ ssh hgwdev
+ cd /cluster/data/hg19/bed/affyProbes/affyU133Plus2
+ hgLoadPsl hg19 affyU133Plus2.psl
+ hgLoadSeq hg19 /gbdb/hgFixed/affyProbes/U133Plus2_all.fa
+ # Attempt to GET_LOCK timed out.
+ # Another client may have locked this name, history
+ # The load above stopped on the lock timeout; re-run it with the -replace option.
+ hgLoadSeq -replace hg19 /gbdb/hgFixed/affyProbes/U133Plus2_all.fa
+ # Creating seq.tab file
+ # Adding /gbdb/hgFixed/affyProbes/U133Plus2_all.fa
+ # 54613 sequences
+ # Updating seq table
+ # All done
+
+
############################################################################