src/hg/makeDb/doc/mm9.txt 1.105
1.105 2009/08/20 22:51:18 angie
Updates for MGI: RepTranscript, Allele, Phenotype, and IKMC.
Index: src/hg/makeDb/doc/mm9.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/mm9.txt,v
retrieving revision 1.104
retrieving revision 1.105
diff -b -B -U 4 -r1.104 -r1.105
--- src/hg/makeDb/doc/mm9.txt 4 Aug 2009 21:40:18 -0000 1.104
+++ src/hg/makeDb/doc/mm9.txt 20 Aug 2009 22:51:18 -0000 1.105
@@ -8231,13 +8231,14 @@
# loaded by Belinda Giardine, in same manner as hg18 ORegAnno track
############################################################################
-# JAX/MGI TRACKS (DONE 6/11/09 angie)
-# Previously done 4/24/09 in /hive/data/genomes/mm9/bed/jax/2009_06 (not pushed)
+# JAX/MGI TRACKS (DONE 8/20/09 angie)
+# Previously done 6/11/09 in /hive/data/genomes/mm9/bed/jax/2009_06 (pushed)
+# Previously done 4/24/09 in /hive/data/genomes/mm9/bed/jax/2009_04 (not pushed)
# Previously done 9/24/08 in /cluster/data/mm9/bed/jax/2008_09
- mkdir -p /hive/data/genomes/mm9/bed/jax/2009_06
- cd /hive/data/genomes/mm9/bed/jax/2009_06
+ mkdir -p /hive/data/genomes/mm9/bed/jax/2009_08
+ cd /hive/data/genomes/mm9/bed/jax/2009_08
wget ftp://ftp.informatics.jax.org/pub/gbrowse/\*
wget ftp://ftp.informatics.jax.org/pub/reports/MGI_PhenotypicAllele.rpt
# Jax Rep Transcript track
@@ -8246,9 +8247,9 @@
# -- aliases ~ MGI:\d+
# Use simple perl script to uniquify transcript names and make alias.tab.
# (Copied /hive/data/genomes/mm8/bed/jax/2007_07/parseRepTranscript.pl and
# modified to tweak a regex for tweaked name NR_027008_Gt(ROSA)26Sor_1)
- ./parseRepTranscript.pl SEQ_RepTransGenomic_rpt.gff \
+ ../2009_06/parseRepTranscript.pl SEQ_RepTransGenomic_rpt.gff \
| sed -e 's/^/chr/; s/chrMT/chrM/;' \
> jaxRepTranscript.gff
# Jax Allele track
@@ -8282,32 +8283,34 @@
rm -f jaxPhenotype.bed jaxPhenotypeAlias.tab fixJaxPhenotype.sql
foreach f (MP_*.gff)
set type = `echo $f:t:r \
| perl -wpe 's/MP_[0-9]*_//; s/[_-](\w)/\u$1/g; s/^(\w)/\u$1/; \
- s@AdiposeTissue@Adipose@; \
- s@BehaviorNeurological@Behavior@; \
- s@CardiovascularSystem@Cardiovascular@; \
- s@DigestiveAlimentary@Digestive@; \
- s@EndocrineExocrineGland@Gland@; \
- s@GrowthSize@Growth Size@; \
- s@HearingEar@Hearing/Ear@; \
- s@HematopoieticSystem@Hematopoietic@; \
- s@HomeostasisMetabolism@Homeostasis@; \
- s@ImmuneSystem@Immune@; \
- s@LethalityEmbryonicPerinatal@Embryonic Lethal@; \
- s@LethalityPostnatal@Postnatal Lethal@; \
- s@LifeSpanPostWeaningAging@Life Span@; \
- s@LimbsDigitsTail@Limbs and Tail@; \
- s@LiverBiliarySystem@Liver and Bile@; \
- s@NervousSystem@Nervous System@; \
- s@RenalUrinarySystem@Renal/Urinary@; \
- s@ReproductiveSystem@Reproductive@; \
- s@RespiratorySystem@Respiratory@; \
- s@SkinCoatNails@Skin/Coat/Nails@; \
- s@TasteOlfaction@Taste/Smell@; \
- s@TouchVibrissae@Touch@; \
- s@Tumorigenesis@Tumorigenesis@; \
- s@VisionEye@Vision/Eye@;'`
+ s@AdiposeTissue@Adipose@ || \
+ s@BehaviorNeurological@Behavior@ || \
+ s@CardiovascularSystem@Cardiovascular@ || \
+ s@DigestiveAlimentary@Digestive@ || \
+ s@EndocrineExocrineGland@Gland@ || \
+ s@GrowthSize@Growth Size@ || \
+ s@HearingEar@Hearing/Ear@ || \
+ s@HematopoieticSystem@Hematopoietic@ || \
+ s@HomeostasisMetabolism@Homeostasis@ || \
+ s@ImmuneSystem@Immune@ || \
+ s@LethalityEmbryonicPerinatal@Embryonic Lethal@ || \
+ s@LethalityPostnatal@Postnatal Lethal@ || \
+ s@LifeSpanPostWeaningAging@Life Span@ || \
+ s@LimbsDigitsTail@Limbs and Tail@ || \
+ s@LiverBiliarySystem@Liver and Bile@ || \
+ s@NervousSystem@Nervous System@ || \
+ s@RenalUrinarySystem@Renal/Urinary@ || \
+ s@ReproductiveSystem@Reproductive@ || \
+ s@RespiratorySystem@Respiratory@ || \
+ s@SkinCoatNails@Skin/Coat/Nails@ || \
+ s@TasteOlfaction@Taste/Smell@ || \
+ s@TouchVibrissae@Touch@ || \
+ s@Tumorigenesis@Tumorigenesis@ || \
+ s@VisionEye@Vision/Eye@ || \
+ m/^(?:Craniofacial|Cellular|Embryogenesis|Muscle|Normal|Other|Pigmentation|Skeleton)$/ || \
+ die "Unrec $_";'`
echo $type
/hive/data/genomes/mm8/bed/jax/2006_10/parsePhenotype.pl $f \
| ldHgGene mm9 placeholder stdin -nobin -out=stdout \
| /cluster/bin/scripts/genePredToBed \
@@ -8319,8 +8322,11 @@
# Jax QTL track
# QTL*.gff --> jaxQtl2 (or 3?)... but we're missing MIT SSLP marker
# and CM distance for 2, or those plus flanking markers for 3...
+ cmp MGI_QTL.gff ../2009_06/MGI_QTL.gff
+ # No output (files are identical), so skip rebuilding jaxQtl.bed:
+ if (0) then
perl -wpe 'chomp; s/\s*$//; \
($c, undef, undef, $start, $end, undef, $strand, undef, $info) = \
split("\t"); \
if ($info =~ /QTL (\S+); Dbxref "(MGI:\d+)"; Alias .*; Note "([^"]+)"/) { \
@@ -8329,8 +8335,9 @@
if ($start > $end) { $tmp = $end; $end = $start; $start = $tmp; } \
$start-- unless $start == 0; \
s/^.*$/chr$c\t$start\t$end\t$name\t1000\t$strand\t\t$mgiID\t$desc\t0.0\n/;' \
MGI_QTL.gff > jaxQtl.bed
+ endif
# Extract phenotype-allele relationships:
# Make a file for the one code not already in a filename:
cp /dev/null MP_0003012_no_phenotypic_analysis
@@ -8343,47 +8350,37 @@
# Load tables
# jaxRepTranscript
ldHgGene mm9 jaxRepTranscript jaxRepTranscript.gff
-#35313 gene predictions
+#35505 gene predictions
hgsql mm9 < fixJaxRepTranscript.sql
- sed -e 's/genericAlias/jaxRepTranscriptAlias/g' \
- ~/kent/src/hg/lib/genericAlias.sql > jaxRepTranscriptAlias.sql
hgLoadSqlTab mm9 jaxRepTranscriptAlias \
- jaxRepTranscriptAlias.sql jaxRepTranscriptAlias.tab
+ ~/kent/src/hg/lib/genericAlias.sql jaxRepTranscriptAlias.tab
checkTableCoords mm9 jaxRepTranscript
# jaxAllele
hgLoadBed -renameSqlTable -sqlTable=$HOME/kent/src/hg/lib/bed12Source.sql \
mm9 jaxAllele jaxAllele.bed
-#Loaded 15526 elements of size 13
+#Loaded 15904 elements of size 13
# fixJaxAllele.sql is empty so don't need to do this:
# hgsql mm9 < fixJaxAllele.sql
hgLoadSqlTab mm9 jaxAlleleInfo \
~/kent/src/hg/lib/jaxAlleleInfo.sql jaxAlleleInfo.tab
# jaxPhenotype
hgLoadBed -renameSqlTable -sqlTable=$HOME/kent/src/hg/lib/bed12Source.sql \
-tab mm9 jaxPhenotype jaxPhenotype.bed
-#Loaded 32061 elements of size 13
+#Loaded 32922 elements of size 13
# fixJaxPhenotype.sql is empty so don't need to do this:
# hgsql mm9 < fixJaxPhenotype.sql
- sed -e 's/genericAlias/jaxPhenotypeAlias/' \
- ~/kent/src/hg/lib/genericAlias.sql > jaxPhenotypeAlias.sql
hgLoadSqlTab mm9 jaxPhenotypeAlias \
- jaxPhenotypeAlias.sql jaxPhenotypeAlias.tab
+ ~/kent/src/hg/lib/genericAlias.sql jaxPhenotypeAlias.tab
# jaxQtl
- hgLoadBed -tab -notItemRgb -noBin \
- -sqlTable=$HOME/kent/src/hg/lib/jaxQtl.sql \
- mm9 jaxQtl jaxQtl.bed
- checkTableCoords -verbose=2 mm9 jaxQtl
-#Loaded 1892 elements of size 10
-#mm9.jaxQtl item Ath13 chr14:51915898-165887941: chromEnd > chromSize 125194864
-#mm9.jaxQtl item Ity2 chr11:145756703-145756947: chromEnd > chromSize 121843856
- perl -wpe 's/^(\w+)\t(\d+)$/ \
- delete from jaxQtl where chrom="$1" and chromStart >= $2; \
- update jaxQtl set chromEnd = $2 where chrom="$1" and chromEnd > $2;/' \
- ../../../chrom.sizes \
- | hgsql mm9
+ cmp MGI_QTL.gff ../2009_06/MGI_QTL.gff
+ # No output ==> no data change, skip the following lines:
+# hgLoadBed -tab -notItemRgb -noBin \
+# -sqlTable=$HOME/kent/src/hg/lib/jaxQtl.sql \
+# mm9 jaxQtl jaxQtl.bed
checkTableCoords -verbose=2 mm9 jaxQtl
+ # No output, good.
# phenotype-allele relationships
hgLoadSqlTab mm9 jaxAllelePheno \
~/kent/src/hg/lib/jaxAllelePheno.sql jaxAllelePheno.tab
@@ -8614,19 +8611,20 @@
rm bed.tab
#########################################################################
-# KOMP (KNOCKOUT MOUSE PROJECT) (DONE 5/7/09 angie)
+# KOMP/IKMC (KNOCKOUT MOUSE PROJECT became Int'l Knockout Mouse Cons) (DONE 7/24/09 angie)
+# done 5/7/09 w/files emailed from Carol Bult 5/7
# done 2/12/09 w/files emailed from Carol Bult 2/12
# done 10/21/08 w/files emailed from Carol Bult 10/18
ssh hgwdev
- mkdir -p /hive/data/genomes/mm9/bed/komp/2009_05
- cd /hive/data/genomes/mm9/bed/komp/2009_05
- # Save files emailed from Carol Bult 5/7 as
- # ucsc.gff.zip
- unzip ucsc.gff.zip
+ mkdir -p /hive/data/genomes/mm9/bed/komp/2009_07
+ cd /hive/data/genomes/mm9/bed/komp/2009_07
+ # Save files emailed from Carol Bult 7/24 as
+ # 20090724_ikmc.gff.gz
# Make bed12 with itemRgb:
- perl -we \
+ zcat 20090724_ikmc.gff.gz \
+ | perl -we \
'while (<>) { \
s/\r?\n$//; \
($chr, undef, $ctr, $s, $e, undef, undef, undef, $id, $col, $n) = split("\t"); \
if ($s eq "") { warn "$_\n"; s/^.*//; next; } # Some lines have no coords. \
@@ -8635,9 +8633,9 @@
($col eq "Blue") ? "0,0,200" : "0,0,0"; \
$s--; \
$id =~ s/^MGI:\d+; (\w+); .*/$1/ || die "Cant parse id \"$id\""; \
my $geneId = join("|", $chr, $ctr, "${n}_$id"); \
- push @{$geneBlks{$geneId}}, [$s, $e, $col]; \
+ push @{$geneBlks{$geneId}}, [$s, $e, $col] unless $e <= 0; \
} \
warn "Got " . scalar(keys %geneBlks) . " genes.\n"; \
foreach my $geneId (keys %geneBlks) { \
my @blks = @{$geneBlks{$geneId}}; \
@@ -8656,35 +8654,40 @@
if ($col ne $color) { die "Blocks of $geneId of colors $color and $col"; } \
} \
print join("\t", $chrom, $chromStart, $chromEnd, $name, 0, ".", $chromStart, \
$chromStart, $color, $blkCount, $blkSizes, $blkStarts) . "\n"; \
- }' ucsc.gff \
+ }' \
| sort -k 1,1 -k 2n,2n > komp.bed
-#Got 26142 genes.
+#Got 32185 genes.
# No stderr empty-coord warnings this time (no unmapped items).
# Make an alias-style table with associated info (MGI ID and status):
- perl -wpe 's/\r?\n$//; @w = split("\t"); \
+ zcat 20090724_ikmc.gff.gz \
+ | perl -wpe 's/\r?\n$//; @w = split("\t"); \
if ($w[3] eq "") { s/^.*//; next; } # Some lines have no coords. \
+ if ($w[4] <= 0) { s/^.*//; next; } # A few lines have end=0. \
$w[8] =~ m/^(MGI:\d+); (\w+); (\w.*)/ || die; \
($mgi, $designId, $status) = ($1, $2, $3); \
- $_ = "$w[10]_$designId\t$mgi,$w[2],$status\n";' ucsc.gff \
+ $_ = "$w[10]_$designId\t$mgi,$w[2],$status\n";' \
| sort -u > kompExtra.tab
# Load 'em up:
hgLoadBed mm9 komp komp.bed
-#Loaded 26142 elements of size 12
- sed -e 's/genericAlias/kompExtra/' $HOME/kent/src/hg/lib/genericAlias.sql \
- > kompExtra.sql
- hgLoadSqlTab mm9 kompExtra kompExtra.sql kompExtra.tab
+#Loaded 32185 elements of size 12
+ hgLoadSqlTab mm9 kompExtra $HOME/kent/src/hg/lib/genericAlias.sql kompExtra.tab
checkTableCoords -verbose=2 mm9 komp
#mm9.komp item Tekt3_41479 chr11:62887195-62896116: blocks 3 and 4 overlap.
#mm9.komp item Tekt3_41478 chr11:62887195-62896116: blocks 3 and 4 overlap.
#mm9.komp item Tekt3_41477 chr11:62887195-62896116: blocks 3 and 4 overlap.
#mm9.komp item Tekt3_41476 chr11:62887195-62896116: blocks 3 and 4 overlap.
-#mm9.komp has 4 records with overlapping blocks.
- # Carol talked to the Sanger folks about that one... we can waive like last time.
+#mm9.komp item Cntn5_44827 chr9:10008998-10019351: blocks 1 and 2 overlap.
+ # Carol talked to the Sanger folks about those overlaps... please waive them, like last time.
+
+ # NOTE FOR NEXT TIME: Carol noticed some very long items and is asking
+ # Sanger about them. Here's how to check it ourselves next time:
+ hgsql mm9 -e 'select name, (chromEnd-chromStart) as length from komp \
+ where chromEnd - chromStart > 1000000 order by length desc;'
runJoiner.csh mm9 komp
-# mm9.kompExtra.name - hits 26142 of 26142 ok
+# mm9.kompExtra.name - hits 32185 of 32185 ok
#########################################################################
### Affy MOE430 version 2 (DONE - 2008-09-25,10-02 - Hiram)