src/hg/makeDb/doc/mm9.txt 1.93
1.93 2009/05/08 21:26:54 angie
IKMC (komp): data update.
Index: src/hg/makeDb/doc/mm9.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/mm9.txt,v
retrieving revision 1.92
retrieving revision 1.93
diff -b -B -U 4 -r1.92 -r1.93
--- src/hg/makeDb/doc/mm9.txt 27 Apr 2009 23:32:56 -0000 1.92
+++ src/hg/makeDb/doc/mm9.txt 8 May 2009 21:26:54 -0000 1.93
@@ -8607,27 +8607,29 @@
rm bed.tab
#########################################################################
-# KOMP (KNOCKOUT MOUSE PROJECT) (DONE 2/12/09 angie)
+# KOMP (KNOCKOUT MOUSE PROJECT) (DONE 5/7/09 angie)
+# done 2/12/09 w/files emailed from Carol Bult 2/12
# done 10/21/08 w/files emailed from Carol Bult 10/18
ssh hgwdev
- mkdir -p /hive/data/genomes/mm9/bed/komp/2009_02
- cd /hive/data/genomes/mm9/bed/komp/2009_02
- # Save files emailed from Carol Bult 2/12 as
- # csd_gff_021109.gz and regeneron_gff_021109.gz
+ mkdir -p /hive/data/genomes/mm9/bed/komp/2009_05
+ cd /hive/data/genomes/mm9/bed/komp/2009_05
+ # Save files emailed from Carol Bult 5/7 as
+ # ucsc.gff.zip
+ unzip ucsc.gff.zip
# Make bed12 with itemRgb:
- zcat *.gz \
- | perl -we \
+ perl -we \
'while (<>) { \
s/\r?\n$//; \
($chr, undef, $ctr, $s, $e, undef, undef, undef, $id, $col, $n) = split("\t"); \
if ($s eq "") { warn "$_\n"; s/^.*//; next; } # Some lines have no coords. \
$col = ($col eq "Yellow") ? "255,215,0" : \
($col eq "Green") ? "0,240,0" : \
($col eq "Blue") ? "0,0,200" : "0,0,0"; \
$s--; \
- my $geneId = join("|", $chr, $ctr, $n, $id); \
+ $id =~ s/^MGI:\d+; (\w+); .*/$1/ || die "Cant parse id \"$id\""; \
+ my $geneId = join("|", $chr, $ctr, "${n}_$id"); \
push @{$geneBlks{$geneId}}, [$s, $e, $col]; \
} \
warn "Got " . scalar(keys %geneBlks) . " genes.\n"; \
foreach my $geneId (keys %geneBlks) { \
@@ -8647,33 +8649,35 @@
if ($col ne $color) { die "Blocks of $geneId of colors $color and $col"; } \
} \
print join("\t", $chrom, $chromStart, $chromEnd, $name, 0, ".", $chromStart, \
$chromStart, $color, $blkCount, $blkSizes, $blkStarts) . "\n"; \
- }' \
+ }' ucsc.gff \
| sort -k 1,1 -k 2n,2n > komp.bed
-#Got 16665 genes.
+#Got 26142 genes.
# No stderr empty-coord warnings this time (no unmapped items).
# Make an alias-style table with associated info (MGI ID and status):
- zcat *.gz \
- | perl -wpe 's/\r?\n$//; @w = split("\t"); \
+ perl -wpe 's/\r?\n$//; @w = split("\t"); \
if ($w[3] eq "") { s/^.*//; next; } # Some lines have no coords. \
- $w[8] =~ m/^(MGI:\d+);\s*(\w.*)/ || die; \
- ($mgi, $status) = ($1, $2); \
- $_ = "$w[10]\t$mgi,$w[2],$status\n";' \
+ $w[8] =~ m/^(MGI:\d+); (\w+); (\w.*)/ || die; \
+ ($mgi, $designId, $status) = ($1, $2, $3); \
+ $_ = "$w[10]_$designId\t$mgi,$w[2],$status\n";' ucsc.gff \
| sort -u > kompExtra.tab
# Load 'em up:
hgLoadBed mm9 komp komp.bed
-#Loaded 16665 elements of size 12
+#Loaded 26142 elements of size 12
sed -e 's/genericAlias/kompExtra/' $HOME/kent/src/hg/lib/genericAlias.sql \
> kompExtra.sql
hgLoadSqlTab mm9 kompExtra kompExtra.sql kompExtra.tab
checkTableCoords -verbose=2 mm9 komp
-#mm9.komp item Tekt3 chr11:62887195-62896116: blocks 3 and 4 overlap.
-#mm9.komp has 1 records with overlapping blocks.
- # Carol talking to the Sanger folks about that one... I think we can waive.
+#mm9.komp item Tekt3_41479 chr11:62887195-62896116: blocks 3 and 4 overlap.
+#mm9.komp item Tekt3_41478 chr11:62887195-62896116: blocks 3 and 4 overlap.
+#mm9.komp item Tekt3_41477 chr11:62887195-62896116: blocks 3 and 4 overlap.
+#mm9.komp item Tekt3_41476 chr11:62887195-62896116: blocks 3 and 4 overlap.
+#mm9.komp has 4 records with overlapping blocks.
+ # Carol talked to the Sanger folks about the Tekt3 block overlap (now 4 records); we can waive it, as we did for the previous load.
runJoiner.csh mm9 komp
-# mm9.kompExtra.name - hits 16665 of 16665 ok
+# mm9.kompExtra.name - hits 26142 of 26142 ok
#########################################################################
### Affy MOE430 version 2 (DONE - 2008-09-25,10-02 - Hiram)