src/hg/makeDb/doc/sacCer2.txt 1.11

1.11 2009/07/10 19:30:37 hiram
Fixup sgdToName table
Index: src/hg/makeDb/doc/sacCer2.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/sacCer2.txt,v
retrieving revision 1.10
retrieving revision 1.11
diff -b -B -U 4 -r1.10 -r1.11
--- src/hg/makeDb/doc/sacCer2.txt	2 Jul 2009 22:25:06 -0000	1.10
+++ src/hg/makeDb/doc/sacCer2.txt	10 Jul 2009 19:30:37 -0000	1.11
@@ -1301,4 +1301,33 @@
     hgLoadWiggle -tmpDir=/data/tmp sacCer2 uwFootprintsTagCounts uwFootprintsTagCounts.wig 
 
     hgLoadBed -tab -tmpDir=/data/tmp sacCer2 uwFootprintsMappability yeast.mappability.bed
     hgLoadBed -tmpDir=/data/tmp sacCer2 uwFootprintsPrints yeast.footprints.bed
+
+############################################################################
+# fixup sgdToName table (DONE - 2009-07-09 - Hiram)
+    # the sgdToName table has no entry for some of
+    #	the gene names found in sgdGene.name
+    #	to fix this, take every name in sgdGene.name that is absent from
+    #	sgdToName and add it as a row that maps the name to itself
+    mkdir /hive/data/genomes/sacCer2/bed/fixSgdToName
+    cd /hive/data/genomes/sacCer2/bed/fixSgdToName
+    hgsql -N -e "select name from sgdGene;" sacCer2 | sort -u > sgdGene.name
+    hgsql -N -e "select * from sgdToName;" sacCer2 > sgdToName.tab
+    #	flatten both tab-separated columns into one sorted list of unique names
+    tr '\t' '\n' < sgdToName.tab | sort -u > all.sgdToName.name
+    comm -12 sgdGene.name all.sgdToName.name > commonToBoth
+    comm -23 sgdGene.name all.sgdToName.name > uniqueToSgdGene
+    comm -13 sgdGene.name all.sgdToName.name > uniqueToSgdToName
+    awk '{printf "%s\t%s\n", $1, $1}' uniqueToSgdGene > addSgdGeneNames.tab
+    #	row count before the load
+    hgsql -e "select count(*) from sgdToName;" sacCer2
+    #	6254
+    #	number of self-referencing rows to be added:
+    wc -l addSgdGeneNames.tab
+    #	472
+    #	expected row count after the load: 6254 + 472 = 6726
+    hgsql sacCer2 \
+	-e 'load data local infile "addSgdGeneNames.tab" into table sgdToName;'
+    hgsql -e "select count(*) from sgdToName;" sacCer2
+    #	6726
+    #	6726
+############################################################################