src/hg/makeDb/doc/sacCer2.txt 1.11
1.11 2009/07/10 19:30:37 hiram
Fixup sgdToName table
Index: src/hg/makeDb/doc/sacCer2.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/sacCer2.txt,v
retrieving revision 1.10
retrieving revision 1.11
diff -b -B -U 4 -r1.10 -r1.11
--- src/hg/makeDb/doc/sacCer2.txt 2 Jul 2009 22:25:06 -0000 1.10
+++ src/hg/makeDb/doc/sacCer2.txt 10 Jul 2009 19:30:37 -0000 1.11
@@ -1301,4 +1301,33 @@
hgLoadWiggle -tmpDir=/data/tmp sacCer2 uwFootprintsTagCounts uwFootprintsTagCounts.wig
hgLoadBed -tab -tmpDir=/data/tmp sacCer2 uwFootprintsMappability yeast.mappability.bed
hgLoadBed -tmpDir=/data/tmp sacCer2 uwFootprintsPrints yeast.footprints.bed
+
+############################################################################
+# fixup sgdToName table (DONE - 2009-07-09 - Hiram)
+ # this table is missing a name correspondence for some of
+ # the gene names in sgdGene.name
+ # to fix this, for any names in sgdGene.name that are not in sgdToName,
+ # simply add those names as entries that reference themselves
+ mkdir /hive/data/genomes/sacCer2/bed/fixSgdToName
+ cd /hive/data/genomes/sacCer2/bed/fixSgdToName
+ hgsql -N -e "select name from sgdGene;" sacCer2 | sort -u > sgdGene.name
+ hgsql -N -e "select name from sgdToName;" sacCer2 > sgdToName.tab
+ # split on tabs (a no-op when only the name column is selected) and reduce to a single list of unique names
+ cat sgdToName.tab | tr '[\t]' '[\n]' | sort -u > all.sgdToName.name
+ comm -12 sgdGene.name all.sgdToName.name > commonToBoth
+ comm -23 sgdGene.name all.sgdToName.name > uniqueToSgdGene
+ comm -13 sgdGene.name all.sgdToName.name > uniqueToSgdToName
+ awk '{printf "%s\t%s\n", $1, $1}' uniqueToSgdGene > addSgdGeneNames.tab
+ # count before load
+ hgsql -e "select count(*) from sgdToName;" sacCer2
+ # 6254
+ # adding names:
+ wc -l addSgdGeneNames.tab
+ # 472
+ # 6254 + 472 = 6726
+ hgsql sacCer2 \
+ -e 'load data local infile "addSgdGeneNames.tab" into table sgdToName;'
+ hgsql -e "select count(*) from sgdToName;" sacCer2
+ # 6726
+############################################################################