144522754f3e836dd0303944ada14c38ee9f28e3 rhead Mon Mar 12 14:38:54 2012 -0700 Added a new special case for rn4 (it now uses RGD Genes instead of a UCSC Genes build) and got rid of the checking the gdbPdb table to determine whether an assembly is a UCSC Genes assembly. Added a comment to the usage that the script should be checked to see if it is using up-to-date tables. diff --git src/utils/qa/makeUniProtFile.csh src/utils/qa/makeUniProtFile.csh index 4ab2b80..ceb399b 100755 --- src/utils/qa/makeUniProtFile.csh +++ src/utils/qa/makeUniProtFile.csh @@ -1,114 +1,116 @@ #!/bin/tcsh source `which qaConfig.csh` ######################################## # # Ann Zweig 05-2009 # # Use this script to create a file for the folks at UnitProt. # They will use the file to create links from their web site back to our # Gene details pages. # ######################################## onintr cleanup set db='' set org='' set hasKGs='' set num='' if ($#argv != 1 ) then echo echo " Use this script to create a mapping of UniProt IDs to Gene IDs. \ UniProt will pick it up from our download server and use it to \ - create links from their web site to our gene details pages." + create links from their web site to our gene details pages. For the \ + gene sets from outside sources, check this script first to be sure it is \ + using the most up-to-date tables." 
echo echo " usage: db" echo exit 1 else set db=$argv[1] endif # run only from hgwdev if ( "$HOST" != "hgwdev" ) then echo "\nERROR: you must run this script on hgwdev!\n" exit 1 endif - -# check to see if this assembly has UCSC Genes -set hasKGs=`hgsql -Ne "SELECT genomeDb FROM gdbPdb" \ - hgcentraltest | grep "$db"` - -# based on whether or not this assembly has UCSC Genes, make the mapping file -if ( '' != $hasKGs ) then +# human and mouse use an official UCSC Genes build +if ( $db =~ "hg*" || $db =~ "mm*" ) then # get the data from the two KG-related tables hgsql -Ne "SELECT displayId, kgId FROM kgProtAlias" \ $db > $db.rawDataForUniProt hgsql -Ne "SELECT spId, kgId FROM kgSpAlias WHERE spId != ''" \ $db >> $db.rawDataForUniProt else #non-UCSC Gene assembly # each of the non-KG assemblies are treated a little differently + + if ( $db =~ "rn*" ) then + hgsql -Ne "SELECT value, name FROM rgdGene2ToUniProt" $db > $db.rawDataForUniProt + endif + if ( $db =~ "dm*" ) then hgsql -Ne "SELECT alias, a.name FROM flyBase2004Xref AS a, \ flyBaseToUniProt AS b WHERE a.name=b.name AND alias != 'n/a'" \ $db > $db.rawDataForUniProt endif if ( $db =~ "ce*" ) then hgsql -Ne "SELECT acc, name FROM sangerGene AS a, uniProt.gene AS b \ WHERE a.proteinID=b.val and acc != 'n/a'" $db > $db.rawDataForUniProt endif if ( $db =~ "danRer*" ) then echo " \nERROR: Although a file could be generated for danRer, uniProt has" echo " decided that they do not want to do that mapping...yet\n" exit 1 endif if ( $db =~ "sacCer*" ) then echo " \nERROR: Although a file could be generated for sacCer, uniProt has" echo " decided that they do not want to do that mapping...yet\n" exit 1 endif endif if ( -e $db.rawDataForUniProt ) then # find out the name of the organism for this database set org=`hgsql -Ne 'SELECT organism FROM dbDb where name = "'$db'" LIMIT 1' \ hgcentraltest | perl -wpe '$_ = lcfirst($_)'` # now add the organism name to every row sed -e 's/$/ '$org'/' $db.rawDataForUniProt > 
$db.rawDataForUniProt.plus else echo " \nERROR: It is not possible to make a mapping file for UniProt from" echo " the database you entered: $db\n" exit 1 endif # make sure there is only one Gene ID - UniProt ID pair sort -k1,1 -u $db.rawDataForUniProt.plus > $db.uniProtToUcscGenes.txt # make the new directory and copy the file there mkdir -p /usr/local/apache/htdocs-hgdownload/goldenPath/$db/UCSCGenes cp $db.uniProtToUcscGenes.txt /usr/local/apache/htdocs-hgdownload/goldenPath/$db/UCSCGenes/uniProtToUcscGenes.txt # how big is the file set num=`wc -l $db.uniProtToUcscGenes.txt | awk '{print $1}'` # explain the output to the user echo "\nSUCCESS!\n" echo "Here's a sample of the ${num}-line file you just created" echo " (expect: UniProtId ucscGeneId orgName)" head $db.uniProtToUcscGenes.txt echo " \nAsk for a push of your new file to hgdownload:\n" echo " /usr/local/apache/htdocs-hgdownload/goldenPath/$db/UCSCGenes/uniProtToUcscGenes.txt" # clean up old files (except the real one) cleanup: rm -f $db.rawDataForUniProt* exit 0