1659b2691cac2372abde8c569916fc294aa9a56c braney Wed Aug 14 10:32:41 2019 -0700 new go database diff --git src/hg/makeDb/doc/go.txt src/hg/makeDb/doc/go.txt index de83377..b6924af 100644 --- src/hg/makeDb/doc/go.txt +++ src/hg/makeDb/doc/go.txt @@ -1,255 +1,295 @@ ### UPDATE GO DATABASE (DONE 04-11-24) # Adopted from an earlier process Gill did for his private go database # Update GO database (2005-07-26, hartera) # Download the terms and make the database. # Added symlink for the build dir: /cluster/data/geneOntology # so store1 doesn't need to be directly referenced (2007-01-10 kate) ssh hgwdev mkdir /cluster/data/geneOntology/20041124 cd /cluster/data/geneOntology/20041124 wget --timestamping http://www.godatabase.org/dev/database/archive/latest/go_200411-assocdb-data.gz hgsql mysql <<end create database go041124; end zcat go_*data.gz | hgsql go041124 # Download the associations wget --timestamping ftp://ftp.geneontology.org/pub/go/gene-associations/gene_association.goa_uniprot.gz # Note no specific taxon specified, this will import everything zcat gene_association.goa_uniprot.gz | hgGoAssociation go041124 goaPart stdin # Passed 4502016 of 5291097 of 5291097, 85.09% # Ask sys-admin to point go to go041124 # Asked sys-admin to rename database to go041115 and point go to the new name -- kuhn 12-06-04 # REBUILD GO WITH LATEST VERSION (DONE, 2005-07-26, hartera) # Re-made GO database as above, only the dates in names have changed # so downloads are in /cluster/store1/geneOntology/20050726 and the database # is called go050726. # Download file of terms is go_200507-assocdb-data.gz (July 18, 2005) # Download file of associations is gene_association.goa_uniprot.gz # (June 16, 2005) # hgGoAssociation: # Passed 5515470 of 6413534 of 6413534, 86.00% ########################################################################## # UPDATE GO DATABASE (DONE, 2006-12-20, Fan) # UPDATE GO DATABASE # Download the terms and make the database. 
ssh hgwdev mkdir /cluster/store1/geneOntology/20061219 cd /cluster/store1/geneOntology/20061219 wget --timestamping http://www.godatabase.org/dev/database/archive/latest/go_200612-assocdb-data.gz hgsql mysql <<end create database go061219; end zcat go_*data.gz | sed -e 's/ENGINE=MyISAM DEFAULT CHARSET=latin1/TYPE=MyISAM/g' >j.tmp hgsql go061219 <j.tmp rm j.tmp wget --timestamping ftp://ftp.geneontology.org/pub/go/gene-associations/gene_association.goa_uniprot.gz # The format of gene_association.goa_uniprot.gz changed, there are comment # lines at the head now. # Updated hgGoAssociation.c to skip those initial comment lines. zcat gene_association.goa_uniprot.gz\ |/cluster/home/fanhsu/bin/x86_64/hgGoAssociation go061219 goaPart stdin # Passed 9049274 of 10512956 of 10512956, 86.08% # Ask sys-admin to switch the database pointer go to point to go061219. ########################################################################## ### UPDATE GO DATABASE (DONE 07-01-16, Fan) # UPDATE GO DATABASE # Download the terms and make the database. ssh hgwdev mkdir /cluster/store1/geneOntology/20070111 cd /cluster/store1/geneOntology/20070111 wget --timestamping \ http://www.godatabase.org/dev/database/archive/latest/go_200701-assocdb-data.gz hgsql mysql -e 'create database go070111' zcat go_*data.gz | sed -e 's/ENGINE=MyISAM DEFAULT CHARSET=latin1/TYPE=MyISAM/g' >j.tmp hgsql go070111 <j.tmp rm j.tmp # Please note the new place to get gene_association.goa_uniprot.gz wget --timestamping \ ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/UNIPROT/gene_association.goa_uniprot.gz zcat gene_association.goa_uniprot.gz\ |hgGoAssociation go070111 goaPart stdin # Passed 10341476 of 12083865 of 12083865, 85.58% # angie suggests running this at the end (it only makes goaPart.as): # makeTableDescriptions go hg/lib/goaPart.as (BK 2007-07-05) # Ask sys-admin to switch the database pointer go to point to go070111.
########################################################################## ### UPDATE GO DATABASE (DONE 1/30/08, Fan) # UPDATE GO DATABASE # Download the terms and make the database. ssh hgwdev mkdir /cluster/store1/geneOntology/20080130 cd /cluster/store1/geneOntology/20080130 wget --timestamping http://archive.geneontology.org/latest/go_200801-assocdb-data.gz hgsql mysql -e 'create database go080130' zcat go_*data.gz | sed -e 's/ENGINE=MyISAM DEFAULT CHARSET=latin1/TYPE=MyISAM/g' >j.tmp hgsql go080130 <j.tmp rm j.tmp # Please note the new place to get gene_association.goa_uniprot.gz wget --timestamping \ ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/UNIPROT/gene_association.goa_uniprot.gz zcat gene_association.goa_uniprot.gz\ |hgGoAssociation go080130 goaPart stdin # Passed 16890026 of 25763098 of 25763098, 65.56% # Ask sys-admin to switch the database pointer go to point to go080130. ########################################################################## ### UPDATE GO DATABASE (DONE 2014-02-23 braney) # UPDATE GO DATABASE # Download the terms and make the database.
ssh hgwdev mkdir /hive/data/outside/geneOntology/20150121 cd /hive/data/outside/geneOntology/20150121 wget "http://archive.geneontology.org/full/2015-01-01/go_201501-assocdb-tables.tar.gz" hgsql -e 'create database go150121' tar xvfz go_201501-assocdb-tables.tar.gz cd go_201402-assocdb-tables for i in *.txt do if test -s $i then cat `basename $i .txt`.sql fi done | hgsql go140213 for i in *.txt do if test -s $i then echo $i echo "load data local infile '$i' into table `basename $i .txt`" | hgsql go140213 fi done # Please note the new place to get gene_association.goa_uniprot.gz wget --timestamping \ ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/UNIPROT/gene_association.goa_uniprot.gz zcat gene_association.goa_uniprot.gz |hgGoAssociation go140213 goaPart stdin # Passed 192715679 of 254966113 of 254966113, 75.58% # Ask sys-admin to switch the database pointer go to point to go140213 ########################################################################## ### UPDATE GO DATABASE (DONE 2015-01-21 braney) # UPDATE GO DATABASE # Download the terms and make the database. 
ssh hgwdev mkdir /hive/data/outside/geneOntology/20150121 cd /hive/data/outside/geneOntology/20150121 wget "http://archive.geneontology.org/full/2015-01-01/go_201501-assocdb-tables.tar.gz" hgsql -e 'create database go150121' tar xvfz go_201501-assocdb-tables.tar.gz cd go_201501-assocdb-tables for i in *.txt do if test -s $i then cat `basename $i .txt`.sql fi done | hgsql go150121 for i in *.txt do if test -s $i then echo $i echo "load data local infile '$i' into table `basename $i .txt`" | hgsql go150121 fi done # Please note the new place to get gene_association.goa_uniprot.gz wget --timestamping \ ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/UNIPROT/gene_association.goa_uniprot.gz zcat gene_association.goa_uniprot.gz |hgGoAssociation go150121 goaPart stdin # Passed 300033664 of 401499085 of 401499085, 74.73% # Ask sys-admin to switch the database pointer go to point to go150121 ########################################################################## ### UPDATE GO DATABASE (DONE 2018-04-26 braney) # UPDATE GO DATABASE # Download the terms and make the database. 
ssh hgwdev mkdir /hive/data/outside/geneOntology/20180426 cd /hive/data/outside/geneOntology/20180426 #wget "http://archive.geneontology.org/full/2015-01-01/go_201501-assocdb-tables.tar.gz" wget "http://archive.geneontology.org/full/2017-01-01/go_monthly-assocdb-tables.tar.gz" hgsql -e 'create database go180426' tar xvfz go_monthly-assocdb-tables.tar.gz cd go_monthly-assocdb-tables for i in *.txt do if test -s $i then cat `basename $i .txt`.sql fi done | hgsql go180426 for i in *.txt do if test -s $i then echo $i echo "load data local infile '$i' into table `basename $i .txt`" | hgsql go180426 fi done # Please note the new place to get gene_association.goa_uniprot.gz wget --timestamping \ ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/UNIPROT/goa_uniprot_all.gaf.gz zcat goa_uniprot_all.gaf.gz |hgGoAssociation go180426 goaPart stdin # Passed 378374005 of 508539120 of 508539120, 74.40% # Ask sys-admin to switch the database pointer go to point to go180426 +########################################################################## +### UPDATE GO DATABASE (DONE 2019-08-14 braney) + +# UPDATE GO DATABASE +# Download the terms and make the database.
+ssh hgwdev +mkdir /hive/data/outside/geneOntology/20190809 +cd /hive/data/outside/geneOntology/20190809 + +#wget "http://archive.geneontology.org/full/2015-01-01/go_201501-assocdb-tables.tar.gz" +wget "http://archive.geneontology.org/full/2017-01-01/go_monthly-assocdb-tables.tar.gz" + +hgsql -e 'create database go190809' +tar xvfz go_monthly-assocdb-tables.tar.gz + +cd go_monthly-assocdb-tables +for i in *.txt +do + if test -s $i + then + cat `basename $i .txt`.sql + fi +done | hgsql go190809 + +for i in *.txt +do + if test -s $i + then + echo $i + echo "load data local infile '$i' into table `basename $i .txt`" | hgsql go190809 + fi +done + +wget --timestamping \ +ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/UNIPROT/goa_uniprot_all.gaf.gz + +zcat goa_uniprot_all.gaf.gz |hgGoAssociation go190809 goaPart stdin +#Passed 527292541 of 697908155 of 697908155, 75.55% + +# Ask sys-admin to switch the database pointer go to point to go190809