736e0d588f220cef8093cf1ba6ea3bb4f376baf4 hiram Fri Sep 27 11:25:40 2024 -0700 now loading up the new expanded genark.hgcentral table refs #32596 diff --git src/hg/utils/otto/genArk/dbDb.clade.year.acc.tsv src/hg/utils/otto/genArk/dbDb.clade.year.acc.tsv new file mode 100644 index 0000000..5de8fcf --- /dev/null +++ src/hg/utils/otto/genArk/dbDb.clade.year.acc.tsv @@ -0,0 +1,248 @@ +# manually curated list of dbDb assemblies to relate to GenArk clades +# initial construction was via: +# +# grep -v "^#" UCSC* | cut -f3,6 | sort -u > genArk.sciName.clade.tsv +# +# hgsql -hgenome-centdb -N -e \ +# 'select scientificName,name from dbDb where active=1;' hgcentral \ +# | sort -u > dbDb.sciName.name.tsv +# +# join -t $'\t' <(sort genArk.sciName.clade.tsv) \ +# <(sort dbDb.sciName.name.tsv) | grep -v "(L)" \ +# | awk -F$'\t' '{printf "%s\t%s\n", $3, $2}' | sort -u > dbDb.name.clade.tsv +# +# sort -t$'\t' -k2,2 dbDb.sciName.name.tsv \ +# | join -t $'\t' -1 2 -v1 - dbDb.name.clade.tsv > dbDb.name.missingClade.tsv +# +# then those few missingClade that didn't match were manually added +# if new genomes are added to dbDb, they would need to be added here +# +# the years were added by getting the year out of dbDb, and joining +# that list with the previous two column table of name,clade +# +# hgsql -N -e 'select name,description from dbDb;' hgcentraltest \ +# | awk '{printf "%s\t%s\n", $1,$3}' | sort > dbDb.name.year +# +# and the RefSeq/GenBank accession correspondence was obtained by working +# with the asmEquivalence table in hgFixed to get as many as possible + +ailMel1 mammals 2009 GCF_000004335.2 +allMis1 vertebrate 2012 GCF_000281125.1 +anoCar1 vertebrate 2007 na +anoCar2 vertebrate 2010 GCF_000090745.1 +anoGam1 invertebrate 2003 na +anoGam3 invertebrate 2006 GCF_000005575.2 +apiMel1 invertebrate 2004 na +apiMel2 invertebrate 2005 na +aplCal1 invertebrate 2008 na +aptMan1 birds 2015 GCF_001039765.1 +aquChr2 birds 2014 GCF_000766835.1 +balAcu1 mammals 2013 GCF_000493695.1 +bisBis1 mammals 2014 GCA_000754665.1 +bosTau2 mammals 2005 na +bosTau3 mammals 2006 na +bosTau4 mammals 2007 na +bosTau6 mammals 2009 na +bosTau7 mammals 2011 GCF_000003205.5 +bosTau8 mammals 2014 GCF_000003055.5 +bosTau9 mammals 2018 GCF_002263795.1 +braFlo1 invertebrate 2006 na +caeJap1 invertebrate 2008 na +caePb1 invertebrate 2007 na +caePb2 invertebrate 2008 na +caeRem2 invertebrate 2006 na +caeRem3 invertebrate 2007 na +calJac1 primates 2007 na +calJac3 primates 2009 GCF_000004665.1 +calJac4 primates 2020 GCF_009663435.1 +calMil1 vertebrate 2013 GCF_000165045.1 +canFam1 mammals 2004 na +canFam2 mammals 2005 na +canFam3 mammals 2011 GCF_000002285.3 +canFam4 mammals 2020 GCF_011100685.1 +canFam5 mammals 2019 GCF_005444595.1 +canFam6 mammals 2020 GCF_000002285.5 +cavPor3 mammals 2008 GCF_000151735.1 +cb1 invertebrate 2002 na +cb3 invertebrate 2007 na +ce10 invertebrate 2010 na +ce11 invertebrate 2013 GCF_000002985.6 +ce2 invertebrate 2004 na +ce4 invertebrate 2007 na +ce6 invertebrate 2008 na +cerSim1 mammals 2012 GCF_000283155.1 +chlSab2 primates 2014 GCF_000409795.2 +choHof1 mammals 2008 GCA_000164785.1 +chrPic1 vertebrate 2011 na +ci1 invertebrate 2002 GCA_000183065.1 +ci2 invertebrate 2005 na +ci3 invertebrate 2011 GCF_000224145.1 +criGri1 mammals 2013 GCF_000419365.1 +criGriChoV1 mammals 2011 GCF_000223135.1 +criGriChoV2 mammals 2017 GCA_900186095.1 +danRer10 fish 2014 GCF_000002035.5 +danRer11 fish 2017 GCF_000002035.6 +danRer3 fish 2005 na +danRer4 fish 2006 na +danRer5 fish 2007 na +danRer6 fish 2008 na +danRer7 fish 2010 GCF_000002035.4 +dasNov3 mammals 2011 GCF_000208655.1 +dipOrd1 mammals 2008 GCA_000151885.1 +dm1 invertebrate 2003 na +dm2 invertebrate 2004 na +dm3 invertebrate 2006 na +dm6 invertebrate 2014 GCF_000001215.4 +dp2 invertebrate 2003 na +dp3 invertebrate 2004 na +droAna1 invertebrate 2004 na +droAna2 invertebrate 2005 na +droEre1 invertebrate 2005 na +droGri1 invertebrate 2005 na +droMoj1 invertebrate 2004 na +droMoj2 invertebrate 2005 na +droPer1 invertebrate 2005 GCF_000005195.2 +droSec1 invertebrate 2005 GCF_000005215.3 +droSim1 invertebrate 2005 na +droVir1 invertebrate 2004 na +droVir2 invertebrate 2005 na +droYak1 invertebrate 2004 na +droYak2 invertebrate 2005 na +eboVir3 viral 2014 na +echTel1 mammals 2005 na +echTel2 mammals 2012 GCF_000313985.1 +enhLutNer1 mammals 2019 GCA_006410715.1 +equCab1 mammals 2007 na +equCab2 mammals 2007 na +equCab3 mammals 2018 GCF_002863925.1 +eriEur1 mammals 2006 na +eriEur2 mammals 2012 GCF_000296755.1 +felCat3 mammals 2006 na +felCat4 mammals 2008 GCA_000003115.1 +felCat5 mammals 2011 GCF_000181335.1 +felCat8 mammals 2014 GCF_000181335.2 +felCat9 mammals 2017 GCF_000181335.3 +fr1 fish 2002 na +fr2 fish 2004 na +fr3 fish 2011 GCF_000180615.1 +gadMor1 fish 2010 GCA_000231765.1 +galGal2 birds 2004 na +galGal3 birds 2006 na +galGal4 birds 2011 GCF_000002315.3 +galGal5 birds 2015 GCF_000002315.4 +galGal6 birds 2018 GCF_000002315.5 +galVar1 mammals 2014 GCF_000696425.1 +gasAcu1 fish 2006 na +geoFor1 birds 2012 GCF_000277835.1 +gorGor3 primates 2011 na +gorGor4 primates 2014 GCF_000151905.2 +gorGor5 primates 2016 GCA_900006655.1 +gorGor6 primates 2019 GCF_008122165.1 +hetGla1 mammals 2011 GCF_000230445.1 +hetGla2 mammals 2012 GCF_000247695.1 +hg16 primates 2003 na +hg17 primates 2004 na +hg18 primates 2006 na +hg19 primates 2009 GCF_000001405.25 +hg38 primates 2013 GCA_000001405.28 +hs1 primates 2022 GCF_009914755.1 +latCha1 vertebrate 2011 GCF_000225785.1 +loxAfr3 mammals 2009 GCF_000001905.1 +macEug2 mammals 2009 GCA_000004035.1 +macFas5 primates 2013 GCF_000364345.1 +manPen1 mammals 2014 GCA_000738955.1 +melGal1 birds 2009 GCF_000146605.1 +melGal5 birds 2014 GCF_000146605.2 +melUnd1 birds 2011 GCF_000238935.1 +micMur1 primates 2007 GCA_000165445.1 +micMur2 primates 2015 GCF_000165445.1 +mm10 mammals 2011 GCF_000001635.26 +mm39 mammals 2020 GCF_000001635.27 +mm7 mammals 2005 na +mm8 mammals 2006 na +mm9 mammals 2007 na +monDom1 mammals 2004 na +monDom4 mammals 2006 na +monDom5 mammals 2006 na +mpxvRivers viral 2022 GCF_014621545.1 +musFur1 mammals 2011 GCF_000215625.1 +nanPar1 vertebrate 2015 GCF_000935625.1 +nasLar1 primates 2014 GCA_000772465.1 +neoSch1 mammals 2017 GCF_002201575.1 +nomLeu1 primates 2010 na +nomLeu2 primates 2011 na +nomLeu3 primates 2012 GCF_000146795.2 +ochPri2 mammals 2008 GCA_000164825.1 +ochPri3 mammals 2012 GCF_000292845.1 +oreNil2 fish 2011 GCF_000188235.2 +ornAna1 mammals 2007 GCA_000002275.2 +ornAna2 mammals 2007 GCA_000002275.2 +oryCun2 mammals 2009 GCF_000003625.3 +oryLat2 fish 2005 na +otoGar3 primates 2011 GCF_000181295.1 +oviAri1 mammals 2010 GCA_000005525.1 +oviAri3 mammals 2012 GCF_000298735.1 +oviAri4 mammals 2015 GCF_000298735.2 +panPan1 primates 2012 GCF_000258655.1 +panPan2 primates 2015 GCF_000258655.2 +panPan3 primates 2020 GCF_013052645.1 +panTro1 primates 2003 na +panTro2 primates 2006 na +panTro3 primates 2010 GCF_000001515.5 +panTro4 primates 2011 GCF_000001515.6 +panTro5 primates 2016 GCF_000001515.7 +panTro6 primates 2018 GCF_002880755.1 +papAnu2 primates 2012 GCF_000264685.1 +papAnu4 primates 2017 GCF_000264685.3 +papHam1 primates 2008 na +petMar1 vertebrate 2007 na +petMar2 vertebrate 2010 GCA_000148955.1 +petMar3 vertebrate 2017 GCA_002833325.1 +ponAbe2 primates 2007 na +ponAbe3 primates 2018 GCF_002880775.1 +priPac1 invertebrate 2007 na +proCap1 mammals 2008 GCA_000152225.1 +pteVam1 mammals 2008 GCA_000151845.1 +rheMac10 primates 2019 GCF_003339765.1 +rheMac2 primates 2006 na +rheMac3 primates 2010 GCA_000230795.1 +rheMac8 primates 2015 GCF_000772875.2 +rhiRox1 primates 2014 GCF_000769185.1 +rn3 mammals 2003 na +rn4 mammals 2004 na +rn5 mammals 2012 na +rn6 mammals 2014 GCF_000001895.5 +rn7 mammals 2020 GCF_015227675.2 +sacCer1 fungi 2003 na +sacCer2 fungi 2008 na +sacCer3 fungi 2011 GCF_000146045.2 +saiBol1 primates 2011 GCF_000235385.1 +sarHar1 mammals 2011 GCF_000189315.1 +sorAra1 mammals 2006 na +sorAra2 mammals 2008 GCF_000181275.1 +speTri2 mammals 2011 GCF_000236235.1 +strPur1 invertebrate 2005 na +strPur2 invertebrate 2006 na +susScr11 mammals 2017 GCF_000003025.6 +susScr2 mammals 2009 na +susScr3 mammals 2011 GCF_000003025.5 +taeGut1 birds 2008 na +taeGut2 birds 2013 GCF_000151805.1 +tarSyr1 primates 2008 na +tarSyr2 primates 2013 GCF_000164805.1 +tetNig1 fish 2004 na +tetNig2 fish 2007 na +thaSir1 vertebrate 2015 GCF_001077635.1 +triMan1 mammals 2011 GCF_000243295.1 +tupBel1 mammals 2006 na +turTru2 mammals 2011 GCA_000151865.2 +vicPac1 mammals 2008 na +vicPac2 mammals 2013 GCF_000164845.2 +wuhCor1 viral 2020 GCF_009858895.2 +xenLae2 vertebrate 2016 GCF_001663975.1 +xenTro1 vertebrate 2004 na +xenTro10 vertebrate 2019 GCF_000004195.4 +xenTro2 vertebrate 2005 na +xenTro3 vertebrate 2009 na +xenTro7 vertebrate 2012 GCF_000004195.2 +xenTro9 vertebrate 2016 GCF_000004195.3