420df98ed5d99b83c27e449769189e1f7e5e462a markd Wed Jul 17 19:06:38 2019 -0700 changed some generated URL due to changes in gencode site diff --git src/hg/makeDb/outside/gencode/gencodeGenerateTrackDbs src/hg/makeDb/outside/gencode/gencodeGenerateTrackDbs index b7b67ce..77211c0 100755 --- src/hg/makeDb/outside/gencode/gencodeGenerateTrackDbs +++ src/hg/makeDb/outside/gencode/gencodeGenerateTrackDbs @@ -349,78 +349,78 @@ "searchName wgEncodeGencode2wayConsPseudoV{gencodeVer}", "searchTable wgEncodeGencode2wayConsPseudoV{gencodeVer}", "searchMethod exact", "searchType genePred", "termRegex {twoWayPseudoIdRegex}", "searchPriority {twoWayConsPseudoSearchPriority:.5f}")) def trackHtmlTemplateGen(params): """Generate template for track HTML""" yield forBoth(params, ( "<h2>Description</h2>", "<p>", "The GENCODE Genes track (version {gencodeVer}, {gencodeDate}) shows high-quality manual", "annotations merged with evidence-based automated annotations across the entire", "{orgName} genome generated by the", - '<a href="https://www.gencodegenes.org/" target="_blank">GENCODE project</a>.', + '<a href="{gencodeGenesUrl}/" target="_blank">GENCODE project</a>.', "The GENCODE gene set presents a full merge", "between HAVANA manual annotation process and Ensembl automatic annotation pipeline.", "Priority is given to the manually curated HAVANA annotation using predicted", "Ensembl annotations when there are no corresponding manual annotations.")) yield forStd(params, ( "The {gencodeVer} annotation was carried out on genome assembly {orgGrcRel} ({orgHgDb}).", "</p>")) yield forLift(params, ( "The GENCODE V{gencodeSrcVer} annotations on the GRCh38 (hg38) primary assembly", "were mapped to GRCh37 (hg19) using the process", '<a href="ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/_README_GRCh37_mapping.txt" target="_blank">documented here</a>.', "</p>")) yield forBoth(params, ( "<p>", "The Ensembl human and mouse data sets are the same gene annotations as GENCODE for the", "corresponding release.", "</p>", "", '<!--#insert file="{displayHtml}"-->', "", "<h2>Downloads</h2>", - "GENCODE GFF3 and GTF files are available from the", - '<a href="{gencodeReleaseUrl}" target="_blank">GENCODE release {gencodeVer}</a> site.', + "<p>GENCODE GFF3 and GTF files are available from the", + '<a href="{gencodeReleaseUrl}" target="_blank">GENCODE release {gencodeVer}</a> site.</p>', "")) yield forStdHuman(params, ( "<h2>Verification</h2>", "", "<P>", "Selected transcript models are verified experimentally by RT-PCR amplification followed by sequencing.", "Those experiments can be found at GEO:</P>", "<ul>", ' <li> <a href="https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE30619" target="_blank"><b>GSE30619:[E-MTAB-612]</b></a> - Batch I is based on annotation from July 2008 (without pseudogenes).</li>', ' <li> <a href="https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE25711" target="_blank"><b>GSE25711:[E-MTAB-407]</b></a> - Batch II is based on annotation from April 2009. </li>', ' <li> <a href="https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE30612" target="_blank"><b>GSE30612:[E-MTAB-533]</b></a> - Batch III is verifying RGASP models for c.elegans and human.</li>', ' <li> <a href="https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE34797" target="_blank"><b>GSE34797:[E-MTAB-684]</b></a> - Batch IV is based on chromosome 3, 4 and 5 annotations from GENCODE 4 (January 2010).</li>', ' <li> <a href="https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE34820" target="_blank"><b>GSE34820:[E-MTAB-737]</b></a> - Batch V is based on annotations from GENCODE 6 (November 2010).</li>', ' <li> <a href="https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE34821" target="_blank"><b>GSE34821:[E-MTAB-831]</b></a> - Batch VI is based on annotations from GENCODE 6 (November 2010) as well as transcript models predicted by the Ensembl Genebuild group based on the Illumina Human BodyMap 2.0 data. </li>', "</ul>", "<P> See Harrow <em>et al.</em> (2006) for information on verification", "techniques.", "</P>", "")) yield forBoth(params, ( "<h2>Release Notes</h2>", "<p>", '<span style="font-weight: bold;">GENCODE version {gencodeVer} corresponds to Ensembl {ensemblVer}.</p>', - '<p>See also: <a href="https://www.gencodegenes.org/" target="_blank">The GENCODE Project</a>', + '<p>See also: <a href="{gencodeGenesUrl}/" target="_blank">The GENCODE Project</a>', "</p>", "", '<!--#insert file="../../wgEncodeGencodeCredits1.shared.html"-->',)) def joinerCheckTemplateGen(params): """Generate template for all.joiner""" yield forBoth(params, ( "# begin Gencode V{gencodeVer}", "", "# gencode genePred tables with no associations", "# wgEncodeGencodePolyaV{gencodeVer}", "# wgEncodeGencode2wayConsPseudoV{gencodeVer}", "", "# gencode genePred tables with joining through wgEncodeGencodeAttrsV{gencodeVer}", "identifier wgEncodeGencodeBasicAttrsV{gencodeVer}", @@ -527,51 +527,48 @@ def getTags(orgHgDb, gencodeVer): "obtain sorted list of tags" return hgSql(orgHgDb, "SELECT DISTINCT(tag) FROM wgEncodeGencodeTagV{} ORDER BY tag".format(gencodeVer)) class GencodeParams(object): """parameters to format info output files, base on GENCODE release. Base priorities are the start point for generating actually priorities using the version number. """ def __init__(self, orgName, orgHgDb, gencodeVer, ensemblVer, gencodeDate, superTrackBasePriority, searchBasePriority, ensemblTransIdRegex, havanaTransIdRegex, ensemblGeneIdRegex, havanaGeneIdRegex, ensemblProtIdRegex, twoWayPseudoIdRegex): + self.gencodeGenesUrl = "https://www.gencodegenes.org" self.orgName = orgName self.orgHgDb = orgHgDb if (orgHgDb == "hg38") or (orgHgDb == "grcHhh38"): self.orgGrcRel = "GRCh38" elif orgHgDb == "hg19": self.orgGrcRel = "GRCh37" elif orgHgDb == "mm10": self.orgGrcRel = "GRCm38" else: raise Exception("not a GENCODE hgdb: {}".format(orgHgDb)) self.gencodeVer = gencodeVer self.gencodeSrcVer = gencodeVer.split("lift")[0] self.ensemblVer = ensemblVer self.gencodeDate = gencodeDate m = re.match("M?([0-9]+)(lift37)?$", gencodeVer) self.numVersion = int(m.group(1)) # for constructing priorities - if self.orgName == "human": - gencodeReleaseDir = "releases/{}.html".format(gencodeVer) - else: - gencodeReleaseDir = "mouse_releases/{}.html".format(self.numVersion) - self.gencodeReleaseUrl = "https://www.gencodegenes.org/{}".format(gencodeReleaseDir) + self.gencodeReleaseUrl = "{}/{}/release_{}.html".format(self.gencodeGenesUrl, self.orgName, gencodeVer) self.ensemblTransIdRegex = ensemblTransIdRegex self.havanaTransIdRegex = havanaTransIdRegex self.ensemblGeneIdRegex = ensemblGeneIdRegex self.havanaGeneIdRegex = havanaGeneIdRegex self.ensemblProtIdRegex = ensemblProtIdRegex self.twoWayPseudoIdRegex = twoWayPseudoIdRegex self.transBiotypes = ",".join(getTransBioTypes(orgHgDb, gencodeVer)) self.tags = ",".join(getTags(orgHgDb, gencodeVer)) # Compute priorities based on version. This generated # priorities lower than ones that were hard-code before this # program was written. Adjusting track priority and search priorities by # -0.001*numVersion