#!/usr/bin/env python3
# Provenance: src/hg/makeDb/outside/gencode/bin/gencodeMakeAttrs as of commit
#   b6aee4c6471cddebd638fec8dbb988c29a69bc22 (markd, Thu Apr 23 21:58:41 2026 -0700)
#   "import of GENCODE V50, MV39, and V50lift37; added a command to do import
#   with a single command"
# That commit replaced the two sys.path.append(...) calls below with
# sys.path.insert(0, ...).

import sys
import os

# The library directories must be placed on sys.path before the pycbio and
# gencode imports below; insert(0, ...) puts them ahead of all other entries.
myBinDir = os.path.normpath(os.path.dirname(sys.argv[0]))
sys.path.insert(0, os.path.join(myBinDir, "../lib"))
sys.path.insert(0, os.path.expanduser("/hive/groups/browser/pycbio/lib"))
import argparse
from pycbio.sys import fileOps
from gencode.gencodeGenes import GencodeGenes


def parseArgs():
    """Parse the command line and return the argparse namespace.

    The namespace has the attributes gencodeGp, gencodeAttrs, attrsTbl,
    tagsTbl and tslTbl, all positional and all required.
    """
    # argparse expands %(prog)s in the description; the optparse-style
    # %prog placeholder would be printed literally.
    desc = """%(prog)s [options] gencodeGp gencodeAttrs gencodeAttrsTbl gencodeTagTbl gencodeTslTbl

Output the gencode attributes table, adding computed columns to
attributes file.  Output matches:
   kent/src/hg/lib/encode/wgEncodeGencodeAttrs.as
and is sorted by locus to optimize track display speed.

Also outputs a tags in the format:
   kent/src/hg/lib/encode/wgEncodeGencodeTag.as
"""
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument("gencodeGp",
                        help="full GENCODE genePred")
    parser.add_argument("gencodeAttrs",
                        help="gencode attributes TSV")
    parser.add_argument("attrsTbl",
                        help="""write attributes to this file""")
    parser.add_argument("tagsTbl",
                        help="""write tags to this file""")
    parser.add_argument("tslTbl",
                        help="""write TSLs to this file""")
    return parser.parse_args()


# placeholder value written for columns that are kept only for compatibility
# with the old table layout
unused = ""


def emptyIfNone(v):
    """Return v unchanged, or the empty string when v is None.

    Used for optional columns so that a missing value is written as an
    empty field rather than being handed to the row writer as None.
    """
    return v if v is not None else ""


def toUcscTsl(tsl):
    """Convert a transcript support level to the value stored in the table.

    None, the empty string and "NA" are all mapped to "-1"; any other value
    is returned unchanged.
    """
    if tsl in (None, "", "NA"):
        return "-1"
    else:
        return tsl


def _writeAttrsTblRow(tr, fh):
    """Write the attributes row for transcript tr to fh.

    Must match autoSql, which includes unused columns to match old table.
    """
    gn = tr.gene
    fileOps.prRowv(fh, gn.id, gn.name, gn.bioType, unused,
                   tr.id, tr.name, tr.bioType, unused,
                   gn.havanaId, tr.havanaId,
                   emptyIfNone(tr.ccdsId), tr.level, tr.transcriptClass,
                   emptyIfNone(tr.proteinId), tr.transcriptRank)


def writeAttrsTbl(gencode, fh):
    """Write one attributes row per transcript, in getTranscriptsSortById() order."""
    for transcript in gencode.getTranscriptsSortById():
        _writeAttrsTblRow(transcript, fh)


def _writeTslRow(tr, fh):
    """Write the (transcript id, support level) row for transcript tr to fh."""
    fileOps.prRowv(fh, tr.id, toUcscTsl(tr.transcriptSupportLevel))


def writeTslTbl(gencode, fh):
    """Write one support-level row per transcript, in getTranscriptsSortById() order."""
    for transcript in gencode.getTranscriptsSortById():
        _writeTslRow(transcript, fh)


def _writeTagsRows(tr, fh):
    """Write one (transcript id, tag) row to fh for each tag of tr, tags sorted."""
    for tag in sorted(tr.tags):
        fileOps.prRowv(fh, tr.id, tag)


def writeTagsTbl(gencode, fh):
    """Write the tag rows of every transcript, in getTranscriptsSortById() order."""
    for transcript in gencode.getTranscriptsSortById():
        _writeTagsRows(transcript, fh)


def gencodeMakeAttrs(opts):
    """Load the GENCODE data named in opts and write the three output tables.

    opts is the namespace returned by parseArgs().  The attributes, TSL and
    tags files are each opened for writing (replacing any existing content)
    and written in that order.
    """
    gencode = GencodeGenes.loadFromFiles(opts.gencodeGp, opts.gencodeAttrs, onlyExisting=True)
    with open(opts.attrsTbl, "w") as fh:
        writeAttrsTbl(gencode, fh)
    with open(opts.tslTbl, "w") as fh:
        writeTslTbl(gencode, fh)
    with open(opts.tagsTbl, "w") as fh:
        writeTagsTbl(gencode, fh)


gencodeMakeAttrs(parseArgs())