#!/usr/bin/env python3
# Provenance: src/hg/makeDb/outside/gencode/bin/gencodePolyaGxfToGenePred
# as of commit b6aee4c6471cddebd638fec8dbb988c29a69bc22
# (markd, Thu Apr 23 21:58:41 2026 -0700):
#   import of GENCODE V50, MV39, and V50lift37; added a command to do
#   import with a single command

import sys
import os
myBinDir = os.path.normpath(os.path.dirname(sys.argv[0]))
# Inserted at the front of sys.path (rather than appended) so these library
# directories are searched before anything else already on the path.
sys.path.insert(0, os.path.join(myBinDir, "../lib"))
sys.path.insert(0, os.path.expanduser("/hive/groups/browser/pycbio/lib"))
import argparse
from pycbio.sys import fileOps
from gencode import gencodeGxfParserFactory
import pipettor
from gencode.gencodeProcess import GencodeLiftOver, getEditIdCmd

# This currently produces an extended genePred due to historic reasons, however
# bed would be more appropriate.


def parseArgs():
    """Parse the command line and return the argparse namespace.

    The namespace carries `verbose`, `inGxf`, `gencodeToUcscLift` and
    `outGenePred`.
    """
    # argparse only expands %(prog)s; the optparse-style %prog would be
    # printed literally in --help output.
    desc = """%(prog)s [options] polyAGxf ensemblToUcscChain outGp

Create a genePred for the GENCODE polyAGxf.
"""
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument("--verbose", action="store_true",
                        help="verbose output for debugging")
    parser.add_argument("inGxf",
                        help="input GTF or GFF3")
    parser.add_argument("gencodeToUcscLift",
                        help="GENCODE to UCSC to liftOver chains")
    parser.add_argument("outGenePred",
                        help="output in genePred format")
    return parser.parse_args()


def convertRec(rec, outGpPipe):
    """Write one GxF record to outGpPipe as a single-block genePred row.

    rec.start is shifted down by one for the start columns, while rec.end is
    used unchanged for the end and both CDS columns.  The record's feature
    type is written in the column after the score.
    """
    transId = rec.attributes.transcript_id
    # 440715 chr1 - 141473 141475 141475 141475 1 141473, 141475, 0 polyA_site none none -1,
    row = [transId, rec.seqname, rec.strand,
           rec.start - 1, rec.end, rec.end, rec.end,
           1, "{},".format(rec.start - 1), "{},".format(rec.end),
           0, rec.feature, "none", "none", "-1,"]
    fileOps.prRow(outGpPipe, row)


def gencodePolyaGxfToGenePred(opts):
    """Convert opts.inGxf to a lifted genePred written to opts.outGenePred.

    Rows are streamed into a two-stage pipeline: the command from
    getEditIdCmd(0) followed by the liftOver command, which writes the
    output file.  Unmapped-record information is collected and reported
    after the pipeline finishes.
    """
    gxfParser = gencodeGxfParserFactory(opts.inGxf)
    liftOver = GencodeLiftOver("genePred", opts.gencodeToUcscLift)
    outGpPipe = pipettor.Popen([getEditIdCmd(0),
                                liftOver.getLiftoverCmd(stdout=opts.outGenePred)],
                               "w")
    for rec in gxfParser.reader():
        convertRec(rec, outGpPipe)
    outGpPipe.close()
    # Collect the unmapped information before removeTmp() is called; the
    # report is produced from the already-collected object.
    unmappedInfo = liftOver.getLiftOverUnmappedInfo()
    liftOver.removeTmp()
    unmappedInfo.report()


if __name__ == "__main__":
    gencodePolyaGxfToGenePred(parseArgs())