67ee4b34f8d2fe743e9a610158830ed1523716fe max Thu Dec 2 00:52:01 2021 -0800 more docs and color support for uniprot tracks, refs #28560 diff --git src/hg/utils/otto/uniprot/doUniprot src/hg/utils/otto/uniprot/doUniprot index 12e8797..4f3351f 100755 --- src/hg/utils/otto/uniprot/doUniprot +++ src/hg/utils/otto/uniprot/doUniprot @@ -1233,31 +1233,31 @@ dbName = accToDb[acc] # add the transcripts that were used for mapping this protein to the genome chrom, start, end = row[0], row[1], row[2] if mapInfo: transListStr = ", ".join(sorted(list(mapInfo[(acc, chrom, start, end)]))) mapSource = accToMapSource[acc] if mapSource == "best": mapInfo = "best match(es) when aligning protein against all transcripts" elif mapSource == "uniprot": mapInfo = "alignment of protein to transcript(s) annotated by UniProt" elif mapSource == "entrez": mapInfo = "best match(es) when aligning protein against RefSeq transcripts of NCBI gene annotated by UniProt" else: - assert(False) + assert(False) # invalid mapSource ? mapInfoStr = transListStr+" (%s)" % mapInfo else: mapInfoStr = "(mapped with direct BLAT)" row.append(mapInfoStr) row = addExtraGeneInfoFields(row, dbName, recAnnot) # add the sequence (for the alignment, oSequence) seq = protSeqs[acc] row[17] = seq # add the CDS field oCDS row[18] = "1.."+str(3*len(seq)) if dbName=="swissprot": @@ -1854,31 +1854,30 @@ if options.onlyMap: continue dbBigBedDir = makeSubDir(bigBedDir, db) annotFnames = ["tab/swissprot.%d.annots.tab" % taxId] if doTrembl: annotFnames.append( "tab/trembl.%d.annots.tab" % taxId ) uniprotLift(fullFaFname, annotFnames, chromSizesFname, mapFname, dbBigBedDir, accToDb, accToMeta, options) shutil.copyfile(mapDescFname, join(dbBigBedDir, "liftInfo.json")) def makeSymlink(target, linkName): if isfile(linkName): - #logging.debug("%s already exists" % linkName) os.remove(linkName) if target is None or not isfile(target): logging.error("Cannot symlink: %s does not exist" % str(target)) return targetPath = abspath(target) linkPath = abspath(linkName) cmd = "ln -sf %(targetPath)s %(linkPath)s " % locals() run(cmd) def findForMask(fileMask): fnames = glob.glob(fileMask) if len(fnames)==0: logging.error("NOT FOUND: %s" % fileMask) return None @@ -1895,30 +1894,31 @@ for db in dbs: if onlyDbs is not None and db not in onlyDbs: continue if md5 is None: md5 = fastaMd5(fullFaFname) # do only once per organism, seqs don't change dbBigBedDir = join(bigBedDir, db) # find the bigBed files bbTargetDir = makeSubDir(join("/gbdb", db), "uniprot") bbFnames = glob.glob(join(dbBigBedDir, "*.bb")) logging.debug("Found %d bigBed files in %s" % (len(bbFnames), dbBigBedDir)) # and create links to them for bbFname in bbFnames: + if "_" not in bbFname: # skip the bigPsl files with the md5 in them bbLink = join(bbTargetDir, basename(bbFname)) makeSymlink(bbFname, bbLink) def checkPsl(pslName, faFname): " checking the qNames in a psl input (!) file " if pslName is None: logging.debug("Not found: %s" % pslName) return seqIds = list(parseFasta(faFname).keys()) accs = set([x.split("-")[0] for x in seqIds]) qNames = Counter() pslCount = 0 for line in open(pslName):