src/hg/utils/otto/uniprot/doUniprot 67ee4b34f8d2fe743e9a610158830ed1523716fe

67ee4b34f8d2fe743e9a610158830ed1523716fe
max
  Thu Dec 2 00:52:01 2021 -0800
more docs and color support for uniprot tracks, refs #28560

diff --git src/hg/utils/otto/uniprot/doUniprot src/hg/utils/otto/uniprot/doUniprot
index 12e8797..4f3351f 100755
--- src/hg/utils/otto/uniprot/doUniprot
+++ src/hg/utils/otto/uniprot/doUniprot
@@ -1233,31 +1233,31 @@
         dbName = accToDb[acc]
 
         # add the transcripts that were used for mapping this protein to the genome
         chrom, start, end = row[0], row[1], row[2]
 
         if mapInfo:
             transListStr = ", ".join(sorted(list(mapInfo[(acc, chrom, start, end)])))
             mapSource = accToMapSource[acc]
             if mapSource == "best":
                 mapInfo = "best match(es) when aligning protein against all transcripts"
             elif mapSource == "uniprot":
                 mapInfo = "alignment of protein to transcript(s) annotated by UniProt"
             elif mapSource == "entrez":
                 mapInfo = "best match(es) when aligning protein against RefSeq transcripts of NCBI gene annotated by UniProt"
             else:
-                assert(False)
+                assert(False) # invalid mapSource ?
             mapInfoStr = transListStr+" (%s)" % mapInfo
         else:
             mapInfoStr = "(mapped with direct BLAT)"
 
         row.append(mapInfoStr)
 
         row = addExtraGeneInfoFields(row, dbName, recAnnot)
 
         # add the sequence (for the alignment, oSequence)
         seq = protSeqs[acc]
         row[17] = seq
         # add the CDS field oCDS
         row[18] = "1.."+str(3*len(seq))
 
         if dbName=="swissprot":
@@ -1854,31 +1854,30 @@
             if options.onlyMap:
                 continue
 
             dbBigBedDir = makeSubDir(bigBedDir, db)
 
             annotFnames = ["tab/swissprot.%d.annots.tab" % taxId]
             if doTrembl:
                 annotFnames.append( "tab/trembl.%d.annots.tab" % taxId )
 
             uniprotLift(fullFaFname, annotFnames, chromSizesFname, mapFname, dbBigBedDir, accToDb, accToMeta, options)
 
             shutil.copyfile(mapDescFname, join(dbBigBedDir, "liftInfo.json"))
 
 def makeSymlink(target, linkName):
     if isfile(linkName):
-        #logging.debug("%s already exists" % linkName)
         os.remove(linkName)
     if target is None or not isfile(target):
         logging.error("Cannot symlink: %s does not exist" % str(target))
         return
 
     targetPath = abspath(target)
     linkPath = abspath(linkName)
     cmd = "ln -sf %(targetPath)s %(linkPath)s " % locals()
     run(cmd)
 
 def findForMask(fileMask):
     fnames = glob.glob(fileMask)
     if len(fnames)==0:
         logging.error("NOT FOUND: %s" % fileMask)
         return None
@@ -1895,30 +1894,31 @@
         for db in dbs:
             if onlyDbs is not None and db not in onlyDbs:
                 continue
             if md5 is None:
                 md5 = fastaMd5(fullFaFname) # do only once per organism, seqs don't change
 
             dbBigBedDir = join(bigBedDir, db)
 
             # find the bigBed files
             bbTargetDir = makeSubDir(join("/gbdb", db), "uniprot")
             bbFnames = glob.glob(join(dbBigBedDir, "*.bb"))
             logging.debug("Found %d bigBed files in %s" % (len(bbFnames), dbBigBedDir))
 
             # and create links to them
             for bbFname in bbFnames:
+                if "_" not in bbFname: # skip the bigPsl files with the md5 in them
                     bbLink = join(bbTargetDir, basename(bbFname))
                     makeSymlink(bbFname, bbLink)
 
 def checkPsl(pslName, faFname):
     " checking the qNames in a psl input (!) file "
     if pslName is None:
         logging.debug("Not found: %s" % pslName)
         return
 
     seqIds = list(parseFasta(faFname).keys())
     accs = set([x.split("-")[0] for x in seqIds])
 
     qNames = Counter()
     pslCount = 0
     for line in open(pslName):