ecb21defee4e9278c2d6b80036e2fc7ab1f9f838 max Fri Jun 30 06:50:47 2023 -0700 more fixes for hgGeneGraph, refs #31563 diff --git src/hg/hgGeneGraph/hgGeneGraph src/hg/hgGeneGraph/hgGeneGraph index b5df5d0..a05d236 100755 --- src/hg/hgGeneGraph/hgGeneGraph +++ src/hg/hgGeneGraph/hgGeneGraph @@ -1,19 +1,20 @@ #!/usr/bin/env python3 # Gene Interaction Viewer for the Genome Browser + # query tables with prefix "gg" in hgFixed, writes the results to a dot file, # runs graphviz's "dot" program to create a pathway map from it and write html # and mapfiles to the trash directory. # CGI params: gene=(HGNCsymbol) or link=sym1:sym2 # optional params: addNeighbors # colors: # grey+thickness = only text mining data # light blue, dashed = only high-throughput data # light blue, thickness = high-throughput data + text # dark blue, dashed = only low-throughput data @@ -1219,33 +1220,31 @@ # text above graph print("Mouse over or click genes or lines for details. Dashed lines indicate interactions without text mining support. ") print("Click any gene to make it the new center. Click any line to show details about the interaction. ") print(("Only %s-interacting genes and only the most-mentioned/most-curated interactions are shown in the graph. " % (targetGene))) print("See the <a href='../../goldenPath/help/hgGeneGraph.html'>Help Page</a> for details.<br>") # menu above graph # background #fffef5 would be an alternive print('<div style="display:inline-block"> <!-- graph -->') printGraphMenu(conn, targetGene, addNeighbors) print("<p>") # graph itself print('<img src="%s" usemap="#test">' % picName) mapData = open(mapName, "rb").read() - sys.stdout.flush() - sys.stdout.buffer.write(mapData) # only way to get binary data to stdout. Problem may be that data is mixed latin1/utf8 in tables from Stanford - sys.stdout.flush() + print(mapData.decode("latin1")) # graphviz seems to use latin1 encoding for its output file? print('</div> <!-- graph -->') print("<p>") def printPmidSearchForm(): " print a little form that allows to search for a PMID " print("<hr>") print("Search for a PMID: ") print('<form action="hgGeneGraph" method="get">') print(' <input type="text" name="search">') print(' <input type="submit" name="Search">') print('</form><p>') def printDisclaimer(): print(''' @@ -1464,30 +1463,37 @@ if eType == "family": geneStr = "/".join(eGenes) if eName=="": return "%s" % geneStr else: return "%s (%s)" % (eName, geneStr) def printDbRows(conn, rows, onlyDoc=None): " print a row from the ggEventDb table as html " print('<ul class="more">') # sort by database name for row in sorted(rows, key=operator.itemgetter(9)): eventId, causeType, causeName, causeGenes, themeType, themeName, themeGenes, \ relType, relSubtype, sourceDb, sourceId, sourceDesc, docIds, evidence = row + # in Python3, 'longblob' fields get converted to byte strings + causeGenes = causeGenes.decode("utf8") + themeGenes = themeGenes.decode("utf8") + sourceId = sourceId.decode("utf8") + docIds = docIds.decode("utf8") + evidence = evidence.decode("utf8") + if eventId.startswith("ppi_"): isPpi = True else: isPpi = False docSet = set(docIds.split("|")) if eventId.startswith("ppi_iref"): dbName, dbUrlPat = dbData["iref"] dbName = "IRef %s" % sourceDb.capitalize() else: dbName, dbUrlPat = dbData[sourceDb] url = dbUrlPat % sourceId @@ -1548,67 +1554,72 @@ ppiRows.append(r) else: pwyRows.append(r) if len(pwyRows)!=0: print("<h3>Pathways - manually collected, often from reviews:</h3>") printDbRows(conn, pwyRows) if len(ppiRows)!=0: print("<h3>Protein-Protein interactions - manually collected from original source literature:</h3>") print('<p>Studies that report less than %d interactions are marked with *</p>' % LTCUTOFF) printDbRows(conn, ppiRows) def markupSentence(row): " given a MSR-textmining row, print a sentence with the various detected words marked up in bold " - sent = row.sentence + sent = row.sentence.decode("utf8") tStart, tEnd = int(row.themeTokenStart), int(row.themeTokenEnd) cStart, cEnd = int(row.causeTokenStart), int(row.causeTokenEnd) trigToken = int(row.triggerTokenId) sentId = int(row.sentenceId) # put into temporary var to be able to replace " , " later parts = [] for i, word in enumerate(sent.split()): + themeGenes = row.themeGenes.decode("utf8") # longblob = byte string + causeGenes = row.causeGenes.decode("utf8") + if i==tStart or i==cStart: if i==cStart: - genes = row.themeGenes # XX bug in MsrNlp tab file: Inversed? + genes = themeGenes # XX bug in MsrNlp tab file: Inversed? else: - genes = row.causeGenes + genes = causeGenes geneDisp = genes.replace("|", ", ") gene1 = genes.split("|")[0] parts.append( '<b><i><a href="http://www.genecards.org/cgi-bin/carddisp.pl?gene=%s" data-toggle="tooltip" data-placement="top" title="recognized genes: %s, click to go to Gene Cards" target=_blank>' % \ (gene1, geneDisp)) if i==trigToken: parts.append( '<i class="trigger">') if i==tEnd or i==cEnd: parts.append( "</a></i></b>") if i==trigToken+1: parts.append('</i class="trigger">') parts.append(word) line = " ".join(parts) line = line.replace(" , ", ", ").replace(" . ", ". ").rstrip(". ") line = line.replace("-LRB-", "(").replace("-RRB-", ")") return line def iterUniqueInteractions(rows): " iterate over msrNlp rows, but remove interactions we already had (=ignore direction) " doneDocs = set() for row in rows: # make sure to skip duplicated info (=same genes + same document + same sentence) - docInfo = tuple(sorted([row.causeGenes, row.themeGenes, row.docId, row.sentenceId])) + causeGenes = row.causeGenes.decode("utf8") # longblob = byte string + themeGenes = row.themeGenes.decode("utf8") + docInfo = tuple(sorted([causeGenes, themeGenes, row.docId, row.sentenceId])) if docInfo in doneDocs: continue doneDocs.add(docInfo) yield row def printDocNlpResults(rows): " print text mining results for document-centered view " for row in iterUniqueInteractions(rows): causeGene, themeGene = row.causeGenes, row.themeGenes if "Negative" in row.relType: sym = "8867" # unicode right-tack elif "Positive" in row.relType: sym = "8594" # unicode right arrow else: @@ -1661,68 +1672,70 @@ def prettyDocLinks(conn, pmids): " given a list of pmids, return a list of nice links to articles on our own site " quoteList = ['"%s"' % pmid for pmid in pmids] idStr = "(%s)" % (",".join(quoteList)) # XX remove distinct if not needed anymore query = "SELECT authors, title, journal, year, docId, resCount FROM ggDoc WHERE docId IN %s" % idStr rows = sqlQuery(conn, query) links = [] for row in rows: links.append(prettyDocLink(row)) return ", ".join(links) def prettyDocLink(row, showStar=True): " given a row that includes author/title/year/docId fields, return a nice link to our doc view " - authors = row.authors.split("; ") + authors = row.authors.decode("utf8").split("; ") # mysql longblob => byte string if len(authors)>0: fAu = authors[0].split(",")[0] else: fAu = "" suffix = "" if len(authors)>1: suffix = " et al." note = "" if showStar and int(row.resCount)<=LTCUTOFF: note = '<a href="#" title="Low-throughput publication">*</a>' #note = "*" text = "%s %s, %s %s" % (fAu, suffix, row.journal, row.year) mouseOver = None - if row.title!="": - mouseOver = row.title.replace('"', "") + title = row.title.decode("utf8") # longBlob = byte string + if title!="": + mouseOver = title.replace('"', "") return docLink(row.docId, text=text, mouseOver=mouseOver)+note def showSnipsLink(conn, gene1, gene2): " show snippets for a gene pair " rows = queryEventText(conn, gene1, gene2) if len(rows)!=0: print('<h3>Text-mined interactions from <A HREF="http://literome.azurewebsites.net/Network?gene1=%s&gene2=%s">Literome</A></h3>' % (gene1, gene2)) byDoc = defaultdict(list) for row in rows: byDoc[row.docId].append(row) for docId, rows in byDoc.items(): print(prettyDocLink(rows[0], showStar=False)) disSuffix = "" - if rows[0].context!="": - contexts = rows[0].context.split("|") + context = rows[0].context.decode("utf8") # longblob = byte string + if context!="": + contexts = context.split("|") if len(contexts)>1: suffix = "..." else: suffix = "" disSuffix = "(%s%s)" % (contexts[0], suffix) print("%s :" % disSuffix) printMsrNlpRows(rows) print("<br>") def showLink(link): " print details page with db info and snippets for a gene interaction " if ":" not in link: errAbort("'link' CGI parameter has to contain a colon-separated pair of genes, like PITX2:TBX5")