ecb21defee4e9278c2d6b80036e2fc7ab1f9f838
max
Fri Jun 30 06:50:47 2023 -0700
more fixes for hgGeneGraph, refs #31563
diff --git src/hg/hgGeneGraph/hgGeneGraph src/hg/hgGeneGraph/hgGeneGraph
index b5df5d0..a05d236 100755
--- src/hg/hgGeneGraph/hgGeneGraph
+++ src/hg/hgGeneGraph/hgGeneGraph
@@ -1,19 +1,20 @@
#!/usr/bin/env python3
# Gene Interaction Viewer for the Genome Browser
+
# queries tables with prefix "gg" in hgFixed, writes the results to a dot file,
# runs graphviz's "dot" program to create a pathway map from it and writes html
# and mapfiles to the trash directory.
# CGI params: gene=(HGNCsymbol) or link=sym1:sym2
# optional params: addNeighbors
# colors:
# grey+thickness = only text mining data
# light blue, dashed = only high-throughput data
# light blue, thickness = high-throughput data + text
# dark blue, dashed = only low-throughput data
@@ -1219,33 +1220,31 @@
# text above graph
print("Mouse over or click genes or lines for details. Dashed lines indicate interactions without text mining support. ")
print("Click any gene to make it the new center. Click any line to show details about the interaction. ")
print(("Only %s-interacting genes and only the most-mentioned/most-curated interactions are shown in the graph. " % (targetGene)))
print("See the Help Page for details.
")
# menu above graph
# background #fffef5 would be an alternative
print('
")
# graph itself
print('' % picName)
mapData = open(mapName, "rb").read()
- sys.stdout.flush()
- sys.stdout.buffer.write(mapData) # only way to get binary data to stdout. Problem may be that data is mixed latin1/utf8 in tables from Stanford
- sys.stdout.flush()
+ print(mapData.decode("latin1")) # graphviz seems to use latin1 encoding for its output file?
print('
") def printPmidSearchForm(): " print a little form that allows to search for a PMID " print("
') def printDisclaimer(): print(''' @@ -1464,30 +1463,37 @@ if eType == "family": geneStr = "/".join(eGenes) if eName=="": return "%s" % geneStr else: return "%s (%s)" % (eName, geneStr) def printDbRows(conn, rows, onlyDoc=None): " print a row from the ggEventDb table as html " print('
Studies that report less than %d interactions are marked with *
' % LTCUTOFF) printDbRows(conn, ppiRows) def markupSentence(row): " given a MSR-textmining row, print a sentence with the various detected words marked up in bold " - sent = row.sentence + sent = row.sentence.decode("utf8") tStart, tEnd = int(row.themeTokenStart), int(row.themeTokenEnd) cStart, cEnd = int(row.causeTokenStart), int(row.causeTokenEnd) trigToken = int(row.triggerTokenId) sentId = int(row.sentenceId) # put into temporary var to be able to replace " , " later parts = [] for i, word in enumerate(sent.split()): + themeGenes = row.themeGenes.decode("utf8") # longblob = byte string + causeGenes = row.causeGenes.decode("utf8") + if i==tStart or i==cStart: if i==cStart: - genes = row.themeGenes # XX bug in MsrNlp tab file: Inversed? + genes = themeGenes # XX bug in MsrNlp tab file: Inversed? else: - genes = row.causeGenes + genes = causeGenes geneDisp = genes.replace("|", ", ") gene1 = genes.split("|")[0] parts.append( '' % \ (gene1, geneDisp)) if i==trigToken: parts.append( '') if i==tEnd or i==cEnd: parts.append( "") if i==trigToken+1: parts.append('') parts.append(word) line = " ".join(parts) line = line.replace(" , ", ", ").replace(" . ", ". ").rstrip(". 
") line = line.replace("-LRB-", "(").replace("-RRB-", ")") return line def iterUniqueInteractions(rows): " iterate over msrNlp rows, but remove interactions we already had (=ignore direction) " doneDocs = set() for row in rows: # make sure to skip duplicated info (=same genes + same document + same sentence) - docInfo = tuple(sorted([row.causeGenes, row.themeGenes, row.docId, row.sentenceId])) + causeGenes = row.causeGenes.decode("utf8") # longblob = byte string + themeGenes = row.themeGenes.decode("utf8") + docInfo = tuple(sorted([causeGenes, themeGenes, row.docId, row.sentenceId])) if docInfo in doneDocs: continue doneDocs.add(docInfo) yield row def printDocNlpResults(rows): " print text mining results for document-centered view " for row in iterUniqueInteractions(rows): causeGene, themeGene = row.causeGenes, row.themeGenes if "Negative" in row.relType: sym = "8867" # unicode right-tack elif "Positive" in row.relType: sym = "8594" # unicode right arrow else: @@ -1661,68 +1672,70 @@ def prettyDocLinks(conn, pmids): " given a list of pmids, return a list of nice links to articles on our own site " quoteList = ['"%s"' % pmid for pmid in pmids] idStr = "(%s)" % (",".join(quoteList)) # XX remove distinct if not needed anymore query = "SELECT authors, title, journal, year, docId, resCount FROM ggDoc WHERE docId IN %s" % idStr rows = sqlQuery(conn, query) links = [] for row in rows: links.append(prettyDocLink(row)) return ", ".join(links) def prettyDocLink(row, showStar=True): " given a row that includes author/title/year/docId fields, return a nice link to our doc view " - authors = row.authors.split("; ") + authors = row.authors.decode("utf8").split("; ") # mysql longblob => byte string if len(authors)>0: fAu = authors[0].split(",")[0] else: fAu = "" suffix = "" if len(authors)>1: suffix = " et al." 
note = "" if showStar and int(row.resCount)<=LTCUTOFF: note = '*' #note = "*" text = "%s %s, %s %s" % (fAu, suffix, row.journal, row.year) mouseOver = None - if row.title!="": - mouseOver = row.title.replace('"', "") + title = row.title.decode("utf8") # longBlob = byte string + if title!="": + mouseOver = title.replace('"', "") return docLink(row.docId, text=text, mouseOver=mouseOver)+note def showSnipsLink(conn, gene1, gene2): " show snippets for a gene pair " rows = queryEventText(conn, gene1, gene2) if len(rows)!=0: print('