d87311a4a4a25c703fff51e32d5a52574feaceb5 max Fri Jun 2 14:44:21 2017 -0700 adding the evidence field to the ggEventDb, refs #13634 diff --git src/hg/hgGeneGraph/hgGeneGraph src/hg/hgGeneGraph/hgGeneGraph index 598f51e..ee1f8c3 100755 --- src/hg/hgGeneGraph/hgGeneGraph +++ src/hg/hgGeneGraph/hgGeneGraph @@ -1,18 +1,18 @@ #!/usr/bin/env python2 -# Protein Interaction Viewer for the Genome Browser +# Gene Interaction Viewer for the Genome Browser # query tables with prefix "gg" in hgFixed, writes the results to a dot file, # runs graphviz's "dot" program to create a pathway map from it and write html # and mapfiles to the trash directory. # CGI params: gene=(HGNCsymbol) or link=sym1:sym2 # optional params: addNeighbors # colors: # grey+thickness = only text mining data # light blue, dashed = only high-throughput data # light blue, thickness = high-throughput data + text # dark blue, dashed = only low-throughput data @@ -38,36 +38,30 @@ # import the UCSC-specific library sys.path.append("pyLib") try: from hgLib import cgiArgs, cgiSetup, cgiString, printContentType, printMenuBar, \ sqlConnect, sqlQuery, errAbort, cfgOption, runCmd, cgiGetAll, printHgcHeader, \ printHgcSection, webStartGbNoBanner, htmlPageEnd, hConnectCentral, sqlTableExists except: print("Content-type: text/html\n") print("Cannot find the directory cgi-bin/pyLib in Apache. This is an installation error.") print("All all parts of cgi-bin installed? Did you do 'make' in kent/src/hg/pyLib?") import MySQLdb # not using feedback button for now. Fan would have liked it, but not sure how we can write # to any form of database. -#skimpyGimpyLoaded=False -#try: - #from skimpyGimpy import skimpyAPI - #skimpyGimpyLoaded=True -#except: - #pass # the list of allowed chars in cgi args: digits, letters and dashes legalChars = set(string.digits) legalChars.update(set(string.letters)) legalChars.update("_-./: ") # number of genes to show on graph by default DEFGENECOUNT="25" # ignore all text mining data with less than X abstracts MINSUPP=2 # minResCount is used throughout the code. For a given interaction, it is the minimal # number of interactions from all documents linked to this interaction. # E.g. minResCount of 5 means that the interaction is based on at least one document # that contained not more than 5 interactions. @@ -317,49 +311,30 @@ styleStr = "" if style!=None: styleStr='style="%s" ' % style titleStr = "" if title!=None: titleStr = ' title="%s"' % title.replace('"', ' ') return '<a %s%s%shref="%s"%s>%s</a>' % (titleStr, classStr, dataToggleStr, url, styleStr, linkName) def saltedHash(word, length=5): " return first 5 chars of salted hash " # pretty simple salt: PITX2, salting is just for the captcha hashStr = "".join(hashlib.sha1(word+"PITX2").hexdigest()[:length]).lower() return hashStr -def htmlCaptcha(word): - " return html that encodes captcha word " - # copied from http://skimpygimpy.sourceforge.net/ - if not skimpyGimpyLoaded: - return None - - HTMLSPECKLE = 0.1 - HTMLSCALE = 1.5 - HTMLCOLOR = "000000" - - # create an HTML generator - htmlGenerator = skimpyAPI.Pre(word, - speckle=HTMLSPECKLE, # optional - scale=HTMLSCALE, # optional - color=HTMLCOLOR, # optional - ) - # store the preformatted text as htmlText - htmlText = htmlGenerator.data() - return htmlText def reqMinSupp(links, minArtSupp, maxResCount, targetGene): """ remove all 'text mining only' links with less than minArtSupp supporting documents The only exception is targetGene which we always want to stay connected Also remove links that are PPI-only and have a high minResCount. """ newLinks = defaultdict(set) genes = set() targetConns = {} for genePair, linkData in links.iteritems(): docCount, dbCount, tagSet, pairMinResCount = linkData[:4] if targetGene in genePair: targetConns[genePair] = linkData # remove text-mining links with only one article @@ -1468,31 +1443,31 @@ return "%s (%s)" % (eName, geneStr) if eType == "family": geneStr = "/".join(eGenes) if eName=="": return "%s" % geneStr else: return "%s (%s)" % (eName, geneStr) def printDbRows(conn, rows, onlyDoc=None): " print a row from the ggEventDb table as html " print '<ul class="more">' # sort by database name for row in sorted(rows, key=operator.itemgetter(9)): eventId, causeType, causeName, causeGenes, themeType, themeName, themeGenes, \ - relType, relSubtype, sourceDb, sourceId, sourceDesc, docIds = row + relType, relSubtype, sourceDb, sourceId, sourceDesc, docIds, evidence = row if eventId.startswith("ppi_"): isPpi = True else: isPpi = False docSet = set(docIds.split("|")) if eventId.startswith("ppi_iref"): dbName, dbUrlPat = dbData["iref"] dbName = "IRef %s" % sourceDb.capitalize() else: dbName, dbUrlPat = dbData[sourceDb] @@ -1513,30 +1488,33 @@ if isPpi: print "—" # unicode long dash else: print "→" # Unicode Arrow print entToDesc(themeType, themeName, themeGenes) if relType!="" or relSubtype!="": if relType!="" and relSubtype!="": print "(%s/%s)" % (relType, relSubtype) elif relType!="": print "(%s)" % (relType) elif relSubtype!="": print "(%s)" % (relSubtype) + if evidence!="": + print ", Evidence: ", evidence.replace("|", ", ").replace("_", " ") + if onlyDoc==None and len(docSet)!=0: print prettyDocLinks(conn, sorted(docSet)) print "</li>" print "</ul>" def showPwyPpiInfo(conn, gene1, gene2): " print pathway and PPI info about link " gene1, gene2 = sorted([gene1, gene2]) q = "SELECT ggEventDb.* FROM ggLinkEvent, ggEventDb " \ "WHERE gene1='%s' and gene2='%s' AND ggEventDb.eventId=ggLinkEvent.eventId" % (gene1,gene2) rows = sqlQuery(conn, q) # split db rows into ppi and pathway rows @@ -1863,66 +1841,56 @@ print ("<p>") print ("Thank you for reporting errors, e.g.") print("<ul><li>not true (e.g. the authors state that the interaction does not happen )</li>") print("<li>text mining errors (the authors did not say anything about this particular interaction)</li>") print("</ul>") print("This makes it easier for us to improve the text mining system or database imports.<br>") print("You can leave your email address in the comment if you want to give us the possibility to comment.<P>") print ("Optional comment: <br>") print ('<FORM method="GET" action="%s">' % basename(__file__)) print ('<INPUT TYPE="HIDDEN" NAME="remove" VALUE="%s"></INPUT>' % param) print ('<TEXTAREA rows="6" cols="50" name="comment"></TEXTAREA>') print ("<P>") - captHtml = htmlCaptcha(saltedHash(param)) - if captHtml!=None: - print captHtml - print ('Enter the word above: <INPUT TYPE="TEXT" NAME="captcha"></INPUT><BR>') - print ('<INPUT TYPE="SUBMIT" name="submit" value="Report Interaction as inaccurate"></INPUT>') print ('</FORM>') #printMsrNlpRows(rows) showSnipsLink(conn, causeGene, themeGene) #def namedtuple_factory(cursor, row): #""" #used as a function pointer to sqlite to have it return structs with names and not just arrays #""" #fields = [col[0] for col in cursor.description] #Row = collections.namedtuple("Row", fields) #return Row(*row) #def openSqlite(dbName): #" opens sqlite database and have it return structs (=namedtuples) " #tryCount = retries #con = None #con = sqlite3.connect(dbName, timeout=20) #con.row_factory = namedtuple_factory ##con.row_factory = sqlite3.Row def removeInteraction(param, captcha, comment): fields = param.split(":") if len(fields)!=2: errAbort( "illegal CGI parameter") - if skimpyGimpyLoaded: - expCaptcha = saltedHash(param) - if expCaptcha.lower()!=captcha.lower(): - errAbort("Wrong captcha word, please go back and try again") - causeGene, themeGene = fields ip = cgi.escape(os.environ["REMOTE_ADDR"]) conn = hConnectCentral() if not sqlTableExists(conn, "ggFeedback"): sqlQuery(conn, "CREATE TABLE ggFeedback (causeGene varchar(255), themeGene varchar(255), pmid varchar(255), " \ "comment varchar(10000), time TIMESTAMP, ip varchar(255), INDEX allIdx (causeGene, themeGene, pmid));") cur = conn.cursor() cur.execute("INSERT INTO ggFeedback VALUES (%s, %s, %s, NOW(), %s, %s)", (causeGene, themeGene, 0, comment, ip)) cur.close() conn.close() print ("Interaction successfully removed.<p>") lastGene = getCgiVar("lastGene")