cd9e9bc3bea3e5438c2da40e57ffcb69c8fa2672 max Fri May 19 13:35:52 2017 -0700 adding all.joiner for the interactions track, refs #13634 diff --git src/hg/hgGeneGraph/hgGeneGraph src/hg/hgGeneGraph/hgGeneGraph index 6164183..f8cb051 100755 --- src/hg/hgGeneGraph/hgGeneGraph +++ src/hg/hgGeneGraph/hgGeneGraph @@ -343,38 +343,38 @@ ) # store the preformatted text as htmlText htmlText = htmlGenerator.data() return htmlText def reqMinSupp(links, minArtSupp, maxResCount, targetGene): """ remove all 'text mining only' links with less than minArtSupp supporting documents The only exception is targetGene which we always want to stay connected Also remove links that are PPI-only and have a high minResCount. """ newLinks = defaultdict(set) genes = set() targetConns = {} for genePair, linkData in links.iteritems(): - docCount, dbCount, tagSet, minResCount = linkData[:4] + docCount, dbCount, tagSet, pairMinResCount = linkData[:4] if targetGene in genePair: targetConns[genePair] = linkData # remove text-mining links with only one article if "text" in tagSet and docCount < minArtSupp: continue # remove noisy PPI links - if len(tagSet)==1 and "ppi" in tagSet and minResCount > maxResCount: + if len(tagSet)==1 and "ppi" in tagSet and pairMinResCount > maxResCount: continue genes.update(genePair) newLinks[genePair] = linkData # is the target gene still connected to something? If not add it back and # accept that these links are less than minSupp if targetGene not in genes: for genePair, linkData in targetConns.iteritems(): newLinks[genePair] = linkData return newLinks def scorePair(docCount, tagSet): " return the score for a gene pair " # pairs that have no text mining results get assigned artifical # article counts, based on this query: @@ -453,31 +453,31 @@ highLinksFiltered = defaultdict(set) for pair, linkData in graphLinks.iteritems(): g1, g2 = pair if pair in highPairs or g1==targetGene or g2==targetGene: #print "high", pair highLinksFiltered[pair] = linkData else: #print "low" lowLinks[pair] = linkData return highLinksFiltered, lowLinks def splitHighLowLinks(links, gene, minSupp, lowLinks, geneCount): """ split into two sets of links: high = best geneCount genes and best 2*geneCount links between them, low = all the others""" - links = reqMinSupp(links, minSupp, LTCUTOFF, gene) + links = reqMinSupp(links, minSupp, 999999999, gene) highLinks, lowLinks = limitGenes(links, geneCount, gene, lowLinks) #print "lowLinks", lowLinks, "
" #print "highLinks", highLinks, "
" highLinks, lowLinks = limitLinks(highLinks, lowLinks, 2*geneCount, gene) #print "bestLinks", highLinks, "
" return highLinks, lowLinks def queryLinks(conn, gene=None, genes=None): """ query the mysql table ggLink for either all links to gene or all links between all pairs of genes Return result as a dict (gene1, gene2) -> (docCount, tagSet, snippet) @@ -1957,31 +1957,31 @@ page = getCgiVar("page") if page=="stats": showStats() exit(0) showGraphBrowser() def main(): cgiSetup() format = getCgiVar("format") if format in ["pdf", "svg", "sif", "json"]: conn = sqlConnect(GGDB) gene, alg, addNeighbors, sortByCount, geneCount = parseGraphArgs() - graphLinks, lowLinks = buildGraph(conn, gene, geneCount, 2, addNeighbors) + graphLinks, lowLinks = buildGraph(conn, gene, geneCount, MINSUPP, addNeighbors) weightedLinks, minAbsCount = flattenLink(graphLinks) printGraph(conn, weightedLinks, alg, addNeighbors, gene, format) sys.exit(0) printContentType() if cgiString("debug") is not None: global DEBUG DEBUG = True htmlHeader() printInlineAndStyles() htmlMiddle() htmlPageEnd()