3b2e14c90156120b53c5031856842de0c57ca58c max Tue May 30 17:34:02 2017 -0700 gene interactions track: add to hgGene, patch GBIC/GBIB for it, refs #13634 diff --git src/utils/ggTables src/utils/ggTables index 7dbe660..c79207d 100755 --- src/utils/ggTables +++ src/utils/ggTables @@ -289,30 +289,31 @@ pairPmids = defaultdict(set) for row in rows: genes1 = set(row.causeGenes.split("|")) genes2 = set(row.themeGenes.split("|")) pairs = list([(aa, bb) for aa in genes1 for bb in genes2]) for cause, theme in pairs: pairPmids[(cause, theme)].add(row.pmid) return pairPmids def writeGraphTable(allPairs, pairDocs, pairToDbs, pairMinResCounts, pwDirPairs, bestSentences, outFname, outFname2): " write the ggLink table " logging.info("writing merged graph to %s" % outFname) rows = [] rows2 = [] + allSyms = set() for pair,pairRows in allPairs.iteritems(): gene1, gene2 = pair dbs = set() flags = [] if pair in dbPairs: flags.append("ppi") if pair in pwPairs: flags.append("pwy") if pair in textPairs: flags.append("text") refs = [row.eventId for row in pairRows] #if pairMinResultCounts: #flags.append("low") # direction of interaction - only based on pathways @@ -324,51 +325,56 @@ forwDocs = pairDocs.get(pair, []) revDocs = pairDocs.get(tuple(reversed(pair)), []) allDocs = set(forwDocs).union(set(revDocs)) if len(allDocs) list of event Ids " pmidToIds = defaultdict(set) for rows in rowList: for row in rows: pmidStr = row.pmids @@ -1134,48 +1141,54 @@ bestSentences = runSumBasic(textPairs, wordFname) allPairs = mergePairs([curatedPairs, textPairs]) #ltPairs, ltDocs = getResultCounts(curatedPairs) # keep result counts for the "docs" step ofh = open(join(outDir, "resultCounts.tmp.txt"), "w") for docId, pairs in docToPairs.iteritems(): ofh.write("%s\t%d\n" % (docId, len(pairs))) ofh.close() pairDirDocs = directedPairToDocs(textRows) pairDbs = pairToDbs(curatedPairs) outFname = join(outDir, "ggLink.tmp.txt") # needs the addContext step to complete it eventFname = join(outDir, "ggLinkEvent.tab") - writeGraphTable(allPairs, pairDirDocs, pairDbs, pairMinResultCounts, pwDirPairs, \ + allSyms = writeGraphTable(allPairs, pairDirDocs, pairDbs, pairMinResultCounts, pwDirPairs, \ bestSentences, outFname, eventFname) pmidToId = indexPmids([dbRows,pwRows], textRows) outFname = join(outDir, "ggDocEvent.tab") writeDocEvents(pmidToId, outFname) outFname = join(outDir, "ggEventDb.tab") writeEventTable([dbRows, pwRows], outFname, colCount=13) outFname = join(outDir, "ggEventText.tab") writeEventTable([textRows], outFname) # make sure we don't forget to update the link table with context linkFname = join(outDir, "ggLink.tab") if isfile(linkFname): os.remove(linkFname) + # hgGene does not like it if the gene symbols are in two different + # columns, so we create a very simple table with just the gene symbols + symFname = join(outDir, "ggSymbol.tab") + logging.info("Writing %s" % symFname) + open(symFname, "w").write("\n".join(allSyms)) + elif cmd == "medline": outDir = args[1] textDir = options.textDir medlineFname = join(outDir, allArtFname) writeAllDocInfo(textDir, medlineFname) elif cmd == "docs": outDir = args[1] outFname = join(outDir, "ggDoc.tab") pmidEventPath = join(outDir, "ggDocEvent.tab") medlineFname = join(outDir, allArtFname) meshTerms = parseMeshContext(options.meshFname) shortNames = parseShortNames(options.journalInfo)