a9c9cf2828c124535a1562f709bd4437b1bf60a2
braney
Mon Jun 22 16:41:31 2020 -0700
starting on making hgGene use an external database for the know* tables
diff --git src/hg/hgGene/pathways.c src/hg/hgGene/pathways.c
index 27ef8ec..03e4492 100644
--- src/hg/hgGene/pathways.c
+++ src/hg/hgGene/pathways.c
@@ -1,409 +1,409 @@
/* pathways - do pathways section. */
/* Copyright (C) 2013 The Regents of the University of California
* See README in this or parent directory for licensing information. */
#include "common.h"
#include "hash.h"
#include "linefile.h"
#include "dystring.h"
#include "jksql.h"
#include "hdb.h"
#include "spDb.h"
#include "hgGene.h"
struct pathwayLink
/* One pathway database: its labels, the tables it depends on, and the
 * callbacks used to count and to print the entries for a gene. */
    {
    char *name;         /* Symbolic name. */
    char *shortLabel;   /* Short label. */
    char *longLabel;    /* Long label. */
    char *tables;       /* Tables that must exist. */

    /* Count number of items referring to this gene. */
    int (*count)(struct pathwayLink *pl,
                 struct sqlConnection *conn, char *geneId);

    /* Print out links. */
    void (*printLinks)(struct pathwayLink *pl,
                       struct sqlConnection *conn, char *geneId);
    };
static void keggLink(struct pathwayLink *pl, struct sqlConnection *conn,
char *geneId)
/* Print out kegg database link.
 * Runs one query against the KEGG pathway tables for geneId and issues a
 * pair of hPrintf calls per result row.  pl is not read in this function. */
{
char query[512], **row;
struct sqlResult *sr;
/* When isRgdGene(conn) is true the join goes through rgdGene2KeggPathway and
 * rgdGene2; otherwise it goes through keggPathway and kgXref.  Both queries
 * select locusID, mapID, description in that order. */
if (isRgdGene(conn))
{
sqlSafef(query, sizeof(query),
"select distinct k.locusID, k.mapID, keggMapDesc.description"
" from rgdGene2KeggPathway k, keggMapDesc, rgdGene2 x"
" where k.rgdId=x.name "
" and x.name='%s'"
" and k.mapID = keggMapDesc.mapID"
, geneId);
}
else
{
sqlSafef(query, sizeof(query),
"select k.locusID, k.mapID, keggMapDesc.description"
" from keggPathway k, keggMapDesc, kgXref x"
" where k.kgID=x.kgId "
" and x.kgID='%s'"
" and k.mapID = keggMapDesc.mapID"
, geneId);
}
sr = sqlGetResult(conn, query);
/* row[0] is locusID, row[1] is mapID, row[2] is description.
 * NOTE(review): the first format string below is empty although two
 * arguments are passed, and the second literal spans a line break; markup
 * may have been lost from this copy -- confirm against the repository. */
while ((row = sqlNextRow(sr)) != NULL)
{
hPrintf("",
row[1], row[0]);
hPrintf("%s - %s
", row[1], row[2]);
}
sqlFreeResult(&sr);
}
static int keggCount(struct pathwayLink *pl, struct sqlConnection *conn,
        char *geneId)
/* Return how many KEGG pathway rows join to geneId.  RGD gene sets are
 * counted via rgdGene2KeggPathway/rgdGene2, all others via
 * keggPathway/kgXref. */
{
char sql[256];

if (isRgdGene(conn))
    {
    sqlSafef(sql, sizeof(sql),
        "select count(*) from rgdGene2KeggPathway k, rgdGene2 x where k.rgdId=x.name and x.name='%s'", geneId);
    }
else
    {
    sqlSafef(sql, sizeof(sql),
        "select count(*) from keggPathway k, kgXref x where k.kgID=x.kgId and x.kgId='%s'", geneId);
    }
return sqlQuickNum(conn, sql);
}
static void bioCycLink(struct pathwayLink *pl, struct sqlConnection *conn,
        char *geneId)
/* Print out bioCyc database link.
 * Selects (mapId, description) pairs for geneId ordered by mapId and prints
 * each distinct mapId once, together with an upper-cased copy of the global
 * genome name.  pl is not read in this function.
 * NOTE(review): the first hPrintf format string is empty although two
 * arguments are passed, and the second literal spans a line break; markup
 * may have been lost from this copy -- confirm against the repository. */
{
char query[512], **row;
struct sqlResult *sr;
char *oldMapId = cloneString("");
char *upperCaseGenome = cloneString(genome);
toUpperN(upperCaseGenome, strlen(upperCaseGenome));
sqlSafef(query, sizeof(query),
        "select bioCycPathway.mapId,description"
        " from bioCycPathway,bioCycMapDesc"
        " where bioCycPathway.kgId='%s'"
        " and bioCycPathway.mapId = bioCycMapDesc.mapId order by bioCycPathway.mapId"
        , geneId);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    /* only print new ones */
    if (!sameWord(oldMapId, row[0]))
        {
        hPrintf("",
            upperCaseGenome, row[0]);
        hPrintf("%s - %s
\n", row[0], row[1]);
        }
    /* Release the previous copy before remembering this row, so that only
     * one clone is alive at a time instead of one leaked per row. */
    freez(&oldMapId);
    oldMapId = cloneString(row[0]);
    }
sqlFreeResult(&sr);
/* Both strings were cloned above and are owned by this function. */
freez(&oldMapId);
freez(&upperCaseGenome);
}
static int bioCycCount(struct pathwayLink *pl, struct sqlConnection *conn,
        char *geneId)
/* Return the number of bioCycPathway rows whose kgID equals geneId. */
{
char sql[256];

sqlSafef(sql, sizeof(sql),
        "select count(*) from bioCycPathway where kgID='%s'", geneId);
return sqlQuickNum(conn, sql);
}
static char *getCgapId(struct sqlConnection *conn)
/* Look up the cgapId stored in cgapAlias for the alias curGeneName and
 * return whatever sqlQuickString yields for that query. */
{
char sql[256];

sqlSafef(sql, sizeof(sql),
        "select cgapId from cgapAlias where alias=\"%s\"", curGeneName);
return sqlQuickString(conn, sql);
}
static void reactomeLink(struct pathwayLink *pl, struct sqlConnection *conn,
char *geneId)
/* Print out Reactome links.
 * Looks up the protein ID for geneId, prints a heading for it, then prints
 * one entry per row of proteome.spReactomeEvent for that protein.  Returns
 * without printing when the cross-reference table is missing or no protein
 * ID is found.  pl is not read in this function.
 * NOTE(review): the lookups here use the global database, whereas
 * reactomeCount in this change switches to sqlGetDatabase(conn); confirm
 * whether this function should follow.
 * NOTE(review): both hPrintf literals span line breaks and pass more
 * arguments than there are visible conversions; markup may have been lost
 * from this copy -- confirm against the repository. */
{
char condStr[255];
char *spID, *chp;
struct sqlConnection *conn2;
char query2[256];
struct sqlResult *sr2;
char **row2;
char *eventDesc;
char *eventID;
/* Bail out when the cross-reference table for this gene set (rgdGene2Xref
 * or kgXref) does not exist.
 * NOTE(review): the RGD branch checks rgdGene2Xref but the lookup below
 * reads rgdGene2ToUniProt -- confirm this is intended. */
if (isRgdGene(conn))
{
if (!sqlTableExists(conn, "rgdGene2Xref")) return;
}
else
{
if (!sqlTableExists(conn, "kgXref")) return;
}
/* Fetch the protein ID: column value of rgdGene2ToUniProt for RGD gene
 * sets, column spID of kgXref otherwise. */
if (isRgdGene(conn))
{
sqlSafefFrag(condStr, sizeof(condStr), "name='%s'", geneId);
spID = sqlGetField(database, "rgdGene2ToUniProt", "value", condStr);
}
else
{
sqlSafefFrag(condStr, sizeof(condStr), "kgID='%s'", geneId);
spID = sqlGetField(database, "kgXref", "spID", condStr);
}
if (spID != NULL)
{
/* convert splice variant UniProt ID to its main root ID */
/* The string is cut in place at the first dash, so the shortened ID is
 * what gets printed and queried below. */
chp = strstr(spID, "-");
if (chp != NULL) *chp = '\0';
hPrintf(
"
Protein %s (Reactome details) participates in the following event(s):
"
, spID, spID);
/* A second connection is allocated for the event query and released at the
 * end of this branch. */
conn2= hAllocConn(database);
sqlSafef(query2,sizeof(query2),
"select eventID, eventDesc from proteome.spReactomeEvent where spID='%s'", spID);
sr2 = sqlMustGetResult(conn2, query2);
row2 = sqlNextRow(sr2);
while (row2 != NULL)
{
eventID = row2[0];
eventDesc = row2[1];
hPrintf(
"%s %s
\n",
eventID, eventID, eventDesc);
row2 = sqlNextRow(sr2);
}
sqlFreeResult(&sr2);
hFreeConn(&conn2);
}
}
static void rgdPathwayLink(struct pathwayLink *pl, struct sqlConnection *conn,
char *geneId)
/* Print out RGD pathway links.
 * Joins rgdPathway with rgdGenePathway on pathwayId, restricted to rows
 * whose geneId column equals geneId, and issues a pair of hPrintf calls per
 * result row.  pl is not read in this function. */
{
char query[512], **row;
struct sqlResult *sr;
/* The gene ID is used directly as the RGD ID. */
char *rgdId = geneId;
sqlSafef(query, sizeof(query),
"select x.pathwayId, description from rgdPathway p, rgdGenePathway x "
" where p.pathwayId = x.pathwayId "
" and x.geneId = '%s'"
, rgdId);
sr = sqlGetResult(conn, query);
/* row[0] is pathwayId, row[1] is description.
 * NOTE(review): the first format string below is empty although one
 * argument is passed, and the second literal spans a line break; markup may
 * have been lost from this copy -- confirm against the repository. */
while ((row = sqlNextRow(sr)) != NULL)
{
hPrintf("", row[0]);
hPrintf("%s - %s
\n", row[0], row[1]);
}
sqlFreeResult(&sr);
}
static void bioCartaLink(struct pathwayLink *pl, struct sqlConnection *conn,
        char *geneId)
/* Print out bioCarta database link.
 * Resolves the CGAP ID through getCgapId(conn), then prints each distinct
 * mapID from cgapBiocPathway/cgapBiocDesc once.  Prints nothing when no
 * CGAP ID is found.  pl and geneId are not read in this function.
 * NOTE(review): the first hPrintf format string is empty although one
 * argument is passed, and the second literal spans a line break; markup may
 * have been lost from this copy -- confirm against the repository. */
{
char *cgapId = getCgapId(conn);
if (cgapId != NULL)
    {
    struct hash *uniqHash = newHash(8);
    char query[512], **row;
    struct sqlResult *sr;
    sqlSafef(query, sizeof(query),
        "select cgapBiocDesc.mapID,cgapBiocDesc.description "
        " from cgapBiocPathway,cgapBiocDesc"
        " where cgapBiocPathway.cgapID='%s'"
        " and cgapBiocPathway.mapID = cgapBiocDesc.mapID"
        , cgapId);
    sr = sqlGetResult(conn, query);
    while ((row = sqlNextRow(sr)) != NULL)
        {
        char *name = row[0];
        /* uniqHash remembers the mapIDs already printed. */
        if (!hashLookup(uniqHash, name))
            {
            hashAdd(uniqHash, name, NULL);
            hPrintf("", row[0]);
            hPrintf("%s - %s
\n", row[0], row[1]);
            }
        }
    /* Release the result set, as the other link printers in this file do
     * after their row loops. */
    sqlFreeResult(&sr);
    freez(&cgapId);
    hashFree(&uniqHash);
    }
}
static int bioCartaCount(struct pathwayLink *pl, struct sqlConnection *conn,
        char *geneId)
/* Return the number of cgapBiocPathway rows for the CGAP ID obtained from
 * getCgapId(conn), or 0 when no CGAP ID is found. */
{
char sql[256];
int hits;
char *cgapId = getCgapId(conn);

if (cgapId == NULL)
    return 0;

sqlSafef(sql, sizeof(sql),
        "select count(*) from cgapBiocPathway where cgapID='%s'", cgapId);
hits = sqlQuickNum(conn, sql);
freez(&cgapId);
return hits;
}
static int rgdPathwayCount(struct pathwayLink *pl, struct sqlConnection *conn,
        char *geneId)
/* Return the number of rgdGenePathway rows whose geneId column equals
 * geneId. */
{
char sql[256];

sqlSafef(sql, sizeof(sql),
        "select count(*) from rgdGenePathway where geneId ='%s'", geneId);
return sqlQuickNum(conn, sql);
}
static int reactomeCount(struct pathwayLink *pl, struct sqlConnection *conn,
char *geneId)
/* Count up number of hits.
 * Returns 0 when the cross-reference table is missing or no protein ID is
 * found for geneId; otherwise returns the result of a count(*) query that
 * joins spReactomeEvent and spVariant (in PROTEOME_DB_NAME) with the gene
 * cross-reference table.  pl is not read in this function. */
{
int ret = 0;
char query[256];
char *spID, *chp;
char condStr[256];
char *origSpID;
/* Return 0 when the cross-reference table for this gene set (kgXref or
 * rgdGene2Xref) does not exist.
 * NOTE(review): the RGD branch checks rgdGene2Xref but the lookup below
 * reads rgdGene2ToUniProt -- confirm this is intended. */
if (!isRgdGene(conn))
{
if (!sqlTableExists(conn, "kgXref")) return(0);
}
else
{
if (!sqlTableExists(conn, "rgdGene2Xref")) return(0);
}
/* Fetch the protein ID: column value of rgdGene2ToUniProt for RGD gene
 * sets, column spID of kgXref otherwise. */
if (isRgdGene(conn))
{
sqlSafefFrag(condStr, sizeof(condStr), "name='%s'", geneId);
- spID = sqlGetField(database, "rgdGene2ToUniProt", "value", condStr);
+ spID = sqlGetField(sqlGetDatabase(conn), "rgdGene2ToUniProt", "value", condStr);
}
else
{
sqlSafefFrag(condStr, sizeof(condStr), "kgID='%s'", geneId);
- spID = sqlGetField(database, "kgXref", "spID", condStr);
+ spID = sqlGetField(sqlGetDatabase(conn), "kgXref", "spID", condStr);
}
if (spID != NULL)
{
/* Keep an unmodified copy; the queries below match on this full ID. */
origSpID = cloneString(spID);
/* convert splice variant UniProt ID to its main root ID */
/* NOTE(review): spID is cut at the first dash here but is not read again
 * in this function; only origSpID is used afterwards.  Neither string is
 * freed here -- confirm ownership of the sqlGetField result. */
chp = strstr(spID, "-");
if (chp != NULL) *chp = '\0';
if (!isRgdGene(conn))
{
sqlSafef(query, sizeof(query),
"select count(*) from %s.spReactomeEvent, %s.spVariant, %s.kgXref where kgID='%s' and kgXref.spID=variant and variant = '%s' and spReactomeEvent.spID=parent",
- PROTEOME_DB_NAME, PROTEOME_DB_NAME, database, geneId, origSpID);
+ PROTEOME_DB_NAME, PROTEOME_DB_NAME, sqlGetDatabase(conn), geneId, origSpID);
}
else
{
sqlSafef(query, sizeof(query),
"select count(*) from %s.spReactomeEvent, %s.spVariant, %s.rgdGene2ToUniProt where name='%s' and value=variant and variant = '%s' and spReactomeEvent.spID=parent",
- PROTEOME_DB_NAME, PROTEOME_DB_NAME, database, geneId, origSpID);
+ PROTEOME_DB_NAME, PROTEOME_DB_NAME, sqlGetDatabase(conn), geneId, origSpID);
}
ret = sqlQuickNum(conn, query);
}
return ret;
}
/* Table of the pathway databases handled by this section.  Each entry lists,
 * in struct pathwayLink field order: symbolic name, short label, long label,
 * space-separated required tables, count callback, print callback. */
struct pathwayLink pathwayLinks[] =
    {
        {
        "kegg",
        "KEGG",
        "KEGG - Kyoto Encyclopedia of Genes and Genomes",
        "keggPathway keggMapDesc",
        keggCount,
        keggLink
        },
        {
        "bioCyc",
        "BioCyc",
        "BioCyc Knowledge Library",
        "bioCycPathway bioCycMapDesc",
        bioCycCount,
        bioCycLink
        },
        {
        "bioCarta",
        "BioCarta",
        "BioCarta from NCI Cancer Genome Anatomy Project",
        "cgapBiocPathway cgapBiocDesc cgapAlias",
        bioCartaCount,
        bioCartaLink
        },
        {
        "reactome",
        "Reactome",
        "Reactome (by CSHL, EBI, and GO)",
        "proteome.spReactomeEvent",
        reactomeCount,
        reactomeLink
        },
        {
        "rgdPathway",
        "RGDPathway",
        "RGD Pathway",
        "rgdPathway rgdGenePathway",
        rgdPathwayCount,
        rgdPathwayLink
        },
    };
static boolean pathwayExists(struct pathwayLink *pl,
        struct sqlConnection *conn, char *geneId)
/* Return TRUE only when every table listed in pl->tables exists and the
 * count callback of pl reports at least one item for geneId.  The count
 * callback is not invoked when a table is missing. */
{
return sqlTablesExist(conn, pl->tables) && pl->count(pl, conn, geneId) > 0;
}
static boolean pathwaysExists(struct section *section,
struct sqlConnection *conn, char *geneId)
/* Return TRUE if there is some pathway info on this one. */
/* NOTE(review): the text below looks truncated -- the for statement has no
 * complete condition, pl is used without a visible declaration, and
 * pathwaysPrint (referenced by pathwaysSection) has no visible definition.
 * Part of this function and the start of the next one appear to be missing
 * from this copy; confirm against the repository before editing. */
{
int i;
for (i=0; i\n");
hPrintf("%s
", pl->longLabel);
pl->printLinks(pl, conn, geneId);
}
}
}
struct section *pathwaysSection(struct sqlConnection *conn,
struct hash *sectionRa)
/* Create pathways section. */
{
struct section *section = sectionNew(sectionRa, "pathways");
section->exists = pathwaysExists;
section->print = pathwaysPrint;
return section;
}