8165601dddc27978eb4a2fe2654ba81212e30473
braney
Sat May 31 11:46:07 2025 -0700
hgGene changes for quickLift
diff --git src/hg/hgGene/hgGene.c src/hg/hgGene/hgGene.c
index c2ae503eab7..242431a5256 100644
--- src/hg/hgGene/hgGene.c
+++ src/hg/hgGene/hgGene.c
@@ -12,36 +12,42 @@
#include "htmshell.h"
#include "cart.h"
#include "hui.h"
#include "dbDb.h"
#include "hdb.h"
#include "dupTrack.h"
#include "web.h"
#include "botDelay.h"
#include "ra.h"
#include "spDb.h"
#include "genePred.h"
#include "hgColors.h"
#include "hgGene.h"
#include "obscure.h"
#include "genbank.h"
+#include "liftOver.h"
+#include "chain.h"
+#include "bigChain.h"
+#include "chainNetDbLoad.h"
+#include "trackHub.h"
/* ---- Global variables. ---- */
struct cart *cart; /* This holds cgi and other variables between clicks. */
struct hash *oldVars; /* Old cart hash. */
char *database; /* Name of genome database - hg15, mm3, or the like. */
+char *liftDb; /* Value of the gene track's quickLiftDb setting; NULL when not quickLifting. */
char *genome; /* Name of genome - mouse, human, etc. */
char *curGeneId; /* Current Gene Id. */
char *curGeneName; /* Biological name of gene. */
char *curGeneChrom; /* Chromosome current gene is on. */
char *curAlignId; /* Align id from knownGene genePred */
struct trackDb *globalTdb; /* trackDb entry of the gene track being displayed. */
struct genePred *curGenePred; /* Current gene prediction structure. */
boolean isGencode; /* is this based on the Gencode models */
boolean isGencode2; /* is this based on the Gencode models and use ensembl id as primary id */
boolean isGencode3; /* is this based on the Gencode models and use ensembl id as primary id */
int curGeneStart,curGeneEnd; /* Position in chromosome. */
struct sqlConnection *spConn; /* Connection to SwissProt database. */
char *swissProtAcc; /* SwissProt accession (may be NULL). */
int kgVersion = KG_UNKNOWN; /* KG version */
int measureTiming = FALSE;
@@ -62,31 +68,32 @@
"options:\n"
" -hgsid=XXX Session ID to grab vars from session database\n"
" -db=XXX Genome database associated with gene\n"
" -org=XXX Organism associated with gene\n"
" -hgg_gene=XXX ID of gene\n"
);
}
/* --------------- Low level utility functions. ----------------- */
static char *rootDir = "hgGeneData";
struct hash *readRa(char *rootName, struct hash **retHashOfHash)
/* Read in ra in root, root/org, and root/org/database.
 * When liftDb is set, liftDb is passed to hgReadRa in place of database,
 * so the settings read are those of the quickLift source assembly. */
{
-return hgReadRa(genome, database, rootDir, rootName, retHashOfHash);
+char *db = (liftDb != NULL) ? liftDb : database;
+return hgReadRa(genome, db, rootDir, rootName, retHashOfHash);
}
static struct hash *genomeSettings; /* Genome-specific settings from settings.ra. */
char *genomeSetting(char *name)
/* Look up a required genome setting by name in genomeSettings.
 * The lookup aborts when the setting is not present. */
{
char *value = hashMustFindVal(genomeSettings, name);
return value;
}
char *genomeOptionalSetting(char *name)
/* Look up an optional genome setting by name in genomeSettings.
 * Gives back NULL when the setting is not present. */
{
char *value = hashFindVal(genomeSettings, name);
return value;
}
@@ -265,52 +272,52 @@
/* print genome position and size */
char buffer[1024];
char *commaPos;
if (isGencode || isGencode2 || isGencode3)
{
hPrintf("Gencode Transcript: %s
\n", isGencode2 || isGencode3 ? curGeneId : curAlignId);;
char buffer[1024];
hPrintf("Gencode Gene: %s
\n", getGencodeGeneId(conn, curGeneId, buffer, sizeof buffer));
}
exonCnt = curGenePred->exonCount;
safef(buffer, sizeof buffer, "%s:%d-%d", curGeneChrom, curGeneStart+1, curGeneEnd);
commaPos = addCommasToPos(database, buffer);
hPrintf("Transcript (Including UTRs)
\n");
-hPrintf(" Position: %s %s ",database, commaPos);
+hPrintf(" Position: %s %s ",trackHubSkipHubName(database), commaPos);
sprintLongWithCommas(buffer, (long long)curGeneEnd - curGeneStart);
hPrintf("Size: %s ", buffer);
hPrintf("Total Exon Count: %d ", exonCnt);
hPrintf("Strand: %s
\n",curGenePred->strand);
cdsStart = curGenePred->cdsStart;
cdsEnd = curGenePred->cdsEnd;
/* count CDS exons */
if (cdsStart < cdsEnd)
{
for (i=0; iexonEnds[i]) &&
(cdsEnd >= curGenePred->exonStarts[i]) )
cdsExonCnt++;
}
hPrintf("Coding Region
\n");
safef(buffer, sizeof buffer, "%s:%d-%d", curGeneChrom, cdsStart+1, cdsEnd);
commaPos = addCommasToPos(database, buffer);
- hPrintf(" Position: %s %s ",database, commaPos);
+ hPrintf(" Position: %s %s ",trackHubSkipHubName(database), commaPos);
sprintLongWithCommas(buffer, (long long)cdsEnd - cdsStart);
hPrintf("Size: %s ", buffer);
hPrintf("Coding Exon Count: %d \n", cdsExonCnt);
}
fflush(stdout);
}
char *sectionSetting(struct section *section, char *name)
/* Look up name in the settings hash of section and hand back
 * whatever value is stored there, if any. */
{
struct hash *settings = section->settings;
char *value = hashFindVal(settings, name);
return value;
}
char *sectionRequiredSetting(struct section *section, char *name)
/* Return section setting. Squawk and die if it doesn't exist. */
@@ -706,30 +713,53 @@
}
void doKgMethod()
/* display knownGene.html content (UCSC Known Genes
* Method, Credits, and Data Use Restrictions) */
{
cartWebStart(cart, database, "Methods, Credits, and Use Restrictions");
char *tableName = cartUsualString(cart, hggType, NULL);
if (tableName == NULL)
tableName = "knownGene";
struct trackDb *tdb = hTrackDbForTrack(database, tableName);
hPrintf("%s", tdb->html);
cartWebEnd();
}
+static void quickLiftGenePred(struct cart *cart, struct trackDb *tdb)
+/* Map curGenePred to the current db using the quickLift chains of tdb.
+ *   cart - supplies the region to load chains for (hggChrom, hggStart, hggEnd)
+ *   tdb  - gene track; its quickLiftUrl setting names the chain file
+ * Afterwards curGeneChrom, curGeneStart and curGeneEnd are refreshed from
+ * curGenePred.  Aborts if the track has no quickLiftUrl setting. */
+{
+char *chrom = cloneString(cartString(cart, hggChrom));
+int start = atoi(cartString(cart, hggStart));
+int end = atoi(cartString(cart, hggEnd));
+char *quickLiftUrl = trackDbSetting(tdb, "quickLiftUrl");
+/* Without a chain file there is nothing to lift with; fail with a clear
+ * message instead of passing NULL on to the chain loading code. */
+if (quickLiftUrl == NULL)
+    errAbort("quickLiftUrl setting missing for track %s", tdb->track);
+char *quickLiftFile = cloneString(quickLiftUrl);
+char *linkFileName = bigChainGetLinkFile(quickLiftFile);
+struct hash *chainHash = newHash(8);
+
+struct chain *chain, *chainList = chainLoadIdRangeHub(NULL, quickLiftFile, linkFileName, chrom, start, end, -1);
+for(chain = chainList; chain; chain = chain->next)
+    {
+    /* Each chain is swapped before being indexed for liftOver.
+     * NOTE(review): presumably because the chain file runs in the opposite
+     * direction to the liftDb -> current db mapping needed here - confirm. */
+    chainSwap(chain);
+
+    liftOverAddChainHash(chainHash, chain);
+    }
+/* NOTE(review): the code below assumes curGenePred is rewritten in place by
+ * the lift; what happens when no chain covers the gene is not visible here. */
+calcLiftOverGenePreds( curGenePred, chainHash, 0.0, 1.0, TRUE, NULL, NULL, TRUE, FALSE);
+curGeneChrom = curGenePred->chrom;
+curGeneStart = curGenePred->txStart;
+curGeneEnd = curGenePred->txEnd;
+}
+
void cartMain(struct cart *theCart)
/* We got the persistent/CGI variable cart. Now
* set up the globals and make a web page. */
{
if (issueBotWarning)
{
char *ip = getenv("REMOTE_ADDR");
botDelayMessage(ip, botDelayMillis);
}
cart = theCart;
getDbAndGenome(cart, &database, &genome, oldVars);
initGenbankTableNames(database);
if (cartVarExists(cart, hggDoKgMethod))
doKgMethod();
else if (cartVarExists(cart, hggDoTxInfoDescription))
@@ -757,47 +787,49 @@
struct trackDb *tdb = hTrackDbForTrack(database, tableName);
if ((tdb == NULL) && sameString(tableName, "knownGene") && differentString(database, knownDb))
{
// if no table or knownGene has been given to us, and knownGene doesn't work, try the default gene track.
tableName = hdbGetMasterGeneTrack(knownDb);
tdb = hTrackDbForTrack(database, tableName);
}
if (tdb == NULL)
hUserAbort("Error: cannot open gene track %s.", tableName);
globalTdb = tdb;
char *externalDb = trackDbSetting(tdb, "externalDb");
- char *liftDb = trackDbSetting(tdb, "quickLiftDb");
+ liftDb = trackDbSetting(tdb, "quickLiftDb");
if (externalDb != NULL)
conn = hAllocConn(externalDb);
else
{
- if (liftDb)
- {
- database = liftDb;
- genome = hGenome(database);
- }
- conn = hAllocConn(database);
+ char *db = (liftDb != NULL) ? liftDb : database;
+ genome = hGenome(db);
+ conn = hAllocConn(db);
}
getGenomeSettings();
curGeneId = findGeneId(conn, geneName);
getGenePosition(conn);
curGenePred = getCurGenePred(conn);
+
+ // if we're quickLifting we need to map the liftDb genepred to the one in the quickLift hub
+ if (liftDb != NULL)
+ quickLiftGenePred(cart, tdb);
+
curGeneName = getGeneName(curGeneId, conn);
spConn = hAllocConn(UNIPROT_DB_NAME);
swissProtAcc = getSwissProtAcc(conn, spConn, curGeneId);
if (isRgdGene(conn)) swissProtAcc=getRgdGeneUniProtAcc(curGeneId, conn);
/* Check command variables, and do the ones that
* don't want to put up the hot link bar etc. */
if (cartVarExists(cart, hggDoGetMrnaSeq))
doGetMrnaSeq(conn, curGeneId, curGeneName);
else if (cartVarExists(cart, hggDoWikiTrack))
doWikiTrack(conn);
else if (cartVarExists(cart, hggDoGetProteinSeq))
doGetProteinSeq(conn, curGeneId, curGeneName);
else if (cartVarExists(cart, hggDoRnaFoldDisplay))
doRnaFoldDisplay(conn, curGeneId, curGeneName);