e323d595c241d03570d380d8c60de60e99c1c4ec braney Fri Mar 21 12:36:21 2025 -0700 add support for bigDbSnp to quicklift diff --git src/hg/hgc/hgc.c src/hg/hgc/hgc.c index b51fa89417f..8ec8aebd78d 100644 --- src/hg/hgc/hgc.c +++ src/hg/hgc/hgc.c @@ -11004,31 +11004,35 @@ printf("<HR>"); printPosOnChrom(chrom, atoi(chromStart), atoi(chromEnd), NULL, FALSE, itemName); } void doCosmic(struct trackDb *tdb, char *item) /* Put up COSMIC track info. */ { genericHeader(tdb, item); printCosmicDetails(tdb, item); printTrackHtml(tdb); } void printDecipherSnvsDetails(struct trackDb *tdb, char *itemName, boolean encode) /* Print details of a DECIPHER entry. */ { -struct sqlConnection *conn = hAllocConn(database); +char *db = database; +char *liftDb = cloneString(trackDbSetting(tdb, "quickLiftDb")); +if (liftDb != NULL) + db = liftDb; +struct sqlConnection *conn = hAllocConn(db); char query[256]; struct sqlResult *sr; char **row; char *strand={"+"}; int start = cartInt(cart, "o"); int end = cartInt(cart, "t"); char *chrom = cartString(cart, "c"); /* So far, we can just remove "chr" from UCSC chrom names to get DECIPHER names */ char *decipherChrom = chrom; if (startsWithNoCase("chr", decipherChrom)) decipherChrom += 3; printf("<H3>Patient %s </H3>", itemName); @@ -11122,35 +11126,39 @@ hFreeConn(&conn); } void doDecipherSnvs(struct trackDb *tdb, char *item, char *itemForUrl) /* Put up DECIPHER track info. */ { genericHeader(tdb, item); printDecipherSnvsDetails(tdb, item, FALSE); printTrackHtml(tdb); } void printDecipherCnvsDetails(struct trackDb *tdb, char *itemName, boolean encode) /* Print details of a DECIPHER entry. */ { -struct sqlConnection *conn = hAllocConn(database); +char *db = database; +char *liftDb = cloneString(trackDbSetting(tdb, "quickLiftDb")); +if (liftDb != NULL) + db = liftDb; +struct sqlConnection *conn = hAllocConn(db); char query[256]; struct sqlResult *sr; char **row; -struct sqlConnection *conn2 = hAllocConn(database); +struct sqlConnection *conn2 = hAllocConn(db); char query2[256]; struct sqlResult *sr2; char **row2; char *strand={"+"}; int start = cartInt(cart, "o"); int end = cartInt(cart, "t"); char *chrom = cartString(cart, "c"); /* So far, we can just remove "chr" from UCSC chrom names to get DECIPHER names */ char *decipherChrom = chrom; if (startsWithNoCase("chr", decipherChrom)) decipherChrom += 3; printf("<H3>Patient %s </H3>", itemName); @@ -18620,40 +18628,40 @@ } else ranOffEnd = TRUE; } } } if (! ranOffEnd) { struct dnaSeq *seq = hDnaFromSeq(database, gene->chrom, snpPlusOffset, snpPlusOffset+1, dnaUpper); base = seq->dna[0]; } return base; } -char *getSymbolForGeneName(char *geneTable, char *geneId) +char *getSymbolForGeneName(char *db, char *geneTable, char *geneId) /* Given a gene track and gene accession, look up the symbol if we know where to look * and if we find it, return a string with both symbol and acc. */ { struct dyString *dy = dyStringNew(32); char buf[256]; char *sym = NULL; if (sameString(geneTable, "knownGene") || sameString(geneTable, "refGene")) { - struct sqlConnection *conn = hAllocConn(database); + struct sqlConnection *conn = hAllocConn(db); char query[256]; query[0] = '\0'; if (sameString(geneTable, "knownGene")) sqlSafef(query, sizeof(query), "select geneSymbol from kgXref where kgID = '%s'", geneId); else if (sameString(geneTable, "refGene")) sqlSafef(query, sizeof(query), "select name from %s where mrnaAcc = '%s'", refLinkTable, geneId); sym = sqlQuickQuery(conn, query, buf, sizeof(buf)-1); hFreeConn(&conn); } if (sym != NULL) dyStringPrintf(dy, "%s (%s)", sym, geneId); else dyStringAppend(dy, geneId); return dyStringCannibalize(&dy); } @@ -18800,39 +18808,39 @@ geneTrack, geneName, snpMisoLinkFromFunc("stop_retained_variant"), refAA, refCodonHtml, snpAA, snpCodonHtml); else printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)\n", geneTrack, geneName, snpMisoLinkFromFunc("coding-synon"), refAA, refCodonHtml, snpAA, snpCodonHtml); } } else printf(firstTwoColumnsPctS "%s %s --> %s\n", geneTrack, geneName, snpMisoLinkFromFunc("cds-synonymy-unknown"), abbreviateAllele(refAllele), abbreviateAllele(al)); } } -void printSnp125FunctionInGene(struct snp125 *snp, char *geneTable, char *geneTrack, +void printSnp125FunctionInGene(char *db, struct snp125 *snp, char *geneTable, char *geneTrack, struct genePred *gene) /* Given a SNP and a gene that overlaps it, say where in the gene it overlaps * and if in CDS, say what effect the coding alleles have. */ { int snpStart = snp->chromStart, snpEnd = snp->chromEnd; int cdsStart = gene->cdsStart, cdsEnd = gene->cdsEnd; boolean geneIsRc = sameString(gene->strand, "-"); -char *geneName = getSymbolForGeneName(geneTable, gene->name); +char *geneName = getSymbolForGeneName(db, geneTable, gene->name); int i, iStart = 0, iEnd = gene->exonCount, iIncr = 1; if (geneIsRc) { iStart = gene->exonCount - 1; iEnd = -1; iIncr = -1; } for (i = iStart; i != iEnd; i += iIncr) { int exonStart = gene->exonStarts[i], exonEnd = gene->exonEnds[i]; if (snpEnd > exonStart && snpStart < exonEnd) { if (snpEnd > cdsStart && snpStart < cdsEnd) printSnp125FunctionInCDS(snp, geneTable, geneTrack, gene, i, geneName); else if (cdsEnd > cdsStart) { boolean is5Prime = ((geneIsRc && (snpStart >= cdsEnd)) || (!geneIsRc && (snpEnd < cdsStart))); printf(firstTwoColumnsPctS "%s\n", geneTrack, geneName, @@ -18880,176 +18888,184 @@ { struct sqlResult *sr; char query[512]; char **row; int snpStart = snp->chromStart, snpEnd = snp->chromEnd; int nearCount = 0; int maxDistance = 10000; /* query to the left: */ sqlSafef(query, sizeof(query), "select name,txEnd,strand from %s " "where chrom = '%s' and txStart < %d and txEnd > %d", geneTable, snp->chrom, snpStart, snpStart - maxDistance); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { char *gene = row[0]; - char *geneName = getSymbolForGeneName(geneTable, gene); + char *geneName = getSymbolForGeneName(sqlGetDatabase(conn), geneTable, gene); int end = sqlUnsigned(row[1]); char *strand = row[2]; boolean isRc = strand[0] == '-'; printf(firstTwoColumnsPctS "%s (%d bases %sstream)</TD></TR>\n", geneTrack, geneName, snpMisoLinkFromFunc(isRc ? "near-gene-5" : "near-gene-3"), (snpStart - end + 1), (isRc ? "up" : "down")); nearCount++; } sqlFreeResult(&sr); /* query to the right: */ sqlSafef(query, sizeof(query), "select name,txStart,strand from %s " "where chrom = '%s' and txStart < %d and txEnd > %d", geneTable, snp->chrom, snpEnd + maxDistance, snpEnd); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { char *gene = row[0]; - char *geneName = getSymbolForGeneName(geneTable, gene); + char *geneName = getSymbolForGeneName(sqlGetDatabase(conn), geneTable, gene); int start = sqlUnsigned(row[1]); char *strand = row[2]; boolean isRc = strand[0] == '-'; printf(firstTwoColumnsPctS "%s (%d bases %sstream)</TD></TR>\n", geneTrack, geneName, snpMisoLinkFromFunc(isRc ? "near-gene-3" : "near-gene-5"), (start - snpEnd + 1), (isRc ? "down" : "up")); nearCount++; } sqlFreeResult(&sr); if (nearCount == 0) printf("<TR><TD>%s </TD><TD></TD><TD>%s</TD></TR>", geneTrack, snpMisoLinkFromFunc("intergenic_variant")); } static struct genePred *getGPsWithFrames(struct sqlConnection *conn, char *geneTable, char *chrom, int start, int end) /* Given a known-to-exist genePred table name and a range, return * genePreds in range with exonFrames populated. */ { struct genePred *gpList = NULL; boolean hasBin; struct sqlResult *sr = hRangeQuery(conn, geneTable, chrom, start, end, NULL, &hasBin); -struct sqlConnection *conn2 = hAllocConn(database); +struct sqlConnection *conn2 = hAllocConn(sqlGetDatabase(conn)); boolean hasFrames = (sqlFieldIndex(conn2, geneTable, "exonFrames") == hasBin + 14); char **row; while ((row = sqlNextRow(sr)) != NULL) { int fieldCount = hasBin + (hasFrames ? 15 : 10); struct genePred *gp; if (hasFrames) { gp = genePredExtLoad(row+hasBin, fieldCount); // Some tables have an exonFrames column but it's empty... if (gp->exonFrames == NULL) genePredAddExonFrames(gp); } else { gp = genePredLoad(row+hasBin); genePredAddExonFrames(gp); } slAddHead(&gpList, gp); } sqlFreeResult(&sr); hFreeConn(&conn2); return gpList; } -void printSnp125FunctionShared(struct snp125 *snp, struct slName *geneTracks) +void printSnp125FunctionShared(char *db, struct snp125 *snp, struct slName *geneTracks) { -struct sqlConnection *conn = hAllocConn(database); +struct sqlConnection *conn = hAllocConn(db); struct slName *gt; boolean first = TRUE; for (gt = geneTracks; gt != NULL; gt = gt->next) if (sqlTableExists(conn, gt->name)) { if (first) { printf("<BR><B>UCSC's predicted function relative to selected gene tracks:</B>\n"); printf("<TABLE border=0 cellspacing=0 cellpadding=0>\n"); } struct genePred *geneList = getGPsWithFrames(conn, gt->name, snp->chrom, snp->chromStart, snp->chromEnd); struct genePred *gene; char query[256]; char buf[256]; sqlSafef(query, sizeof(query), "select shortLabel from trackDb where tableName='%s'", gt->name); char *shortLabel = sqlQuickQuery(conn, query, buf, sizeof(buf)-1); if (shortLabel == NULL) shortLabel = gt->name; for (gene = geneList; gene != NULL; gene = gene->next) - printSnp125FunctionInGene(snp, gt->name, shortLabel, gene); + printSnp125FunctionInGene(db, snp, gt->name, shortLabel, gene); if (geneList == NULL) printSnp125NearGenes(conn, snp, gt->name, shortLabel); first = FALSE; } if (! first) printf("</TABLE>\n"); hFreeConn(&conn); } void printSnp125Function(struct trackDb *tdb, struct snp125 *snp) /* If the user has selected a gene track for functional annotation, * report how this SNP relates to any nearby genes. */ { char varName[512]; safef(varName, sizeof(varName), "%s_geneTrack", tdb->track); struct slName *geneTracks = cartOptionalSlNameList(cart, varName); if (geneTracks == NULL && !cartListVarExists(cart, varName)) { char *defaultGeneTracks = trackDbSetting(tdb, "defaultGeneTracks"); if (isNotEmpty(defaultGeneTracks)) geneTracks = slNameListFromComma(defaultGeneTracks); else return; } -printSnp125FunctionShared(snp, geneTracks); +char *db = database; +char *liftDb = cloneString(trackDbSetting(tdb, "quickLiftDb")); +if (liftDb != NULL) + db = liftDb; +printSnp125FunctionShared(db, snp, geneTracks); } void printSnp153Function(struct trackDb *tdb, struct snp125 *snp) /* If the user has selected a gene track for functional annotation, * report how this SNP relates to any nearby genes. */ { struct slName *geneTracks = NULL; struct trackDb *correctTdb = tdbOrAncestorByName(tdb, tdb->track); struct slName *defaultGeneTracks = slNameListFromComma(trackDbSetting(tdb, "defaultGeneTracks")); -struct trackDb *geneTdbList = snp125FetchGeneTracks(database, cart); +char *db = database; +char *liftDb = cloneString(trackDbSetting(tdb, "quickLiftDb")); +if (liftDb != NULL) + db = liftDb; +struct trackDb *geneTdbList = snp125FetchGeneTracks(db, cart); struct trackDb *gTdb; for (gTdb = geneTdbList; gTdb; gTdb=gTdb->next) { char *trackName = gTdb->track; char suffix[512]; safef(suffix, sizeof(suffix), "geneTrack.%s", trackName); boolean option = cartUsualBooleanClosestToHome(cart, correctTdb, FALSE, suffix, slNameInList(defaultGeneTracks,trackName)); if (option) { slNameAddHead(&geneTracks, trackName); } } if (geneTracks) - printSnp125FunctionShared(snp, geneTracks); + printSnp125FunctionShared(db, snp, geneTracks); } char *dbSnpFuncFromInt(unsigned char funcCode) /* Translate an integer function code from NCBI into an abbreviated description. * Do not free return value! */ // Might be a good idea to flesh this out with all codes, libify, and share with // snpNcbiToUcsc instead of partially duplicating. { switch (funcCode) { case 3: return "coding-synon"; case 8: return "cds-reference"; case 41: @@ -19060,38 +19076,38 @@ return "stop-loss"; case 44: return "frameshift"; case 45: return "cds-indel"; default: { static char buf[16]; safef(buf, sizeof(buf), "%d", funcCode); return buf; } } } -void printSnp125CodingAnnotations(struct trackDb *tdb, struct snp125 *snp) +void printSnp125CodingAnnotations(char *db, struct trackDb *tdb, struct snp125 *snp) /* If tdb specifies extra table(s) that contain protein-coding annotations, * show the effects of SNP on transcript coding sequences. */ { char *tables = trackDbSetting(tdb, "codingAnnotations"); if (isEmpty(tables)) return; -struct sqlConnection *conn = hAllocConn(database); +struct sqlConnection *conn = hAllocConn(db); struct slName *tbl, *tableList = slNameListFromString(tables, ','); struct dyString *query = dyStringNew(0); for (tbl = tableList; tbl != NULL; tbl = tbl->next) { if (!sqlTableExists(conn, tbl->name)) continue; char setting[512]; safef(setting, sizeof(setting), "codingAnnoLabel_%s", tbl->name); char *label = trackDbSettingOrDefault(tdb, setting, NULL); if (label == NULL && endsWith(tbl->name, "DbSnp")) label = "dbSNP"; else label = tbl->name; boolean hasBin = hIsBinned(database, tbl->name); boolean hasCoords = (sqlFieldIndex(conn, tbl->name, "chrom") != -1); @@ -19108,31 +19124,31 @@ if (first) { printf("<BR><B>Coding annotations by %s:</B><BR>\n", label); first = FALSE; } struct snp125CodingCoordless *anno = snp125CodingCoordlessLoad(row+rowOffset); int i; boolean gotRef = (anno->funcCodes[0] == 8); for (i = 0; i < anno->alleleCount; i++) { memSwapChar(anno->peptides[i], strlen(anno->peptides[i]), 'X', '*'); if (anno->funcCodes[i] == 8) continue; char *txName = anno->transcript; if (startsWith("NM_", anno->transcript)) - txName = getSymbolForGeneName("refGene", anno->transcript); + txName = getSymbolForGeneName(db, "refGene", anno->transcript); char *func = dbSnpFuncFromInt(anno->funcCodes[i]); printf("%s: %s ", txName, snpMisoLinkFromFunc(func)); if (sameString(func, "frameshift") || sameString(func, "cds-indel")) { puts("<BR>"); continue; } if (gotRef) printf("%s (%s) --> ", anno->peptides[0], highlightCodonBase(anno->codons[0], anno->frame)); printf("%s (%s)<BR>\n", anno->peptides[i], highlightCodonBase(anno->codons[i], anno->frame)); } } sqlFreeResult(&sr); @@ -19240,31 +19256,35 @@ snp->locType); printf("<TR><TD><B><A HREF=\"#Class\">Class</A></B></TD><TD>%s</TD></TR>\n", snp->class); printf("<TR><TD><B><A HREF=\"#Valid\">Validation</A></B></TD><TD>%s</TD></TR>\n", snp->valid); printf("<TR><TD><B><A HREF=\"#Func\">Function</A></B></TD><TD>%s</TD></TR>\n", snpMisoLinkFromFunc(snp->func)); printf("<TR><TD><B><A HREF=\"#MolType\">Molecule Type</A> </B></TD><TD>%s</TD></TR>\n", snp->molType); if (snp->avHet>0) printf("<TR><TD><B><A HREF=\"#AvHet\">Average Heterozygosity</A> </TD>" "<TD></B>%.3f +/- %.3f</TD></TR>\n", snp->avHet, snp->avHetSE); printf("<TR><TD><B><A HREF=\"#Weight\">Weight</A></B></TD><TD>%d</TD></TR>\n", snp->weight); if (version >= 132) printSnp132ExtraColumns(tdb, snp); else printf("</TABLE>\n"); -printSnp125CodingAnnotations(tdb, snp125); +char *db = database; +char *liftDb = cloneString(trackDbSetting(tdb, "quickLiftDb")); +if (liftDb != NULL) + db = liftDb; +printSnp125CodingAnnotations(db, tdb, snp125); writeSnpExceptionWithVersion(tdb, snp, version); printSnp125Function(tdb, snp125); } static char *getExcDescTable(struct trackDb *tdb) /* Look up snpExceptionDesc in tdb and provide default if not found. Don't free return value! */ { static char excDescTable[128]; char *excDescTableSetting = trackDbSetting(tdb, "snpExceptionDesc"); if (excDescTableSetting) safecpy(excDescTable, sizeof(excDescTable), excDescTableSetting); else safef(excDescTable, sizeof(excDescTable), "%sExceptionDesc", tdb->table); return excDescTable; }