82e318c7e291c143aaf0f3171fddcd12c0ec4cb2 angie Tue Oct 23 10:21:16 2012 -0700 VCF details: if name looks like a dbSNP or dbVar ID, add a link. Also a fix to how we encode HTML (<>) in allele names, and refactoring of hardcoded links to dbSNP rs report (now hardcoded in only one place instead of half dozen). diff --git src/hg/hgc/hgc.c src/hg/hgc/hgc.c index e427d82..90f23b4 100644 --- src/hg/hgc/hgc.c +++ src/hg/hgc/hgc.c @@ -1,27 +1,28 @@ /* hgc - Human Genome Click processor - gets called when user clicks * on something in human tracks display. */ #include "common.h" #include "obscure.h" #include "hCommon.h" #include "hash.h" #include "binRange.h" #include "bits.h" #include "memgfx.h" #include "hvGfx.h" #include "portable.h" +#include "regexHelper.h" #include "errabort.h" #include "dystring.h" #include "nib.h" #include "cheapcgi.h" #include "htmshell.h" #include "cart.h" #include "jksql.h" #include "dnautil.h" #include "dnaseq.h" #include "fa.h" #include "fuzzyFind.h" #include "seqOut.h" #include "hdb.h" #include "spDb.h" #include "hui.h" @@ -14801,30 +14802,42 @@ xenoDb, xenoChrom, el.xenoStart, el.xenoEnd, xenoOrg, xenoChrom, el.xenoStart, el.xenoEnd); } printf("<A HREF=\"%s&o=%d&g=getDna&i=%s&c=%s&l=%d&r=%d&strand=%s&table=%s\">" "View DNA for this feature</A><BR>\n", hgcPathAndSettings(), el.chromStart, cgiEncode(el.name), el.chrom, el.chromStart, el.chromEnd, el.strand, tbl); freez(&elname); } printTrackHtml(tdb); sqlFreeResult(&sr); hFreeConn(&conn); } +void printDbSnpRsUrl(char *rsId, char *labelFormat, ...) +/* Print a link to dbSNP's report page for an rs[0-9]+ ID. */ +{ +printf("<A HREF=\"http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?type=rs&rs=%s\" " + "TARGET=_BLANK>", rsId); +va_list args; +va_start(args, labelFormat); +vprintf(labelFormat, args); +va_end(args); +printf("</A>"); +} + char *validateOrGetRsId(char *name, struct sqlConnection *conn) /* If necessary, get the rsId from the affy120K or affy10K table, given the affyId.
rsId is more common, affy120K is next, affy10K least. * returns "valid" if name is already a valid rsId, new rsId if it is found in the affy tables, or 0 if no valid rsId is found */ { char *rsId = cloneString(name); struct affy120KDetails *a120K = NULL; struct affy10KDetails *a10K = NULL; char query[512]; if (strncmp(rsId,"rs",2)) /* is not a valid rsId, so it must be an affyId */ { safef(query, sizeof(query), /* more likely to be affy120K, so check first */ @@ -15006,39 +15019,41 @@ while ((row = sqlNextRow(sr)) != NULL) { snpMapStaticLoad(row+rowOffset, &snpMap); bedPrintPos((struct bed *)&snpMap, 3, tdb); } else while ((row = sqlNextRow(sr)) != NULL) { snpStaticLoad(row+rowOffset, &snp); bedPrintPos((struct bed *)&snp, 3, tdb); } /* write dbSnpRs details if found. */ printId = doDbSnpRs(itemName); if (printId) { - printf("<BR><A HREF=\"http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?"); + puts("<BR>"); if (sameString(printId, "valid")) { - printf("type=rs&rs=%s\" TARGET=_blank>dbSNP link</A>\n", itemName); + printDbSnpRsUrl(itemName, "dbSNP link"); + putchar('\n'); doSnpEntrezGeneLink(tdb, itemName); } else { - printf("type=rs&rs=%s\" TARGET=_blank>dbSNP link (%s)</A>\n", printId, printId); + printDbSnpRsUrl(printId, "dbSNP link (%s)", printId); + putchar('\n'); doSnpEntrezGeneLink(tdb, printId); } } printTrackHtml(tdb); sqlFreeResult(&sr); hFreeConn(&conn); } void writeSnpException(char *exceptionList, char *itemName, int rowOffset, char *chrom, int chromStart, struct trackDb *tdb) { char *tokens; struct lineFile *lf; struct tokenizer *tkz; struct snpExceptions se; @@ -15519,32 +15534,32 @@ { snpStaticLoad(row+rowOffset, &snp); if (firstOne) { exception=cloneString(snp.exception); chrom = cloneString(snp.chrom); chromStart = snp.chromStart; bedPrintPos((struct bed *)&snp, 3, tdb); printf("<BR>\n"); firstOne=0; } printSnpInfo(snp); } if (startsWith("rs",itemName)) { - printf("<A HREF=\"http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?"); - printf("type=rs&rs=%s\" 
TARGET=_blank>dbSNP</A>\n", itemName); + printDbSnpRsUrl(itemName, "dbSNP"); + putchar('\n'); doSnpEntrezGeneLink(tdb, itemName); } if (hTableExists(database, "snpExceptions") && differentString(exception,"0")) writeSnpException(exception, itemName, rowOffset, chrom, chromStart, tdb); printTrackHtml(tdb); sqlFreeResult(&sr); hFreeConn(&conn); } void doAffy120KDetails(struct trackDb *tdb, char *name) /* print additional SNP details */ { struct sqlConnection *conn = sqlConnect("hgFixed"); char query[1024]; struct affy120KDetails *snp = NULL; @@ -15564,35 +15579,35 @@ snp = affy120KDetailsLoadByQuery(conn, query); if (snp!=NULL) { printf("<BR>\n"); printf("<B>Sample Prep Enzyme:</B> <I>%s</I><BR>\n",snp->enzyme); printf("<B>Minimum Allele Frequency:</B> %.3f<BR>\n",snp->minFreq); printf("<B>Heterozygosity:</B> %.3f<BR>\n",snp->hetzyg); printf("<B>Base A: </B> <span style='font-family:Courier;'>%s</span><BR>\n", snp->baseA); printf("<B>Base B: </B> <span style='font-family:Courier;'>%s</span><BR>\n", snp->baseB); printf("<B>Sequence of Allele A:</B> <span style='font-family:Courier;'>"); printf("%s</span><BR>\n",snp->sequenceA); printf("<B>Sequence of Allele B:</B> <span style='font-family:Courier;'>"); printf("%s</span><BR>\n",snp->sequenceB); - if (snp->rsId>0) + if (isNotEmpty(snp->rsId)) { - printf("<BR><A HREF=\"http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?"); - printf("type=rs&rs=%s\" TARGET=_blank>dbSNP link for %s</A><BR>\n", - snp->rsId, snp->rsId); + puts("<BR>"); + printDbSnpRsUrl(snp->rsId, "dbSNP link for %s", snp->rsId); + puts("<BR>"); } doSnpEntrezGeneLink(tdb, snp->rsId); printf("<BR>Genotypes:<BR>"); printf("\n<BR><span style='font-family:Courier;'>"); printf("NA04477: %s ", snp->NA04477); printf("NA04479: %s ", snp->NA04479); printf("NA04846: %s ", snp->NA04846); printf("NA11036: %s ", snp->NA11036); printf("NA11038: %s ", snp->NA11038); printf("NA13056: %s ", snp->NA13056); printf("\n<BR>NA17011: %s ", snp->NA17011); printf("NA17012: %s ", 
snp->NA17012); printf("NA17013: %s ", snp->NA17013); printf("NA17014: %s ", snp->NA17014); printf("NA17015: %s ", snp->NA17015); @@ -16143,33 +16158,33 @@ printf("%s</span><BR>\n",snp->baseA); printf("<B>Base B: </B> <span style='font-family:Courier;'>"); printf("%s</span><BR>\n",snp->baseB); printf("<B>Sequence of Allele A: </B> <span style='font-family:Courier;'>"); printf("%s</span><BR>\n",snp->sequenceA); printf("<B>Sequence of Allele B: </B> <span style='font-family:Courier;'>"); printf("%s</span><BR>\n",snp->sequenceB); printf("<P><A HREF=\"https://www.affymetrix.com/LinkServlet?probeset="); printf("%s", snp->affyId); printf("\" TARGET=_blank>Affymetrix NetAffx Analysis Center link for "); printf("%s</A></P>\n", snp->affyId); if (strncmp(snp->rsId,"unmapped",8)) { - printf("<P><A HREF=\"http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?"); - printf("type=rs&rs=%s\" TARGET=_blank>dbSNP link for rs%s</A></P>\n", - snp->rsId, snp->rsId); + puts("<P>"); + printDbSnpRsUrl(snp->rsId, "dbSNP link for %s", snp->rsId); + puts("</P>"); } printf("<BR><A HREF=\"http://snp.cshl.org/cgi-bin/snp?name="); printf("%s\" TARGET=_blank>TSC link for %s</A>\n", snp->tscId, snp->tscId); doSnpEntrezGeneLink(tdb, snp->rsId); } /* else errAbort("<BR>Error in Query:\n%s<BR>\n",query); */ affy10KDetailsFree(&snp); sqlDisconnect(&conn); } void doAffy10K(struct trackDb *tdb, char *itemName) /* Put up info on an Affymetrix SNP. */ { char *table = tdb->table; @@ -17593,33 +17608,35 @@ struct gwasCatalog *gc = gwasCatalogLoad(row+rowOffset); printCustomUrl(tdb, item, FALSE); printPos(gc->chrom, gc->chromStart, gc->chromEnd, NULL, TRUE, gc->name); printf("<B>Reported region:</B> %s<BR>\n", gc->region); printf("<B>Publication:</B> %s <em>et al.</em> " "<A HREF=\"", gc->author); printEntrezPubMedUidAbstractUrl(stdout, gc->pubMedID); printf("\" TARGET=_BLANK>%s</A>%s <em>%s.</em> %s<BR>\n", gc->title, (endsWith(gc->title, ".") ? 
"" : "."), gc->journal, gc->pubDate); printf("<B>Disease or trait:</B> %s<BR>\n", subNrNs(gc->trait)); printf("<B>Initial sample size:</B> %s<BR>\n", subNrNs(gc->initSample)); printf("<B>Replication sample size:</B> %s<BR>\n", subNrNs(gc->replSample)); printf("<B>Reported gene(s):</B> %s<BR>\n", subNrNs(gc->genes)); char *strongAllele = NULL, *strongRsID = splitSnpAndAllele(gc->riskAllele, &strongAllele); if (strongRsID) - printf("<B>Strongest SNP-Risk allele:</B> " - "<A HREF=\"http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?type=rs&rs=%s\" " - "TARGET=_BLANK>%s</A>-%s<BR>\n", strongRsID, strongRsID, strongAllele); + { + printf("<B>Strongest SNP-Risk allele:</B> "); + printDbSnpRsUrl(strongRsID, "%s", strongRsID); + printf("-%s<BR>\n", strongAllele); + } else printf("<B>Strongest SNP-Risk allele:</B> %s<BR>\n", subNrNs(gc->riskAllele)); gwasCatalogCheckSnpAlleles(tdb, gc); printf("<B>Risk Allele Frequency:</B> %s<BR>\n", subNrNs(gc->riskAlFreq)); if (isEmpty(gc->pValueDesc) || sameString(gc->pValueDesc, "NS")) printf("<B>p-Value:</B> %s<BR>\n", subNrNs(gc->pValue)); else if (gc->pValueDesc[0] == '(') printf("<B>p-Value:</B> %s %s<BR>\n", gc->pValue, subNrNs(gc->pValueDesc)); else printf("<B>p-Value:</B> %s (%s)<BR>\n", gc->pValue, subNrNs(gc->pValueDesc)); printf("<B>Odds Ratio or beta:</B> %s<BR>\n", subNrNs(gc->orOrBeta)); printf("<B>95%% confidence interval:</B> %s<BR>\n", subNrNs(gc->ci95)); printf("<B>Platform:</B> %s<BR>\n", subNrNs(gc->platform)); printf("<B>Copy Number Variant (CNV)?:</B> %s<BR>\n", (gc->cnv == gwasCatalogY ? 
"Yes" : "No")); @@ -21170,32 +21187,33 @@ int minorCount = 0; char *majorAllele = NULL; char *minorAllele = NULL; char popCode[4]; safencpy(popCode, sizeof(popCode), table + strlen("hapmapSnps"), 3); popCode[3] = '\0'; genericHeader(tdb, itemName); safef(query, sizeof(query), "select * from %s where chrom = '%s' and " "chromStart=%d and name = '%s'", table, seqName, start, itemName); sr = sqlGetResult(conn, query); row = sqlNextRow(sr); struct hapmapSnps *item = hapmapSnpsLoad(row+rowOffset); -printf("<B>SNP rsId:</B> <A HREF=\"http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?" - "type=rs&rs=%s\" TARGET=_blank> %s</A><BR>\n", itemName, itemName); +printf("<B>SNP rsId:</B> "); +printDbSnpRsUrl(itemName, "%s", itemName); +puts("<BR>"); printf("<B>Position:</B> <A HREF=\"%s&db=%s&position=%s%%3A%d-%d\">%s:%d-%d</A><BR>\n", hgTracksPathAndSettings(), database, item->chrom, item->chromStart+1, item->chromEnd, item->chrom, item->chromStart+1, item->chromEnd); printf("<B>Strand:</B> %s<BR>\n", item->strand); printf("<B>Polymorphism type:</B> %s<BR>\n", item->observed); if (item->homoCount1 >= item->homoCount2) { majorAllele = cloneString(item->allele1); majorCount = item->homoCount1; minorCount = item->homoCount2; minorAllele = cloneString(item->allele2); } else { majorAllele = cloneString(item->allele2); @@ -22313,40 +22331,40 @@ printf("<B>Polymorphism:</B> %s \n", row[2]); if (end == start + 1) { hNibForChrom(database, seqName, nibName); seq = hFetchSeq(nibName, seqName, start, end); touppers(seq->dna); if (sameString(row[1], "-")) reverseComplement(seq->dna, 1); printf("<BR><B>Reference allele:</B> %s \n", seq->dna); } if (sameString(dataSource, "Affy")) { printf("<BR><BR><A HREF=\"https://www.affymetrix.com/LinkServlet?probeset=%s\" TARGET=_blank>NetAffx</A> (log in required, registration is free)\n", itemName); - if (!sameString(row[3], "unknown")) + if (regexMatch(row[3], "^rs[0-9]+$")) { - printf("<BR><A HREF=\"http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?"); - 
printf("type=rs&rs=%s\" TARGET=_blank>dbSNP (%s)</A>\n", row[3], row[3]); + printf("<BR>"); + printDbSnpRsUrl(row[3], "dbSNP (%s)", row[3]); } } - else + else if (regexMatch(itemName, "^rs[0-9]+$")) { - printf("<BR><A HREF=\"http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?"); - printf("type=rs&rs=%s\" TARGET=_blank>dbSNP (%s)</A>\n", itemName, itemName); + printf("<BR>"); + printDbSnpRsUrl(itemName, "dbSNP (%s)", itemName); } } sqlFreeResult(&sr); printTrackHtml(tdb); hFreeConn(&conn); } void doSnpArray2 (struct trackDb *tdb, char *itemName, char *dataSource) /* doSnpArray2 is essential the same as doSnpArray except that the strand is blanked out */ /* This is a temp solution for 3 Illumina SNP Arrays to blank out strand info for non-dbSnp entries */ /* Should be removed once Illumina comes up with a clear defintion of their strand data */ { char *table = tdb->table; struct sqlConnection *conn = hAllocConn(database); struct sqlResult *sr; @@ -22375,40 +22393,40 @@ printf("<B>Polymorphism:</B> %s \n", row[2]); if (end == start + 1) { hNibForChrom(database, seqName, nibName); seq = hFetchSeq(nibName, seqName, start, end); touppers(seq->dna); if (sameString(row[1], "-")) reverseComplement(seq->dna, 1); printf("<BR><B>Reference allele:</B> %s \n", seq->dna); } if (sameString(dataSource, "Affy")) { printf("<BR><BR><A HREF=\"https://www.affymetrix.com/LinkServlet?probeset=%s\" TARGET=_blank>NetAffx</A> (log in required, registration is free)\n", itemName); - if (!sameString(row[3], "unknown")) + if (regexMatch(row[3], "^rs[0-9]+$")) { - printf("<BR><A HREF=\"http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?"); - printf("type=rs&rs=%s\" TARGET=_blank>dbSNP (%s)</A>\n", row[3], row[3]); + printf("<BR>"); + printDbSnpRsUrl(row[3], "dbSNP (%s)", row[3]); } } - else + else if (regexMatch(itemName, "^rs[0-9]+$")) { - printf("<BR><A HREF=\"http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?"); - printf("type=rs&rs=%s\" TARGET=_blank>dbSNP (%s)</A>\n", itemName, itemName); + printf("<BR>"); + 
printDbSnpRsUrl(itemName, "dbSNP (%s)", itemName); } } sqlFreeResult(&sr); printTrackHtml(tdb); hFreeConn(&conn); } void printGvAttrCatType (int i) /* prints new category and type labels for attributes as needed */ { /* only print name and category if different */ if (gvPrevCat == NULL) { /* print start of both */ /* if need to print category layer, here is where print first */