82e318c7e291c143aaf0f3171fddcd12c0ec4cb2 angie Tue Oct 23 10:21:16 2012 -0700 VCF details: if name looks like a dbSNP or dbVar ID, add a link. Also a fix to how we encode HTML (<>) in allele names, and refactoring of hardcoded links to dbSNP rs report (now hardcoded in only one place instead of a half dozen). diff --git src/hg/hgc/hgc.c src/hg/hgc/hgc.c index e427d82..90f23b4 100644 --- src/hg/hgc/hgc.c +++ src/hg/hgc/hgc.c @@ -1,27 +1,28 @@ /* hgc - Human Genome Click processor - gets called when user clicks * on something in human tracks display. */ #include "common.h" #include "obscure.h" #include "hCommon.h" #include "hash.h" #include "binRange.h" #include "bits.h" #include "memgfx.h" #include "hvGfx.h" #include "portable.h" +#include "regexHelper.h" #include "errabort.h" #include "dystring.h" #include "nib.h" #include "cheapcgi.h" #include "htmshell.h" #include "cart.h" #include "jksql.h" #include "dnautil.h" #include "dnaseq.h" #include "fa.h" #include "fuzzyFind.h" #include "seqOut.h" #include "hdb.h" #include "spDb.h" #include "hui.h" @@ -14801,30 +14802,42 @@ xenoDb, xenoChrom, el.xenoStart, el.xenoEnd, xenoOrg, xenoChrom, el.xenoStart, el.xenoEnd); } printf("" "View DNA for this feature
\n", hgcPathAndSettings(), el.chromStart, cgiEncode(el.name), el.chrom, el.chromStart, el.chromEnd, el.strand, tbl); freez(&elname); } printTrackHtml(tdb); sqlFreeResult(&sr); hFreeConn(&conn); } +void printDbSnpRsUrl(char *rsId, char *labelFormat, ...) +/* Print a link to dbSNP's report page for an rs[0-9]+ ID. */ +{ +printf("", rsId); +va_list args; +va_start(args, labelFormat); +vprintf(labelFormat, args); +va_end(args); +printf(""); +} + char *validateOrGetRsId(char *name, struct sqlConnection *conn) /* If necessary, get the rsId from the affy120K or affy10K table, given the affyId. rsId is more common, affy120K is next, affy10K least. * returns "valid" if name is already a valid rsId, new rsId if it is found in the affy tables, or 0 if no valid rsId is found */ { char *rsId = cloneString(name); struct affy120KDetails *a120K = NULL; struct affy10KDetails *a10K = NULL; char query[512]; if (strncmp(rsId,"rs",2)) /* is not a valid rsId, so it must be an affyId */ { safef(query, sizeof(query), /* more likely to be affy120K, so check first */ @@ -15006,39 +15019,41 @@ while ((row = sqlNextRow(sr)) != NULL) { snpMapStaticLoad(row+rowOffset, &snpMap); bedPrintPos((struct bed *)&snpMap, 3, tdb); } else while ((row = sqlNextRow(sr)) != NULL) { snpStaticLoad(row+rowOffset, &snp); bedPrintPos((struct bed *)&snp, 3, tdb); } /* write dbSnpRs details if found. */ printId = doDbSnpRs(itemName); if (printId) { - printf("
"); if (sameString(printId, "valid")) { - printf("type=rs&rs=%s\" TARGET=_blank>dbSNP link\n", itemName); + printDbSnpRsUrl(itemName, "dbSNP link"); + putchar('\n'); doSnpEntrezGeneLink(tdb, itemName); } else { - printf("type=rs&rs=%s\" TARGET=_blank>dbSNP link (%s)\n", printId, printId); + printDbSnpRsUrl(printId, "dbSNP link (%s)", printId); + putchar('\n'); doSnpEntrezGeneLink(tdb, printId); } } printTrackHtml(tdb); sqlFreeResult(&sr); hFreeConn(&conn); } void writeSnpException(char *exceptionList, char *itemName, int rowOffset, char *chrom, int chromStart, struct trackDb *tdb) { char *tokens; struct lineFile *lf; struct tokenizer *tkz; struct snpExceptions se; @@ -15519,32 +15534,32 @@ { snpStaticLoad(row+rowOffset, &snp); if (firstOne) { exception=cloneString(snp.exception); chrom = cloneString(snp.chrom); chromStart = snp.chromStart; bedPrintPos((struct bed *)&snp, 3, tdb); printf("
\n"); firstOne=0; } printSnpInfo(snp); } if (startsWith("rs",itemName)) { - printf("dbSNP\n", itemName); + printDbSnpRsUrl(itemName, "dbSNP"); + putchar('\n'); doSnpEntrezGeneLink(tdb, itemName); } if (hTableExists(database, "snpExceptions") && differentString(exception,"0")) writeSnpException(exception, itemName, rowOffset, chrom, chromStart, tdb); printTrackHtml(tdb); sqlFreeResult(&sr); hFreeConn(&conn); } void doAffy120KDetails(struct trackDb *tdb, char *name) /* print additional SNP details */ { struct sqlConnection *conn = sqlConnect("hgFixed"); char query[1024]; struct affy120KDetails *snp = NULL; @@ -15564,35 +15579,35 @@ snp = affy120KDetailsLoadByQuery(conn, query); if (snp!=NULL) { printf("
\n"); printf("Sample Prep Enzyme: %s
\n",snp->enzyme); printf("Minimum Allele Frequency: %.3f
\n",snp->minFreq); printf("Heterozygosity: %.3f
\n",snp->hetzyg); printf("Base A: %s
\n", snp->baseA); printf("Base B: %s
\n", snp->baseB); printf("Sequence of Allele A: "); printf("%s
\n",snp->sequenceA); printf("Sequence of Allele B: "); printf("%s
\n",snp->sequenceB); - if (snp->rsId>0) + if (isNotEmpty(snp->rsId)) { - printf("
dbSNP link for %s
\n", - snp->rsId, snp->rsId); + puts("
"); + printDbSnpRsUrl(snp->rsId, "dbSNP link for %s", snp->rsId); + puts("
"); } doSnpEntrezGeneLink(tdb, snp->rsId); printf("
Genotypes:
"); printf("\n
"); printf("NA04477: %s  ", snp->NA04477); printf("NA04479: %s  ", snp->NA04479); printf("NA04846: %s  ", snp->NA04846); printf("NA11036: %s  ", snp->NA11036); printf("NA11038: %s  ", snp->NA11038); printf("NA13056: %s  ", snp->NA13056); printf("\n
NA17011: %s  ", snp->NA17011); printf("NA17012: %s  ", snp->NA17012); printf("NA17013: %s  ", snp->NA17013); printf("NA17014: %s  ", snp->NA17014); printf("NA17015: %s  ", snp->NA17015); @@ -16143,33 +16158,33 @@ printf("%s

\n",snp->baseA); printf("Base B: "); printf("%s
\n",snp->baseB); printf("Sequence of Allele A:  "); printf("%s
\n",snp->sequenceA); printf("Sequence of Allele B:  "); printf("%s
\n",snp->sequenceB); printf("

affyId); printf("\" TARGET=_blank>Affymetrix NetAffx Analysis Center link for "); printf("%s

\n", snp->affyId); if (strncmp(snp->rsId,"unmapped",8)) { - printf("

dbSNP link for rs%s

\n", - snp->rsId, snp->rsId); + puts("

"); + printDbSnpRsUrl(snp->rsId, "dbSNP link for %s", snp->rsId); + puts("

"); } printf("
TSC link for %s\n", snp->tscId, snp->tscId); doSnpEntrezGeneLink(tdb, snp->rsId); } /* else errAbort("
Error in Query:\n%s
\n",query); */ affy10KDetailsFree(&snp); sqlDisconnect(&conn); } void doAffy10K(struct trackDb *tdb, char *itemName) /* Put up info on an Affymetrix SNP. */ { char *table = tdb->table; @@ -17593,33 +17608,35 @@ struct gwasCatalog *gc = gwasCatalogLoad(row+rowOffset); printCustomUrl(tdb, item, FALSE); printPos(gc->chrom, gc->chromStart, gc->chromEnd, NULL, TRUE, gc->name); printf("Reported region: %s
\n", gc->region); printf("Publication: %s et al. " "author); printEntrezPubMedUidAbstractUrl(stdout, gc->pubMedID); printf("\" TARGET=_BLANK>%s%s %s. %s
\n", gc->title, (endsWith(gc->title, ".") ? "" : "."), gc->journal, gc->pubDate); printf("Disease or trait: %s
\n", subNrNs(gc->trait)); printf("Initial sample size: %s
\n", subNrNs(gc->initSample)); printf("Replication sample size: %s
\n", subNrNs(gc->replSample)); printf("Reported gene(s): %s
\n", subNrNs(gc->genes)); char *strongAllele = NULL, *strongRsID = splitSnpAndAllele(gc->riskAllele, &strongAllele); if (strongRsID) - printf("Strongest SNP-Risk allele: " - "%s-%s
\n", strongRsID, strongRsID, strongAllele); + { + printf("Strongest SNP-Risk allele: "); + printDbSnpRsUrl(strongRsID, "%s", strongRsID); + printf("-%s
\n", strongAllele); + } else printf("Strongest SNP-Risk allele: %s
\n", subNrNs(gc->riskAllele)); gwasCatalogCheckSnpAlleles(tdb, gc); printf("Risk Allele Frequency: %s
\n", subNrNs(gc->riskAlFreq)); if (isEmpty(gc->pValueDesc) || sameString(gc->pValueDesc, "NS")) printf("p-Value: %s
\n", subNrNs(gc->pValue)); else if (gc->pValueDesc[0] == '(') printf("p-Value: %s %s
\n", gc->pValue, subNrNs(gc->pValueDesc)); else printf("p-Value: %s (%s)
\n", gc->pValue, subNrNs(gc->pValueDesc)); printf("Odds Ratio or beta: %s
\n", subNrNs(gc->orOrBeta)); printf("95%% confidence interval: %s
\n", subNrNs(gc->ci95)); printf("Platform: %s
\n", subNrNs(gc->platform)); printf("Copy Number Variant (CNV)?: %s
\n", (gc->cnv == gwasCatalogY ? "Yes" : "No")); @@ -21170,32 +21187,33 @@ int minorCount = 0; char *majorAllele = NULL; char *minorAllele = NULL; char popCode[4]; safencpy(popCode, sizeof(popCode), table + strlen("hapmapSnps"), 3); popCode[3] = '\0'; genericHeader(tdb, itemName); safef(query, sizeof(query), "select * from %s where chrom = '%s' and " "chromStart=%d and name = '%s'", table, seqName, start, itemName); sr = sqlGetResult(conn, query); row = sqlNextRow(sr); struct hapmapSnps *item = hapmapSnpsLoad(row+rowOffset); -printf("SNP rsId: %s
\n", itemName, itemName); +printf("SNP rsId: "); +printDbSnpRsUrl(itemName, "%s", itemName); +puts("
"); printf("Position: %s:%d-%d
\n", hgTracksPathAndSettings(), database, item->chrom, item->chromStart+1, item->chromEnd, item->chrom, item->chromStart+1, item->chromEnd); printf("Strand: %s
\n", item->strand); printf("Polymorphism type: %s
\n", item->observed); if (item->homoCount1 >= item->homoCount2) { majorAllele = cloneString(item->allele1); majorCount = item->homoCount1; minorCount = item->homoCount2; minorAllele = cloneString(item->allele2); } else { majorAllele = cloneString(item->allele2); @@ -22313,40 +22331,40 @@ printf("Polymorphism: %s \n", row[2]); if (end == start + 1) { hNibForChrom(database, seqName, nibName); seq = hFetchSeq(nibName, seqName, start, end); touppers(seq->dna); if (sameString(row[1], "-")) reverseComplement(seq->dna, 1); printf("
Reference allele: %s \n", seq->dna); } if (sameString(dataSource, "Affy")) { printf("

NetAffx (log in required, registration is free)\n", itemName); - if (!sameString(row[3], "unknown")) + if (regexMatch(row[3], "^rs[0-9]+$")) { - printf("
dbSNP (%s)\n", row[3], row[3]); + printf("
"); + printDbSnpRsUrl(row[3], "dbSNP (%s)", row[3]); } } - else + else if (regexMatch(itemName, "^rs[0-9]+$")) { - printf("
dbSNP (%s)\n", itemName, itemName); + printf("
"); + printDbSnpRsUrl(itemName, "dbSNP (%s)", itemName); } } sqlFreeResult(&sr); printTrackHtml(tdb); hFreeConn(&conn); } void doSnpArray2 (struct trackDb *tdb, char *itemName, char *dataSource) /* doSnpArray2 is essential the same as doSnpArray except that the strand is blanked out */ /* This is a temp solution for 3 Illumina SNP Arrays to blank out strand info for non-dbSnp entries */ /* Should be removed once Illumina comes up with a clear defintion of their strand data */ { char *table = tdb->table; struct sqlConnection *conn = hAllocConn(database); struct sqlResult *sr; @@ -22375,40 +22393,40 @@ printf("Polymorphism: %s \n", row[2]); if (end == start + 1) { hNibForChrom(database, seqName, nibName); seq = hFetchSeq(nibName, seqName, start, end); touppers(seq->dna); if (sameString(row[1], "-")) reverseComplement(seq->dna, 1); printf("
Reference allele: %s \n", seq->dna); } if (sameString(dataSource, "Affy")) { printf("

NetAffx (log in required, registration is free)\n", itemName); - if (!sameString(row[3], "unknown")) + if (regexMatch(row[3], "^rs[0-9]+$")) { - printf("
dbSNP (%s)\n", row[3], row[3]); + printf("
"); + printDbSnpRsUrl(row[3], "dbSNP (%s)", row[3]); } } - else + else if (regexMatch(itemName, "^rs[0-9]+$")) { - printf("
dbSNP (%s)\n", itemName, itemName); + printf("
"); + printDbSnpRsUrl(itemName, "dbSNP (%s)", itemName); } } sqlFreeResult(&sr); printTrackHtml(tdb); hFreeConn(&conn); } void printGvAttrCatType (int i) /* prints new category and type labels for attributes as needed */ { /* only print name and category if different */ if (gvPrevCat == NULL) { /* print start of both */ /* if need to print category layer, here is where print first */