82e318c7e291c143aaf0f3171fddcd12c0ec4cb2 angie Tue Oct 23 10:21:16 2012 -0700 VCF details: if name looks like a dbSNP or dbVar ID, add a link. Also a fix to how we encode HTML (<>) in allele names, and refactoring of hardcoded links to dbSNP rs report (now hardcoded in only one place instead of half a dozen). diff --git src/hg/hgc/vcfClick.c src/hg/hgc/vcfClick.c index 68fb1e5..bfbe8df 100644 --- src/hg/hgc/vcfClick.c +++ src/hg/hgc/vcfClick.c @@ -3,89 +3,90 @@ #ifdef USE_TABIX #include "common.h" #include "dystring.h" #include "errCatch.h" #include "hCommon.h" #include "hdb.h" #include "hgc.h" #include "htmshell.h" #include "jsHelper.h" #if (defined USE_TABIX && defined KNETFILE_HOOKS) #include "knetUdc.h" #include "udc.h" #endif//def USE_TABIX && KNETFILE_HOOKS #include "pgSnp.h" +#include "regexHelper.h" #include "trashDir.h" #include "vcf.h" #include "vcfUi.h" #define NA "<em>n/a</em>" static void printKeysWithDescriptions(struct vcfFile *vcff, int wordCount, char **words, - struct vcfInfoDef *infoDefs, boolean escapeHtml) + struct vcfInfoDef *infoDefs) /* Given an array of keys, print out a list of values with * descriptions if descriptions are available. */ { int i; for (i = 0; i < wordCount; i++) { if (i > 0) printf(", "); char *key = words[i]; const struct vcfInfoDef *def = vcfInfoDefForKey(vcff, key); - char *htmlKey = escapeHtml ? htmlEncode(key) : key; + char *htmlKey = htmlEncode(key); if (def != NULL) printf("%s (%s)", htmlKey, def->description); else printf("%s", htmlKey); } printf("<BR>\n"); } static void vcfAltAlleleDetails(struct vcfRecord *rec, char **displayAls) /* If VCF header specifies any symbolic alternate alleles, pull in descriptions. 
*/ { printf("<B>Alternate allele(s):</B> "); if (rec->alleleCount < 2 || sameString(rec->alleles[1], ".")) { printf(NA"<BR>\n"); return; } struct vcfFile *vcff = rec->file; -printKeysWithDescriptions(vcff, rec->alleleCount-1, &(displayAls[1]), vcff->altDefs, FALSE); +printKeysWithDescriptions(vcff, rec->alleleCount-1, &(displayAls[1]), vcff->altDefs); } static void vcfQualDetails(struct vcfRecord *rec) /* If VCF header specifies a quality/confidence score (not "."), print it out. */ { printf("<B>Quality/confidence score:</B> %s<BR>\n", sameString(rec->qual, ".") ? NA : rec->qual); } static void vcfFilterDetails(struct vcfRecord *rec) /* If VCF header specifies any filters, pull in descriptions. */ { if (rec->filterCount == 0 || sameString(rec->filters[0], ".")) printf("<B>Filter:</B> "NA"<BR>\n"); else if (rec->filterCount == 1 && sameString(rec->filters[0], "PASS")) printf("<B>Filter:</B> PASS<BR>\n"); else { printf("<B>Filter failures:</B> "); printf("<font style='font-weight: bold; color: #FF0000;'>\n"); struct vcfFile *vcff = rec->file; - printKeysWithDescriptions(vcff, rec->filterCount, rec->filters, vcff->filterDefs, TRUE); + printKeysWithDescriptions(vcff, rec->filterCount, rec->filters, vcff->filterDefs); printf("</font>\n"); } } static void vcfInfoDetails(struct vcfRecord *rec) /* Expand info keys to descriptions, then print out keys and values. */ { if (rec->infoCount == 0) return; struct vcfFile *vcff = rec->file; puts("<B>INFO column annotations:</B><BR>"); puts("<TABLE border=0 cellspacing=0 cellpadding=0>"); int i; for (i = 0; i < rec->infoCount; i++) { @@ -283,89 +284,110 @@ static void pgSnpCodingDetail(struct vcfRecord *rec) /* Translate rec into pgSnp (with proper chrom name) and call Belinda's * coding effect predictor from pgSnp details. 
*/ { char *genePredTable = "knownGene"; if (hTableExists(database, genePredTable)) { struct pgSnp *pgs = pgSnpFromVcfRecord(rec); if (!sameString(rec->chrom, seqName)) // rec->chrom might be missing "chr" prefix: pgs->chrom = seqName; printSeqCodDisplay(database, pgs, genePredTable); } } -static void abbreviateLongSeq(char *seqIn, int endLength, struct dyString *dy) +static void abbreviateLongSeq(char *seqIn, int endLength, boolean showLength, struct dyString *dy) /* If seqIn is longer than 2*endLength plus abbreviation fudge, abbreviate it * to its first endLength bases, ellipsis that says how many bases are skipped, * and its last endLength bases; add result to dy. */ { int threshold = 2*endLength + 30; int seqInLen = strlen(seqIn); if (seqInLen > threshold) { dyStringAppendN(dy, seqIn, endLength); + dyStringAppend(dy, "..."); + if (showLength) + { int skippedLen = seqInLen-2*endLength; - dyStringPrintf(dy, "...<%d bases>...%s", - skippedLen, seqIn+seqInLen-endLength); + dyStringPrintf(dy, "<%d bases>...", skippedLen); + } + dyStringAppend(dy, seqIn+seqInLen-endLength); } else dyStringAppend(dy, seqIn); } static void makeDisplayAlleles(struct vcfRecord *rec, boolean showLeftBase, char leftBase, - int endLength, char **displayAls) + int endLength, boolean showLength, boolean encodeHtml, + char **displayAls) /* If necessary, show the left base that we trimmed and/or abbreviate long sequences. 
*/ { struct dyString *dy = dyStringNew(128); int i; for (i = 0; i < rec->alleleCount; i++) { dyStringClear(dy); if (showLeftBase) dyStringPrintf(dy, "(%c)", leftBase); - abbreviateLongSeq(rec->alleles[i], endLength, dy); - displayAls[i] = htmlEncode(dy->string); // leak some mem + abbreviateLongSeq(rec->alleles[i], endLength, showLength, dy); + if (encodeHtml) + displayAls[i] = htmlEncode(dy->string); + else + displayAls[i] = cloneString(dy->string); } } static void vcfRecordDetails(struct trackDb *tdb, struct vcfRecord *rec) /* Display the contents of a single line of VCF, assumed to be from seqName * (using seqName instead of rec->chrom because rec->chrom might lack "chr"). */ { printf("<B>Name:</B> %s<BR>\n", rec->name); +// Since these are variants, if it looks like a dbSNP or dbVar ID, provide a link: +if (regexMatch(rec->name, "^rs[0-9]+$")) + { + printf("<B>dbSNP:</B> "); + printDbSnpRsUrl(rec->name, "%s", rec->name); + puts("<BR>"); + } +else if (regexMatch(rec->name, "^[en]ss?v[0-9]+$")) + { + printf("<B>dbVar:</B> "); + printf("<A HREF=\"http://www.ncbi.nlm.nih.gov/dbvar/variants/%s/\" " + "TARGET=_BLANK>%s</A><BR>\n", rec->name, rec->name); + } printCustomUrl(tdb, rec->name, TRUE); static char *formName = "vcfCfgHapCenter"; printf("<FORM NAME=\"%s\" ACTION=\"%s\">\n", formName, hgTracksName()); cartSaveSession(cart); vcfCfgHaplotypeCenter(cart, tdb, tdb->track, FALSE, rec->file, rec->name, seqName, rec->chromStart, formName); printf("</FORM>\n"); char leftBase = rec->alleles[0][0]; unsigned int vcfStart = vcfRecordTrimIndelLeftBase(rec); boolean showLeftBase = (rec->chromStart == vcfStart+1); char *displayAls[rec->alleleCount]; -makeDisplayAlleles(rec, showLeftBase, leftBase, 20, displayAls); +makeDisplayAlleles(rec, showLeftBase, leftBase, 20, TRUE, FALSE, displayAls); printPosOnChrom(seqName, rec->chromStart, rec->chromEnd, NULL, FALSE, rec->name); printf("<B>Reference allele:</B> %s<BR>\n", displayAls[0]); vcfAltAlleleDetails(rec, displayAls); 
vcfQualDetails(rec); vcfFilterDetails(rec); vcfInfoDetails(rec); pgSnpCodingDetail(rec); -makeDisplayAlleles(rec, showLeftBase, leftBase, 5, displayAls); +makeDisplayAlleles(rec, showLeftBase, leftBase, 5, FALSE, TRUE, displayAls); vcfGenotypesDetails(rec, tdb->track, displayAls); } void doVcfTabixDetails(struct trackDb *tdb, char *item) /* Show details of an alignment from a VCF file compressed and indexed by tabix. */ { #if (defined USE_TABIX && defined KNETFILE_HOOKS) knetUdcInstall(); if (udcCacheTimeout() < 300) udcSetCacheTimeout(300); #endif//def USE_TABIX && KNETFILE_HOOKS int start = cartInt(cart, "o"); int end = cartInt(cart, "t"); struct sqlConnection *conn = hAllocConnTrack(database, tdb); // TODO: will need to handle per-chrom files like bam, maybe fold bamFileNameFromTable into this::