82e318c7e291c143aaf0f3171fddcd12c0ec4cb2
angie
  Tue Oct 23 10:21:16 2012 -0700
VCF details: if name looks like a dbSNP or dbVar ID, add a link. Also a fix to how we encode HTML (<>) in allele names, and
refactoring of hardcoded links to dbSNP rs report (now hardcoded
in only one place instead of half a dozen).

diff --git src/hg/hgc/vcfClick.c src/hg/hgc/vcfClick.c
index 68fb1e5..bfbe8df 100644
--- src/hg/hgc/vcfClick.c
+++ src/hg/hgc/vcfClick.c
@@ -3,89 +3,90 @@
 #ifdef USE_TABIX
 
 #include "common.h"
 #include "dystring.h"
 #include "errCatch.h"
 #include "hCommon.h"
 #include "hdb.h"
 #include "hgc.h"
 #include "htmshell.h"
 #include "jsHelper.h"
 #if (defined USE_TABIX && defined KNETFILE_HOOKS)
 #include "knetUdc.h"
 #include "udc.h"
 #endif//def USE_TABIX && KNETFILE_HOOKS
 #include "pgSnp.h"
+#include "regexHelper.h"
 #include "trashDir.h"
 #include "vcf.h"
 #include "vcfUi.h"
 
 #define NA "<em>n/a</em>"
 
 static void printKeysWithDescriptions(struct vcfFile *vcff, int wordCount, char **words,
-				      struct vcfInfoDef *infoDefs, boolean escapeHtml)
+				      struct vcfInfoDef *infoDefs)
 /* Given an array of keys, print out a list of values with
  * descriptions if descriptions are available. */
 {
 int i;
 for (i = 0;  i < wordCount; i++)
     {
     if (i > 0)
 	printf(", ");
     char *key = words[i];
     const struct vcfInfoDef *def = vcfInfoDefForKey(vcff, key);
-    char *htmlKey = escapeHtml ? htmlEncode(key) : key;
+    char *htmlKey = htmlEncode(key);
     if (def != NULL)
 	printf("%s (%s)", htmlKey, def->description);
     else
 	printf("%s", htmlKey);
     }
 printf("<BR>\n");
 }
 
 static void vcfAltAlleleDetails(struct vcfRecord *rec, char **displayAls)
 /* If VCF header specifies any symbolic alternate alleles, pull in descriptions. */
 {
 printf("<B>Alternate allele(s):</B> ");
 if (rec->alleleCount < 2 || sameString(rec->alleles[1], "."))
     {
     printf(NA"<BR>\n");
     return;
     }
 struct vcfFile *vcff = rec->file;
-printKeysWithDescriptions(vcff, rec->alleleCount-1, &(displayAls[1]), vcff->altDefs, FALSE);
+printKeysWithDescriptions(vcff, rec->alleleCount-1, &(displayAls[1]), vcff->altDefs);
 }
 
 static void vcfQualDetails(struct vcfRecord *rec)
 /* If VCF header specifies a quality/confidence score (not "."), print it out. */
 {
 printf("<B>Quality/confidence score:</B> %s<BR>\n", sameString(rec->qual, ".") ? NA : rec->qual);
 }
 
 static void vcfFilterDetails(struct vcfRecord *rec)
 /* If VCF header specifies any filters, pull in descriptions. */
 {
 if (rec->filterCount == 0 || sameString(rec->filters[0], "."))
     printf("<B>Filter:</B> "NA"<BR>\n");
 else if (rec->filterCount == 1 && sameString(rec->filters[0], "PASS"))
     printf("<B>Filter:</B> PASS<BR>\n");
 else
     {
     printf("<B>Filter failures:</B> ");
     printf("<font style='font-weight: bold; color: #FF0000;'>\n");
     struct vcfFile *vcff = rec->file;
-    printKeysWithDescriptions(vcff, rec->filterCount, rec->filters, vcff->filterDefs, TRUE);
+    printKeysWithDescriptions(vcff, rec->filterCount, rec->filters, vcff->filterDefs);
     printf("</font>\n");
     }
 }
 
 static void vcfInfoDetails(struct vcfRecord *rec)
 /* Expand info keys to descriptions, then print out keys and values. */
 {
 if (rec->infoCount == 0)
     return;
 struct vcfFile *vcff = rec->file;
 puts("<B>INFO column annotations:</B><BR>");
 puts("<TABLE border=0 cellspacing=0 cellpadding=0>");
 int i;
 for (i = 0;  i < rec->infoCount;  i++)
     {
@@ -283,89 +284,110 @@
 static void pgSnpCodingDetail(struct vcfRecord *rec)
 /* Translate rec into pgSnp (with proper chrom name) and call Belinda's
  * coding effect predictor from pgSnp details. */
 {
 char *genePredTable = "knownGene";
 if (hTableExists(database, genePredTable))
     {
     struct pgSnp *pgs = pgSnpFromVcfRecord(rec);
     if (!sameString(rec->chrom, seqName))
 	// rec->chrom might be missing "chr" prefix:
 	pgs->chrom = seqName;
     printSeqCodDisplay(database, pgs, genePredTable);
     }
 }
 
-static void abbreviateLongSeq(char *seqIn, int endLength, struct dyString *dy)
+static void abbreviateLongSeq(char *seqIn, int endLength, boolean showLength, struct dyString *dy)
 /* If seqIn is longer than 2*endLength plus abbreviation fudge, abbreviate it
  * to its first endLength bases, ellipsis that says how many bases are skipped,
  * and its last endLength bases; add result to dy. */
 {
 int threshold = 2*endLength + 30;
 int seqInLen = strlen(seqIn);
 if (seqInLen > threshold)
     {
     dyStringAppendN(dy, seqIn, endLength);
+    dyStringAppend(dy, "...");
+    if (showLength)
+	{
     int skippedLen = seqInLen-2*endLength;
-    dyStringPrintf(dy, "...&lt;%d bases&gt;...%s",
-		   skippedLen, seqIn+seqInLen-endLength);
+	dyStringPrintf(dy, "<%d bases>...", skippedLen);
+	}
+    dyStringAppend(dy, seqIn+seqInLen-endLength);
     }
 else
     dyStringAppend(dy, seqIn);
 }
 
 static void makeDisplayAlleles(struct vcfRecord *rec, boolean showLeftBase, char leftBase,
-			       int endLength, char **displayAls)
+			       int endLength, boolean showLength, boolean encodeHtml,
+			       char **displayAls)
 /* If necessary, show the left base that we trimmed and/or abbreviate long sequences. */
 {
 struct dyString *dy = dyStringNew(128);
 int i;
 for (i = 0;  i < rec->alleleCount; i++)
     {
     dyStringClear(dy);
     if (showLeftBase)
 	dyStringPrintf(dy, "(%c)", leftBase);
-    abbreviateLongSeq(rec->alleles[i], endLength, dy);
-    displayAls[i] = htmlEncode(dy->string); // leak some mem
+    abbreviateLongSeq(rec->alleles[i], endLength, showLength, dy);
+    if (encodeHtml)
+	displayAls[i] = htmlEncode(dy->string);
+    else
+	displayAls[i] = cloneString(dy->string);
     }
 }
 
 static void vcfRecordDetails(struct trackDb *tdb, struct vcfRecord *rec)
 /* Display the contents of a single line of VCF, assumed to be from seqName
  * (using seqName instead of rec->chrom because rec->chrom might lack "chr"). */
 {
 printf("<B>Name:</B> %s<BR>\n", rec->name);
+// Since these are variants, if it looks like a dbSNP or dbVar ID, provide a link:
+if (regexMatch(rec->name, "^rs[0-9]+$"))
+    {
+    printf("<B>dbSNP:</B> ");
+    printDbSnpRsUrl(rec->name, "%s", rec->name);
+    puts("<BR>");
+    }
+else if (regexMatch(rec->name, "^[en]ss?v[0-9]+$"))
+    {
+    printf("<B>dbVar:</B> ");
+    printf("<A HREF=\"http://www.ncbi.nlm.nih.gov/dbvar/variants/%s/\" "
+	   "TARGET=_BLANK>%s</A><BR>\n", rec->name, rec->name);
+    }
 printCustomUrl(tdb, rec->name, TRUE);
 static char *formName = "vcfCfgHapCenter";
 printf("<FORM NAME=\"%s\" ACTION=\"%s\">\n", formName, hgTracksName());
 cartSaveSession(cart);
 vcfCfgHaplotypeCenter(cart, tdb, tdb->track, FALSE, rec->file, rec->name,
 		      seqName, rec->chromStart, formName);
 printf("</FORM>\n");
 char leftBase = rec->alleles[0][0];
 unsigned int vcfStart = vcfRecordTrimIndelLeftBase(rec);
 boolean showLeftBase = (rec->chromStart == vcfStart+1);
 char *displayAls[rec->alleleCount];
-makeDisplayAlleles(rec, showLeftBase, leftBase, 20, displayAls);
+makeDisplayAlleles(rec, showLeftBase, leftBase, 20, TRUE, FALSE, displayAls);
 printPosOnChrom(seqName, rec->chromStart, rec->chromEnd, NULL, FALSE, rec->name);
 printf("<B>Reference allele:</B> %s<BR>\n", displayAls[0]);
 vcfAltAlleleDetails(rec, displayAls);
 vcfQualDetails(rec);
 vcfFilterDetails(rec);
 vcfInfoDetails(rec);
 pgSnpCodingDetail(rec);
-makeDisplayAlleles(rec, showLeftBase, leftBase, 5, displayAls);
+makeDisplayAlleles(rec, showLeftBase, leftBase, 5, FALSE, TRUE, displayAls);
 vcfGenotypesDetails(rec, tdb->track, displayAls);
 }
 
 void doVcfTabixDetails(struct trackDb *tdb, char *item)
 /* Show details of an alignment from a VCF file compressed and indexed by tabix. */
 {
 #if (defined USE_TABIX && defined KNETFILE_HOOKS)
 knetUdcInstall();
 if (udcCacheTimeout() < 300)
     udcSetCacheTimeout(300);
 #endif//def USE_TABIX && KNETFILE_HOOKS
 int start = cartInt(cart, "o");
 int end = cartInt(cart, "t");
 struct sqlConnection *conn = hAllocConnTrack(database, tdb);
 // TODO: will need to handle per-chrom files like bam, maybe fold bamFileNameFromTable into this::