82e318c7e291c143aaf0f3171fddcd12c0ec4cb2
angie
  Tue Oct 23 10:21:16 2012 -0700
VCF details: if name looks like a dbSNP or dbVar ID, add a link. Also a fix to how we encode HTML (<>) in allele names, and
refactoring of hardcoded links to dbSNP rs report (now hardcoded
in only one place instead of half a dozen).

diff --git src/hg/hgc/hgc.c src/hg/hgc/hgc.c
index e427d82..90f23b4 100644
--- src/hg/hgc/hgc.c
+++ src/hg/hgc/hgc.c
@@ -1,27 +1,28 @@
 /* hgc - Human Genome Click processor - gets called when user clicks
  * on something in human tracks display. */
 
 #include "common.h"
 #include "obscure.h"
 #include "hCommon.h"
 #include "hash.h"
 #include "binRange.h"
 #include "bits.h"
 #include "memgfx.h"
 #include "hvGfx.h"
 #include "portable.h"
+#include "regexHelper.h"
 #include "errabort.h"
 #include "dystring.h"
 #include "nib.h"
 #include "cheapcgi.h"
 #include "htmshell.h"
 #include "cart.h"
 #include "jksql.h"
 #include "dnautil.h"
 #include "dnaseq.h"
 #include "fa.h"
 #include "fuzzyFind.h"
 #include "seqOut.h"
 #include "hdb.h"
 #include "spDb.h"
 #include "hui.h"
@@ -14801,30 +14802,42 @@
 	       xenoDb, xenoChrom, el.xenoStart, el.xenoEnd,
 	       xenoOrg, xenoChrom, el.xenoStart, el.xenoEnd);
 
 	}
     printf("<A HREF=\"%s&o=%d&g=getDna&i=%s&c=%s&l=%d&r=%d&strand=%s&table=%s\">"
 	   "View DNA for this feature</A><BR>\n",  hgcPathAndSettings(),
 	   el.chromStart, cgiEncode(el.name),
 	   el.chrom, el.chromStart, el.chromEnd, el.strand, tbl);
     freez(&elname);
     }
 printTrackHtml(tdb);
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 }
 
+void printDbSnpRsUrl(char *rsId, char *labelFormat, ...)
+/* Print an HTML <A> link to dbSNP's report page for an rs[0-9]+ ID; the link text is labelFormat plus its varargs, printf-style. */
+{
+printf("<A HREF=\"http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?type=rs&rs=%s\" "
+       "TARGET=_BLANK>", rsId);
+va_list args;
+va_start(args, labelFormat);
+vprintf(labelFormat, args);
+va_end(args);
+printf("</A>");
+}
+
 char *validateOrGetRsId(char *name, struct sqlConnection *conn)
 /* If necessary, get the rsId from the affy120K or affy10K table,
    given the affyId.  rsId is more common, affy120K is next, affy10K least.
  * returns "valid" if name is already a valid rsId,
            new rsId if it is found in the affy tables, or
            0 if no valid rsId is found */
 {
 char  *rsId = cloneString(name);
 struct affy120KDetails *a120K = NULL;
 struct affy10KDetails *a10K = NULL;
 char   query[512];
 
 if (strncmp(rsId,"rs",2)) /* is not a valid rsId, so it must be an affyId */
     {
     safef(query, sizeof(query), /* more likely to be affy120K, so check first */
@@ -15006,39 +15019,41 @@
     while ((row = sqlNextRow(sr)) != NULL)
 	{
 	snpMapStaticLoad(row+rowOffset, &snpMap);
 	bedPrintPos((struct bed *)&snpMap, 3, tdb);
 	}
 else
     while ((row = sqlNextRow(sr)) != NULL)
 	{
 	snpStaticLoad(row+rowOffset, &snp);
 	bedPrintPos((struct bed *)&snp, 3, tdb);
 	}
 /* write dbSnpRs details if found. */
 printId = doDbSnpRs(itemName);
 if (printId)
     {
-    printf("<BR><A HREF=\"http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?");
+    puts("<BR>");
     if (sameString(printId, "valid"))
         {
-	printf("type=rs&rs=%s\" TARGET=_blank>dbSNP link</A>\n", itemName);
+	printDbSnpRsUrl(itemName, "dbSNP link");
+	putchar('\n');
 	doSnpEntrezGeneLink(tdb, itemName);
 	}
     else
 	{
-	printf("type=rs&rs=%s\" TARGET=_blank>dbSNP link (%s)</A>\n", printId, printId);
+	printDbSnpRsUrl(printId, "dbSNP link (%s)", printId);
+	putchar('\n');
 	doSnpEntrezGeneLink(tdb, printId);
 	}
     }
 printTrackHtml(tdb);
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 }
 
 void writeSnpException(char *exceptionList, char *itemName, int rowOffset,
                        char *chrom, int chromStart, struct trackDb *tdb)
 {
 char    *tokens;
 struct   lineFile      *lf;
 struct   tokenizer     *tkz;
 struct   snpExceptions  se;
@@ -15519,32 +15534,32 @@
     {
     snpStaticLoad(row+rowOffset, &snp);
     if (firstOne)
 	{
 	exception=cloneString(snp.exception);
 	chrom = cloneString(snp.chrom);
 	chromStart = snp.chromStart;
 	bedPrintPos((struct bed *)&snp, 3, tdb);
 	printf("<BR>\n");
 	firstOne=0;
 	}
     printSnpInfo(snp);
     }
 if (startsWith("rs",itemName))
     {
-    printf("<A HREF=\"http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?");
-    printf("type=rs&rs=%s\" TARGET=_blank>dbSNP</A>\n", itemName);
+    printDbSnpRsUrl(itemName, "dbSNP");
+    putchar('\n');
     doSnpEntrezGeneLink(tdb, itemName);
     }
 if (hTableExists(database, "snpExceptions") && differentString(exception,"0"))
     writeSnpException(exception, itemName, rowOffset, chrom, chromStart, tdb);
 printTrackHtml(tdb);
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 }
 
 void doAffy120KDetails(struct trackDb *tdb, char *name)
 /* print additional SNP details */
 {
 struct sqlConnection *conn = sqlConnect("hgFixed");
 char query[1024];
 struct affy120KDetails *snp = NULL;
@@ -15564,35 +15579,35 @@
 snp = affy120KDetailsLoadByQuery(conn, query);
 if (snp!=NULL)
     {
     printf("<BR>\n");
     printf("<B>Sample Prep Enzyme:</B> <I>%s</I><BR>\n",snp->enzyme);
     printf("<B>Minimum Allele Frequency:</B> %.3f<BR>\n",snp->minFreq);
     printf("<B>Heterozygosity:</B> %.3f<BR>\n",snp->hetzyg);
     printf("<B>Base A:          </B> <span style='font-family:Courier;'>%s</span><BR>\n",
 	   snp->baseA);
     printf("<B>Base B:          </B> <span style='font-family:Courier;'>%s</span><BR>\n",
 	   snp->baseB);
     printf("<B>Sequence of Allele A:</B>&nbsp;<span style='font-family:Courier;'>");
     printf("%s</span><BR>\n",snp->sequenceA);
     printf("<B>Sequence of Allele B:</B>&nbsp;<span style='font-family:Courier;'>");
     printf("%s</span><BR>\n",snp->sequenceB);
-    if (snp->rsId>0)
+    if (isNotEmpty(snp->rsId))
 	{
-	printf("<BR><A HREF=\"http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?");
-	printf("type=rs&rs=%s\" TARGET=_blank>dbSNP link for %s</A><BR>\n",
-	       snp->rsId, snp->rsId);
+	puts("<BR>");
+	printDbSnpRsUrl(snp->rsId, "dbSNP link for %s", snp->rsId);
+	puts("<BR>");
 	}
     doSnpEntrezGeneLink(tdb, snp->rsId);
     printf("<BR>Genotypes:<BR>");
     printf("\n<BR><span style='font-family:Courier;'>");
     printf("NA04477:&nbsp;%s&nbsp;&nbsp;", snp->NA04477);
     printf("NA04479:&nbsp;%s&nbsp;&nbsp;", snp->NA04479);
     printf("NA04846:&nbsp;%s&nbsp;&nbsp;", snp->NA04846);
     printf("NA11036:&nbsp;%s&nbsp;&nbsp;", snp->NA11036);
     printf("NA11038:&nbsp;%s&nbsp;&nbsp;", snp->NA11038);
     printf("NA13056:&nbsp;%s&nbsp;&nbsp;", snp->NA13056);
     printf("\n<BR>NA17011:&nbsp;%s&nbsp;&nbsp;", snp->NA17011);
     printf("NA17012:&nbsp;%s&nbsp;&nbsp;", snp->NA17012);
     printf("NA17013:&nbsp;%s&nbsp;&nbsp;", snp->NA17013);
     printf("NA17014:&nbsp;%s&nbsp;&nbsp;", snp->NA17014);
     printf("NA17015:&nbsp;%s&nbsp;&nbsp;", snp->NA17015);
@@ -16143,33 +16158,33 @@
     printf("%s</span><BR>\n",snp->baseA);
     printf("<B>Base B:                  </B> <span style='font-family:Courier;'>");
     printf("%s</span><BR>\n",snp->baseB);
     printf("<B>Sequence of Allele A:    </B>&nbsp;<span style='font-family:Courier;'>");
     printf("%s</span><BR>\n",snp->sequenceA);
     printf("<B>Sequence of Allele B:    </B>&nbsp;<span style='font-family:Courier;'>");
     printf("%s</span><BR>\n",snp->sequenceB);
 
     printf("<P><A HREF=\"https://www.affymetrix.com/LinkServlet?probeset=");
     printf("%s", snp->affyId);
     printf("\" TARGET=_blank>Affymetrix NetAffx Analysis Center link for ");
     printf("%s</A></P>\n", snp->affyId);
 
     if (strncmp(snp->rsId,"unmapped",8))
 	{
-	printf("<P><A HREF=\"http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?");
-        printf("type=rs&rs=%s\" TARGET=_blank>dbSNP link for rs%s</A></P>\n",
-	       snp->rsId, snp->rsId);
+	puts("<P>");
+	printDbSnpRsUrl(snp->rsId, "dbSNP link for %s", snp->rsId);
+	puts("</P>");
 	}
     printf("<BR><A HREF=\"http://snp.cshl.org/cgi-bin/snp?name=");
     printf("%s\" TARGET=_blank>TSC link for %s</A>\n",
 	   snp->tscId, snp->tscId);
     doSnpEntrezGeneLink(tdb, snp->rsId);
     }
 /* else errAbort("<BR>Error in Query:\n%s<BR>\n",query); */
 affy10KDetailsFree(&snp);
 sqlDisconnect(&conn);
 }
 
 void doAffy10K(struct trackDb *tdb, char *itemName)
 /* Put up info on an Affymetrix SNP. */
 {
 char *table = tdb->table;
@@ -17593,33 +17608,35 @@
     struct gwasCatalog *gc = gwasCatalogLoad(row+rowOffset);
     printCustomUrl(tdb, item, FALSE);
     printPos(gc->chrom, gc->chromStart, gc->chromEnd, NULL, TRUE, gc->name);
     printf("<B>Reported region:</B> %s<BR>\n", gc->region);
     printf("<B>Publication:</B> %s <em>et al.</em> "
 	   "<A HREF=\"", gc->author);
     printEntrezPubMedUidAbstractUrl(stdout, gc->pubMedID);
     printf("\" TARGET=_BLANK>%s</A>%s <em>%s.</em> %s<BR>\n",
 	   gc->title, (endsWith(gc->title, ".") ? "" : "."), gc->journal, gc->pubDate);
     printf("<B>Disease or trait:</B> %s<BR>\n", subNrNs(gc->trait));
     printf("<B>Initial sample size:</B> %s<BR>\n", subNrNs(gc->initSample));
     printf("<B>Replication sample size:</B> %s<BR>\n", subNrNs(gc->replSample));
     printf("<B>Reported gene(s):</B> %s<BR>\n", subNrNs(gc->genes));
     char *strongAllele = NULL, *strongRsID = splitSnpAndAllele(gc->riskAllele, &strongAllele);
     if (strongRsID)
-	printf("<B>Strongest SNP-Risk allele:</B> "
-	       "<A HREF=\"http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?type=rs&rs=%s\" "
-	       "TARGET=_BLANK>%s</A>-%s<BR>\n", strongRsID, strongRsID, strongAllele);
+	{
+	printf("<B>Strongest SNP-Risk allele:</B> ");
+	printDbSnpRsUrl(strongRsID, "%s", strongRsID);
+	printf("-%s<BR>\n", strongAllele);
+	}
     else
 	printf("<B>Strongest SNP-Risk allele:</B> %s<BR>\n", subNrNs(gc->riskAllele));
     gwasCatalogCheckSnpAlleles(tdb, gc);
     printf("<B>Risk Allele Frequency:</B> %s<BR>\n", subNrNs(gc->riskAlFreq));
     if (isEmpty(gc->pValueDesc) || sameString(gc->pValueDesc, "NS"))
 	printf("<B>p-Value:</B> %s<BR>\n", subNrNs(gc->pValue));
     else if (gc->pValueDesc[0] == '(')
 	printf("<B>p-Value:</B> %s %s<BR>\n", gc->pValue, subNrNs(gc->pValueDesc));
     else
 	printf("<B>p-Value:</B> %s (%s)<BR>\n", gc->pValue, subNrNs(gc->pValueDesc));
     printf("<B>Odds Ratio or beta:</B> %s<BR>\n", subNrNs(gc->orOrBeta));
     printf("<B>95%% confidence interval:</B> %s<BR>\n", subNrNs(gc->ci95));
     printf("<B>Platform:</B> %s<BR>\n", subNrNs(gc->platform));
     printf("<B>Copy Number Variant (CNV)?:</B> %s<BR>\n",
 	   (gc->cnv == gwasCatalogY ? "Yes" : "No"));
@@ -21170,32 +21187,33 @@
 int minorCount = 0;
 char *majorAllele = NULL;
 char *minorAllele = NULL;
 char popCode[4];
 safencpy(popCode, sizeof(popCode), table + strlen("hapmapSnps"), 3);
 popCode[3] = '\0';
 
 genericHeader(tdb, itemName);
 
 safef(query, sizeof(query),
       "select * from %s where chrom = '%s' and "
       "chromStart=%d and name = '%s'", table, seqName, start, itemName);
 sr = sqlGetResult(conn, query);
 row = sqlNextRow(sr);
 struct hapmapSnps *item = hapmapSnpsLoad(row+rowOffset);
-printf("<B>SNP rsId:</B> <A HREF=\"http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?"
-       "type=rs&rs=%s\" TARGET=_blank> %s</A><BR>\n", itemName, itemName);
+printf("<B>SNP rsId:</B> ");
+printDbSnpRsUrl(itemName, "%s", itemName);
+puts("<BR>");
 printf("<B>Position:</B> <A HREF=\"%s&db=%s&position=%s%%3A%d-%d\">%s:%d-%d</A><BR>\n",
        hgTracksPathAndSettings(), database, item->chrom, item->chromStart+1, item->chromEnd,
        item->chrom, item->chromStart+1, item->chromEnd);
 printf("<B>Strand:</B> %s<BR>\n", item->strand);
 printf("<B>Polymorphism type:</B> %s<BR>\n", item->observed);
 if (item->homoCount1 >= item->homoCount2)
     {
     majorAllele = cloneString(item->allele1);
     majorCount = item->homoCount1;
     minorCount = item->homoCount2;
     minorAllele = cloneString(item->allele2);
     }
 else
     {
     majorAllele = cloneString(item->allele2);
@@ -22313,40 +22331,40 @@
     printf("<B>Polymorphism:</B> %s \n", row[2]);
 
     if (end == start + 1)
         {
         hNibForChrom(database, seqName, nibName);
         seq = hFetchSeq(nibName, seqName, start, end);
 	touppers(seq->dna);
         if (sameString(row[1], "-"))
            reverseComplement(seq->dna, 1);
         printf("<BR><B>Reference allele:</B> %s \n", seq->dna);
         }
 
     if (sameString(dataSource, "Affy"))
         {
         printf("<BR><BR><A HREF=\"https://www.affymetrix.com/LinkServlet?probeset=%s\" TARGET=_blank>NetAffx</A> (log in required, registration is free)\n", itemName);
-        if (!sameString(row[3], "unknown"))
+        if (regexMatch(row[3], "^rs[0-9]+$"))
             {
-            printf("<BR><A HREF=\"http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?");
-            printf("type=rs&rs=%s\" TARGET=_blank>dbSNP (%s)</A>\n", row[3], row[3]);
+	    printf("<BR>");
+	    printDbSnpRsUrl(row[3], "dbSNP (%s)", row[3]);
 	    }
 	}
-    else
+    else if (regexMatch(itemName, "^rs[0-9]+$"))
         {
-        printf("<BR><A HREF=\"http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?");
-        printf("type=rs&rs=%s\" TARGET=_blank>dbSNP (%s)</A>\n", itemName, itemName);
+	printf("<BR>");
+	printDbSnpRsUrl(itemName, "dbSNP (%s)", itemName);
 	}
     }
 sqlFreeResult(&sr);
 printTrackHtml(tdb);
 hFreeConn(&conn);
 }
 
 void doSnpArray2 (struct trackDb *tdb, char *itemName, char *dataSource)
 /* doSnpArray2 is essential the same as doSnpArray except that the strand is blanked out */
 /* This is a temp solution for 3 Illumina SNP Arrays to blank out strand info for non-dbSnp entries */
 /* Should be removed once Illumina comes up with a clear defintion of their strand data */
 {
 char *table = tdb->table;
 struct sqlConnection *conn = hAllocConn(database);
 struct sqlResult *sr;
@@ -22375,40 +22393,40 @@
     printf("<B>Polymorphism:</B> %s \n", row[2]);
 
     if (end == start + 1)
         {
         hNibForChrom(database, seqName, nibName);
         seq = hFetchSeq(nibName, seqName, start, end);
 	touppers(seq->dna);
         if (sameString(row[1], "-"))
            reverseComplement(seq->dna, 1);
         printf("<BR><B>Reference allele:</B> %s \n", seq->dna);
         }
 
     if (sameString(dataSource, "Affy"))
         {
         printf("<BR><BR><A HREF=\"https://www.affymetrix.com/LinkServlet?probeset=%s\" TARGET=_blank>NetAffx</A> (log in required, registration is free)\n", itemName);
-        if (!sameString(row[3], "unknown"))
+        if (regexMatch(row[3], "^rs[0-9]+$"))
             {
-            printf("<BR><A HREF=\"http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?");
-            printf("type=rs&rs=%s\" TARGET=_blank>dbSNP (%s)</A>\n", row[3], row[3]);
+            printf("<BR>");
+	    printDbSnpRsUrl(row[3], "dbSNP (%s)", row[3]);
 	    }
 	}
-    else
+    else if (regexMatch(itemName, "^rs[0-9]+$"))
         {
-        printf("<BR><A HREF=\"http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?");
-        printf("type=rs&rs=%s\" TARGET=_blank>dbSNP (%s)</A>\n", itemName, itemName);
+        printf("<BR>");
+	printDbSnpRsUrl(itemName, "dbSNP (%s)", itemName);
 	}
     }
 sqlFreeResult(&sr);
 printTrackHtml(tdb);
 hFreeConn(&conn);
 }
 
 void printGvAttrCatType (int i)
 /* prints new category and type labels for attributes as needed */
 {
 /* only print name and category if different */
 if (gvPrevCat == NULL)
     {
     /* print start of both */
     /* if need to print category layer, here is where print first */