82e318c7e291c143aaf0f3171fddcd12c0ec4cb2
angie
Tue Oct 23 10:21:16 2012 -0700
VCF details: if name looks like a dbSNP or dbVar ID, add a link. Also a fix to how we encode HTML (<>) in allele names, and
refactoring of hardcoded links to dbSNP rs report (now hardcoded
in only one place instead of a half dozen).
diff --git src/hg/hgc/vcfClick.c src/hg/hgc/vcfClick.c
index 68fb1e5..bfbe8df 100644
--- src/hg/hgc/vcfClick.c
+++ src/hg/hgc/vcfClick.c
@@ -3,89 +3,90 @@
#ifdef USE_TABIX
#include "common.h"
#include "dystring.h"
#include "errCatch.h"
#include "hCommon.h"
#include "hdb.h"
#include "hgc.h"
#include "htmshell.h"
#include "jsHelper.h"
#if (defined USE_TABIX && defined KNETFILE_HOOKS)
#include "knetUdc.h"
#include "udc.h"
#endif//def USE_TABIX && KNETFILE_HOOKS
#include "pgSnp.h"
+#include "regexHelper.h"
#include "trashDir.h"
#include "vcf.h"
#include "vcfUi.h"
#define NA "n/a"
static void printKeysWithDescriptions(struct vcfFile *vcff, int wordCount, char **words,
- struct vcfInfoDef *infoDefs, boolean escapeHtml)
+ struct vcfInfoDef *infoDefs)
/* Given an array of keys, print out a list of values with
* descriptions if descriptions are available. */
{
int i;
for (i = 0; i < wordCount; i++)
{
if (i > 0)
printf(", ");
char *key = words[i];
const struct vcfInfoDef *def = vcfInfoDefForKey(vcff, key);
- char *htmlKey = escapeHtml ? htmlEncode(key) : key;
+ char *htmlKey = htmlEncode(key);
if (def != NULL)
printf("%s (%s)", htmlKey, def->description);
else
printf("%s", htmlKey);
}
printf("
\n");
}
static void vcfAltAlleleDetails(struct vcfRecord *rec, char **displayAls)
/* If VCF header specifies any symbolic alternate alleles, pull in descriptions. */
{
printf("Alternate allele(s): ");
if (rec->alleleCount < 2 || sameString(rec->alleles[1], "."))
{
printf(NA"
\n");
return;
}
struct vcfFile *vcff = rec->file;
-printKeysWithDescriptions(vcff, rec->alleleCount-1, &(displayAls[1]), vcff->altDefs, FALSE);
+printKeysWithDescriptions(vcff, rec->alleleCount-1, &(displayAls[1]), vcff->altDefs);
}
static void vcfQualDetails(struct vcfRecord *rec)
/* If VCF header specifies a quality/confidence score (not "."), print it out. */
{
printf("Quality/confidence score: %s
\n", sameString(rec->qual, ".") ? NA : rec->qual);
}
static void vcfFilterDetails(struct vcfRecord *rec)
/* If VCF header specifies any filters, pull in descriptions. */
{
if (rec->filterCount == 0 || sameString(rec->filters[0], "."))
printf("Filter: "NA"
\n");
else if (rec->filterCount == 1 && sameString(rec->filters[0], "PASS"))
printf("Filter: PASS
\n");
else
{
printf("Filter failures: ");
printf("\n");
struct vcfFile *vcff = rec->file;
- printKeysWithDescriptions(vcff, rec->filterCount, rec->filters, vcff->filterDefs, TRUE);
+ printKeysWithDescriptions(vcff, rec->filterCount, rec->filters, vcff->filterDefs);
printf("\n");
}
}
static void vcfInfoDetails(struct vcfRecord *rec)
/* Expand info keys to descriptions, then print out keys and values. */
{
if (rec->infoCount == 0)
return;
struct vcfFile *vcff = rec->file;
puts("INFO column annotations:
");
puts("
");
int i;
for (i = 0; i < rec->infoCount; i++)
{
@@ -283,89 +284,110 @@
static void pgSnpCodingDetail(struct vcfRecord *rec)
/* Translate rec into pgSnp (with proper chrom name) and call Belinda's
* coding effect predictor from pgSnp details. */
{
char *genePredTable = "knownGene";
if (hTableExists(database, genePredTable))
{
struct pgSnp *pgs = pgSnpFromVcfRecord(rec);
if (!sameString(rec->chrom, seqName))
// rec->chrom might be missing "chr" prefix:
pgs->chrom = seqName;
printSeqCodDisplay(database, pgs, genePredTable);
}
}
-static void abbreviateLongSeq(char *seqIn, int endLength, struct dyString *dy)
+static void abbreviateLongSeq(char *seqIn, int endLength, boolean showLength, struct dyString *dy)
/* If seqIn is longer than 2*endLength plus abbreviation fudge, abbreviate it
* to its first endLength bases, ellipsis that says how many bases are skipped,
* and its last endLength bases; add result to dy. */
{
int threshold = 2*endLength + 30;
int seqInLen = strlen(seqIn);
if (seqInLen > threshold)
{
dyStringAppendN(dy, seqIn, endLength);
+ dyStringAppend(dy, "...");
+ if (showLength)
+ {
int skippedLen = seqInLen-2*endLength;
- dyStringPrintf(dy, "...<%d bases>...%s",
- skippedLen, seqIn+seqInLen-endLength);
+ dyStringPrintf(dy, "<%d bases>...", skippedLen);
+ }
+ dyStringAppend(dy, seqIn+seqInLen-endLength);
}
else
dyStringAppend(dy, seqIn);
}
static void makeDisplayAlleles(struct vcfRecord *rec, boolean showLeftBase, char leftBase,
- int endLength, char **displayAls)
+ int endLength, boolean showLength, boolean encodeHtml,
+ char **displayAls)
/* If necessary, show the left base that we trimmed and/or abbreviate long sequences. */
{
struct dyString *dy = dyStringNew(128);
int i;
for (i = 0; i < rec->alleleCount; i++)
{
dyStringClear(dy);
if (showLeftBase)
dyStringPrintf(dy, "(%c)", leftBase);
- abbreviateLongSeq(rec->alleles[i], endLength, dy);
- displayAls[i] = htmlEncode(dy->string); // leak some mem
+ abbreviateLongSeq(rec->alleles[i], endLength, showLength, dy);
+ if (encodeHtml)
+ displayAls[i] = htmlEncode(dy->string);
+ else
+ displayAls[i] = cloneString(dy->string);
}
}
static void vcfRecordDetails(struct trackDb *tdb, struct vcfRecord *rec)
/* Display the contents of a single line of VCF, assumed to be from seqName
* (using seqName instead of rec->chrom because rec->chrom might lack "chr"). */
{
printf("Name: %s
\n", rec->name);
+// Since these are variants, if it looks like a dbSNP or dbVar ID, provide a link:
+if (regexMatch(rec->name, "^rs[0-9]+$"))
+ {
+ printf("dbSNP: ");
+ printDbSnpRsUrl(rec->name, "%s", rec->name);
+ puts("
");
+ }
+else if (regexMatch(rec->name, "^[en]ss?v[0-9]+$"))
+ {
+ printf("dbVar: ");
+ printf("%s
\n", rec->name, rec->name);
+ }
printCustomUrl(tdb, rec->name, TRUE);
static char *formName = "vcfCfgHapCenter";
printf("\n");
char leftBase = rec->alleles[0][0];
unsigned int vcfStart = vcfRecordTrimIndelLeftBase(rec);
boolean showLeftBase = (rec->chromStart == vcfStart+1);
char *displayAls[rec->alleleCount];
-makeDisplayAlleles(rec, showLeftBase, leftBase, 20, displayAls);
+makeDisplayAlleles(rec, showLeftBase, leftBase, 20, TRUE, FALSE, displayAls);
printPosOnChrom(seqName, rec->chromStart, rec->chromEnd, NULL, FALSE, rec->name);
printf("Reference allele: %s
\n", displayAls[0]);
vcfAltAlleleDetails(rec, displayAls);
vcfQualDetails(rec);
vcfFilterDetails(rec);
vcfInfoDetails(rec);
pgSnpCodingDetail(rec);
-makeDisplayAlleles(rec, showLeftBase, leftBase, 5, displayAls);
+makeDisplayAlleles(rec, showLeftBase, leftBase, 5, FALSE, TRUE, displayAls);
vcfGenotypesDetails(rec, tdb->track, displayAls);
}
void doVcfTabixDetails(struct trackDb *tdb, char *item)
/* Show details of an alignment from a VCF file compressed and indexed by tabix. */
{
#if (defined USE_TABIX && defined KNETFILE_HOOKS)
knetUdcInstall();
if (udcCacheTimeout() < 300)
udcSetCacheTimeout(300);
#endif//def USE_TABIX && KNETFILE_HOOKS
int start = cartInt(cart, "o");
int end = cartInt(cart, "t");
struct sqlConnection *conn = hAllocConnTrack(database, tdb);
// TODO: will need to handle per-chrom files like bam, maybe fold bamFileNameFromTable into this::