4cab7f311cadce7428c1d7e1c4dfdcb0d3a43033
angie
Wed Jul 27 10:32:19 2011 -0700
Feature #3710 (vcfTabix UI options): Added controls for selection of centervariant for haplotype clustering/sorting. Also, some hgc improvements:
made the Genotype Details table into a collapsible section; better display
of QUAL and FILTER column values, which also involved improving the
representation of those columns in src/inc/vcf.h.
diff --git src/hg/hgc/vcfClick.c src/hg/hgc/vcfClick.c
index 4a1565d..4339481 100644
--- src/hg/hgc/vcfClick.c
+++ src/hg/hgc/vcfClick.c
@@ -1,268 +1,287 @@
/* vcfTrack -- handlers for Variant Call Format data. */
#ifdef USE_TABIX
#include "common.h"
#include "dystring.h"
#include "errCatch.h"
#include "hCommon.h"
#include "hdb.h"
#include "hgc.h"
#include "htmshell.h"
+#include "jsHelper.h"
#if (defined USE_TABIX && defined KNETFILE_HOOKS)
#include "knetUdc.h"
#include "udc.h"
#endif//def USE_TABIX && KNETFILE_HOOKS
#include "pgSnp.h"
#include "trashDir.h"
#include "vcf.h"
-
+#include "vcfUi.h"
#define NA "n/a"
-static void printListWithDescriptions(struct vcfFile *vcff, char *str, char *sep, struct vcfInfoDef *infoDefs)
-/* Given a VCF field, its separator char and a list of vcfInfoDefs, print out a list
- * of values with descriptions if descriptions are available. */
+static void printKeysWithDescriptions(struct vcfFile *vcff, int wordCount, char **words,
+ struct vcfInfoDef *infoDefs)
+/* Given an array of keys, print out a list of values with
+ * descriptions if descriptions are available. */
{
-char *copy = cloneString(str);
-char *words[256];
-int i, wordCount = chopString(copy, sep, words, ArraySize(words));
+int i;
for (i = 0; i < wordCount; i++)
{
if (i > 0)
printf(", ");
char *key = words[i];
const struct vcfInfoDef *def = vcfInfoDefForKey(vcff, key);
if (def != NULL)
printf("%s (%s)", htmlEncode(key), def->description);
else
printf("%s", htmlEncode(key));
}
printf("
\n");
}
+static void printListWithDescriptions(struct vcfFile *vcff, char *str, char *sep, struct vcfInfoDef *infoDefs)
+/* Given a VCF field, its separator char and a list of vcfInfoDefs, print out a list
+ * of values with descriptions if descriptions are available. */
+{
+char *copy = cloneString(str);
+char *words[256];
+int wordCount = chopString(copy, sep, words, ArraySize(words));
+printKeysWithDescriptions(vcff, wordCount, words, infoDefs);
+}
+
static void vcfAltAlleleDetails(struct vcfRecord *rec)
/* If VCF header specifies any symbolic alternate alleles, pull in descriptions. */
{
printf("Alternate allele(s): ");
if (sameString(rec->alt, "."))
{
printf(NA"
\n");
return;
}
struct vcfFile *vcff = rec->file;
printListWithDescriptions(vcff, rec->alt, ",", vcff->altDefs);
}
+static void vcfQualDetails(struct vcfRecord *rec)
+/* If VCF header specifies a quality/confidence score (not "."), print it out. */
+{
+printf("Quality/confidence score: %s
\n", sameString(rec->qual, ".") ? NA : rec->qual);
+}
+
static void vcfFilterDetails(struct vcfRecord *rec)
/* If VCF header specifies any filters, pull in descriptions. */
{
-if (sameString(rec->filter, "PASS"))
- printf("Filter: ");
+if (rec->filterCount == 0 || sameString(rec->filters[0], "."))
+ printf("Filter: "NA"
\n");
+else if (rec->filterCount == 1 && sameString(rec->filters[0], "PASS"))
+ printf("Filter: PASS
\n");
else
+ {
printf("Filter failures: ");
+ printf("\n");
struct vcfFile *vcff = rec->file;
-if (sameString(rec->filter, "."))
- printf(NA"
\n");
-else
- printListWithDescriptions(vcff, rec->filter, ",", vcff->filterDefs);
+ printKeysWithDescriptions(vcff, rec->filterCount, rec->filters, vcff->filterDefs);
+ printf("\n");
+ }
}
static void vcfInfoDetails(struct vcfRecord *rec)
/* Expand info keys to descriptions, then print out keys and values. */
{
if (rec->infoCount == 0)
return;
struct vcfFile *vcff = rec->file;
puts("INFO column annotations:
");
puts("
");
int i;
for (i = 0; i < rec->infoCount; i++)
{
struct vcfInfoElement *el = &(rec->infoElements[i]);
const struct vcfInfoDef *def = vcfInfoDefForKey(vcff, el->key);
if (def == NULL)
continue;
printf("%s: | ", el->key);
int j;
enum vcfInfoType type = def->type;
if (type == vcfInfoFlag && el->count == 0)
printf("Yes"); // no values, so we can't call vcfPrintDatum...
// However, if this is older VCF, type vcfInfoFlag might have a value.
for (j = 0; j < el->count; j++)
{
if (j > 0)
printf(", ");
vcfPrintDatum(stdout, el->values[j], type);
}
if (def != NULL)
printf(" | %s", def->description);
else
printf(" | ");
printf(" |
\n");
}
puts("
");
}
static char *hapFromIx(char *ref, char *altAlleles[], unsigned char altAlCount, unsigned char hapIx)
/* Look up the allele specified by hapIx: 0 = ref, 1 & up = offset index into altAlleles */
{
if (hapIx == 0)
return ref;
else if (hapIx-1 < altAlCount)
return altAlleles[hapIx-1];
else
errAbort("hapFromIx: index %d is out of range (%d alleles specified)", hapIx, altAlCount+1);
return NULL;
}
-static void vcfGenotypesDetails(struct vcfRecord *rec)
+static void vcfGenotypesDetails(struct vcfRecord *rec, char *track)
/* Print genotypes in some kind of table... */
{
struct vcfFile *vcff = rec->file;
if (vcff->genotypeCount == 0)
return;
static struct dyString *tmp1 = NULL, *tmp2 = NULL;
if (tmp1 == NULL)
{
tmp1 = dyStringNew(0);
tmp2 = dyStringNew(0);
}
-// TODO: make this a collapsible section
+jsBeginCollapsibleSection(cart, track, "genotypes", "Detailed genotypes", FALSE);
vcfParseGenotypes(rec);
dyStringClear(tmp1);
dyStringAppend(tmp1, rec->format);
enum vcfInfoType formatTypes[256];
char *formatKeys[256];
int formatCount = chopString(tmp1->string, ":", formatKeys, ArraySize(formatKeys));
-puts("
Genotype info key:
");
+puts("Genotype info key:
");
int i;
for (i = 0; i < formatCount; i++)
{
if (sameString(formatKeys[i], vcfGtGenotype))
continue;
const struct vcfInfoDef *def = vcfInfoDefForGtKey(vcff, formatKeys[i]);
char *desc = def ? def->description : "not described in VCF header";
printf(" %s: %s
\n", formatKeys[i], desc);
formatTypes[i] = def->type;
}
hTableStart();
puts("Sample ID | Genotype | Phased? | ");
for (i = 0; i < formatCount; i++)
{
if (sameString(formatKeys[i], vcfGtGenotype))
continue;
printf("%s | ", formatKeys[i]);
}
puts("
\n");
dyStringClear(tmp2);
dyStringAppend(tmp2, rec->alt);
char *altAlleles[256];
unsigned char altCount = chopCommas(tmp2->string, altAlleles);
for (i = 0; i < vcff->genotypeCount; i++)
{
struct vcfGenotype *gt = &(rec->genotypes[i]);
char *hapA = hapFromIx(rec->ref, altAlleles, altCount, gt->hapIxA);
char *hapB = gt->isHaploid ? NA : hapFromIx(rec->ref, altAlleles, altCount, gt->hapIxB);
char sep = gt->isPhased ? '|' : '/';
char *phasing = gt->isHaploid ? NA : gt->isPhased ? "Y" : "n";
printf("%s | %s%c%s | %s | ", vcff->genotypeIds[i],
hapA, sep, hapB, phasing);
int j;
for (j = 0; j < formatCount; j++)
{
if (sameString(formatKeys[j], vcfGtGenotype))
continue;
printf("");
struct vcfInfoElement *el = &(gt->infoElements[j]);
int k;
for (k = 0; k < el->count; k++)
{
if (k > 0)
printf(", ");
vcfPrintDatum(stdout, el->values[k], formatTypes[j]);
}
printf(" | ");
}
puts("
");
}
hTableEnd();
+jsEndCollapsibleSection();
}
static void pgSnpCodingDetail(struct vcfRecord *rec)
/* Translate rec into pgSnp (with proper chrom name) and call Belinda's
* coding effect predictor from pgSnp details. */
{
if (hTableExists(database, "knownGene"))
{
struct pgSnp *pgs = pgSnpFromVcfRecord(rec);
if (!sameString(rec->chrom, seqName))
// rec->chrom might be missing "chr" prefix:
pgs->chrom = seqName;
printSeqCodDisplay(database, pgs);
}
}
static void vcfRecordDetails(struct trackDb *tdb, struct vcfRecord *rec)
/* Display the contents of a single line of VCF, assumed to be from seqName
* (using seqName instead of rec->chrom because rec->chrom might lack "chr"). */
{
printf("Name: %s
\n", rec->name);
-if (rec->file->genotypeCount > 0)
- {
- char cartVar[512];
- safef(cartVar, sizeof(cartVar), "%s.centerVariantPos", tdb->track);
- printf("View haplotypes sorted by variants at this position
\n",
- hgTracksPathAndSettings(), cartVar, seqName, rec->chromStart);
- }
+static char *formName = "vcfCfgHapCenter";
+printf("\n");
printPosOnChrom(seqName, rec->chromStart, rec->chromEnd, NULL, FALSE, rec->name);
printf("Reference allele: %s
\n", rec->ref);
vcfAltAlleleDetails(rec);
-if (rec->qual != 0.0)
- printf("Call quality: %.1f
\n", rec->qual);
+vcfQualDetails(rec);
vcfFilterDetails(rec);
vcfInfoDetails(rec);
pgSnpCodingDetail(rec);
-vcfGenotypesDetails(rec);
+// Wrapper table for collapsible section:
+puts("");
+vcfGenotypesDetails(rec, tdb->track);
+puts("
");
}
void doVcfTabixDetails(struct trackDb *tdb, char *item)
/* Show details of an alignment from a VCF file compressed and indexed by tabix. */
{
#if (defined USE_TABIX && defined KNETFILE_HOOKS)
knetUdcInstall();
if (udcCacheTimeout() < 300)
udcSetCacheTimeout(300);
#endif//def USE_TABIX && KNETFILE_HOOKS
int start = cartInt(cart, "o");
int end = cartInt(cart, "t");
struct sqlConnection *conn = hAllocConnTrack(database, tdb);
// TODO: will need to handle per-chrom files like bam, maybe fold bamFileNameFromTable into this::
char *fileOrUrl = bbiNameFromSettingOrTable(tdb, conn, tdb->table);
hFreeConn(&conn);
int vcfMaxErr = 100;
struct vcfFile *vcff = NULL;
/* protect against temporary network error */
struct errCatch *errCatch = errCatchNew();
if (errCatchStart(errCatch))
{
vcff = vcfTabixFileMayOpen(fileOrUrl, seqName, start, end, vcfMaxErr);
}
errCatchEnd(errCatch);
if (errCatch->gotError)
{
if (isNotEmpty(errCatch->message->string))
warn("%s", errCatch->message->string);
}
errCatchFree(&errCatch);
if (vcff != NULL)
{
struct vcfRecord *rec;
for (rec = vcff->records; rec != NULL; rec = rec->next)
if (rec->chromStart == start && rec->chromEnd == end) // in pgSnp mode, don't get name
vcfRecordDetails(tdb, rec);
}
}
#endif // no USE_TABIX