ee1f8e9674b4c647936113dceb0a8113b622c22d
angie
Mon Jul 28 15:16:21 2014 -0700
David pointed out how confusing it is to see the 'Haplotype sorting order' optionto change the central variant, when haplotype clustering is disabled. So don't
show the option unless haplotype clustering is enabled.
diff --git src/hg/hgc/vcfClick.c src/hg/hgc/vcfClick.c
index be50782..b611796 100644
--- src/hg/hgc/vcfClick.c
+++ src/hg/hgc/vcfClick.c
@@ -1,454 +1,458 @@
/* vcfTrack -- handlers for Variant Call Format data. */
/* Copyright (C) 2014 The Regents of the University of California
* See README in this or parent directory for licensing information. */
#include "common.h"
#include "dystring.h"
#include "errCatch.h"
#include "hCommon.h"
#include "hdb.h"
#include "hgc.h"
#include "htmshell.h"
#include "jsHelper.h"
#include "pgSnp.h"
#include "regexHelper.h"
#include "trashDir.h"
#include "knetUdc.h"
#include "udc.h"
#include "vcf.h"
#include "vcfUi.h"
#define NA "n/a"
static void printKeysWithDescriptions(struct vcfFile *vcff, int wordCount, char **words,
struct vcfInfoDef *infoDefs)
/* Given an array of keys, print out a list of values with
* descriptions if descriptions are available. */
{
int i;
for (i = 0; i < wordCount; i++)
{
if (i > 0)
printf(", ");
char *key = words[i];
const struct vcfInfoDef *def = vcfInfoDefForKey(vcff, key);
char *htmlKey = htmlEncode(key);
if (def != NULL)
printf("%s (%s)", htmlKey, def->description);
else
printf("%s", htmlKey);
}
printf("
\n");
}
static void vcfAltAlleleDetails(struct vcfRecord *rec, char **displayAls)
/* If VCF header specifies any symbolic alternate alleles, pull in descriptions. */
{
printf("Alternate allele(s): ");
if (rec->alleleCount < 2 || sameString(rec->alleles[1], "."))
{
printf(NA"
\n");
return;
}
struct vcfFile *vcff = rec->file;
printKeysWithDescriptions(vcff, rec->alleleCount-1, &(displayAls[1]), vcff->altDefs);
}
static void vcfQualDetails(struct vcfRecord *rec)
/* If VCF header specifies a quality/confidence score (not "."), print it out. */
{
printf("Quality/confidence score: %s
\n", sameString(rec->qual, ".") ? NA : rec->qual);
}
static void vcfFilterDetails(struct vcfRecord *rec)
/* If VCF header specifies any filters, pull in descriptions. */
{
if (rec->filterCount == 0 || sameString(rec->filters[0], "."))
printf("Filter: "NA"
\n");
else if (rec->filterCount == 1 && sameString(rec->filters[0], "PASS"))
printf("Filter: PASS
\n");
else
{
printf("Filter failures: ");
printf("\n");
struct vcfFile *vcff = rec->file;
printKeysWithDescriptions(vcff, rec->filterCount, rec->filters, vcff->filterDefs);
printf("\n");
}
}
static void vcfInfoDetails(struct vcfRecord *rec)
/* Expand info keys to descriptions, then print out keys and values. */
{
if (rec->infoCount == 0)
return;
struct vcfFile *vcff = rec->file;
puts("INFO column annotations:
");
puts("
");
int i;
for (i = 0; i < rec->infoCount; i++)
{
struct vcfInfoElement *el = &(rec->infoElements[i]);
const struct vcfInfoDef *def = vcfInfoDefForKey(vcff, el->key);
if (def == NULL)
continue;
printf("%s: | ",
el->key);
int j;
enum vcfInfoType type = def->type;
if (type == vcfInfoFlag && el->count == 0)
printf("Yes"); // no values, so we can't call vcfPrintDatum...
// However, if this is older VCF, type vcfInfoFlag might have a value.
for (j = 0; j < el->count; j++)
{
if (j > 0)
printf(", ");
if (el->missingData[j])
printf(".");
else
vcfPrintDatum(stdout, el->values[j], type);
}
if (def != NULL)
printf(" | %s", def->description);
else
printf(" | ");
printf(" |
\n");
}
puts("
");
}
static void vcfGenotypeTable(struct vcfRecord *rec, char *track, char **displayAls)
/* Put the table containing details about each genotype into a collapsible section. */
{
static struct dyString *tmp1 = NULL;
if (tmp1 == NULL)
tmp1 = dyStringNew(0);
jsBeginCollapsibleSection(cart, track, "genotypes", "Detailed genotypes", FALSE);
dyStringClear(tmp1);
dyStringAppend(tmp1, rec->format);
struct vcfFile *vcff = rec->file;
enum vcfInfoType formatTypes[256];
char *formatKeys[256];
int formatCount = chopString(tmp1->string, ":", formatKeys, ArraySize(formatKeys));
puts("Genotype info key:
");
int i;
for (i = 0; i < formatCount; i++)
{
if (sameString(formatKeys[i], vcfGtGenotype))
continue;
const struct vcfInfoDef *def = vcfInfoDefForGtKey(vcff, formatKeys[i]);
char *desc = def ? def->description : "not described in VCF header";
printf(" %s: %s
\n", formatKeys[i], desc);
formatTypes[i] = def->type;
}
hTableStart();
puts("Sample ID | Genotype | Phased? | ");
for (i = 0; i < formatCount; i++)
{
if (sameString(formatKeys[i], vcfGtGenotype))
continue;
printf("%s | ", formatKeys[i]);
}
puts("
\n");
for (i = 0; i < vcff->genotypeCount; i++)
{
struct vcfGenotype *gt = &(rec->genotypes[i]);
char *hapA = ".", *hapB = ".";
if (gt->hapIxA >= 0)
hapA = displayAls[(unsigned char)gt->hapIxA];
if (gt->isHaploid)
hapB = "";
else if (gt->hapIxB >= 0)
hapB = displayAls[(unsigned char)gt->hapIxB];
char sep = gt->isHaploid ? ' ' : gt->isPhased ? '|' : '/';
char *phasing = gt->isHaploid ? NA : gt->isPhased ? "Y" : "n";
printf("%s | %s%c%s | %s | ", vcff->genotypeIds[i],
hapA, sep, hapB, phasing);
int j;
for (j = 0; j < gt->infoCount; j++)
{
if (sameString(formatKeys[j], vcfGtGenotype))
continue;
printf("");
struct vcfInfoElement *el = &(gt->infoElements[j]);
int k;
for (k = 0; k < el->count; k++)
{
if (k > 0)
printf(", ");
if (el->missingData[k])
printf(".");
else
vcfPrintDatum(stdout, el->values[k], formatTypes[j]);
}
printf(" | ");
}
puts("
");
}
hTableEnd();
jsEndCollapsibleSection();
}
static void ignoreEm(char *format, va_list args)
/* Ignore warnings from genotype parsing -- when there's one, there
* are usually hundreds more just like it. */
{
}
static void vcfGenotypesDetails(struct vcfRecord *rec, char *track, char **displayAls)
/* Print summary of allele and genotype frequency, plus collapsible section
* with table of genotype details. */
{
struct vcfFile *vcff = rec->file;
if (vcff->genotypeCount == 0)
return;
// Wrapper table for collapsible section:
puts("");
pushWarnHandler(ignoreEm);
vcfParseGenotypes(rec);
popWarnHandler();
// Tally genotypes and alleles for summary:
int refs = 0, alts = 0, unks = 0;
int refRefs = 0, refAlts = 0, altAlts = 0, gtUnk = 0, gtOther = 0, phasedGts = 0;
int i;
for (i = 0; i < vcff->genotypeCount; i++)
{
struct vcfGenotype *gt = &(rec->genotypes[i]);
if (gt->isPhased)
phasedGts++;
if (gt->hapIxA == 0)
refs++;
else if (gt->hapIxA > 0)
alts++;
else
unks++;
if (!gt->isHaploid)
{
if (gt->hapIxB == 0)
refs++;
else if (gt->hapIxB > 0)
alts++;
else
unks++;
if (gt->hapIxA == 0 && gt->hapIxB == 0)
refRefs++;
else if (gt->hapIxA == 1 && gt->hapIxB == 1)
altAlts++;
else if ((gt->hapIxA == 1 && gt->hapIxB == 0) ||
(gt->hapIxA == 0 && gt->hapIxB == 1))
refAlts++;
else if (gt->hapIxA < 0 || gt->hapIxB < 0)
gtUnk++;
else
gtOther++;
}
}
printf("Genotype count: %d", vcff->genotypeCount);
if (differentString(seqName, "chrY"))
printf(" (%d phased)", phasedGts);
else
printf(" (haploid)");
puts("
");
int totalAlleles = refs + alts + unks;
double refAf = (double)refs/totalAlleles;
double altAf = (double)alts/totalAlleles;
printf("Alleles: %s: %d (%.3f%%); %s: %d (%.3f%%)",
displayAls[0], refs, 100*refAf, displayAls[1], alts, 100*altAf);
if (unks > 0)
printf("; unknown: %d (%.3f%%)", unks, 100 * (double)unks/totalAlleles);
puts("
");
// Should be a better way to detect haploid chromosomes than comparison with "chrY":
if (vcff->genotypeCount > 1 && differentString(seqName, "chrY"))
{
printf("Genotypes: %s/%s: %d (%.3f%%); %s/%s: %d (%.3f%%); %s/%s: %d (%.3f%%)",
displayAls[0], displayAls[0], refRefs, 100*(double)refRefs/vcff->genotypeCount,
displayAls[0], displayAls[1], refAlts, 100*(double)refAlts/vcff->genotypeCount,
displayAls[1], displayAls[1], altAlts, 100*(double)altAlts/vcff->genotypeCount);
if (gtUnk > 0)
printf("; unknown: %d (%.3f%%)", gtUnk, 100*(double)gtUnk/vcff->genotypeCount);
if (gtOther > 0)
printf("; other: %d (%.3f%%)", gtOther, 100*(double)gtOther/vcff->genotypeCount);
printf("
\n");
if (rec->alleleCount == 2)
printf("Hardy-Weinberg equilibrium: "
"P(%s/%s) = %.3f%%; P(%s/%s) = %.3f%%; P(%s/%s) = %.3f%%
",
displayAls[0], displayAls[0], 100*refAf*refAf,
displayAls[0], displayAls[1], 100*2*refAf*altAf,
displayAls[1], displayAls[1], 100*altAf*altAf);
}
vcfGenotypeTable(rec, track, displayAls);
puts("
");
}
static void pgSnpCodingDetail(struct vcfRecord *rec)
/* Translate rec into pgSnp (with proper chrom name) and call Belinda's
* coding effect predictor from pgSnp details. */
{
char *genePredTable = "knownGene";
if (hTableExists(database, genePredTable))
{
struct pgSnp *pgs = pgSnpFromVcfRecord(rec);
if (!sameString(rec->chrom, seqName))
// rec->chrom might be missing "chr" prefix:
pgs->chrom = seqName;
printSeqCodDisplay(database, pgs, genePredTable);
}
}
static void abbreviateLongSeq(char *seqIn, int endLength, boolean showLength, struct dyString *dy)
/* If seqIn is longer than 2*endLength plus abbreviation fudge, abbreviate it
* to its first endLength bases, ellipsis that says how many bases are skipped,
* and its last endLength bases; add result to dy. */
{
int threshold = 2*endLength + 30;
int seqInLen = strlen(seqIn);
if (seqInLen > threshold)
{
dyStringAppendN(dy, seqIn, endLength);
dyStringAppend(dy, "...");
if (showLength)
{
int skippedLen = seqInLen-2*endLength;
dyStringPrintf(dy, "<%d bases>...", skippedLen);
}
dyStringAppend(dy, seqIn+seqInLen-endLength);
}
else
dyStringAppend(dy, seqIn);
}
static void makeDisplayAlleles(struct vcfRecord *rec, boolean showLeftBase, char leftBase,
int endLength, boolean showLength, boolean encodeHtml,
char **displayAls)
/* If necessary, show the left base that we trimmed and/or abbreviate long sequences. */
{
struct dyString *dy = dyStringNew(128);
int i;
for (i = 0; i < rec->alleleCount; i++)
{
dyStringClear(dy);
if (showLeftBase)
dyStringPrintf(dy, "(%c)", leftBase);
abbreviateLongSeq(rec->alleles[i], endLength, showLength, dy);
if (encodeHtml)
displayAls[i] = htmlEncode(dy->string);
else
displayAls[i] = cloneString(dy->string);
}
}
static void vcfRecordDetails(struct trackDb *tdb, struct vcfRecord *rec)
/* Display the contents of a single line of VCF, assumed to be from seqName
* (using seqName instead of rec->chrom because rec->chrom might lack "chr"). */
{
printf("Name: %s
\n", rec->name);
// Since these are variants, if it looks like a dbSNP or dbVar ID, provide a link:
if (regexMatch(rec->name, "^rs[0-9]+$"))
{
printf("dbSNP: ");
printDbSnpRsUrl(rec->name, "%s", rec->name);
puts("
");
}
else if (regexMatch(rec->name, "^[en]ss?v[0-9]+$"))
{
printf("dbVar: ");
printf("%s
\n", rec->name, rec->name);
}
printCustomUrl(tdb, rec->name, TRUE);
+boolean hapClustEnabled = cartOrTdbBoolean(cart, tdb, VCF_HAP_ENABLED_VAR, TRUE);
+if (hapClustEnabled)
+ {
static char *formName = "vcfCfgHapCenter";
printf("\n");
+ }
char leftBase = rec->alleles[0][0];
unsigned int vcfStart = vcfRecordTrimIndelLeftBase(rec);
boolean showLeftBase = (rec->chromStart == vcfStart+1);
(void)vcfRecordTrimAllelesRight(rec);
char *displayAls[rec->alleleCount];
makeDisplayAlleles(rec, showLeftBase, leftBase, 20, TRUE, FALSE, displayAls);
printPosOnChrom(seqName, rec->chromStart, rec->chromEnd, NULL, FALSE, rec->name);
printf("Reference allele: %s
\n", displayAls[0]);
vcfAltAlleleDetails(rec, displayAls);
vcfQualDetails(rec);
vcfFilterDetails(rec);
vcfInfoDetails(rec);
pgSnpCodingDetail(rec);
makeDisplayAlleles(rec, showLeftBase, leftBase, 5, FALSE, TRUE, displayAls);
vcfGenotypesDetails(rec, tdb->track, displayAls);
}
void doVcfDetailsCore(struct trackDb *tdb, char *fileOrUrl, boolean isTabix)
/* Show item details using fileOrUrl. */
{
genericHeader(tdb, NULL);
int start = cartInt(cart, "o");
int end = cartInt(cart, "t");
int vcfMaxErr = -1;
struct vcfFile *vcff = NULL;
/* protect against temporary network or parsing error */
struct errCatch *errCatch = errCatchNew();
if (errCatchStart(errCatch))
{
if (isTabix)
vcff = vcfTabixFileMayOpen(fileOrUrl, seqName, start, end, vcfMaxErr, -1);
else
vcff = vcfFileMayOpen(fileOrUrl, seqName, start, end, vcfMaxErr, -1, TRUE);
}
errCatchEnd(errCatch);
if (errCatch->gotError)
{
if (isNotEmpty(errCatch->message->string))
warn("%s", errCatch->message->string);
}
errCatchFree(&errCatch);
if (vcff != NULL)
{
struct vcfRecord *rec;
for (rec = vcff->records; rec != NULL; rec = rec->next)
if (rec->chromStart == start && rec->chromEnd == end) // in pgSnp mode, don't get name
vcfRecordDetails(tdb, rec);
}
else
printf("Sorry, unable to open %s
\n", fileOrUrl);
printTrackHtml(tdb);
}
#ifdef USE_TABIX
void doVcfTabixDetails(struct trackDb *tdb, char *item)
/* Show details of an alignment from a VCF file compressed and indexed by tabix. */
{
knetUdcInstall();
if (udcCacheTimeout() < 300)
udcSetCacheTimeout(300);
struct sqlConnection *conn = hAllocConnTrack(database, tdb);
char *fileOrUrl = bbiNameFromSettingOrTableChrom(tdb, conn, tdb->table, seqName);
hFreeConn(&conn);
doVcfDetailsCore(tdb, fileOrUrl, TRUE);
}
#endif // no USE_TABIX
void doVcfDetails(struct trackDb *tdb, char *item)
/* Show details of an alignment from an uncompressed VCF file. */
{
struct customTrack *ct = lookupCt(tdb->track);
struct sqlConnection *conn = NULL;
char *table = tdb->table;
if (ct)
{
conn = hAllocConn(CUSTOM_TRASH);
table = ct->dbTableName;
}
else
conn = hAllocConnTrack(database, tdb);
char *fileOrUrl = bbiNameFromSettingOrTableChrom(tdb, conn, table, seqName);
hFreeConn(&conn);
doVcfDetailsCore(tdb, fileOrUrl, FALSE);
}