be4311c07e14feb728abc6425ee606ffaa611a58 markd Fri Jan 22 06:46:58 2021 -0800 merge with master diff --git src/hg/hgTracks/vcfTrack.c src/hg/hgTracks/vcfTrack.c index 132a41d..15f5dd7 100644 --- src/hg/hgTracks/vcfTrack.c +++ src/hg/hgTracks/vcfTrack.c @@ -1,3140 +1,3146 @@ /* vcfTrack -- handlers for Variant Call Format data. */ /* Copyright (C) 2014 The Regents of the University of California * See README in this or parent directory for licensing information. */ #include "common.h" #include "bigWarn.h" #include "dystring.h" #include "rainbow.h" #include "errCatch.h" #include "fa.h" #include "genePredReader.h" #include "hacTree.h" #include "hdb.h" #include "hgColors.h" #include "hgTracks.h" #include "iupac.h" #include "net.h" #include "pgSnp.h" #include "phyloTree.h" #include "trackHub.h" #include "trashDir.h" #include "variantProjector.h" #include "vcf.h" #include "vcfUi.h" #include "knetUdc.h" #include "udc.h" #include "memgfx.h" // Russ Corbett-Detig suggested darker shades for coloring non-synonymous variants green Color darkerShadesOfGreenOnWhite[EXPR_DATA_SHADES]; static boolean getMinQual(struct trackDb *tdb, double *retMinQual) /* Return TRUE and set retMinQual if cart contains minimum QUAL filter */ { if (cartOrTdbBoolean(cart, tdb, VCF_APPLY_MIN_QUAL_VAR, VCF_DEFAULT_APPLY_MIN_QUAL)) { if (retMinQual != NULL) *retMinQual = cartOrTdbDouble(cart, tdb, VCF_MIN_QUAL_VAR, VCF_DEFAULT_MIN_QUAL); return TRUE; } return FALSE; } static boolean minQualFail(struct vcfRecord *record, double minQual) /* Return TRUE if record's QUAL column value is non-numeric or is less than minQual. */ { if (isEmpty(record->qual) || (record->qual[0] != '-' && !isdigit(record->qual[0])) || atof(record->qual) < minQual) return TRUE; return FALSE; } static boolean getFilterValues(struct trackDb *tdb, struct slName **retValues) /* Return TRUE and set retValues if cart contains FILTER column values to exclude */ { if (cartListVarExistsAnyLevel(cart, tdb, FALSE, VCF_EXCLUDE_FILTER_VAR)) { struct slName *selectedValues = cartOptionalSlNameListClosestToHome(cart, tdb, FALSE, VCF_EXCLUDE_FILTER_VAR); if (retValues != NULL) *retValues = selectedValues; return TRUE; } return FALSE; } static boolean filterColumnFail(struct vcfRecord *record, struct slName *filterValues) /* Return TRUE if record's FILTER column value(s) matches one of filterValues (from cart). */ { int i; for (i = 0; i < record->filterCount; i++) if (slNameInList(filterValues, record->filters[i])) return TRUE; return FALSE; } static boolean getMinFreq(struct trackDb *tdb, double *retMinFreq) /* Return TRUE and set retMinFreq if cart contains nonzero minimum minor allele frequency. */ { double minFreq = cartOrTdbDouble(cart, tdb, VCF_MIN_ALLELE_FREQ_VAR, VCF_DEFAULT_MIN_ALLELE_FREQ); if (minFreq > 0) { if (retMinFreq != NULL) *retMinFreq = minFreq; return TRUE; } return FALSE; } static boolean minFreqFail(struct vcfRecord *record, double minFreq) /* Return TRUE if record's INFO include AF (alternate allele frequencies) or AC+AN * (alternate allele counts and total count of observed alleles) and the minor allele * frequency < minFreq -- or rather, major allele frequency > (1 - minFreq) because * variants with > 2 alleles might have some significant minor frequencies along with * tiny minor frequencies). */ { struct vcfFile *vcff = record->file; boolean gotInfo = FALSE; double refFreq = 1.0; double maxAltFreq = 0.0; int i; const struct vcfInfoElement *afEl = vcfRecordFindInfo(record, "AF"); const struct vcfInfoDef *afDef = vcfInfoDefForKey(vcff, "AF"); if (afEl != NULL && afDef != NULL && afDef->type == vcfInfoFloat) { // If INFO includes alt allele freqs, use them directly. gotInfo = TRUE; for (i = 0; i < afEl->count; i++) { if (afEl->missingData[i]) continue; double altFreq = afEl->values[i].datFloat; refFreq -= altFreq; if (altFreq > maxAltFreq) maxAltFreq = altFreq; } } else { // Calculate alternate allele freqs from AC and AN: const struct vcfInfoElement *acEl = vcfRecordFindInfo(record, "AC"); const struct vcfInfoDef *acDef = vcfInfoDefForKey(vcff, "AC"); const struct vcfInfoElement *anEl = vcfRecordFindInfo(record, "AN"); const struct vcfInfoDef *anDef = vcfInfoDefForKey(vcff, "AN"); if (acEl != NULL && acDef != NULL && acDef->type == vcfInfoInteger && anEl != NULL && anDef != NULL && anDef->type == vcfInfoInteger && anEl->count == 1 && anEl->missingData[0] == FALSE) { gotInfo = TRUE; int totalCount = anEl->values[0].datInt; for (i = 0; i < acEl->count; i++) { if (acEl->missingData[i]) continue; int altCount = acEl->values[i].datInt; double altFreq = (double)altCount / totalCount; refFreq -= altFreq; if (altFreq < maxAltFreq) maxAltFreq = altFreq; } } else // Use MAF for alternate allele freqs from MAF: { const struct vcfInfoElement *mafEl = vcfRecordFindInfo(record, "MAF"); const struct vcfInfoDef *mafDef = vcfInfoDefForKey(vcff, "MAF"); if (mafEl != NULL && mafDef != NULL && mafDef->type == vcfInfoString && startsWith("Minor Allele Frequency",mafDef->description)) { // If INFO includes alt allele freqs, use them directly. gotInfo = TRUE; if (mafEl->count >= 1 && !mafEl->missingData[mafEl->count-1]) { char data[64]; safecpy(data,sizeof(data),mafEl->values[mafEl->count-1].datString); maxAltFreq = atof(lastWordInLine(data)); refFreq -= maxAltFreq; } } } } if (gotInfo) { double majorAlFreq = max(refFreq, maxAltFreq); if (majorAlFreq > (1.0 - minFreq)) return TRUE; } return FALSE; } static void filterRefOnlyAlleles(struct vcfFile *vcff, struct trackDb *tdb) /* Drop items from VCF that don't differ from the reference for any of the * samples specified in trackDb */ { struct vcfRecord *rec, *nextRecord, *retList = NULL; const struct vcfGenotype *gt; boolean hideOtherSamples = cartUsualBooleanClosestToHome(cart, tdb, FALSE, VCF_PHASED_HIDE_OTHER_VAR, FALSE); struct slPair *sample, *sampleOrder = vcfPhasedGetSampleOrder(cart, tdb, FALSE, hideOtherSamples); for (rec = vcff->records; rec != NULL; rec = nextRecord) { nextRecord = rec->next; boolean allRef = TRUE; for (sample = sampleOrder; sample != NULL; sample = sample->next) { gt = vcfRecordFindGenotype(rec, sample->name); if (gt && !(gt->hapIxA == 0 && gt->hapIxB == 0) ) allRef = FALSE; } if (!allRef) slAddHead(&retList, rec); } slReverse(&retList); vcff->records = retList; } static void filterRecords(struct vcfFile *vcff, struct trackDb *tdb) /* If a filter is specified in the cart, remove any records that don't pass filter. */ { double minQual = VCF_DEFAULT_MIN_QUAL; struct slName *filterValues = NULL; double minFreq = VCF_DEFAULT_MIN_ALLELE_FREQ; boolean gotQualFilter = getMinQual(tdb, &minQual); boolean gotFilterFilter = getFilterValues(tdb, &filterValues); boolean gotMinFreqFilter = getMinFreq(tdb, &minFreq); if (! (gotQualFilter || gotFilterFilter || gotMinFreqFilter) ) return; struct vcfRecord *rec, *nextRec, *newList = NULL; for (rec = vcff->records; rec != NULL; rec = nextRec) { nextRec = rec->next; if (! ((gotQualFilter && minQualFail(rec, minQual)) || (gotFilterFilter && filterColumnFail(rec, filterValues)) || (gotMinFreqFilter && minFreqFail(rec, minFreq)) )) slAddHead(&newList, rec); } slReverse(&newList); vcff->records = newList; } struct pgSnpVcfStartEnd /* This extends struct pgSnp by tacking on an original VCF chromStart and End at the end, * for use by indelTweakMapItem below. This can be cast to pgs. */ { struct pgSnp pgs; unsigned int vcfStart; unsigned int vcfEnd; }; static struct pgSnp *vcfFileToPgSnp(struct vcfFile *vcff, struct trackDb *tdb) /* Convert vcff's records to pgSnp; don't free vcff until you're done with pgSnp * because it contains pointers into vcff's records' chrom. If the trackDb setting * sampleName is present, then check whether all the records are phased or not */ { struct pgSnp *pgsList = NULL; struct vcfRecord *rec; int maxLen = 33; int maxAlCount = 5; struct slPair *sample = NULL, *phasedSamples = NULL; if (sameString(tdb->type, "vcfPhasedTrio")) { boolean hideOtherSamples = cartUsualBooleanClosestToHome(cart, tdb, FALSE, VCF_PHASED_HIDE_OTHER_VAR, FALSE); phasedSamples = vcfPhasedGetSampleOrder(cart, tdb, FALSE, hideOtherSamples); } vcff->allPhased = TRUE; for (rec = vcff->records; rec != NULL; rec = rec->next) { struct pgSnpVcfStartEnd *psvs = needMem(sizeof(*psvs)); psvs->vcfStart = vcfRecordTrimIndelLeftBase(rec); psvs->vcfEnd = vcfRecordTrimAllelesRight(rec); for (sample = phasedSamples; sample != NULL; sample = sample->next) { const struct vcfGenotype *gt = vcfRecordFindGenotype(rec, sample->name); if (!gt->isPhased) vcff->allPhased = FALSE; } struct pgSnp *pgs = pgSnpFromVcfRecord(rec); memcpy(&(psvs->pgs), pgs, sizeof(*pgs)); pgs = (struct pgSnp *)psvs; // leak mem // Insertion sequences can be quite long; abbreviate here for display. int len = strlen(pgs->name); if (len > maxLen) { int maxAlLen = (maxLen / min(rec->alleleCount, maxAlCount)) - 1; pgs->name[0] = '\0'; int i; for (i = 0; i < rec->alleleCount; i++) { if (i > 0) safencat(pgs->name, len+1, "/", 1); if (i >= maxAlCount) { safecat(pgs->name, len+1, "..."); pgs->alleleCount = maxAlCount; break; } if (strlen(rec->alleles[i]) > maxAlLen-3) strcpy(rec->alleles[i]+maxAlLen-3, "..."); safencat(pgs->name, len+1, rec->alleles[i], maxAlLen); } } slAddHead(&pgsList, pgs); } slReverse(&pgsList); return pgsList; } // Center-weighted alpha clustering of haplotypes -- see Redmine #3711, #2823 note 7 // It might be nice to use an allele-frequency representation here instead of [ACGTN] strings // with "N" for missing info or differences, but keep it simple. struct cwaExtraData /* Helper data for hacTree clustering of haplotypes by center-weighted alpha distance */ { int center; // index from which each point's contribution to distance is to be weighted int len; // total length of haplotype strings double alpha; // weighting factor for distance from center struct lm *localMem; }; // This is the representation of a cluster of up to 65,535 haplotypes of equal length, // where each variant's alleles are specified as 0 (reference) or 1 (alternate) // [or possibly 2 for second alternate, but those are rare so I'll ignore them]. // When an individual is heterozygous and unphased for some variant, we need to // account for missing data. struct hapCluster { struct hapCluster *next; // hacTree wants slList of items unsigned int *refCounts; // per-variant count of reference alleles observed unsigned int *unkCounts; // per-variant count of unknown (or unphased het) alleles unsigned int leafCount; // number of leaves under this node (or 1 if leaf) unsigned int gtHapIx; // if leaf, (genotype index << 1) + hap (0 or 1 for diploid) }; INLINE boolean isRef(const struct hapCluster *c, int varIx) // Return TRUE if the leaves of cluster have at least as many reference alleles // as alternate alleles for variant varIx. { unsigned int altCount = c->leafCount - c->refCounts[varIx] - c->unkCounts[varIx]; return (c->refCounts[varIx] >= altCount); } static double cwaDistance(const struct slList *item1, const struct slList *item2, void *extraData) /* Center-weighted alpha sequence distance function for hacTree clustering of haplotype seqs */ // This is inner-loop so I am not doing defensive checks. Caller must ensure: // 1. kids's sequences' lengths are both equal to helper->len // 2. 0 <= helper->center <= len // 3. 0.0 < helper->alpha <= 1.0 { const struct hapCluster *kid1 = (const struct hapCluster *)item1; const struct hapCluster *kid2 = (const struct hapCluster *)item2; struct cwaExtraData *helper = extraData; double distance = 0; double weight = 1; // start at center: alpha to the 0th power int i; for (i=helper->center; i >= 0; i--) { if (isRef(kid1, i) != isRef(kid2, i)) distance += weight; weight *= helper->alpha; } weight = helper->alpha; // start at center+1: alpha to the 1st power for (i=helper->center+1; i < helper->len; i++) { if (isRef(kid1, i) != isRef(kid2, i)) distance += weight; weight *= helper->alpha; } return distance; } static struct hapCluster *lmHapCluster(struct cwaExtraData *helper) /* Use localMem to allocate a new cluster of the given len. */ { struct hapCluster *c = lmAlloc(helper->localMem, sizeof(struct hapCluster)); c->refCounts = lmAlloc(helper->localMem, helper->len * sizeof(unsigned int)); c->unkCounts = lmAlloc(helper->localMem, helper->len * sizeof(unsigned int)); return c; } static struct slList *cwaMerge(const struct slList *item1, const struct slList *item2, void *extraData) /* Make a consensus haplotype from two input haplotypes, for hacTree clustering by * center-weighted alpha distance. */ // This is inner-loop so I am not doing defensive checks. Caller must ensure that // kids's sequences' lengths are both equal to helper->len. { const struct hapCluster *kid1 = (const struct hapCluster *)item1; const struct hapCluster *kid2 = (const struct hapCluster *)item2; struct cwaExtraData *helper = extraData; struct hapCluster *consensus = lmHapCluster(helper); consensus->leafCount = kid1->leafCount + kid2->leafCount; consensus->gtHapIx = kid1->gtHapIx; int i; for (i=0; i < helper->len; i++) { consensus->refCounts[i] = kid1->refCounts[i] + kid2->refCounts[i]; consensus->unkCounts[i] = kid1->unkCounts[i] + kid2->unkCounts[i]; } return (struct slList *)consensus; } INLINE void hapClusterToString(const struct hapCluster *c, char *s, int len) /* Write a text representation of hapCluster's alleles into s which is at least len+1 long. */ { int i; for (i=0; i < len; i++) s[i] = isRef(c, i) ? '0': '1'; s[i] = 0; } static int cwaCmp(const struct slList *item1, const struct slList *item2, void *extraData) /* Convert hapCluster to allele strings for easy comparison by strcmp. */ { const struct hapCluster *c1 = (const struct hapCluster *)item1; const struct hapCluster *c2 = (const struct hapCluster *)item2; struct cwaExtraData *helper = extraData; char s1[helper->len+1]; char s2[helper->len+1]; hapClusterToString(c1, s1, helper->len); hapClusterToString(c2, s2, helper->len); return strcmp(s1, s2); } void rSetGtHapOrder(struct hacTree *ht, unsigned int *gtHapOrder, unsigned int *retGtHapEnd) /* Traverse hacTree and build an ordered array of genotype + haplotype indices. */ { if (ht->left == NULL && ht->right == NULL) { struct hapCluster *c = (struct hapCluster *)ht->itemOrCluster; gtHapOrder[(*retGtHapEnd)++] = c->gtHapIx; } else if (ht->left == NULL) rSetGtHapOrder(ht->right, gtHapOrder, retGtHapEnd); else if (ht->right == NULL) rSetGtHapOrder(ht->left, gtHapOrder, retGtHapEnd); else { struct hapCluster *cL = (struct hapCluster *)ht->left->itemOrCluster; struct hapCluster *cR = (struct hapCluster *)ht->right->itemOrCluster; if (cL->leafCount >= cR->leafCount) { rSetGtHapOrder(ht->left, gtHapOrder, retGtHapEnd); rSetGtHapOrder(ht->right, gtHapOrder, retGtHapEnd); } else { rSetGtHapOrder(ht->right, gtHapOrder, retGtHapEnd); rSetGtHapOrder(ht->left, gtHapOrder, retGtHapEnd); } } } static unsigned int *clusterHaps(const struct vcfFile *vcff, int centerIx, int startIx, int endIx, unsigned int *retGtHapEnd, struct hacTree **retTree) /* Given a bunch of VCF records with phased genotypes, build up one haplotype string * per chromosome that is the sequence of alleles in all variants (simplified to one base * per variant). Each individual/sample will have two haplotype strings (unless haploid * like Y or male X). Independently cluster the haplotype strings using hacTree with the * center-weighted alpha functions above. Return an array of genotype+haplotype indices * in the order determined by the hacTree, and set retGtHapEnd to its length/end. */ { double alpha = 0.5; struct lm *lm = lmInit(0); struct cwaExtraData helper = { centerIx-startIx, endIx-startIx, alpha, lm }; int ploidy = 2; // Assuming diploid genomes here, no XXY, tetraploid etc. int gtCount = vcff->genotypeCount; // Make an slList of hapClusters, but allocate in a big block so I can use // array indexing. struct hapCluster **hapArray = lmAlloc(lm, sizeof(struct hapCluster *) * gtCount * ploidy); int i; for (i=0; i < ploidy * gtCount; i++) { hapArray[i] = lmHapCluster(&helper); if (i > 0) hapArray[i-1]->next = hapArray[i]; } boolean haveHaploid = FALSE; int varIx; struct vcfRecord *rec; for (varIx = 0, rec = vcff->records; rec != NULL && varIx < endIx; varIx++, rec = rec->next) { if (varIx < startIx) continue; int countIx = varIx - startIx; int gtIx; for (gtIx=0; gtIx < gtCount; gtIx++) { struct vcfGenotype *gt = &(rec->genotypes[gtIx]); struct hapCluster *c1 = hapArray[gtIx]; struct hapCluster *c2 = hapArray[gtCount + gtIx]; // hardwired ploidy=2 c1->gtHapIx = gtIx << 1; c1->leafCount = 1; if (gt->isPhased || gt->isHaploid || (gt->hapIxA == gt->hapIxB)) { // first haplotype's counts: if (gt->hapIxA < 0) c1->unkCounts[countIx] = 1; else if (gt->hapIxA == 0) c1->refCounts[countIx] = 1; if (gt->isHaploid) haveHaploid = TRUE; else { // got second haplotype, fill in its counts: c2->gtHapIx = (gtIx << 1) | 1; c2->leafCount = 1; if (gt->hapIxB < 0) c2->unkCounts[countIx] = 1; else if (gt->hapIxB == 0) c2->refCounts[countIx] = 1; } } else { // Missing data or unphased heterozygote, don't use haplotype info for clustering c2->gtHapIx = (gtIx << 1) | 1; c2->leafCount = 1; c1->unkCounts[countIx] = c2->unkCounts[countIx] = 1; } } if (haveHaploid) { // Some array items will have an empty cluster for missing hap2 -- // trim those from the linked list. struct hapCluster *c = hapArray[0]; while (c != NULL && c->next != NULL) { if (c->next->leafCount == 0) c->next = c->next->next; else c = c->next; } } } struct hacTree *ht = hacTreeFromItems((struct slList *)(hapArray[0]), lm, cwaDistance, cwaMerge, cwaCmp, &helper); unsigned int *gtHapOrder = needMem(vcff->genotypeCount * ploidy * sizeof(unsigned int)); rSetGtHapOrder(ht, gtHapOrder, retGtHapEnd); *retTree = ht; return gtHapOrder; } INLINE Color pgSnpColor(char *allele) /* Color allele by first base according to pgSnp palette. */ { if (allele[0] == 'A') return revCmplDisp ? MG_MAGENTA : MG_RED; else if (allele[0] == 'C') return revCmplDisp ? darkGreenColor : MG_BLUE; else if (allele[0] == 'G') return revCmplDisp ? MG_BLUE : darkGreenColor; else if (allele[0] == 'T') return revCmplDisp ? MG_RED : MG_MAGENTA; else return shadesOfGray[5]; } INLINE void abbrevAndHandleRC(char *abbrevAl, size_t abbrevAlSize, const char *fullAl) /* Limit the size of displayed allele to abbrevAlSize-1 and handle reverse-complemented display. */ { boolean fullLen = strlen(fullAl); boolean truncating = (fullLen > abbrevAlSize-1); if (truncating) { int truncLen = abbrevAlSize - 4; if (revCmplDisp) { safencpy(abbrevAl, abbrevAlSize, (fullAl + fullLen - truncLen), truncLen); reverseComplement(abbrevAl, truncLen); } else safencpy(abbrevAl, abbrevAlSize, fullAl, truncLen); safecat(abbrevAl, abbrevAlSize, "..."); } else { safecpy(abbrevAl, abbrevAlSize, fullAl); if (revCmplDisp) reverseComplement(abbrevAl, fullLen); } } INLINE void gtSummaryString(struct vcfRecord *rec, struct dyString *dy) // Make pgSnp-like mouseover text, but with genotype counts instead of allele counts. { if (isNotEmpty(rec->name) && !sameString(rec->name, ".")) dyStringPrintf(dy, "%s: ", rec->name); if (rec->alleleCount < 2) { dyStringAppendC(dy, '?'); return; } int *gtCounts = NULL, *alCounts = NULL;; int phasedGts = 0, diploidCount = 0; vcfCountGenotypes(rec, >Counts, &alCounts, &phasedGts, &diploidCount); size_t abbrevSize = 16; char displayAls[rec->alleleCount][abbrevSize]; int i; for (i = 0; i < rec->alleleCount; i++) abbrevAndHandleRC(displayAls[i], sizeof displayAls[i], rec->alleles[i]); if (diploidCount == 0) { // No diploid genotypes, just allele counts. for (i = 0; i < rec->alleleCount; i++) { if (i > 0) dyStringAppendC(dy, ' '); dyStringPrintf(dy, "%s:%d", displayAls[i], alCounts[i]); } } else { dyStringPrintf(dy, "%s/%s:%d", displayAls[0], displayAls[0], gtCounts[0]); for (i = 1; i < rec->alleleCount + 1; i++) { int j; for (j = 0; j <= i; j++) { int gtIx = vcfGenotypeIndex(j, i); if (gtCounts[gtIx] > 0) { char *alJ = (j == rec->alleleCount) ? "?" : displayAls[j]; char *alI = (i == rec->alleleCount) ? "?" : displayAls[i]; dyStringPrintf(dy, "; %s/%s:%d", alJ, alI, gtCounts[gtIx]); } } } } } void mapBoxForCenterVariant(struct vcfRecord *rec, struct hvGfx *hvg, struct track *tg, int xOff, int yOff, int width) /* Special mouseover for center variant */ { struct dyString *dy = dyStringNew(0); unsigned int chromStartMap = vcfRecordTrimIndelLeftBase(rec); unsigned int chromEndMap = vcfRecordTrimAllelesRight(rec); gtSummaryString(rec, dy); dyStringAppend(dy, " Haplotypes sorted on "); char *centerChrom = cartOptionalStringClosestToHome(cart, tg->tdb, FALSE, "centerVariantChrom"); if (centerChrom == NULL || !sameString(chromName, centerChrom)) dyStringAppend(dy, "middle variant by default. "); else dyStringAppend(dy, "this variant. "); dyStringAppend(dy, "To anchor sorting to a different variant, click on that variant and " "then click on the 'Use this variant' button below the variant name."); const double scale = scaleForPixels(width); int x1 = round((double)(rec->chromStart-winStart)*scale) + xOff; int x2 = round((double)(rec->chromEnd-winStart)*scale) + xOff; int w = x2-x1; if (w <= 1) { x1--; w = 3; } mapBoxHgcOrHgGene(hvg, chromStartMap, chromEndMap, x1, yOff, w, tg->height, tg->track, rec->name, dy->string, NULL, TRUE, NULL); } // These are initialized when we start drawing, then constant. static Color purple = 0; static Color undefYellow = 0; enum hapColorMode { altOnlyMode, refAltMode, baseMode, functionMode }; static Color colorByAltShade(int refs, int alts, int unks, Color colorShades[], int colorShadeCount) /* Coloring alternate alleles only by shades of color: shade by proportion of alts */ { if (unks > (refs + alts)) return undefYellow; int total = refs + alts + unks; int shadeIx = (alts * colorShadeCount) / total; if (shadeIx == colorShadeCount) shadeIx--; return colorShades[shadeIx]; } static Color colorByAltOnly(int refs, int alts, int unks) /* Coloring alternate alleles only: shade by proportion of alt alleles */ { return colorByAltShade(refs, alts, unks, shadesOfGray, maxShade+1); } static Color colorByRefAlt(int refs, int alts, int unks) /* Color blue for reference allele, red for alternate allele, gray for unknown, purple * for reasonably mixed. */ { const int fudgeFactor = 4; // Threshold factor for calling one color or the other when mixed if (unks > (refs + alts)) return undefYellow; if (alts > fudgeFactor * refs) return MG_RED; if (refs > fudgeFactor * alts) return MG_BLUE; return purple; } static Color colorByBase(int refs, int alts, int unks, char *refAl, char *altAl) /* Color gray for unknown or mixed, otherwise pgSnpColor of predominant allele. */ { const int fudgeFactor = 4; // Threshold for calling for one color or the other when mixed if (unks > (refs + alts)) return undefYellow; if (alts > fudgeFactor * refs) return pgSnpColor(altAl); if (refs > fudgeFactor * alts) return pgSnpColor(refAl); return shadesOfGray[5]; } static struct dnaSeq *genePredToGenomicSequence(struct genePred *pred, struct seqWindow *gSeqWin) /* Return concatenated genomic sequence of exons of pred. */ { int txLen = 0; int i; for (i=0; i < pred->exonCount; i++) txLen += (pred->exonEnds[i] - pred->exonStarts[i]); char *seq = needMem(txLen + 1); int offset = 0; for (i=0; i < pred->exonCount; i++) { int exonStart = pred->exonStarts[i]; int exonEnd = pred->exonEnds[i]; int exonLen = exonEnd - exonStart; seqWindowCopy(gSeqWin, exonStart, exonLen, seq+offset, txLen+1-offset); offset += exonLen; } if(pred->strand[0] == '-') reverseComplement(seq, txLen); struct dnaSeq *txSeq = NULL; AllocVar(txSeq); txSeq->name = cloneString(pred->name); txSeq->dna = seq; txSeq->size = txLen; return txSeq; } struct txInfo /* Transcript sequence and alignment needed for prediction of functional effect/consequences */ { struct txInfo *next; struct psl *psl; // alignment of transcript to genome struct dnaSeq *txSeq; // transcript sequence struct genbankCds *cds; // transcript CDS (possible none) struct dnaSeq *protSeq; // transcript protein sequence (possibly NULL) }; static struct txInfo *txInfoFromGenePred(struct genePred *gp, struct seqWindow *gSeqWin) /* Use gp and gSeqWin to construct transcript alignment, sequence and protein sequence if applic. */ { struct txInfo *txi; AllocVar(txi); AllocVar(txi->cds); genePredToCds(gp, txi->cds); txi->txSeq = genePredToGenomicSequence(gp, gSeqWin); int chromSize = 0; // unused txi->psl = genePredToPsl(gp, chromSize, txi->txSeq->size); pslRemoveFrameShifts(txi->psl); vpExpandIndelGaps(txi->psl, gSeqWin, txi->txSeq); txi->protSeq = NULL; if (txi->cds->end > txi->cds->start && txi->cds->startComplete) { txi->protSeq = translateSeq(txi->txSeq, txi->cds->start, FALSE); aaSeqZToX(txi->protSeq); } return txi; } static struct hash *hashExtNcbiRefSeq(struct sqlConnection *conn) /* Despite the seq/extFile structure, there is only one external file and we don't want to * keep opening and closing the file each time. Just in case there are multiple files someday, * hash extNcbiRefSeq id to open udc file handle(s). */ { struct hash *extNcbi = hashNew(0); char query[1024]; sqlSafef(query, sizeof query, "select id, path from extNcbiRefSeq"); struct sqlResult *sr = sqlGetResult(conn, query); char **row; while ((row = sqlNextRow(sr)) != NULL) { char *extId = row[0]; char *path = row[1]; struct udcFile *udcF = udcFileOpen(path, NULL); hashAdd(extNcbi, extId, udcF); } sqlFreeResult(&sr); return extNcbi; } static void freeExtNcbiHash(struct hash **pExtNcbi) /* Clean up hash of open udcFiles created by hashExtNcbiRefSeq. */ { if (pExtNcbi && *pExtNcbi) { struct hash *hash = *pExtNcbi; struct hashCookie cookie = hashFirst(hash); struct hashEl *hel; while ((hel = hashNext(&cookie)) != NULL) udcFileClose((struct udcFile **)&hel->val); hashFree(pExtNcbi); } } static struct txInfo *txInfoInitFromPsl(struct sqlConnection *conn, char *pslTable, char *extraWhere, struct hash **retTxiHash) /* Alloc and return a list of txInfo with only psl populated, from pslTable in the current window. * Also return a hash of transcript ID to txInfo (retTxiHash). */ { struct txInfo *txiList = NULL; struct hash *txiHash = hashNew(0); int binOffset = 0; int start = max(0, winStart - GPRANGE); int end = winEnd + GPRANGE; struct sqlResult *sr = hRangeQuery(conn, pslTable, chromName, start, end, extraWhere, &binOffset); char **row; while ((row = sqlNextRow(sr)) != NULL) { struct txInfo *txi; AllocVar(txi); txi->psl = pslLoad(row+binOffset); slAddHead(&txiList, txi); hashAdd(txiHash, txi->psl->qName, txi); } sqlFreeResult(&sr); *retTxiHash = txiHash; return txiList; } static void txiInfoAppendIdList(struct dyString *query, struct txInfo *txiList) /* Append a paren-enclosed list of quoted transcript IDs to query. */ { dyStringAppendC(query, '('); struct txInfo *txi; for (txi = txiList; txi != NULL; txi = txi->next) { if (txi != txiList) dyStringAppend(query, ", "); dyStringPrintf(query, "'%s'", txi->psl->qName); } dyStringAppendC(query, ')'); } static void txInfoAddCdsFromQuery(struct hash *txiHash, struct sqlConnection *conn, char *query) /* Perform query; results have two fields, transcript name (which must be found in txiHash) and * GenBank-formatted CDS string. For reach row from the query, parse the CDS string into the cds * in the struct txInfo for the appropriate transcript. */ { struct sqlResult *sr = sqlGetResult(conn, query); char **row; while ((row = sqlNextRow(sr)) != NULL) { char *name = row[0]; char *cdsStr = row[1]; struct txInfo *txi = hashMustFindVal(txiHash, name); AllocVar(txi->cds); genbankCdsParse(cdsStr, txi->cds); } sqlFreeResult(&sr); } static struct txInfo *txInfoLoadNcbiRefSeq(struct seqWindow *gSeqWin, struct trackDb *gTdb) /* Load ncbiRefSeq[Curated] PSL (+ CDS and sequence) in current window and make txInfo for each. */ { struct txInfo *txiList = NULL; if (!trackHubDatabase(database) && hDbHasNcbiRefSeq(database)) { struct sqlConnection *conn = hAllocConn(database); struct hash *txiHash = hashNew(0); char *extraWhere = NULL; if (sameString(gTdb->track, "ncbiRefSeqCurated")) extraWhere = "qName not like 'X%'"; else if (sameString(gTdb->track, "ncbiRefSeqPredicted")) extraWhere = "qName like 'X%'"; txiList = txInfoInitFromPsl(conn, "ncbiRefSeqPsl", extraWhere, &txiHash); if (txiList) { // Now get CDS for each psl/txi: struct dyString *query = sqlDyStringCreate("select * from ncbiRefSeqCds where id in "); txiInfoAppendIdList(query, txiList); txInfoAddCdsFromQuery(txiHash, conn, query->string); // Now get transcript sequence for each psl/txi: struct hash *extNcbi = hashExtNcbiRefSeq(conn); dyStringClear(query); sqlDyStringPrintf(query, "select acc,extFile,file_offset,file_size from seqNcbiRefSeq " "where acc in "); txiInfoAppendIdList(query, txiList); struct sqlResult *sr = sqlGetResult(conn, query->string); char **row; while ((row = sqlNextRow(sr)) != NULL) { char *name = row[0]; char *extId = row[1]; off_t offset = sqlLongLong(row[2]); size_t size = sqlUnsigned(row[3]); struct udcFile *udcF = hashMustFindVal(extNcbi, extId); char *buf = needMem(size+1); udcSeek(udcF, offset); udcRead(udcF, buf, size); struct txInfo *txi = hashMustFindVal(txiHash, name); txi->txSeq = faSeqFromMemText(buf, TRUE); } sqlFreeResult(&sr); freeExtNcbiHash(&extNcbi); // Now get protein sequence (if applicable) for each psl/txi: dyStringClear(query); sqlDyStringPrintf(query, "select id, protAcc, seq from ncbiRefSeqLink nl, ncbiRefSeqPepTable np " "where nl.protAcc = np.name and id in "); txiInfoAppendIdList(query, txiList); sr = sqlGetResult(conn, query->string); while ((row = sqlNextRow(sr)) != NULL) { char *txId = row[0]; char *protId = row[1]; char *protSeq = cloneString(row[2]); struct txInfo *txi = hashMustFindVal(txiHash, txId); txi->protSeq = newDnaSeq(protSeq, strlen(protSeq), protId); } sqlFreeResult(&sr); hFreeConn(&conn); } } return txiList; } static struct txInfo *txInfoLoadRefGene(struct seqWindow *gSeqWin, struct trackDb *gTdb) /* Load refSeqAli PSL (+ genbank CDS and sequence) in current window and make txInfo for each. */ { struct txInfo *txiList = NULL; if (!trackHubDatabase(database)) { initGenbankTableNames(database); struct sqlConnection *conn = hAllocConn(database); struct hash *txiHash = NULL; txiList = txInfoInitFromPsl(conn, "refSeqAli", NULL, &txiHash); if (txiList) { // Now get CDS for each psl/txi: struct dyString *query = sqlDyStringCreate("select i.acc, c.name from %s i, %s c " "where c.id = i.cds and i.acc in ", gbCdnaInfoTable, cdsTable); txiInfoAppendIdList(query, txiList); txInfoAddCdsFromQuery(txiHash, conn, query->string); // Now get transcript and translated sequence for each psl/txi: struct txInfo *txi; for (txi = txiList; txi != NULL; txi = txi->next) { txi->txSeq = hGenBankGetMrna(database, txi->psl->qName, NULL); if (txi->cds->end > txi->cds->start && txi->cds->startComplete) { txi->protSeq = translateSeq(txi->txSeq, txi->cds->start, FALSE); aaSeqZToX(txi->protSeq); } } } } return txiList; } static struct txInfo *txInfoLoadBigGenePred(struct seqWindow *gSeqWin, struct trackDb *gTdb) /* Load up bigGenePred items in current window and make txInfo for each. */ { struct txInfo *txiList = NULL; char *fileName = cloneString(trackDbSetting(gTdb, "bigDataUrl")); if (fileName == NULL) fileName = cloneString(trackDbSetting(gTdb, "bigGeneDataUrl")); if (isNotEmpty(fileName)) { struct bbiFile *bbi = bigBedFileOpen(fileName); struct lm *lm = lmInit(0); struct bigBedInterval *bbList = bigBedIntervalQuery(bbi, chromName, winStart, winEnd, 0, lm); struct bigBedInterval *bb; for (bb = bbList; bb != NULL; bb = bb->next) { struct genePredExt *gp = genePredFromBigGenePred(chromName, bb); struct txInfo *txi = txInfoFromGenePred((struct genePred *)gp, gSeqWin); slAddHead(&txiList, txi); } bbiFileClose(&bbi); lmCleanup(&lm); } return txiList; } static struct txInfo *txInfoLoadGenePred(struct seqWindow *gSeqWin, struct trackDb *gTdb) /* Load genePreds in current window and make txInfo for each. */ { struct txInfo *txiList = NULL; if (! trackHubDatabase(database)) { struct sqlConnection *conn = hAllocConn(database); struct genePredReader *gpr = genePredReaderRangeQuery(conn, gTdb->table, chromName, winStart, winEnd, NULL); struct genePred *gp = NULL; while ((gp = genePredReaderNext(gpr)) != NULL) { struct txInfo *txi = txInfoFromGenePred(gp, gSeqWin); slAddHead(&txiList, txi); } genePredReaderFree(&gpr); hFreeConn(&conn); } return txiList; } static struct txInfo *txInfoLoad(struct seqWindow *gSeqWin, struct trackDb *tdb) /* Return a list of transcript sequences and alignments for all transcripts in the current * window for all enabled gene tracks. */ { struct txInfo *txiList = NULL; char *geneTrack = cartOrTdbString(cart, tdb, "geneTrack", NULL); struct track *gTrack = hashFindVal(trackHash, geneTrack); if (gTrack) { struct trackDb *gTdb = gTrack->tdb; // If independent transcript sequence and PSL are available, use them. if (startsWith("ncbiRefSeq", geneTrack)) { txiList = txInfoLoadNcbiRefSeq(gSeqWin, gTdb); } else if (sameString(geneTrack, "refGene")) { txiList = txInfoLoadRefGene(gSeqWin, gTdb); } else if (sameString(gTdb->type, "genePred") || sameString(gTdb->type, "bigGenePred")) { // If the track is visible, then struct genePred items have already been loaded so // just use them. Otherwise load up. if (gTrack->items) { struct linkedFeatures *lf; for (lf = gTrack->items; lf != NULL; lf = lf->next) { struct genePred *gp = lf->original; struct txInfo *txi = txInfoFromGenePred(gp, gSeqWin); slAddHead(&txiList, txi); } } else if (sameString(gTdb->type, "bigGenePred")) txiList = txInfoLoadBigGenePred(gSeqWin, gTdb); else txiList = txInfoLoadGenePred(gSeqWin, gTdb); } else errAbort("VCF txInfoLoad: expecting type 'genePred' or 'bigGenePred' for track '%s' " "in geneTrack setting, but got type '%s'", geneTrack, gTdb->type); } else if (trackImgOnly) { // For AJAX requests to redraw a single track, we have not loaded the whole trackDb, // so see if we can find tdb for geneTrack, and load its items. struct trackDb *gTdb = hTrackDbForTrack(database, geneTrack); if (gTdb) { if (startsWith("ncbiRefSeq", geneTrack)) txiList = txInfoLoadNcbiRefSeq(gSeqWin, gTdb); else if (sameString(geneTrack, "refGene")) txiList = txInfoLoadRefGene(gSeqWin, gTdb); else if (sameString(gTdb->type, "bigGenePred")) txiList = txInfoLoadBigGenePred(gSeqWin, gTdb); else txiList = txInfoLoadGenePred(gSeqWin, gTdb); } } return txiList; } static enum soTerm functionForRecord(struct vcfRecord *rec, struct seqWindow *gSeqWin, struct txInfo *txiList) /* Return the most severe functional consequence of rec for any transcript in txiList. */ { struct lm *lm = lmInit(0); // Can't use rec->chrom because that might be just "1" instead of "chr1": struct bed3 variantBed3 = { NULL, chromName, rec->chromStart, rec->chromEnd }; enum soTerm maxImpactTerm = soUnknown; struct txInfo *txi; for (txi = txiList; txi != NULL; txi = txi->next) { int alIx; // Sometimes reference allele is actually a change to the transcript for (alIx = 0; alIx < rec->alleleCount; alIx++) { char *allele = rec->alleles[alIx]; // Watch out for weird symbolic alleles like "<INS:ME:ALU>". if (sameString(allele, "<DEL>")) allele = ""; else if (allele[0] == '<') continue; // Ignore IUPAC ambiguous values in alts (can leak in from hgPhyloPlace) if (anyIupac(allele)) continue; struct vpTx *vpTx = vpGenomicToTranscript(gSeqWin, &variantBed3, allele, txi->psl, txi->txSeq); struct vpPep *vpPep = vpTranscriptToProtein(vpTx, txi->cds, txi->txSeq, txi->protSeq); struct gpFx *gpFx = vpTranscriptToGpFx(vpTx, txi->psl, txi->cds, txi->txSeq, vpPep, txi->protSeq, lm); enum soTerm term = gpFx->soNumber; if (soTermCmp(&term, &maxImpactTerm) < 0) maxImpactTerm = term; vpPepFree(&vpPep); vpTxFree(&vpTx); } } lmCleanup(&lm); return maxImpactTerm; } // tg->height needs an extra pixel at the bottom; it's eaten by the clipping rectangle: #define CLIP_PAD 1 static void drawOneRec(struct vcfRecord *rec, unsigned int *gtHapOrder, unsigned int gtHapCount, struct track *tg, struct hvGfx *hvg, int xOff, int yOff, int width, boolean isClustered, boolean isCenter, enum hapColorMode colorMode, enum soTerm funcTerm) /* Draw a stack of genotype bars for this record */ { unsigned int chromStartMap = vcfRecordTrimIndelLeftBase(rec); unsigned int chromEndMap = vcfRecordTrimAllelesRight(rec); const double scale = scaleForPixels(width); int x1 = round((double)(rec->chromStart-winStart)*scale) + xOff; int x2 = round((double)(rec->chromEnd-winStart)*scale) + xOff; int w = x2-x1; if (w <= 1) { x1--; w = 3; } // When coloring mode is altOnly, we draw one extra pixel row at the top & one at bottom // to show the locations of variants, since the reference alleles are invisible: int extraPixel = 0; int hapHeight = tg->height - CLIP_PAD; if (colorMode == altOnlyMode || colorMode == functionMode) { hvGfxLine(hvg, x1, yOff, x2, yOff, (isClustered ? purple : shadesOfGray[5])); extraPixel = 1; hapHeight -= extraPixel*2; } double hapsPerPix = (double)gtHapCount / hapHeight; int pixIx; for (pixIx = 0; pixIx < hapHeight; pixIx++) { int gtHapOrderIxStart = (int)(hapsPerPix * pixIx); int gtHapOrderIxEnd = round(hapsPerPix * (pixIx + 1)); if (gtHapOrderIxEnd == gtHapOrderIxStart) gtHapOrderIxEnd++; int unks = 0, refs = 0, alts = 0; int gtHapOrderIx; for (gtHapOrderIx = gtHapOrderIxStart; gtHapOrderIx < gtHapOrderIxEnd; gtHapOrderIx++) { int gtHapIx = gtHapOrder[gtHapOrderIx]; int hapIx = gtHapIx & 1; int gtIx = gtHapIx >>1; struct vcfGenotype *gt = &(rec->genotypes[gtIx]); if (gt->isPhased || gt->isHaploid || (gt->hapIxA == gt->hapIxB)) { int alIx = hapIx ? gt->hapIxB : gt->hapIxA; if (alIx < 0) unks++; else if (alIx > 0) alts++; else refs++; } else unks++; } int y = yOff + extraPixel + pixIx; Color col; if (colorMode == baseMode) col = colorByBase(refs, alts, unks, rec->alleles[0], rec->alleles[1]); else if (colorMode == refAltMode) col = colorByRefAlt(refs, alts, unks); else if (colorMode == functionMode) { Color *funcShades = shadesOfGray; int funcShadeCount = maxShade+1; Color funcColor = colorFromSoTerm(funcTerm); if (funcColor == MG_RED) { funcShades = shadesOfRedOnWhite; funcShadeCount = ArraySize(shadesOfRedOnWhite); } else if (funcColor == MG_GREEN) { funcShades = darkerShadesOfGreenOnWhite; funcShadeCount = ArraySize(darkerShadesOfGreenOnWhite); } else if (funcColor == MG_BLUE) { funcShades = shadesOfBlueOnWhite; funcShadeCount = ArraySize(shadesOfBlueOnWhite); } col = colorByAltShade(refs, alts, unks, funcShades, funcShadeCount); } else col = colorByAltOnly(refs, alts, unks); if (col != MG_WHITE) hvGfxLine(hvg, x1, y, x2, y, col); } int yBot = yOff + tg->height - CLIP_PAD - 1; if (isCenter) { if (colorMode == altOnlyMode || colorMode == functionMode) { // Colorful outline to distinguish this variant: hvGfxLine(hvg, x1-1, yOff, x1-1, yBot, purple); hvGfxLine(hvg, x2+1, yOff, x2+1, yBot, purple); hvGfxLine(hvg, x1-1, yOff, x2+1, yOff, purple); hvGfxLine(hvg, x1-1, yBot, x2+1, yBot, purple); } else { // Thick black lines to distinguish this variant: hvGfxBox(hvg, x1-3, yOff, 3, tg->height, MG_BLACK); hvGfxBox(hvg, x2, yOff, 3, tg->height, MG_BLACK); hvGfxLine(hvg, x1-2, yOff, x2+2, yOff, MG_BLACK); hvGfxLine(hvg, x1-2, yBot, x2+2, yBot, MG_BLACK); } // Mouseover was handled already by mapBoxForCenterVariant } else { struct dyString *dy = dyStringNew(0); gtSummaryString(rec, dy); mapBoxHgcOrHgGene(hvg, chromStartMap, chromEndMap, x1, yOff, w, tg->height, tg->track, rec->name, dy->string, NULL, TRUE, NULL); } if (colorMode == altOnlyMode || colorMode == functionMode) hvGfxLine(hvg, x1, yBot, x2, yBot, (isClustered ? purple : shadesOfGray[5])); } static int getCenterVariantIx(struct track *tg, int seqStart, int seqEnd, struct vcfRecord *records) // If the user hasn't specified a local variant/position to use as center, // just use the median variant in window. { int defaultIx = (slCount(records)-1) / 2; char *centerChrom = cartOptionalStringClosestToHome(cart, tg->tdb, FALSE, "centerVariantChrom"); if (centerChrom != NULL && sameString(chromName, centerChrom)) { int centerPos = cartUsualIntClosestToHome(cart, tg->tdb, FALSE, "centerVariantPos", -1); int winSize = seqEnd - seqStart; if (centerPos > (seqStart - winSize) && centerPos < (seqEnd + winSize)) { int i; struct vcfRecord *rec; for (rec = records, i = 0; rec != NULL; rec = rec->next, i++) if (rec->chromStart >= centerPos) return i; return i-1; } } return defaultIx; } /* Data used when drawing mouseover titles for clusters: */ struct titleHelper { char *track; // For imageV2's map item code int startIx; // Index of first record (variant) used in clustering int centerIx; // Index of center record (variant) used in clustering int endIx; // Half-open index of last record (variant) used in clustering int nRecords; // Total number of records in position range char **refs; // Array of reference alleles for records used in clustering char **alts; // Array of alternate alleles for records used in clustering // (refs[0] and alts[0] are from record at startIx) }; void addClusterMapItem(struct hacTree *ht, int x1, int y1, int x2, int y2, struct titleHelper *helper) /* If using imageV2, add mouseover text (no link) with info about this cluster. */ { if (theImgBox && curImgTrack) { struct dyString *dy = dyStringNew(0); struct hapCluster *c = (struct hapCluster *)ht->itemOrCluster; dyStringPrintf(dy, "N=%d ", c->leafCount); while (dyStringLen(dy) < 7) dyStringAppendC(dy, ' '); if (helper->startIx > 0) dyStringAppend(dy, "..."); int i, nHapsForClustering = helper->endIx - helper->startIx; for (i=0; i < nHapsForClustering; i++) { boolean isCenter = (helper->startIx+i == helper->centerIx); char *allele = isRef(c, i) ? helper->refs[i] : helper->alts[i]; if (isCenter) dyStringAppendC(dy, '['); int altCount = c->leafCount - c->refCounts[i] - c->unkCounts[i]; if (c->refCounts[i] > 0 && altCount > 0) dyStringAppendC(dy, '*'); else if (strlen(allele) == 1) dyStringAppendC(dy, allele[0]); else dyStringPrintf(dy, "(%s)", allele); if (isCenter) dyStringAppendC(dy, ']'); } if (helper->endIx < helper->nRecords) dyStringAppend(dy, "..."); imgTrackAddMapItem(curImgTrack, TITLE_BUT_NO_LINK, dy->string, x1, y1, x2, y2, helper->track); } } /* Pixel y offset return type for recursive tree-drawing: */ enum yRetType { yrtMidPoint, yrtStart, yrtEnd, }; /* Callback for calculating y (in pixels) for a cluster node: */ typedef int yFromNodeFunc(const struct slList *itemOrCluster, void *extraData, enum yRetType yType); static int rDrawTreeInLabelArea(struct hacTree *ht, struct hvGfx *hvg, enum yRetType yType, int x, yFromNodeFunc *yFromNode, void *yh, struct titleHelper *th, boolean drawRectangle) /* Recursively draw the haplotype clustering tree in the left label area. * Returns pixel height for use at non-leaf levels of tree. */ { const int branchW = 4; int labelEnd = leftLabelX + leftLabelWidth; if (yType == yrtStart || yType == yrtEnd) { // We're just getting vertical span of a leaf cluster, not drawing any lines. int yLeft, yRight; if (ht->left) yLeft = rDrawTreeInLabelArea(ht->left, hvg, yType, x, yFromNode, yh, th, drawRectangle); else yLeft = yFromNode(ht->itemOrCluster, yh, yType); if (ht->right) yRight = rDrawTreeInLabelArea(ht->right, hvg, yType, x, yFromNode, yh, th, drawRectangle); else yRight = yFromNode(ht->itemOrCluster, yh, yType); if (yType == yrtStart) return min(yLeft, yRight); else return max(yLeft, yRight); } // Otherwise yType is yrtMidPoint. If we have 2 children, we'll be drawing some lines: if (ht->left != NULL && ht->right != NULL) { int midY; if (ht->childDistance == 0 || x+(2*branchW) > labelEnd) { // Treat this as a leaf cluster. // Recursing twice is wasteful. Could be avoided if this, and yFromNode, // returned both yStart and yEnd. However, the time to draw a tree of // 2188 hap's (1kG phase1 interim) is in the noise, so I consider it // not worth the effort of refactoring to save a sub-millisecond here. int yStartLeft = rDrawTreeInLabelArea(ht->left, hvg, yrtStart, x+branchW, yFromNode, yh, th, drawRectangle); int yEndLeft = rDrawTreeInLabelArea(ht->left, hvg, yrtEnd, x+branchW, yFromNode, yh, th, drawRectangle); int yStartRight = rDrawTreeInLabelArea(ht->right, hvg, yrtStart, x+branchW, yFromNode, yh, th, drawRectangle); int yEndRight = rDrawTreeInLabelArea(ht->right, hvg, yrtEnd, x+branchW, yFromNode, yh, th, drawRectangle); int yStart = min(yStartLeft, yStartRight); int yEnd = max(yEndLeft, yEndRight); midY = (yStart + yEnd) / 2; Color col = (ht->childDistance == 0) ? purple : MG_BLACK; if (drawRectangle || ht->childDistance != 0) { hvGfxLine(hvg, x+branchW, yStart, x+branchW, yEnd-1, col); hvGfxLine(hvg, x+branchW, yStart, labelEnd, yStart, col); hvGfxLine(hvg, x+branchW, yEnd-1, labelEnd, yEnd-1, col); } else { hvGfxLine(hvg, x, midY, x+1, midY, col); hvGfxLine(hvg, x+1, midY, labelEnd-1, yStart, col); hvGfxLine(hvg, x+1, midY, labelEnd-1, yEnd-1, col); } addClusterMapItem(ht, x, yStart, labelEnd, yEnd-1, th); } else { int leftMid = rDrawTreeInLabelArea(ht->left, hvg, yrtMidPoint, x+branchW, yFromNode, yh, th, drawRectangle); int rightMid = rDrawTreeInLabelArea(ht->right, hvg, yrtMidPoint, x+branchW, yFromNode, yh, th, drawRectangle); midY = (leftMid + rightMid) / 2; hvGfxLine(hvg, x+branchW, leftMid, x+branchW, rightMid, MG_BLACK); addClusterMapItem(ht, x, min(leftMid, rightMid), x+branchW-1, max(leftMid, rightMid), th); } if (drawRectangle || ht->childDistance != 0) hvGfxLine(hvg, x, midY, x+branchW, midY, MG_BLACK); return midY; } else if (ht->left != NULL) return rDrawTreeInLabelArea(ht->left, hvg, yType, x, yFromNode, yh, th, drawRectangle); else if (ht->right != NULL) return rDrawTreeInLabelArea(ht->right, hvg, yType, x, yFromNode, yh, th, drawRectangle); // Leaf node -- return pixel height. Draw a line if yType is midpoint. int y = yFromNode(ht->itemOrCluster, yh, yType); if (yType == yrtMidPoint && x < labelEnd) { hvGfxLine(hvg, x, y, labelEnd, y, purple); addClusterMapItem(ht, x, y, labelEnd, y+1, th); } return y; } struct yFromNodeHelper /* Pre-computed mapping from cluster nodes' gtHapIx to pixel heights. */ { unsigned int gtHapCount; unsigned int *gtHapIxToPxStart; unsigned int *gtHapIxToPxEnd; }; void initYFromNodeHelper(struct yFromNodeHelper *helper, int yOff, int height, unsigned int gtHapCount, unsigned int *gtHapOrder, int genotypeCount) /* Build a mapping of genotype and haplotype to pixel y coords. */ { helper->gtHapCount = gtHapCount; helper->gtHapIxToPxStart = needMem(genotypeCount * 2 * sizeof(unsigned int)); helper->gtHapIxToPxEnd = needMem(genotypeCount * 2 * sizeof(unsigned int)); double pxPerHap = (double)height / gtHapCount; int i; for (i = 0; i < gtHapCount; i++) { int yStart = round(i * pxPerHap); int yEnd = round((i+1) * pxPerHap); if (yEnd == yStart) yEnd++; int gtHapIx = gtHapOrder[i]; helper->gtHapIxToPxStart[gtHapIx] = yOff + yStart; helper->gtHapIxToPxEnd[gtHapIx] = yOff + yEnd; } } static int yFromHapNode(const struct slList *itemOrCluster, void *extraData, enum yRetType yType) /* Extract the gtHapIx from hapCluster (hacTree node item), find out its relative order * and translate that to a pixel height. */ { unsigned int gtHapIx = ((const struct hapCluster *)itemOrCluster)->gtHapIx; struct yFromNodeHelper *helper = extraData; int y; if (yType == yrtStart) y = helper->gtHapIxToPxStart[gtHapIx]; else if (yType == yrtEnd) y = helper->gtHapIxToPxEnd[gtHapIx]; else y = (helper->gtHapIxToPxStart[gtHapIx] + helper->gtHapIxToPxEnd[gtHapIx]) / 2; return y; } void initTitleHelper(struct titleHelper *th, char *track, int startIx, int centerIx, int endIx, int nRecords, struct vcfFile *vcff) /* Set up info including arrays of ref & alt alleles for cluster mouseover. */ { th->track = track; th->startIx = startIx; th->centerIx = centerIx; th->endIx = endIx; th->nRecords = nRecords; int len = endIx - startIx; AllocArray(th->refs, len); AllocArray(th->alts, len); struct vcfRecord *rec; int i; for (rec = vcff->records, i = 0; rec != NULL && i < endIx; rec = rec->next, i++) { if (i < startIx) continue; char refAl[16]; abbrevAndHandleRC(refAl, sizeof(refAl), rec->alleles[0]); th->refs[i-startIx] = vcfFilePooledStr(vcff, refAl); char altAl1[16]; abbrevAndHandleRC(altAl1, sizeof(altAl1), rec->alleles[1]); tolowers(altAl1); th->alts[i-startIx] = vcfFilePooledStr(vcff, altAl1); } } static void drawTreeInLabelArea(struct hacTree *ht, struct hvGfx *hvg, int yOff, int clipHeight, struct yFromNodeHelper *yHelper, struct titleHelper *titleHelper, boolean drawRectangle) /* Draw the haplotype clustering in the left label area (as much as fits there). */ { // Figure out which hvg to use, save current clipping, and clip to left label coords: struct hvGfx *hvgLL = (hvgSide != NULL) ? hvgSide : hvg; int clipXBak, clipYBak, clipWidthBak, clipHeightBak; hvGfxGetClip(hvgLL, &clipXBak, &clipYBak, &clipWidthBak, &clipHeightBak); hvGfxUnclip(hvgLL); hvGfxSetClip(hvgLL, leftLabelX, yOff, leftLabelWidth, clipHeight); // Draw the tree: int x = leftLabelX; (void)rDrawTreeInLabelArea(ht, hvgLL, yrtMidPoint, x, yFromHapNode, yHelper, titleHelper, drawRectangle); // Restore the prior clipping: hvGfxUnclip(hvgLL); hvGfxSetClip(hvgLL, clipXBak, clipYBak, clipWidthBak, clipHeightBak); } static void ignoreEm(char *format, va_list args) /* Ignore warnings from genotype parsing -- when there's one, there * are usually hundreds more just like it. */ { } static enum hapColorMode getColorMode(struct trackDb *tdb) /* Get the hap-cluster coloring mode from cart & tdb. */ { enum hapColorMode colorMode = altOnlyMode; char *colorBy = cartOrTdbString(cart, tdb, VCF_HAP_COLORBY_VAR, VCF_DEFAULT_HAP_COLORBY); if (sameString(colorBy, VCF_HAP_COLORBY_ALTONLY)) colorMode = altOnlyMode; else if (sameString(colorBy, VCF_HAP_COLORBY_REFALT)) colorMode = refAltMode; else if (sameString(colorBy, VCF_HAP_COLORBY_BASE)) colorMode = baseMode; else { char *geneTrack = cartOrTdbString(cart, tdb, "geneTrack", NULL); if (isNotEmpty(geneTrack) && sameString(colorBy, VCF_HAP_COLORBY_FUNCTION)) colorMode = functionMode; } return colorMode; } #define GENE_SIZE_FUDGE 2500000 static boolean vcfHapClusterDrawInit(struct track *tg, struct vcfFile *vcff, struct hvGfx *hvg, enum hapColorMode *retHapColorMode, struct seqWindow **retGSeqWin, struct txInfo **retTxiList) /* Parse vcff's genotypes and get ready to draw haplotypes. Return FALSE if nothing to draw. */ { if (vcff->records == NULL) return FALSE; undefYellow = hvGfxFindRgb(hvg, &undefinedYellowColor); if (retHapColorMode) *retHapColorMode = getColorMode(tg->tdb); pushWarnHandler(ignoreEm); struct vcfRecord *rec; for (rec = vcff->records; rec != NULL; rec = rec->next) { vcfParseGenotypesGtOnly(rec); } popWarnHandler(); if (*retHapColorMode == functionMode) { if (!exprBedColorsMade) { makeRedGreenShades(hvg); // Make darkerShadesOfGreenOnWhite for local use static struct rgbColor white = {255, 255, 255}; static struct rgbColor darkerGreen = {0, 210, 0}; hvGfxMakeColorGradient(hvg, &white, &darkerGreen, EXPR_DATA_SHADES, darkerShadesOfGreenOnWhite); } int gStart = winStart - GENE_SIZE_FUDGE; if (gStart < 0) gStart = 0; int gEnd = winEnd + GENE_SIZE_FUDGE; int chromSize = hChromSize(database, chromName); if (gEnd > chromSize) gEnd = chromSize; *retGSeqWin = chromSeqWindowNew(database, chromName, gStart, gEnd); *retTxiList = txInfoLoad(*retGSeqWin, tg->tdb); } else { *retGSeqWin = NULL; *retTxiList = NULL; } return TRUE; } static void vcfHapClusterDraw(struct track *tg, int seqStart, int seqEnd, struct hvGfx *hvg, int xOff, int yOff, int width, MgFont *font, Color color, enum trackVisibility vis) /* Split samples' chromosomes (haplotypes), cluster them by center-weighted * alpha similarity, and draw in the order determined by clustering. */ { struct vcfFile *vcff = tg->extraUiData; enum hapColorMode colorMode; struct seqWindow *gSeqWin; struct txInfo *txiList; if (!vcfHapClusterDrawInit(tg, vcff, hvg, &colorMode, &gSeqWin, &txiList)) return; purple = hvGfxFindColorIx(hvg, 0x99, 0x00, 0xcc); unsigned int gtHapCount = 0; int nRecords = slCount(vcff->records); int centerIx = getCenterVariantIx(tg, seqStart, seqEnd, vcff->records); // Limit the number of variants that we compare, to keep from timing out: // (really what we should limit is the number of distinct haplo's passed to hacTree!) // In the meantime, this should at least be a cart var... int maxVariantsPerSide = 50; int startIx = max(0, centerIx - maxVariantsPerSide); int endIx = min(nRecords, centerIx+1 + maxVariantsPerSide); struct hacTree *ht = NULL; unsigned int *gtHapOrder = clusterHaps(vcff, centerIx, startIx, endIx, >HapCount, &ht); struct vcfRecord *centerRec = NULL; struct vcfRecord *rec; int ix; // Unlike drawing order (last drawn is on top), the first mapBox gets priority, // so map center variant before drawing & mapping other variants! for (rec = vcff->records, ix=0; rec != NULL; rec = rec->next, ix++) { if (ix == centerIx) { centerRec = rec; mapBoxForCenterVariant(rec, hvg, tg, xOff, yOff, width); break; } } for (rec = vcff->records, ix=0; rec != NULL; rec = rec->next, ix++) { boolean isClustered = (ix >= startIx && ix < endIx); if (ix != centerIx) { enum soTerm funcTerm = soUnknown; if (colorMode == functionMode) funcTerm = functionForRecord(rec, gSeqWin, txiList); drawOneRec(rec, gtHapOrder, gtHapCount, tg, hvg, xOff, yOff, width, isClustered, FALSE, colorMode, funcTerm); } } // Draw the center rec on top, outlined with black lines, to make sure it is very visible: enum soTerm funcTerm = soUnknown; if (colorMode == functionMode) funcTerm = functionForRecord(centerRec, gSeqWin, txiList); drawOneRec(centerRec, gtHapOrder, gtHapCount, tg, hvg, xOff, yOff, width, TRUE, TRUE, colorMode, funcTerm); // Draw as much of the tree as can fit in the left label area: int extraPixel = (colorMode == altOnlyMode || colorMode == functionMode) ? 1 : 0; int hapHeight = tg->height- CLIP_PAD - 2*extraPixel; struct yFromNodeHelper yHelper = {0, NULL, NULL}; initYFromNodeHelper(&yHelper, yOff+extraPixel, hapHeight, gtHapCount, gtHapOrder, vcff->genotypeCount); struct titleHelper titleHelper = { NULL, 0, 0, 0, 0, NULL, NULL }; initTitleHelper(&titleHelper, tg->track, startIx, centerIx, endIx, nRecords, vcff); char *treeAngle = cartOrTdbString(cart, tg->tdb, VCF_HAP_TREEANGLE_VAR, VCF_DEFAULT_HAP_TREEANGLE); boolean drawRectangle = sameString(treeAngle, VCF_HAP_TREEANGLE_RECTANGLE); drawTreeInLabelArea(ht, hvg, yOff+extraPixel, hapHeight+CLIP_PAD, &yHelper, &titleHelper, drawRectangle); } static void drawSampleLabels(struct vcfFile *vcff, struct hvGfx *hvg, boolean isAllDiploid, int yStart, int height, unsigned int *gtHapOrder, int gtHapCount, MgFont *font, Color color, char *track) /* Draw sample names as left labels. */ { // Figure out which hvg to use, save current clipping, and clip to left label coords: struct hvGfx *hvgLL = (hvgSide != NULL) ? hvgSide : hvg; int clipXBak, clipYBak, clipWidthBak, clipHeightBak; hvGfxGetClip(hvgLL, &clipXBak, &clipYBak, &clipWidthBak, &clipHeightBak); hvGfxUnclip(hvgLL); hvGfxSetClip(hvgLL, leftLabelX, yStart, leftLabelWidth, height); if (isAllDiploid) { double pxPerGt = (double)height / vcff->genotypeCount; if (pxPerGt < tl.fontHeight + 1) warn("track %s: drawSampleLabels called with insufficient height", track); int gtIx; for (gtIx = 0; gtIx < vcff->genotypeCount; gtIx++) { int y = gtIx * pxPerGt; hvGfxTextRight(hvgLL, leftLabelX, y+yStart, leftLabelWidth-1, (int)pxPerGt, color, font, vcff->genotypeIds[gtIx]); } } else { double pxPerHt = (double)height / gtHapCount; if (pxPerHt < tl.fontHeight + 1) warn("track %s: drawSampleLabels called with insufficient height", track); int orderIx; for (orderIx = 0; orderIx < gtHapCount; orderIx++) { int gtHapIx = gtHapOrder[orderIx]; int gtIx = (gtHapIx >> 1); int y = gtIx * pxPerHt; hvGfxTextRight(hvgLL, leftLabelX, y+yStart, leftLabelWidth-1, (int)pxPerHt, color, font, vcff->genotypeIds[gtIx]); } } // Restore the prior clipping: hvGfxUnclip(hvgLL); hvGfxSetClip(hvgLL, clipXBak, clipYBak, clipWidthBak, clipHeightBak); } static void drawSampleTitles(struct vcfFile *vcff, int yStart, int height, unsigned int *gtHapOrder, int gtHapCount, char *track) /* Draw mouseover labels / titles with the samples that are drawn at each pixel y offset */ { double hapPerPx = (double)gtHapCount / height; int labelEnd = leftLabelX + leftLabelWidth; struct dyString *dy = dyStringNew(0); int y; for (y = 0; y < height; y++) { dyStringClear(dy); int gtHapStart = y * hapPerPx; int gtHapEnd = (y + 1) * hapPerPx; if (gtHapEnd == gtHapStart) gtHapEnd++; char *lastSample = NULL; int gtHapIx; for (gtHapIx = gtHapStart; gtHapIx < gtHapEnd; gtHapIx++) { int gtIx = (gtHapOrder[gtHapIx] >> 1); char *sample = vcff->genotypeIds[gtIx]; if (!lastSample || differentString(sample, lastSample)) { if (isNotEmpty(dy->string)) dyStringAppend(dy, ", "); dyStringAppend(dy, sample); lastSample = sample; } } imgTrackAddMapItem(curImgTrack, TITLE_BUT_NO_LINK, dy->string, leftLabelX, y+yStart, labelEnd, y+yStart+1, track); } } static unsigned int *gtHapOrderFromGtOrder(struct vcfFile *vcff, boolean *retIsAllDiploid, int *retGtHapCount) { int ploidy = 2; // Assuming diploid genomes here, no XXY, tetraploid etc. int gtCount = vcff->genotypeCount; boolean isAllDiploid = TRUE; unsigned int *gtHapOrder = needMem(gtCount * ploidy * sizeof(unsigned int)); int orderIx = 0; int gtIx; // Determine the number of chromosome rows; for chrX, can be mix of diploid and haploid. for (gtIx=0; gtIx < gtCount; gtIx++) { int gtHapIx = (gtIx << 1); gtHapOrder[orderIx] = gtHapIx; orderIx++; struct vcfRecord *rec; for (rec = vcff->records; rec != NULL; rec = rec->next) { struct vcfGenotype *gt = &(rec->genotypes[gtIx]); if (!gt->isHaploid) { gtHapOrder[orderIx] = gtHapIx+1; orderIx++; break; } else isAllDiploid = FALSE; } } if (retIsAllDiploid) *retIsAllDiploid = isAllDiploid; if (retGtHapCount) *retGtHapCount = orderIx; return gtHapOrder; } static void vcfGtHapFileOrderDraw(struct track *tg, int seqStart, int seqEnd, struct hvGfx *hvg, int xOff, int yOff, int width, MgFont *font, Color color, enum trackVisibility vis) /* Draw rows in the same fashion as vcfHapClusterDraw, but instead of clustering, use the * order in which samples appear in the VCF file. */ { struct vcfFile *vcff = tg->extraUiData; enum hapColorMode colorMode; struct seqWindow *gSeqWin; struct txInfo *txiList; if (!vcfHapClusterDrawInit(tg, vcff, hvg, &colorMode, &gSeqWin, &txiList)) return; boolean isAllDiploid; int gtHapCount; unsigned int *gtHapOrder = gtHapOrderFromGtOrder(vcff, &isAllDiploid, >HapCount); struct vcfRecord *rec; for (rec = vcff->records; rec != NULL; rec = rec->next) { enum soTerm funcTerm = soUnknown; if (colorMode == functionMode) funcTerm = functionForRecord(rec, gSeqWin, txiList); drawOneRec(rec, gtHapOrder, gtHapCount, tg, hvg, xOff, yOff, width, FALSE, FALSE, colorMode, funcTerm); } // If height is sufficient, draw sample names as left labels; otherwise make mouseover titles // with sample names for each pixel y offset. int extraPixel = (colorMode == altOnlyMode || colorMode == functionMode) ? 1 : 0; int hapHeight = tg->height - CLIP_PAD - 2*extraPixel; int minHeightForLabels; if (isAllDiploid) minHeightForLabels = vcff->genotypeCount * (tl.fontHeight + 1); else minHeightForLabels = gtHapCount * (tl.fontHeight + 1); if (hapHeight >= minHeightForLabels) drawSampleLabels(vcff, hvg, isAllDiploid, yOff+extraPixel, hapHeight, gtHapOrder, gtHapCount, font, color, tg->track); else drawSampleTitles(vcff, yOff+extraPixel, hapHeight, gtHapOrder, gtHapCount, tg->track); } static struct phyloTree *getTreeFromFile(struct trackDb *tdb) /* Get the filename that follows trackDb setting 'hapClusterMethod treeFile ' and read it in * as a phyloTree. */ { char *hapMethod = cloneString(trackDbSetting(tdb, VCF_HAP_METHOD_VAR)); if (! startsWithWord(VCF_HAP_METHOD_TREE_FILE, nextWord(&hapMethod))) errAbort("getTreeFromFile: expected trackDb setting " VCF_HAP_METHOD_VAR "to start with '" VCF_HAP_METHOD_TREE_FILE "' followed by a file name, but got '%s'", hapMethod); char *fileName = hReplaceGbdb(nextWord(&hapMethod)); return phyloOpenTree(fileName); } struct hash *makeSampleToIx(struct vcfFile *vcff) /* Alloc & return a hash of sample names to genotype indices in vcff. */ { struct hash *sampleToIx = hashNew(0); int gtIx; for (gtIx = 0; gtIx < vcff->genotypeCount; gtIx++) hashAddInt(sampleToIx, vcff->genotypeIds[gtIx], gtIx); return sampleToIx; } static int gtIxFromSample(struct hash *sampleToIx, char *sample, char *vcfFileName) /* Look up sample's genotype index in sampleToIx, die complaining about vcfFileName if not found. */ { int gtIx = hashIntValDefault(sampleToIx, sample, -1); if (gtIx < 0) errAbort("gtIxFromSample: sample '%s' not found in VCF file %s", sample, vcfFileName); return gtIx; } static void rSetGtHapOrderFromTree(struct phyloTree *node, struct vcfFile *vcff, struct hash *sampleToIx, unsigned int *gtHapOrder, int *pGtHapCount, unsigned int *leafOrderToHapOrderStart, unsigned int *leafOrderToHapOrderEnd, int *pLeafCount) /* Set gtHapOrder to sample gt & hap indices in the order we encounter the samples in tree. */ { if (node->numEdges > 0) { int ix; for (ix = 0; ix < node->numEdges; ix++) rSetGtHapOrderFromTree(node->edges[ix], vcff, sampleToIx, gtHapOrder, pGtHapCount, leafOrderToHapOrderStart, leafOrderToHapOrderEnd, pLeafCount); } else { int gtIx = gtIxFromSample(sampleToIx, node->ident->name, vcff->fileOrUrl); int gtHapIx = (gtIx << 1); gtHapOrder[*pGtHapCount] = gtHapIx; leafOrderToHapOrderStart[*pLeafCount] = leafOrderToHapOrderEnd[*pLeafCount] = *pGtHapCount; *pGtHapCount += 1; struct vcfRecord *rec; for (rec = vcff->records; rec != NULL; rec = rec->next) { struct vcfGenotype *gt = &(rec->genotypes[gtIx]); if (!gt->isHaploid) { gtHapOrder[*pGtHapCount] = gtHapIx+1; leafOrderToHapOrderEnd[*pLeafCount] = *pGtHapCount; *pGtHapCount += 1; break; } } *pLeafCount += 1; } } static unsigned int *gtHapOrderFromTree(struct vcfFile *vcff, struct phyloTree *tree, unsigned int **retLeafOrderToHapOrderStart, unsigned int **retLeafOrderToHapOrderEnd, int *retGtHapCount) /* Alloc & return gtHapOrder, set to samples in the order we encounter them in tree. * Also build up maps of leaf order to low and high gtHapIx, for drawing the tree later. */ { struct hash *sampleToIx = makeSampleToIx(vcff); int ploidy = 2; // Assuming diploid genomes here, no XXY, tetraploid etc. int gtCount = vcff->genotypeCount; unsigned int *gtHapOrder = needMem(gtCount * ploidy * sizeof(unsigned int)); *retLeafOrderToHapOrderStart = needMem(gtCount * sizeof(unsigned int)); *retLeafOrderToHapOrderEnd = needMem(gtCount * sizeof(unsigned int)); *retGtHapCount = 0; int leafCount = 0; rSetGtHapOrderFromTree(tree, vcff, sampleToIx, gtHapOrder, retGtHapCount, *retLeafOrderToHapOrderStart, *retLeafOrderToHapOrderEnd, &leafCount); if (leafCount != vcff->genotypeCount) errAbort("gtHapOrderFromTree: tree has %d leaves, but VCF file %s has %d genotype columns", leafCount, vcff->fileOrUrl, vcff->genotypeCount); return gtHapOrder; } // Relative coordinates for tree layout, to be transformed into pixel coords later. struct nodeCoords { double rank; // Centerpoint of children's rank in terms of hap order (0 = top haplotype) int depth; // Maximum child depth + 1 }; static int phyloTreeAddNodeCoords(struct phyloTree *node, unsigned int *leafOrderToHapOrderStart, unsigned int *leafOrderToHapOrderEnd, int leafIx) /* Recursively annotate node and descendants with nodeCoords to prepare for drawing the tree. */ { struct nodeCoords *coords; AllocVar(coords); node->priv = coords; if (node->numEdges > 0) { double minRank = -1, maxRank = 0; int maxDepth = 0; int ix; for (ix = 0; ix < node->numEdges; ix++) { struct phyloTree *child = node->edges[ix]; leafIx = phyloTreeAddNodeCoords(child, leafOrderToHapOrderStart, leafOrderToHapOrderEnd, leafIx); struct nodeCoords *childCoords = child->priv; if (minRank < 0 || childCoords->rank < minRank) minRank = childCoords->rank; if (childCoords->rank > maxRank) maxRank = childCoords->rank; if (childCoords->depth > maxDepth) maxDepth = childCoords->depth; } coords->rank = (minRank + maxRank) / 2.0; coords->depth = maxDepth + 1; } else { // Leaf (sample) double rankStart = leafOrderToHapOrderStart[leafIx]; double rankEnd = leafOrderToHapOrderStart[leafIx]; coords->rank = (rankStart + rankEnd) / 2.0; leafIx++; coords->depth = 0; } return leafIx; } static int colorCmp(const void *pa, const void *pb) /* Compare two colors for sorting by numeric value. */ { const Color ca = *(Color *)pa; const Color cb = *(Color *)pb; return ca - cb; } static Color colorFromChildColors(Color *childColors, int childCount, Color defaultCol) /* If the majority of children have the same color, then return that color, otherwise defaultCol. */ { Color childColCopy[childCount]; memcpy(childColCopy, childColors, sizeof childColCopy); qsort(childColCopy, childCount, sizeof(*childColCopy), colorCmp); Color maxRunColor = childCount > 0 ? childColors[0] : defaultCol; int runLength = 1, maxRunLength = 1; int ix; for (ix = 1; ix < childCount; ix++) { if (childColors[ix] == childColors[ix-1]) { runLength++; } else { if (runLength > maxRunLength) { maxRunLength = runLength; maxRunColor = childColors[ix-1]; } runLength = 1; } } if (runLength > maxRunLength) { maxRunLength = runLength; maxRunColor = childColors[ix-1]; } if (maxRunLength > (childCount>>1)) return maxRunColor; return defaultCol; } static Color rDrawPhyloTreeInLabelArea(struct phyloTree *node, struct hvGfx *hvg, int x, int yOff, double pxPerHap, MgFont *font, struct hash *highlightSamples, struct hash *sampleColors) /* Recursively draw the tree in the left label area. */ { -const int branchW = 4; +const int branchW = 8; int labelEnd = leftLabelX + leftLabelWidth; Color color = MG_BLACK; if (!sampleColors) { // Misuse the branch length value as RGB color (if it's the typical small number, will still // draw as approximately black): unsigned int rgb = node->ident->length; color = MAKECOLOR_32( ((rgb>>16)&0xff), ((rgb>>8)&0xff), (rgb&0xff) ); } if (node->numEdges > 0) { // Draw each child and a horizontal line to child int minY = -1, maxY = 0; Color childColors[node->numEdges]; int ix; for (ix = 0; ix < node->numEdges; ix++) { struct phyloTree *child = node->edges[ix]; childColors[ix] = rDrawPhyloTreeInLabelArea(child, hvg, x+branchW, yOff, pxPerHap, font, highlightSamples, sampleColors); struct nodeCoords *childCoords = child->priv; int childY = yOff + ((0.5 + childCoords->rank) * pxPerHap); hvGfxLine(hvg, x, childY, x+branchW, childY, childColors[ix]); if (minY < 0 || childY < minY) minY = childY; if (childY > maxY) maxY = childY; } // Draw a vertical line to connect the children if (sampleColors != NULL) color = colorFromChildColors(childColors, node->numEdges, color); hvGfxLine(hvg, x, minY, x, maxY, color); } else { // Leaf node -- draw a horizontal line, and label if there is space to right of tree struct nodeCoords *coords = node->priv; int yLine = yOff + ((0.5 + coords->rank) * pxPerHap); int yBox = yLine - pxPerHap / 2; int yText = yLine - tl.fontHeight / 2; // Dunno why but the default font seems to draw with the baseline at y while the other fonts // draw with the mid line at y. if (sameOk(tl.textSize, "8")) yText += 2; if (highlightSamples && node->ident->name && hashLookup(highlightSamples, node->ident->name)) hvGfxBox(hvg, leftLabelX, yBox, leftLabelWidth, pxPerHap, MAKECOLOR_32_A(170, 255, 255, 128)); if (sampleColors != NULL) color = (Color)hashIntValDefault(sampleColors, node->ident->name, MG_BLACK); hvGfxLine(hvg, x, yLine, x+branchW, yLine, color); int textX = x + branchW + 3; if (pxPerHap >= tl.fontHeight+1 && textX < labelEnd) hvGfxText(hvg, textX, yText, MG_BLACK, font, node->ident->name); } return color; } static void drawPhyloTreeInLabelArea(struct phyloTree *tree, struct hvGfx *hvg, int yOff, int clipHeight, int gtHapCount, MgFont *font, struct hash *highlightSamples, struct hash *sampleColors) { struct hvGfx *hvgLL = (hvgSide != NULL) ? hvgSide : hvg; int clipXBak, clipYBak, clipWidthBak, clipHeightBak; hvGfxGetClip(hvgLL, &clipXBak, &clipYBak, &clipWidthBak, &clipHeightBak); hvGfxUnclip(hvgLL); hvGfxSetClip(hvgLL, leftLabelX, yOff, leftLabelWidth, clipHeight); // Draw the tree: int x = leftLabelX; double pxPerHap = (double)clipHeight / gtHapCount; rDrawPhyloTreeInLabelArea(tree, hvgLL, x, yOff, pxPerHap, font, highlightSamples, sampleColors); // Restore the prior clipping: hvGfxUnclip(hvgLL); hvGfxSetClip(hvgLL, clipXBak, clipYBak, clipWidthBak, clipHeightBak); } static void rHighlightSampleRows(struct phyloTree *node, struct hvGfx *hvg, int yOff, double pxPerHap, struct hash *highlightSamples) /* For each leaf node, if it is in highlightSamples then draw a highlight box for it. */ { if (node->numEdges > 0) { int ix; for (ix = 0; ix < node->numEdges; ix++) { struct phyloTree *child = node->edges[ix]; rHighlightSampleRows(child, hvg, yOff, pxPerHap, highlightSamples); } } else { // leaf node; highlight if it's in highlightSamples if (node->ident->name && hashLookup(highlightSamples, node->ident->name)) { struct nodeCoords *coords = node->priv; int y = yOff + (coords->rank * pxPerHap); hvGfxBox(hvg, insideX, y, insideWidth, pxPerHap, MAKECOLOR_32_A(170, 255, 255, 128)); } } } static struct hash *getSampleColors(struct trackDb *tdb) /* Return a hash of sample names to colors if specified in tdb, or NULL if none specified. */ { struct hash *sampleColors = NULL; char *setting = cartOrTdbString(cart, tdb, VCF_SAMPLE_COLOR_FILE, NULL); if (isNotEmpty(setting)) { // If the setting has not been set in the cart then we're getting the trackDb setting which // may specify a list of files and possibly labels like "Thing_one=file1 Thing_two=file2". // In that case, pick out the first file. if (strchr(setting, '=') || strchr(setting, ' ')) { setting = nextWord(&setting); char *eq = (strchr(setting, '=')); if (eq) setting = eq+1; } char *fileName = hReplaceGbdb(setting); struct lineFile *lf = netLineFileMayOpen(fileName); if (lf) { sampleColors = hashNew(0); char *line; while (lineFileNextReal(lf, &line)) { char *words[3]; int wordCount = chopTabs(line, words); lineFileExpectWords(lf, 2, wordCount); char *sample = words[0]; char *colorStr = words[1]; int rgb = bedParseColor(colorStr); Color color = MAKECOLOR_32( ((rgb>>16)&0xff), ((rgb>>8)&0xff), (rgb&0xff) ); hashAddInt(sampleColors, sample, color); } lineFileClose(&lf); } else warn("Can't open sampleColorFile '%s'", fileName); } return sampleColors; } static struct hash *getHighlightSamples(struct trackDb *tdb) /* Return a hash of node IDs to highlight in the phylo tree display, or NULL if none specified. */ { struct hash *highlightSamples = NULL; char *setting = cartOrTdbString(cart, tdb, "highlightIds", NULL); if (isNotEmpty(setting)) { struct slName *list = slNameListFromComma(setting); highlightSamples = hashFromSlNameList(list); } return highlightSamples; } static void vcfGtHapTreeFileDraw(struct track *tg, int seqStart, int seqEnd, struct hvGfx *hvg, int xOff, int yOff, int width, MgFont *font, Color color, enum trackVisibility vis) /* Draw rows in the same fashion as vcfHapClusterDraw, but instead of clustering, use the * order in which samples appear in the VCF file. */ { struct vcfFile *vcff = tg->extraUiData; enum hapColorMode colorMode; struct seqWindow *gSeqWin; struct txInfo *txiList; if (!vcfHapClusterDrawInit(tg, vcff, hvg, &colorMode, &gSeqWin, &txiList)) return; struct phyloTree *tree = getTreeFromFile(tg->tdb); +if (tree == NULL) + { + warn("No tree in file '%s'", trackDbSetting(tg->tdb, VCF_HAP_METHOD_VAR)); + vcfGtHapFileOrderDraw(tg, seqStart, seqEnd, hvg, xOff, yOff, width, font, color, vis); + return; + } int gtHapCount; unsigned int *leafOrderToHapOrderStart, *leafOrderToHapOrderEnd; unsigned int *gtHapOrder = gtHapOrderFromTree(vcff, tree, &leafOrderToHapOrderStart, &leafOrderToHapOrderEnd, >HapCount); // Figure out rank (vertical position) and depth (horizontal position) of every node in tree: phyloTreeAddNodeCoords(tree, leafOrderToHapOrderStart, leafOrderToHapOrderEnd, 0); int extraPixel = (colorMode == altOnlyMode || colorMode == functionMode) ? 1 : 0; int hapHeight = tg->height - CLIP_PAD - 2*extraPixel; struct hash *highlightSamples = getHighlightSamples(tg->tdb); if (highlightSamples) { double pxPerHap = (double)hapHeight / gtHapCount; rHighlightSampleRows(tree, hvg, yOff+extraPixel, pxPerHap, highlightSamples); } struct vcfRecord *rec; for (rec = vcff->records; rec != NULL; rec = rec->next) { enum soTerm funcTerm = soUnknown; if (colorMode == functionMode) funcTerm = functionForRecord(rec, gSeqWin, txiList); drawOneRec(rec, gtHapOrder, gtHapCount, tg, hvg, xOff, yOff, width, FALSE, FALSE, colorMode, funcTerm); } struct hash *sampleColors = getSampleColors(tg->tdb); drawPhyloTreeInLabelArea(tree, hvg, yOff+extraPixel, hapHeight, gtHapCount, font, highlightSamples, sampleColors); drawSampleTitles(vcff, yOff+extraPixel, hapHeight, gtHapOrder, gtHapCount, tg->track); } static int vcfHapClusterTotalHeight(struct track *tg, enum trackVisibility vis) /* Return height of haplotype graph (2 * #samples * lineHeight); * 2 because we're assuming diploid genomes here, no XXY, tetraploid etc. */ { const struct vcfFile *vcff = tg->extraUiData; if (vcff->records == NULL) return 0; int ploidy = sameString(chromName, "chrY") ? 1 : 2; int simpleHeight = ploidy * vcff->genotypeCount * tg->lineHeight; int defaultHeight = min(simpleHeight, VCF_DEFAULT_HAP_HEIGHT); char *tdbHeight = trackDbSettingOrDefault(tg->tdb, VCF_HAP_HEIGHT_VAR, NULL); if (isNotEmpty(tdbHeight)) defaultHeight = atoi(tdbHeight); int cartHeight = cartOrTdbInt(cart, tg->tdb, VCF_HAP_HEIGHT_VAR, defaultHeight); if (tg->visibility == tvSquish) cartHeight /= 2; enum hapColorMode colorMode = getColorMode(tg->tdb); int extraPixel = (colorMode == altOnlyMode || colorMode == functionMode) ? 1 : 0; int totalHeight = cartHeight + CLIP_PAD + 2*extraPixel; tg->height = min(totalHeight, maximumTrackHeight(tg)); return tg->height; } static char *vcfHapClusterTrackName(struct track *tg, void *item) /* If someone asks for itemName/mapItemName, just send name of track like wiggle. */ { return tg->track; } static void vcfHapClusterOverloadMethods(struct track *tg, struct vcfFile *vcff) /* If we confirm at load time that we can draw a haplotype graph, use * this to overwrite the methods for the rest of execution: */ { tg->heightPer = (tg->visibility == tvSquish) ? (tl.fontHeight/4) : (tl.fontHeight / 2); tg->lineHeight = tg->heightPer + 1; char *hapMethod = cartOrTdbString(cart, tg->tdb, VCF_HAP_METHOD_VAR, VCF_DEFAULT_HAP_METHOD); if (sameString(hapMethod, VCF_HAP_METHOD_FILE_ORDER)) tg->drawItems = vcfGtHapFileOrderDraw; else if (startsWithWord(VCF_HAP_METHOD_TREE_FILE, hapMethod)) tg->drawItems = vcfGtHapTreeFileDraw; else tg->drawItems = vcfHapClusterDraw; tg->totalHeight = vcfHapClusterTotalHeight; tg->itemHeight = tgFixedItemHeight; tg->itemName = vcfHapClusterTrackName; tg->mapItemName = vcfHapClusterTrackName; tg->itemStart = tgItemNoStart; tg->itemEnd = tgItemNoEnd; tg->mapsSelf = TRUE; tg->extraUiData = vcff; } static void indelTweakMapItem(struct track *tg, struct hvGfx *hvg, void *item, char *itemName, char *mapItemName, int start, int end, int x, int y, int width, int height) /* Pass the original vcf chromStart to pgSnpMapItem, so if we have trimmed an identical * first base from item's alleles and start, we will still pass the correct start to hgc. */ { struct pgSnpVcfStartEnd *psvs = item; pgSnpMapItem(tg, hvg, item, itemName, mapItemName, psvs->vcfStart, psvs->vcfEnd, x, y, width, height); } static void vcfPhasedLoadItems(struct track *tg) /* Load up one individuals phased genotypes in window */ { char *fileOrUrl = NULL; char *tbiFileOrUrl = trackDbSetting(tg->tdb, "bigDataIndex"); // unrelated to mysql /* Figure out url or file name. */ if (tg->parallelLoading) { /* do not use mysql during parallel-fetch load */ fileOrUrl = trackDbSetting(tg->tdb, "bigDataUrl"); } else { struct sqlConnection *conn = hAllocConnTrack(database, tg->tdb); fileOrUrl = bbiNameFromSettingOrTableChrom(tg->tdb, conn, tg->table, chromName); hFreeConn(&conn); } if (isEmpty(fileOrUrl)) return; int vcfMaxErr = -1; struct vcfFile *vcff = NULL; /* protect against temporary network error */ struct errCatch *errCatch = errCatchNew(); if (errCatchStart(errCatch)) { vcff = vcfTabixFileAndIndexMayOpen(fileOrUrl, tbiFileOrUrl, chromName, winStart, winEnd, vcfMaxErr, -1); if (vcff != NULL) { filterRecords(vcff, tg->tdb); filterRefOnlyAlleles(vcff, tg->tdb); // remove items that don't differ from reference // TODO: in multi-region mode, different windows end up with different sets of variants where // all are phased or not, which throws off track heights in each window. Similar to hapCluster // mode, just switch to pgSnp view when in multi-region for now. if (slCount(windows) > 1 || tg->visibility == tvDense) pgSnpMethods(tg); tg->items = vcfFileToPgSnp(vcff, tg->tdb); // pgSnp bases coloring/display decision on count of items: tg->extraUiData = vcff; tg->customInt = slCount(tg->items); // Don't vcfFileFree here -- we are using its string pointers! } else { if (tbiFileOrUrl) errAbort("Unable to open VCF file/URL '%s' with tabix index '%s'", fileOrUrl, tbiFileOrUrl); else errAbort("Unable to open VCF file/URL '%s'", fileOrUrl); } } errCatchEnd(errCatch); if (errCatch->gotError || vcff == NULL) { if (isNotEmpty(errCatch->message->string)) tg->networkErrMsg = cloneString(errCatch->message->string); tg->drawItems = bigDrawWarning; tg->totalHeight = bigWarnTotalHeight; } errCatchFree(&errCatch); } static void vcfPhasedAddMapBox(struct hvGfx *hvg, struct vcfRecord *rec, struct pgSnpVcfStartEnd *psvs, char *text, int x, int y, int width, int height, struct track *track) // Add mapbox for a tick in the vcfPhased track { mapBoxHgcOrHgGene(hvg, psvs->vcfStart, psvs->vcfEnd, x, y, width, height, track->track, rec->name, text, NULL, TRUE, NULL); } struct hapDistanceMatrixCell { /* The pair of alleles and how distant they are */ struct hapDistanceMatrixCell *next; char *sampleId; // name of this sample char *otherId; // name of other sample int alleleIx; // index into vcfRecord->genotypes int otherAlleleIx; // index into vcfRecord->genotypes for other sample double dist; // distance between this sample/allele and another sample/allele }; struct hapDistanceMatrix { /* A row of hapDistanceMatrixCells, which store the distance computed by cwaDistance() * between two different alleles */ struct hapDistanceMatrix *next; // next row in matrix char *sampleId; // name of this sample int alleleIx; // allele id of this sample, 0 or 1 struct hapDistanceMatrixCell *row; // a row of cwaDistance scores }; static int hapDistanceMatrixCellCmp(const void *item1, const void *item2) { const struct hapDistanceMatrixCell *cell1 = *(struct hapDistanceMatrixCell **)item1; const struct hapDistanceMatrixCell *cell2 = *(struct hapDistanceMatrixCell **)item2; return cell1->dist > cell2->dist; } static struct hapDistanceMatrixCell *findClosestChildAlleleToParent(char *parent, char *child, struct hapDistanceMatrix *matrix) /* Get all cells belonging to a certain parent onto a list and sort by distance */ { struct hapDistanceMatrix *vec = NULL; struct hapDistanceMatrixCell *cellList = NULL, *cell = NULL; for (vec = matrix; vec != NULL; vec = vec->next) { for (cell = vec->row; cell != NULL; cell = cell->next) { if (sameString(cell->sampleId, parent)) { struct hapDistanceMatrixCell *tmp = needMem(sizeof(struct hapDistanceMatrixCell)); tmp->next = NULL; tmp->sampleId = cloneString(cell->sampleId); tmp->alleleIx = cell->alleleIx; tmp->otherId = cloneString(vec->sampleId); tmp->otherAlleleIx = vec->alleleIx; tmp->dist = cell->dist; slAddHead(&cellList, tmp); } } } slSort(&cellList, hapDistanceMatrixCellCmp); return cellList; } static unsigned int toggleInt(unsigned int s) /* Add or subtract one */ { return s & 1 ? s - 1 : s + 1; } static void fillOutHapOrder(unsigned int *hapOrder, unsigned int hapCount, struct hapDistanceMatrixCell *c1, struct hapDistanceMatrixCell *c2, char **sampleDrawOrder) /* Assign indices to hapOrder in the order we should draw the alleles. Allows for the second parent * cell to be NULL */ { if (c1 == NULL) errAbort("Error: fillOutHapOrder passed NULL parent"); int numSamplesToDraw = hapCount / 2; char *childSampleName = c1->otherId; int childIx = stringArrayIx(childSampleName, sampleDrawOrder, numSamplesToDraw); int i; for (i = 0; i < numSamplesToDraw; i++) { char *thisSample = sampleDrawOrder[i]; short hapIx = 2*i; if (i != childIx) // fill out the parent indices, which may be above or below the child { struct hapDistanceMatrixCell *thisCell = c2 != NULL && sameString(c2->sampleId,thisSample) ? c2 : c1; if (i < childIx) { hapOrder[hapIx] = toggleInt(thisCell->alleleIx); hapOrder[hapIx+1] = thisCell->alleleIx; } else // below the child { hapOrder[hapIx] = thisCell->alleleIx; hapOrder[hapIx+1] = toggleInt(thisCell->alleleIx); } } else // fill out the child indices { if (i == 0) { if (c2) hapOrder[hapIx] = c2->otherAlleleIx; else hapOrder[hapIx] = toggleInt(c1->otherAlleleIx); hapOrder[hapIx+1] = c1->otherAlleleIx; } else if (i == 1) { hapOrder[hapIx] = c1->otherAlleleIx; if (c2) hapOrder[hapIx+1] = c2->otherAlleleIx; else hapOrder[hapIx+1] = toggleInt(c1->otherAlleleIx); } else { hapOrder[hapIx] = c2->otherAlleleIx; hapOrder[hapIx+1] = c1->otherAlleleIx; } } } } static void maybeRollBackCell(struct hapDistanceMatrixCell *c1, struct hapDistanceMatrixCell *c2, struct hapDistanceMatrixCell *c1Copy, struct hapDistanceMatrixCell *c2Copy) /* At this point c1->otherAlleleIx != c2->otherAlleleIx, however, it may be the case that c2 has a * better scoring match to c1->otherAlleleIx, so try to find it and reset the values */ { struct hapDistanceMatrixCell *tmp = c2; while (tmp->otherAlleleIx != c1->otherAlleleIx) tmp = tmp->next; if (tmp->dist < c1->dist) { *c2 = *tmp; *c1 = *c1Copy; } } static void setHapOrderFromMatrix(unsigned int *hapOrder, unsigned int hapCount, struct hapDistanceMatrix *matrix, struct hapCluster **hapArray, struct vcfFile *vcff, char *childName, char **sampleDrawOrder) /* Given a matrix where each row is an allele of the child, and each column * is the distance from the child allele to a parent allele, fill out the order * in which the haplotypes will be drawn. The order are the indexes into rec->genotypes. */ { char *parentNames[(hapCount/2) - 1]; struct hapDistanceMatrixCell *c1 = NULL; struct hapDistanceMatrixCell *c2 = NULL; int i, ix = 0; if (hapCount > 2) // is there a trio or at least part of one? { for (i = 0; i < hapCount/2 ; i++) if (!sameString(sampleDrawOrder[i], childName)) parentNames[ix++] = sampleDrawOrder[i]; // For a parent/child combo, find the most likely set of transmitted // alleles. Depending on the number of variants in the window and the // haplotypes in question, both parents can tie as the person who // contributed a given allele c1 = findClosestChildAlleleToParent(parentNames[0], childName, matrix); c2 = findClosestChildAlleleToParent(parentNames[1], childName, matrix); // NULL if only one parent if (c1 && c2) { if (c1->otherAlleleIx == c2->otherAlleleIx) { struct hapDistanceMatrixCell *c1Copy = c1; struct hapDistanceMatrixCell *c2Copy = c2; if (c1->dist >= c2->dist) { while (c1->dist >= c2->dist && c1->otherAlleleIx == c2->otherAlleleIx) c1 = c1->next; // at this point c1->otherAlleleIx != c2>otherAlleleIx, BUT c2 may have a better // scoring match to this allele and we shouldn't have advanced c1 to begin with! // This case is exercised at the following location (chr1:53896598-53897933) in // the following file: /gbdb/hg38/1000Genomes/trio/NA12878_1463_CEU/NA12878_1463_CEUTrio.chr1.vcf.gz // where the child's haplotypes are identical to both of the mother's haplotypes! maybeRollBackCell(c1, c2, c1Copy, c2Copy); } else { while (c1->dist < c2->dist && c1->otherAlleleIx == c2->otherAlleleIx) c2 = c2->next; // similar to above case but swap c1 and c2 maybeRollBackCell(c2, c1, c2Copy, c1Copy); } } } fillOutHapOrder(hapOrder, hapCount, c1, c2, sampleDrawOrder); } else { hapOrder[0] = matrix->alleleIx; hapOrder[1] = matrix->next->alleleIx; } } static void assignHapArrayIx(int *ret, struct hapCluster **hapArray, struct vcfFile *vcff, char *sample, boolean doChild) { int i; struct vcfRecord *rec = vcff->records; int ix = 0; // index into ret for (i = 0; i < slCount(hapArray); i++) { struct hapCluster *hc = hapArray[i]; struct vcfGenotype *gt = &(rec->genotypes[hc->gtHapIx >> 1]); if (!doChild && !sameString(gt->id, sample)) ret[ix++] = i; else if (doChild && sameString(gt->id, sample)) ret[ix++] = i; } } static struct hapDistanceMatrix *fillOutDistanceMatrix(struct hapCluster **hapArray, struct vcfFile *vcff, char *sample, struct cwaExtraData *helper, int gtCount) /* Allocates and fill out a struct hapDistanceMatrix, one row per child allele, and a * hapDistanceMatrixCell per parent allele */ { int parGtCount = (gtCount - 1) * 2; int i,j; struct vcfRecord *rec = vcff->records; struct hapDistanceMatrix *matrix = NULL; int childHapArrayIndices[2]; int parentHapArrayIndices[parGtCount]; assignHapArrayIx(childHapArrayIndices, hapArray, vcff, sample, TRUE); assignHapArrayIx(parentHapArrayIndices, hapArray, vcff, sample, FALSE); for (i = 0; i < 2; i++) { struct hapDistanceMatrix *row = needMem(sizeof(struct hapDistanceMatrix)); struct hapCluster *hcChild = hapArray[childHapArrayIndices[i]]; struct vcfGenotype *gt = &(rec->genotypes[hcChild->gtHapIx >> 1]); row->row = NULL; row->sampleId = cloneString(gt->id); row->alleleIx = hcChild->gtHapIx; for (j = 0; j < parGtCount; j++) { struct hapDistanceMatrixCell *cell = needMem(sizeof(struct hapDistanceMatrixCell)); struct hapCluster *hcParent = hapArray[parentHapArrayIndices[j]]; struct vcfGenotype *parGt = &(rec->genotypes[hcParent->gtHapIx >> 1]); cell->sampleId = cloneString(parGt->id); cell->alleleIx = hcParent->gtHapIx; cell->dist = cwaDistance((struct slList *)hcChild, (struct slList *)hcParent, helper); slAddHead(&(row->row), cell); } slAddHead(&matrix, row); } return matrix; } unsigned int *computeHapDist(struct vcfFile *vcff, int centerIx, int startIx, int endIx, char *sample, int gtCount, char **sampleDrawOrder) // similar to clusterHaps(), but instead of making a hacTree at the end, call cwaDistance // on each of the pairs in hapArray to make a distance matrix, then compute a hapOrder from that { double alpha = 0.5; struct lm *lm = lmInit(0); struct cwaExtraData helper = { centerIx-startIx, endIx-startIx, alpha, lm }; struct hapCluster **hapArray = lmAlloc(lm, sizeof(struct hapCluster *) * gtCount * 2); int i; for (i=0; i < 2 * gtCount; i++) { hapArray[i] = lmHapCluster(&helper); if (i > 0) hapArray[i-1]->next = hapArray[i]; } struct vcfRecord *rec; int varIx; boolean haveHaploid; for (varIx = 0, rec = vcff->records; rec != NULL && varIx < endIx; varIx++, rec = rec->next) { if (varIx < startIx) continue; int countIx = varIx - startIx; int gtIx; for (gtIx=0; gtIx < gtCount; gtIx++) { // VCF may contain genotypes other than the few we are interested, so sampleDrawOrder[gtIx] // may not be the right index in rec->genotypes we want, look it up here const struct vcfGenotype *gt = vcfRecordFindGenotype(rec, sampleDrawOrder[gtIx]); int ix = stringArrayIx(sampleDrawOrder[gtIx], vcff->genotypeIds, vcff->genotypeCount); struct hapCluster *c1 = hapArray[gtIx]; struct hapCluster *c2 = hapArray[gtCount + gtIx]; // hardwired ploidy=2 c1->gtHapIx = ix << 1; c1->leafCount = 1; if (gt->isPhased || gt->isHaploid || (gt->hapIxA == gt->hapIxB)) { // first haplotype's counts: if (gt->hapIxA < 0) c1->unkCounts[countIx] = 1; else if (gt->hapIxA == 0) c1->refCounts[countIx] = 1; if (gt->isHaploid) haveHaploid = TRUE; else { // got second haplotype, fill in its counts: c2->gtHapIx = (ix << 1) | 1; c2->leafCount = 1; if (gt->hapIxB < 0) c2->unkCounts[countIx] = 1; else if (gt->hapIxB == 0) c2->refCounts[countIx] = 1; } } else { // Missing data or unphased heterozygote, don't use haplotype info for clustering c2->gtHapIx = (ix << 1) | 1; c2->leafCount = 1; c1->unkCounts[countIx] = c2->unkCounts[countIx] = 1; } } if (haveHaploid) { // Some array items will have an empty cluster for missing hap2 -- // trim those from the linked list. struct hapCluster *c = hapArray[0]; while (c != NULL && c->next != NULL) { if (c->next->leafCount == 0) c->next = c->next->next; else c = c->next; } } } // now fill out a distance matrix based on the cwaDistance between all the pairs in hapArray struct hapDistanceMatrix *hapDistMatrix = fillOutDistanceMatrix(hapArray, vcff, sample, &helper, gtCount); unsigned int hapCount = 2 * gtCount; unsigned int *hapOrder = needMem(sizeof(unsigned int) * hapCount); setHapOrderFromMatrix(hapOrder, hapCount, hapDistMatrix, hapArray, vcff, sample, sampleDrawOrder); return hapOrder; } static void setTransmitOrder(unsigned int *gtHapOrder, char **transmitOrder, int gtHapCount, struct vcfRecord *rec, char **sampleOrder, char *childSample) /* Fill out an array of "transmitted", "untransmitted" strings for mouseover and color lookup */ { int i; int childSampleIx = stringArrayIx(childSample, sampleOrder, gtHapCount / 2); for (i = 0; i < gtHapCount; i++) { int gtIx = gtHapOrder[i]; struct vcfGenotype *gt = &(rec->genotypes[gtIx >> 1]); int alleleSampleIx = stringArrayIx(gt->id, sampleOrder, gtHapCount / 2); if (alleleSampleIx != childSampleIx) if (alleleSampleIx > childSampleIx) { transmitOrder[i] = i % 2 == 0 ? "transmitted" : "untransmitted"; } else { transmitOrder[i] = i % 2 == 0 ? "untransmitted" : "transmitted"; } else transmitOrder[i] = "child"; } } enum phasedColorMode { noColorMode, mendelDiffMode, deNovoOnlyMode, phasedFunctionMode }; static enum phasedColorMode getPhasedColorMode(struct trackDb *tdb) /* Get the coloring mode for phased trio tracks */ { enum phasedColorMode colorMode = noColorMode; char *colorBy = cartOrTdbString(cart, tdb, VCF_PHASED_COLORBY_VAR, VCF_PHASED_COLORBY_VAR); if (sameString(colorBy, VCF_PHASED_COLORBY_MENDEL_DIFF)) colorMode = mendelDiffMode; else if (sameString(colorBy, VCF_PHASED_COLORBY_DE_NOVO)) colorMode = deNovoOnlyMode; else { char *geneTrack = cartOrTdbString(cart, tdb, "geneTrack", NULL); if (isNotEmpty(geneTrack) && sameString(colorBy, VCF_PHASED_COLORBY_FUNCTION)) colorMode = phasedFunctionMode; } return colorMode; } static int getTickColor(struct track *track, struct vcfRecord *rec, int alleleIx, int nameIx, int gtHapCount, unsigned int *gtHapOrder, char *childSampleName, char *sampleOrder[], enum phasedColorMode colorMode, enum soTerm funcTerm) /* Return the color we should use for this variant */ { // if there are no parents to draw then no concept of transmitted vs unstransmitted Color col = MG_BLACK; if (colorMode == noColorMode) col = MG_BLACK; else if (colorMode == phasedFunctionMode) { col = colorFromSoTerm(funcTerm); } else { if (gtHapCount <= 2) col = MG_BLACK; else { char *sampleName = sampleOrder[nameIx]; if (!sameString(sampleName, childSampleName)) // parents only have shading in funcMode col = MG_BLACK; else // maybe draw child ticks red { struct vcfGenotype *childGt = &(rec->genotypes[gtHapOrder[alleleIx] >> 1]); int childHapIx = gtHapOrder[alleleIx] & 1 ? childGt->hapIxB : childGt->hapIxA; int parIx = alleleIx; int numSamples = gtHapCount / 2; boolean isEven = (alleleIx % 2) == 0; const int adjacentLineSet = 1; // how many haplotype "lines" away is the second parent if we are drawing in either // parent1,parent2,child or child,parent1,parent2 order: const int otherLineSet = 4; if (nameIx == 0) { if (!isEven) parIx = alleleIx + adjacentLineSet; else if (numSamples > 2) parIx = alleleIx + otherLineSet; } else if (nameIx == 1) { if (isEven) parIx = alleleIx - adjacentLineSet; else if (numSamples > 2) // don't compare the allele the child to a missing parent parIx = alleleIx + adjacentLineSet; } else { if (isEven) parIx = alleleIx - adjacentLineSet; else parIx = alleleIx - otherLineSet; } struct vcfGenotype *parentGt = &(rec->genotypes[gtHapOrder[parIx] >> 1]); if (parentGt->isPhased || (parentGt->hapIxA == 1 && parentGt->hapIxB == 1)) { int transmittedIx = gtHapOrder[parIx] & 1 ? parentGt->hapIxB : parentGt->hapIxA; int untransmittedIx = gtHapOrder[parIx] & 1 ? parentGt->hapIxA : parentGt->hapIxB; col = MG_BLACK; if (colorMode == mendelDiffMode) { if (rec->alleles[childHapIx] != rec->alleles[transmittedIx] && rec->alleles[childHapIx] == rec->alleles[untransmittedIx]) col = MG_RED; } else { if (rec->alleles[childHapIx] != rec->alleles[transmittedIx] && rec->alleles[childHapIx] != rec->alleles[untransmittedIx]) col = MG_RED; } } } } } return col; } void vcfPhasedDrawOneRecord(struct track *track, struct hvGfx *hvg, struct vcfRecord *rec, void *item, unsigned int *gtHapOrder, int gtHapCount, int xOff, int yOffsets[], char *sampleOrder[], char *childSample, double scale, char *transmitOrder[], enum phasedColorMode colorMode, enum soTerm funcTerm) // Draw a record's haplotypes on the appropriate lines { int i; int x1 = round((double)(rec->chromStart-winStart)*scale) + xOff; int x2 = round((double)(rec->chromEnd-winStart)*scale) + xOff; struct pgSnpVcfStartEnd *psvs = (struct pgSnpVcfStartEnd *)item; int w = x2-x1; if (w < 1) w = 1; int tickHeight = track->itemHeight(track, track->items); for (i = 0; i < gtHapCount ; i++) { struct vcfGenotype *gt = &(rec->genotypes[gtHapOrder[i] >> 1]); int nameIx = stringArrayIx(gt->id, sampleOrder, track->customInt); struct dyString *mouseover = dyStringNew(0); int tickColor = getTickColor(track, rec, i, nameIx, gtHapCount, gtHapOrder, childSample, sampleOrder, colorMode, funcTerm); if (gt->isPhased || (gt->hapIxA == 1 && gt->hapIxB == 1)) // if phased or homozygous alt { int alIx = gtHapOrder[i] & 1 ? gt->hapIxB : gt->hapIxA; dyStringPrintf(mouseover, "%s ", gt->id); if (alIx != 0) // non-reference allele { if (sameString(childSample, gt->id)) // we're drawing child ticks { dyStringPrintf(mouseover, "allele: %s", rec->alleles[alIx]); } else { if (gt->isPhased) dyStringPrintf(mouseover, "likely %s allele: %s", transmitOrder[i], rec->alleles[alIx]); else { int otherIx = (alIx == gt->hapIxB ? gt->hapIxA : gt->hapIxB); dyStringPrintf(mouseover, "unphased alleles: %s/%s", rec->alleles[alIx], rec->alleles[otherIx]); } } int y = yOffsets[i] - (tickHeight/2); hvGfxBox(hvg, x1, y, w, tickHeight, tickColor); vcfPhasedAddMapBox(hvg, rec, psvs, mouseover->string, x1, y, w, tickHeight, track); } } else if (gt->hapIxA != 0 || gt->hapIxB != 0)// draw the tick between the two haplotype lines { int yMid = ((yOffsets[2*nameIx] + yOffsets[(2*nameIx)+1]) / 2); // midpoint of two haplotype lines hvGfxBox(hvg, x1, yMid - (tickHeight / 2), w, tickHeight, tickColor); dyStringPrintf(mouseover, "%s unphased genotype: %s/%s", gt->id, rec->alleles[0], rec->alleles[1]); vcfPhasedAddMapBox(hvg, rec, psvs, mouseover->string, x1, yMid, w, tickHeight, track); } } } static void vcfPhasedAddLabel(struct track *track, struct hvGfx *hvg, char *label, int trackY, int labelY, MgFont *font, Color color) // Add a VCF Sample name to the left label of the haplotype representation { int clipXBak, clipYBak, clipWidthBak, clipHeightBak; struct hvGfx *hvgLL = (hvgSide != NULL) ? hvgSide : hvg; hvGfxGetClip(hvgLL, &clipXBak, &clipYBak, &clipWidthBak, &clipHeightBak); hvGfxUnclip(hvgLL); hvGfxSetClip(hvgLL, leftLabelX, trackY, leftLabelWidth, track->height); hvGfxTextRight(hvgLL, leftLabelX, labelY, leftLabelWidth, track->lineHeight, color, font, label); // Restore the prior clipping: hvGfxUnclip(hvgLL); hvGfxSetClip(hvgLL, clipXBak, clipYBak, clipWidthBak, clipHeightBak); } static void vcfPhasedSetupHaplotypesLines(struct track *track, struct hvGfx *hvg, int xOff, int yOff, int width, int *retYOffsets, struct slPair *sampleNames, char *childSample, MgFont *font) /* Setup the background for drawing the ticks, the two haplotype lines for each sample, and the * transparent gray box to help distinguish between consecutive samples */ { int sampleHeight = round(track->height / track->customInt); double yHap1 = track->lineHeight; // relative offset of first haplotype line double yHap2 = sampleHeight - track->lineHeight; // relative offset of second line struct slPair *name; int i, y1, y2; struct rgbColor yellow = lightRainbowAtPos(0.2); int transYellow = MAKECOLOR_32_A(yellow.r, yellow.g, yellow.b, 100); boolean useDefaultLabel = FALSE; if (cartVarExistsAnyLevel(cart, track->tdb, FALSE, VCF_PHASED_DEFAULT_LABEL_VAR)) useDefaultLabel = cartUsualBooleanClosestToHome(cart, track->tdb, FALSE, VCF_PHASED_DEFAULT_LABEL_VAR, FALSE); boolean useAliasLabel = trackDbSettingOn(track->tdb, VCF_PHASED_TDB_USE_ALT_NAMES); if (cartVarExistsAnyLevel(cart, track->tdb, FALSE, VCF_PHASED_ALIAS_LABEL_VAR)) useAliasLabel = cartUsualBooleanClosestToHome(cart, track->tdb, FALSE, VCF_PHASED_ALIAS_LABEL_VAR, FALSE); for (name = sampleNames, i = 0; name != NULL; name = name->next, i++) { y1 = yOff + yHap1 + (i * sampleHeight); y2 = yOff + yHap2 + (i * sampleHeight); retYOffsets[2*i] = y1; retYOffsets[(2*i) + 1] = y2; // make the background of every other lane light yellow, but only when NOT doing PDF/EPS output if (hvg->pixelBased && sameString(childSample, name->name)) { hvGfxBox(hvg, xOff, y1-(track->lineHeight), width, (y2 + track->lineHeight) - (y1-track->lineHeight), transYellow); } hvGfxLine(hvg, xOff, y1, xOff+width, y1, MG_BLACK); hvGfxLine(hvg, xOff, y2, xOff+width, y2, MG_BLACK); struct dyString *label = dyStringNew(0); boolean hasAlias = isNotEmpty((char *)name->val); dyStringPrintf(label, "%s%s%s", useDefaultLabel ? name->name : "", useDefaultLabel && useAliasLabel && hasAlias ? "/" : "", useAliasLabel && hasAlias ? (char *)name->val : ""); vcfPhasedAddLabel(track, hvg, label->string, yOff, round(((y1 + y2) / 2) - (track->lineHeight / 2)), font, MG_BLACK); } } static void vcfPhasedDrawItems(struct track *track, int seqStart, int seqEnd, struct hvGfx *hvg, int xOff, int yOff, int width, MgFont *font, Color color, enum trackVisibility vis) /* Split samples' chromosomes (haplotypes), cluster them by parents, and * draw them all along a line representing each chromosome*/ { struct vcfFile *vcff = track->extraUiData; if (vcff->records == NULL) return; struct txInfo *txiList = NULL; struct seqWindow *gSeqWin = chromSeqWindowNew(database, chromName, seqStart, seqEnd); enum phasedColorMode colorMode = getPhasedColorMode(track->tdb); if (colorMode == phasedFunctionMode) txiList = txInfoLoad(gSeqWin, track->tdb); const double scale = scaleForPixels(width); boolean hideOtherSamples = cartUsualBooleanClosestToHome(cart, track->tdb, FALSE, VCF_PHASED_HIDE_OTHER_VAR, FALSE); struct slPair *pair, *sampleNames = vcfPhasedGetSampleOrder(cart, track->tdb, FALSE, hideOtherSamples); int gtCount = slCount(sampleNames); int gtHapCount = gtCount * 2; int yOffsets[gtHapCount]; // y offsets of each haplotype line char *sampleOrder[gtCount]; // order of sampleName lines int i; for (pair = sampleNames, i = 0; pair != NULL && i < gtCount; pair = pair->next, i++) sampleOrder[i] = pair->name; char *childSample = cloneString(trackDbSetting(track->tdb, VCF_PHASED_CHILD_SAMPLE_SETTING)); char *pt = strchr(childSample, '|'); if (pt != NULL) *pt = '\0'; // set up the "haplotype" lines and the transparent yellow box to delineate samples vcfPhasedSetupHaplotypesLines(track, hvg, xOff, yOff, width, yOffsets, sampleNames, childSample, font); // maybe sort the variants by haplotype then draw ticks unsigned int *hapOrder = needMem(sizeof(short) * gtHapCount); int nRecords = slCount(vcff->records); int centerIx = getCenterVariantIx(track, seqStart, seqEnd, vcff->records); int startIx = 0; int endIx = nRecords; hapOrder = computeHapDist(vcff, centerIx, startIx, endIx, childSample, gtCount, sampleOrder); struct vcfRecord *rec = NULL; struct slList *item = NULL; // array of "trans", "untrans", etc for mouseovers char *transmitOrder[gtHapCount]; setTransmitOrder(hapOrder, transmitOrder, gtHapCount, vcff->records, sampleOrder, childSample); for (rec = vcff->records, item = track->items; rec != NULL && item != NULL; rec = rec->next, item = item->next) { enum soTerm funcTerm = soUnknown; if (colorMode == phasedFunctionMode) funcTerm = functionForRecord(rec, gSeqWin, txiList); vcfPhasedDrawOneRecord(track, hvg, rec, item, hapOrder, gtHapCount, xOff, yOffsets, sampleOrder, childSample, scale, transmitOrder, colorMode, funcTerm); } } static int vcfPhasedItemHeight(struct track *tg, void *item) { return tg->heightPer; } int vcfPhasedTrackHeight(struct track *tg, enum trackVisibility vis) { const struct vcfFile *vcff = tg->extraUiData; // when doing composite track height, vcfPhasedLoadItems won't have been called yet! if (vis == tvDense) return pgSnpHeight(tg, vis); if (!vcff || vcff->records == NULL) return 0; boolean hideOtherSamples = cartUsualBooleanClosestToHome(cart, tg->tdb, FALSE, VCF_PHASED_HIDE_OTHER_VAR, FALSE); int totalSamples = slCount(vcfPhasedGetSampleOrder(cart, tg->tdb, FALSE, hideOtherSamples)); tg->lineHeight = tl.fontHeight + 1; tg->heightPer = tl.fontHeight; // if all variants in view are phased, then 3 lines per sample, // else 4 lines. The extra 2 is for clear separation int heightPerSample; if (vcff->allPhased) heightPerSample = (3 * tg->lineHeight) + 2; else heightPerSample = (4 * tg->lineHeight); tg->height = totalSamples * heightPerSample; tg->itemHeight = vcfPhasedItemHeight; // custom int is reserved for doing pgSnp coloring but as far as I can tell is // never actually used? tg->customInt = totalSamples; return tg->height; } void vcfPhasedMethods(struct track *track) /* Load items from a VCF of one individuals phased genotypes */ { knetUdcInstall(); pgSnpMethods(track); track->drawItems = vcfPhasedDrawItems; // Disinherit next/prev flag and methods since we don't support next/prev: track->nextExonButtonable = FALSE; track->nextPrevExon = NULL; track->nextPrevItem = NULL; track->loadItems = vcfPhasedLoadItems; track->totalHeight = vcfPhasedTrackHeight; track->itemName = vcfHapClusterTrackName; track->mapsSelf = TRUE; } static void vcfTabixLoadItems(struct track *tg) /* Load items in window from VCF file using its tabix index file. */ { char *fileOrUrl = NULL; char *tbiFileOrUrl = trackDbSetting(tg->tdb, "bigDataIndex"); // unrelated to mysql /* Figure out url or file name. */ if (tg->parallelLoading) { /* do not use mysql during parallel-fetch load */ fileOrUrl = trackDbSetting(tg->tdb, "bigDataUrl"); } else { struct sqlConnection *conn = hAllocConnTrack(database, tg->tdb); fileOrUrl = bbiNameFromSettingOrTableChrom(tg->tdb, conn, tg->table, chromName); hFreeConn(&conn); } if (isEmpty(fileOrUrl)) return; fileOrUrl = hReplaceGbdb(fileOrUrl); int vcfMaxErr = -1; struct vcfFile *vcff = NULL; boolean hapClustEnabled = cartOrTdbBoolean(cart, tg->tdb, VCF_HAP_ENABLED_VAR, TRUE); if (slCount(windows)>1) hapClustEnabled = FALSE; // haplotype sorting display not currently available with multiple windows. /* protect against temporary network error */ struct errCatch *errCatch = errCatchNew(); if (errCatchStart(errCatch)) { vcff = vcfTabixFileAndIndexMayOpen(fileOrUrl, tbiFileOrUrl, chromName, winStart, winEnd, vcfMaxErr, -1); if (vcff != NULL) { filterRecords(vcff, tg->tdb); int vis = tdbVisLimitedByAncestors(cart,tg->tdb,TRUE,TRUE); if (hapClustEnabled && vcff->genotypeCount > 1 && (vis == tvPack || vis == tvSquish)) vcfHapClusterOverloadMethods(tg, vcff); else { tg->items = vcfFileToPgSnp(vcff, tg->tdb); // pgSnp bases coloring/display decision on count of items: tg->customInt = slCount(tg->items); } // Don't vcfFileFree here -- we are using its string pointers! } else { if (tbiFileOrUrl) errAbort("Unable to open VCF file/URL '%s' with tabix index '%s'", fileOrUrl, tbiFileOrUrl); else errAbort("Unable to open VCF file/URL '%s'", fileOrUrl); } } errCatchEnd(errCatch); if (errCatch->gotError || vcff == NULL) { if (isNotEmpty(errCatch->message->string)) tg->networkErrMsg = cloneString(errCatch->message->string); tg->drawItems = bigDrawWarning; tg->totalHeight = bigWarnTotalHeight; } errCatchFree(&errCatch); } void vcfTabixMethods(struct track *track) /* Methods for VCF + tabix files. */ { knetUdcInstall(); pgSnpMethods(track); track->mapItem = indelTweakMapItem; // Disinherit next/prev flag and methods since we don't support next/prev: track->nextExonButtonable = FALSE; track->nextPrevExon = NULL; track->nextPrevItem = NULL; track->loadItems = vcfTabixLoadItems; track->canPack = TRUE; } static void vcfLoadItems(struct track *tg) /* Load items in window from VCF file. */ { int vcfMaxErr = -1; struct vcfFile *vcff = NULL; boolean hapClustEnabled = cartOrTdbBoolean(cart, tg->tdb, VCF_HAP_ENABLED_VAR, TRUE); if (slCount(windows)>1) hapClustEnabled = FALSE; // haplotype sorting display not currently available with multiple windows. char *table = tg->table; struct customTrack *ct = tg->customPt; struct sqlConnection *conn; if (ct == NULL) conn = hAllocConnTrack(database, tg->tdb); else { conn = hAllocConn(CUSTOM_TRASH); table = ct->dbTableName; } char *vcfFile = bbiNameFromSettingOrTable(tg->tdb, conn, table); hFreeConn(&conn); /* protect against parse error */ struct errCatch *errCatch = errCatchNew(); if (errCatchStart(errCatch)) { vcff = vcfFileMayOpen(vcfFile, chromName, winStart, winEnd, vcfMaxErr, -1, TRUE); if (vcff != NULL) { filterRecords(vcff, tg->tdb); int vis = tdbVisLimitedByAncestors(cart,tg->tdb,TRUE,TRUE); if (hapClustEnabled && vcff->genotypeCount > 1 && vcff->genotypeCount < 3000 && (vis == tvPack || vis == tvSquish)) vcfHapClusterOverloadMethods(tg, vcff); else { tg->items = vcfFileToPgSnp(vcff, tg->tdb); // pgSnp bases coloring/display decision on count of items: tg->customInt = slCount(tg->items); } // Don't vcfFileFree here -- we are using its string pointers! } else errAbort("Unable to open VCF file '%s'", vcfFile); } errCatchEnd(errCatch); if (errCatch->gotError || vcff == NULL) { if (isNotEmpty(errCatch->message->string)) tg->networkErrMsg = cloneString(errCatch->message->string); tg->drawItems = bigDrawWarning; tg->totalHeight = bigWarnTotalHeight; } errCatchFree(&errCatch); } void vcfMethods(struct track *track) /* Methods for Variant Call Format. */ { pgSnpMethods(track); track->mapItem = indelTweakMapItem; // Disinherit next/prev flag and methods since we don't support next/prev: track->nextExonButtonable = FALSE; track->nextPrevExon = NULL; track->nextPrevItem = NULL; track->loadItems = vcfLoadItems; track->canPack = TRUE; }