e46073f856770bdfef4f7637eea8f9f9297aa139 chmalee Tue Nov 19 15:57:08 2019 -0800 Initial commit of new track type vcfPhased trio. A line with ticks, one per haplotype per sample in the VCF, as specified by trackDb variables. diff --git src/hg/hgTracks/vcfTrack.c src/hg/hgTracks/vcfTrack.c index 75140df..a4a9ed8 100644 --- src/hg/hgTracks/vcfTrack.c +++ src/hg/hgTracks/vcfTrack.c @@ -1,36 +1,37 @@ /* vcfTrack -- handlers for Variant Call Format data. */ /* Copyright (C) 2014 The Regents of the University of California * See README in this or parent directory for licensing information. */ #include "common.h" #include "bigWarn.h" #include "dystring.h" +#include "rainbow.h" #include "errCatch.h" #include "hacTree.h" #include "hdb.h" #include "hgColors.h" #include "hgTracks.h" #include "pgSnp.h" #include "phyloTree.h" #include "trashDir.h" #include "vcf.h" #include "vcfUi.h" #include "knetUdc.h" #include "udc.h" - +#include "memgfx.h" static boolean getMinQual(struct trackDb *tdb, double *retMinQual) /* Return TRUE and set retMinQual if cart contains minimum QUAL filter */ { if (cartOrTdbBoolean(cart, tdb, VCF_APPLY_MIN_QUAL_VAR, VCF_DEFAULT_APPLY_MIN_QUAL)) { if (retMinQual != NULL) *retMinQual = cartOrTdbDouble(cart, tdb, VCF_MIN_QUAL_VAR, VCF_DEFAULT_MIN_QUAL); return TRUE; } return FALSE; } static boolean minQualFail(struct vcfRecord *record, double minQual) /* Return TRUE if record's QUAL column value is non-numeric or is less than minQual. */ @@ -149,30 +150,55 @@ maxAltFreq = atof(lastWordInLine(data)); refFreq -= maxAltFreq; } } } } if (gotInfo) { double majorAlFreq = max(refFreq, maxAltFreq); if (majorAlFreq > (1.0 - minFreq)) return TRUE; } return FALSE; } +static void filterRefOnlyAlleles(struct vcfFile *vcff, struct trackDb *tdb) +/* Drop items from VCF that don't differ from the reference for any of the + * samples specified in trackDb */ +{ +struct vcfRecord *rec, *nextRecord, *retList = NULL; +const struct vcfGenotype *gt; + +struct slPair *sample, *sampleOrder = vcfPhasedGetSampleOrder(cart, tdb); +for (rec = vcff->records; rec != NULL; rec = nextRecord) + { + nextRecord = rec->next; + boolean allRef = TRUE; + for (sample = sampleOrder; sample != NULL; sample = sample->next) + { + gt = vcfRecordFindGenotype(rec, sample->name); + if (gt && !(gt->hapIxA == 0 && gt->hapIxB == 0) ) + allRef = FALSE; + } + if (!allRef) + slAddHead(&retList, rec); + } +slReverse(&retList); +vcff->records = retList; +} + static void filterRecords(struct vcfFile *vcff, struct trackDb *tdb) /* If a filter is specified in the cart, remove any records that don't pass filter. */ { double minQual = VCF_DEFAULT_MIN_QUAL; struct slName *filterValues = NULL; double minFreq = VCF_DEFAULT_MIN_ALLELE_FREQ; boolean gotQualFilter = getMinQual(tdb, &minQual); boolean gotFilterFilter = getFilterValues(tdb, &filterValues); boolean gotMinFreqFilter = getMinFreq(tdb, &minFreq); if (! (gotQualFilter || gotFilterFilter || gotMinFreqFilter) ) return; struct vcfRecord *rec, *nextRec, *newList = NULL; for (rec = vcff->records; rec != NULL; rec = nextRec) { @@ -185,41 +211,53 @@ slReverse(&newList); vcff->records = newList; } struct pgSnpVcfStartEnd /* This extends struct pgSnp by tacking on an original VCF chromStart and End at the end, * for use by indelTweakMapItem below. This can be cast to pgs. */ { struct pgSnp pgs; unsigned int vcfStart; unsigned int vcfEnd; }; static struct pgSnp *vcfFileToPgSnp(struct vcfFile *vcff, struct trackDb *tdb) /* Convert vcff's records to pgSnp; don't free vcff until you're done with pgSnp - * because it contains pointers into vcff's records' chrom. */ + * because it contains pointers into vcff's records' chrom. If the trackDb setting + * sampleName is present, then check whether all the records are phased or not */ { struct pgSnp *pgsList = NULL; struct vcfRecord *rec; int maxLen = 33; int maxAlCount = 5; +struct slPair *sample = NULL, *phasedSamples = NULL; +if (sameString(tdb->type, "vcfPhasedTrio")) + phasedSamples = vcfPhasedGetSampleOrder(cart, tdb); + +vcff->allPhased = TRUE; for (rec = vcff->records; rec != NULL; rec = rec->next) { struct pgSnpVcfStartEnd *psvs = needMem(sizeof(*psvs)); psvs->vcfStart = vcfRecordTrimIndelLeftBase(rec); psvs->vcfEnd = vcfRecordTrimAllelesRight(rec); + for (sample = phasedSamples; sample != NULL; sample = sample->next) + { + const struct vcfGenotype *gt = vcfRecordFindGenotype(rec, sample->name); + if (!gt->isPhased) + vcff->allPhased = FALSE; + } struct pgSnp *pgs = pgSnpFromVcfRecord(rec); memcpy(&(psvs->pgs), pgs, sizeof(*pgs)); pgs = (struct pgSnp *)psvs; // leak mem // Insertion sequences can be quite long; abbreviate here for display. int len = strlen(pgs->name); if (len > maxLen) { int maxAlLen = (maxLen / min(rec->alleleCount, maxAlCount)) - 1; pgs->name[0] = '\0'; int i; for (i = 0; i < rec->alleleCount; i++) { if (i > 0) safencat(pgs->name, len+1, "/", 1); if (i >= maxAlCount) @@ -1542,30 +1580,573 @@ tg->itemEnd = tgItemNoEnd; tg->mapsSelf = TRUE; tg->extraUiData = vcff; } static void indelTweakMapItem(struct track *tg, struct hvGfx *hvg, void *item, char *itemName, char *mapItemName, int start, int end, int x, int y, int width, int height) /* Pass the original vcf chromStart to pgSnpMapItem, so if we have trimmed an identical * first base from item's alleles and start, we will still pass the correct start to hgc. */ { struct pgSnpVcfStartEnd *psvs = item; pgSnpMapItem(tg, hvg, item, itemName, mapItemName, psvs->vcfStart, psvs->vcfEnd, x, y, width, height); } +static void vcfPhasedLoadItems(struct track *tg) +/* Load up one individuals phased genotypes in window */ +{ +char *fileOrUrl = NULL; +char *tbiFileOrUrl = trackDbSetting(tg->tdb, "bigDataIndex"); // unrelated to mysql + +/* Figure out url or file name. */ +if (tg->parallelLoading) + { + /* do not use mysql during parallel-fetch load */ + fileOrUrl = trackDbSetting(tg->tdb, "bigDataUrl"); + } +else + { + struct sqlConnection *conn = hAllocConnTrack(database, tg->tdb); + fileOrUrl = bbiNameFromSettingOrTableChrom(tg->tdb, conn, tg->table, chromName); + hFreeConn(&conn); + } + +if (isEmpty(fileOrUrl)) + return; +int vcfMaxErr = -1; +struct vcfFile *vcff = NULL; + +/* protect against temporary network error */ +struct errCatch *errCatch = errCatchNew(); +if (errCatchStart(errCatch)) + { + vcff = vcfTabixFileAndIndexMayOpen(fileOrUrl, tbiFileOrUrl, chromName, winStart, winEnd, vcfMaxErr, -1); + if (vcff != NULL) + { + filterRecords(vcff, tg->tdb); + filterRefOnlyAlleles(vcff, tg->tdb); // remove items that don't differ from reference + + // TODO: in multi-region mode, different windows end up with different sets of variants where + // all are phased or not, which throws off track heights in each window. Similar to hapCluster + // mode, just switch to pgSnp view when in multi-region for now. + if (slCount(windows) > 1 || tg->visibility == tvDense) + pgSnpMethods(tg); + tg->items = vcfFileToPgSnp(vcff, tg->tdb); + // pgSnp bases coloring/display decision on count of items: + tg->extraUiData = vcff; + tg->customInt = slCount(tg->items); + // Don't vcfFileFree here -- we are using its string pointers! + } + else + { + if (tbiFileOrUrl) + errAbort("Unable to open VCF file/URL '%s' with tabix index '%s'", fileOrUrl, tbiFileOrUrl); + else + errAbort("Unable to open VCF file/URL '%s'", fileOrUrl); + } + } +errCatchEnd(errCatch); +if (errCatch->gotError || vcff == NULL) + { + if (isNotEmpty(errCatch->message->string)) + tg->networkErrMsg = cloneString(errCatch->message->string); + tg->drawItems = bigDrawWarning; + tg->totalHeight = bigWarnTotalHeight; + } +errCatchFree(&errCatch); +} + +static void vcfPhasedAddMapBox(struct hvGfx *hvg, struct vcfRecord *rec, struct pgSnpVcfStartEnd *psvs, char *text, int x, int y, int width, int height, struct track *track) +// Add mapbox for a tick in the vcfPhased track +{ +mapBoxHgcOrHgGene(hvg, psvs->vcfStart, psvs->vcfEnd, x, y, width, height, track->track, rec->name, text, NULL, TRUE, NULL); +} + +struct hapDistanceMatrixCell +{ +/* The pair of alleles and how distant they are */ + struct hapDistanceMatrixCell *next; + char *sampleId; // name of this sample + char *otherId; // name of other sample + short alleleIx; // index into vcfRecord->genotypes + short otherAlleleIx; // index into vcfRecord->genotypes for other sample + double dist; // distance between this sample/allele and another sample/allele +}; + +struct hapDistanceMatrix +{ +/* A row of hapDistanceMatrixCells, which store the distance computed by cwaDistance() + * between two different alleles */ + struct hapDistanceMatrix *next; // next row in matrix + char *sampleId; // name of this sample + short alleleIx; // allele id of this sample, 0 or 1 + struct hapDistanceMatrixCell *row; // a row of cwaDistance scores +}; + +static int hapDistanceMatrixCellCmp(const void *item1, const void *item2) +{ +const struct hapDistanceMatrixCell *cell1 = *(struct hapDistanceMatrixCell **)item1; +const struct hapDistanceMatrixCell *cell2 = *(struct hapDistanceMatrixCell **)item2; +return cell1->dist > cell2->dist; +} + +static struct hapDistanceMatrixCell *findClosestChildAlleleToParent(char *parent, char *child, + struct hapDistanceMatrix *matrix) +/* Get all cells belonging to a certain parent onto a list and sort by distance */ +{ +struct hapDistanceMatrix *vec = NULL; +struct hapDistanceMatrixCell *cellList = NULL, *cell = NULL; +for (vec = matrix; vec != NULL; vec = vec->next) + { + for (cell = vec->row; cell != NULL; cell = cell->next) + { + if (sameString(cell->sampleId, parent)) + { + struct hapDistanceMatrixCell *tmp = needMem(sizeof(struct hapDistanceMatrixCell)); + tmp->next = NULL; + tmp->sampleId = cloneString(cell->sampleId); + tmp->alleleIx = cell->alleleIx; + tmp->otherId = cloneString(vec->sampleId); + tmp->otherAlleleIx = vec->alleleIx; + tmp->dist = cell->dist; + slAddHead(&cellList, tmp); + } + } + } +slSort(&cellList, hapDistanceMatrixCellCmp); +return cellList; +} + +static unsigned short toggleShort(unsigned short s) +/* Add or subtract one */ +{ +return s & 1 ? s - 1 : s + 1; +} + +static void fillOutHapOrder(unsigned short *hapOrder, unsigned short hapCount, struct hapDistanceMatrixCell *c1, struct hapDistanceMatrixCell *c2) +/* Assign indices to hapOrder in the order we should draw the alleles. Allows for the second parent + * cell to be NULL */ +{ +if (c1 == NULL) + errAbort("Error: fillOutHapOrder passed NULL parent"); +int numSamplesToDraw = hapCount / 2; +int i; +for (i = 0; i < numSamplesToDraw; i++) + { + short hapIx = 2*i; + if (i == 0) // top set of lines + { + hapOrder[hapIx] = toggleShort(c1->alleleIx); + hapOrder[hapIx+1] = c1->alleleIx; + } + else if (i + 1 == numSamplesToDraw) // last set of lines + { + if (c2) + { + hapOrder[hapIx] = c2->alleleIx; + hapOrder[hapIx+1] = toggleShort(c2->alleleIx); + } + else + { + hapOrder[hapIx] = c1->otherAlleleIx; + hapOrder[hapIx+1] = toggleShort(c1->otherAlleleIx); + } + } + else // orient based on above and below + { + hapOrder[hapIx] = c1->otherAlleleIx; + hapOrder[hapIx+1] = c2->otherAlleleIx; + } + } +} + +static void setHapOrderFromMatrix(unsigned short *hapOrder, unsigned short hapCount, + struct hapDistanceMatrix *matrix, struct hapCluster **hapArray, + struct vcfFile *vcff, char *childName, char **sampleDrawOrder) +/* Given a matrix where each row is an allele of the child, and each column + * is the distance from the child allele to a parent allele, fill out the order + * in which the haplotypes will be drawn. The order are the indexes into rec->genotypes. */ +{ +char *parentNames[(hapCount/2) - 1]; +struct hapDistanceMatrixCell *c1 = NULL; +struct hapDistanceMatrixCell *c2 = NULL; +int i, ix = 0; +if (hapCount > 2) // is there a trio or at least part of one? + { + for (i = 0; i < hapCount/2 ; i++) + if (!sameString(sampleDrawOrder[i], childName)) + parentNames[ix++] = sampleDrawOrder[i]; + + // For a parent/child combo, find the most likely set of transmitted + // alleles. Depending on the number of variants in the window and the + // haplotypes in question, both parents can tie as the person who + // contributed a given allele + c1 = findClosestChildAlleleToParent(parentNames[0], childName, matrix); + c2 = findClosestChildAlleleToParent(parentNames[1], childName, matrix); // NULL if only one parent + if (c1 && c2) + { + if (c1->otherAlleleIx == c2->otherAlleleIx) + { + // first choose the lowest score, if a tie then just advance c1 + if (c1->dist >= c2->dist) + { + while (c1->dist >= c2->dist && c1->otherAlleleIx == c2->otherAlleleIx) + c1 = c1->next; + } + else + { + while (c1->dist < c2->dist && c1->otherAlleleIx == c2->otherAlleleIx) + c2 = c2->next; + } + } + } + fillOutHapOrder(hapOrder, hapCount, c1, c2); + } +else + { + hapOrder[0] = matrix->alleleIx; + hapOrder[1] = matrix->next->alleleIx; + } +} + +static void assignHapArrayIx(short *ret, struct hapCluster **hapArray, struct vcfFile *vcff, char *sample, boolean doChild) +{ +int i; +struct vcfRecord *rec = vcff->records; +int ix = 0; // index into ret +for (i = 0; i < slCount(hapArray); i++) + { + struct hapCluster *hc = hapArray[i]; + struct vcfGenotype *gt = &(rec->genotypes[hc->gtHapIx >> 1]); + if (!doChild && !sameString(gt->id, sample)) + ret[ix++] = i; + else if (doChild && sameString(gt->id, sample)) + ret[ix++] = i; + } +} + +static struct hapDistanceMatrix *fillOutDistanceMatrix(struct hapCluster **hapArray, struct vcfFile *vcff, char *sample, struct cwaExtraData *helper, int gtCount) +{ +short parGtCount = (gtCount - 1) * 2; +int i,j; +struct vcfRecord *rec = vcff->records; +struct hapDistanceMatrix *matrix = NULL; +short childHapArrayIndices[2]; +short parentHapArrayIndices[parGtCount]; +assignHapArrayIx(childHapArrayIndices, hapArray, vcff, sample, TRUE); +assignHapArrayIx(parentHapArrayIndices, hapArray, vcff, sample, FALSE); +for (i = 0; i < 2; i++) + { + struct hapDistanceMatrix *row = needMem(sizeof(struct hapDistanceMatrix)); + struct hapCluster *hcChild = hapArray[childHapArrayIndices[i]]; + struct vcfGenotype *gt = &(rec->genotypes[hcChild->gtHapIx >> 1]); + row->row = NULL; + row->sampleId = cloneString(gt->id); + row->alleleIx = hcChild->gtHapIx; + for (j = 0; j < parGtCount; j++) + { + struct hapDistanceMatrixCell *cell = needMem(sizeof(struct hapDistanceMatrixCell)); + struct hapCluster *hcParent = hapArray[parentHapArrayIndices[j]]; + struct vcfGenotype *parGt = &(rec->genotypes[hcParent->gtHapIx >> 1]); + cell->sampleId = cloneString(parGt->id); + cell->alleleIx = hcParent->gtHapIx; + cell->dist = cwaDistance((struct slList *)hcChild, (struct slList *)hcParent, helper); + slAddHead(&(row->row), cell); + } + slAddHead(&matrix, row); + } +return matrix; +} + +unsigned short *computeHapDist(struct vcfFile *vcff, int centerIx, int startIx, int endIx, char *sample, int gtCount, char **sampleDrawOrder) +// similar to clusterHaps(), but instead of making a hacTree at the end, call cwaDistance +// on each of the pairs in hapArray to make a distance matrix, then compute a hapOrder from that +{ +double alpha = 0.5; +struct lm *lm = lmInit(0); +struct cwaExtraData helper = { centerIx-startIx, endIx-startIx, alpha, lm }; +struct hapCluster **hapArray = lmAlloc(lm, sizeof(struct hapCluster *) * gtCount * 2); +int i; +for (i=0; i < 2 * gtCount; i++) + { + hapArray[i] = lmHapCluster(&helper); + if (i > 0) + hapArray[i-1]->next = hapArray[i]; + } +struct vcfRecord *rec; +int varIx; +boolean haveHaploid; +for (varIx = 0, rec = vcff->records; rec != NULL && varIx < endIx; varIx++, rec = rec->next) + { + if (varIx < startIx) + continue; + int countIx = varIx - startIx; + int gtIx; + for (gtIx=0; gtIx < gtCount; gtIx++) + { + // VCF may contain genotypes other than the few we are interested, so sampleDrawOrder[gtIx] + // may not be the right index in rec->genotypes we want, look it up here + const struct vcfGenotype *gt = vcfRecordFindGenotype(rec, sampleDrawOrder[gtIx]); + int ix = stringArrayIx(sampleDrawOrder[gtIx], vcff->genotypeIds, vcff->genotypeCount); + struct hapCluster *c1 = hapArray[gtIx]; + struct hapCluster *c2 = hapArray[gtCount + gtIx]; // hardwired ploidy=2 + c1->gtHapIx = ix << 1; + c1->leafCount = 1; + if (gt->isPhased || gt->isHaploid || (gt->hapIxA == gt->hapIxB)) + { + // first haplotype's counts: + if (gt->hapIxA < 0) + c1->unkCounts[countIx] = 1; + else if (gt->hapIxA == 0) + c1->refCounts[countIx] = 1; + if (gt->isHaploid) + haveHaploid = TRUE; + else + { + // got second haplotype, fill in its counts: + c2->gtHapIx = (ix << 1) | 1; + c2->leafCount = 1; + if (gt->hapIxB < 0) + c2->unkCounts[countIx] = 1; + else if (gt->hapIxB == 0) + c2->refCounts[countIx] = 1; + } + } + else + { + // Missing data or unphased heterozygote, don't use haplotype info for clustering + c2->gtHapIx = (ix << 1) | 1; + c2->leafCount = 1; + c1->unkCounts[countIx] = c2->unkCounts[countIx] = 1; + } + } + if (haveHaploid) + { + // Some array items will have an empty cluster for missing hap2 -- + // trim those from the linked list. + struct hapCluster *c = hapArray[0]; + while (c != NULL && c->next != NULL) + { + if (c->next->leafCount == 0) + c->next = c->next->next; + else + c = c->next; + } + } + } + +// now fill out a distance matrix based on the cwaDistance between all the pairs in hapArray +struct hapDistanceMatrix *hapDistMatrix = fillOutDistanceMatrix(hapArray, vcff, sample, &helper, gtCount); +unsigned short hapCount = 2 * gtCount; +unsigned short *hapOrder = needMem(sizeof(unsigned short) * hapCount); +setHapOrderFromMatrix(hapOrder, hapCount, hapDistMatrix, hapArray, vcff, sample, sampleDrawOrder); +return hapOrder; +} + +#define VCFPHASED_UNPHASED_COLOR MG_RED + + +void vcfPhasedDrawOneRecord(struct track *track, struct hvGfx *hvg, struct vcfRecord *rec, void *item, + unsigned short *gtHapOrder, int gtHapCount, int xOff, int yOffsets[], + char *sampleOrder[], double scale) +// Draw a record's haplotypes on the appropriate lines +{ +int i; +int x1 = round((double)(rec->chromStart-winStart)*scale) + xOff; +int x2 = round((double)(rec->chromEnd-winStart)*scale) + xOff; +struct pgSnpVcfStartEnd *psvs = (struct pgSnpVcfStartEnd *)item; +int w = x2-x1; +if (w < 1) + w = 1; +int tickHeight = track->itemHeight(track, track->items); +for (i = 0; i < gtHapCount ; i++) + { + struct vcfGenotype *gt = &(rec->genotypes[gtHapOrder[i] >> 1]); + int nameIx = stringArrayIx(gt->id, sampleOrder, track->customInt); + struct dyString *mouseover = dyStringNew(0); + if (gt->isPhased || (gt->hapIxA == 1 && gt->hapIxB == 1)) // if phased or homozygous alt + { + int alIx = gtHapOrder[i] & 1 ? gt->hapIxB : gt->hapIxA; + int tickColor = MG_BLACK; + dyStringPrintf(mouseover, "%s ", gt->id); + if (alIx != 0) // non-reference allele + { + if (i > 1 && i < 4) // we're drawing child ticks + { + dyStringPrintf(mouseover, "allele: %s", rec->alleles[alIx]); + } + else + { + dyStringPrintf(mouseover, "%s allele: %s", i == 0 || i == 5 ? "untransmitted" : "transmitted", rec->alleles[alIx]); + } + int y = yOffsets[i] - (tickHeight/2); + hvGfxBox(hvg, x1, y, w, tickHeight, tickColor); + vcfPhasedAddMapBox(hvg, rec, psvs, mouseover->string, x1, y, w, tickHeight, track); + } + } + else if (gt->hapIxA != 0 || gt->hapIxB != 0)// draw the tick between the two haplotype lines + { + int yMid = ((yOffsets[2*nameIx] + yOffsets[(2*nameIx)+1]) / 2); // midpoint of two haplotype lines + hvGfxBox(hvg, x1, yMid - (tickHeight / 2), w, tickHeight, VCFPHASED_UNPHASED_COLOR); + dyStringPrintf(mouseover, "%s Unphased genotype: %s/%s", gt->id, rec->alleles[0], rec->alleles[1]); + vcfPhasedAddMapBox(hvg, rec, psvs, mouseover->string, x1, yMid, w, tickHeight, track); + } + } +} + +static void vcfPhasedAddLabel(struct track *track, struct hvGfx *hvg, char *label, int trackY, int labelY, MgFont *font, Color color) +// Add a VCF Sample name to the left label of the haplotype representation +{ +int clipXBak, clipYBak, clipWidthBak, clipHeightBak; +struct hvGfx *hvgLL = (hvgSide != NULL) ? hvgSide : hvg; +hvGfxGetClip(hvgLL, &clipXBak, &clipYBak, &clipWidthBak, &clipHeightBak); +hvGfxUnclip(hvgLL); +hvGfxSetClip(hvgLL, leftLabelX, trackY, leftLabelWidth, track->height); + +hvGfxTextRight(hvgLL, leftLabelX, labelY, leftLabelWidth, track->lineHeight, color, + font, label); + +// Restore the prior clipping: +hvGfxUnclip(hvgLL); +hvGfxSetClip(hvgLL, clipXBak, clipYBak, clipWidthBak, clipHeightBak); +} + +static void vcfPhasedSetupHaplotypesLines(struct track *track, struct hvGfx *hvg, int xOff, + int yOff, int width, int *retYOffsets, struct slPair *sampleNames, + MgFont *font) +/* Setup the background for drawing the ticks, the two haplotype lines for each sample, and the + * transparent gray box to help distinguish between consecutive samples */ +{ +int sampleHeight = round(track->height / track->customInt); +double yHap1 = track->lineHeight; // relative offset of first haplotype line +double yHap2 = sampleHeight - track->lineHeight; // relative offset of second line +struct slPair *name; +int i, y1, y2; +struct rgbColor yellow = lightRainbowAtPos(0.2); +int transYellow = MAKECOLOR_32_A(yellow.r, yellow.g, yellow.b, 100); + +char defaultLabelVar[1024], aliasLabelVar[1024]; +safef(defaultLabelVar, sizeof(defaultLabelVar), "%s.%s", track->track, VCF_PHASED_DEFAULT_LABEL_VAR); +safef(aliasLabelVar, sizeof(aliasLabelVar), "%s.%s", track->track, VCF_PHASED_ALIAS_LABEL_VAR); +boolean useDefaultLabel = cartUsualBoolean(cart, defaultLabelVar, TRUE); +boolean useAliasLabel = cartUsualBoolean(cart, aliasLabelVar, FALSE); + +for (name = sampleNames, i = 0; name != NULL; name = name->next, i++) + { + y1 = yOff + yHap1 + (i * sampleHeight); + y2 = yOff + yHap2 + (i * sampleHeight); + retYOffsets[2*i] = y1; + retYOffsets[(2*i) + 1] = y2; + // make the background of every other lane light yellow, but only when NOT doing PDF/EPS output + if ((hvg->pixelBased && i & 1)) + { + hvGfxBox(hvg, xOff, y1-(track->lineHeight), width, (y2 + track->lineHeight) - (y1-track->lineHeight), transYellow); + } + hvGfxLine(hvg, xOff, y1, xOff+width, y1, MG_BLACK); + hvGfxLine(hvg, xOff, y2, xOff+width, y2, MG_BLACK); + struct dyString *label = dyStringNew(0); + dyStringPrintf(label, "%s%s%s", + useDefaultLabel ? name->name : "", + useDefaultLabel && useAliasLabel ? "/" : "", + useAliasLabel ? (char *)name->val : ""); + vcfPhasedAddLabel(track, hvg, label->string, yOff, round(((y1 + y2) / 2) - (track->lineHeight / 2)), font, MG_BLACK); + } +} + +static void vcfPhasedDrawItems(struct track *track, int seqStart, int seqEnd, + struct hvGfx *hvg, int xOff, int yOff, int width, + MgFont *font, Color color, enum trackVisibility vis) +/* Split samples' chromosomes (haplotypes), cluster them by parents, and + * draw them all along a line representing each chromosome*/ +{ +struct vcfFile *vcff = track->extraUiData; +if (vcff->records == NULL) + return; + +const double scale = scaleForPixels(width); +struct slPair *pair, *sampleNames = vcfPhasedGetSampleOrder(cart, track->tdb); +int gtCount = slCount(sampleNames); +int yOffsets[gtCount * 2]; // y offsets of each haplotype line +char *sampleOrder[gtCount]; // order of sampleName lines +int i; +for (pair = sampleNames, i = 0; pair != NULL && i < gtCount; pair = pair->next, i++) + sampleOrder[i] = pair->name; +// child sample is required to be in the middle/bottom of the trio +char *childSample = gtCount > 2 ? sampleOrder[1] : sampleOrder[0]; + +// set up the "haplotype" lines and the transparent yellow box to delineate samples +vcfPhasedSetupHaplotypesLines(track, hvg, xOff, yOff, width, yOffsets, sampleNames, font); + +// sort the variants by haplotype then draw ticks +int nRecords = slCount(vcff->records); +int centerIx = getCenterVariantIx(track, seqStart, seqEnd, vcff->records); +int startIx = 0; +int endIx = nRecords; +unsigned short *hapOrder = computeHapDist(vcff, centerIx, startIx, endIx, childSample, gtCount, sampleOrder); +struct vcfRecord *rec = NULL; +struct slList *item = NULL; +for (rec = vcff->records, item = track->items; rec != NULL && item != NULL; rec = rec->next, item = item->next) + { + vcfPhasedDrawOneRecord(track, hvg, rec, item, hapOrder, gtCount * 2, xOff, yOffsets, sampleOrder, scale); + } +} + +static int vcfPhasedItemHeight(struct track *tg, void *item) +{ +return tg->heightPer; +} + +int vcfPhasedTrackHeight(struct track *tg, enum trackVisibility vis) +{ +const struct vcfFile *vcff = tg->extraUiData; +// when doing composite track height, vcfPhasedLoadItems won't have been called yet! +if (!vcff || vcff->records == NULL) + return 0; +int totalSamples = slCount(vcfPhasedGetSampleOrder(cart, tg->tdb)); +tg->lineHeight = tl.fontHeight + 1; +tg->heightPer = tl.fontHeight; +// if all variants in view are phased, then 3 lines per sample, +// else 4 lines. The extra 2 is for clear separation +int heightPerSample; +if (vcff->allPhased) + heightPerSample = (3 * tg->lineHeight) + 2; +else + heightPerSample = (4 * tg->lineHeight) + 2; +tg->height = totalSamples * heightPerSample; +tg->itemHeight = vcfPhasedItemHeight; +// custom int is reserved for doing pgSnp coloring but as far as I can tell is +// never actually used? +tg->customInt = totalSamples; +return tg->height; +} + +void vcfPhasedMethods(struct track *track) +/* Load items from a VCF of one individuals phased genotypes */ +{ +knetUdcInstall(); +pgSnpMethods(track); +track->drawItems = vcfPhasedDrawItems; +// Disinherit next/prev flag and methods since we don't support next/prev: +track->nextExonButtonable = FALSE; +track->nextPrevExon = NULL; +track->nextPrevItem = NULL; +track->loadItems = vcfPhasedLoadItems; +track->totalHeight = vcfPhasedTrackHeight; +track->itemName = vcfHapClusterTrackName; +track->mapsSelf = TRUE; +} static void vcfTabixLoadItems(struct track *tg) /* Load items in window from VCF file using its tabix index file. */ { char *fileOrUrl = NULL; char *tbiFileOrUrl = trackDbSetting(tg->tdb, "bigDataIndex"); // unrelated to mysql /* Figure out url or file name. */ if (tg->parallelLoading) { /* do not use mysql during parallel-fetch load */ fileOrUrl = trackDbSetting(tg->tdb, "bigDataUrl"); } else