dc1e0e76dbe49861bd0ebe8db64e27f587737794 max Mon Mar 30 15:40:03 2026 -0700 adding two more phased variants tracks, refs #37306 diff --git src/hg/hgc/vcfClick.c src/hg/hgc/vcfClick.c index 4c6c13aa75e..6570da1f50b 100644 --- src/hg/hgc/vcfClick.c +++ src/hg/hgc/vcfClick.c @@ -213,70 +213,149 @@ { struct vcfInfoElement *el = &(rec->infoElements[i]); const struct vcfInfoDef *def = vcfInfoDefForKey(vcff, el->key); if (looksTabular(def, el)) { puts("
"); printf("%s: %s
\n", el->key, def->description); puts(""); int headerCount = printTabularHeaderRow(def); printTabularData(el, headerCount); puts("
"); } } } -static void vcfGenotypeTable(struct vcfRecord *rec, char *track, char **displayAls) +struct sampleMeta +/* Metadata columns for one sample, loaded from sampleMetadataFile. */ + { + char **values; /* Array of column values */ + }; + +static void loadSampleMetadata(struct trackDb *tdb, struct hash **retHash, + char ***retColNames, int *retColCount) +/* Load sample metadata from file specified in trackDb setting sampleMetadataFile. + * File format: tab-separated, first line is header starting with #sample. + * Returns a hash mapping sample name -> sampleMeta struct, plus column names and count. */ +{ +*retHash = NULL; +*retColNames = NULL; +*retColCount = 0; +char *fileName = trackDbSetting(tdb, VCF_SAMPLE_METADATA_FILE); +if (fileName == NULL) + return; +fileName = hReplaceGbdb(fileName); +struct lineFile *lf = lineFileMayOpen(fileName, TRUE); +if (lf == NULL) + return; +char *line; +int lineSize; +// Read header line +if (!lineFileNext(lf, &line, &lineSize)) + { + lineFileClose(&lf); + return; + } +// Strip leading # if present +if (line[0] == '#') + line++; +// Parse header columns +int colCount = chopByChar(line, '\t', NULL, 0); +char **allCols; +AllocArray(allCols, colCount); +chopByChar(line, '\t', allCols, colCount); +// Column 0 is sample name; metadata columns start at 1 +int metaColCount = colCount - 1; +if (metaColCount < 1) + { + lineFileClose(&lf); + return; + } +char **colNames; +AllocArray(colNames, metaColCount); +int i; +for (i = 0; i < metaColCount; i++) + colNames[i] = cloneString(allCols[i+1]); +// Read data lines +struct hash *hash = hashNew(0); +while (lineFileNext(lf, &line, &lineSize)) + { + char *row[colCount]; + int fieldCount = chopByChar(line, '\t', row, colCount); + if (fieldCount < 2) + continue; + struct sampleMeta *sm; + AllocVar(sm); + AllocArray(sm->values, metaColCount); + for (i = 0; i < metaColCount && i + 1 < fieldCount; i++) + sm->values[i] = cloneString(row[i+1]); + hashAdd(hash, row[0], sm); + } +lineFileClose(&lf); +*retHash = hash; +*retColNames = colNames; +*retColCount = metaColCount; +} + +static void vcfGenotypeTable(struct vcfRecord *rec, char *track, char **displayAls, + struct trackDb *tdb) /* Put the table containing details about each genotype into a collapsible section. */ { static struct dyString *tmp1 = NULL; if (tmp1 == NULL) tmp1 = dyStringNew(0); jsBeginCollapsibleSection(cart, track, "genotypes", "Detailed genotypes", FALSE); dyStringClear(tmp1); dyStringAppend(tmp1, rec->format); struct vcfFile *vcff = rec->file; enum vcfInfoType formatTypes[256]; char *formatKeys[256]; int formatCount = chopString(tmp1->string, ":", formatKeys, ArraySize(formatKeys)); boolean firstInfo = TRUE; int i; for (i = 0; i < formatCount; i++) { if (sameString(formatKeys[i], vcfGtGenotype)) continue; if (firstInfo) { puts("Genotype info key:
"); firstInfo = FALSE; } const struct vcfInfoDef *def = vcfInfoDefForGtKey(vcff, formatKeys[i]); char *desc = def ? def->description : "not described in VCF header"; printf("  %s: %s
\n", formatKeys[i], desc); formatTypes[i] = def ? def->type : vcfInfoString; } +// Load sample metadata if available +struct hash *metaHash = NULL; +char **metaColNames = NULL; +int metaColCount = 0; +loadSampleMetadata(tdb, &metaHash, &metaColNames, &metaColCount); hTableStart(); boolean isDiploid = sameString(vcfHaplotypeOrSample(cart), "Haplotype"); puts("Sample IDGenotype"); if (isDiploid) puts("Phased?"); for (i = 0; i < formatCount; i++) { if (sameString(formatKeys[i], vcfGtGenotype)) continue; printf("%s", formatKeys[i]); } +for (i = 0; i < metaColCount; i++) + printf("%s", metaColNames[i]); puts("\n"); for (i = 0; i < vcff->genotypeCount; i++) { struct vcfGenotype *gt = &(rec->genotypes[i]); char *hapA = ".", *hapB = "."; if (gt->hapIxA >= 0) hapA = displayAls[(unsigned char)gt->hapIxA]; if (gt->isHaploid) hapB = ""; else if (gt->hapIxB >= 0) hapB = displayAls[(unsigned char)gt->hapIxB]; char sep = gt->isHaploid ? ' ' : gt->isPhased ? '|' : '/'; char *phasing = gt->isHaploid ? NA : gt->isPhased ? "Y" : "n"; printf("%s%s%c%s", vcff->genotypeIds[i], hapA, sep, hapB); @@ -289,30 +368,42 @@ continue; printf(""); struct vcfInfoElement *el = &(gt->infoElements[j]); int k; for (k = 0; k < el->count; k++) { if (k > 0) printf(", "); if (el->missingData[k]) printf("."); else vcfPrintDatum(stdout, el->values[k], formatTypes[j]); } printf(""); } + // Print sample metadata columns + if (metaHash != NULL) + { + struct sampleMeta *sm = hashFindVal(metaHash, vcff->genotypeIds[i]); + for (j = 0; j < metaColCount; j++) + { + if (sm != NULL && sm->values[j] != NULL) + printf("%s", sm->values[j]); + else + printf(""); + } + } puts(""); } hTableEnd(); jsEndCollapsibleSection(); } static void ignoreEm(char *format, va_list args) /* Ignore warnings from genotype parsing -- when there's one, there * are usually hundreds more just like it. */ { } static void vcfGenotypesDetails(struct vcfRecord *rec, struct trackDb *tdb, char **displayAls) /* Print summary of allele and genotype frequency, plus collapsible section * with table of genotype details. */ @@ -381,31 +472,31 @@ { boolean showHW = cartOrTdbBoolean(cart, tdb, VCF_SHOW_HW_VAR, FALSE); if (showHW) { double altAf = (double)alCounts[1]/totalAlleles; printf("Hardy-Weinberg equilibrium: " "P(%s/%s) = %.3f%%; P(%s/%s) = %.3f%%; P(%s/%s) = %.3f%%
", displayAls[0], displayAls[0], 100*refAf*refAf, displayAls[0], displayAls[1], 100*2*refAf*altAf, displayAls[1], displayAls[1], 100*altAf*altAf); } } } puts("
"); -vcfGenotypeTable(rec, tdb->track, displayAls); +vcfGenotypeTable(rec, tdb->track, displayAls, tdb); puts(""); } static void pgSnpCodingDetail(struct vcfRecord *rec) /* Translate rec into pgSnp (with proper chrom name) and call Belinda's * coding effect predictor from pgSnp details. */ { char *genePredTable = "knownGene"; if (hTableExists(database, genePredTable)) { struct pgSnp *pgs = pgSnpFromVcfRecord(rec); if (!sameString(rec->chrom, seqName)) // rec->chrom might be missing "chr" prefix: pgs->chrom = seqName; printSeqCodDisplay(database, pgs, genePredTable);