dc1e0e76dbe49861bd0ebe8db64e27f587737794
max
Mon Mar 30 15:40:03 2026 -0700
adding two more phased variants tracks, refs #37306
diff --git src/hg/hgc/vcfClick.c src/hg/hgc/vcfClick.c
index 4c6c13aa75e..6570da1f50b 100644
--- src/hg/hgc/vcfClick.c
+++ src/hg/hgc/vcfClick.c
@@ -213,70 +213,149 @@
{
struct vcfInfoElement *el = &(rec->infoElements[i]);
const struct vcfInfoDef *def = vcfInfoDefForKey(vcff, el->key);
if (looksTabular(def, el))
{
puts("
");
printf("%s: %s
\n", el->key, def->description);
puts("
");
int headerCount = printTabularHeaderRow(def);
printTabularData(el, headerCount);
puts("
");
}
}
}
-static void vcfGenotypeTable(struct vcfRecord *rec, char *track, char **displayAls)
+struct sampleMeta
+/* Metadata columns for one sample, loaded from sampleMetadataFile.
+ * loadSampleMetadata() allocates one of these per data line and stores it
+ * in its hash under the sample name. */
+ {
+ char **values; /* Array of column values, one slot per metadata column (the
+                 * sample-name column is not included).  Slots past the end of
+                 * a short row are never assigned; readers test for NULL. */
+ };
+
+static void loadSampleMetadata(struct trackDb *tdb, struct hash **retHash,
+                               char ***retColNames, int *retColCount)
+/* Load sample metadata from file specified in trackDb setting sampleMetadataFile
+ * (VCF_SAMPLE_METADATA_FILE).
+ * File format: tab-separated.  The first line is a header; a leading '#' is
+ * stripped, column 0 names the sample and the remaining columns name the
+ * metadata fields.  Every following line is a sample name plus its values.
+ * On success *retHash maps sample name -> struct sampleMeta, *retColNames is an
+ * array of cloned metadata column names and *retColCount is its length.
+ * If the setting is absent, the file cannot be opened, the file is empty, or the
+ * header has no metadata columns, the outputs stay NULL / NULL / 0. */
+{
+*retHash = NULL;
+*retColNames = NULL;
+*retColCount = 0;
+char *fileName = trackDbSetting(tdb, VCF_SAMPLE_METADATA_FILE);
+if (fileName == NULL)
+    return;
+fileName = hReplaceGbdb(fileName);
+struct lineFile *lf = lineFileMayOpen(fileName, TRUE);
+if (lf == NULL)
+    return;
+char *line;
+int lineSize;
+// Read header line
+if (!lineFileNext(lf, &line, &lineSize))
+    {
+    lineFileClose(&lf);
+    return;
+    }
+// Strip leading # if present
+if (line[0] == '#')
+    line++;
+// Count header columns before allocating anything.  Column 0 is the sample
+// name, so fewer than two columns means there is no metadata to show.  Bailing
+// out here keeps an empty header from requesting a zero-length array and keeps
+// a single-column header from leaking the scratch array.
+int colCount = chopByChar(line, '\t', NULL, 0);
+int metaColCount = colCount - 1;
+if (metaColCount < 1)
+    {
+    lineFileClose(&lf);
+    return;
+    }
+// Parse header columns
+char **allCols;
+AllocArray(allCols, colCount);
+chopByChar(line, '\t', allCols, colCount);
+char **colNames;
+AllocArray(colNames, metaColCount);
+int i;
+for (i = 0; i < metaColCount; i++)
+    colNames[i] = cloneString(allCols[i+1]);
+// allCols only pointed into the header line buffer; the names were cloned
+// above, so the scratch array can go before the buffer is reused.
+freeMem(allCols);
+// Read data lines
+struct hash *hash = hashNew(0);
+while (lineFileNext(lf, &line, &lineSize))
+    {
+    char *row[colCount];
+    int fieldCount = chopByChar(line, '\t', row, colCount);
+    // Need at least a sample name and one value; skip blank or name-only lines.
+    if (fieldCount < 2)
+        continue;
+    struct sampleMeta *sm;
+    AllocVar(sm);
+    AllocArray(sm->values, metaColCount);
+    // A short row fills only the columns it has; the rest are left unassigned.
+    for (i = 0; i < metaColCount && i + 1 < fieldCount; i++)
+        sm->values[i] = cloneString(row[i+1]);
+    hashAdd(hash, row[0], sm);
+    }
+lineFileClose(&lf);
+*retHash = hash;
+*retColNames = colNames;
+*retColCount = metaColCount;
+}
+
+static void vcfGenotypeTable(struct vcfRecord *rec, char *track, char **displayAls,
+ struct trackDb *tdb)
/* Put the table containing details about each genotype into a collapsible section. */
{
static struct dyString *tmp1 = NULL;
if (tmp1 == NULL)
tmp1 = dyStringNew(0);
jsBeginCollapsibleSection(cart, track, "genotypes", "Detailed genotypes", FALSE);
dyStringClear(tmp1);
dyStringAppend(tmp1, rec->format);
struct vcfFile *vcff = rec->file;
enum vcfInfoType formatTypes[256];
char *formatKeys[256];
int formatCount = chopString(tmp1->string, ":", formatKeys, ArraySize(formatKeys));
boolean firstInfo = TRUE;
int i;
for (i = 0; i < formatCount; i++)
{
if (sameString(formatKeys[i], vcfGtGenotype))
continue;
if (firstInfo)
{
puts("Genotype info key:
");
firstInfo = FALSE;
}
const struct vcfInfoDef *def = vcfInfoDefForGtKey(vcff, formatKeys[i]);
char *desc = def ? def->description : "not described in VCF header";
printf(" %s: %s
\n", formatKeys[i], desc);
formatTypes[i] = def ? def->type : vcfInfoString;
}
+// Load sample metadata if available
+struct hash *metaHash = NULL;
+char **metaColNames = NULL;
+int metaColCount = 0;
+loadSampleMetadata(tdb, &metaHash, &metaColNames, &metaColCount);
hTableStart();
boolean isDiploid = sameString(vcfHaplotypeOrSample(cart), "Haplotype");
puts("| Sample ID | Genotype | ");
if (isDiploid)
puts("Phased? | ");
for (i = 0; i < formatCount; i++)
{
if (sameString(formatKeys[i], vcfGtGenotype))
continue;
printf("%s | ", formatKeys[i]);
}
+for (i = 0; i < metaColCount; i++)
+ printf("%s | ", metaColNames[i]);
puts("
\n");
for (i = 0; i < vcff->genotypeCount; i++)
{
struct vcfGenotype *gt = &(rec->genotypes[i]);
char *hapA = ".", *hapB = ".";
if (gt->hapIxA >= 0)
hapA = displayAls[(unsigned char)gt->hapIxA];
if (gt->isHaploid)
hapB = "";
else if (gt->hapIxB >= 0)
hapB = displayAls[(unsigned char)gt->hapIxB];
char sep = gt->isHaploid ? ' ' : gt->isPhased ? '|' : '/';
char *phasing = gt->isHaploid ? NA : gt->isPhased ? "Y" : "n";
printf("| %s | %s%c%s | ", vcff->genotypeIds[i],
hapA, sep, hapB);
@@ -289,30 +368,42 @@
continue;
printf("");
struct vcfInfoElement *el = &(gt->infoElements[j]);
int k;
for (k = 0; k < el->count; k++)
{
if (k > 0)
printf(", ");
if (el->missingData[k])
printf(".");
else
vcfPrintDatum(stdout, el->values[k], formatTypes[j]);
}
printf(" | ");
}
+ // Print sample metadata columns
+ if (metaHash != NULL)
+ {
+ struct sampleMeta *sm = hashFindVal(metaHash, vcff->genotypeIds[i]);
+ for (j = 0; j < metaColCount; j++)
+ {
+ if (sm != NULL && sm->values[j] != NULL)
+ printf("%s | ", sm->values[j]);
+ else
+ printf(" | ");
+ }
+ }
puts("
");
}
hTableEnd();
jsEndCollapsibleSection();
}
static void ignoreEm(char *format, va_list args)
/* Ignore warnings from genotype parsing -- when there's one, there
 * are usually hundreds more just like it.
 * Deliberately empty: format and args are accepted and discarded. */
{
}
static void vcfGenotypesDetails(struct vcfRecord *rec, struct trackDb *tdb, char **displayAls)
/* Print summary of allele and genotype frequency, plus collapsible section
* with table of genotype details. */
@@ -381,31 +472,31 @@
{
boolean showHW = cartOrTdbBoolean(cart, tdb, VCF_SHOW_HW_VAR, FALSE);
if (showHW)
{
double altAf = (double)alCounts[1]/totalAlleles;
printf("Hardy-Weinberg equilibrium: "
"P(%s/%s) = %.3f%%; P(%s/%s) = %.3f%%; P(%s/%s) = %.3f%%
",
displayAls[0], displayAls[0], 100*refAf*refAf,
displayAls[0], displayAls[1], 100*2*refAf*altAf,
displayAls[1], displayAls[1], 100*altAf*altAf);
}
}
}
puts("
");
-vcfGenotypeTable(rec, tdb->track, displayAls);
+vcfGenotypeTable(rec, tdb->track, displayAls, tdb);
puts("");
}
static void pgSnpCodingDetail(struct vcfRecord *rec)
/* Translate rec into pgSnp (with proper chrom name) and call Belinda's
* coding effect predictor from pgSnp details. */
{
char *genePredTable = "knownGene";
if (hTableExists(database, genePredTable))
{
struct pgSnp *pgs = pgSnpFromVcfRecord(rec);
if (!sameString(rec->chrom, seqName))
// rec->chrom might be missing "chr" prefix:
pgs->chrom = seqName;
printSeqCodDisplay(database, pgs, genePredTable);