533112afe2a2005e80cdb1f82904ea65032d4302
braney
  Sat Oct 2 11:37:34 2021 -0700
split hg/lib into two separate libaries, one only used by the cgis

diff --git src/hg/lib/annoFormatVep.c src/hg/lib/annoFormatVep.c
deleted file mode 100644
index 4c54628..0000000
--- src/hg/lib/annoFormatVep.c
+++ /dev/null
@@ -1,1625 +0,0 @@
-/* annoFormatVep -- write functional predictions in the same format as Ensembl's
- * Variant Effect Predictor to fileName, interpreting input rows according to config.
- * See http://uswest.ensembl.org/info/docs/variation/vep/vep_formats.html */
-
-/* Copyright (C) 2014 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
-
-#include "annoFormatVep.h"
-#include "annoGratorGpVar.h"
-#include "annoGratorQuery.h"
-#include "annoStreamDbPslPlus.h"
-#include "asParse.h"
-#include "bigGenePred.h"
-#include "dystring.h"
-#include "genePred.h"
-#include "gpFx.h"
-#include "hdb.h"
-#include "hgHgvs.h"
-#include "htmshell.h"
-#include "pgSnp.h"
-#include "portable.h"
-#include "vcf.h"
-#include <time.h>
-
-struct annoFormatVepExtraItem
-    // A single input column whose value should be placed in the Extras output column,
-    // identified by tag.
-    {
-    struct annoFormatVepExtraItem *next;
-    char *tag;					// Keyword to use in extras column (tag=value;)
-    char *description;				// Text description for output header
-    int rowIx;					// Offset of column in row from data source
-						// (N/A for wig sources)
-    boolean isRegulatory;			// TRUE if overlap implies regulatory_region
-    boolean isBoolean;				// TRUE if we don't need to print out the value,
-						// only whether it is found
-    };
-
-struct annoFormatVep;
-
-struct annoFormatVepExtraSource
-    // A streamer or grator that supplies at least one value for Extras output column.
-    {
-    struct annoFormatVepExtraSource *next;
-    struct annoStreamer *source;		// streamer or grator: same pointers as below
-    struct annoFormatVepExtraItem *items;	// one or more columns of source and their tags
-
-    void (*printExtra)(struct annoFormatVepExtraSource *self,
-                       struct annoFormatVep *afVep, struct annoFormatVepExtraItem *extraItem,
-		       struct annoRow *extraRows, struct annoRow *gpvRow, boolean *pGotExtra);
-    /* Method for printing items from this source; pGotExtra is in/out to keep track of
-     * whether any call has actually printed something yet. */
-    };
-
-struct annoFormatVepConfig
-    // Describe the primary source and grators (whose rows must be delivered in this order)
-    // that provide data for VEP output columns.
-    {
-    struct annoStreamer *variantSource;		// Primary source: variants
-    char *variantDescription;			// Description to appear in output header
-    struct annoStreamer *gpVarSource;		// annoGratorGpVar makes the core predictions
-    char *gpVarDescription;			// Description to appear in output header
-    struct annoStreamer *snpSource;		// Latest dbSNP provides IDs of known variants
-    char *snpDescription;			// Description to appear in output header
-    struct annoFormatVepExtraSource *extraSources;	// Everything else that may be tacked on
-    };
-
-enum afVariantDataType
-    // What type of primary data (variants) are we integrating with?
-    {
-    afvdtInvalid,                       // uninitialized/unknown
-    afvdtVcf,                           // primary data source is VCF
-    afvdtPgSnpTable,                    // primary data source is pgSnp table with bin in .as
-    afvdtPgSnpFile                      // primary data source is pgSnp file, no bin
-    };
-
-struct annoFormatVep
-// Subclass of annoFormatter that writes VEP-equivalent output to a file.
-    {
-    struct annoFormatter formatter;	// superclass / external interface
-    struct annoFormatVepConfig *config;	// Description of input sources and values for Extras col
-    char *fileName;			// Output filename
-    FILE *f;				// Output file handle
-    struct lm *lm;			// localmem for scratch storage
-    struct dyString *dyScratch;		// dyString for local temporary use
-    struct annoAssembly *assembly;	// Reference assembly (for sequence)
-    int lmRowCount;			// counter for periodic localmem cleanup
-    int varNameIx;			// Index of name column from variant source, or -1 if N/A
-    int varAllelesIx;			// Index of alleles column from variant source, or -1
-    int txNameIx;                       // Index of transcript name from (big)genePred/PSL
-    int geneNameIx;			// Index of gene name from (big)genePred/PSL+ if included
-    int hgvsGIx;			// Index of HGVS g. column from annoGratorGpVar
-    int hgvsCNIx;			// Index of HGVS c./n. column from annoGratorGpVar
-    int hgvsPIx;			// Index of HGVS p. column from annoGratorGpVar
-    int snpNameIx;			// Index of name column from dbSNP source, or -1
-    boolean needHeader;			// TRUE if we should print out the header
-    enum afVariantDataType variantType; // Are variants VCF or a flavor of pgSnp?
-    boolean doHtml;			// TRUE if we should include html tags & make a <table>.
-    struct seqWindow *gSeqWin;          // genomic sequence fetcher for HGVS term generation
-    boolean hgvsMakeG;                  // Generate genomic (g.) HGVS terms only if this is set
-    boolean hgvsBreakDelIns;            // Include deleted sequence (not only ins) e.g. delGGinsAT
-    };
-
-
-INLINE void afVepLineBreak(FILE *f, boolean doHtml)
-/* Depending on whether we're printing HTML, print either a newline or <BR>. */
-{
-if (doHtml)
-    fputs("<BR>", f);
-fputc('\n', f);
-}
-
-INLINE void afVepStartRow(FILE *f, boolean doHtml)
-/* If we're printing HTML, print a <TR><TD>. */
-{
-if (doHtml)
-    fputs("<TR><TD>", f);
-}
-
-INLINE void afVepNextColumn(FILE *f, boolean doHtml)
-/* Depending on whether we're printing HTML, print either a tab or </TD><TD>. */
-{
-if (doHtml)
-    fputs("</TD><TD>", f);
-else
-    fputc('\t', f);
-}
-
-INLINE void afVepEndRow(FILE *f, boolean doHtml)
-/* If we're printing HTML, print a </TD></TR>, otherwise, just a newline. */
-{
-if (doHtml)
-    fputs("</TD></TR>\n", f);
-else
-    fputc('\n', f);
-}
-
-INLINE void afVepPuts(char *text, struct annoFormatVep *self)
-/* Write text to self->f.  If we're printing HTML, write encoded text, otherwise just write text. */
-{
-if (self->doHtml)
-    htmTextOut(self->f, text);
-else
-    fputs(text, self->f);
-}
-
-static void afVepPrintf(struct annoFormatVep *self, char *format, ...)
-/*  Printf to self->f, but if self->doHtml then encode before writing out. */
-#ifdef __GNUC__
-__attribute__((format(printf, 2, 3)))
-#endif
-    ;
-
-static void afVepPrintf(struct annoFormatVep *self, char *format, ...)
-/*  Printf to self->f, but if self->doHtml then encode before writing out. */
-{
-va_list args;
-va_start(args, format);
-if (self->doHtml)
-    vaHtmlFprintf(self->f, format, args);
-else
-    vfprintf(self->f, format, args);
-va_end(args);
-}
-
-static void afVepPrintHeaderExtraTags(struct annoFormatVep *self, char *bStart, char *bEnd)
-/* For each extra column described in config, write out its tag and a brief description.
- * bStart and bEnd are for bold output in HTML, or if not HTML, starting the line with "## ". */
-{
-struct annoFormatVepExtraSource *extras = self->config->extraSources, *extraSrc;
-if (extras == NULL)
-    return;
-fprintf(self->f, "%sKeys for Extra column items:%s", bStart, bEnd);
-afVepLineBreak(self->f, self->doHtml);
-for (extraSrc = extras;  extraSrc != NULL;  extraSrc = extraSrc->next)
-    {
-    struct annoFormatVepExtraItem *extraItem;
-    for (extraItem = extraSrc->items;  extraItem != NULL;  extraItem = extraItem->next)
-	if (isNotEmpty(extraItem->tag))
-	    {
-	    fprintf(self->f, "%s%s%s: %s", bStart, extraItem->tag, bEnd, extraItem->description);
-	    afVepLineBreak(self->f, self->doHtml);
-	    }
-    }
-}
-
-static void afVepPrintHeaderDate(FILE *f, boolean doHtml)
-/* VEP header includes a date formatted like "2012-06-16 16:09:38" */
-{
-long now = clock1();
-struct tm *tm = localtime(&now);
-fprintf(f, "## Output produced at %d-%02d-%02d %02d:%02d:%02d",
-	1900 + tm->tm_year, 1 + tm->tm_mon, tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec);
-afVepLineBreak(f, doHtml);
-}
-
-static char *columnLabels[] = { "Uploaded Variation",
-				"Location",
-				"Allele",
-				"Gene",
-				"Feature",
-				"Feature type",
-				"Consequence",
-				"Position in cDNA",
-				"Position in CDS",
-				"Position in protein",
-				"Amino acid change",
-				"Codon change",
-				"Co-located Variation",
-				"Extra",
-				NULL };
-
-static void afVepPrintColumnLabels(struct annoFormatVep *self)
-/* If we're printing HTML, begin a table and use TH for labels; otherwise just
- * print out a tab-sep text line with labels.  VEP doesn't begin this line with a #! */
-{
-char **pLabel = columnLabels;
-if (self->doHtml)
-    {
-    fputs("<BR><TABLE class='stdTbl'>\n<TR><TH>", self->f);
-    fputs(*pLabel++, self->f);
-    while (*pLabel != NULL)
-	{
-	fputs("</TH><TH>", self->f);
-	fputs(*pLabel++, self->f);
-	}
-    fputs("</TH></TR>\n", self->f);
-    }
-else
-    {
-    fputs(*pLabel++, self->f);
-    while (*pLabel != NULL)
-	{
-	fputc('\t', self->f);
-	fputs(*pLabel++, self->f);
-	}
-    fputc('\n', self->f);
-    }
-}
-
-static void afVepPrintHeader(struct annoFormatVep *self, char *db)
-/* Print a header that looks almost like a VEP header. */
-{
-FILE *f = self->f;
-boolean doHtml = self->doHtml;
-char *bStart = doHtml ? "<B>" : "## ";
-char *bEnd = doHtml ? "</B>" : "";
-if (!doHtml)
-    {
-    // Suppress these lines from HTML output -- IMO they're better suited for a file header:
-    fputs("## ENSEMBL VARIANT EFFECT PREDICTOR format (UCSC Variant Annotation Integrator)", f);
-    afVepLineBreak(f, doHtml);
-    afVepPrintHeaderDate(f, doHtml);
-    fprintf(f, "## Connected to UCSC database %s", db);
-    afVepLineBreak(f, doHtml);
-    }
-struct annoFormatVepConfig *config = self->config;
-fprintf(f, "%sVariants:%s ", bStart, bEnd);
-afVepPrintf(self, "%s", config->variantDescription);
-if (! strstr(config->variantSource->name, "/trash/"))
-    fprintf(f, " (%s)", config->variantSource->name);
-afVepLineBreak(f, doHtml);
-fprintf(f, "%sTranscripts:%s %s (%s)", bStart, bEnd,
-	config->gpVarDescription, config->gpVarSource->name);
-afVepLineBreak(f, doHtml);
-if (config->snpSource != NULL)
-    {
-    fprintf(f, "%sdbSNP:%s %s (%s)", bStart, bEnd,
-	    config->snpDescription, config->snpSource->name);
-    afVepLineBreak(f, doHtml);
-    }
-afVepPrintHeaderExtraTags(self, bStart, bEnd);
-afVepPrintColumnLabels(self);
-self->needHeader = FALSE;
-}
-
-static void afVepInitialize(struct annoFormatter *fSelf, struct annoStreamer *primarySource,
-			    struct annoStreamer *integrators)
-/* Print header, regardless of whether we get any data after this. */
-{
-struct annoFormatVep *self = (struct annoFormatVep *)fSelf;
-if (self->needHeader)
-    afVepPrintHeader(self, primarySource->assembly->name);
-}
-
-static void afVepComment(struct annoFormatter *fSelf, char *content)
-/* Print out a comment, either starting with "# " or as a warnBox depending on doHtml. */
-{
-if (strchr(content, '\n'))
-    errAbort("afVepComment: no multi-line input");
-struct annoFormatVep *self = (struct annoFormatVep *)fSelf;
-if (self->doHtml)
-    warn("%s", content);
-else
-    fprintf(self->f, "# %s\n", content);
-}
-
-static void compressDashes(char *string)
-/* If string has a run of '-' characters, turn it into single '-'. */
-{
-char *p = string;
-while ((p = strchr(p, '-')) != NULL)
-    {
-    char *end = p+1;
-    while (*end == '-')
-	end++;
-    if (end > p+1)
-	memmove(p+1, end, strlen(end)+1);
-    p++;
-    }
-}
-
-static char *afVepGetAlleles(struct annoFormatVep *self, struct annoRow *varRow)
-/* Return a string with slash-separated alleles.  Result is alloc'd by self->lm. */
-{
-char **varWords = (char **)(varRow->data);
-char *alleles = NULL;
-if (self->variantType == afvdtVcf)
-    alleles = lmCloneString(self->lm, vcfGetSlashSepAllelesFromWords(varWords, self->dyScratch));
-else if (self->varAllelesIx >= 0)
-    alleles = lmCloneString(self->lm, varWords[self->varAllelesIx]);
-else
-    errAbort("annoFormatVep: afVepSetConfig didn't specify how to get alleles");
-compressDashes(alleles);
-return alleles;
-}
-
-static void afVepPrintNameAndLoc(struct annoFormatVep *self, struct annoRow *varRow)
-/* Print variant name and position in genome. */
-{
-char **varWords = (char **)(varRow->data);
-uint start1Based = varRow->start + 1;
-// Use variant name if available, otherwise construct an identifier:
-if (self->varNameIx >= 0 && !sameString(varWords[self->varNameIx], "."))
-    afVepPuts(varWords[self->varNameIx], self);
-else
-    {
-    char *alleles = afVepGetAlleles(self, varRow);
-    afVepPrintf(self, "%s_%u_%s", varRow->chrom, start1Based, alleles);
-    }
-afVepNextColumn(self->f, self->doHtml);
-// Location is chr:start for single-base, chr:start-end for indels:
-if (varRow->end == start1Based)
-    afVepPrintf(self, "%s:%u", varRow->chrom, start1Based);
-else if (start1Based > varRow->end)
-    afVepPrintf(self, "%s:%u-%u", varRow->chrom, varRow->end, start1Based);
-else
-    afVepPrintf(self, "%s:%u-%u", varRow->chrom, start1Based, varRow->end);
-afVepNextColumn(self->f, self->doHtml);
-}
-
-INLINE void afVepPrintPlaceholders(FILE *f, int count, boolean doHtml)
-/* VEP uses "-" for N/A.  Sometimes there are several consecutive N/A columns.
- * Count = 0 means print "-" with no tab. Count > 0 means print that many "-" columns. */
-{
-if (count == 0)
-    fputc('-', f);
-else
-    {
-    int i;
-    for (i = 0;  i < count;  i++)
-	{
-	fputc('-', f);
-	afVepNextColumn(f, doHtml);
-	}
-    }
-}
-
-#define afVepPrintPlaceholder(f, doHtml) afVepPrintPlaceholders(f, 1, doHtml)
-
-INLINE char *placeholderForEmpty(char *val)
-/* Return VEP placeholder "-" if val is empty, otherwise return val. */
-{
-return (isEmpty(val) ? "-" : val);
-}
-
-
-static void afVepPrintGene(struct annoFormatVep *self, struct annoRow *gpvRow)
-/* If the genePred portion of gpvRow contains a gene name (in addition to transcript name),
- * print it out; otherwise print a placeholder. */
-{
-if (self->geneNameIx >= 0)
-    {
-    char **words = (char **)(gpvRow->data);
-    afVepPuts(placeholderForEmpty(words[self->geneNameIx]), self);
-    afVepNextColumn(self->f, self->doHtml);
-    }
-else
-    afVepPrintPlaceholder(self->f, self->doHtml);
-}
-
-static void limitLength(char *seq, int baseCount, char *unit)
-/* If seq is longer than an abbreviated version of itself, change it to the abbreviated version. */
-{
-if (isEmpty(seq))
-    return;
-int len = strlen(seq);
-const int elipsLen = 3;
-char lengthNote[512];
-safef(lengthNote, sizeof(lengthNote), "(%d %s)", len, unit);
-if (len > 2*baseCount + elipsLen + strlen(lengthNote))
-    {
-    // First baseCount bases, then "...":
-    int offset = baseCount;
-    safecpy(seq+offset, len+1-offset, "...");
-    offset += elipsLen;
-    // then last baseCount bases:
-    safecpy(seq+offset, len+1-offset, seq+len-baseCount);
-    offset += baseCount;
-    // then lengthNote:
-    safecpy(seq+offset, len+1-offset, lengthNote);
-    }
-}
-
-static void tweakStopCodonAndLimitLength(char *aaSeq, char *codonSeq)
-/* If aa from gpFx has a stop 'Z', replace it with "*".
- * If the strings are very long, truncate with a note about how long they are. */
-{
-char *earlyStop = strchr(aaSeq, 'Z');
-if (earlyStop)
-    {
-    earlyStop[0] = '*';
-    earlyStop[1] = '\0';
-    }
-limitLength(aaSeq, 5, "aa");
-limitLength(codonSeq, 12, "nt");
-}
-
-INLINE char *dashForEmpty(char *s)
-/* Represent empty alleles in insertions/deletions by the customary "-". */
-{
-if (isEmpty(s))
-    return "-";
-else
-    return s;
-}
-
-static void afVepPrintPredictions(struct annoFormatVep *self, struct annoRow *varRow,
-				  struct annoRow *gpvRow, struct gpFx *gpFx)
-/* Print VEP columns computed by annoGratorGpVar (or placeholders) */
-{
-// variant allele used to calculate the consequence (or first alternate allele)
-char *abbrevAllele = cloneString(gpFx->gAllele);
-limitLength(abbrevAllele, 24, "nt");
-afVepPuts(placeholderForEmpty(abbrevAllele), self);
-afVepNextColumn(self->f, self->doHtml);
-// ID of affected gene
-afVepPrintGene(self, gpvRow);
-// ID of feature
-afVepPrintf(self, "%s", placeholderForEmpty(gpFx->transcript));
-afVepNextColumn(self->f, self->doHtml);
-// type of feature {Transcript, RegulatoryFeature, MotifFeature}
-if (gpFx->soNumber == intergenic_variant)
-    afVepPrintPlaceholder(self->f, self->doHtml);
-else
-    {
-    fputs("Transcript", self->f);
-    afVepNextColumn(self->f, self->doHtml);
-    }
-// consequence: SO term e.g. splice_region_variant
-afVepPuts(soTermToString(gpFx->soNumber), self);
-afVepNextColumn(self->f, self->doHtml);
-if (gpFx->detailType == codingChange)
-    {
-    struct codingChange *change = &(gpFx->details.codingChange);
-    uint refLen = strlen(change->txRef), altLen = strlen(change->txAlt);
-    boolean isInsertion = (refLen == 0);
-    boolean isDeletion = altLen < refLen;
-    if (isInsertion)
-	{
-	fprintf(self->f, "%u-%u", change->cDnaPosition, change->cDnaPosition+1);
-	afVepNextColumn(self->f, self->doHtml);
-	fprintf(self->f, "%u-%u", change->cdsPosition, change->cdsPosition+1);
-	}
-    else if (isDeletion && refLen > 1)
-	{
-	fprintf(self->f, "%u-%u", change->cDnaPosition+1, change->cDnaPosition+refLen);
-	afVepNextColumn(self->f, self->doHtml);
-	fprintf(self->f, "%u-%u", change->cdsPosition+1, change->cdsPosition+refLen);
-	}
-    else
-	{
-	fprintf(self->f, "%u", change->cDnaPosition+1);
-	afVepNextColumn(self->f, self->doHtml);
-	fprintf(self->f, "%u", change->cdsPosition+1);
-	}
-    afVepNextColumn(self->f, self->doHtml);
-    fprintf(self->f, "%u", change->pepPosition+1);
-    afVepNextColumn(self->f, self->doHtml);
-    int variantFrame = change->cdsPosition % 3;
-    strLower(change->codonOld);
-    int upLen = min(strlen(change->codonOld+variantFrame), refLen);
-    toUpperN(change->codonOld+variantFrame, upLen);
-    strLower(change->codonNew);
-    int alleleLength = altLen;
-    // watch out for symbolic alleles [actually now that we're using txAlt we shouldn't see these]:
-    if (sameString(change->txAlt, "<X>") || sameString(change->txAlt, "<*>"))
-        alleleLength = upLen;
-    else if (startsWith("<", change->txAlt))
-        alleleLength = 0;
-    toUpperN(change->codonNew+variantFrame, alleleLength);
-    tweakStopCodonAndLimitLength(change->aaOld, change->codonOld);
-    tweakStopCodonAndLimitLength(change->aaNew, change->codonNew);
-    fprintf(self->f, "%s/%s", dashForEmpty(change->aaOld), dashForEmpty(change->aaNew));
-    afVepNextColumn(self->f, self->doHtml);
-    fprintf(self->f, "%s/%s", dashForEmpty(change->codonOld), dashForEmpty(change->codonNew));
-    afVepNextColumn(self->f, self->doHtml);
-    }
-else if (gpFx->detailType == nonCodingExon)
-    {
-    struct nonCodingExon *change = &(gpFx->details.nonCodingExon);
-    uint refLen = strlen(change->txRef), altLen = strlen(change->txAlt);
-    boolean isInsertion = (refLen == 0);
-    boolean isDeletion = altLen < refLen;
-    int cDnaPosition = change->cDnaPosition;
-    if (isInsertion)
-	fprintf(self->f, "%u-%u", cDnaPosition, cDnaPosition+1);
-    else if (isDeletion)
-	fprintf(self->f, "%u-%u", cDnaPosition+1, cDnaPosition+refLen);
-    else
-	fprintf(self->f, "%u", cDnaPosition+1);
-    afVepNextColumn(self->f, self->doHtml);
-    // Coding effect columns (except for cDnaPosition) are N/A:
-    afVepPrintPlaceholders(self->f, 4, self->doHtml);
-    }
-else
-    // Coding effect columns are N/A:
-    afVepPrintPlaceholders(self->f, 5, self->doHtml);
-freez(&abbrevAllele);
-}
-
-static void afVepPrintExistingVar(struct annoFormatVep *self, struct annoRow *varRow,
-				  struct annoStreamRows gratorData[], int gratorCount)
-/* Print existing variant ID (or placeholder) */
-{
-if (self->snpNameIx >= 0)
-    {
-    if (gratorCount < 2 || gratorData[1].streamer != self->config->snpSource)
-	errAbort("annoFormatVep: config error, snpSource is not where expected");
-    struct annoRow *snpRows = gratorData[1].rowList, *row;
-    if (snpRows != NULL)
-	{
-	int count = 0;
-	for (row = snpRows;  row != NULL;  row = row->next)
-	    {
-	    char **snpWords = (char **)(row->data);
-	    if (row->start == varRow->start && row->end == varRow->end)
-		{
-		if (count > 0)
-		    fputc(',', self->f);
-		afVepPrintf(self, "%s", snpWords[self->snpNameIx]);
-		count++;
-		}
-	    }
-	if (count == 0)
-	    afVepPrintPlaceholder(self->f, self->doHtml);
-	else
-	    afVepNextColumn(self->f, self->doHtml);
-	}
-    else
-	afVepPrintPlaceholder(self->f, self->doHtml);
-    }
-else
-    afVepPrintPlaceholder(self->f, self->doHtml);
-}
-
-static boolean isCodingSnv(struct annoRow *primaryRow, struct gpFx *gpFx)
-/* Return TRUE if this is a single-nucleotide non-synonymous change. */
-{
-if (gpFx == NULL ||
-    gpFx->detailType != codingChange ||
-    gpFx->soNumber == synonymous_variant)
-    return FALSE;
-char *txRef = gpFx->details.codingChange.txRef;
-char *txAlt = gpFx->details.codingChange.txAlt;
-if (txRef == NULL || txAlt == NULL ||
-    strlen(txRef) != 1 || sameString(txRef, "-") ||
-    strlen(txAlt) != 1 || sameString(txAlt, "-"))
-    return FALSE;
-return TRUE;
-}
-
-static int commaSepFindIx(char *item, char *s)
-/* Treating comma-separated, non-NULL s as an array of words,
- * return the index of item in the array, or -1 if item is not in array. */
-{
-int itemLen = strlen(item);
-int ix = 0;
-char *p = strchr(s, ',');
-while (p != NULL)
-    {
-    int elLen = (p - s);
-    if (elLen == itemLen && strncmp(s, item, itemLen) == 0)
-	return ix;
-    s = p+1;
-    p = strchr(s, ',');
-    ix++;
-    }
-if (strlen(s) == itemLen && strncmp(s, item, itemLen) == 0)
-    return ix;
-return -1;
-}
-
-static int commaSepFindIntIx(int item, char *s)
-/* Treating comma-separated, non-NULL s as an array of words that encode integers,
- * return the index of item in the array, or -1 if item is not in array. */
-{
-char itemString[64];
-safef(itemString, sizeof(itemString), "%d", item);
-return commaSepFindIx(itemString, s);
-}
-
-static char *commaSepWordFromIxOrWhole(int ix, char *s, struct lm *lm)
-/* Treating comma-separated, non-NULL s as an array of words,
- * return the word at ix in the array -- but if s has no commas,
- * just return (lmCloned) s.  errAborts if s is comma-sep but ix is out of bounds. */
-{
-int i = 0;
-char *p = strchr(s, ',');
-if (p == NULL)
-    return lmCloneString(lm, s);
-while (p != NULL)
-    {
-    if (i == ix)
-	return lmCloneStringZ(lm, s, p-s);
-    s = p+1;
-    p = strchr(s, ',');
-    i++;
-    }
-if (i == ix)
-    return lmCloneString(lm, s);
-errAbort("commaSepWordFromIxOrWhole: Bad index %d for string '%s'", ix, s);
-return NULL;
-}
-
-INLINE void afVepNewExtra(struct annoFormatVep *self, boolean *pGotExtra)
-/* If we already printed an extra item, print the extra column's separator; set pGotExtra. */
-{
-assert(pGotExtra);
-if (*pGotExtra)
-    {
-    if (self->doHtml)
-	fputs("; ", self->f);
-    else
-	fputc(';', self->f);
-    }
-*pGotExtra = TRUE;
-}
-
-static void afVepPrintDbNsfpSift(struct annoFormatVep *self,
-				 struct annoFormatVepExtraSource *extraSrc,
-				 struct annoRow *extraRows, struct gpFx *gpFx, char *ensTxId,
-				 boolean *pGotExtra)
-/* Match the allele from gpFx to the per-allele scores in row from dbNsfpSift. */
-{
-// Look up column indices only once:
-static int ensTxIdIx=-1, altAl1Ix, score1Ix, altAl2Ix, score2Ix, altAl3Ix, score3Ix;
-if (ensTxIdIx == -1)
-    {
-    struct asColumn *columns = extraSrc->source->asObj->columnList;
-    ensTxIdIx = asColumnMustFindIx(columns, "ensTxId");
-    altAl1Ix = asColumnMustFindIx(columns, "altAl1");
-    score1Ix = asColumnMustFindIx(columns, "score1");
-    altAl2Ix = asColumnMustFindIx(columns, "altAl2");
-    score2Ix = asColumnMustFindIx(columns, "score2");
-    altAl3Ix = asColumnMustFindIx(columns, "altAl3");
-    score3Ix = asColumnMustFindIx(columns, "score3");
-    }
-
-struct annoRow *row;
-for (row = extraRows;  row != NULL;  row = row->next)
-    {
-    // Skip this row unless it contains the ensTxId found by getDbNsfpEnsTx
-    // (but handle rare cases where dbNsfpSeqChange has "." for ensTxId, lame)
-    char **words = row->data;
-    if (differentString(ensTxId, ".") && differentString(words[ensTxIdIx], ".") &&
-	commaSepFindIx(ensTxId, words[ensTxIdIx]) < 0)
-	continue;
-    struct annoFormatVepExtraItem *extraItem = extraSrc->items;
-    char *scoreStr = NULL;
-    if (sameString(gpFx->gAllele, words[altAl1Ix]))
-	scoreStr = words[score1Ix];
-    else if (sameString(gpFx->gAllele, words[altAl2Ix]))
-	scoreStr = words[score2Ix];
-    else if (sameString(gpFx->gAllele, words[altAl3Ix]))
-	scoreStr = words[score3Ix];
-    double score = atof(scoreStr);
-    char prediction = '?';
-    if (score < 0.05)
-	prediction = 'D';
-    else
-	prediction = 'T';
-    if (isNotEmpty(scoreStr) && differentString(scoreStr, "."))
-	{
-	afVepNewExtra(self, pGotExtra);
-	fprintf(self->f, "%s=%c(%s)", extraItem->tag, prediction, scoreStr);
-	}
-    }
-}
-
-static void afVepPrintDbNsfpPolyPhen2(struct annoFormatVep *self,
-				      struct annoFormatVepExtraSource *extraSrc,
-				      struct annoRow *extraRows, struct gpFx *gpFx,
-				      boolean *pGotExtra)
-/* Match the allele from gpFx to the per-allele scores in each row from dbNsfpPolyPhen2. */
-{
-// Look up column indices only once:
-static int aaPosIx=-1,
-    altAl1Ix, hDivScore1Ix, hDivPred1Ix, hVarScore1Ix, hVarPred1Ix,
-    altAl2Ix, hDivScore2Ix, hDivPred2Ix, hVarScore2Ix, hVarPred2Ix,
-    altAl3Ix, hDivScore3Ix, hDivPred3Ix, hVarScore3Ix, hVarPred3Ix;
-if (aaPosIx == -1)
-    {
-    struct asColumn *columns = extraSrc->source->asObj->columnList;
-    aaPosIx = asColumnMustFindIx(columns, "uniProtAaPos");
-    altAl1Ix = asColumnMustFindIx(columns, "altAl1");
-    hDivScore1Ix = asColumnMustFindIx(columns, "hDivScore1");
-    hDivPred1Ix = asColumnMustFindIx(columns, "hDivPred1");
-    hVarScore1Ix = asColumnMustFindIx(columns, "hVarScore1");
-    hVarPred1Ix = asColumnMustFindIx(columns, "hVarPred1");
-    altAl2Ix = asColumnMustFindIx(columns, "altAl2");
-    hDivScore2Ix = asColumnMustFindIx(columns, "hDivScore2");
-    hDivPred2Ix = asColumnMustFindIx(columns, "hDivPred2");
-    hVarScore2Ix = asColumnMustFindIx(columns, "hVarScore2");
-    hVarPred2Ix = asColumnMustFindIx(columns, "hVarPred2");
-    altAl3Ix = asColumnMustFindIx(columns, "altAl3");
-    hDivScore3Ix = asColumnMustFindIx(columns, "hDivScore3");
-    hDivPred3Ix = asColumnMustFindIx(columns, "hDivPred3");
-    hVarScore3Ix = asColumnMustFindIx(columns, "hVarScore3");
-    hVarPred3Ix = asColumnMustFindIx(columns, "hVarPred3");
-    }
-
-struct codingChange *cc = &(gpFx->details.codingChange);
-int count = 0;
-struct annoRow *row;
-for (row = extraRows;  row != NULL;  row = row->next)
-    {
-    char **words = row->data;
-//#*** polyphen2 can actually have multiple scores/preds for the same pepPosition...
-//#*** so what we really should do is loop on comma-sep words[aaPosIx] and print scores/preds
-//#*** whenever pepPosition matches.
-    int txIx = commaSepFindIntIx(cc->pepPosition+1, words[aaPosIx]);
-    if (txIx < 0)
-	continue;
-    struct annoFormatVepExtraItem *extraItem = extraSrc->items;
-    for (;  extraItem != NULL;  extraItem = extraItem->next)
-	{
-	boolean isHdiv = (stringIn("HDIV", extraItem->tag) != NULL);
-	int predIx = -1, scoreIx = -1;
-	if (sameString(gpFx->gAllele, words[altAl1Ix]))
-	    {
-	    predIx = isHdiv ? hDivPred1Ix : hVarPred1Ix;
-	    scoreIx = isHdiv ? hDivScore1Ix : hVarScore1Ix;
-	    }
-	else if (sameString(gpFx->gAllele, words[altAl2Ix]))
-	    {
-	    predIx = isHdiv ? hDivPred2Ix : hVarPred2Ix;
-	    scoreIx = isHdiv ? hDivScore2Ix : hVarScore2Ix;
-	    }
-	else if (sameString(gpFx->gAllele, words[altAl3Ix]))
-	    {
-	    predIx = isHdiv ? hDivPred3Ix : hVarPred3Ix;
-	    scoreIx = isHdiv ? hDivScore3Ix : hVarScore3Ix;
-	    }
-	char *pred = (predIx < 0) ? NULL : words[predIx];
-	if (pred == NULL || sameString(pred, "."))
-	    continue;
-	pred = commaSepWordFromIxOrWhole(txIx, pred, self->lm);
-	if (isNotEmpty(pred))
-	    {
-	    if (count == 0)
-		{
-		afVepNewExtra(self, pGotExtra);
-		fprintf(self->f, "%s=", extraItem->tag);
-		}
-	    else
-		fputc(',', self->f);
-	    char *score = (scoreIx < 0) ? "?" : commaSepWordFromIxOrWhole(txIx, words[scoreIx], self->lm);
-	    fprintf(self->f, "%s(%s)", pred, score);
-	    count++;
-	    }
-	}
-    }
-}
-
-static void afVepPrintDbNsfpMutationTA(struct annoFormatVep *self,
-				       struct annoFormatVepExtraSource *extraSrc,
-				       struct annoRow *extraRows, struct gpFx *gpFx, char *ensTxId,
-				       boolean *pGotExtra)
-/* Match the allele from gpFx to the per-allele scores in row from dbNsfpMutationTaster
- * or dbNsfpMutationAssessor -- they have identical column indices. */
-{
-// Look up column indices only once:
-static int ensTxIdIx=-1, altAl1Ix, score1Ix, pred1Ix,
-    altAl2Ix, score2Ix, pred2Ix, altAl3Ix, score3Ix, pred3Ix;
-if (ensTxIdIx == -1)
-    {
-    struct asColumn *columns = extraSrc->source->asObj->columnList;
-    ensTxIdIx = asColumnMustFindIx(columns, "ensTxId");
-    altAl1Ix = asColumnMustFindIx(columns, "altAl1");
-    score1Ix = asColumnMustFindIx(columns, "score1");
-    pred1Ix = asColumnMustFindIx(columns, "pred1");
-    altAl2Ix = asColumnMustFindIx(columns, "altAl2");
-    score2Ix = asColumnMustFindIx(columns, "score2");
-    pred2Ix = asColumnMustFindIx(columns, "pred2");
-    altAl3Ix = asColumnMustFindIx(columns, "altAl3");
-    score3Ix = asColumnMustFindIx(columns, "score3");
-    pred3Ix = asColumnMustFindIx(columns, "pred3");
-    }
-
-struct annoRow *row;
-for (row = extraRows;  row != NULL;  row = row->next)
-    {
-    // Skip this row unless it contains the ensTxId found by getDbNsfpEnsTx
-    // (but handle rare cases where dbNsfpSeqChange has "." for ensTxId, lame)
-    char **words = row->data;
-    if (differentString(ensTxId, ".") && differentString(words[ensTxIdIx], ".") &&
-	commaSepFindIx(ensTxId, words[ensTxIdIx]) < 0)
-	continue;
-    struct annoFormatVepExtraItem *extraItem = extraSrc->items;
-    char *score = NULL, *pred = NULL;
-    if (sameString(gpFx->gAllele, words[altAl1Ix]))
-	{
-	score = words[score1Ix];
-	pred = words[pred1Ix];
-	}
-    else if (sameString(gpFx->gAllele, words[altAl2Ix]))
-	{
-	score = words[score2Ix];
-	pred = words[pred2Ix];
-	}
-    else if (sameString(gpFx->gAllele, words[altAl3Ix]))
-	{
-	score = words[score3Ix];
-	pred = words[pred3Ix];
-	}
-    if (isNotEmpty(score) && differentString(score, "."))
-	{
-	afVepNewExtra(self, pGotExtra);
-	if (isEmpty(pred))
-	    pred = "?";
-	fprintf(self->f, "%s=%s(%s)", extraItem->tag, pred, score);
-	}
-    }
-}
-
-static void afVepPrintDbNsfpLrt(struct annoFormatVep *self,
-				struct annoFormatVepExtraSource *extraSrc,
-				struct annoRow *extraRows, struct gpFx *gpFx, char *ensTxId,
-				boolean *pGotExtra)
-/* Match the allele from gpFx to the per-allele scores in row from dbNsfpLrt --
- * it also has omega{1,2,3} columns, but I'm not sure what those mean so am leaving out for now. */
-{
-// Look up column indices only once:
-static int ensTxIdIx=-1, altAl1Ix, score1Ix, pred1Ix,
-    altAl2Ix, score2Ix, pred2Ix, altAl3Ix, score3Ix, pred3Ix;
-if (ensTxIdIx == -1)
-    {
-    struct asColumn *columns = extraSrc->source->asObj->columnList;
-    ensTxIdIx = asColumnMustFindIx(columns, "ensTxId");
-    altAl1Ix = asColumnMustFindIx(columns, "altAl1");
-    score1Ix = asColumnMustFindIx(columns, "score1");
-    pred1Ix = asColumnMustFindIx(columns, "pred1");
-    altAl2Ix = asColumnMustFindIx(columns, "altAl2");
-    score2Ix = asColumnMustFindIx(columns, "score2");
-    pred2Ix = asColumnMustFindIx(columns, "pred2");
-    altAl3Ix = asColumnMustFindIx(columns, "altAl3");
-    score3Ix = asColumnMustFindIx(columns, "score3");
-    pred3Ix = asColumnMustFindIx(columns, "pred3");
-    }
-
-struct annoRow *row;
-for (row = extraRows;  row != NULL;  row = row->next)
-    {
-    // Skip this row unless it contains the ensTxId found by getDbNsfpEnsTx
-    // (but handle rare cases where dbNsfpSeqChange has "." for ensTxId, lame)
-    char **words = row->data;
-    if (differentString(ensTxId, ".") && differentString(words[ensTxIdIx], ".") &&
-	commaSepFindIx(ensTxId, words[ensTxIdIx]) < 0)
-	continue;
-    struct annoFormatVepExtraItem *extraItem = extraSrc->items;
-    char *score = NULL, *pred = NULL;
-    if (sameString(gpFx->gAllele, words[altAl1Ix]))
-	{
-	score = words[score1Ix];
-	pred = words[pred1Ix];
-	}
-    else if (sameString(gpFx->gAllele, words[altAl2Ix]))
-	{
-	score = words[score2Ix];
-	pred = words[pred2Ix];
-	}
-    else if (sameString(gpFx->gAllele, words[altAl3Ix]))
-	{
-	score = words[score3Ix];
-	pred = words[pred3Ix];
-	}
-    if (isNotEmpty(score) && differentString(score, "."))
-	{
-	afVepNewExtra(self, pGotExtra);
-	if (isEmpty(pred))
-	    pred = "?";
-	fprintf(self->f, "%s=%s(%s)", extraItem->tag, pred, score);
-	}
-    }
-}
-
-static void afVepPrintDbNsfpVest(struct annoFormatVep *self,
-                                 struct annoFormatVepExtraSource *extraSrc,
-                                 struct annoRow *extraRows, struct gpFx *gpFx, char *ensTxId,
-                                 boolean *pGotExtra)
-/* Match the allele from gpFx to the per-allele scores in row from dbNsfpVest */
-{
-// TODO: also compare var[123] protein change columns to gpFx to make sure we have the right one
-// Look up column indices only once:
-static int ensTxIdIx=-1, altAl1Ix, score1Ix, altAl2Ix, score2Ix, altAl3Ix, score3Ix;
-if (ensTxIdIx == -1)
-    {
-    struct asColumn *columns = extraSrc->source->asObj->columnList;
-    ensTxIdIx = asColumnMustFindIx(columns, "ensTxId");
-    altAl1Ix = asColumnMustFindIx(columns, "altAl1");
-    score1Ix = asColumnMustFindIx(columns, "score1");
-    altAl2Ix = asColumnMustFindIx(columns, "altAl2");
-    score2Ix = asColumnMustFindIx(columns, "score2");
-    altAl3Ix = asColumnMustFindIx(columns, "altAl3");
-    score3Ix = asColumnMustFindIx(columns, "score3");
-    }
-
-struct annoRow *row;
-for (row = extraRows;  row != NULL;  row = row->next)
-    {
-    // Skip this row unless it contains the ensTxId found by getDbNsfpEnsTx
-    char **words = row->data;
-    if (differentString(ensTxId, ".") && differentString(words[ensTxIdIx], ".") &&
-	commaSepFindIx(ensTxId, words[ensTxIdIx]) < 0)
-	continue;
-    struct annoFormatVepExtraItem *extraItem = extraSrc->items;
-    char *score = NULL;
-    if (sameString(gpFx->gAllele, words[altAl1Ix]))
-	score = words[score1Ix];
-    else if (sameString(gpFx->gAllele, words[altAl2Ix]))
-	score = words[score2Ix];
-    else if (sameString(gpFx->gAllele, words[altAl3Ix]))
-	score = words[score3Ix];
-    if (isNotEmpty(score) && differentString(score, "."))
-	{
-	afVepNewExtra(self, pGotExtra);
-	fprintf(self->f, "%s=%s", extraItem->tag, score);
-	}
-    }
-}
-
-static void afVepPrintDbNsfpInterPro(struct annoFormatVep *self,
-				     struct annoFormatVepExtraSource *extraSrc,
-				     struct annoRow *extraRows, struct gpFx *gpFx, char *ensTxId,
-				     boolean *pGotExtra)
-/* Print out any overlapping (comma-sep list, HTML-encoded ',' and '=') InterPro domains. */
-{
-// Look up column indices only once:
-static int domainsIx = -1;
-if (domainsIx == -1)
-    {
-    struct asColumn *columns = extraSrc->source->asObj->columnList;
-    domainsIx = asColumnMustFindIx(columns, "domains");
-    }
-
-struct annoRow *row;
-for (row = extraRows;  row != NULL;  row = row->next)
-    {
-    char **words = row->data;
-    struct annoFormatVepExtraItem *extraItem = extraSrc->items;
-    char *domains = words[domainsIx];
-    int lastC = strlen(domains) - 1;
-    if (domains[lastC] == ',')
-	domains[lastC] = '\0';
-    if (isNotEmpty(domains) && differentString(domains, "."))
-	{
-	afVepNewExtra(self, pGotExtra);
-	fprintf(self->f, "%s=%s", extraItem->tag, domains);
-	}
-    }
-}
-
-static boolean allelesAgree(char altNt, char altAa, char **words)
-/* Return TRUE if dbNsfpSeqChange words have altAa associated with altNt. */
-{
-//#*** TODO: handle stop codon representation
-if ((altNt == words[11][0] && altAa == words[12][0]) ||
-    (altNt == words[13][0] && altAa == words[14][0]) ||
-    (altNt == words[15][0] && altAa == words[16][0]))
-    return TRUE;
-return FALSE;
-}
-
-static struct annoRow *getRowsFromSource(struct annoStreamer *src,
-					 struct annoStreamRows gratorData[], int gratorCount)
-/* Search gratorData for src, and return its rows when found. */
-{
-int i;
-for (i = 0;  i < gratorCount;  i++)
-    {
-    if (gratorData[i].streamer == src)
-	return gratorData[i].rowList;
-    }
-errAbort("annoFormatVep: Can't find source %s in gratorData", src->name);
-return NULL;
-}
-
-static char *getDbNsfpEnsTx(struct annoFormatVep *self, struct gpFx *gpFx,
-			    struct annoStreamRows *gratorData, int gratorCount)
-/* Find the Ensembl transcript ID, if any, for which dbNsfp has results consistent
- * with gpFx. */
-{
-int i;
-for (i = 0;  i < gratorCount;  i++)
-    {
-    struct annoStreamer *source = gratorData[i].streamer;
-    if (!sameString(source->asObj->name, "dbNsfpSeqChange")) //#*** need metadata!
-	continue;
-    struct codingChange *cc = &(gpFx->details.codingChange);
-    struct annoRow *extraRows = getRowsFromSource(source, gratorData, gratorCount);
-    struct annoRow *row;
-    for (row = extraRows;  row != NULL;  row = row->next)
-	{
-	char **words = row->data;
-	if (strcasecmp(cc->codonOld, words[7]) != 0)
-	    continue;
-	if (!allelesAgree(gpFx->gAllele[0], cc->aaNew[0], words))
-	    continue;
-	int txIx = commaSepFindIntIx(cc->pepPosition+1, words[10]);
-	if (txIx >= 0)
-	    {
-	    if (sameString(words[4], "."))
-		return ".";
-	    char *ensTxId = commaSepWordFromIxOrWhole(txIx, words[4], self->lm);
-	    return ensTxId;
-	    }
-	}
-    break;
-    }
-return NULL;
-}
-
-static void afVepPrintExtrasDbNsfp(struct annoFormatVep *self, struct annoRow *varRow,
-				   struct annoRow *gpvRow, struct gpFx *gpFx,
-				   struct annoStreamRows gratorData[], int gratorCount,
-				   boolean *pGotExtra)
-/* Print the Extra column's tag=value; components from dbNSFP data, if we have any. */
-{
-// dbNSFP has data only for coding non-synonymous single-nucleotide changes:
-if (!isCodingSnv(varRow, gpFx))
-    return;
-// Does dbNsfpSeqChange have a coding change consistent with gpFx?:
-char *ensTxId = getDbNsfpEnsTx(self, gpFx, gratorData, gratorCount);
-// Now cycle through selected dbNsfp* sources, printing out scores for ensTxId:
-struct annoFormatVepExtraSource *extras = self->config->extraSources, *extraSrc;
-for (extraSrc = extras;  extraSrc != NULL;  extraSrc = extraSrc->next)
-    {
-    char *asObjName = extraSrc->source->asObj->name;
-    if (!startsWith("dbNsfp", asObjName))
-	continue;
-    struct annoRow *extraRows = getRowsFromSource(extraSrc->source, gratorData, gratorCount);
-    if (sameString(asObjName, "dbNsfpPolyPhen2"))
-	{
-	// PolyPhen2 is based on UniProt proteins, not GENCODE/Ensembl transcripts,
-	// so ensTxId doesn't apply.
-	afVepPrintDbNsfpPolyPhen2(self, extraSrc, extraRows, gpFx, pGotExtra);
-	if (ensTxId == NULL)
-	    break; // all done now, no need to keep looking
-	}
-    else if (ensTxId != NULL)
-	{
-	if (sameString(asObjName, "dbNsfpSift"))
-	    afVepPrintDbNsfpSift(self, extraSrc, extraRows, gpFx, ensTxId, pGotExtra);
-	else if (sameString(asObjName, "dbNsfpMutationTaster") ||
-		 sameString(asObjName, "dbNsfpMutationAssessor"))
-	    afVepPrintDbNsfpMutationTA(self, extraSrc, extraRows, gpFx, ensTxId, pGotExtra);
-	else if (sameString(asObjName, "dbNsfpLrt"))
-	    afVepPrintDbNsfpLrt(self, extraSrc, extraRows, gpFx, ensTxId, pGotExtra);
-        else if (sameString(asObjName, "dbNsfpVest"))
-	    afVepPrintDbNsfpVest(self, extraSrc, extraRows, gpFx, ensTxId, pGotExtra);
-	else if (sameString(asObjName, "dbNsfpInterPro"))
-	    afVepPrintDbNsfpInterPro(self, extraSrc, extraRows, gpFx, ensTxId, pGotExtra);
-	else if (!sameString(asObjName, "dbNsfpSeqChange"))
-	    errAbort("Unrecognized asObj->name '%s' from dbNSFP source '%s'",
-		     asObjName, extraSrc->source->name);
-	}
-    }
-}
-
-static void afVepPrintExtrasHgvs(struct annoFormatVep *self, struct annoRow *gpvRow,
-                                 boolean *pGotExtra)
-/* If not empty, print HGVS terms (last column of gpvRow). */
-{
-char **row = gpvRow->data;
-if (isNotEmpty(row[self->hgvsGIx]))
-    {
-    afVepNewExtra(self, pGotExtra);
-    fprintf(self->f, "HGVSG=%s", row[self->hgvsGIx]);
-    }
-if (isNotEmpty(row[self->hgvsCNIx]))
-    {
-    afVepNewExtra(self, pGotExtra);
-    fprintf(self->f, "HGVSCN=%s", row[self->hgvsCNIx]);
-    }
-if (isNotEmpty(row[self->hgvsPIx]))
-    {
-    afVepNewExtra(self, pGotExtra);
-    fprintf(self->f, "HGVSP=%s", row[self->hgvsPIx]);
-    }
-}
-
-static void afVepPrintExtraWigVec(struct annoFormatVepExtraSource *extraSrc,
-                                  struct annoFormatVep *self,
-                                  struct annoFormatVepExtraItem *extraItem,
-                                  struct annoRow *extraRows, struct annoRow *gpvRow,
-                                  boolean *pGotExtra)
-/* Print values from a wig source with type arWigVec (per-base floats) */
-{
-int i;
-struct annoRow *row;
-for (i = 0, row = extraRows;  row != NULL;  i++, row = row->next)
-    {
-    float *vector = row->data;
-    int len = row->end - row->start;
-    int j;
-    for (j = 0;  j < len;  j++)
-	{
-	if (i+j == 0)
-	    {
-	    afVepNewExtra(self, pGotExtra);
-	    fprintf(self->f, "%s=", extraItem->tag);
-	    }
-	else
-	    fputc(',', self->f);
-	fprintf(self->f, "%g", vector[j]);
-	}
-    }
-}
-
-static void afVepPrintExtraWigSingle(struct annoFormatVepExtraSource *extraSrc,
-                                     struct annoFormatVep *self,
-                                     struct annoFormatVepExtraItem *extraItem,
-                                     struct annoRow *extraRows, struct annoRow *gpvRow,
-                                     boolean *pGotExtra)
-/* Print values from a wig source with type arWigSingle (one double per row, e.g. an average) */
-{
-struct annoRow *row;
-for (row = extraRows;  row != NULL;  row = row->next)
-    {
-    double *pValue = row->data;
-    if (row == extraRows)
-        {
-        afVepNewExtra(self, pGotExtra);
-        fprintf(self->f, "%s=", extraItem->tag);
-        }
-    else
-        fputc(',', self->f);
-    fprintf(self->f, "%g", *pValue);
-    }
-}
-
-static void afVepPrintExtraWords(struct annoFormatVepExtraSource *extraSrc,
-                                 struct annoFormatVep *self,
-				 struct annoFormatVepExtraItem *extraItem,
-				 struct annoRow *extraRows, struct annoRow *gpvRow,
-                                 boolean *pGotExtra)
-/* Print comma-separated values in the specified column from the usual array-of-words. */
-{
-if (extraItem->rowIx < 0)
-    errAbort("annoFormatVep: invalid rowIx for tag %s", extraItem->tag);
-char **gpvWords = gpvRow ? gpvRow->data : NULL;
-char *gpvTranscript = gpvWords ? gpvWords[self->txNameIx] : NULL;
-boolean gotGpvTx = FALSE;
-int i;
-struct annoRow *row;
-for (i = 0, row = extraRows;  row != NULL;  row = row->next)
-    {
-    char **words = row->data;
-    char *extraTranscript = words[0];
-    // If extraSrc happens to be gpVarSource, use only the row that matches the current
-    // transcript, and use it only once.
-    if (extraSrc->source == self->config->gpVarSource)
-        {
-        extraTranscript = words[self->txNameIx];
-        if (gpvTranscript == NULL || differentString(extraTranscript, gpvTranscript) || gotGpvTx)
-            continue;
-        gotGpvTx = TRUE;
-        }
-    char *val = words[extraItem->rowIx];
-    if (isEmpty(val))
-        continue;
-    // Strip trailing comma if found
-    int valLen = strlen(val);
-    if (val[valLen-1] == ',')
-        val[valLen-1] = '\0';
-    if (i == 0)
-	{
-	afVepNewExtra(self, pGotExtra);
-	fprintf(self->f, "%s=", extraItem->tag);
-        if (extraItem->isBoolean)
-            fputs("YES", self->f);
-	}
-    else if (! extraItem->isBoolean)
-	fputc(',', self->f);
-    if (! extraItem->isBoolean)
-        {
-        // Watch out for characters that will mess up parsing of EXTRAS column:
-        subChar(val, '=', '_');
-        subChar(val, ';', '_');
-        afVepPuts(val, self);
-        }
-    i++;
-    }
-}
-
-static void afVepPrintExtrasOther(struct annoFormatVep *self, struct annoRow *varRow,
-				  struct annoRow *gpvRow, struct gpFx *gpFx,
-				  struct annoStreamRows gratorData[], int gratorCount,
-				  boolean includeRegulatory, boolean *pGotExtra)
-/* Print the Extra column's tag=value; components (other than dbNSFP) if we have any. */
-{
-struct annoFormatVepExtraSource *extras = self->config->extraSources, *extraSrc;
-for (extraSrc = extras;  extraSrc != NULL;  extraSrc = extraSrc->next)
-    {
-    char *asObjName = extraSrc->source->asObj->name;
-    if (startsWith("dbNsfp", asObjName))
-	continue;
-    struct annoRow *extraRows = getRowsFromSource(extraSrc->source, gratorData, gratorCount);
-    if (extraRows != NULL)
-	{
-	struct annoFormatVepExtraItem *extraItem;
-	for (extraItem = extraSrc->items;  extraItem != NULL;  extraItem = extraItem->next)
-	    if (includeRegulatory || ! extraItem->isRegulatory)
-		extraSrc->printExtra(extraSrc, self, extraItem, extraRows, gpvRow, pGotExtra);
-	}
-    }
-// VEP automatically adds DISTANCE for upstream/downstream variants
-if (gpFx && (gpFx->soNumber == upstream_gene_variant || gpFx->soNumber == downstream_gene_variant))
-    {
-    afVepNewExtra(self, pGotExtra);
-    // In order to really understand the boundary conditions of Ensembl's DISTANCE function,
-    // I'll have to read the code someday.  Upstream deletions on + strand still not handled
-    // quite right.
-    int ensemblFudge = 0;
-    int distance = gpvRow->start - varRow->start + ensemblFudge;
-    if (distance < 0)
-	{
-	ensemblFudge = 1;
-	distance = varRow->start - gpvRow->end + ensemblFudge;
-	}
-    fprintf(self->f, "DISTANCE=%d", distance);
-    }
-boolean includeExonNumber = TRUE;  //#*** optional in VEP
-if (includeExonNumber && gpFx)
-    {
-    // Add Exon or intron number if applicable
-    enum detailType deType = gpFx->detailType;
-    int num = -1, count = 0;
-    if (deType == codingChange)
-        {
-	num = gpFx->details.codingChange.exonNumber;
-	count = gpFx->details.codingChange.exonCount;
-        }
-    else if (deType == nonCodingExon)
-        {
-	num = gpFx->details.nonCodingExon.exonNumber;
-	count = gpFx->details.nonCodingExon.exonCount;
-        }
-    else if (deType == intron)
-        {
-	num = gpFx->details.intron.intronNumber;
-        count = gpFx->details.intron.intronCount;
-        }
-    if (num >= 0)
-	{
-	afVepNewExtra(self, pGotExtra);
-        fprintf(self->f, "%s=%d/%d", (deType == intron) ? "INTRON" : "EXON", num+1, count);
-        }
-    }
-if (!*pGotExtra)
-    afVepPrintPlaceholders(self->f, 0, self->doHtml);
-}
-
-static void afVepLmCleanup(struct annoFormatVep *self)
-/* Clean out localmem every 1k rows. */
-{
-self->lmRowCount++;
-if (self->lmRowCount > 1024)
-    {
-    lmCleanup(&(self->lm));
-    self->lm = lmInit(0);
-    self->lmRowCount = 0;
-    }
-}
-
-static void afVepPrintOneLineGpVar(struct annoFormatVep *self, struct annoStreamRows *varData,
-				   struct annoRow *gpvRow,
-				   struct annoStreamRows gratorData[], int gratorCount)
-/* Print one line of VEP with a coding gene consequence: a variant, an allele,
- * functional consequences of that allele, and whatever else is included in the config. */
-{
-afVepLmCleanup(self);
-struct annoRow *varRow = varData->rowList;
-struct gpFx *gpFx = annoGratorGpVarGpFxFromRow(self->config->gpVarSource, gpvRow, self->lm);
-afVepStartRow(self->f, self->doHtml);
-afVepPrintNameAndLoc(self, varRow);
-afVepPrintPredictions(self, varRow, gpvRow, gpFx);
-afVepPrintExistingVar(self, varRow, gratorData, gratorCount);
-boolean gotExtra = FALSE;
-afVepPrintExtrasDbNsfp(self, varRow, gpvRow, gpFx, gratorData, gratorCount, &gotExtra);
-afVepPrintExtrasHgvs(self, gpvRow, &gotExtra);
-afVepPrintExtrasOther(self, varRow, gpvRow, gpFx, gratorData, gratorCount, FALSE, &gotExtra);
-afVepEndRow(self->f, self->doHtml);
-}
-
-static boolean afVepIsRegulatory(struct annoFormatVep *self,
-				 struct annoStreamRows gratorData[], int gratorCount)
-/* Return TRUE if one of the extraSource items is a regulatory region. */
-{
-struct annoFormatVepExtraSource *extras = self->config->extraSources, *extraSrc;
-for (extraSrc = extras;  extraSrc != NULL;  extraSrc = extraSrc->next)
-    {
-    struct annoRow *extraRows = getRowsFromSource(extraSrc->source, gratorData, gratorCount);
-    if (extraRows != NULL)
-	{
-	struct annoFormatVepExtraItem *extraItem;
-	for (extraItem = extraSrc->items;  extraItem != NULL;  extraItem = extraItem->next)
-	    {
-	    if (extraItem->isRegulatory)
-		return TRUE;
-	    }
-	}
-    }
-return FALSE;
-}
-
-static char *afVepGetFirstAltAllele(struct annoFormatVep *self, struct annoRow *varRow)
-/* For VEP we must print out some alternate allele, whether the alternate allele is
- * related to any other info (like coding changes) or not.  Pick the first non-reference
- * allele. */
-{
-int refAlBufSize = varRow->end - varRow->start + 1;
-char refAllele[refAlBufSize];
-annoAssemblyGetSeq(self->assembly, varRow->chrom, varRow->start, varRow->end,
-		   refAllele, sizeof(refAllele));
-struct variant *variant = NULL;
-if (self->variantType == afvdtVcf)
-    variant = variantFromVcfAnnoRow(varRow, refAllele, self->lm, self->dyScratch);
-else if (self->variantType == afvdtPgSnpTable)
-    variant = variantFromPgSnpAnnoRow(varRow, refAllele, TRUE, self->lm);
-else if (self->variantType == afvdtPgSnpFile)
-    variant = variantFromPgSnpAnnoRow(varRow, refAllele, FALSE, self->lm);
-return firstAltAllele(variant->alleles);
-}
-
-static void afVepPrintPredictionsReg(struct annoFormatVep *self, char *alt)
-/* Print VEP allele, placeholder gene and item names, 'RegulatoryFeature',
- * 'regulatory_region_variant', and placeholder coding effect columns. */
-{
-afVepPuts(alt, self);
-afVepNextColumn(self->f, self->doHtml);
-afVepPrintPlaceholders(self->f, 2, self->doHtml);
-fputs("RegulatoryFeature", self->f);
-afVepNextColumn(self->f, self->doHtml);
-fputs(soTermToString(regulatory_region_variant), self->f);
-afVepNextColumn(self->f, self->doHtml);
-// Coding effect columns are N/A:
-afVepPrintPlaceholders(self->f, 5, self->doHtml);
-}
-
-static void afVepPrintExtrasHgvsGOnly(struct annoFormatVep *self, struct annoRow *varRow, char *alt,
-                                      boolean *pGotExtra)
-/* Make our own HGVS g. term (when we aren't taking HGVS terms from annoGratorGpVar). */
-{
-if (self->hgvsMakeG)
-    {
-    struct bed3 *variantBed = (struct bed3 *)varRow;
-    char *chromAcc = hRefSeqAccForChrom(self->assembly->name, varRow->chrom);
-    char *hgvsG = hgvsGFromVariant(self->gSeqWin, variantBed, alt, chromAcc, self->hgvsBreakDelIns);
-    afVepNewExtra(self, pGotExtra);
-    fprintf(self->f, "HGVSG=%s", hgvsG);
-    freeMem(hgvsG);
-    }
-}
-
-static void afVepPrintRegulatory(struct annoFormatVep *self, struct annoStreamRows *varData,
-				 struct annoStreamRows gratorData[], int gratorCount)
-/* If there are EXTRAS column sources that imply regulatory_region_variant, print out
- * lines with that consequence (no gpFx/dbNSFP). */
-{
-if (afVepIsRegulatory(self, gratorData, gratorCount))
-    {
-    afVepLmCleanup(self);
-    struct annoRow *varRow = varData->rowList;
-    afVepStartRow(self->f, self->doHtml);
-    afVepPrintNameAndLoc(self, varRow);
-    char *alt = afVepGetFirstAltAllele(self, varRow);
-    afVepPrintPredictionsReg(self, alt);
-    afVepPrintExistingVar(self, varRow, gratorData, gratorCount);
-    boolean gotExtra = FALSE;
-    afVepPrintExtrasHgvsGOnly(self, varRow, alt, &gotExtra);
-    afVepPrintExtrasOther(self, varRow, NULL, NULL, gratorData, gratorCount, TRUE, &gotExtra);
-    afVepEndRow(self->f, self->doHtml);
-    }
-}
-
-static void afVepFormatOne(struct annoFormatter *fSelf, struct annoStreamRows *primaryData,
-			   struct annoStreamRows gratorData[], int gratorCount)
-/* Print one variant's VEP (possibly multiple lines) using collected rows. */
-{
-struct annoFormatVep *self = (struct annoFormatVep *)fSelf;
-struct annoRow *gpVarRows = gratorData[0].rowList, *gpvRow;
-for (gpvRow = gpVarRows;  gpvRow != NULL;  gpvRow = gpvRow->next)
-    afVepPrintOneLineGpVar(self, primaryData, gpvRow, gratorData, gratorCount);
-afVepPrintRegulatory(self, primaryData, gratorData, gratorCount);
-}
-
-static void afVepEndHtml(struct annoFormatVep *self)
-{
-fputs("</TABLE><BR>\n", self->f);
-}
-
-static void afVepClose(struct annoFormatter **pFSelf)
-/* Close file handle, free self. */
-{
-if (pFSelf == NULL)
-    return;
-struct annoFormatVep *self = *(struct annoFormatVep **)pFSelf;
-if (self->doHtml)
-    afVepEndHtml(self);
-freeMem(self->fileName);
-carefulClose(&(self->f));
-lmCleanup(&(self->lm));
-dyStringFree(&(self->dyScratch));
-chromSeqWindowFree(&self->gSeqWin);
-annoFormatterFree(pFSelf);
-}
-
-static void afVepSetConfig(struct annoFormatVep *self, struct annoFormatVepConfig *config)
-/* Check config and figure out where various output columns will come from. */
-{
-self->config = config;
-struct asColumn *varAsColumns = config->variantSource->asObj->columnList;
-self->varNameIx = -1;
-self->varAllelesIx = -1;
-if (asObjectsMatch(config->variantSource->asObj, pgSnpAsObj()))
-    self->variantType = afvdtPgSnpTable;
-else if (asObjectsMatch(config->variantSource->asObj, pgSnpFileAsObj()))
-    self->variantType = afvdtPgSnpFile;
-else if (asObjectsMatch(config->variantSource->asObj, vcfAsObj()))
-    {
-    self->variantType = afvdtVcf;
-    self->varNameIx = asColumnMustFindIx(varAsColumns, "id");
-    }
-else
-    errAbort("afVepSetConfig: variant source %s doesn't look like pgSnp or VCF",
-	     config->variantSource->name);
-if (self->variantType == afvdtPgSnpTable || self->variantType == afvdtPgSnpFile)
-    {
-    // pgSnp's "name" column actually contains slash-separated alleles
-    self->varAllelesIx = asColumnMustFindIx(varAsColumns, "name");
-    }
-if (config->gpVarSource == NULL)
-    errAbort("afVepSetConfig: config must have a gpVarSource");
-else if (! asColumnNamesMatchFirstN(config->gpVarSource->asObj, genePredAsObj(), 10) &&
-         ! asColumnNamesMatchFirstN(config->gpVarSource->asObj, bigGenePredAsObj(), 16) &&
-         ! asColumnNamesMatchFirstN(config->gpVarSource->asObj, annoStreamDbPslPlusAsObj(),
-                                    PSLPLUS_NUM_COLS))
-    errAbort("afVepSetConfig: gpVarSource %s doesn't look like genePred or pslPlus",
-	     config->gpVarSource->name);
-// refGene and augmented knownGene have extra name fields that have the HGNC gene symbol:
-struct asColumn *gpvAsColumns = config->gpVarSource->asObj->columnList;
-self->txNameIx = asColumnFindIx(gpvAsColumns, "name");
-if (self->txNameIx < 0)
-    self->txNameIx = asColumnFindIx(gpvAsColumns, "qName");
-if (self->txNameIx < 0)
-    errAbort("afVepSetConfig: can't find name or qName column in gpVarSource %s",
-             config->gpVarSource->name);
-self->geneNameIx = asColumnFindIx(gpvAsColumns, "geneSymbol");
-if (self->geneNameIx < 0)
-    self->geneNameIx = asColumnFindIx(gpvAsColumns, "kgXref_geneSymbol");
-if (self->geneNameIx < 0)
-    self->geneNameIx = asColumnFindIx(gpvAsColumns, "name2");
-if (self->geneNameIx < 0)
-    self->geneNameIx = asColumnFindIx(gpvAsColumns, "proteinID");
-self->hgvsGIx = asColumnMustFindIx(gpvAsColumns, "hgvsG");
-self->hgvsCNIx = asColumnMustFindIx(gpvAsColumns, "hgvsCN");
-self->hgvsPIx = asColumnMustFindIx(gpvAsColumns, "hgvsP");
-if (config->snpSource != NULL)
-    {
-    struct asColumn *snpAsColumns = config->snpSource->asObj->columnList;
-    self->snpNameIx = asColumnMustFindIx(snpAsColumns, "name");
-    }
-else
-    self->snpNameIx = -1;
-}
-
-struct annoFormatVepConfig *annoFormatVepConfigNew(struct annoStreamer *variantSource,
-						   char *variantDescription,
-						   struct annoStreamer *gpVarSource,
-						   char *gpVarDescription,
-						   struct annoStreamer *snpSource,
-						   char *snpDescription)
-/* Return a basic configuration for VEP output.  variantSource and gpVarSource must be
- * provided; snpSource can be NULL. */
-{
-struct annoFormatVepConfig *config;
-AllocVar(config);
-config->variantSource = variantSource;
-config->variantDescription = cloneString(variantDescription);
-config->gpVarSource = gpVarSource;
-config->gpVarDescription = cloneString(gpVarDescription);
-config->snpSource = snpSource;
-config->snpDescription = cloneString(snpDescription);
-return config;
-}
-
-struct annoFormatter *annoFormatVepNew(char *fileName, boolean doHtml,
-				       struct annoStreamer *variantSource,
-				       char *variantDescription,
-				       struct annoStreamer *gpVarSource,
-				       char *gpVarDescription,
-				       struct annoStreamer *snpSource,
-				       char *snpDescription,
-				       struct annoAssembly *assembly)
-/* Return a formatter that will write functional predictions in the same format as Ensembl's
- * Variant Effect Predictor to fileName (can be "stdout").
- * variantSource and gpVarSource must be provided; snpSource can be NULL. */
-{
-struct annoFormatVep *self;
-AllocVar(self);
-struct annoFormatter *fSelf = &(self->formatter);
-fSelf->getOptions = annoFormatterGetOptions;
-fSelf->setOptions = annoFormatterSetOptions;
-fSelf->initialize = afVepInitialize;
-fSelf->comment = afVepComment;
-fSelf->formatOne = afVepFormatOne;
-fSelf->close = afVepClose;
-self->fileName = cloneString(fileName);
-self->f = mustOpen(fileName, "w");
-self->lm = lmInit(0);
-self->dyScratch = dyStringNew(0);
-self->needHeader = TRUE;
-self->doHtml = doHtml;
-self->assembly = assembly;
-struct annoFormatVepConfig *config = annoFormatVepConfigNew(variantSource, variantDescription,
-							    gpVarSource, gpVarDescription,
-							    snpSource, snpDescription);
-afVepSetConfig(self, config);
-return (struct annoFormatter *)self;
-}
-
-static void afvAddExtraItemMaybeReg(struct annoFormatter *fSelf, struct annoStreamer *extraSource,
-				    char *tag, char *description, char *column, boolean isBoolean,
-                                    boolean isReg)
-/* Tell annoFormatVep that it should include the given column of extraSource
- * in the EXTRAS column with tag.  The VEP header will include tag's description.
- * If isReg, overlap implies that the variant has consequence regulatory_region_variant.
- * For some special-cased sources e.g. dbNsfp files, column may be NULL/ignored. */
-{
-struct annoFormatVep *self = (struct annoFormatVep *)fSelf;
-struct annoFormatVepExtraSource *src;
-for (src = self->config->extraSources;  src != NULL;  src = src->next)
-    if (src->source == extraSource)
-	break;
-if (src == NULL)
-    {
-    AllocVar(src);
-    src->source = extraSource;
-    if (extraSource->rowType == arWigVec)
-	src->printExtra = afVepPrintExtraWigVec;
-    else if (extraSource->rowType == arWigSingle)
-	src->printExtra = afVepPrintExtraWigSingle;
-    else
-	src->printExtra = afVepPrintExtraWords;
-    slAddTail(&(self->config->extraSources), src);
-    }
-struct annoFormatVepExtraItem *item;
-AllocVar(item);
-item->tag = cloneString(tag);
-item->description = cloneString(description);
-item->rowIx = -1;
-if (isNotEmpty(column))
-    item->rowIx = asColumnMustFindIx(extraSource->asObj->columnList, column);
-item->isRegulatory = isReg;
-item->isBoolean = isBoolean;
-slAddTail(&(src->items), item);
-}
-
-void annoFormatVepAddExtraItem(struct annoFormatter *fSelf, struct annoStreamer *extraSource,
-			       char *tag, char *description, char *column, boolean isBoolean)
-/* Tell annoFormatVep that it should include the given column of extraSource
- * in the EXTRAS column with tag.  The VEP header will include tag's description.
- * For some special-cased sources e.g. dbNsfp files, column may be NULL/ignored.
- * If isBoolean, the column's description won't be output, only whether a match was found. */
-{
-afvAddExtraItemMaybeReg(fSelf, extraSource, tag, description, column, isBoolean, FALSE);
-}
-
-void annoFormatVepAddRegulatory(struct annoFormatter *fSelf, struct annoStreamer *regSource,
-				char *tag, char *description, char *column)
-/* Tell annoFormatVep to use the regulatory_region_variant consequence if there is overlap
- * with regSource, and to include the given column of regSource in the EXTRAS column.
- * The VEP header will include tag's description. */
-{
-afvAddExtraItemMaybeReg(fSelf, regSource, tag, description, column, FALSE, TRUE);
-struct annoFormatVep *self = (struct annoFormatVep *)fSelf;
-if (self->gSeqWin == NULL)
-    {
-    char *db = regSource->assembly->name;
-    self->gSeqWin = chromSeqWindowNew(db, NULL, 0, 0);
-    }
-}
-
-void annoFormatVepSetHgvsOutOptions(struct annoFormatter *fSelf, uint hgvsOutOptions)
-/* Import the HGVS output options described in hgHgvs.h */
-{
-struct annoFormatVep *self = (struct annoFormatVep *)fSelf;
-// We only do g. terms, for regulatory region variants, so no need for transcript/protein opts.
-if (hgvsOutOptions & HGVS_OUT_G)
-    self->hgvsMakeG = TRUE;
-if (hgvsOutOptions & HGVS_OUT_BREAK_DELINS)
-    self->hgvsBreakDelIns = TRUE;
-}