c693f56c78b1d5a56c53bf7473ff850446c4f1f4
angie
  Mon Oct 17 15:47:46 2011 -0700
Feature #3710 (vcfTabix UI options): added filtering on minor allelefrequency (actually, 1 - major allele frequency), if INFO column
contains the AF tag, or AC and AN tags.
Also, in vcfTrack.c, fixed a bug in multi-allele abbreviation code
in vcfFileToPgSnp.

diff --git src/hg/hgTracks/vcfTrack.c src/hg/hgTracks/vcfTrack.c
index 14ab5c6..dd49567 100644
--- src/hg/hgTracks/vcfTrack.c
+++ src/hg/hgTracks/vcfTrack.c
@@ -7,31 +7,31 @@
 #include "hacTree.h"
 #include "hdb.h"
 #include "hgTracks.h"
 #include "pgSnp.h"
 #include "trashDir.h"
 #include "vcf.h"
 #include "vcfUi.h"
 #if (defined USE_TABIX && defined KNETFILE_HOOKS)
 #include "knetUdc.h"
 #include "udc.h"
 #endif//def USE_TABIX && KNETFILE_HOOKS
 
 #ifdef USE_TABIX
 
 static boolean getMinQual(struct trackDb *tdb, double *retMinQual, boolean compositeLevel)
-/* Return TRUE and set retMinQual id cart contains minimum QUAL filter */
+/* Return TRUE and set retMinQual if cart contains minimum QUAL filter */
 {
 char cartVar[512];
 safef(cartVar, sizeof(cartVar), "%s."VCF_APPLY_MIN_QUAL_VAR, tdb->track);
 if (cartUsualBooleanClosestToHome(cart, tdb, compositeLevel,
 				  VCF_APPLY_MIN_QUAL_VAR, VCF_DEFAULT_APPLY_MIN_QUAL))
     {
     if (retMinQual != NULL)
 	*retMinQual = cartUsualDoubleClosestToHome(cart, tdb, compositeLevel, VCF_MIN_QUAL_VAR,
 						   VCF_DEFAULT_MIN_QUAL);
     return TRUE;
     }
 return FALSE;
 }
 
 static boolean minQualFail(struct vcfRecord *record, double minQual)
@@ -58,86 +58,165 @@
     return TRUE;
     }
 return FALSE;
 }
 
 static boolean filterColumnFail(struct vcfRecord *record, struct slName *filterValues)
 /* Return TRUE if record's FILTER column value(s) matches one of filterValues (from cart). */
 {
 int i;
 for (i = 0;  i < record->filterCount;  i++)
     if (slNameInList(filterValues, record->filters[i]))
 	return TRUE;
 return FALSE;
 }
 
+static boolean getMinFreq(struct trackDb *tdb, double *retMinFreq, boolean compositeLevel)
+/* Return TRUE and set retMinFreq if cart contains nonzero minimum minor allele frequency. */
+{
+char cartVar[512];
+//#*** is there an ExistsClosestToHome?
+safef(cartVar, sizeof(cartVar), "%s."VCF_MIN_ALLELE_FREQ_VAR, tdb->track);
+if (cartVarExists(cart, cartVar))
+    {
+    double minFreq = cartUsualDoubleClosestToHome(cart, tdb, compositeLevel,
+					    VCF_MIN_ALLELE_FREQ_VAR, VCF_DEFAULT_MIN_ALLELE_FREQ);
+    if (minFreq > 0)
+	{
+	if (retMinFreq != NULL)
+	    *retMinFreq = minFreq;
+	return TRUE;
+	}
+    }
+return FALSE;
+}
+
+static boolean minFreqFail(struct vcfRecord *record, double minFreq)
+/* Return TRUE if record's INFO include AF (alternate allele frequencies) or AC+AN
+ * (alternate allele counts and total count of observed alleles) and the minor allele
+ * frequency < minFreq -- or rather, major allele frequency > (1 - minFreq) because
+ * variants with > 2 alleles might have some significant minor frequencies along with
+ * tiny minor frequencies). */
+{
+struct vcfFile *vcff = record->file;
+boolean gotInfo = FALSE;
+double refFreq = 1.0;
+double maxAltFreq = 0.0;
+int i;
+const struct vcfInfoElement *afEl = vcfRecordFindInfo(record, "AF");
+const struct vcfInfoDef *afDef = vcfInfoDefForKey(vcff, "AF");
+if (afEl != NULL && afDef != NULL && afDef->type == vcfInfoFloat)
+    {
+    // If INFO includes alt allele freqs, use them directly.
+    gotInfo = TRUE;
+    for (i = 0;  i < afEl->count;  i++)
+	{
+	double altFreq = afEl->values[i].datFloat;
+	refFreq -= altFreq;
+	if (altFreq > maxAltFreq)
+	    maxAltFreq = altFreq;
+	}
+    }
+else
+    {
+    // Calculate alternate allele freqs from AC and AN:
+    const struct vcfInfoElement *acEl = vcfRecordFindInfo(record, "AC");
+    const struct vcfInfoDef *acDef = vcfInfoDefForKey(vcff, "AC");
+    const struct vcfInfoElement *anEl = vcfRecordFindInfo(record, "AN");
+    const struct vcfInfoDef *anDef = vcfInfoDefForKey(vcff, "AN");
+    if (acEl != NULL && acDef != NULL && acDef->type == vcfInfoInteger &&
+	anEl != NULL && anDef != NULL && anDef->type == vcfInfoInteger && anEl->count == 1)
+	{
+	gotInfo = TRUE;
+	int totalCount = anEl->values[0].datFloat;
+	for (i = 0;  i < acEl->count;  i++)
+	    {
+	    int altCount = acEl->values[i].datFloat;
+	    double altFreq = (double)altCount / totalCount;
+	    refFreq -= altFreq;
+	    if (altFreq < maxAltFreq)
+		maxAltFreq = altFreq;
+	    }
+	}
+    }
+if (gotInfo)
+    {
+    double majorAlFreq = max(refFreq, maxAltFreq);
+    if (majorAlFreq > (1.0 - minFreq))
+	return TRUE;
+    }
+return FALSE;
+}
+
 static void filterRecords(struct vcfFile *vcff, struct trackDb *tdb)
 /* If a filter is specified in the cart, remove any records that don't pass filter. */
 {
 boolean compositeLevel = isNameAtCompositeLevel(tdb, tdb->track);
-double minQual = 0;
+double minQual = VCF_DEFAULT_MIN_QUAL;
 struct slName *filterValues = NULL;
+double minFreq = VCF_DEFAULT_MIN_ALLELE_FREQ;
 boolean gotQualFilter = getMinQual(tdb, &minQual, compositeLevel);
 boolean gotFilterFilter = getFilterValues(tdb, &filterValues, compositeLevel);
-if (! (gotQualFilter || gotFilterFilter) )
+boolean gotMinFreqFilter = getMinFreq(tdb, &minFreq, compositeLevel);
+if (! (gotQualFilter || gotFilterFilter || gotMinFreqFilter) )
     return;
 
 struct vcfRecord *rec, *nextRec, *newList = NULL;
 for (rec = vcff->records;  rec != NULL;  rec = nextRec)
     {
     nextRec = rec->next;
     if (! ((gotQualFilter && minQualFail(rec, minQual)) ||
-	   (gotFilterFilter && filterColumnFail(rec, filterValues))) )
+	   (gotFilterFilter && filterColumnFail(rec, filterValues)) ||
+	   (gotMinFreqFilter && minFreqFail(rec, minFreq)) ))
 	slAddHead(&newList, rec);
     }
 slReverse(&newList);
 vcff->records = newList;
 }
 
 static struct pgSnp *vcfFileToPgSnp(struct vcfFile *vcff, struct trackDb *tdb)
 /* Convert vcff's records to pgSnp; don't free vcff until you're done with pgSnp
  * because it contains pointers into vcff's records' chrom. */
 {
 struct pgSnp *pgsList = NULL;
 struct vcfRecord *rec;
 int maxLen = 33;
+int maxAlCount = 5;
 for (rec = vcff->records;  rec != NULL;  rec = rec->next)
     {
     struct pgSnp *pgs = pgSnpFromVcfRecord(rec);
     // Insertion sequences can be quite long; abbreviate here for display.
     int len = strlen(pgs->name);
     if (len > maxLen)
 	{
-	if (strchr(pgs->name, '/') != NULL)
-	    {
-	    char *copy = cloneString(pgs->name);
-	    char *allele[8];
-	    int cnt = chopByChar(copy, '/', allele, pgs->alleleCount);
-	    int maxAlLen = maxLen / pgs->alleleCount;
+	int maxAlLen = maxLen / min(rec->alleleCount, maxAlCount);
 	    pgs->name[0] = '\0';
 	    int i;
-	    for (i = 0;  i < cnt;  i++)
+	for (i = 0;  i < rec->alleleCount;  i++)
 		{
 		if (i > 0)
 		    safencat(pgs->name, len+1, "/", 1);
-		if (strlen(allele[i]) > maxAlLen-3)
-		    strcpy(allele[i]+maxAlLen-3, "...");
-		safencat(pgs->name, len+1, allele[i], maxAlLen);
+	    if (i >= maxAlCount)
+		{
+		safecat(pgs->name, len+1, "...");
+		pgs->alleleCount = maxAlCount;
+		break;
 		}
+	    if (strlen(rec->alleles[i]) > maxAlLen-3)
+		strcpy(rec->alleles[i]+maxAlLen-3, "...");
+	    safencat(pgs->name, len+1, rec->alleles[i], maxAlLen);
 	    }
-	else
-	    strcpy(pgs->name+maxLen-3, "...");
 	}
     slAddHead(&pgsList, pgs);
     }
 slReverse(&pgsList);
 return pgsList;
 }
 
 
 // Center-weighted alpha clustering of haplotypes -- see Redmine #3711, #2823 note 7
 // It might be nice to use an allele-frequency representation here instead of [ACGTN] strings
 // with "N" for missing info or differences, but keep it simple.
 
 struct cwaExtraData
 /* Helper data for hacTree clustering of haplotypes by center-weighted alpha distance */
     {