src/hg/hgTracks/hapmapTrack.c 1.45

1.45 2009/03/06 23:19:40 angie
HapMap SNPs: Added distinction between expected and observed heterozygosity for filtering. Finally checking in Phase III filtering code in hgTracks/hapmapTrack.c.
Index: src/hg/hgTracks/hapmapTrack.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/hgTracks/hapmapTrack.c,v
retrieving revision 1.44
retrieving revision 1.45
diff -b -B -U 4 -r1.44 -r1.45
--- src/hg/hgTracks/hapmapTrack.c	3 Sep 2008 19:19:02 -0000	1.44
+++ src/hg/hgTracks/hapmapTrack.c	6 Mar 2009 23:19:40 -0000	1.45
@@ -7,34 +7,42 @@
 
 #include "hapmapSnps.h"
 #include "hapmapAllelesOrtho.h"
 #include "hapmapAllelesSummary.h"
+#include "hapmapPhaseIIISummary.h"
 
-char *mixedFilter = HAP_POP_MIXED_DEFAULT;
-char *popCountFilter = HAP_POP_COUNT_DEFAULT;
-char *observedFilter = HAP_TYPE_DEFAULT;
+static char const rcsid[] = "$Id$";
+
+// These values are all overwritten in loadFilters() below.
+char *mixedFilter = HAP_FILTER_DEFAULT;
+char *popCountFilter = HAP_FILTER_DEFAULT;
+char *observedFilter = HAP_FILTER_DEFAULT;
 float minFreqFilter = 0.0;
 float maxFreqFilter = 0.5;
 float minHetFilter = 0.0;
-float maxHetFilter = 0.5;
-char *monoFilterCEU = HAP_MONO_DEFAULT;
-char *monoFilterCHB = HAP_MONO_DEFAULT;
-char *monoFilterJPT = HAP_MONO_DEFAULT;
-char *monoFilterYRI = HAP_MONO_DEFAULT;
-char *chimpFilter = HAP_CHIMP_DEFAULT;
-int chimpQualFilter = 0;
-char *macaqueFilter = HAP_MACAQUE_DEFAULT;
-int macaqueQualFilter = 0;
+float maxHetFilter = 0.5; // 1.0 for observed het in PhaseIII version of the track.
 
+char *monoFilter[HAP_PHASEIII_POPCOUNT];
+// Indices of HapMap PhaseII pops in there:
+#define CEU 1
+#define CHB 2
+#define JPT 5
+#define YRI 10
+
+/* Someday these should come from a trackDb setting: */
+char *orthoFilter[HAP_ORTHO_COUNT];
+int orthoQualFilter[HAP_ORTHO_COUNT];
+#define CHIMP 0
+#define MACAQUE 1
 
-boolean isTransitionObserved(char *observed)
+static boolean isTransitionObserved(char *observed)
 {
 if (sameString(observed, "A/G")) return TRUE;
 if (sameString(observed, "C/T")) return TRUE;
 return FALSE;
 }
 
-boolean isComplexObserved(char *observed)
+static boolean isComplexObserved(char *observed)
 /* return TRUE if not simple bi-allelic A/C, A/G, A/T, etc. */
 {
 if (sameString(observed, "A/C")) return FALSE;
 if (sameString(observed, "A/G")) return FALSE;
@@ -44,13 +52,56 @@
 if (sameString(observed, "G/T")) return FALSE;
 return TRUE;
 }
 
-void loadFilters()
-{
-mixedFilter = cartUsualString(cart, HAP_POP_MIXED, HAP_POP_MIXED_DEFAULT);
-popCountFilter = cartUsualString(cart, HAP_POP_COUNT, HAP_POP_COUNT_DEFAULT);
-observedFilter = cartUsualString(cart, HAP_TYPE, HAP_TYPE_DEFAULT);
+/* This maps new cart variable names to old, for backwards compatibility: */
+static char *cartVarNameUpdates[16][2] =
+    { { HAP_POP_MIXED, "hapmapSnps.isMixed" },
+      { HAP_POP_COUNT, "hapmapSnps.popCount" },
+      { HAP_TYPE, "hapmapSnps.polyType" },
+      { HAP_MIN_FREQ, "hapmapSnps.minorAlleleFreqMinimum" },
+      { HAP_MAX_FREQ, "hapmapSnps.minorAlleleFreqMaximum" },
+      { HAP_MIN_HET, "hapmapSnps.hetMinimum" },
+      { HAP_MAX_EXPECTED_HET, "hapmapSnps_hetMaximum" },
+      { HAP_MAX_EXPECTED_HET, "hapmapSnps.hetMaximum" },
+      { HAP_MONO_PREFIX "_CEU", "hapmapSnps.monomorphic.ceu" },
+      { HAP_MONO_PREFIX "_CHB", "hapmapSnps.monomorphic.chb" },
+      { HAP_MONO_PREFIX "_JPT", "hapmapSnps.monomorphic.jpt" },
+      { HAP_MONO_PREFIX "_YRI", "hapmapSnps.monomorphic.yri" },
+      { HAP_ORTHO_PREFIX "_Chimp", "hapmapSnps.chimp" },
+      { HAP_ORTHO_QUAL_PREFIX "_Chimp", "hapmapSnps.chimpQualScore" },
+      { HAP_ORTHO_PREFIX "_Macaque", "hapmapSnps.macaque" },
+      { HAP_ORTHO_QUAL_PREFIX "_Macaque", "hapmapSnps.macaqueQualScore" },
+    };
+
+static void updateOldCartVars()
+/* Detect old names for cart variables, in case they are still hanging around
+ * in people's carts.  When found, set the new var to the old val unless it 
+ * would clobber something specifically set for the new var.  Delete old 
+ * cart var name. */
+{
+int i;
+for (i = 0;  i < ArraySize(cartVarNameUpdates);  i++)
+    {
+    char *newName = cartVarNameUpdates[i][0];
+    char *oldName = cartVarNameUpdates[i][1];
+    char *oldVal = cartOptionalString(cart, oldName);
+    if (oldVal != NULL)
+	{
+	if (cartOptionalString(cart, newName) == NULL)
+	    cartSetString(cart, newName, oldVal);
+	cartRemove(cart, oldName);
+	}
+    }
+}
+
+static void loadFilters(boolean isPhaseIII)
+/* Translate filter cart settings into global variables. */
+{
+updateOldCartVars();
+mixedFilter = cartUsualString(cart, HAP_POP_MIXED, HAP_FILTER_DEFAULT);
+popCountFilter = cartUsualString(cart, HAP_POP_COUNT, HAP_FILTER_DEFAULT);
+observedFilter = cartUsualString(cart, HAP_TYPE, HAP_FILTER_DEFAULT);
 minFreqFilter = sqlFloat(cartUsualString(cart, HAP_MIN_FREQ, HAP_MIN_FREQ_DEFAULT));
 if (minFreqFilter < 0.0) 
     {
     minFreqFilter = 0.0;
@@ -67,66 +118,75 @@
     {
     minHetFilter = 0.0;
     cartSetDouble(cart, HAP_MIN_HET, 0.0);
     }
-maxHetFilter = sqlFloat(cartUsualString(cart, HAP_MAX_HET, HAP_MAX_HET_DEFAULT));
-if (maxHetFilter > 0.5) 
+if (isPhaseIII)
+    {
+    maxHetFilter = sqlFloat(cartUsualString(cart, HAP_MAX_OBSERVED_HET,
+					    HAP_MAX_OBSERVED_HET_DEFAULT));
+    float defaultMaxHetF = atof(HAP_MAX_OBSERVED_HET_DEFAULT);
+    if (maxHetFilter > defaultMaxHetF)
     {
-    maxHetFilter = 0.5;
-    cartSetDouble(cart, HAP_MAX_HET, 0.5);
+	maxHetFilter = defaultMaxHetF;
+	cartSetDouble(cart, HAP_MAX_OBSERVED_HET, defaultMaxHetF);
+	}
     }
-monoFilterCEU = cartUsualString(cart, HAP_MONO_CEU, HAP_MONO_DEFAULT);
-monoFilterCHB = cartUsualString(cart, HAP_MONO_CHB, HAP_MONO_DEFAULT);
-monoFilterJPT = cartUsualString(cart, HAP_MONO_JPT, HAP_MONO_DEFAULT);
-monoFilterYRI = cartUsualString(cart, HAP_MONO_YRI, HAP_MONO_DEFAULT);
-chimpFilter = cartUsualString(cart, HAP_CHIMP, HAP_CHIMP_DEFAULT);
-chimpQualFilter = 
-    sqlUnsigned(cartUsualString(cart, HAP_CHIMP_QUAL, HAP_CHIMP_QUAL_DEFAULT));
-if (chimpQualFilter < 0) 
+else
+    {
+    maxHetFilter = sqlFloat(cartUsualString(cart, HAP_MAX_EXPECTED_HET,
+					    HAP_MAX_EXPECTED_HET_DEFAULT));
+    float defaultMaxHetF = atof(HAP_MAX_EXPECTED_HET_DEFAULT);
+    if (maxHetFilter > defaultMaxHetF)
     {
-    chimpQualFilter = 0;
-    cartSetInt(cart, HAP_CHIMP_QUAL, 0);
+	maxHetFilter = defaultMaxHetF;
+	cartSetDouble(cart, HAP_MAX_EXPECTED_HET, defaultMaxHetF);
     }
-if (chimpQualFilter > 100) 
+    }
+char cartVar[128];
+int i;
+for (i = 0;  i < HAP_PHASEIII_POPCOUNT;  i++)
     {
-    chimpQualFilter = 100;
-    cartSetInt(cart, HAP_CHIMP_QUAL, 100);
+    safef(cartVar, sizeof(cartVar), "%s_%s", HAP_MONO_PREFIX, hapmapPhaseIIIPops[i]);
+    monoFilter[i] = cartUsualString(cart, cartVar, HAP_FILTER_DEFAULT);
     }
-
-macaqueFilter = cartUsualString(cart, HAP_MACAQUE, HAP_MACAQUE_DEFAULT);
-macaqueQualFilter = 
-    sqlUnsigned(cartUsualString(cart, HAP_MACAQUE_QUAL, HAP_MACAQUE_QUAL_DEFAULT));
-if (macaqueQualFilter < 0) 
+for (i = 0; i < HAP_ORTHO_COUNT;  i++)
+    {
+    safef(cartVar, sizeof(cartVar), "%s_%s", HAP_ORTHO_PREFIX, hapmapOrthoSpecies[i]);
+    orthoFilter[i] = cartUsualString(cart, cartVar, HAP_FILTER_DEFAULT);
+    safef(cartVar, sizeof(cartVar), "%s_%s", HAP_ORTHO_QUAL_PREFIX, hapmapOrthoSpecies[i]);
+    orthoQualFilter[i] = cartUsualInt(cart, cartVar, atoi(HAP_ORTHO_QUAL_DEFAULT));
+    if (orthoQualFilter[i] < 0)
     {
-    macaqueQualFilter = 0;
-    cartSetInt(cart, HAP_MACAQUE_QUAL, 0);
+	orthoQualFilter[i] = 0;
+	cartSetInt(cart, cartVar, 0);
     }
-if (macaqueQualFilter > 100) 
+    if (orthoQualFilter[i] > 100)
     {
-    macaqueQualFilter = 100;
-    cartSetInt(cart, HAP_MACAQUE_QUAL, 100);
+	orthoQualFilter[i] = 100;
+	cartSetInt(cart, cartVar, 100);
+	}
     }
-
 }
 
-boolean allDefaults()
+static boolean allDefaults(boolean isPhaseIII)
 /* return TRUE if all filters are set to default values */
 {
-if (differentString(mixedFilter, HAP_POP_MIXED_DEFAULT)) return FALSE;
-if (differentString(popCountFilter, HAP_POP_COUNT_DEFAULT)) return FALSE;
-if (differentString(observedFilter, HAP_TYPE_DEFAULT)) return FALSE;
+if (differentString(mixedFilter, HAP_FILTER_DEFAULT)) return FALSE;
+if (differentString(popCountFilter, HAP_FILTER_DEFAULT)) return FALSE;
+if (differentString(observedFilter, HAP_FILTER_DEFAULT)) return FALSE;
 if (minFreqFilter > sqlFloat(HAP_MIN_FREQ_DEFAULT)) return FALSE;
 if (maxFreqFilter < sqlFloat(HAP_MAX_FREQ_DEFAULT)) return FALSE;
 if (minHetFilter > sqlFloat(HAP_MIN_HET_DEFAULT)) return FALSE;
-if (maxFreqFilter < sqlFloat(HAP_MAX_HET_DEFAULT)) return FALSE;
-if (differentString(monoFilterCEU, HAP_MONO_DEFAULT)) return FALSE;
-if (differentString(monoFilterCHB, HAP_MONO_DEFAULT)) return FALSE;
-if (differentString(monoFilterJPT, HAP_MONO_DEFAULT)) return FALSE;
-if (differentString(monoFilterYRI, HAP_MONO_DEFAULT)) return FALSE;
-if (differentString(chimpFilter, HAP_CHIMP_DEFAULT)) return FALSE;
-if (chimpQualFilter > sqlUnsigned(HAP_CHIMP_QUAL_DEFAULT)) return FALSE;
-if (differentString(macaqueFilter, HAP_MACAQUE_DEFAULT)) return FALSE;
-if (macaqueQualFilter > sqlUnsigned(HAP_MACAQUE_QUAL_DEFAULT)) return FALSE;
+char *defaultMaxHet = isPhaseIII ? HAP_MAX_OBSERVED_HET_DEFAULT : HAP_MAX_EXPECTED_HET_DEFAULT;
+if (maxHetFilter < sqlFloat(defaultMaxHet)) return FALSE;
+int i;
+for (i = 0;  i < HAP_PHASEIII_POPCOUNT;  i++)
+    if (differentString(monoFilter[i], HAP_FILTER_DEFAULT)) return FALSE;
+for (i = 0;  i < HAP_ORTHO_COUNT;  i++)
+    {
+    if (differentString(orthoFilter[i], HAP_FILTER_DEFAULT)) return FALSE;
+    if (orthoQualFilter[i] > sqlUnsigned(HAP_ORTHO_QUAL_DEFAULT)) return FALSE;
+    }
 return TRUE;
 }
 
 
@@ -137,11 +196,16 @@
 struct sqlConnection *conn = hAllocConn(database);
 struct sqlResult *sr = NULL;
 char **row = NULL;
 int rowOffset = 1;
+int i;
 
-if (sameString(tg->mapName, "hapmapAllelesChimp") || sameString(tg->mapName, "hapmapAllelesMacaque"))
-{
+char orthoTable[HDB_MAX_TABLE_STRING];
+for (i = 0;  i < HAP_ORTHO_COUNT;  i++)
+    {
+    safef(orthoTable, sizeof(orthoTable), "hapmapAlleles%s", hapmapOrthoSpecies[i]);
+    if (sameString(tg->mapName, orthoTable))
+	{
     struct hapmapAllelesOrtho *orthoLoadItem, *orthoItemList = NULL;
     sr = hRangeQuery(conn, tg->mapName, chromName, winStart, winEnd, NULL, &rowOffset);
     while ((row = sqlNextRow(sr)) != NULL)
         {
@@ -152,9 +216,10 @@
     hFreeConn(&conn);
     slSort(&orthoItemList, bedCmp);
     tg->items = orthoItemList;
     return;
-}
+	}
+    }
 
 struct hapmapSnps *simpleLoadItem, *simpleItemList = NULL;
 sr = hRangeQuery(conn, tg->mapName, chromName, winStart, winEnd, NULL, &rowOffset);
 while ((row = sqlNextRow(sr)) != NULL)
@@ -167,9 +232,9 @@
 slSort(&simpleItemList, bedCmp);
 tg->items = simpleItemList;
 }
 
-boolean orthoMatchSummary(struct hapmapAllelesOrtho *orthoItem, 
+boolean orthoMatchSummaryPhaseII(struct hapmapAllelesOrtho *orthoItem, 
                           struct hapmapAllelesSummary *summaryItem)
 /* compare an ortho item to a summary item */
 /* return FALSE if not matching */
 /* probably could use bedCmp to do this */
@@ -180,9 +245,9 @@
 if (differentString(orthoItem->name, summaryItem->name)) return FALSE;
 return TRUE;
 }
 
-boolean simpleMatchSummary(struct hapmapSnps *simpleItem, 
+boolean simpleMatchSummaryPhaseII(struct hapmapSnps *simpleItem, 
                            struct hapmapAllelesSummary *summaryItem)
 /* compare a simple item to a summary item */
 /* return FALSE if not matching */
 /* probably could use bedCmp to do this */
@@ -193,9 +258,9 @@
 if (differentString(simpleItem->name, summaryItem->name)) return FALSE;
 return TRUE;
 }
 
-float getMinFreq(struct hapmapAllelesSummary *summaryItem)
+float getMinFreqPhaseII(struct hapmapAllelesSummary *summaryItem)
 {
 float freqCEU = 0.5;
 float freqCHB = 0.5;
 float freqJPT = 0.5;
@@ -228,9 +293,9 @@
 if (freqYRI < ret) ret = freqYRI;
 return ret;
 }
 
-float getMaxFreq(struct hapmapAllelesSummary *summaryItem)
+float getMaxFreqPhaseII(struct hapmapAllelesSummary *summaryItem)
 {
 float freqCEU = 0.0;
 float freqCHB = 0.0;
 float freqJPT = 0.0;
@@ -264,9 +329,9 @@
 return ret;
 }
 
 
-char *getMajorAllele(struct hapmapAllelesSummary *summaryItem)
+char *getMajorAllelePhaseII(struct hapmapAllelesSummary *summaryItem)
 {
 char *allele = NULL;
 
 if (sameString(summaryItem->isMixed, "YES"))
@@ -285,9 +350,9 @@
     return cloneString("none");
 return allele;
 }
 
-char *getMinorAllele(struct hapmapAllelesSummary *summaryItem, char *majorAllele)
+char *getMinorAllelePhaseII(struct hapmapAllelesSummary *summaryItem, char *majorAllele)
 /* return the allele that isn't the majorAllele */
 {
 char *allele = NULL;
 if (!majorAllele) return cloneString("none");
@@ -301,9 +366,9 @@
     return cloneString("none");
 return allele;
 }
 
-boolean orthoAlleleCheck(struct hapmapAllelesSummary *summaryItem, char *species)
+boolean orthoAlleleCheckPhaseII(struct hapmapAllelesSummary *summaryItem, char *species)
 /* return TRUE if disqualified from being ortho mismatch */
 /* orthoAllele is adjusted for strand; population allele is not */
 {
 char *orthoAllele = NULL;
@@ -328,13 +393,43 @@
 char *subString = strstr(summaryItem->observed, orthoAllele);
 if (subString) return TRUE;
 
 return FALSE;
+}
 
+static boolean filterObserved(boolean complexObserved, boolean transitionObserved)
+/* Return TRUE if observedFilter disqualifies the observed types. */
+{
+if (sameString(observedFilter, "complex") && !complexObserved) return TRUE;
+if (sameString(observedFilter, "bi-alleleic") && complexObserved) return TRUE;
+if (sameString(observedFilter, "transition") && complexObserved) return TRUE;
+if (sameString(observedFilter, "transition") && !transitionObserved) return TRUE;
+if (sameString(observedFilter, "transversion") && complexObserved) return TRUE;
+if (sameString(observedFilter, "transversion") && transitionObserved) return TRUE;
+return FALSE;
 }
 
+static boolean filterFreq(boolean isMixed, float minFreq, float maxFreq)
+/* Return TRUE if {min,max}FreqFilter disqualifies item. */
+{
+if (minFreqFilter > atof(HAP_MIN_FREQ_DEFAULT))
+    {
+    /* disqualify mixed items when freq filter is set */
+    if (isMixed) return TRUE;
+    if (minFreq < minFreqFilter) return TRUE;
+    }
 
-boolean filterOne(struct hapmapAllelesSummary *summaryItem)
+if (maxFreqFilter < atof(HAP_MAX_FREQ_DEFAULT))
+    {
+    /* disqualify mixed items when freq filter is set */
+    if (isMixed) return TRUE;
+    if (maxFreq > maxFreqFilter) return TRUE;
+    }
+return FALSE;
+}
+
+
+static boolean filterOnePhaseII(struct hapmapAllelesSummary *summaryItem)
 /* return TRUE if summaryItem is disqualified for any reason */
 {
 boolean complexObserved = isComplexObserved(summaryItem->observed);
 boolean transitionObserved = isTransitionObserved(summaryItem->observed);
@@ -348,107 +443,87 @@
     return TRUE;
 if (sameString(popCountFilter, "1-3 populations") && summaryItem->popCount == 4)
     return TRUE;
 
-if (sameString(observedFilter, "complex") && !complexObserved) return TRUE;
-if (sameString(observedFilter, "bi-alleleic") && complexObserved) return TRUE;
-if (sameString(observedFilter, "transition") && complexObserved) return TRUE;
-if (sameString(observedFilter, "transition") && !transitionObserved) return TRUE;
-if (sameString(observedFilter, "transversion") && complexObserved) return TRUE;
-if (sameString(observedFilter, "transversion") && transitionObserved) return TRUE;
-
-if (minFreqFilter > sqlFloat(HAP_MIN_FREQ_DEFAULT))
-    {
-    /* disqualify mixed items when freq filter is set */
-    if (sameString(summaryItem->isMixed, "YES")) return TRUE;
-    float minFreq = getMinFreq(summaryItem);
-    if (minFreq < minFreqFilter) return TRUE;
-    }
+if (filterObserved(complexObserved, transitionObserved))
+    return TRUE;
 
-if (maxFreqFilter < sqlFloat(HAP_MAX_FREQ_DEFAULT))
-    {
-    /* disqualify mixed items when freq filter is set */
-    if (sameString(summaryItem->isMixed, "YES")) return TRUE;
-    float maxFreq = getMaxFreq(summaryItem);
-    if (maxFreq > maxFreqFilter) return TRUE;
-    }
+if (filterFreq(sameString(summaryItem->isMixed, "YES"),
+	       getMinFreqPhaseII(summaryItem), getMaxFreqPhaseII(summaryItem)))
+    return TRUE;
 
 if (summaryItem->score/1000.0 < minHetFilter) return TRUE;
 if (summaryItem->score/1000.0 > maxHetFilter) return TRUE;
 
 if (summaryItem->totalAlleleCountCEU > 0)
     {
-    if (sameString(monoFilterCEU, "yes") && summaryItem->majorAlleleCountCEU != summaryItem->totalAlleleCountCEU) return TRUE;
-    if (sameString(monoFilterCEU, "no") && summaryItem->majorAlleleCountCEU == summaryItem->totalAlleleCountCEU) return TRUE;
+    if (sameString(monoFilter[CEU], "yes") && summaryItem->majorAlleleCountCEU != summaryItem->totalAlleleCountCEU) return TRUE;
+    if (sameString(monoFilter[CEU], "no") && summaryItem->majorAlleleCountCEU == summaryItem->totalAlleleCountCEU) return TRUE;
     }
 if (summaryItem->totalAlleleCountCHB > 0)
     {
-    if (sameString(monoFilterCHB, "yes") && summaryItem->majorAlleleCountCHB != summaryItem->totalAlleleCountCHB) return TRUE;
-    if (sameString(monoFilterCHB, "no") && summaryItem->majorAlleleCountCHB == summaryItem->totalAlleleCountCHB) return TRUE;
+    if (sameString(monoFilter[CHB], "yes") && summaryItem->majorAlleleCountCHB != summaryItem->totalAlleleCountCHB) return TRUE;
+    if (sameString(monoFilter[CHB], "no") && summaryItem->majorAlleleCountCHB == summaryItem->totalAlleleCountCHB) return TRUE;
     }
 if (summaryItem->totalAlleleCountJPT > 0)
     {
-    if (sameString(monoFilterJPT, "yes") && summaryItem->majorAlleleCountJPT != summaryItem->totalAlleleCountJPT) return TRUE;
-    if (sameString(monoFilterJPT, "no") && summaryItem->majorAlleleCountJPT == summaryItem->totalAlleleCountJPT) return TRUE;
+    if (sameString(monoFilter[JPT], "yes") && summaryItem->majorAlleleCountJPT != summaryItem->totalAlleleCountJPT) return TRUE;
+    if (sameString(monoFilter[JPT], "no") && summaryItem->majorAlleleCountJPT == summaryItem->totalAlleleCountJPT) return TRUE;
     }
 if (summaryItem->totalAlleleCountYRI > 0)
     {
-    if (sameString(monoFilterYRI, "yes") && summaryItem->majorAlleleCountYRI != summaryItem->totalAlleleCountYRI) return TRUE;
-    if (sameString(monoFilterYRI, "no") && summaryItem->majorAlleleCountYRI == summaryItem->totalAlleleCountYRI) return TRUE;
+    if (sameString(monoFilter[YRI], "yes") && summaryItem->majorAlleleCountYRI != summaryItem->totalAlleleCountYRI) return TRUE;
+    if (sameString(monoFilter[YRI], "no") && summaryItem->majorAlleleCountYRI == summaryItem->totalAlleleCountYRI) return TRUE;
     }
 
 /* ortho quality */
-if (summaryItem->chimpAlleleQuality < chimpQualFilter) return TRUE;
-if (summaryItem->macaqueAlleleQuality < macaqueQualFilter) return TRUE;
+if (summaryItem->chimpAlleleQuality < orthoQualFilter[CHIMP]) return TRUE;
+if (summaryItem->macaqueAlleleQuality < orthoQualFilter[MACAQUE]) return TRUE;
 
 /* orthoAlleles */
 /* if any filter set, data must be available */
-if (differentString(chimpFilter, "no filter") && sameString(summaryItem->chimpAllele, "none"))
+if (differentString(orthoFilter[CHIMP], "no filter") && sameString(summaryItem->chimpAllele, "none"))
     return TRUE;
-if (differentString(macaqueFilter, "no filter") && sameString(summaryItem->macaqueAllele, "none"))
+if (differentString(orthoFilter[MACAQUE], "no filter") && sameString(summaryItem->macaqueAllele, "none"))
     return TRUE;
-if (differentString(chimpFilter, "no filter") && sameString(summaryItem->chimpAllele, "N"))
+if (differentString(orthoFilter[CHIMP], "no filter") && sameString(summaryItem->chimpAllele, "N"))
     return TRUE;
-if (differentString(macaqueFilter, "no filter") && sameString(summaryItem->macaqueAllele, "N")) 
+if (differentString(orthoFilter[MACAQUE], "no filter") && sameString(summaryItem->macaqueAllele, "N")) 
     return TRUE;
 
 /* If mixed, then we can't tell if we have a match, so must filter out. */
-if (sameString(chimpFilter, "matches major human allele") && sameString(summaryItem->isMixed, "YES")) 
-    return TRUE;
-if (sameString(macaqueFilter, "matches major human allele") && sameString(summaryItem->isMixed, "YES")) 
-    return TRUE;
-if (sameString(chimpFilter, "matches minor human allele") && sameString(summaryItem->isMixed, "YES")) 
+if (sameString(orthoFilter[CHIMP], "matches major human allele") && sameString(summaryItem->isMixed, "YES")) 
     return TRUE;
-if (sameString(macaqueFilter, "matches minor human allele") && sameString(summaryItem->isMixed, "YES")) 
+if (sameString(orthoFilter[MACAQUE], "matches major human allele") && sameString(summaryItem->isMixed, "YES")) 
     return TRUE;
-if (sameString(chimpFilter, "matches neither human allele") && sameString(summaryItem->isMixed, "YES")) 
+if (sameString(orthoFilter[CHIMP], "matches minor human allele") && sameString(summaryItem->isMixed, "YES")) 
     return TRUE;
-if (sameString(macaqueFilter, "matches neither human allele") && sameString(summaryItem->isMixed, "YES")) 
+if (sameString(orthoFilter[MACAQUE], "matches minor human allele") && sameString(summaryItem->isMixed, "YES")) 
     return TRUE;
 
-char *majorAllele = getMajorAllele(summaryItem);
-if (sameString(chimpFilter, "matches major human allele") && differentString(summaryItem->chimpAllele, majorAllele)) 
+char *majorAllele = getMajorAllelePhaseII(summaryItem);
+if (sameString(orthoFilter[CHIMP], "matches major human allele") && differentString(summaryItem->chimpAllele, majorAllele)) 
     return TRUE;
-if (sameString(macaqueFilter, "matches major human allele") && differentString(summaryItem->macaqueAllele, majorAllele)) 
+if (sameString(orthoFilter[MACAQUE], "matches major human allele") && differentString(summaryItem->macaqueAllele, majorAllele)) 
     return TRUE;
 
-char *minorAllele = getMinorAllele(summaryItem, majorAllele);
-if (sameString(chimpFilter, "matches minor human allele") && differentString(summaryItem->chimpAllele, minorAllele)) 
+char *minorAllele = getMinorAllelePhaseII(summaryItem, majorAllele);
+if (sameString(orthoFilter[CHIMP], "matches minor human allele") && differentString(summaryItem->chimpAllele, minorAllele)) 
     return TRUE;
-if (sameString(macaqueFilter, "matches minor human allele") && differentString(summaryItem->macaqueAllele, minorAllele)) 
+if (sameString(orthoFilter[MACAQUE], "matches minor human allele") && differentString(summaryItem->macaqueAllele, minorAllele)) 
     return TRUE;
 
 /* mismatches */
-if (sameString(chimpFilter, "matches neither human allele") && orthoAlleleCheck(summaryItem, "chimp"))
+if (sameString(orthoFilter[CHIMP], "matches neither human allele") && orthoAlleleCheckPhaseII(summaryItem, "chimp"))
     return TRUE;
-if (sameString(macaqueFilter, "matches neither human allele") && orthoAlleleCheck(summaryItem, "macaque"))
+if (sameString(orthoFilter[MACAQUE], "matches neither human allele") && orthoAlleleCheckPhaseII(summaryItem, "macaque"))
     return TRUE;
 
 return FALSE;
 }
 
 
-struct hapmapAllelesOrtho *filterOrthoList(struct hapmapAllelesOrtho *orthoList, 
+struct hapmapAllelesOrtho *filterOrthoListPhaseII(struct hapmapAllelesOrtho *orthoList, 
                                            struct hapmapAllelesSummary *summaryList)
 /* delete from orthoList based on filters */
 /* could use slList and combine filterOrthoList() with filterSimpleList() */
 {
@@ -458,14 +533,14 @@
 struct hapmapAllelesOrtho *ret = NULL;
 
 while (orthoItem)
     {
-    if (!orthoMatchSummary(orthoItem, summaryItem))
+    if (!orthoMatchSummaryPhaseII(orthoItem, summaryItem))
        {
        summaryItem = summaryItem->next;
        continue;
        }
-    if (!filterOne(summaryItem))
+    if (!filterOnePhaseII(summaryItem))
         {
 	orthoItemClone = CloneVar(orthoItem);
 	orthoItemClone->next = NULL;
         slSafeAddHead(&ret, orthoItemClone);
@@ -476,9 +551,9 @@
 slSort(&ret, bedCmp);
 return ret;
 }
 
-struct hapmapSnps *filterSimpleList(struct hapmapSnps *simpleList, 
+struct hapmapSnps *filterSimpleListPhaseII(struct hapmapSnps *simpleList, 
                                     struct hapmapAllelesSummary *summaryList)
 /* delete from simpleList based on filters */
 /* could use slList and combine filterOrthoList() with filterSimpleList() */
 {
@@ -488,14 +563,14 @@
 struct hapmapSnps *ret = NULL;
 
 while (simpleItem)
     {
-    if (!simpleMatchSummary(simpleItem, summaryItem))
+    if (!simpleMatchSummaryPhaseII(simpleItem, summaryItem))
        {
        summaryItem = summaryItem->next;
        continue;
        }
-    if (!filterOne(summaryItem))
+    if (!filterOnePhaseII(summaryItem))
         {
 	simpleItemClone = CloneVar(simpleItem);
 	simpleItemClone->next = NULL;
         slSafeAddHead(&ret, simpleItemClone);
@@ -507,25 +582,15 @@
 return ret;
 }
 
 
-
-static void hapmapLoad(struct track *tg)
-/* load filtered data */
+static void hapmapLoadPhaseII(struct track *tg)
+/* load filtered data from HapMap PhaseII (4 populations) */
 {
 struct sqlConnection *conn = hAllocConn(database);
 struct sqlResult *sr = NULL;
 char **row = NULL;
 int rowOffset;
-
-loadFilters();
-
-if (allDefaults() || !hTableExists(database, "hapmapAllelesSummary")) 
-    {
-    hapmapLoadSimple(tg);
-    return;
-    }
-
 /* first load summary data */
 struct hapmapAllelesSummary *summaryLoadItem, *summaryItemList = NULL;
 sr = hRangeQuery(conn, "hapmapAllelesSummary", chromName, winStart, winEnd, NULL, &rowOffset);
 while ((row = sqlNextRow(sr)) != NULL)
@@ -535,11 +600,17 @@
     }
 sqlFreeResult(&sr);
 slSort(&summaryItemList, bedCmp);
 
-if (sameString(tg->mapName, "hapmapAllelesChimp") || sameString(tg->mapName, "hapmapAllelesMacaque"))
-{
-    struct hapmapAllelesOrtho *orthoLoadItem, *orthoItemList = NULL, *orthoItemsFiltered = NULL;
+int i;
+char orthoTable[HDB_MAX_TABLE_STRING];
+for (i = 0;  i < HAP_ORTHO_COUNT;  i++)
+    {
+    safef(orthoTable, sizeof(orthoTable), "hapmapAlleles%s", hapmapOrthoSpecies[i]);
+    if (sameString(tg->mapName, orthoTable))
+	{
+	struct hapmapAllelesOrtho *orthoLoadItem, *orthoItemList = NULL;
+	struct hapmapAllelesOrtho *orthoItemsFiltered = NULL;
     sr = hRangeQuery(conn, tg->mapName, chromName, winStart, winEnd, NULL, &rowOffset);
     while ((row = sqlNextRow(sr)) != NULL)
         {
         orthoLoadItem = hapmapAllelesOrthoLoad(row+rowOffset);
@@ -547,12 +618,13 @@
         }
     sqlFreeResult(&sr);
     hFreeConn(&conn);
     slSort(&orthoItemList, bedCmp);
-    orthoItemsFiltered = filterOrthoList(orthoItemList, summaryItemList);
+	orthoItemsFiltered = filterOrthoListPhaseII(orthoItemList, summaryItemList);
     tg->items = orthoItemsFiltered;
     return;
-}
+	}
+    }
 
 struct hapmapSnps *simpleLoadItem, *simpleItemList = NULL, *simpleItemsFiltered = NULL;
 sr = hRangeQuery(conn, tg->mapName, chromName, winStart, winEnd, NULL, &rowOffset);
 while ((row = sqlNextRow(sr)) != NULL)
@@ -562,12 +634,174 @@
     }
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 slSort(&simpleItemList, bedCmp);
-simpleItemsFiltered = filterSimpleList(simpleItemList, summaryItemList);
+simpleItemsFiltered = filterSimpleListPhaseII(simpleItemList, summaryItemList);
 tg->items = simpleItemsFiltered;
 }
 
+struct hash *loadSummaryHashPhaseIII(struct sqlConnection *conn)
+/* Load PhaseIII summary info into a hash (key = SNP rsId). */
+{
+struct hash *summaryHash = hashNew(14);
+char **row;
+int rowOffset;
+struct sqlResult *sr = hRangeQuery(conn, "hapmapPhaseIIISummary",
+				   chromName, winStart, winEnd, NULL, &rowOffset);
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    struct hapmapPhaseIIISummary *s = hapmapPhaseIIISummaryLoad(row+rowOffset);
+    hashAdd(summaryHash, s->name, s);
+    }
+sqlFreeResult(&sr);
+return summaryHash;
+}
+
+boolean filterOnePhaseIII(struct hapmapPhaseIIISummary *summary)
+/* return TRUE if summary is disqualified for any reason */
+{
+boolean complexObserved = isComplexObserved(summary->observed);
+boolean transitionObserved = isTransitionObserved(summary->observed);
+
+if (sameString(mixedFilter, "mixed") && !summary->isMixed) 
+    return TRUE;
+if (sameString(mixedFilter, "not mixed") && summary->isMixed) 
+    return TRUE;
+
+if (sameString(popCountFilter, "all 11 Phase III populations") && summary->popCount != 11) 
+    return TRUE;
+if (sameString(popCountFilter, "all 4 Phase II populations") && summary->phaseIIPopCount != 4)
+    return TRUE;
+
+if (filterObserved(complexObserved, transitionObserved))
+    return TRUE;
+
+if (filterFreq(summary->isMixed, summary->minFreq, summary->maxFreq))
+    return TRUE;
+
+if (summary->score/1000.0 < minHetFilter || summary->score/1000.0 > maxHetFilter) return TRUE;
+
+int i;
+for (i = 0;  i < HAP_PHASEIII_POPCOUNT;  i++)
+    {
+    if (summary->foundInPop[i])
+	{
+	if (sameString(monoFilter[i], "yes") && !summary->monomorphicInPop[i]) return TRUE;
+	if (sameString(monoFilter[i], "no")  &&  summary->monomorphicInPop[i]) return TRUE;
+	}
+    }
+
+/* ortho quality and alleles */
+for (i = 0;  i < HAP_ORTHO_COUNT; i++)
+    {
+    if (summary->orthoQuals[i] < orthoQualFilter[i]) return TRUE;
+    /* if any filter set, data must be available */
+    if (differentString(orthoFilter[i], "no filter") && summary->orthoAlleles[i] == 'N')
+	return TRUE;
+    /* If mixed, then we can't tell if we have a match, so must filter out. */
+    if (summary->isMixed &&
+	(sameString(orthoFilter[i], "matches major human allele") ||
+	 sameString(orthoFilter[i], "matches minor human allele")))
+	return TRUE;
+    if (sameString(orthoFilter[i], "matches major human allele") &&
+	summary->orthoAlleles[i] != summary->overallMajorAllele)
+	return TRUE;
+    if (sameString(orthoFilter[i], "matches minor human allele") &&
+	summary->orthoAlleles[i] != summary->overallMinorAllele)
+	return TRUE;
+    /* mismatches */
+    if (sameString(orthoFilter[i], "matches neither human allele"))
+	{
+	char observed[128];
+	safecpy(observed, sizeof(observed), summary->observed);
+	#define MAX_OBS_ALS 10
+	char *obsAls[MAX_OBS_ALS];
+	int alCount = chopByChar(observed, '/', obsAls, ArraySize(obsAls));
+	if (alCount > MAX_OBS_ALS)
+	    errAbort("Too many observed alleles for HapMap SNP %s", summary->name);
+	int j;
+	for (j = 0;  j < alCount;  j++)
+	    if (strlen(obsAls[j]) == 1 && summary->orthoAlleles[i] == obsAls[j][0]) return TRUE;
+	}
+    }
+return FALSE;
+}
+
+boolean loadOrthosPhaseIII(struct track *tg, struct sqlConnection *conn, struct hash *summaryHash)
+/* If tg->mapName is a hapmapAlleles table, load & filter into tg->items and return TRUE. */
+{
+int i;
+char orthoTable[HDB_MAX_TABLE_STRING];
+for (i = 0;  i < HAP_ORTHO_COUNT;  i++)
+    {
+    safef(orthoTable, sizeof(orthoTable), "hapmapAlleles%s", hapmapOrthoSpecies[i]);
+    if (sameString(tg->mapName, orthoTable))
+	{
+	struct hapmapAllelesOrtho *item, *itemList = NULL;
+	struct hapmapPhaseIIISummary *summary;
+	int rowOffset;
+	struct sqlResult *sr = hRangeQuery(conn, tg->mapName,
+					   chromName, winStart, winEnd, NULL, &rowOffset);
+	char **row;
+	while ((row = sqlNextRow(sr)) != NULL)
+	    {
+	    item = hapmapAllelesOrthoLoad(row+rowOffset);
+	    summary = (struct hapmapPhaseIIISummary *)hashMustFindVal(summaryHash, item->name);
+	    if (!filterOnePhaseIII(summary))
+		slAddHead(&itemList, item);
+	    }
+	sqlFreeResult(&sr);
+	slSort(&itemList, bedCmp);
+	tg->items = itemList;
+	return TRUE;
+	}
+    }
+return FALSE;
+}
+
+static void hapmapLoadPhaseIII(struct track *tg)
+/* load filtered data from HapMap PhaseIII (11 populations) */
+{
+struct sqlConnection *conn = hAllocConn(database);
+char **row = NULL;
+int rowOffset;
+struct hash *summaryHash = loadSummaryHashPhaseIII(conn);
+
+if (loadOrthosPhaseIII(tg, conn, summaryHash))
+    return;
+
+struct hapmapSnps *item, *itemList = NULL;
+struct hapmapPhaseIIISummary *summary;
+struct sqlResult *sr = hRangeQuery(conn, tg->mapName, chromName, winStart, winEnd, NULL, &rowOffset);
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    item = hapmapSnpsLoad(row+rowOffset);
+    summary = (struct hapmapPhaseIIISummary *)hashMustFindVal(summaryHash, item->name);
+    if (!filterOnePhaseIII(summary))
+	slAddHead(&itemList, item);
+    }
+sqlFreeResult(&sr);
+hFreeConn(&conn);
+slSort(&itemList, bedCmp);
+tg->items = itemList;
+}
+
+
+static void hapmapLoad(struct track *tg)
+/* load filtered data */
+{
+boolean isPhaseIII = sameString(trackDbSettingOrDefault(tg->tdb, "hapmapPhase", "II"), "III");
+loadFilters(isPhaseIII);
+if (allDefaults(isPhaseIII) ||
+    (isPhaseIII && !hTableExists(database, "hapmapPhaseIIISummary")) ||
+    (!isPhaseIII & !hTableExists(database, "hapmapAllelesSummary")))
+    hapmapLoadSimple(tg);
+else if (isPhaseIII)
+    hapmapLoadPhaseIII(tg);
+else
+    hapmapLoadPhaseII(tg);
+}
+
 void hapmapDrawAt(struct track *tg, void *item, struct hvGfx *hvg, int xOff, int y, double scale, MgFont *font, Color color, enum trackVisibility vis)
 /* Draw the hapmap snps at a given position. */
 /* Display allele when zoomed to base level. */
 {
@@ -584,18 +818,26 @@
 char *allele = NULL;
 char *strand = NULL;
 int chromStart = 0;
 int chromEnd = 0;
-
-if (sameString(tg->mapName, "hapmapAllelesChimp") || sameString(tg->mapName, "hapmapAllelesMacaque"))
+boolean isOrtho = FALSE;
+int i;
+char orthoTable[HDB_MAX_TABLE_STRING];
+for (i = 0;  i < HAP_ORTHO_COUNT;  i++)
+    {
+    safef(orthoTable, sizeof(orthoTable), "hapmapAlleles%s", hapmapOrthoSpecies[i]);
+    if (sameString(tg->mapName, orthoTable))
     {
     struct hapmapAllelesOrtho *thisItem = item;
     chromStart = thisItem->chromStart;
     chromEnd = thisItem->chromEnd;
     allele = cloneString(thisItem->orthoAllele);
     strand = cloneString(thisItem->strand);
+	isOrtho = TRUE;
+	break;
     }
-else
+    }
+if (!isOrtho)
     {
     struct hapmapSnps *thisItem = item;
     chromStart = thisItem->chromStart;
     chromEnd = thisItem->chromEnd;
@@ -640,15 +882,21 @@
 int totalCount = 0;
 int minorCount = 0;
 int gradient = 0;
 
-if (sameString(tg->mapName, "hapmapAllelesChimp") || sameString(tg->mapName, "hapmapAllelesMacaque"))
+int i;
+char orthoTable[HDB_MAX_TABLE_STRING];
+for (i = 0;  i < HAP_ORTHO_COUNT;  i++)
+    {
+    safef(orthoTable, sizeof(orthoTable), "hapmapAlleles%s", hapmapOrthoSpecies[i]);
+    if (sameString(tg->mapName, orthoTable))
     {
     struct hapmapAllelesOrtho *thisItem = item;
     gradient = grayInRange(thisItem->score, 0, 100);
     col = shadesOfBrown[gradient];
     return col;
     }
+    }
 
 struct hapmapSnps *thisItem = item;
 /* Could also use score, since I've set that based on minor allele frequency. */
 totalCount = thisItem->homoCount1 + thisItem->homoCount2;