04a8bccb5e10e09d1c37b01541389aa5083ccf7b angie Mon Feb 7 15:25:24 2011 -0800 Track #1684 (SNPs 132 (dbSNP)): Added filtering by allele frequency: min/max minor allele freq and min chromosome sample count (2N). diff --git src/hg/hgTracks/variation.c src/hg/hgTracks/variation.c index 0ebe7ba..695a9d5 100644 --- src/hg/hgTracks/variation.c +++ src/hg/hgTracks/variation.c @@ -1,26 +1,29 @@ /* variation.c - hgTracks routines that are specific to the tracks in * the variation group */ #include "variation.h" #include "imageV2.h" static char const rcsid[] = "$Id: variation.c,v 1.148 2010/06/07 16:54:21 angie Exp $"; static double snp125AvHetCutoff = SNP125_DEFAULT_MIN_AVHET; static int snp125WeightCutoff = SNP125_DEFAULT_MAX_WEIGHT; -static int snp132MinSubmitters = 0; +static int snp132MinSubmitters = SNP132_DEFAULT_MIN_SUBMITTERS; +static float snp132MinMinorAlFreq = SNP132_DEFAULT_MIN_MINOR_AL_FREQ; +static float snp132MaxMinorAlFreq = SNP132_DEFAULT_MAX_MINOR_AL_FREQ; +static int snp132MinAlFreq2N = SNP132_DEFAULT_MIN_AL_FREQ_2N; // Globals for caching cart coloring and filtering settings for snp125+ tracks: static enum snp125ColorSource snp125ColorSource = SNP125_DEFAULT_COLOR_SOURCE; static enum snp125Color *snp125LocTypeCart = NULL; static enum snp125Color *snp125ClassCart = NULL; static enum snp125Color *snp125MolTypeCart = NULL; static enum snp125Color *snp125ValidCart = NULL; static struct hash *snp125FuncCartColorHash = NULL; static struct hash *snp125FuncCartNameHash = NULL; static enum snp125Color *snp132ExceptionsCart = NULL; static enum snp125Color *snp132BitfieldsCart = NULL; static boolean snp125LocTypeFilterOn = FALSE; static boolean snp125ClassFilterOn = FALSE; static boolean snp125MolTypeFilterOn = FALSE; @@ -107,30 +110,70 @@ if (el->weight > snp125WeightCutoff) return FALSE; return TRUE; } boolean snp132MinSubmittersFilterItem(void *item) /* Return TRUE if item passes filter. 
*/ { struct snp132Ext *el = item; if (el->submitterCount < snp132MinSubmitters) return FALSE; return TRUE; } +static float snp132MajorAlleleFreq(const struct snp132Ext *snp) +/* Some SNPs have >2 alleles, so minor allele frequency is harder to define. + * So instead, I'm using major allele frequency -- (1 - major) can be a proxy for minor. */ +{ +float majorAlF = 0.0; +int i; +for (i = 0; i < snp->alleleFreqCount; i++) + if (snp->alleleFreqs[i] > majorAlF) + majorAlF = snp->alleleFreqs[i]; +return majorAlF; +} + +static boolean snp132MinorAlFreqFilterItem(void *item) +/* Return TRUE if item passes filter, i.e. has (1 - major allele freq) within [min, max] + * (but if the range has not been changed from defaults, don't require that item has + * any allele frequency data). */ +{ +if (snp132MinMinorAlFreq == SNP132_DEFAULT_MIN_MINOR_AL_FREQ && + snp132MaxMinorAlFreq == SNP132_DEFAULT_MAX_MINOR_AL_FREQ) + return TRUE; +struct snp132Ext *el = item; +float majorAlFreq = snp132MajorAlleleFreq(el); +return (((1.0 - majorAlFreq) >= snp132MinMinorAlFreq) && + ((1.0 - majorAlFreq) <= snp132MaxMinorAlFreq)); +} + +static boolean snp132MinAlFreq2NFilterItem(void *item) +/* Return TRUE if item passes filter, i.e. has a 2N chromosome count >= threshold + * (but if threshold is 0, don't require that item has any allele frequency data). */ +{ +if (snp132MinAlFreq2N == 0) + return TRUE; +struct snp132Ext *el = item; +int twoN = 0; +int i; +for (i = 0; i < el->alleleFreqCount; i++) + twoN += (int)(round(el->alleleNs[i])); +return (twoN >= snp132MinAlFreq2N); +} + boolean snpSourceFilterItem(struct track *tg, void *item) /* Return TRUE if item passes filter, i.e. has no excluded property. 
*/ { struct snp *el = item; int snpSource = 0; for (snpSource=0; snpSource<snpSourceCartSize; snpSource++) if (containsStringNoCase(el->source,snpSourceDataName[snpSource])) if (sameString(snpSourceCart[snpSource], "exclude") ) return FALSE; return TRUE; } boolean snpMolTypeFilterItem(struct track *tg, void *item) /* Return TRUE if item passes filter, i.e. has no excluded property. */ @@ -341,30 +384,32 @@ { struct slList *newList = NULL, *el, *next; for (el = tg->items; el != NULL; el = next) { next = el->next; if (snp125AvHetFilterItem(el) && snp125WeightFilterItem(el) && snp125MolTypeFilterItem(el) && snp125ClassFilterItem(el) && snp125ValidFilterItem(el) && snp125FuncFilterItem(el) && (version >= 128 || snp125LocTypeFilterItem(el)) && (version < 132 || (snp132MinSubmittersFilterItem(el) && + snp132MinorAlFreqFilterItem(el) && + snp132MinAlFreq2NFilterItem(el) && snp132ExceptionFilterItem(el) && snp132BitfieldFilterItem(el)))) slAddHead(&newList, el); } slReverse(&newList); tg->items = newList; } struct orthoBed /* Abbreviated version of orthoAlleles: bed4 plus chimp allele */ { struct orthoBed *next; /* Next in singly linked list. */ char *chrom; /* Human chromosome or FPC contig */ unsigned chromStart; /* Start position in chromosome */ unsigned chromEnd; /* End position in chromosome */ @@ -469,42 +514,30 @@ continue; } /* update the snp->name with the ortho data */ dyStringPrintf(extra, "%s %s>%s", snpItem->name, orthoItem->chimp, snpItem->observed); snpItem->name = cloneString(extra->string); dyStringClear(extra); /* increment the list pointers */ snpItem = snpItem->next; orthoItem = orthoItem->next; } freeDyString(&extra); sqlFreeResult(&sr); hFreeConn(&conn); } -static float snp132MajorAlleleFreq(const struct snp132Ext *snp) -/* Some SNPs have >2 alleles, so minor allele frequency is harder to define. - * So instead, I'm using major allele frequency -- (1 - major) can be a proxy for minor. 
*/ -{ -float majorAlF = 0.0; -int i; -for (i = 0; i < snp->alleleFreqCount; i++) - if (snp->alleleFreqs[i] > majorAlF) - majorAlF = snp->alleleFreqs[i]; -return majorAlF; -} - static Color snp132ColorByAlleleFreq(struct snp132Ext *snp, struct hvGfx *hvg) /* If snp has allele freq data, return a shade from red (rare) to blue (common); * otherwise return black. */ { static boolean colorsInited = FALSE; static Color redToBlue[EXPR_DATA_SHADES]; static struct rgbColor red = {255, 0, 0}; static struct rgbColor blue = {0, 0, 255}; if (!colorsInited) hvGfxMakeColorGradient(hvg, &red, &blue, EXPR_DATA_SHADES, redToBlue); if (snp->alleleFreqCount > 0) { float majorAlF = snp132MajorAlleleFreq(snp); // >2 common alleles (e.g. at VNTR sites) can cause low major allele freq; // cap at 0.5 to avoid overflow in the shade calculation. @@ -608,30 +641,36 @@ char cartVar[512]; safef(cartVar, sizeof(cartVar), "%s.minAvHet", track); snp125AvHetCutoff = cartUsualDouble(cart, cartVar, // Check old cart var name: cartUsualDouble(cart, "snp125AvHetCutoff", SNP125_DEFAULT_MIN_AVHET)); safef(cartVar, sizeof(cartVar), "%s.maxWeight", track); int defaultMaxWeight = SNP125_DEFAULT_MAX_WEIGHT; char *setting = trackDbSetting(tdb, "defaultMaxWeight"); if (isNotEmpty(setting)) defaultMaxWeight = atoi(setting); snp125WeightCutoff = cartUsualInt(cart, cartVar, // Check old cart var name and tdb default: cartUsualInt(cart, "snp125WeightCutoff", defaultMaxWeight)); safef(cartVar, sizeof(cartVar), "%s.minSubmitters", track); snp132MinSubmitters = cartUsualInt(cart, cartVar, SNP132_DEFAULT_MIN_SUBMITTERS); +safef(cartVar, sizeof(cartVar), "%s.minMinorAlFreq", track); +snp132MinMinorAlFreq = cartUsualDouble(cart, cartVar, SNP132_DEFAULT_MIN_MINOR_AL_FREQ); +safef(cartVar, sizeof(cartVar), "%s.maxMinorAlFreq", track); +snp132MaxMinorAlFreq = cartUsualDouble(cart, cartVar, SNP132_DEFAULT_MAX_MINOR_AL_FREQ); +safef(cartVar, sizeof(cartVar), "%s.minAlFreq2N", tdb->track); +snp132MinAlFreq2N = 
cartUsualInt(cart, cartVar, SNP132_DEFAULT_MIN_AL_FREQ_2N); snp125MolTypeFilter = snp125FilterFromCart(cart, track, "molType", &snp125MolTypeFilterOn); snp125ClassFilter = snp125FilterFromCart(cart, track, "class", &snp125ClassFilterOn); snp125ValidFilter = snp125FilterFromCart(cart, track, "valid", &snp125ValidFilterOn); snp125FuncFilter = snp125FilterFromCart(cart, track, "func", &snp125FuncFilterOn); snp125LocTypeFilter = snp125FilterFromCart(cart, track, "locType", &snp125LocTypeFilterOn); snp132ExceptionFilter = snp125FilterFromCart(cart, track, "exceptions", &snp132ExceptionFilterOn); snp132BitfieldFilter = snp125FilterFromCart(cart, track, "bitfields", &snp132BitfieldFilterOn); snp125ColorSource = snp125ColorSourceFromCart(cart, tdb); snp125MolTypeCart = snp125ColorsFromCart(track, "molType", snp125MolTypeOldColorVars, TRUE, snp125MolTypeDefault, snp125MolTypeArraySize); snp125ClassCart = snp125ColorsFromCart(track, "class", snp125ClassOldColorVars, TRUE, snp125ClassDefault, snp125ClassArraySize); snp125ValidCart = snp125ColorsFromCart(track, "valid", snp125ValidOldColorVars, TRUE,