380a1b308bd3bb4f4e52d89ef9e1ccb962892bab angie Tue Oct 3 14:10:37 2017 -0700 Major changes to annoGratorGpVar, annoFormatVep and gpFx.c with the addition of functional effect prediction to variantProjector using PSL+CDS from annoStreamDbPslPlus, which enables accurate predictions even when the genome and transcript have indel differences. struct gpFx includes new members exonCount, txRef and txAlt so that gpFx and variantProjector can compute those and send them forward to annoFormatVep, instead of annoFormatVep computing them assuming that genome and transcript match perfectly. annoGratorGpVar passes forward the new gpFx members in output columns and, when input is PSL+CDS instead of genePred, uses variantProjector instead of gpFx to do functional predictions. diff --git src/hg/lib/gpFx.c src/hg/lib/gpFx.c index 3432428..a803b7f 100644 --- src/hg/lib/gpFx.c +++ src/hg/lib/gpFx.c @@ -208,39 +208,39 @@ if (varStart < pred->cdsEnd && varEnd > pred->cdsStart) txc.endInCds = txc.cdsSize; } if (pred->strand[0] == '-') txc = txCoordsReverse(&txc); if ((txc.startInCdna == -1) != (txc.endInCdna == -1) || (txc.startInCds >= 0 && txc.endInCds < 0)) errAbort("getTxCoords: inconsistent start/ends for variant %s:%d-%d in %s at %s:%d-%d: " "startInCdna=%d, endInCdna=%d; startInCds=%d, endInCds=%d", variant->chrom, varStart+1, varEnd, pred->name, pred->chrom, pred->txStart, pred->txEnd, txc.startInCdna, txc.endInCdna, txc.startInCds, txc.endInCds); return txc; } -struct gpFx *gpFxNew(char *allele, char *transcript, enum soTerm soNumber, +struct gpFx *gpFxNew(char *gAllele, char *transcript, enum soTerm soNumber, enum detailType detailType, struct lm *lm) /* Fill in the common members of gpFx; leave soTerm-specific members for caller to fill in. */ { struct gpFx *effect; lmAllocVar(lm, effect); -effect->allele = collapseDashes(lmCloneString(lm, allele)); -if (isAllNt(effect->allele, strlen(effect->allele))) - touppers(effect->allele); +effect->gAllele = collapseDashes(lmCloneString(lm, gAllele)); +if (isAllNt(effect->gAllele, strlen(effect->gAllele))) + touppers(effect->gAllele); effect->transcript = lmCloneString(lm, transcript); effect->soNumber = soNumber; effect->detailType = detailType; return effect; } static char * mergeAllele(char *transcript, int offset, int variantWidth, char *newAlleleSeq, int alleleLength, struct lm *lm) /* merge a variant into an allele */ { char *newTranscript = NULL; //#*** This will be incorrect for an MNV that spans exon boundary -- //#*** so we should also clip allele to cds portion(s?!) before calling this. if (variantWidth == alleleLength) { @@ -256,74 +256,79 @@ // copy over the part before the variant memcpy(newTranscript, transcript, offset); // copy in the new allele memcpy(&newTranscript[offset], newAlleleSeq, alleleLength); // copy in the part after the variant memcpy(&newTranscript[offset + alleleLength], restOfTranscript, strlen(restOfTranscript) + 1); } return newTranscript; } -static void setNCExonVals(struct gpFx *gpFx, int exonIx, int cdnaPos) +void gpFxSetNoncodingInfo(struct gpFx *gpFx, int exonIx, int exonCount, int cdnaPos, + char *txRef, char *txAlt, struct lm *lm) /* This gpFx is for a variant in exon of non-coding gene or UTR exon of coding gene; * set details.nonCodingExon values. */ { gpFx->details.nonCodingExon.exonNumber = exonIx; +gpFx->details.nonCodingExon.exonCount = exonCount; gpFx->details.nonCodingExon.cDnaPosition = cdnaPos; +gpFx->details.nonCodingExon.txRef = lmCloneString(lm, txRef); +gpFx->details.nonCodingExon.txAlt = lmCloneString(lm, txAlt); } static struct gpFx *gpFxCheckUtr( struct allele *allele, struct genePred *pred, struct txCoords *txc, int exonIx, boolean predIsNmd, - struct lm *lm) + char *txRef, char *txAlt, struct lm *lm) /* check for effects in UTR of coding gene -- caller ensures it's in exon, pred is coding * and exonIx has been strand-adjusted */ { struct gpFx *gpFx = NULL; enum soTerm term = 0; struct variant *variant = allele->variant; if ((variant->chromStart < pred->cdsStart && variant->chromEnd > pred->txStart) || (variant->chromStart == pred->cdsStart && variant->chromEnd == pred->cdsStart)) // insertion // we're in left UTR term = (*pred->strand == '-') ? _3_prime_UTR_variant : _5_prime_UTR_variant; else if ((variant->chromStart < pred->txEnd && variant->chromEnd > pred->cdsEnd) || (variant->chromStart == pred->cdsEnd && variant->chromEnd == pred->cdsEnd)) //insertion // we're in right UTR term = (*pred->strand == '-') ? _5_prime_UTR_variant : _3_prime_UTR_variant; if (term != 0) { if (predIsNmd) // This transcript is already subject to nonsense-mediated decay, so the effect // is probably not a big deal: term = NMD_transcript_variant; gpFx = gpFxNew(allele->sequence, pred->name, term, nonCodingExon, lm); - setNCExonVals(gpFx, exonIx, txc->startInCdna); + gpFxSetNoncodingInfo(gpFx, exonIx, pred->exonCount, txc->startInCdna, txRef, txAlt, lm); } return gpFx; } static struct gpFx *gpFxChangedNoncodingExon(struct allele *allele, struct genePred *pred, - struct txCoords *txc, int exonIx, struct lm *lm) + struct txCoords *txc, int exonIx, + char *txRef, char *txAlt, struct lm *lm) /* generate an effect for a variant in a non-coding transcript */ { struct gpFx *gpFx = gpFxNew(allele->sequence, pred->name, non_coding_transcript_exon_variant, nonCodingExon, lm); -setNCExonVals(gpFx, exonIx, txc->startInCdna); +gpFxSetNoncodingInfo(gpFx, exonIx, pred->exonCount, txc->startInCdna, txRef, txAlt, lm); return gpFx; } static int getCodingOffsetInTx(struct genePred *pred, char strand) /* Skip past UTR (portions of) exons to get offset of CDS relative to transcript start. * The strand arg is used instead of pred->strand. */ { int offset = 0; int iStart = 0, iIncr = 1; boolean isRc = (strand == '-'); if (isRc) { // Work our way left from the last exon. iStart = pred->exonCount - 1; iIncr = -1; @@ -501,49 +506,42 @@ { if (codingSeq == NULL) errAbort("truncateAtStopCodon: null input"); char *p = codingSeq; while (p[0] != '\0' && p[1] != '\0' && p[2] != '\0') { if (isStopCodon(p)) { p[3] = '\0'; break; } p += 3; } } -static char *gpFxModifyCodingSequence(char *oldCodingSeq, struct genePred *pred, - int startInCds, int endInCds, struct allele *allele, +static char *gpFxModifyCodingSequence(char *oldCodingSeq, int startInCds, int endInCds, char *alt, int *retCdsBasesAdded, struct lm *lm) /* Return a new coding sequence that is oldCodingSeq with allele applied. */ { -boolean isRc = (pred->strand[0] == '-'); -char *newAlleleSeq = allele->sequence; +char *newAlleleSeq = alt; int newAlLen = strlen(newAlleleSeq); if (! isAllNt(newAlleleSeq, newAlLen)) { // symbolic -- may be deletion or insertion, but we can't tell. :( newAlleleSeq = ""; newAlLen = 0; } -if (isRc && newAlLen > 0) - { - newAlleleSeq = lmCloneString(lm, newAlleleSeq); - reverseComplement(newAlleleSeq, newAlLen); - } int variantSizeOnCds = endInCds - startInCds; if (variantSizeOnCds < 0) errAbort("gpFx: endInCds (%d) < startInCds (%d)", endInCds, startInCds); char *newCodingSeq = mergeAllele(oldCodingSeq, startInCds, variantSizeOnCds, newAlleleSeq, newAlLen, lm); // If newCodingSequence has an early stop, truncate there: truncateAtStopCodon(newCodingSeq); if (retCdsBasesAdded) *retCdsBasesAdded = newAlLen - variantSizeOnCds; return newCodingSeq; } static void setSpecificCodingSoTerm(struct gpFx *effect, char *oldAa, char *newAa, int cdsBasesAdded) /* Assuming that deletions are marked with dashes in newCodingSequence, @@ -572,32 +570,32 @@ effect->soNumber = stop_lost; else effect->soNumber = inframe_deletion; } else effect->soNumber = frameshift_variant; } else { // Not a deletion; could be single-base (including early stop) or insertion if (newAaSize < oldAaSize) { // Not a deletion but protein got smaller; must have been an early stop codon, // possibly inserted or following a frameshift caused by an insertion. int frame = cc->cdsPosition % 3; - int alleleLength = strlen(effect->allele); - if (! isAllNt(effect->allele, alleleLength)) + int alleleLength = strlen(cc->txAlt); + if (! isAllNt(cc->txAlt, alleleLength)) // symbolic -- may be deletion or insertion, but we can't tell. :( alleleLength = 0; int i, affectedCodons = (frame + alleleLength + 2) / 3; boolean stopGain = FALSE; for (i = 0; i < affectedCodons; i++) if (cc->aaNew[i] == 'Z') { effect->soNumber = stop_gained; stopGain = TRUE; break; } if (! stopGain) { if (newAa[newAaSize-1] != 'Z') errAbort("gpFx: new protein is smaller but last base in new sequence " @@ -626,60 +624,64 @@ { if (oldAa[oldAaSize-1] == 'Z') effect->soNumber = stop_lost; else effect->soNumber = incomplete_terminal_codon_variant; } else effect->soNumber = missense_variant; } } } } static struct gpFx *gpFxChangedCds(struct allele *allele, struct genePred *pred, struct txCoords *txc, int exonIx, boolean predIsNmd, - struct dnaSeq *transcriptSequence, struct lm *lm) + struct dnaSeq *transcriptSequence, char *txRef, char *txAlt, + struct lm *lm) /* calculate effect of allele change on coding transcript */ { // calculate original and variant coding DNA and AA's boolean addedBasesForFrame = FALSE; char *oldCodingSequence = getCodingSequence(pred, transcriptSequence->dna, &addedBasesForFrame, lm); int startInCds = txc->startInCds, endInCds = txc->endInCds; if (addedBasesForFrame) { // The annotated CDS exons were not all in frame, so getCodingSequence added 'N's // and now we can't simply use txc->startInCds. startInCds = getCorrectedCdsOffset(pred, txc->startInCds); endInCds = getCorrectedCdsOffset(pred, txc->endInCds); } int oldCdsLen = strlen(oldCodingSequence); char *oldaa = lmSimpleTranslate(lm, oldCodingSequence, oldCdsLen); int cdsBasesAdded = 0; -char *newCodingSequence = gpFxModifyCodingSequence(oldCodingSequence, pred, startInCds, endInCds, - allele, &cdsBasesAdded, lm); +char *newCodingSequence = gpFxModifyCodingSequence(oldCodingSequence, startInCds, endInCds, + txAlt, &cdsBasesAdded, lm); int newCdsLen = strlen(newCodingSequence); char *newaa = lmSimpleTranslate(lm, newCodingSequence, newCdsLen); // allocate the effect structure - fill in soNumber and details below struct gpFx *effect = gpFxNew(allele->sequence, pred->name, coding_sequence_variant, codingChange, lm); struct codingChange *cc = &effect->details.codingChange; cc->cDnaPosition = txc->startInCdna; +cc->txRef = lmCloneString(lm, txRef); +cc->txAlt = lmCloneString(lm, txAlt); cc->cdsPosition = startInCds; cc->exonNumber = exonIx; +cc->exonCount = pred->exonCount; int pepPos = startInCds / 3; // At this point we don't use genePredExt's exonFrames field -- we just assume that // the CDS starts in frame. That's not always the case (e.g. ensGene has some CDSs // that begin out of frame), so watch out for early truncation of oldCodingSequence // due to stop codon in the wrong frame: if (pepPos >= strlen(oldaa)) return effect; cc->pepPosition = pepPos; if (cdsBasesAdded % 3 == 0) { // Common case: substitution, same number of old/new codons/peps: int refPepEnd = (endInCds + 2) / 3; int numOldCodons = refPepEnd - pepPos, numNewCodons = numOldCodons; if (cdsBasesAdded > 0) { @@ -694,38 +696,50 @@ numOldCodons = numNewCodons + (-cdsBasesAdded / 3); } cc->codonOld = lmCloneStringZ(lm, oldCodingSequence + pepPos*3, numOldCodons*3); cc->codonNew = lmCloneStringZ(lm, newCodingSequence + pepPos*3, numNewCodons*3); cc->aaOld = lmCloneStringZ(lm, oldaa + pepPos, numOldCodons); cc->aaNew = lmCloneStringZ(lm, newaa + pepPos, numNewCodons); } else { // frameshift -- who knows how many codons we can reliably predict... cc->codonOld = lmCloneString(lm, oldCodingSequence + pepPos*3); cc->codonNew = lmCloneString(lm, newCodingSequence + pepPos*3); cc->aaOld = lmCloneString(lm, oldaa + pepPos); cc->aaNew = lmCloneString(lm, newaa + pepPos); } +if (cdsBasesAdded != 0) + { + // indel; trim identical bases at the beginning/end, except for stop codon at end. + int pepLen = strlen(cc->aaOld); + uint pepEnd = pepPos + pepLen; + boolean endsWithStop = (cc->aaOld[pepLen-1] == 'Z'); + if (endsWithStop) + cc->aaOld[pepLen-1] = '!'; + int placeholder = 0; + trimRefAlt(cc->aaOld, cc->aaNew, &cc->pepPosition, &pepEnd, &placeholder, &placeholder); + if (endsWithStop) + cc->aaOld[strlen(cc->aaOld)-1] = 'Z'; + } if (predIsNmd) // This transcript is already subject to nonsense-mediated decay, so the effect // is probably not a big deal: effect->soNumber = NMD_transcript_variant; else setSpecificCodingSoTerm(effect, oldaa, newaa, cdsBasesAdded); - return effect; } boolean hasAltAllele(struct allele *alleles) /* Return TRUE if alleles include at least one non-reference allele. */ { while (alleles != NULL && alleles->isReference) alleles = alleles->next; return (alleles != NULL); } char *firstAltAllele(struct allele *alleles) /* Ensembl always reports an alternate allele, even if that allele is not being used * to calculate any consequence. When allele doesn't really matter, just use the @@ -737,142 +751,162 @@ errAbort("firstAltAllele: no alt allele in list"); return alleles->sequence; } static struct gpFx *gpFxInExon(struct variant *variant, struct txCoords *txc, int exonIx, struct genePred *pred, boolean predIsNmd, struct dnaSeq *transcriptSeq, struct lm *lm) /* Given a variant that overlaps an exon of pred, figure out what each allele does. */ { struct gpFx *effectsList = NULL; struct allele *allele = variant->alleles; for ( ; allele ; allele = allele->next) { if (!allele->isReference) { + // Trim redundant bases from transcript ref and alt and adjust cdna and cds coords. + struct txCoords txcTrim = *txc; + int refLen = txcTrim.endInCdna - txcTrim.startInCdna; + char txRef[refLen + 1]; + safencpy(txRef, sizeof(txRef), transcriptSeq->dna + txcTrim.startInCdna, refLen); + touppers(txRef); + int altLen = strlen(allele->sequence); + char txAlt[altLen + 1]; + safecpy(txAlt, sizeof(txAlt), allele->sequence); + if (pred->strand[0] == '-') + reverseComplement(txAlt, altLen); + trimRefAlt(txRef, txAlt, (uint *)&txcTrim.startInCdna, (uint *)&txcTrim.endInCdna, + &refLen, &altLen); + if (txcTrim.startInCds >= 0) + txcTrim.startInCds += (txcTrim.startInCdna - txc->startInCdna); + if (txcTrim.endInCds >= 0) + txcTrim.endInCds += (txcTrim.endInCdna - txc->endInCdna); if (pred->cdsStart != pred->cdsEnd) { // first find effects of allele in UTR, if any effectsList = slCat(effectsList, - gpFxCheckUtr(allele, pred, txc, exonIx, predIsNmd, lm)); - if (txc->startInCds >= 0) + gpFxCheckUtr(allele, pred, &txcTrim, exonIx, predIsNmd, + txRef, txAlt, lm)); + if (txcTrim.startInCds >= 0) effectsList = slCat(effectsList, - gpFxChangedCds(allele, pred, txc, exonIx, predIsNmd, - transcriptSeq, lm)); + gpFxChangedCds(allele, pred, &txcTrim, exonIx, predIsNmd, + transcriptSeq, txRef, txAlt, lm)); } else effectsList = slCat(effectsList, - gpFxChangedNoncodingExon(allele, pred, txc, exonIx, lm)); - + gpFxChangedNoncodingExon(allele, pred, &txcTrim, exonIx, + txRef, txAlt, lm)); if (!predIsNmd) { // Was entire exon deleted? int exonNumPos = exonIx; if (pred->strand[0] == '-') exonNumPos = pred->exonCount - 1 - exonIx; uint exonStart = pred->exonStarts[exonNumPos], exonEnd = pred->exonEnds[exonNumPos]; if (variant->chromStart <= exonStart && variant->chromEnd >= exonEnd) { - struct gpFx *effect = gpFxNew(allele->sequence, pred->name, exon_loss, + struct gpFx *effect = gpFxNew(allele->sequence, pred->name, exon_loss_variant, nonCodingExon, lm); - setNCExonVals(effect, exonIx, txc->startInCdna); + gpFxSetNoncodingInfo(effect, exonIx, pred->exonCount, txcTrim.startInCdna, + txRef, txAlt, lm); slAddTail(&effectsList, effect); } else { // If variant is in exon *but* within 3 bases of splice site, // it also qualifies as splice_region_variant: if ((variant->chromEnd > exonEnd-3 && variant->chromStart < exonEnd && exonIx < pred->exonCount - 1) || (variant->chromEnd > exonStart && variant->chromStart < exonStart+3 && exonIx > 0)) { struct gpFx *effect = gpFxNew(allele->sequence, pred->name, splice_region_variant, nonCodingExon, lm); - setNCExonVals(effect, exonIx, txc->startInCdna); + gpFxSetNoncodingInfo(effect, exonIx, pred->exonCount, txcTrim.startInCdna, + txRef, txAlt, lm); slAddTail(&effectsList, effect); } } } } } return effectsList; } -static struct gpFx *gpFxInIntron(struct variant *variant, struct txCoords *txc, int intronIx, +static struct gpFx *gpFxInIntron(struct variant *variant, int intronIx, struct genePred *pred, boolean predIsNmd, char *altAllele, struct lm *lm) // Annotate a variant that overlaps an intron (and possibly splice region) //#*** TODO: watch out for "introns" that are actually indels between tx seq and ref genome! { struct gpFx *effectsList = NULL; boolean minusStrand = (pred->strand[0] == '-'); // If on - strand, flip intron number back to + strand for getting intron coords: int intronPos = minusStrand ? (pred->exonCount - intronIx - 2) : intronIx; int intronStart = pred->exonEnds[intronPos]; int intronEnd = pred->exonStarts[intronPos+1]; if (variant->chromEnd > intronStart && variant->chromStart < intronEnd) { enum soTerm soNumber = intron_variant; if (variant->chromEnd > intronStart && variant->chromStart < intronStart+2) // Within 2 bases of intron start(/end for '-'): soNumber = minusStrand ? splice_acceptor_variant : splice_donor_variant; if (variant->chromEnd > intronEnd-2 && variant->chromStart < intronEnd) // Within 2 bases of intron end(/start for '-'): soNumber = minusStrand ? splice_donor_variant : splice_acceptor_variant; else if ((variant->chromEnd >= intronStart+2 && variant->chromStart < intronStart+8) || (variant->chromEnd > intronEnd-8 && variant->chromStart <= intronEnd-2)) // Within 3 to 8 bases of intron start or end: soNumber = splice_region_variant; if (predIsNmd) // This transcript is already subject to nonsense-mediated decay, so the effect // is probably not a big deal: soNumber = NMD_transcript_variant; struct gpFx *effects = gpFxNew(altAllele, pred->name, soNumber, intron, lm); effects->details.intron.intronNumber = intronIx; + effects->details.intron.intronCount = pred->exonCount - 1; slAddTail(&effectsList, effects); } return effectsList; } static struct gpFx *gpFxCheckTranscript(struct variant *variant, struct genePred *pred, struct dnaSeq *transcriptSeq, struct lm *lm) /* Check to see if variant overlaps an exon and/or intron of pred. */ { struct gpFx *effectsList = NULL; uint varStart = variant->chromStart, varEnd = variant->chromEnd; if (varStart < pred->txEnd && varEnd > pred->txStart) { boolean predIsNmd = genePredNmdTarget(pred); char *defaultAltAllele = firstAltAllele(variant->alleles); struct txCoords txc = getTxCoords(variant, pred); // Simplest case first: variant starts and ends in a single exon or single intron if (txc.startInExon == txc.endInExon && txc.startExonIx == txc.endExonIx) { int ix = txc.startExonIx; if (txc.startInExon) { // Exonic variant; figure out what kind: effectsList = slCat(effectsList, gpFxInExon(variant, &txc, ix, pred, predIsNmd, transcriptSeq, lm)); } else { // Intronic (and/or splice) variant: effectsList = slCat(effectsList, - gpFxInIntron(variant, &txc, ix, pred, predIsNmd, defaultAltAllele, - lm)); + gpFxInIntron(variant, ix, pred, predIsNmd, defaultAltAllele, lm)); } } else { if (!predIsNmd) { // Let the user beware -- this variant is just complex (it overlaps at least one // exon/intron boundary). It could be an insertion, an MNV (multi-nt var) or // a deletion. struct gpFx *effect = gpFxNew(defaultAltAllele, pred->name, complex_transcript_variant, none, lm); effectsList = slCat(effectsList, effect); } // But we can at least say which introns and/or exons are affected. // Transform exon and intron numbers into ordered integers, -1 (upstream) through @@ -885,65 +919,65 @@ // vieEnd == vieStart-1 ==> insertion at exon/intron boundary // vieEnd == vieStart-2 ==> insertion at exon-exon boundary (i.e. ref has deletion!) if ((vieEnd != vieStart-1 && vieEnd != vieStart-2) || varStart != varEnd) errAbort("gpFxCheckTranscript: expecting insertion in pred=%s " "but varStart=%d, varEnd=%d, vieStart=%d, vieEnd=%d, " "starts in %son, ends in %son", pred->name, varStart, varEnd, vieStart, vieEnd, (txc.startInExon ? "ex" : "intr"), (txc.endInExon ? "ex" : "intr")); // Since it's an insertion, remember that end is before start. if (txc.startInExon) { // Intronic end precedes exonic start. Watch out for upstream as "intron[-1]": if (txc.endExonIx >= 0) effectsList = slCat(effectsList, - gpFxInIntron(variant, &txc, txc.endExonIx, pred, predIsNmd, + gpFxInIntron(variant, txc.endExonIx, pred, predIsNmd, defaultAltAllele, lm)); effectsList = slCat(effectsList, gpFxInExon(variant, &txc, txc.startExonIx, pred, predIsNmd, transcriptSeq, lm)); } else { // Exonic end precedes intronic start. effectsList = slCat(effectsList, gpFxInExon(variant, &txc, txc.endExonIx, pred, predIsNmd, transcriptSeq, lm)); // Watch out for downstream as "intron[lastExonIx]" if (txc.startExonIx < txc.exonCount - 1) effectsList = slCat(effectsList, - gpFxInIntron(variant, &txc, txc.startExonIx, pred, + gpFxInIntron(variant, txc.startExonIx, pred, predIsNmd, defaultAltAllele, lm)); } } // end if variant is insertion else { // MNV or deletion - consider each overlapping intron and/or exon int ie; // Watch out for upstream (vieStart < 0) and downstream (vieEnd > last exon). for (ie = max(vieStart, 0); ie <= min(vieEnd, 2*(pred->exonCount-1)); ie++) { boolean isExon = (ie%2 == 0); int ix = ie / 2; if (isExon) effectsList = slCat(effectsList, gpFxInExon(variant, &txc, ix, pred, predIsNmd, transcriptSeq, lm)); else effectsList = slCat(effectsList, - gpFxInIntron(variant, &txc, ix, pred, predIsNmd, + gpFxInIntron(variant, ix, pred, predIsNmd, defaultAltAllele, lm)); } // end for each (partial) exon/intron overlapping variant } // end if variant is MNV or deletion } // end if variant is complex } // end if variant overlaps pred return effectsList; } static struct gpFx *gpFxCheckUpDownstream(struct variant *variant, struct genePred *pred, struct lm *lm) // check to see if the variant is up or downstream { struct gpFx *effectsList = NULL; char *defaultAltAllele = firstAltAllele(variant->alleles);