099102d63cdcd41f251d2748e532440b71e45d40 angie Wed Sep 11 14:38:44 2013 -0700 Correcting off-by-one intron number; adding recognition of stop_lost caused by inframe_deletion. diff --git src/hg/lib/gpFx.c src/hg/lib/gpFx.c index ed25514..112cc59 100644 --- src/hg/lib/gpFx.c +++ src/hg/lib/gpFx.c @@ -1,687 +1,692 @@ /* gpFx --- routines to calculate the effect of variation on a genePred */ #include "common.h" #include "genePred.h" #include "gpFx.h" static char *collapseDashes(char *str) // Trim extra hyphen characters at end of str. { int len = strlen(str); if (len > 1) { char *p = str + len - 1; while (*p == '-' && p > str) *p-- = '\0'; } return str; } struct gpFx *gpFxNew(char *allele, char *transcript, enum soTerm soNumber, enum detailType detailType, struct lm *lm) /* Fill in the common members of gpFx; leave soTerm-specific members for caller to fill in. */ { struct gpFx *effect; lmAllocVar(lm, effect); effect->allele = collapseDashes(strUpper(lmCloneString(lm, allele))); effect->transcript = lmCloneString(lm, transcript); effect->soNumber = soNumber; effect->detailType = detailType; return effect; } static char * mergeAllele(char *transcript, int offset, int variantWidth, char *newAlleleSeq, int alleleLength, struct lm *lm) /* merge a variant into an allele */ { char *newTranscript = NULL; if (variantWidth == alleleLength) { newTranscript = lmCloneString(lm, transcript); memcpy(&newTranscript[offset], newAlleleSeq, alleleLength); } else { int insertionSize = alleleLength - variantWidth; int newLength = strlen(transcript) + insertionSize; newTranscript = lmAlloc(lm, newLength + 1); char *restOfTranscript = &transcript[offset + variantWidth]; // copy over the part before the variant memcpy(newTranscript, transcript, offset); // copy in the new allele memcpy(&newTranscript[offset], newAlleleSeq, alleleLength); // copy in the part after the variant memcpy(&newTranscript[offset + alleleLength], restOfTranscript, strlen(restOfTranscript) + 1); } return newTranscript; } static int getCodingOffsetInTx(struct genePred *pred, char strand) /* Skip past UTR (portions of) exons to get offset of CDS relative to transcript start. * The strand arg is used instead of pred->strand. */ { int offset = 0; int iStart = 0, iIncr = 1; boolean isRc = (strand == '-'); if (isRc) { // Work our way left from the last exon. iStart = pred->exonCount - 1; iIncr = -1; } int ii; // trim off the 5' UTR (or 3' UTR if strand is '-') for (ii = iStart; ii >= 0 && ii < pred->exonCount; ii += iIncr) { if ((!isRc && (pred->cdsStart >= pred->exonStarts[ii]) && (pred->cdsStart < pred->exonEnds[ii])) || (isRc && (pred->cdsEnd > pred->exonStarts[ii]) && (pred->cdsEnd <= pred->exonEnds[ii]))) break; int exonSize = pred->exonEnds[ii] - pred->exonStarts[ii]; offset += exonSize; } assert( !(strand == '+' && ii >= pred->exonCount) ); assert( !(strand == '-' && ii < 0) ); // clip off part of UTR in exon that has CDS in it too int exonOffset = pred->cdsStart - pred->exonStarts[ii]; if (strand == '-') exonOffset = pred->exonEnds[ii] - pred->cdsEnd; offset += exonOffset; return offset; } char *gpFxModifySequence(struct allele *allele, struct genePred *pred, int exonNum, int exonQStart, struct dnaSeq *transcriptSequence, struct genePred **retNewPred, int *retTranscriptOffset, int *retBasesAdded, int *retCdsOffset, int *retCdsBasesAdded, struct lm *lm) /* modify a transcript to what it'd be if the alternate allele were present; * if transcript length changes, make retNewPred a shallow copy of pred w/tweaked coords. */ { // clip allele to exon struct allele *clipAllele = alleleClip(allele, pred->exonStarts[exonNum], pred->exonEnds[exonNum], lm); struct variant *clipVariant = clipAllele->variant; boolean predIsRc = (*pred->strand == '-'); // change transcript at variant point int exonOffset = clipVariant->chromStart - pred->exonStarts[exonNum]; if (predIsRc) exonOffset = clipVariant->chromEnd - pred->exonStarts[exonNum]; int transcriptOffset = exonQStart + exonOffset; int variantWidth = clipVariant->chromEnd - clipVariant->chromStart; int basesAdded = clipAllele->length - variantWidth; char *newAlleleSeq = lmCloneString(lm, clipAllele->sequence); if (predIsRc) { transcriptOffset = transcriptSequence->size - transcriptOffset; reverseComplement(newAlleleSeq, strlen(newAlleleSeq)); } // If clipAllele overlaps CDS, figure out cdsOffset and cdsBasesAdded: int cdsOffset = -1, cdsBasesAdded = 0; if (clipVariant->chromStart < pred->cdsEnd && clipVariant->chromEnd > pred->cdsStart) { struct allele *clipAlleleCds = alleleClip(clipAllele, pred->cdsStart, pred->cdsEnd, lm); struct variant *clipVariantCds = clipAlleleCds->variant; int exonOffsetCds = clipVariantCds->chromStart - pred->exonStarts[exonNum]; if (predIsRc) exonOffsetCds = clipVariantCds->chromEnd - pred->exonStarts[exonNum]; int cdsOffsetInTx = exonQStart + exonOffsetCds; if (predIsRc) cdsOffsetInTx = transcriptSequence->size - cdsOffsetInTx; int cdsQStart = getCodingOffsetInTx(pred, pred->strand[0]); cdsOffset = cdsOffsetInTx - cdsQStart; int variantWidthCds = clipVariantCds->chromEnd - clipVariantCds->chromStart; cdsBasesAdded = clipAlleleCds->length - variantWidthCds; } if (basesAdded != 0 && retNewPred != NULL) { // OK, now we have to make a new genePred that matches the changed sequence struct genePred *newPred = lmCloneVar(lm, pred); size_t size = pred->exonCount * sizeof(pred->exonStarts[0]); newPred->exonStarts = lmCloneMem(lm, pred->exonStarts, size); newPred->exonEnds = lmCloneMem(lm, pred->exonEnds, size); if (newPred->cdsStart > clipVariant->chromEnd) newPred->cdsStart += basesAdded; else if (newPred->cdsStart > clipVariant->chromStart && cdsBasesAdded < 0) newPred->cdsStart += cdsBasesAdded; if (newPred->cdsEnd > clipVariant->chromEnd) newPred->cdsEnd += basesAdded; else if (newPred->cdsEnd > clipVariant->chromStart && cdsBasesAdded < 0) newPred->cdsEnd += cdsBasesAdded; newPred->exonEnds[exonNum] += basesAdded; int ii; for (ii = exonNum+1; ii < pred->exonCount; ii++) { newPred->exonStarts[ii] += basesAdded; newPred->exonEnds[ii] += basesAdded; } newPred->txEnd = newPred->exonEnds[newPred->exonCount - 1]; *retNewPred = newPred; } // make the change in the sequence char *retSequence = lmCloneString(lm, transcriptSequence->dna); retSequence = mergeAllele( retSequence, transcriptOffset, variantWidth, newAlleleSeq, clipAllele->length, lm); if (retTranscriptOffset != NULL) *retTranscriptOffset = transcriptOffset; if (retBasesAdded != NULL) *retBasesAdded = basesAdded; if (retCdsOffset != NULL) *retCdsOffset = cdsOffset; if (retCdsBasesAdded != NULL) *retCdsBasesAdded = cdsBasesAdded; return retSequence; } static char *getCodingSequence(struct genePred *pred, char *transcriptSequence, struct lm *lm) /* extract the CDS from a transcript */ { if (*pred->strand == '-') reverseComplement(transcriptSequence, strlen(transcriptSequence)); // Get leftmost CDS boundary to trim off 5' (or 3' if on minus strand): int cdsOffset = getCodingOffsetInTx(pred, '+'); char *newString = lmCloneString(lm, transcriptSequence + cdsOffset); // trim off 3' (or 5' if on minus strand) newString[genePredCdsSize(pred)] = 0; // correct for strand if (*pred->strand == '-') { reverseComplement(transcriptSequence, strlen(transcriptSequence)); reverseComplement(newString, strlen(newString)); } return newString; } static struct dnaSeq *lmNewSimpleSeq(struct lm *lm, char *seq) /* Like newDnaSeq but no name and using localmem. */ { struct dnaSeq *simpleSeq; lmAllocVar(lm, simpleSeq); simpleSeq->dna = seq; simpleSeq->size = strlen(seq); return simpleSeq; } static char *lmSimpleTranslate(struct lm *lm, struct dnaSeq *dnaSeq) /* Just translate dnaSeq into pep, use 'Z' and truncate for stop codon, and use localmem. */ { int aaLen = (dnaSeq->size / 3) + 1; char *pep = lmAlloc(lm, aaLen + 1); int i; for (i = 0; i < dnaSeq->size; i += 3) { char aa = lookupCodon(dnaSeq->dna + i); pep[i/3] = aa; if (aa == '\0') { pep[i/3] = 'Z'; break; } } if (i < dnaSeq->size && pep[i/3] != 'Z') // incomplete final codon pep[i/3] = 'X'; return pep; } static void getSequences(struct genePred *pred, char *transcriptSequence, char **retCodingSequence, char **retAaSeq, struct lm *lm) /* get coding sequences for a transcript and a variant transcript */ { char *codingSequence = getCodingSequence(pred, transcriptSequence, lm); struct dnaSeq *codingDna = lmNewSimpleSeq(lm, codingSequence); char *aaSeq = lmSimpleTranslate(lm, codingDna); char *stop = strchr(aaSeq, 'Z'); if (stop != NULL) { // If aaSeq was truncated by a stop codon, truncate codingSequence as well: int codingLength = 3*strlen(aaSeq); codingSequence[codingLength] = '\0'; } *retCodingSequence = codingSequence; *retAaSeq = aaSeq; } static void setNCExonVals(struct gpFx *gpFx, int exonNum, int cDnaPosition) /* This gpFx is for a variant in exon of non-coding gene or UTR exon of coding gene; * set details.nonCodingExon values. */ { gpFx->details.nonCodingExon.exonNumber = exonNum; gpFx->details.nonCodingExon.cDnaPosition = cDnaPosition; } struct gpFx *gpFxCheckUtr( struct allele *allele, struct genePred *pred, int exonNum, int cDnaPosition, struct lm *lm) /* check for effects in UTR of coding gene -- caller ensures it's in exon, pred is coding * and exonNum has been strand-adjusted */ { struct gpFx *gpFx = NULL; enum soTerm term = 0; struct variant *variant = allele->variant; if (variant->chromStart < pred->cdsStart && variant->chromEnd > pred->txStart) // we're in left UTR term = (*pred->strand == '-') ? _3_prime_UTR_variant : _5_prime_UTR_variant; else if (variant->chromStart < pred->txEnd && variant->chromEnd > pred->cdsEnd) // we're in right UTR term = (*pred->strand == '-') ? _5_prime_UTR_variant : _3_prime_UTR_variant; if (term != 0) { gpFx = gpFxNew(allele->sequence, pred->name, term, nonCodingExon, lm); setNCExonVals(gpFx, exonNum, cDnaPosition); } return gpFx; } struct gpFx *gpFxChangedNoncodingExon(struct allele *allele, struct genePred *pred, int exonNum, int cDnaPosition, struct lm *lm) /* generate an effect for a variant in a non-coding transcript */ { struct gpFx *gpFx = gpFxNew(allele->sequence, pred->name, non_coding_exon_variant, nonCodingExon, lm); if (*pred->strand == '-') exonNum = pred->exonCount - exonNum - 1; setNCExonVals(gpFx, exonNum, cDnaPosition); return gpFx; } boolean isSafeFromNMD(int exonNum, struct variant *variant, struct genePred *pred) /* Return TRUE if variant in strand-corrected exonNum is in the region * of pred that would make it safe from Nonsense-Mediated Decay (NMD). * NMD is triggered by the presence of a stop codon that appears * before ~50bp before the end of the last exon. In other words, if * there's a stop codon with a detectable downstream splice junction, * translation is prevented -- see * http://en.wikipedia.org/wiki/MRNA_surveillance#Mechanism_in_mammals */ { boolean lastExonNum = pred->exonCount - 1; if (exonNum == lastExonNum) return TRUE; int nextToLastExonNum = pred->exonCount - 2; if (exonNum == nextToLastExonNum) { // Is it within 50bp of 3' end of exon? if (pred->strand[0] == '-') return (variant->chromEnd < pred->exonStarts[1] + 50); else return (variant->chromStart > pred->exonEnds[nextToLastExonNum] - 50); } return FALSE; } void setSpecificCodingSoTerm(struct gpFx *effect, char *oldAa, char *newAa, int cdsBasesAdded, boolean safeFromNMD) /* Assuming that deletions are marked with dashes in newCodingSequence, * and that effect fields aside from soNumber are already populated, use the * pep seqs and number of dashes to determine the appropriate SO term. * Probably the majority of cases will be synonymous_variant or missense_variant, * but we have to check for several other special cases esp. indels. */ { struct codingChange *cc = &effect->details.codingChange; int oldAaSize = strlen(oldAa), newAaSize = strlen(newAa); if (sameString(newAa, oldAa)) { if (cc->pepPosition == oldAaSize-1 && cc->aaOld[0] == 'Z') effect->soNumber = stop_retained_variant; else effect->soNumber = synonymous_variant; } else { if (cdsBasesAdded < 0) { - // Got a deletion variant -- check frame: + // Got a deletion variant -- check frame (and whether we lost a stop codon): if ((cdsBasesAdded % 3) == 0) + { + if (strchr(cc->aaOld, 'Z') && !strchr(cc->aaNew, 'Z')) + effect->soNumber = stop_lost; + else effect->soNumber = inframe_deletion; + } else effect->soNumber = frameshift_variant; } else { // Not a deletion; could be single-base (including early stop) or insertion if (newAaSize < oldAaSize) { // Not a deletion but protein got smaller; must have been an early stop codon, // possibly following a frameshift caused by an insertion. if (cc->aaNew[0] != 'Z') { if (newAa[newAaSize-1] != 'Z') errAbort("gpFx: new protein is smaller but last base in new sequence " "is '%c' not 'Z'", newAa[newAaSize-1]); effect->soNumber = frameshift_variant; } else if (safeFromNMD) effect->soNumber = stop_gained; else effect->soNumber = NMD_transcript_variant; } else if (newAaSize > oldAaSize) { // protein got bigger; insertion or lost stop codon if (cc->aaOld[0] == 'Z') effect->soNumber = stop_lost; else if ((cdsBasesAdded % 3) == 0) effect->soNumber = inframe_insertion; else effect->soNumber = frameshift_variant; } else { // Single aa change if (cc->pepPosition == 0 && cc->aaOld[0] == 'M') effect->soNumber = initiator_codon_variant; else if (cc->pepPosition == oldAaSize-1) { if (oldAa[oldAaSize-1] == 'Z') effect->soNumber = stop_lost; else effect->soNumber = incomplete_terminal_codon_variant; } else effect->soNumber = missense_variant; } } } } struct gpFx *gpFxChangedCodingExon(struct allele *allele, struct genePred *pred, int exonNum, struct dnaSeq *transcriptSequence, char *newSequence, struct genePred *newPred, int cDnaPosition, int basesAdded, int cdsPosition, int cdsBasesAdded, struct lm *lm) /* calculate effect of allele change on coding transcript */ { struct gpFx *effectsList = NULL; if (*pred->strand == '-') exonNum = pred->exonCount - exonNum - 1; // first find effects of allele in UTR, if any effectsList = gpFxCheckUtr(allele, pred, exonNum, cDnaPosition, lm); // calculate original and variant coding DNA and AA's char *oldCodingSequence, *newCodingSequence, *oldaa, *newaa; getSequences(pred, transcriptSequence->dna, &oldCodingSequence, &oldaa, lm); getSequences(newPred, newSequence, &newCodingSequence, &newaa, lm); // if coding sequence hasn't changed (just UTR), we're done if (sameString(oldCodingSequence, newCodingSequence)) return effectsList; // allocate the effect structure - fill in soNumber and details below struct gpFx *effect = gpFxNew(allele->sequence, pred->name, coding_sequence_variant, codingChange, lm); struct codingChange *cc = &effect->details.codingChange; cc->cDnaPosition = cDnaPosition; cc->cdsPosition = cdsPosition; cc->exonNumber = exonNum; int pepPos = cdsPosition / 3; cc->pepPosition = pepPos; if (cdsBasesAdded % 3 == 0) { // Common case: substitution, same number of old/new codons/peps: int numOldCodons = (1 + allele->length / 3), numNewCodons = (1 + allele->length / 3); if (cdsBasesAdded > 0) { // insertion: more new codons than old numOldCodons = (cdsPosition % 3) == 0 ? 0 : 1; numNewCodons = numOldCodons + (cdsBasesAdded / 3); } else if (cdsBasesAdded < 0) { // deletion: more old codons than new numNewCodons = (cdsPosition % 3) == 0 ? 0 : 1; numOldCodons = numNewCodons + (-cdsBasesAdded / 3); } cc->codonOld = lmCloneStringZ(lm, oldCodingSequence + pepPos*3, numOldCodons*3); cc->codonNew = lmCloneStringZ(lm, newCodingSequence + pepPos*3, numNewCodons*3); cc->aaOld = lmCloneStringZ(lm, oldaa + pepPos, numOldCodons); cc->aaNew = lmCloneStringZ(lm, newaa + pepPos, numNewCodons); } else { // frameshift -- who knows how many codons we can reliably predict... cc->codonOld = lmCloneString(lm, oldCodingSequence + pepPos*3); cc->codonNew = lmCloneString(lm, newCodingSequence + pepPos*3); cc->aaOld = lmCloneString(lm, oldaa + pepPos); cc->aaNew = lmCloneString(lm, newaa + pepPos); } // show specific coding changes by making before-and-after subsequences: boolean safeFromNMD = isSafeFromNMD(exonNum, allele->variant, pred); setSpecificCodingSoTerm(effect, oldaa, newaa, cdsBasesAdded, safeFromNMD); slAddHead(&effectsList, effect); return effectsList; } struct gpFx *gpFxInExon(struct allele *allele, struct genePred *pred, char *refAllele, int exonNum, int exonQStart, struct dnaSeq *transcriptSequence, struct lm *lm) /* generate an effect from a different allele in an exon */ { struct genePred *newPred = pred; int cDnaPosition = 0, basesAdded = 0, cdsPosition = 0, cdsBasesAdded = 0; char *newSequence = gpFxModifySequence(allele, pred, exonNum, exonQStart, transcriptSequence, &newPred, &cDnaPosition, &basesAdded, &cdsPosition, &cdsBasesAdded, lm); if (sameString(newSequence, transcriptSequence->dna)) { struct variant *variant = allele->variant; errAbort("gpFxInExon: %s:%d-%d, allele='%s', refAllele='%s', no change, shouldn't be here", variant->chrom, variant->chromStart+1, variant->chromEnd, allele->sequence, refAllele); return NULL; // no change in transcript -- but allele should always be non-reference } struct gpFx *effectsList; if (pred->cdsStart != pred->cdsEnd) effectsList = gpFxChangedCodingExon(allele, pred, exonNum, transcriptSequence, newSequence, newPred, cDnaPosition, basesAdded, cdsPosition, cdsBasesAdded, lm); else { effectsList = gpFxChangedNoncodingExon(allele, pred, exonNum, cDnaPosition, lm); } return effectsList; } static boolean hasAltAllele(struct allele *alleles, char *refAllele) /* Make sure there's something to work on here... */ { while (alleles != NULL && sameString(alleles->sequence, refAllele)) alleles = alleles->next; return (alleles != NULL); } static char *firstAltAllele(struct allele *alleles, char *refAllele) /* Ensembl always reports an alternate allele, even if that allele is not being used * to calculate any consequence. When allele doesn't really matter, just use the * first alternate allele that is given. */ { while (alleles != NULL && sameString(alleles->sequence, refAllele)) alleles = alleles->next; if (alleles == NULL) errAbort("firstAltAllele: no alt allele in list"); return alleles->sequence; } static struct gpFx *gpFxCheckExons(struct variant *variantList, struct genePred *pred, char *refAllele, struct dnaSeq *transcriptSequence, struct lm *lm) // check to see if the variant is in an exon { struct gpFx *effectsList = NULL; int exonQStart = 0; int ii; for (ii=0; ii < pred->exonCount; ii++) { struct variant *variant = variantList; for(; variant ; variant = variant->next) { // check if in an exon int end = variant->chromEnd; if (variant->chromStart == end) end++; int size; if ((size = positiveRangeIntersection(pred->exonStarts[ii], pred->exonEnds[ii], variant->chromStart, end)) > 0) { if (size != end - variant->chromStart) { // variant is not completely in exon char *altAllele = firstAltAllele(variant->alleles, refAllele); struct gpFx *effect = gpFxNew(altAllele, pred->name, complex_transcript_variant, none, lm); effectsList = slCat(effectsList, effect); } struct allele *allele = variant->alleles; for(; allele ; allele = allele->next) { if (differentString(allele->sequence, refAllele)) effectsList = slCat(effectsList, gpFxInExon(allele, pred, refAllele, ii, exonQStart, transcriptSequence, lm)); } } } exonQStart += (pred->exonEnds[ii] - pred->exonStarts[ii]); } return effectsList; } static struct gpFx *gpFxCheckIntrons(struct variant *variant, struct genePred *pred, char *altAllele, struct lm *lm) // check to see if a single variant overlaps an intron (and possibly splice region) //#*** TODO: watch out for "introns" that are actually indels between tx seq and ref genome! { struct gpFx *effectsList = NULL; boolean minusStrand = (pred->strand[0] == '-'); int ii; for (ii=0; ii < pred->exonCount - 1; ii++) { int intronStart = pred->exonEnds[ii]; int intronEnd = pred->exonStarts[ii+1]; if (variant->chromEnd > intronStart && variant->chromStart < intronEnd) { enum soTerm soNumber = intron_variant; if (variant->chromEnd > intronStart && variant->chromStart < intronStart+2) // Within 2 bases of intron start(/end for '-'): soNumber = minusStrand ? splice_acceptor_variant : splice_donor_variant; if (variant->chromEnd > intronEnd-2 && variant->chromStart < intronEnd) // Within 2 bases of intron end(/start for '-'): soNumber = minusStrand ? splice_donor_variant : splice_acceptor_variant; else if ((variant->chromEnd > intronStart+3 && variant->chromStart < intronStart+8) || (variant->chromEnd > intronEnd-8 && variant->chromStart < intronEnd+3)) // Within 3 to 8 bases of intron start or end: soNumber = splice_region_variant; struct gpFx *effects = gpFxNew(altAllele, pred->name, soNumber, intron, lm); - effects->details.intron.intronNumber = minusStrand ? (pred->exonCount - ii - 1) : ii; + effects->details.intron.intronNumber = minusStrand ? (pred->exonCount - ii - 2) : ii; slAddHead(&effectsList, effects); } else if ((variant->chromEnd > intronStart-3 && variant->chromStart < intronStart) || (variant->chromEnd > intronEnd && variant->chromStart < intronEnd+3)) { // if variant is in exon *but* within 3 bases of splice site, // it also qualifies as splice_region_variant: struct gpFx *effects = gpFxNew(altAllele, pred->name, splice_region_variant, intron, lm); effects->details.intron.intronNumber = minusStrand ? (pred->exonCount - ii - 1) : ii; slAddHead(&effectsList, effects); } } return effectsList; } static struct gpFx *gpFxCheckBackground(struct variant *variant, struct genePred *pred, char *refAllele, struct lm *lm) // check to see if the variant is up or downstream or in intron of the gene { struct gpFx *effectsList = NULL; char *altAllele = firstAltAllele(variant->alleles, refAllele); for(; variant ; variant = variant->next) { // is this variant in an intron effectsList = slCat(effectsList, gpFxCheckIntrons(variant, pred, altAllele, lm)); // Is this variant to the left or right of transcript? enum soTerm soNumber = 0; if (variant->chromStart < pred->txStart && variant->chromEnd > pred->txStart - GPRANGE) { if (*pred->strand == '+') soNumber = upstream_gene_variant; else soNumber = downstream_gene_variant; } else if (variant->chromEnd > pred->txEnd && variant->chromStart < pred->txEnd + GPRANGE) { if (*pred->strand == '+') soNumber = downstream_gene_variant; else soNumber = upstream_gene_variant; } if (soNumber != 0) { struct gpFx *effects = gpFxNew(altAllele, pred->name, soNumber, none, lm); effectsList = slCat(effectsList, effects); } } return effectsList; } static void checkVariantList(struct variant *variant) // check to see that we either have one variant (possibly with multiple // alleles) or that if we have a list of variants, they only have // one allele a piece. { if (variant->next == NULL) // just one variant return; for(; variant; variant = variant->next) if (variant->numAlleles != 1) errAbort("gpFxPredEffect needs either 1 variant, or only 1 allele in all variants"); } struct gpFx *gpFxPredEffect(struct variant *variant, struct genePred *pred, char *refAllele, struct dnaSeq *transcriptSequence, struct lm *lm) // return the predicted effect(s) of a variation list on a genePred { struct gpFx *effectsList = NULL; // make sure we can deal with the variants that are coming in checkVariantList(variant); // If only the reference allele has been observed, skip it: if (! hasAltAllele(variant->alleles, refAllele)) return NULL; // check to see if SNP is up or downstream or in intron effectsList = slCat(effectsList, gpFxCheckBackground(variant, pred, refAllele, lm)); // check to see if SNP is in the transcript effectsList = slCat(effectsList, gpFxCheckExons(variant, pred, refAllele, transcriptSequence, lm)); //#*** sort by transcript name? transcript start? return effectsList; }