2b5eb866f050d964d8964ec5a84f7b63889cc6b1 angie Mon Jun 3 14:39:36 2013 -0700 New CGI, hgVai (Variant Annotation Integrator): simple checklist-styleUI by which user can select variants that they have uploaded; gene predictions to identify which part of a gene, if any, is hit by each variant; several additional sources of annotations/predictions e.g. dbNSFP scores and conserved elements/scores; and several filters to constrain output to the variants most likely to have a functional effect. Along with the new CGI, there are various lib bugfixes and improvements, a new hg/lib/tests/ testcase, and some test file changes to accomodate data updates to both knownGene and the pg* tables in knownGene. refs #6152 diff --git src/hg/lib/gpFx.c src/hg/lib/gpFx.c index cb58375..5025ce1 100644 --- src/hg/lib/gpFx.c +++ src/hg/lib/gpFx.c @@ -88,30 +88,33 @@ // clip allele to exon struct allele *clipAllele = alleleClip(allele, pred->exonStarts[exonNum], pred->exonEnds[exonNum], lm); // change transcript at variant point int exonOffset = clipAllele->variant->chromStart - pred->exonStarts[exonNum]; int transcriptOffset = exonQStart + exonOffset; int variantWidth = clipAllele->variant->chromEnd - clipAllele->variant->chromStart; if (variantWidth != clipAllele->length && retNewPred != NULL) { // OK, now we have to make a new genePred that matches the changed sequence int basesAdded = clipAllele->length - variantWidth; struct genePred *newPred = lmCloneVar(lm, pred); + size_t size = pred->exonCount * sizeof(pred->exonStarts[0]); + newPred->exonStarts = lmCloneMem(lm, pred->exonStarts, size); + newPred->exonEnds = lmCloneMem(lm, pred->exonEnds, size); int alEnd = clipAllele->variant->chromEnd; if (newPred->cdsStart > alEnd) newPred->cdsStart += basesAdded; if (newPred->cdsEnd > alEnd) newPred->cdsEnd += basesAdded; newPred->exonEnds[exonNum] += basesAdded; int ii; for (ii = exonNum+1; ii < pred->exonCount; ii++) { newPred->exonStarts[ii] += basesAdded; newPred->exonEnds[ii] += basesAdded; } newPred->txEnd = newPred->exonEnds[newPred->exonCount - 1]; *retNewPred = newPred; } @@ -272,42 +275,41 @@ int exonNum, int cDnaPosition, struct lm *lm) /* check for effects in UTR of coding gene -- caller ensures it's in exon, pred is coding * and exonNum has been strand-adjusted */ { struct gpFx *gpFx = NULL; enum soTerm term = 0; struct variant *variant = allele->variant; if (variant->chromStart < pred->cdsStart && variant->chromEnd > pred->txStart) // we're in left UTR term = (*pred->strand == '-') ? _3_prime_UTR_variant : _5_prime_UTR_variant; else if (variant->chromStart < pred->txEnd && variant->chromEnd > pred->cdsEnd) // we're in right UTR term = (*pred->strand == '-') ? _5_prime_UTR_variant : _3_prime_UTR_variant; if (term != 0) { - gpFx = gpFxNew(allele->sequence, pred->name, term, nonCodingExon, lm); + gpFx = gpFxNew("", pred->name, term, nonCodingExon, lm); setNCExonVals(gpFx, exonNum, cDnaPosition); } return gpFx; } struct gpFx *gpFxChangedNoncodingExon(struct allele *allele, struct genePred *pred, int exonNum, int cDnaPosition, struct lm *lm) /* generate an effect for a variant in a non-coding transcript */ { -struct gpFx *gpFx = gpFxNew(allele->sequence, pred->name, non_coding_exon_variant, nonCodingExon, - lm); +struct gpFx *gpFx = gpFxNew("", pred->name, non_coding_exon_variant, nonCodingExon, lm); if (*pred->strand == '-') exonNum = pred->exonCount - exonNum - 1; setNCExonVals(gpFx, exonNum, cDnaPosition); return gpFx; } boolean isSafeFromNMD(int exonNum, struct variant *variant, struct genePred *pred) /* Return TRUE if variant in strand-corrected exonNum is in the region * of pred that would make it safe from Nonsense-Mediated Decay (NMD). * NMD is triggered by the presence of a stop codon that appears * before ~50bp before the end of the last exon. In other words, if * there's a stop codon with a detectable downstream splice junction, * translation is prevented -- see * http://en.wikipedia.org/wiki/MRNA_surveillance#Mechanism_in_mammals */ { @@ -381,32 +383,37 @@ else if (newAaSize > oldAaSize) { // protein got bigger; insertion or lost stop codon if (cc->aaOld[0] == 'Z') effect->soNumber = stop_lost; else if ((cdsChangeLength % 3) == 0) effect->soNumber = inframe_insertion; else effect->soNumber = frameshift_variant; } else { // Single aa change if (cc->pepPosition == 0 && cc->aaOld[0] == 'M') effect->soNumber = initiator_codon_variant; - else if (cc->pepPosition == oldAaSize-1 && oldAa[oldAaSize-1] != 'Z') + else if (cc->pepPosition == oldAaSize-1) + { + if (oldAa[oldAaSize-1] == 'Z') + effect->soNumber = stop_lost; + else effect->soNumber = incomplete_terminal_codon_variant; + } else effect->soNumber = missense_variant; } } } } struct gpFx *gpFxChangedCodingExon(struct allele *allele, struct genePred *pred, int exonNum, char *transcriptSequence, char *newSequence, struct genePred *newPred, struct lm *lm) /* calculate effect of allele change on coding transcript */ { struct gpFx *effectsList = NULL; if (*pred->strand == '-') exonNum = pred->exonCount - exonNum - 1;