099102d63cdcd41f251d2748e532440b71e45d40 angie Wed Sep 11 14:38:44 2013 -0700 Correcting off-by-one intron number; adding recognition of stop_lost caused by inframe_deletion. diff --git src/hg/lib/gpFx.c src/hg/lib/gpFx.c index ed25514..112cc59 100644 --- src/hg/lib/gpFx.c +++ src/hg/lib/gpFx.c @@ -331,33 +331,38 @@ * but we have to check for several other special cases esp. indels. */ { struct codingChange *cc = &effect->details.codingChange; int oldAaSize = strlen(oldAa), newAaSize = strlen(newAa); if (sameString(newAa, oldAa)) { if (cc->pepPosition == oldAaSize-1 && cc->aaOld[0] == 'Z') effect->soNumber = stop_retained_variant; else effect->soNumber = synonymous_variant; } else { if (cdsBasesAdded < 0) { - // Got a deletion variant -- check frame: + // Got a deletion variant -- check frame (and whether we lost a stop codon): if ((cdsBasesAdded % 3) == 0) + { + if (strchr(cc->aaOld, 'Z') && !strchr(cc->aaNew, 'Z')) + effect->soNumber = stop_lost; + else effect->soNumber = inframe_deletion; + } else effect->soNumber = frameshift_variant; } else { // Not a deletion; could be single-base (including early stop) or insertion if (newAaSize < oldAaSize) { // Not a deletion but protein got smaller; must have been an early stop codon, // possibly following a frameshift caused by an insertion. if (cc->aaNew[0] != 'Z') { if (newAa[newAaSize-1] != 'Z') errAbort("gpFx: new protein is smaller but last base in new sequence " "is '%c' not 'Z'", newAa[newAaSize-1]); @@ -579,31 +584,31 @@ int intronEnd = pred->exonStarts[ii+1]; if (variant->chromEnd > intronStart && variant->chromStart < intronEnd) { enum soTerm soNumber = intron_variant; if (variant->chromEnd > intronStart && variant->chromStart < intronStart+2) // Within 2 bases of intron start(/end for '-'): soNumber = minusStrand ? splice_acceptor_variant : splice_donor_variant; if (variant->chromEnd > intronEnd-2 && variant->chromStart < intronEnd) // Within 2 bases of intron end(/start for '-'): soNumber = minusStrand ? splice_donor_variant : splice_acceptor_variant; else if ((variant->chromEnd > intronStart+3 && variant->chromStart < intronStart+8) || (variant->chromEnd > intronEnd-8 && variant->chromStart < intronEnd+3)) // Within 3 to 8 bases of intron start or end: soNumber = splice_region_variant; struct gpFx *effects = gpFxNew(altAllele, pred->name, soNumber, intron, lm); - effects->details.intron.intronNumber = minusStrand ? (pred->exonCount - ii - 1) : ii; + effects->details.intron.intronNumber = minusStrand ? (pred->exonCount - ii - 2) : ii; slAddHead(&effectsList, effects); } else if ((variant->chromEnd > intronStart-3 && variant->chromStart < intronStart) || (variant->chromEnd > intronEnd && variant->chromStart < intronEnd+3)) { // if variant is in exon *but* within 3 bases of splice site, // it also qualifies as splice_region_variant: struct gpFx *effects = gpFxNew(altAllele, pred->name, splice_region_variant, intron, lm); effects->details.intron.intronNumber = minusStrand ? (pred->exonCount - ii - 1) : ii; slAddHead(&effectsList, effects); } } return effectsList; }