099102d63cdcd41f251d2748e532440b71e45d40
angie
  Wed Sep 11 14:38:44 2013 -0700
Correcting off-by-one intron number; adding recognition of stop_lost caused by inframe_deletion.
diff --git src/hg/lib/gpFx.c src/hg/lib/gpFx.c
index ed25514..112cc59 100644
--- src/hg/lib/gpFx.c
+++ src/hg/lib/gpFx.c
@@ -331,33 +331,38 @@
  * but we have to check for several other special cases esp. indels. */
 {
 struct codingChange *cc = &effect->details.codingChange;
 int oldAaSize = strlen(oldAa), newAaSize = strlen(newAa);
 if (sameString(newAa, oldAa))
     {
     if (cc->pepPosition == oldAaSize-1 && cc->aaOld[0] == 'Z')
 	effect->soNumber = stop_retained_variant;
     else
 	effect->soNumber = synonymous_variant;
     }
 else
     {
     if (cdsBasesAdded < 0)
 	{
-	// Got a deletion variant -- check frame:
+	// Got a deletion variant -- check frame (and whether we lost a stop codon):
 	if ((cdsBasesAdded % 3) == 0)
+	    {
+	    if (strchr(cc->aaOld, 'Z') && !strchr(cc->aaNew, 'Z'))
+		effect->soNumber = stop_lost;
+	    else
 		effect->soNumber = inframe_deletion;
+	    }
 	else
 	    effect->soNumber = frameshift_variant;
 	}
     else
 	{
 	// Not a deletion; could be single-base (including early stop) or insertion
 	if (newAaSize < oldAaSize)
 	    {
 	    // Not a deletion but protein got smaller; must have been an early stop codon,
 	    // possibly following a frameshift caused by an insertion.
 	    if (cc->aaNew[0] != 'Z')
 		{
 		if (newAa[newAaSize-1] != 'Z')
 		    errAbort("gpFx: new protein is smaller but last base in new sequence "
 			     "is '%c' not 'Z'", newAa[newAaSize-1]);
@@ -579,31 +584,31 @@
     int intronEnd = pred->exonStarts[ii+1];
     if (variant->chromEnd > intronStart && variant->chromStart < intronEnd)
 	{
 	enum soTerm soNumber = intron_variant;
 	if (variant->chromEnd > intronStart && variant->chromStart < intronStart+2)
 	    // Within 2 bases of intron start(/end for '-'):
 	    soNumber = minusStrand ? splice_acceptor_variant : splice_donor_variant;
 	if (variant->chromEnd > intronEnd-2 && variant->chromStart < intronEnd)
 	    // Within 2 bases of intron end(/start for '-'):
 	    soNumber = minusStrand ? splice_donor_variant : splice_acceptor_variant;
 	else if ((variant->chromEnd > intronStart+3 && variant->chromStart < intronStart+8) ||
 		 (variant->chromEnd > intronEnd-8 && variant->chromStart < intronEnd+3))
 	    // Within 3 to 8 bases of intron start or end:
 	    soNumber = splice_region_variant;
 	struct gpFx *effects = gpFxNew(altAllele, pred->name, soNumber, intron, lm);
-	effects->details.intron.intronNumber = minusStrand ? (pred->exonCount - ii - 1) : ii;
+	effects->details.intron.intronNumber = minusStrand ? (pred->exonCount - ii - 2) : ii;
 	slAddHead(&effectsList, effects);
 	}
     else if ((variant->chromEnd > intronStart-3 && variant->chromStart < intronStart) ||
 	     (variant->chromEnd > intronEnd && variant->chromStart < intronEnd+3))
 	{
 	// if variant is in exon *but* within 3 bases of splice site,
 	// it also qualifies as splice_region_variant:
 	struct gpFx *effects = gpFxNew(altAllele, pred->name,
 				       splice_region_variant, intron, lm);
 	effects->details.intron.intronNumber = minusStrand ? (pred->exonCount - ii - 1) : ii;
 	slAddHead(&effectsList, effects);
 	}
     }
 return effectsList;
 }