2b5eb866f050d964d8964ec5a84f7b63889cc6b1
angie
  Mon Jun 3 14:39:36 2013 -0700
New CGI, hgVai (Variant Annotation Integrator): simple checklist-styleUI by which user can select variants that they have uploaded; gene
predictions to identify which part of a gene, if any, is hit by each
variant; several additional sources of annotations/predictions e.g.
dbNSFP scores and conserved elements/scores; and several filters to
constrain output to the variants most likely to have a functional effect.
Along with the new CGI, there are various lib bugfixes and improvements,
a new hg/lib/tests/ testcase, and some test file changes to accomodate
data updates to both knownGene and the pg* tables in knownGene.
refs #6152

diff --git src/hg/lib/gpFx.c src/hg/lib/gpFx.c
index cb58375..5025ce1 100644
--- src/hg/lib/gpFx.c
+++ src/hg/lib/gpFx.c
@@ -88,30 +88,33 @@
 // clip allele to exon
 struct allele *clipAllele = alleleClip(allele, pred->exonStarts[exonNum], pred->exonEnds[exonNum],
 				       lm);
 
 // change transcript at variant point
 int exonOffset = clipAllele->variant->chromStart - pred->exonStarts[exonNum];
 int transcriptOffset = exonQStart + exonOffset;
 
 int variantWidth = clipAllele->variant->chromEnd - clipAllele->variant->chromStart;
 
 if (variantWidth != clipAllele->length && retNewPred != NULL)
     {
     // OK, now we have to make a new genePred that matches the changed sequence
     int basesAdded = clipAllele->length - variantWidth;
     struct genePred *newPred = lmCloneVar(lm, pred);
+    size_t size = pred->exonCount * sizeof(pred->exonStarts[0]);
+    newPred->exonStarts = lmCloneMem(lm, pred->exonStarts, size);
+    newPred->exonEnds = lmCloneMem(lm, pred->exonEnds, size);
     int alEnd = clipAllele->variant->chromEnd;
     if (newPred->cdsStart > alEnd)
 	newPred->cdsStart += basesAdded;
     if (newPred->cdsEnd > alEnd)
 	newPred->cdsEnd += basesAdded;
     newPred->exonEnds[exonNum] += basesAdded;
     int ii;
     for (ii = exonNum+1;  ii < pred->exonCount;  ii++)
 	{
 	newPred->exonStarts[ii] += basesAdded;
 	newPred->exonEnds[ii] += basesAdded;
 	}
     newPred->txEnd = newPred->exonEnds[newPred->exonCount - 1];
     *retNewPred = newPred;
     }
@@ -272,42 +275,41 @@
 			   int exonNum, int cDnaPosition, struct lm *lm)
 /* check for effects in UTR of coding gene -- caller ensures it's in exon, pred is coding
  * and exonNum has been strand-adjusted */
 {
 struct gpFx *gpFx = NULL;
 enum soTerm term = 0;
 struct variant *variant = allele->variant;
 if (variant->chromStart < pred->cdsStart && variant->chromEnd > pred->txStart)
     // we're in left UTR
     term = (*pred->strand == '-') ? _3_prime_UTR_variant : _5_prime_UTR_variant;
 else if (variant->chromStart < pred->txEnd && variant->chromEnd > pred->cdsEnd)
     // we're in right UTR
     term = (*pred->strand == '-') ? _5_prime_UTR_variant : _3_prime_UTR_variant;
 if (term != 0)
     {
-    gpFx = gpFxNew(allele->sequence, pred->name, term, nonCodingExon, lm);
+    gpFx = gpFxNew("", pred->name, term, nonCodingExon, lm);
     setNCExonVals(gpFx, exonNum, cDnaPosition);
     }
 return gpFx;
 }
 
 struct gpFx *gpFxChangedNoncodingExon(struct allele *allele, struct genePred *pred,
 				      int exonNum, int cDnaPosition, struct lm *lm)
 /* generate an effect for a variant in a non-coding transcript */
 {
-struct gpFx *gpFx = gpFxNew(allele->sequence, pred->name, non_coding_exon_variant, nonCodingExon,
-			    lm);
+struct gpFx *gpFx = gpFxNew("", pred->name, non_coding_exon_variant, nonCodingExon, lm);
 if (*pred->strand == '-')
     exonNum = pred->exonCount - exonNum - 1;
 setNCExonVals(gpFx, exonNum, cDnaPosition);
 return gpFx;
 }
 
 boolean isSafeFromNMD(int exonNum, struct variant *variant, struct genePred *pred)
 /* Return TRUE if variant in strand-corrected exonNum is in the region
  * of pred that would make it safe from Nonsense-Mediated Decay (NMD).
  * NMD is triggered by the presence of a stop codon that appears
  * before ~50bp before the end of the last exon.  In other words, if
  * there's a stop codon with a detectable downstream splice junction,
  * translation is prevented -- see
  * http://en.wikipedia.org/wiki/MRNA_surveillance#Mechanism_in_mammals */
 {
@@ -381,32 +383,37 @@
 	else if (newAaSize > oldAaSize)
 	    {
 	    // protein got bigger; insertion or lost stop codon
 	    if (cc->aaOld[0] == 'Z')
 		effect->soNumber = stop_lost;
 	    else if ((cdsChangeLength % 3) == 0)
 		effect->soNumber = inframe_insertion;
 	    else
 		effect->soNumber = frameshift_variant;
 	    }
 	else
 	    {
 	    // Single aa change
 	    if (cc->pepPosition == 0 && cc->aaOld[0] == 'M')
 		effect->soNumber = initiator_codon_variant;
-	    else if (cc->pepPosition == oldAaSize-1 && oldAa[oldAaSize-1] != 'Z')
+	    else if (cc->pepPosition == oldAaSize-1)
+		{
+		if (oldAa[oldAaSize-1] == 'Z')
+		    effect->soNumber = stop_lost;
+		else
 		effect->soNumber = incomplete_terminal_codon_variant;
+		}
 	    else
 		effect->soNumber = missense_variant;
 	    }
 	}
     }
 }
 
 struct gpFx *gpFxChangedCodingExon(struct allele *allele, struct genePred *pred, int exonNum,
 				   char *transcriptSequence, char *newSequence,
 				   struct genePred *newPred, struct lm *lm)
 /* calculate effect of allele change on coding transcript */
 {
 struct gpFx *effectsList = NULL;
 if (*pred->strand == '-')
     exonNum = pred->exonCount - exonNum - 1;