1c9a4cb6897ec2924b5d7f46e4c6b9162d94d6ba
angie
  Mon Apr 22 16:43:15 2013 -0700
Converted variant and gpFx to use localmem.  Also got rid of a coupleunnecessary conversions from genePred to bed and psl in annoGratorGpVar
and gpFx. refs #6152

diff --git src/hg/lib/gpFx.c src/hg/lib/gpFx.c
index 573c5fb..cb58375 100644
--- src/hg/lib/gpFx.c
+++ src/hg/lib/gpFx.c
@@ -6,169 +6,139 @@
 
 static char *collapseDashes(char *str)
 // Trim extra hyphen characters at end of str.
 {
 int len = strlen(str);
 if (len > 1)
     {
     char *p = str + len - 1;
     while (*p == '-' && p > str)
 	*p-- = '\0';
     }
 return str;
 }
 
 struct gpFx *gpFxNew(char *allele, char *transcript, enum soTerm soNumber,
-		     enum detailType detailType)
+		     enum detailType detailType, struct lm *lm)
 /* Fill in the common members of gpFx; leave soTerm-specific members for caller to fill in. */
 {
 struct gpFx *effect;
-AllocVar(effect);
-effect->allele = collapseDashes(strUpper(cloneString(allele)));
-effect->transcript = cloneString(transcript);
+lmAllocVar(lm, effect);
+effect->allele = collapseDashes(strUpper(lmCloneString(lm, allele)));
+effect->transcript = lmCloneString(lm, transcript);
 effect->soNumber = soNumber;
 effect->detailType = detailType;
 return effect;
 }
 
 static unsigned countDashes(char *string)
 /* count the number of dashes in a string */
 {
 int count = 0;
 
 while(*string)
     {
     if (*string == '-')
 	count++;
     string++;
     }
 
 return count;
 }
 
 static char * mergeAllele(char *transcript, int offset, int variantWidth,
-    char *newAlleleSeq, int alleleLength)
+			  char *newAlleleSeq, int alleleLength, struct lm *lm)
 /* merge a variant into an allele */
 {
-char *newTranscript = transcript;
+char *newTranscript = NULL;
 
 if (variantWidth == alleleLength)
     {
-    // for the moment, we're sticking the dashes into the transcripts
-    if (1) //noDashes(newAlleleSeq))
-	{
-	memcpy(&transcript[offset], newAlleleSeq, alleleLength);
-	}
-#ifdef DOESNTKNOWABOUTTHEOFFSETARGUMENT
-    // if we decide not to put the dashes into the resulting transcripts
-    // this will have to be re-written to grok the offset argument
-    else
-	{
-	char *transcriptSource = transcript;
-	int ii;
-	for(ii=0; ii < variantWidth; ii++)
-	    {
-	    if (*newAlleleSeq == '-')
-		{
-		transcriptSource++;
-		}
-	    else
-		{
-		*transcript++ = *newAlleleSeq++;
-		transcriptSource++;
-		}
-	    }
-	while(*transcriptSource)
-	    *transcript++ = *transcriptSource++;
-	*transcript = 0;
-	}
-#endif
+    newTranscript = lmCloneString(lm, transcript);
+    memcpy(&newTranscript[offset], newAlleleSeq, alleleLength);
     }
 else  if (variantWidth > alleleLength)
-    errAbort("allele not padded");
+    errAbort("gpFx: allele not padded");
 else 
     {
     int insertionSize = alleleLength - variantWidth;
     int newLength = strlen(transcript) + insertionSize;
-    newTranscript = needMem(newLength + 1);
+    newTranscript = lmAlloc(lm, newLength + 1);
     char *restOfTranscript = &transcript[offset + variantWidth];
 
     // copy over the part before the variant
     memcpy(newTranscript, transcript, offset);
 
     // copy in the new allele
     memcpy(&newTranscript[offset], newAlleleSeq, alleleLength);
 
     // copy in the new allele
     memcpy(&newTranscript[offset + alleleLength], restOfTranscript, 
 	strlen(restOfTranscript) + 1);
     }
 
 return newTranscript;
 }
 
 char *gpFxModifySequence(struct allele *allele, struct genePred *pred, 
-			 int exonNum, struct psl *transcriptPsl, struct dnaSeq *transcriptSequence,
-			 struct genePred **retNewPred)
+			 int exonNum, int exonQStart, struct dnaSeq *transcriptSequence,
+			 struct genePred **retNewPred, struct lm *lm)
 /* modify a transcript to what it'd be if the alternate allele were present;
  * if transcript length changes, make retNewPred a shallow copy of pred w/tweaked coords. */
 {
 // clip allele to exon
-struct allele *clipAllele = alleleClip(allele, transcriptPsl->tStarts[exonNum], 
-	transcriptPsl->tStarts[exonNum] + transcriptPsl->blockSizes[exonNum]);
+struct allele *clipAllele = alleleClip(allele, pred->exonStarts[exonNum], pred->exonEnds[exonNum],
+				       lm);
 
 // change transcript at variant point
-int exonOffset = clipAllele->variant->chromStart - transcriptPsl->tStarts[exonNum];
-int transcriptOffset = transcriptPsl->qStarts[exonNum] + exonOffset;
+int exonOffset = clipAllele->variant->chromStart - pred->exonStarts[exonNum];
+int transcriptOffset = exonQStart + exonOffset;
 
 int variantWidth = clipAllele->variant->chromEnd - clipAllele->variant->chromStart;
 
 if (variantWidth != clipAllele->length && retNewPred != NULL)
     {
     // OK, now we have to make a new genePred that matches the changed sequence
     int basesAdded = clipAllele->length - variantWidth;
-    struct genePred *newPred = CloneVar(pred);
+    struct genePred *newPred = lmCloneVar(lm, pred);
     int alEnd = clipAllele->variant->chromEnd;
     if (newPred->cdsStart > alEnd)
 	newPred->cdsStart += basesAdded;
     if (newPred->cdsEnd > alEnd)
 	newPred->cdsEnd += basesAdded;
     newPred->exonEnds[exonNum] += basesAdded;
     int ii;
     for (ii = exonNum+1;  ii < pred->exonCount;  ii++)
 	{
 	newPred->exonStarts[ii] += basesAdded;
 	newPred->exonEnds[ii] += basesAdded;
 	}
     newPred->txEnd = newPred->exonEnds[newPred->exonCount - 1];
     *retNewPred = newPred;
     }
 
-char *retSequence = cloneString(transcriptSequence->dna);
-char *newAlleleSeq = cloneString(clipAllele->sequence);
+char *retSequence = lmCloneString(lm, transcriptSequence->dna);
+char *newAlleleSeq = lmCloneString(lm, clipAllele->sequence);
 if (*pred->strand == '-')
     {
     transcriptOffset = transcriptSequence->size - (transcriptOffset + strlen(newAlleleSeq));
     reverseComplement(newAlleleSeq, strlen(newAlleleSeq));
     }
 
 // make the change in the sequence
 retSequence = mergeAllele( retSequence, transcriptOffset, variantWidth, newAlleleSeq,
-			   clipAllele->length);
-
-// clean up
-freeMem(newAlleleSeq);
+			   clipAllele->length, lm);
 
 return retSequence;
 }
 
 static int getCodingOffsetInTx(struct genePred *pred, char strand)
 /* Skip past UTR (portions of) exons to get offset of CDS relative to transcript start.
  * The strand arg is used instead of pred->strand. */
 {
 int offset = 0;
 int iStart = 0, iIncr = 1;
 if (strand == '-')
     {
     // Work our way left from the last exon.
     iStart = pred->exonCount - 1;
     iIncr = -1;
@@ -179,39 +149,39 @@
     {
     if ((pred->cdsStart >= pred->exonStarts[ii]) &&
 	(pred->cdsStart < pred->exonEnds[ii]))
 	break;
     int exonSize = pred->exonEnds[ii] - pred->exonStarts[ii];
     offset += exonSize;
     }
 assert( !(strand == '+' && ii >= pred->exonCount) );
 assert( !(strand == '-' && ii < 0) );
 // clip off part of UTR in exon that has CDS in it too
 int exonOffset = pred->cdsStart - pred->exonStarts[ii];
 offset += exonOffset;
 return offset;
 }
 
-static char *getCodingSequence(struct genePred *pred, char *transcriptSequence)
+static char *getCodingSequence(struct genePred *pred, char *transcriptSequence, struct lm *lm)
 /* extract the CDS from a transcript */
 {
 if (*pred->strand == '-')
     reverseComplement(transcriptSequence, strlen(transcriptSequence));
 
 // Get leftmost CDS boundary to trim off 5' (or 3' if on minus strand):
 int cdsOffset = getCodingOffsetInTx(pred, '+');
-char *newString = cloneString(transcriptSequence + cdsOffset);
+char *newString = lmCloneString(lm, transcriptSequence + cdsOffset);
 
 // trim off 3' (or 5' if on minus strand)
 newString[genePredCdsSize(pred)] = 0;
 
 // correct for strand
 if (*pred->strand == '-')
     {
     reverseComplement(transcriptSequence, strlen(transcriptSequence));
     reverseComplement(newString, strlen(newString));
     }
 
 return newString;
 }
 
 static int firstChange(char *string1, char *string2, int *numDifferent)
@@ -228,87 +198,116 @@
 int lastChange = firstChange;
 
 if (numDifferent != NULL)
     {
     for (; (*string1) && (*string2); string1++, string2++, count++)
 	{
 	if (*string1 != *string2)
 	    lastChange = count;
 	}
     *numDifferent = lastChange - firstChange + 1;
     }
 
 return firstChange;
 }
 
+static struct dnaSeq *lmNewSimpleSeq(struct lm *lm, char *seq)
+/* Like newDnaSeq but no name and using localmem. */
+{
+struct dnaSeq *simpleSeq;
+lmAllocVar(lm, simpleSeq);
+simpleSeq->dna = seq;
+simpleSeq->size = strlen(seq);
+return simpleSeq;
+}
+
+static char *lmSimpleTranslate(struct lm *lm, struct dnaSeq *dnaSeq)
+/* Just translate dnaSeq into pep, use 'Z' and truncate for stop codon, and use localmem. */
+{
+int aaLen = (dnaSeq->size / 3) + 1;
+char *pep = lmAlloc(lm, aaLen + 1);
+int i;
+for (i = 0;  i < dnaSeq->size;  i += 3)
+    {
+    char aa = lookupCodon(dnaSeq->dna + i);
+    pep[i/3] = aa;
+    if (aa == '\0')
+	{
+	pep[i/3] = 'Z';
+	break;
+	}
+    }
+if (i < dnaSeq->size && pep[i/3] != 'Z')
+    // incomplete final codon
+    pep[i/3] = 'X';
+return pep;
+}
+
 static void getSequences(struct genePred *pred, char *transcriptSequence,
-    char **retCodingSequence,  char **retAaSeq)
+			 char **retCodingSequence,  char **retAaSeq, struct lm *lm)
 /* get coding sequences for a transcript and a variant transcript */
 {
-char *codingSequence = getCodingSequence(pred, transcriptSequence);
-struct dnaSeq *codingDna = newDnaSeq(codingSequence, strlen(codingSequence), NULL);
-// TRUE here truncates at stop, but doesn't include the Z:
-aaSeq *aaSeq = translateSeq(codingDna, 0, FALSE);
-char *stop = strchr(aaSeq->dna, 'Z');
+char *codingSequence = getCodingSequence(pred, transcriptSequence, lm);
+struct dnaSeq *codingDna = lmNewSimpleSeq(lm, codingSequence);
+char *aaSeq = lmSimpleTranslate(lm, codingDna);
+char *stop = strchr(aaSeq, 'Z');
 if (stop != NULL)
     {
-    *(stop+1) = '\0';
     // If aaSeq was truncated by a stop codon, truncate codingSequence as well:
-    int codingLength = 3*strlen(aaSeq->dna);
+    int codingLength = 3*strlen(aaSeq);
     codingSequence[codingLength] = '\0';
     }
 *retCodingSequence = codingSequence;
-*retAaSeq = cloneString(aaSeq->dna);
-freez(aaSeq);
-freez(codingDna);
+*retAaSeq = aaSeq;
 }
 
 static void setNCExonVals(struct gpFx *gpFx, int exonNum, int cDnaPosition)
 /* This gpFx is for a variant in exon of non-coding gene or UTR exon of coding gene;
  * set details.nonCodingExon values. */
 {
 gpFx->details.nonCodingExon.exonNumber = exonNum;
 gpFx->details.nonCodingExon.cDnaPosition = cDnaPosition;
 }
 
 struct gpFx *gpFxCheckUtr( struct allele *allele, struct genePred *pred, 
-			   int exonNum, int cDnaPosition)
+			   int exonNum, int cDnaPosition, struct lm *lm)
 /* check for effects in UTR of coding gene -- caller ensures it's in exon, pred is coding
  * and exonNum has been strand-adjusted */
 {
 struct gpFx *gpFx = NULL;
 enum soTerm term = 0;
 struct variant *variant = allele->variant;
 if (variant->chromStart < pred->cdsStart && variant->chromEnd > pred->txStart)
     // we're in left UTR
     term = (*pred->strand == '-') ? _3_prime_UTR_variant : _5_prime_UTR_variant;
 else if (variant->chromStart < pred->txEnd && variant->chromEnd > pred->cdsEnd)
     // we're in right UTR
     term = (*pred->strand == '-') ? _5_prime_UTR_variant : _3_prime_UTR_variant;
 if (term != 0)
     {
-    gpFx = gpFxNew(allele->sequence, pred->name, term, nonCodingExon);
+    gpFx = gpFxNew(allele->sequence, pred->name, term, nonCodingExon, lm);
     setNCExonVals(gpFx, exonNum, cDnaPosition);
     }
 return gpFx;
 }
 
 struct gpFx *gpFxChangedNoncodingExon(struct allele *allele, struct genePred *pred,
-				      int exonNum, int cDnaPosition)
+				      int exonNum, int cDnaPosition, struct lm *lm)
 /* generate an effect for a variant in a non-coding transcript */
 {
-struct gpFx *gpFx = gpFxNew(allele->sequence, pred->name, non_coding_exon_variant, nonCodingExon);
+struct gpFx *gpFx = gpFxNew(allele->sequence, pred->name, non_coding_exon_variant, nonCodingExon,
+			    lm);
 if (*pred->strand == '-')
     exonNum = pred->exonCount - exonNum - 1;
 setNCExonVals(gpFx, exonNum, cDnaPosition);
 return gpFx;
 }
 
 boolean isSafeFromNMD(int exonNum, struct variant *variant, struct genePred *pred)
 /* Return TRUE if variant in strand-corrected exonNum is in the region
  * of pred that would make it safe from Nonsense-Mediated Decay (NMD).
  * NMD is triggered by the presence of a stop codon that appears
  * before ~50bp before the end of the last exon.  In other words, if
  * there's a stop codon with a detectable downstream splice junction,
  * translation is prevented -- see
  * http://en.wikipedia.org/wiki/MRNA_surveillance#Mechanism_in_mammals */
 {
@@ -391,281 +390,246 @@
 	    }
 	else
 	    {
 	    // Single aa change
 	    if (cc->pepPosition == 0 && cc->aaOld[0] == 'M')
 		effect->soNumber = initiator_codon_variant;
 	    else if (cc->pepPosition == oldAaSize-1 && oldAa[oldAaSize-1] != 'Z')
 		effect->soNumber = incomplete_terminal_codon_variant;
 	    else
 		effect->soNumber = missense_variant;
 	    }
 	}
     }
 }
 
-struct gpFx *gpFxChangedCodingExon(struct allele *allele, struct genePred *pred,
-    int exonNum, struct psl *transcriptPsl, char *transcriptSequence,
-    char *newSequence, struct genePred *newPred)
+struct gpFx *gpFxChangedCodingExon(struct allele *allele, struct genePred *pred, int exonNum,
+				   char *transcriptSequence, char *newSequence,
+				   struct genePred *newPred, struct lm *lm)
 /* calculate effect of allele change on coding transcript */
 {
 struct gpFx *effectsList = NULL;
 if (*pred->strand == '-')
     exonNum = pred->exonCount - exonNum - 1;
 
 // first find effects of allele in UTR, if any
 int dnaChangeLength;
 int cDnaPosition = firstChange(newSequence, transcriptSequence, &dnaChangeLength);
-effectsList = gpFxCheckUtr(allele, pred, exonNum, cDnaPosition);
+effectsList = gpFxCheckUtr(allele, pred, exonNum, cDnaPosition, lm);
 
-// calculate original and variant coding AA's
+// calculate original and variant coding DNA and AA's
 char *oldCodingSequence, *newCodingSequence, *oldaa, *newaa;
-getSequences(pred, transcriptSequence, &oldCodingSequence, &oldaa);
-getSequences(newPred, newSequence, &newCodingSequence, &newaa);
+getSequences(pred, transcriptSequence, &oldCodingSequence, &oldaa, lm);
+getSequences(newPred, newSequence, &newCodingSequence, &newaa, lm);
 
 // if coding sequence hasn't changed, we're done (allele == reference allele)
 if (sameString(oldCodingSequence, newCodingSequence))
     return effectsList;
 
 // allocate the effect structure - fill in soNumber and details below
-struct gpFx *effect = gpFxNew(allele->sequence, pred->name, coding_sequence_variant, codingChange);
+struct gpFx *effect = gpFxNew(allele->sequence, pred->name, coding_sequence_variant, codingChange,
+			      lm);
 struct codingChange *cc = &effect->details.codingChange;
 cc->cDnaPosition = cDnaPosition;
 int cdsChangeLength;
 cc->cdsPosition = firstChange( newCodingSequence, oldCodingSequence, &cdsChangeLength);
 cc->exonNumber = exonNum;
 
 // show specific coding changes by making before-and-after subsequences:
 int codonPosStart = (cc->cdsPosition / 3) ;
 int codonPosEnd = ((cdsChangeLength - 1 + cc->cdsPosition) / 3) ;
 int numCodons = (codonPosEnd - codonPosStart + 1);
-cc->codonOld = cloneStringZ(oldCodingSequence + codonPosStart*3, numCodons*3);
-cc->codonNew = cloneStringZ(newCodingSequence + codonPosStart*3, numCodons*3);
+cc->codonOld = lmCloneStringZ(lm, oldCodingSequence + codonPosStart*3, numCodons*3);
+cc->codonNew = lmCloneStringZ(lm, newCodingSequence + codonPosStart*3, numCodons*3);
 
 cc->pepPosition = codonPosStart;
-cc->aaOld = cloneStringZ(oldaa + cc->pepPosition, numCodons);
-cc->aaNew = cloneStringZ(newaa + cc->pepPosition, numCodons);
+cc->aaOld = lmCloneStringZ(lm, oldaa + cc->pepPosition, numCodons);
+cc->aaNew = lmCloneStringZ(lm, newaa + cc->pepPosition, numCodons);
 
 boolean safeFromNMD = isSafeFromNMD(exonNum, allele->variant, pred);
 setSpecificCodingSoTerm(effect, oldaa, newaa, countDashes(newCodingSequence), cdsChangeLength,
 			safeFromNMD);
 
 slAddHead(&effectsList, effect);
 return effectsList;
 }
 
 
-struct gpFx *gpFxInExon(struct allele *allele, struct genePred *pred, 
-    int exonNum, struct psl *transcriptPsl, struct dnaSeq *transcriptSequence)
+struct gpFx *gpFxInExon(struct allele *allele, struct genePred *pred, int exonNum,
+			int exonQStart, struct dnaSeq *transcriptSequence,
+			struct lm *lm)
 /* generate an effect from a different allele in an exon */
 {
 struct genePred *newPred = pred;
-char *newSequence = gpFxModifySequence(allele, pred, exonNum,
-				       transcriptPsl, transcriptSequence, &newPred);
+char *newSequence = gpFxModifySequence(allele, pred, exonNum, exonQStart, transcriptSequence,
+				       &newPred, lm);
 
 if (sameString(newSequence, transcriptSequence->dna))
     return NULL;  // no change in transcript
 
 struct gpFx *effectsList;
 if (pred->cdsStart != pred->cdsEnd)
-    effectsList = gpFxChangedCodingExon(allele, pred, exonNum, transcriptPsl, 
-					transcriptSequence->dna, newSequence, newPred);
+    effectsList = gpFxChangedCodingExon(allele, pred, exonNum, transcriptSequence->dna,
+					newSequence, newPred, lm);
 else
     {
     int cDnaPosition = firstChange(newSequence, transcriptSequence->dna, NULL);
-    effectsList = gpFxChangedNoncodingExon(allele, pred, exonNum, cDnaPosition);
+    effectsList = gpFxChangedNoncodingExon(allele, pred, exonNum, cDnaPosition, lm);
     }
 
-if (newPred != pred)
-    freeMem(newPred);
 return effectsList;
 }
 
-static struct psl *genePredToPsl(struct genePred *pred)
-/* generate a PSL alignment to a transcript from a genePred */
-{
-int qSize = genePredBases(pred);
-#define BOGUS_CHROM_SIZE  0
-struct psl *psl = pslNew(pred->name, qSize, 0, qSize,
-                         pred->chrom, BOGUS_CHROM_SIZE, 
-			 pred->txStart, pred->txEnd,
-			 pred->strand, pred->exonCount, 0);
-psl->match = psl->qSize;
-
-int i, qNext = 0;
-for (i = 0; i < pred->exonCount; i++)
-    {
-    int exonSize =  pred->exonEnds[i] - pred->exonStarts[i];
-
-    psl->qStarts[i] = qNext;
-    //psl->tStarts[i] = pred->exonStarts[i] + pred->txStart;
-    psl->tStarts[i] = pred->exonStarts[i];
-    psl->blockSizes[i] = exonSize;
-
-/* notnow
-    if (i > 0)
-        {
-        psl->tNumInsert += 1;
-        psl->tBaseInsert += psl->tStarts[i] - pslTEnd(psl, i-1);
-        }
-*/
-
-    psl->blockCount++;
-    qNext += psl->blockSizes[i];
-    }
-
-return psl;
-}
-
-static struct gpFx *gpFxCheckExons(struct variant *variant, 
-    struct genePred *pred, struct dnaSeq *transcriptSequence)
+static struct gpFx *gpFxCheckExons(struct variant *variantList, struct genePred *pred,
+				   struct dnaSeq *transcriptSequence, struct lm *lm)
 // check to see if the variant is in an exon
 {
-int ii;
 struct gpFx *effectsList = NULL;
-struct psl *transcriptPsl = genePredToPsl(pred);
-
-// should copy transcriptSequence if we have more than one variant!!
-for(; variant ; variant = variant->next)
-    {
-    struct allele *allele = variant->alleles;
-    for(; allele ; allele = allele->next)
-	{
+int exonQStart = 0;
+int ii;
 	for(ii=0; ii < pred->exonCount; ii++)
 	    {
+    struct variant *variant = variantList;
+    for(; variant ; variant = variant->next)
+	{
 	    // check if in an exon
-	    int size;
 	    int end = variant->chromEnd;
 	    if (variant->chromStart == end)
 		end++;
-	    if ((size = positiveRangeIntersection(pred->exonStarts[ii], 
-		pred->exonEnds[ii], 
+	int size;
+	if ((size = positiveRangeIntersection(pred->exonStarts[ii], pred->exonEnds[ii], 
 		variant->chromStart, end)) > 0)
 		{
 		if (size != end - variant->chromStart)
 		    {
-		    struct gpFx *effect = gpFxNew(allele->sequence, pred->name,
-						  complex_transcript_variant, none);
+		// variant is not completely in exon
+		struct gpFx *effect = gpFxNew("", pred->name,
+					      complex_transcript_variant, none, lm);
 		    effectsList = slCat(effectsList, effect);
 		    }
-		effectsList = slCat(effectsList, gpFxInExon(allele, pred, ii,
-		    transcriptPsl, transcriptSequence));
+	    struct allele *allele = variant->alleles;
+	    for(; allele ; allele = allele->next)
+		{
+		effectsList = slCat(effectsList, gpFxInExon(allele, pred, ii, exonQStart,
+							    transcriptSequence, lm));
 		}
 	    }
 	}
+    exonQStart += (pred->exonEnds[ii] - pred->exonStarts[ii]);
     }
 
 return effectsList;
 }
 
-static struct gpFx *gpFxCheckIntrons(struct variant *variant, struct genePred *pred)
+static struct gpFx *gpFxCheckIntrons(struct variant *variant, struct genePred *pred, struct lm *lm)
 // check to see if a single variant overlaps an intron (and possibly splice region)
 //#*** TODO: watch out for "introns" that are actually indels between tx seq and ref genome!
 {
 struct gpFx *effectsList = NULL;
 boolean minusStrand = (pred->strand[0] == '-');
 int ii;
 for (ii=0; ii < pred->exonCount - 1; ii++)
     {
     int intronStart = pred->exonEnds[ii];
     int intronEnd = pred->exonStarts[ii+1];
     if (variant->chromEnd > intronStart && variant->chromStart < intronEnd)
 	{
 	enum soTerm soNumber = intron_variant;
 	if (variant->chromEnd > intronStart && variant->chromStart < intronStart+2)
 	    // Within 2 bases of intron start(/end for '-'):
 	    soNumber = minusStrand ? splice_acceptor_variant : splice_donor_variant;
 	if (variant->chromEnd > intronEnd-2 && variant->chromStart < intronEnd)
 	    // Within 2 bases of intron end(/start for '-'):
 	    soNumber = minusStrand ? splice_donor_variant : splice_acceptor_variant;
 	else if ((variant->chromEnd > intronStart+3 && variant->chromStart < intronStart+8) ||
 		 (variant->chromEnd > intronEnd-8 && variant->chromStart < intronEnd+3))
 	    // Within 3 to 8 bases of intron start or end:
 	    soNumber = splice_region_variant;
-	struct gpFx *effects = gpFxNew("", pred->name, soNumber, intron);
+	struct gpFx *effects = gpFxNew("", pred->name, soNumber, intron, lm);
 	effects->details.intron.intronNumber = minusStrand ? (pred->exonCount - ii - 1) : ii;
 	slAddHead(&effectsList, effects);
 	}
     else if ((variant->chromEnd > intronStart-3 && variant->chromStart < intronStart) ||
 	     (variant->chromEnd > intronEnd && variant->chromStart < intronEnd+3))
 	{
 	// if variant is in exon *but* within 3 bases of splice site,
 	// it also qualifies as splice_region_variant:
-	struct gpFx *effects = gpFxNew("", pred->name, splice_region_variant, intron);
+	struct gpFx *effects = gpFxNew("", pred->name, splice_region_variant, intron, lm);
 	effects->details.intron.intronNumber = minusStrand ? (pred->exonCount - ii - 1) : ii;
 	slAddHead(&effectsList, effects);
 	}
     }
 return effectsList;
 }
 
 
-static struct gpFx *gpFxCheckBackground(struct variant *variant, 
-    struct genePred *pred)
+static struct gpFx *gpFxCheckBackground(struct variant *variant, struct genePred *pred,
+					struct lm *lm)
 // check to see if the variant is up or downstream or in intron of the gene 
 {
 struct gpFx *effectsList = NULL;
 
 for(; variant ; variant = variant->next)
     {
     // is this variant in an intron
-    effectsList = slCat(effectsList, gpFxCheckIntrons(variant, pred));
+    effectsList = slCat(effectsList, gpFxCheckIntrons(variant, pred, lm));
 
     // Is this variant to the left or right of transcript?
     enum soTerm soNumber = 0;
     if (variant->chromStart < pred->txStart &&
 	variant->chromEnd > pred->txStart - GPRANGE)
 	{
 	if (*pred->strand == '+')
 	    soNumber = upstream_gene_variant;
 	else
 	    soNumber = downstream_gene_variant;
 	}
     else if (variant->chromEnd > pred->txEnd &&
 	     variant->chromStart < pred->txEnd + GPRANGE)
 	{
 	if (*pred->strand == '+')
 	    soNumber = downstream_gene_variant;
 	else
 	    soNumber = upstream_gene_variant;
 	}
     if (soNumber != 0)
 	{
-	struct gpFx *effects = gpFxNew("", pred->name, soNumber, none);
+	struct gpFx *effects = gpFxNew("", pred->name, soNumber, none, lm);
 	effectsList = slCat(effectsList, effects);
 	}
     }
 
 return effectsList;
 }
 
 static void checkVariantList(struct variant *variant)
 // check to see that we either have one variant (possibly with multiple
 // alleles) or that if we have a list of variants, they only have
 // one allele a piece.
 {
 if (variant->next == NULL)	 // just one variant
     return;
 
 for(; variant; variant = variant->next)
     if (variant->numAlleles != 1)
 	errAbort("gpFxPredEffect needs either 1 variant, or only 1 allele in all variants");
 }
 
 struct gpFx *gpFxPredEffect(struct variant *variant, struct genePred *pred,
-    struct dnaSeq *transcriptSequence)
+			    struct dnaSeq *transcriptSequence, struct lm *lm)
 // return the predicted effect(s) of a variation list on a genePred
 {
 struct gpFx *effectsList = NULL;
 
 // make sure we can deal with the variants that are coming in
 checkVariantList(variant);
 
 // check to see if SNP is up or downstream in intron 
-effectsList = slCat(effectsList, gpFxCheckBackground(variant, pred));
+effectsList = slCat(effectsList, gpFxCheckBackground(variant, pred, lm));
 
 // check to see if SNP is in the transcript
-effectsList = slCat(effectsList, gpFxCheckExons(variant, pred, 
-    transcriptSequence));
+effectsList = slCat(effectsList, gpFxCheckExons(variant, pred, transcriptSequence, lm));
+
+//#*** sort by position
 
 return effectsList;
 }