1212de18f5cfcd2473cc9f761ff7b089b3e88493
braney
  Fri Apr 20 16:47:23 2012 -0700
more work to advance #6152.   Now with documentation of synonymous changes, the actual changes in (non)synonumous changes, and a fix to gnawing problem of the exonstarts in the genePred disappearing
diff --git src/hg/lib/gpFx.c src/hg/lib/gpFx.c
index 268481a..1341d24 100644
--- src/hg/lib/gpFx.c
+++ src/hg/lib/gpFx.c
@@ -6,36 +6,42 @@
 char *gpFxModifySequence(struct allele *allele, struct genePred *pred, 
     int exonNum, struct psl *transcriptPsl, struct dnaSeq *transcriptSequence)
 /* modify a transcript to what it'd be if the alternate allele were present */
 {
 //if ((pred->optFields & genePredExonFramesFld) == 0)
  //   genePredAddExonFrames(pred);
 
 // change transcript at variant point
 int exonOffset = allele->variant->chromStart - transcriptPsl->tStarts[exonNum];
 int transcriptOffset = transcriptPsl->qStarts[exonNum] + exonOffset;
 
 if (allele->length != allele->variant->chromEnd - allele->variant->chromStart)
     errAbort("only support alleles the same length as the reference");
 
 char *retSequence = cloneString(transcriptSequence->dna);
+char *newAllele = cloneString(allele->sequence);
 if (*pred->strand == '-')
+    {
     transcriptOffset = transcriptSequence->size - (transcriptOffset + 1);
+    reverseComplement(newAllele, strlen(newAllele));
+    }
 
 // make the change in the sequence
-memcpy(&retSequence[transcriptOffset], allele->sequence, 
-    allele->length);
+memcpy(&retSequence[transcriptOffset], newAllele, allele->length);
+
+// clean up
+freeMem(newAllele);
 
 return retSequence;
 }
 
 char *getCodingSequence(struct genePred *pred, char *transcriptSequence)
 /* extract the CDS from a transcript */
 {
 int ii;
 
 if (*pred->strand == '-')
     reverseComplement(transcriptSequence, strlen(transcriptSequence));
 
 // trim off the 5'
 char *ptr = transcriptSequence;
 for(ii=0; ii < pred->exonCount; ii++)
@@ -86,57 +92,74 @@
     return effectsList;  // no change in transcript
 
 // check to see if coding sequence is changed
 // calculate original coding AA's
 char *oldCodingSequence = getCodingSequence(pred, transcriptSequence->dna);
 struct dnaSeq *oldCodingDna = newDnaSeq(oldCodingSequence, 
 	strlen(oldCodingSequence), pred->name);
 aaSeq *oldaa = translateSeq(oldCodingDna, 0, FALSE);
 
 // calculate variant coding AA's
 char *newCodingSequence = getCodingSequence(pred, newSequence);
 struct dnaSeq *newCodingDna = newDnaSeq(newCodingSequence, 
     strlen(newCodingSequence), pred->name);
 aaSeq *newaa = translateSeq(newCodingDna, 0, FALSE);
 
-//transcript ID, exon number(s), cDNA position, CDS position, peptide position, alternate amino acids, alternate codons" 9
+struct gpFx *effects;
+AllocVar(effects);
+slAddHead(&effectsList, effects);
+effects->so.sub.codingChange.transcript = cloneString(pred->name);
+effects->so.sub.codingChange.cDnaPosition = firstChange( newSequence, 
+    transcriptSequence->dna);
+effects->so.sub.codingChange.cdsPosition = firstChange( newCodingSequence,
+    oldCodingSequence);
+if (*pred->strand == '-')
+    effects->so.sub.codingChange.exonNumber = pred->exonCount - exonNum;
+else
+    effects->so.sub.codingChange.exonNumber = exonNum;
+
+int codonPos = (effects->so.sub.codingChange.cdsPosition / 3) * 3;
+
+char buffer[100];
+safef(buffer, sizeof buffer, "%c%c%c > %c%c%c",
+    toupper(oldCodingSequence[codonPos + 0]),
+    toupper(oldCodingSequence[codonPos + 1]),
+    toupper(oldCodingSequence[codonPos + 2]),
+    toupper(newCodingSequence[codonPos + 0]),
+    toupper(newCodingSequence[codonPos + 1]),
+    toupper(newCodingSequence[codonPos + 2]));
+
+effects->so.sub.codingChange.codonChanges = cloneString(buffer);
 
 if (sameString(newaa->dna, oldaa->dna))
     {
     // synonymous change
+    effects->so.soNumber = synonymous_variant;
+    effects->so.sub.codingChange.pepPosition = 0;
+    effects->so.sub.codingChange.aaChanges = "";
     }
 else
     {
     // non-synonymous change
-    struct gpFx *effects;
-    AllocVar(effects);
     effects->so.soNumber = non_synonymous_variant;
-    effects->so.sub.codingChange.transcript = cloneString(pred->name);
-    if (*pred->strand == '-')
-	effects->so.sub.codingChange.exonNumber = exonNum;
-	effects->so.sub.codingChange.exonNumber = pred->exonCount - exonNum;
-    effects->so.sub.codingChange.cDnaPosition = firstChange( newSequence, 
-	transcriptSequence->dna);
-    effects->so.sub.codingChange.cdsPosition = firstChange( newCodingSequence,
-	oldCodingSequence);
+
     effects->so.sub.codingChange.pepPosition = firstChange( newaa->dna,
 	oldaa->dna);
-	    /*
-	    char *aaChanges;
-	    char *codonChanges;
-	    */
-    slAddHead(&effectsList, effects);
+    safef(buffer, sizeof buffer, "%c > %c",
+	toupper(oldaa->dna[effects->so.sub.codingChange.pepPosition]),
+	toupper(newaa->dna[effects->so.sub.codingChange.pepPosition]));
+    effects->so.sub.codingChange.aaChanges = cloneString(buffer);
     }
 
 return effectsList;
 }
 
 
 struct gpFx *gpFxInExon(struct allele *allele, struct genePred *pred, 
     int exonNum, struct psl *transcriptPsl, struct dnaSeq *transcriptSequence)
 {
 struct gpFx *effectsList = NULL;
 
 if (positiveRangeIntersection(pred->cdsStart, pred->cdsEnd,
 	allele->variant->chromStart, allele->variant->chromEnd))
     {
     // we're in CDS