37977956905d588f73ee5f635fae2f609cb40842
angie
  Wed Aug 9 12:52:00 2017 -0700
Assorted little util functions in support of adding HGVS output to hgVai -- refs #19968

diff --git src/lib/dnaseq.c src/lib/dnaseq.c
index 11a007c..2eaee42 100644
--- src/lib/dnaseq.c
+++ src/lib/dnaseq.c
@@ -53,30 +53,44 @@
 }
 
 void freeDnaSeqList(struct dnaSeq **pSeqList)
 /* Free up list of DNA sequences. */
 {
 struct dnaSeq *seq, *next;
 
 for (seq = *pSeqList; seq != NULL; seq = next)
     {
     next = seq->next;
     freeDnaSeq(&seq);
     }
 *pSeqList = NULL;
 }
 
+char *dnaSeqCannibalize(struct dnaSeq **pSeq)
+/* Detach the dna string from *pSeq and return it; the rest of the dnaSeq is released
+ * with freeDnaSeq.  Returns NULL when pSeq or *pSeq is NULL. */
+{
+struct dnaSeq *container = (pSeq != NULL) ? *pSeq : NULL;
+char *dna;
+if (container == NULL)
+    return NULL;
+dna = container->dna;
+container->dna = NULL;  /* keep freeDnaSeq away from the string being returned */
+freeDnaSeq(pSeq);
+return dna;
+}
+
 boolean seqIsLower(bioSeq *seq)
 /* Return TRUE if sequence is all lower case. */
 {
 int size = seq->size, i;
 char *poly = seq->dna;
 for (i=0; i<size; ++i)
     if (!islower(poly[i]))
         return FALSE;
 return TRUE;
 }
 
 boolean seqIsDna(bioSeq *seq)
 /* Make educated guess whether sequence is DNA or protein. */
 {
 return isDna(seq->dna, seq->size);
@@ -116,30 +130,43 @@
 *pep = 0;
 assert(actualSize <= inSize/3+1);
 seq->size = actualSize;
 seq->name = cloneString(inSeq->name);
 return seq;
 }
 
 aaSeq *translateSeq(struct dnaSeq *inSeq, unsigned offset, boolean stop)
 /* Return a translated sequence.  Offset is position of first base to
  * translate. If stop is TRUE then stop at first stop codon.  (Otherwise 
  * represent stop codons as 'Z'). */
 {
 return translateSeqN(inSeq, offset, 0, stop);
 }
 
+void aaSeqZToX(aaSeq *aa)
+/* Find the first 'Z' (stop codon) in aa->dna; if there is one, replace it with 'X'
+ * (compatible with dnautil's aminoAcidTable) and cut the sequence off right after it. */
+{
+char *stop = strchr(aa->dna, 'Z');
+if (stop == NULL)
+    return;
+stop[0] = 'X';
+stop[1] = '\0';
+/* New length: everything up to and including the 'X'. */
+aa->size = (stop + 1) - aa->dna;
+}
+
 bioSeq *whichSeqIn(bioSeq **seqs, int seqCount, char *letters)
 /* Figure out which if any sequence letters is in. */
 {
 aaSeq *seq;
 int i;
 
 for (i=0; i<seqCount; ++i)
     {
     seq = seqs[i];
     if (seq->dna <= letters && letters < seq->dna + seq->size)
         return seq;
     }
 internalErr();
 return NULL;
 }