07f0e0134a6eee29337add57582421937c617e9a
kent
  Wed Nov 2 14:17:47 2016 -0700
Refactoring code to convert bam to a psl from hgTracks/bamTrack.c refs #13673

diff --git src/lib/bamFile.c src/lib/bamFile.c
index 3248c60..beeb94c 100644
--- src/lib/bamFile.c
+++ src/lib/bamFile.c
@@ -1,25 +1,26 @@
 /* bamFile -- interface to binary alignment format files using Heng Li's samtools lib. */
 
 /* Copyright (C) 2014 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 
 #include "common.h"
 #include "portable.h"
 #include "bamFile.h"
 #include "htmshell.h"
 #include "udc.h"
+#include "psl.h"
 
 // If KNETFILE_HOOKS is used (as recommended!), then we can simply call bam_index_load
 // without worrying about the samtools lib creating local cache files in cgi-bin:
 
 static bam_index_t *bamOpenIdx(samfile_t *sam, char *fileOrUrl)
 /* If fileOrUrl has a valid accompanying .bai file, parse and return the index;
  * otherwise return NULL. */
 {
 if (sam->format.format == cram) 
     return sam_index_load(sam, fileOrUrl);
 
 // assume that index is a .bai file 
 char indexName[4096];
 safef(indexName, sizeof indexName, "%s.bai", fileOrUrl);
 return sam_index_load2(sam, fileOrUrl, indexName);
@@ -578,15 +579,92 @@
 }
 
 void bamChromInfoFreeList(struct bamChromInfo **pList)
 /* Free a list of dynamically allocated bamChromInfo's */
 {
 struct bamChromInfo *el, *next;
 
 for (el = *pList; el != NULL; el = next)
     {
     next = el->next;
     bamChromInfoFree(&el);
     }
 *pList = NULL;
 }
 
+struct psl *bamToPslUnscored(const bam1_t *bam, const bam_hdr_t *hdr)
+/* Translate BAM's numeric CIGAR encoding into PSL sufficient for cds.c (just coords,
+ * no scoring info) */
+{
+const bam1_core_t *core = &bam->core;
+struct psl *psl;
+AllocVar(psl);
+boolean isRc = (core->flag & BAM_FREVERSE);
+psl->strand[0] = isRc ? '-' : '+';
+psl->qName = cloneString(bam1_qname(bam));
+psl->tName = cloneString(hdr->target_name[core->tid]);
+unsigned blockCount = 0;
+unsigned *blockSizes, *qStarts, *tStarts;
+AllocArray(blockSizes, core->n_cigar);
+AllocArray(qStarts, core->n_cigar);
+AllocArray(tStarts, core->n_cigar);
+int tPos = core->pos, qPos = 0, qLength = 0;
+unsigned int *cigar = bam1_cigar(bam);
+int i;
+for (i = 0;  i < core->n_cigar;  i++)
+    {
+    char op;
+    int n = bamUnpackCigarElement(cigar[i], &op);
+    switch (op)
+	{
+	case 'X': // mismatch (gapless aligned block)
+	case '=': // match (gapless aligned block)
+	case 'M': // match or mismatch (gapless aligned block)
+	    blockSizes[blockCount] = n;
+	    qStarts[blockCount] = qPos;
+	    tStarts[blockCount] = tPos;
+	    blockCount++;
+	    tPos += n;
+	    qPos += n;
+	    qLength += n;
+	    break;
+	case 'I': // inserted in query
+	    qPos += n;
+	    qLength += n;
+	    break;
+	case 'D': // deleted from query
+	case 'N': // long deletion from query (intron as opposed to small del)
+	    tPos += n;
+	    break;
+	case 'S': // skipped query bases at beginning or end ("soft clipping")
+	    qPos += n;
+	    qLength += n;
+	    break;
+	case 'H': // skipped query bases not stored in record's query sequence ("hard clipping")
+	case 'P': // P="silent deletion from padded reference sequence" -- ignore these.
+	    break;
+	default:
+	    errAbort("bamToPsl: unrecognized CIGAR op %c -- update me", op);
+	}
+    }
+
+if (blockCount == 0)
+    {
+    pslFree(&psl);
+    return NULL;
+    }
+
+psl->tSize = hdr->target_len[core->tid];
+psl->tStart = tStarts[0];
+psl->tEnd = tStarts[blockCount-1] + blockSizes[blockCount-1];
+psl->qSize = qLength;
+psl->qStart = qStarts[0];
+psl->qEnd = qStarts[blockCount-1] + blockSizes[blockCount-1];
+if (isRc)
+    reverseIntRange(&psl->qStart, &psl->qEnd, psl->qSize);
+psl->blockCount = blockCount;
+psl->blockSizes = blockSizes;
+psl->qStarts = qStarts;
+psl->tStarts = tStarts;
+return psl;
+}
+