93e8347fde902dd92ba2b90b94d7e2ed0d99b03d
braney
  Tue Jan 21 16:09:01 2014 -0800
fix gff3ToPsl to work with NCBI's gff alignments of haplotype chroms toreference chroms.

diff --git src/lib/psl.c src/lib/psl.c
index ca75f20..8e4611a 100644
--- src/lib/psl.c
+++ src/lib/psl.c
@@ -1798,104 +1798,139 @@
  * updated to with the new amount of space. */
 {
 int blockSpace = *blockSpacePtr;
 int newSpace = 2 * blockSpace;
 ExpandArray(psl->blockSizes, blockSpace, newSpace);
 ExpandArray(psl->qStarts, blockSpace, newSpace);
 ExpandArray(psl->tStarts, blockSpace, newSpace);
 if (psl->qSequence != NULL)
     {
     ExpandArray(psl->qSequence, blockSpace, newSpace);
     ExpandArray(psl->tSequence, blockSpace, newSpace);
     }
 *blockSpacePtr = newSpace;
 }
 
-static boolean getNextCigarOp(char **ptr, char *op, int *size)
-/* parts the next cigar op */
+static boolean getNextCigarOp(char *startPtr, boolean reverse, char **ptr, char *op, int *size)
+/* gets one cigar op out of the CIGAR string.  Reverse the order if asked */
 {
 char *str = *ptr;
 
-if ((str == NULL) || (*str == 0))
+if (str == NULL)
+    return FALSE;
+
+if ((!reverse && (*str == 0)) || (reverse && (str == startPtr)))
     return FALSE;
 
 // between each cigar op there could be nothing, or a space, or a plus
+if (reverse)
+    {
+    char *end = str - 1;
+    for(;*end ; end--)
+	{
+	if (isalpha(*end))
+	    break;
+	}
+    str = end;
+    *ptr = end;
+    }
+else
+    {
     char *end = str + 1;
     for(;*end ; end++)
 	{
 	if (! (isdigit(*end)  || (*end == ' ') || (*end == '+')))
 	    break;
 	}
 
     *ptr = end;
+    }
 
 *op = *str++;
 *size = atoi(str);
 
 return TRUE;
 }
 
 struct psl* pslFromGff3Cigar(char *qName, int qSize, int qStart, int qEnd,
                              char *tName, int tSize, int tStart, int tEnd,
                              char* strand, char *cigar)
 /* create a PSL from a GFF3-style cigar formatted alignment */
 {
 // this is currently tested by the gff3ToPsl
 int blocksAlloced = 4;
 struct psl *psl = pslNew(qName, qSize, qStart, qEnd, tName, tSize, tStart, tEnd, strand, blocksAlloced, 0);
 
-char cigarSpec[strlen(cigar+1)];  // copy since parsing is destructive
-strcpy(cigarSpec, cigar);
-char *cigarNext = cigarSpec;
 char op;
 int size;
 int qNext = qStart, qBlkEnd = qEnd;
 int totalSize = 0;
 
 if (strand[0] == '-')
     reverseIntRange(&qNext, &qBlkEnd, qSize);
 int tNext = tStart, tBlkEnd = tEnd;
 if (strand[1] == '-')
     reverseIntRange(&tNext, &tBlkEnd, tSize);
 
-while(getNextCigarOp(&cigarNext, &op, &size))
+if (cigar == NULL)
+    {
+    // no cigar means one block
+    size = qEnd - qStart;
+    totalSize += size;
+    psl->blockSizes[psl->blockCount] = size;
+    psl->qStarts[psl->blockCount] = qNext;
+    psl->tStarts[psl->blockCount] = tNext;
+    psl->blockCount++;
+    tNext += size;
+    qNext += size;
+    }
+else
+    {
+    char cigarSpec[strlen(cigar+1)];  // copy since parsing is destructive
+    strcpy(cigarSpec, cigar);
+    char *cigarNext = cigarSpec;
+    if (strand[0] == '-')
+	for(; *cigarNext; cigarNext++)
+	    ;
+    while(getNextCigarOp(cigarSpec, (strand[0] == '-'), &cigarNext, &op, &size))
 	{
 	switch (op)
 	    {
 	    case 'M': // match or mismatch (gapless aligned block)
 		if (psl->blockCount == blocksAlloced)
 		    pslGrow(psl, &blocksAlloced);
 
 		totalSize += size;
 		psl->blockSizes[psl->blockCount] = size;
 		psl->qStarts[psl->blockCount] = qNext;
 		psl->tStarts[psl->blockCount] = tNext;
 		psl->blockCount++;
 		tNext += size;
 		qNext += size;
 		break;
 	    case 'I': // inserted in target
 		tNext += size;
 		break;
 	    case 'D': // deleted from target
 		qNext += size;
 		break;
 	    
 	    default:
 		errAbort("unrecognized CIGAR op %c in %s", op, cigar);
 	    }
 	}
+    }
 assert(qNext == qBlkEnd);
 assert(tNext == tBlkEnd);
 psl->match = totalSize;
 return psl;
 }
 
 int pslRangeTreeOverlap(struct psl *psl, struct rbTree *rangeTree)
 /* Return amount that psl overlaps (on target side) with rangeTree. */
 {
 int i;
 int overlap = 0;
 boolean isRc = (psl->strand[1] == '-');
 for (i=0; i<psl->blockCount; ++i)
     {
     int start = psl->tStarts[i];