6e5ee11ca95cd971984038cf65bae00d9c898707
galt
  Wed Jun 4 15:40:02 2014 -0700
Since we have git, it is easy to rename errabort.c to errAbort.c without losing any history.
diff --git src/hg/splignToPsl/splignToPsl.c src/hg/splignToPsl/splignToPsl.c
index 1115ec9..429266c 100644
--- src/hg/splignToPsl/splignToPsl.c
+++ src/hg/splignToPsl/splignToPsl.c
@@ -1,450 +1,450 @@
 /* ncbiTabToPsl - Convert NCBI tab-file alignments to PSL format. */
 
 /* Copyright (C) 2011 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "dystring.h"
-#include "errabort.h"
+#include "errAbort.h"
 #include "psl.h"
 #include "sqlNum.h"
 #include "dnautil.h"
 #include "obscure.h"
 #include "alignSeqSizes.h"
 #include "splignAlign.h"
 
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
     "splignToPsl - Convert splign tab-file pair alignments to PSL format\n"
     "\n"
     "usage:\n"
     "   splignToPsl [options] in out.psl\n"
     "\n"
     "Options:\n"
     "   -qSizesFile=szfile - read query sizes from tab file of <seqName> <size>\n"
     "   -qSizes='id1 size1 ..' - set query sizes, value is white-space separated pairs of sequence and size\n"
     "   -tSizesFile=szfile - read target sizes from tab file of <seqName> <size>\n"
     "   -tSizes='id1 size1 ..' - set target sizes, value is white-space separated pairs of sequence and size\n"
     "\n"
     "alignment format is:\n"
     "        1  query name\n"
     "        2  target name\n"
     "        3  ids\n"
     "        4  aligned block overall size. Including indel and misMatch\n"
     "        5  query start\n"
     "        6  query end\n"
     "        7  target start\n"
     "        8  target end\n"
     "        9  block type. exon, gap \n"
     "        10 alignment anontation. M-match, R-misMatch, I-insert, D-delete\n"
   );
 }
 
 
 /* command line */
 static struct optionSpec optionSpec[] = {
     {"qSizesFile", OPTION_STRING},
     {"qSizes", OPTION_STRING},
     {"tSizesFile", OPTION_STRING},
     {"tSizes", OPTION_STRING},
     {NULL, 0}
 };
 
 
 struct parser
 /* data used during parsing */
 {
     struct lineFile* lf;            /* alignment file */
     struct alignSeqSizes* ass;      /* sequence sizes */
     struct psl* psl;                /* PSL being constructed */
     struct splignAlign *recCache;  /* record catch */
     unsigned pslMaxBlks;            /* memory allocated for PSL blocks */
     char* curQName;              /* current alignment id */
     int curQEnd;            /* query end of the current alignment */
 };
 
 void parseErr(struct parser *pr, char* format, ...)
 /* prototype to get gnu attribute check */
 #if defined(__GNUC__)
 __attribute__((format(printf, 2, 3)))
 #endif
 ;
 
 void parseErr(struct parser *pr, char* format, ...)
 /* abort on parse error */
 {
 va_list args;
 va_start(args, format);
 fprintf(stderr, "parse error: %s:%d: ",
         pr->lf->fileName, pr->lf->lineIx);
 vaErrAbort(format, args);
 va_end(args);
 
 }
 
 struct splignAlign *loadNextRec(struct parser *pr)
 /* read the next record, if one is cached, return it instead. NULL on EOF */
 {
 struct splignAlign *rec = NULL;
 if (pr->recCache != NULL)
     {
     rec = pr->recCache;
     pr->recCache = NULL;
     }
 else
     {
     char *row[SPLIGNALIGN_NUM_COLS];
     if (lineFileChopNextTab(pr->lf, row, ArraySize(row)))
         rec = splignAlignLoad(row);
     }
 return rec;
 }
 
 void growPslBlocks(struct parser *pr, unsigned newBlks)
 /* grow memory allocated to PSL to be able to hold newBLks */
 {
 int oldSize = pr->pslMaxBlks * sizeof(unsigned);
 int newSize = newBlks * sizeof(unsigned);
 pr->psl->blockSizes = needMoreMem(pr->psl->blockSizes, oldSize, newSize);
 pr->psl->qStarts = needMoreMem(pr->psl->qStarts, oldSize, newSize);
 pr->psl->tStarts = needMoreMem(pr->psl->tStarts, oldSize, newSize);
 pr->pslMaxBlks = newBlks;
 }
 
 boolean nextAlign(struct parser *pr)
 /* start a new alignment, allocating psl, etc. */
 {
 struct splignAlign *rec = loadNextRec(pr);
 if (rec == NULL)
     return FALSE;
 
 AllocVar(pr->psl);
 growPslBlocks(pr, 8);
 pr->psl->qName = cloneString(rec->query);
 pr->psl->qSize = alignSeqSizesMustGetQuery(pr->ass, rec->query);
 pr->psl->tName = cloneString(rec->target);
 pr->psl->tSize = alignSeqSizesMustGetTarget(pr->ass, rec->target);
 
 /* strand is negative if addressed are reversed */
 pr->psl->strand[0] = (rec->qStart > rec->qEnd) ? '-' : '+';
 pr->psl->strand[1] = (rec->tStart > rec->tEnd) ? '-' : '+';
 
 pr->curQName = cloneString(rec->query); //* need end to be saved
 pr->curQEnd = 0;
 /* save record for block parsing */
 pr->recCache = rec;
 return TRUE;
 }
 
 int convertCoords(struct parser *pr, char strand, int alnStart, int alnStop,
                   int seqSize, int *startRet)
 /* convert a start/end pair to start and return block size */
 {
 if (((alnStart > alnStop) ? '-' : '+') != strand)
     parseErr(pr, "strand implied by addresses doesn't match expected: %d-%d vs %c",
              alnStart, alnStop, strand);
 if (strand == '-')
     {
     int hold = alnStart;
     alnStart = alnStop;
     alnStop = hold;
     }
 alnStart--;
 if (strand == '-')
     reverseIntRange(&alnStart, &alnStop, seqSize);
 
 *startRet = alnStart;
 return (alnStop - alnStart);
 
 }
 
 void addBlock(struct parser *pr,int  *qStart, int *tStart, int size, int match)
 {
 int iBlk = pr->psl->blockCount;
 
 if (iBlk >= pr->pslMaxBlks)
     growPslBlocks(pr, 2*pr->pslMaxBlks);
 
 pr->psl->blockSizes[iBlk] = size;
 pr->psl->blockCount++;
 
 /* get new coords */
 convertCoords(pr, pr->psl->strand[0], *qStart, *qStart + size,
 	      pr->psl->qSize, &(pr->psl->qStarts[iBlk]));
 
 if(pr->psl->strand[1] == '-')
     convertCoords(pr, pr->psl->strand[1], *tStart, *tStart - size,
 		  pr->psl->tSize, &(pr->psl->tStarts[iBlk]));
 else
     convertCoords(pr, pr->psl->strand[1], *tStart, *tStart + size,
                   pr->psl->tSize, &(pr->psl->tStarts[iBlk]));
 
 /* update stats */
 
 pr->psl->match += match;
 pr->psl->misMatch += (size - match);
 if(pr->psl->strand[1] == '-')
   {
   pr->psl->tStarts[iBlk] = *tStart - size;
   *tStart -= size;
   }
 else
   {
     pr->psl->tStarts[iBlk] = *tStart;
     *tStart += size ;
   }
 pr->psl->qStarts[iBlk] = *qStart;
 *qStart += size ;
 
 }
 
 
 void addRec(struct parser *pr, struct splignAlign* rec)
 /* add a record to the psl. It probably multi blocks for psl */
 {
 int size=0, match=0, insert=0, delete=0; 
 int qStart= rec->qStart -1, 
     tStart=pr->psl->strand[1]== '+'? rec->tStart - 1:rec->tStart;
 int anonSize = strlen(rec->anotation);
 int i =0;
 int anoStage = 0; /*anotation stage, 1-M, 2-R, 3-I, 4-D, 5-already get number for M,R stage */
 
 int shift = 1, shiftBack = -1;
 
 if(pr->psl->strand[1] == '-')
     {/* get shift amount */
     shift = -1;
     shiftBack = 1;
     }
 
 
 for(i=0; i < anonSize; i++)
     {
     if(rec->anotation[i] == 'M')
 	{/*is in a new match blocks */
 	
 	/* check previous stage and shift start*/
 	if(anoStage == 3)
 	    {
 	    tStart += insert;
 	    insert = 0;
 	    }
 	if(anoStage == 4)
 	    {
 	    qStart += delete;
 	    delete = 0;
 	    }
 	size++;
 	match++;
 	anoStage = 1;
 	continue;
 	}
     else if(rec->anotation[i] == 'R')
 	{/*is in a new misMatch Block */
 	
 	/* check previous stage and shift start*/
 	if(anoStage == 3)
 	    {
 	    tStart += insert;
 	    insert = 0;
 	    }
 	if(anoStage == 4)
 	    {
 	    qStart += delete;
 	    delete = 0;
 	    }
 	size++;
 	anoStage = 2;
 	continue;
 	}
     else if(rec->anotation[i] == 'I' )
 	{/* Is it insert? Add a new block to psl if it is. */
 	if (anoStage != 0)
 	    addBlock(pr, &qStart, &tStart, size, match);
 	anoStage = 3;
 	size = 0;
 	match = 0;
 	insert += shift;
 	continue;
 	}
     else if( rec->anotation[i] == 'D')
 	{/* Is it delete?  Add a new block to psl if it is. */
 	if (anoStage != 0)
 	    addBlock(pr, &qStart, &tStart, size, match);
 	anoStage = 4;
 	size = 0;
 	match = 0;
 	delete += shift;
 	continue;
 	}
     
     if(insert > 1|| delete > 1|| anoStage == 5)
 	/* check whether get fragment size */
 	continue;
     else
 	{
 	int tempSize = atoi(&(rec->anotation[i]));
 	switch(anoStage)
 	    {
 	    case 1:	//match block
 		size += (tempSize - 1);
 		match += (tempSize - 1);
 		anoStage = 5;
 		break;
 	    case 2: // misMatch block
 		size += (tempSize - 1);
 		anoStage = 5;
 		break;
 	    case 3: // insert in target 
 		insert = insert + shiftBack + tempSize;
 		break;
 	    case 4: // delete in target
 		delete = delete + shiftBack + tempSize;
 		break;
 	    case 5:
 		continue;
 	    }
 	}
     }
 
 // add the last block
 if(anoStage == 5 || anoStage == 1 || anoStage == 2)
     addBlock(pr, &qStart, &tStart, size, match);
 }
 
 
 void pslRcForSplign(struct psl *psl)
 {
 int i;
 
 /* swap strand, forcing target to have an explict strand */
 psl->strand[0] = (psl->strand[0] != '-') ? '-' : '+';
 psl->strand[1] = (psl->strand[1] != '-') ? '-' : '+';
 
 for (i = 0; i < psl->blockCount; i++)
     {
     psl->qStarts[i] = psl->qSize - (psl->qStarts[i] + psl->blockSizes[i]);
     }
 reverseUnsigned(psl->qStarts, psl->blockCount);
 reverseUnsigned(psl->blockSizes, psl->blockCount);
 }
 
 
 
 
 void convertAlign(struct parser *pr, FILE* out)
 /* read and comvert an alignment to psl. */
 {
 struct splignAlign* rec;
 int iBlk, i;
 boolean hasExon = FALSE;
 
 /* parser rows for this alignment */
 while ((rec = loadNextRec(pr)) != NULL)
     {
     if (!sameString(rec->query, pr->curQName) || 
 	pr->curQEnd > rec->qStart )
         {/* check whether this is same alignment. Next alignment, save it */
         pr->curQName = rec->query;
 	pr->curQEnd = rec->qEnd; 
 	pr->recCache = rec;
 	break;
         }
     pr->curQEnd = rec->qEnd; /* record current query end for future check*/
     if (stringIn("exon", rec->type)) /* check whether is an exon */ 
 	{
         addRec(pr, rec);
 	hasExon=TRUE;
 	}
     splignAlignFree(&rec);
     }
 
 if(hasExon)
     {/* finish psl */
     iBlk = pr->psl->blockCount-1;
     
     pr->psl->qStart = pr->psl->qStarts[0];
     pr->psl->qEnd = pr->psl->qStarts[iBlk] + pr->psl->blockSizes[iBlk];
     if (pr->psl->strand[0] == '-')
 	reverseIntRange(&pr->psl->qStart, &pr->psl->qEnd, pr->psl->qSize);
     
     pr->psl->tStart = pr->psl->tStarts[0];
     pr->psl->tEnd = pr->psl->tStarts[iBlk] + pr->psl->blockSizes[iBlk];
     if (pr->psl->strand[1] == '-')
 	{
 	pr->psl->tEnd = pr->psl->tStarts[0] + pr->psl->blockSizes[0];
 	pr->psl->tStart = pr->psl->tStarts[iBlk];
 	reverseInts(pr->psl->tStarts, pr->psl->blockCount);
 	}
     
     /* turn in BLAT style untranslated alignment  */
     if (pr->psl->strand[1] == '-')
 	{
     if (pr->psl->strand[0] == '-')
         parseErr(pr, "can't handle neg to neg alignments");
     pslRcForSplign(pr->psl);
 	}
     
     /* count gap info */
     for(i = 1; i < pr->psl->blockCount; i++)
 	{
 	if(pr->psl->qStarts[i] > pr->psl->qStarts[i-1] + pr->psl->blockSizes[i-1])
 	    {
 	    pr->psl->qNumInsert++;
 	    pr->psl->qBaseInsert += (pr->psl->qStarts[i] - 
 				     pr->psl->qStarts[i-1] - pr->psl->blockSizes[i-1]);
 	    }
 	if(pr->psl->tStarts[i] > pr->psl->tStarts[i-1] + pr->psl->blockSizes[i-1])
 	    {
 	    pr->psl->tNumInsert++;
 	    pr->psl->tBaseInsert += (pr->psl->tStarts[i] -
 				     pr->psl->tStarts[i-1]  - pr->psl->blockSizes[i-1]);
 	    }
 	}
     
     pr->psl->strand[1] = '\0';
     pslTabOut(pr->psl, out);
     }
 
 pslFree(&pr->psl);
 }
 
 
 void splignToPsl(struct alignSeqSizes *ass, char *inName, char *outName)
 /* splignToPsl - Convert NCBI tab splign alignments to psl format. */
 {
 struct parser parser;
 FILE *outFh;
 ZeroVar(&parser);
 parser.lf = lineFileOpen(inName, TRUE);
 parser.ass = ass;
 outFh = mustOpen(outName, "w");
 
 while (nextAlign(&parser))
     convertAlign(&parser, outFh);
 
 lineFileClose(&parser.lf);
 carefulClose(&outFh);
 }
 
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 struct alignSeqSizes* ass;
 optionInit(&argc, argv, optionSpec);
 if (argc != 3)
     usage();
 ass = alignSeqSizesNew(optionVal("qSizesFile", NULL),
                        optionVal("qSizes", NULL),
                        optionVal("tSizesFile", NULL),
                        optionVal("tSizes", NULL));
 splignToPsl(ass, argv[1], argv[2]);
 alignSeqSizesFree(&ass);
 return 0;
 }