d93f47cd567bb70aba94de233054e8b4fae00f7d
braney
  Fri May 29 15:33:58 2020 -0700
remove frameshifts from psls in hgVai

diff --git src/lib/psl.c src/lib/psl.c
index 3346ffd..f6b65c4 100644
--- src/lib/psl.c
+++ src/lib/psl.c
@@ -1,2131 +1,2152 @@
 /* psl.c was originally generated by the autoSql program, which also 
  * generated as_psl.h and as_psl.sql.  This module links the database and the RAM 
  * representation of objects. 
  *
  * This file is copyright 2002 Jim Kent, but license is hereby
  * granted for all use - public, private or commercial. */
 
 #include "common.h"
 #include "sqlNum.h"
 #include "sqlList.h"
 #include "localmem.h"
 #include "psl.h"
 #include "hash.h"
 #include "linefile.h"
 #include "dnaseq.h"
 #include "dystring.h"
 #include "fuzzyFind.h"
 #include "aliType.h"
 #include "binRange.h"
 #include "rangeTree.h"
 
 
 struct psl *pslxLoad(char **row)
 /* Load a psl from row fetched with select * from psl
  * from database.  Dispose of this with pslFree(). */
 {
 struct psl *ret = pslLoad(row);
 int retSize;
 sqlStringDynamicArray(row[21],&ret->qSequence, &retSize);
 sqlStringDynamicArray(row[22],&ret->tSequence, &retSize);
 return ret;
 }
 
 struct psl *pslLoad(char **row)
 /* Load a psl from row fetched with select * from psl
  * from database.  Dispose of this with pslFree(). */
 {
 struct psl *ret;
 int sizeOne;
 
 AllocVar(ret);
 ret->blockCount = sqlUnsigned(row[17]);
 ret->match = sqlUnsigned(row[0]);
 ret->misMatch = sqlUnsigned(row[1]);
 ret->repMatch = sqlUnsigned(row[2]);
 ret->nCount = sqlUnsigned(row[3]);
 ret->qNumInsert = sqlUnsigned(row[4]);
 ret->qBaseInsert = sqlSigned(row[5]);
 ret->tNumInsert = sqlUnsigned(row[6]);
 ret->tBaseInsert = sqlSigned(row[7]);
 strcpy(ret->strand, row[8]);
 ret->qName = cloneString(row[9]);
 ret->qSize = sqlUnsigned(row[10]);
 ret->qStart = sqlUnsigned(row[11]);
 ret->qEnd = sqlUnsigned(row[12]);
 ret->tName = cloneString(row[13]);
 ret->tSize = sqlUnsigned(row[14]);
 ret->tStart = sqlUnsigned(row[15]);
 ret->tEnd = sqlUnsigned(row[16]);
 sqlUnsignedDynamicArray(row[18], &ret->blockSizes, &sizeOne);
 if (sizeOne != ret->blockCount)
     {
     printf("sizeOne bloxksizes %d bs %d block=%s\n",sizeOne, ret->blockCount,row[18]);
     }
 assert(sizeOne == ret->blockCount);
 sqlUnsignedDynamicArray(row[19], &ret->qStarts, &sizeOne);
 if (sizeOne != ret->blockCount)
     {
     printf("sizeOne qStarts %d bs %d\n",sizeOne, ret->blockCount);
     }
 assert(sizeOne == ret->blockCount);
 sqlUnsignedDynamicArray(row[20], &ret->tStarts, &sizeOne);
 if (sizeOne != ret->blockCount)
     {
     printf("sizeOne tStarts %d bs %d\n",sizeOne, ret->blockCount);
     }
 assert(sizeOne == ret->blockCount);
 return ret;
 }
 
 struct psl *pslCommaIn(char **pS, struct psl *ret)
 /* Create a psl out of a comma separated string. 
  * This will fill in ret if non-null, otherwise will
  * return a new psl */
 {
 char *s = *pS;
 int i;
 
 if (ret == NULL)
     AllocVar(ret);
 ret->match = sqlUnsignedComma(&s);
 ret->misMatch = sqlUnsignedComma(&s);
 ret->repMatch = sqlUnsignedComma(&s);
 ret->nCount = sqlUnsignedComma(&s);
 ret->qNumInsert = sqlUnsignedComma(&s);
 ret->qBaseInsert = sqlSignedComma(&s);
 ret->tNumInsert = sqlUnsignedComma(&s);
 ret->tBaseInsert = sqlSignedComma(&s);
 sqlFixedStringComma(&s, ret->strand, sizeof(ret->strand));
 ret->qName = sqlStringComma(&s);
 ret->qSize = sqlUnsignedComma(&s);
 ret->qStart = sqlUnsignedComma(&s);
 ret->qEnd = sqlUnsignedComma(&s);
 ret->tName = sqlStringComma(&s);
 ret->tSize = sqlUnsignedComma(&s);
 ret->tStart = sqlUnsignedComma(&s);
 ret->tEnd = sqlUnsignedComma(&s);
 ret->blockCount = sqlUnsignedComma(&s);
 s = sqlEatChar(s, '{');
 AllocArray(ret->blockSizes, ret->blockCount);
 for (i=0; i<ret->blockCount; ++i)
     {
     ret->blockSizes[i] = sqlUnsignedComma(&s);
     }
 s = sqlEatChar(s, '}');
 s = sqlEatChar(s, ',');
 s = sqlEatChar(s, '{');
 AllocArray(ret->qStarts, ret->blockCount);
 for (i=0; i<ret->blockCount; ++i)
     {
     ret->qStarts[i] = sqlUnsignedComma(&s);
     }
 s = sqlEatChar(s, '}');
 s = sqlEatChar(s, ',');
 s = sqlEatChar(s, '{');
 AllocArray(ret->tStarts, ret->blockCount);
 for (i=0; i<ret->blockCount; ++i)
     {
     ret->tStarts[i] = sqlUnsignedComma(&s);
     }
 s = sqlEatChar(s, '}');
 s = sqlEatChar(s, ',');
 *pS = s;
 return ret;
 }
 
 void pslFree(struct psl **pEl)
 /* Free a single dynamically allocated psl such as created
  * with pslLoad(). */
 {
 struct psl *el;
 
 if ((el = *pEl) == NULL) return;
 freeMem(el->qName);
 freeMem(el->tName);
 freeMem(el->blockSizes);
 freeMem(el->qStarts);
 freeMem(el->tStarts);
 if (el->qSequence)
     {
     freeMem(el->qSequence[0]);
     freeMem(el->qSequence);
     }
 if (el->tSequence)
     {
     freeMem(el->tSequence[0]);
     freeMem(el->tSequence);
     }
 freez(pEl);
 }
 
 void pslFreeList(struct psl **pList)
 /* Free a list of dynamically allocated psl's */
 {
 struct psl *el, *next;
 
 for (el = *pList; el != NULL; el = next)
     {
     next = el->next;
     pslFree(&el);
     }
 *pList = NULL;
 }
 
 void pslOutput(struct psl *el, FILE *f, char sep, char lastSep) 
 /* Print out psl.  Separate fields with sep. Follow last field with lastSep. */
 {
 int i;
 fprintf(f, "%u", el->match);
 fputc(sep,f);
 fprintf(f, "%u", el->misMatch);
 fputc(sep,f);
 fprintf(f, "%u", el->repMatch);
 fputc(sep,f);
 fprintf(f, "%u", el->nCount);
 fputc(sep,f);
 fprintf(f, "%u", el->qNumInsert);
 fputc(sep,f);
 fprintf(f, "%d", el->qBaseInsert);
 fputc(sep,f);
 fprintf(f, "%u", el->tNumInsert);
 fputc(sep,f);
 fprintf(f, "%d", el->tBaseInsert);
 fputc(sep,f);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->strand);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->qName);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 fprintf(f, "%u", el->qSize);
 fputc(sep,f);
 fprintf(f, "%u", el->qStart);
 fputc(sep,f);
 fprintf(f, "%u", el->qEnd);
 fputc(sep,f);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->tName);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 fprintf(f, "%u", el->tSize);
 fputc(sep,f);
 fprintf(f, "%u", el->tStart);
 fputc(sep,f);
 fprintf(f, "%u", el->tEnd);
 fputc(sep,f);
 fprintf(f, "%u", el->blockCount);
 fputc(sep,f);
 if (sep == ',') fputc('{',f);
 for (i=0; i<el->blockCount; ++i)
     {
     fprintf(f, "%u", el->blockSizes[i]);
     fputc(',', f);
     }
 if (sep == ',') fputc('}',f);
 fputc(sep,f);
 if (sep == ',') fputc('{',f);
 for (i=0; i<el->blockCount; ++i)
     {
     fprintf(f, "%u", el->qStarts[i]);
     fputc(',', f);
     }
 if (sep == ',') fputc('}',f);
 fputc(sep,f);
 if (sep == ',') fputc('{',f);
 for (i=0; i<el->blockCount; ++i)
     {
     fprintf(f, "%u", el->tStarts[i]);
     fputc(',', f);
     }
 if (sep == ',') fputc('}',f);
 if (el->qSequence)
     {
     fputc(sep,f);
     if (sep == ',') fputc('{',f);
     for (i=0; i<el->blockCount; ++i)
 	{
 	fprintf(f, "%s", el->qSequence[i]);
 	fputc(',', f);
 	}
     if (sep == ',') fputc('}',f);
     fputc(sep,f);
     if (sep == ',') fputc('{',f);
     for (i=0; i<el->blockCount; ++i)
 	{
 	fprintf(f, "%s", el->tSequence[i]);
 	fputc(',', f);
 	}
     if (sep == ',') fputc('}',f);
     }
 
 fputc(lastSep,f);
 if (ferror(f))
     {
     perror("Error writing psl file\n");
     errAbort("\n");
     }
 }
 
 /* ----- end autoSql generated part --------------- */
 
 void pslOutputShort(struct psl *el, FILE *f, char sep, char lastSep) 
 /* Print out psl.  Separate fields with sep. Follow last field with lastSep. */
 {
 fprintf(f, "%u", el->match);
 fputc(sep,f);
 fprintf(f, "%u", el->misMatch);
 fputc(sep,f);
 fprintf(f, "%u", el->repMatch);
 fputc(sep,f);
 fprintf(f, "%u", el->qNumInsert);
 fputc(sep,f);
 fprintf(f, "%d", el->qBaseInsert);
 fputc(sep,f);
 fprintf(f, "%u", el->tNumInsert);
 fputc(sep,f);
 fprintf(f, "%d", el->tBaseInsert);
 fputc(sep,f);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->strand);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->qName);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 fprintf(f, "%u", el->qStart);
 fputc(sep,f);
 fprintf(f, "%u", abs(el->qEnd - el->qStart));
 fputc(sep,f);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->tName);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 fprintf(f, "%u", el->tStart);
 fputc(sep,f);
 fprintf(f, "%u", abs(el->tEnd - el->tStart));
 fputc(sep,f);
 fprintf(f, "%u", el->blockCount);
 fputc(sep,f);
 if (sep == ',') fputc('{',f);
 fputc(lastSep,f);
 if (ferror(f))
     {
     perror("Error writing psl file\n");
     errAbort("\n");
     }
 }
 
 void pslOutFormat(struct psl *el, FILE *f, char sep, char lastSep) 
 /* Print out selected psl values.  Separate fields with sep. Follow last field with lastSep. */
 /* Prints out a better format with bold field headings followed by value */
 /* Requires further upstream work to ensure that only the field headers */
 /* declared here are printed if replacing an existing psl print function*/
 {
 const char *headers[] = {"Matches", "Mismatches", "Matches in repeats", "Number of N bases", "Query name", "Size", "Start", "End", "Chromosome", "Strand", "Start", "End"};
 char *hformat = "<B>%s:</B> "; /* string for formatted print for headers */
 char *uformat = "<B>%s:</B> %u%c"; /* string for formatted print for unsigned variable */
 char *targName;
 
 fprintf(f, uformat, headers[0], el->match, sep);
 fprintf(f, uformat, headers[1], el->misMatch, sep);
 fprintf(f, uformat, headers[2], el->repMatch, sep);
 fprintf(f, uformat, headers[3], el->nCount, sep);
 
 fprintf(f, hformat, headers[4]);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->qName);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 
 fprintf(f, uformat, headers[5], el->qSize, sep);
 fprintf(f, uformat, headers[6], el->qStart, sep);
 fprintf(f, uformat, headers[7], el->qEnd, sep);
 
 fprintf(f, hformat, headers[8]);
 if (sep == ',') fputc('"',f);
 /* skip leading 'chr' in string to get only chromosome part */
 targName = el->tName;
 if (startsWith("chr", el->tName))
    targName += 3;
 fprintf(f, "%s", targName);
 
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 
 fprintf(f, hformat, headers[9]);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->strand);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 
 fprintf(f, uformat, headers[10], el->tStart, sep);
 fprintf(f, uformat, headers[11], el->tEnd, sep);
 
 fputc(lastSep,f);
 
 if (ferror(f))
     {
     perror("Error writing psl file\n");
     errAbort("\n");
     }
 
 }
 
 struct psl *pslLoadAll(char *fileName)
 /* Load all psl's in file. */
 {
 struct lineFile *lf = pslFileOpen(fileName);
 struct psl *pslList = NULL, *psl;
 while ((psl = pslNext(lf)) != NULL)
     {
     slAddHead(&pslList, psl);
     }
 slReverse(&pslList);
 lineFileClose(&lf);
 return pslList;
 }
 
 
 int pslCmpQuery(const void *va, const void *vb)
 /* Compare to sort based on query start. */
 {
 const struct psl *a = *((struct psl **)va);
 const struct psl *b = *((struct psl **)vb);
 int dif;
 dif = strcmp(a->qName, b->qName);
 if (dif == 0)
     dif = a->qStart - b->qStart;
 return dif;
 }
 
 int pslCmpTarget(const void *va, const void *vb)
 /* Compare to sort based on target start. */
 {
 const struct psl *a = *((struct psl **)va);
 const struct psl *b = *((struct psl **)vb);
 int dif;
 dif = strcmp(a->tName, b->tName);
 if (dif == 0)
     dif = a->tStart - b->tStart;
 return dif;
 }
 
 int pslCmpTargetAndStrand(const void *va, const void *vb)
 /* Compare to sort based on target, strand,  tStart. */
 {
 const struct psl *a = *((struct psl **)va);
 const struct psl *b = *((struct psl **)vb);
 int dif;
 dif = strcmp(a->tName, b->tName);
 if (dif == 0)
     dif = strcmp(a->strand, b->strand);
 if (dif == 0)
     dif = a->tStart - b->tStart;
 return dif;
 }
 
 
 int pslCmpScore(const void *va, const void *vb)
 /* Compare to sort based on score (descending). */
 {
 const struct psl *a = *((struct psl **)va);
 const struct psl *b = *((struct psl **)vb);
 return pslScore(b) - pslScore(a);
 }
 
 int pslCmpQueryScore(const void *va, const void *vb)
 /* Compare to sort based on query then score (descending). */
 {
 const struct psl *a = *((struct psl **)va);
 const struct psl *b = *((struct psl **)vb);
 int diff = strcmp(a->qName, b->qName);
 if (diff == 0)
     diff = pslScore(b) - pslScore(a);
 return diff;
 }
 
 int pslCmpMatch(const void *va, const void *vb)
 /* Compare to sort based on match */
 {
 const struct psl *a = *((struct psl **)va);
 const struct psl *b = *((struct psl **)vb);
 return b->match - a->match;
 }
 
 static void pslLabelColumns(FILE *f)
 /* Write column info. */
 {
 fputs("\n"
 "match\tmis- \trep. \tN's\tQ gap\tQ gap\tT gap\tT gap\tstrand\tQ        \tQ   \tQ    \tQ  \tT        \tT   \tT    \tT  \tblock\tblockSizes \tqStarts\t tStarts\n"
 "     \tmatch\tmatch\t   \tcount\tbases\tcount\tbases\t      \tname     \tsize\tstart\tend\tname     \tsize\tstart\tend\tcount\n" 
 "---------------------------------------------------------------------------------------------------------------------------------------------------------------\n",
 f);
 }
 
 void pslxWriteHead(FILE *f, enum gfType qType, enum gfType tType)
 /* Write header for extended (possibly protein) psl file. */
 {
 fprintf(f, "psLayout version 4 %s %s\n", gfTypeName(qType), gfTypeName(tType));
 pslLabelColumns(f);
 }
 
 void pslWriteHead(FILE *f)
 /* Write head of psl. */
 {
 fputs("psLayout version 3\n", f);
 pslLabelColumns(f);
 }
 
 void pslWriteAll(struct psl *pslList, char *fileName, boolean writeHeader)
 /* Write a psl file from list. */
 {
 FILE *f;
 struct psl *psl;
 
 f = mustOpen(fileName, "w");
 if (writeHeader)
     pslWriteHead(f);
 for (psl = pslList; psl != NULL; psl = psl->next)
     pslTabOut(psl, f);
 fclose(f);
 }
 
 void pslxFileOpen(char *fileName, enum gfType *retQueryType, enum gfType *retTargetType, struct lineFile **retLf)
 /* Read header part of psl and make sure it's right.  Return
  * sequence types and file handle. */
 {
 char *line;
 int lineSize;
 char *words[30];
 char *version;
 int wordCount;
 int i;
 enum gfType qt = gftRna,  tt = gftDna;
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 
 if (!lineFileNext(lf, &line, &lineSize))
     warn("%s is empty", fileName);
 else
     {
     if (startsWith("psLayout version", line))
 	{
 	wordCount = chopLine(line, words);
 	if (wordCount < 3)
 	    errAbort("%s is not a psLayout file", fileName);
 	version = words[2];
 	if (sameString(version, "3"))
 	    {
 	    }
 	else if (sameString(version, "4"))
 	    {
 	    qt = gfTypeFromName(words[3]);
 	    tt = gfTypeFromName(words[4]);
 	    }
 	else
 	    {
 	    errAbort("%s is version %s of psLayout, this program can only handle through version 4",
 		fileName,  version);
 	    }
 	for (i=0; i<4; ++i)
 	    {
 	    if (!lineFileNext(lf, &line, &lineSize))
 		errAbort("%s severely truncated", fileName);
 	    }
 	}
     else
 	lineFileReuse(lf); 
     }
 *retQueryType = qt;
 *retTargetType = tt;
 *retLf = lf;
 }
 
 static void pslxFileOpenWithMetaConfig(char *fileName, bool isMetaUnique, enum gfType *retQueryType, enum gfType *retTargetType, struct lineFile **retLf, FILE *f)
 /* Read header part of psl and make sure it's right.  Return
  * sequence types and file handle and send meta data to output file f */
 {
 char *line;
 int lineSize;
 char *words[30];
 char *version;
 int wordCount;
 int i;
 enum gfType qt = gftRna,  tt = gftDna;
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 
 lineFileSetMetaDataOutput(lf, f);
 if (isMetaUnique)
     lineFileSetUniqueMetaData(lf);
 if (!lineFileNext(lf, &line, &lineSize))
     warn("%s is empty", fileName);
 else
     {
     if (startsWith("psLayout version", line))
 	{
 	wordCount = chopLine(line, words);
 	if (wordCount < 3)
 	    errAbort("%s is not a psLayout file", fileName);
 	version = words[2];
 	if (sameString(version, "3"))
 	    {
 	    }
 	else if (sameString(version, "4"))
 	    {
 	    qt = gfTypeFromName(words[3]);
 	    tt = gfTypeFromName(words[4]);
 	    }
 	else
 	    {
 	    errAbort("%s is version %s of psLayout, this program can only handle through version 4",
 		fileName,  version);
 	    }
 	for (i=0; i<4; ++i)
 	    {
 	    if (!lineFileNext(lf, &line, &lineSize))
 		errAbort("%s severely truncated", fileName);
 	    }
 	}
     else
         {
 	char *s = cloneString(line);
         boolean eof = FALSE;
         while ((line[0] == '#') && (!eof))
             {
             freeMem(s);
             if (!lineFileNext(lf, &line, &lineSize))
                 eof = TRUE;
             s = cloneString(line);
             }
 	wordCount = chopLine(s, words);
 	if ((wordCount < 21 || wordCount > 23 || (words[8][0] != '+' && words[8][0] != '-')) && (!eof))
 	    errAbort("%s is not a psLayout file", fileName);
 	else
 	    lineFileReuse(lf); 
 	freeMem(s);
 	}
     }
 *retQueryType = qt;
 *retTargetType = tt;
 *retLf = lf;
 }
 
 void pslxFileOpenWithMeta(char *fileName, enum gfType *retQueryType, enum gfType *retTargetType, struct lineFile **retLf, FILE *f)
 /* Read header part of psl and make sure it's right.  Return
  * sequence types and file handle and send meta data to output file f */
 {
 pslxFileOpenWithMetaConfig(fileName, FALSE, retQueryType, retTargetType, retLf, f);
 }
 
 void pslxFileOpenWithUniqueMeta(char *fileName, enum gfType *retQueryType, enum gfType *retTargetType, struct lineFile **retLf, FILE *f)
 /* Read header part of psl and make sure it's right.  Return
  * sequence types and file handle and send only unique meta data to output f */
 {
 pslxFileOpenWithMetaConfig(fileName, TRUE, retQueryType, retTargetType, retLf, f);
 }
 
 struct lineFile *pslFileOpen(char *fileName)
 /* Read header part of psl and make sure it's right. 
  * Return line file handle to it. */
 {
 enum gfType qt, tt;
 struct lineFile *lf;
 pslxFileOpen(fileName, &qt, &tt, &lf);
 return lf;
 }
 
 struct lineFile *pslFileOpenWithMeta(char *fileName, FILE *f)
 /* Read header part of psl and make sure it's right. 
  * Return line file handle to it. */
 {
 enum gfType qt, tt;
 struct lineFile *lf;
 pslxFileOpenWithMeta(fileName, &qt, &tt, &lf, f);
 return lf;
 }
 
 struct lineFile *pslFileOpenWithUniqueMeta(char *fileName, FILE *f)
 /* Read header part of psl and make sure it's right. 
  * Set flag to suppress duplicate header comments.
  * Return line file handle to it. */
 {
 enum gfType qt, tt;
 struct lineFile *lf;
 pslxFileOpenWithUniqueMeta(fileName, &qt, &tt, &lf, f);
 return lf;
 }
 
 struct psl *pslNext(struct lineFile *lf)
 /* Read next line from file and convert it to psl.  Return
  * NULL at eof. */
 {
 char *line;
 int lineSize;
 char *words[32];
 int wordCount;
 static int lineAlloc = 0;
 static char *chopBuf = NULL;
 
 if (!lineFileNextReal(lf, &line))
     {
     return NULL;
     }
 lineSize = strlen(line);
 if (lineSize >= lineAlloc)
     {
     lineAlloc = lineSize+256;
     chopBuf = needMoreMem(chopBuf, 0, lineAlloc);
     }
 memcpy(chopBuf, line, lineSize+1);
 wordCount = chopLine(chopBuf, words);
 if (wordCount == 21)
     {
     return pslLoad(words);
     }
 if (wordCount == 23)
     {
     return pslxLoad(words);
     }
 else
     {
     errAbort("Bad line %d of %s wordCount is %d instead of 21 or 23\n", lf->lineIx, lf->fileName, wordCount);
     return NULL;
     }
 }
 
 struct psl *pslxLoadLm(char **row, struct lm *lm)
 /* Load row into local memory pslx. */
 {
 struct psl *ret = pslLoadLm(row, lm);
 ret->qSequence = lmAlloc(lm, sizeof(ret->qSequence[0]) * ret->blockCount);
 sqlStringArray(lmCloneString(lm,row[21]),ret->qSequence, ret->blockCount);
 ret->tSequence = lmAlloc(lm, sizeof(ret->tSequence[0]) * ret->blockCount);
 sqlStringArray(lmCloneString(lm,row[22]),ret->tSequence, ret->blockCount);
 return ret;
 }
 
 struct psl *pslLoadLm(char **row, struct lm *lm)
 /* Load row into local memory psl. */
 {
 struct psl *ret;
 
 ret = lmAlloc(lm, sizeof(*ret));
 ret->blockCount = sqlUnsigned(row[17]);
 ret->match = sqlUnsigned(row[0]);
 ret->misMatch = sqlUnsigned(row[1]);
 ret->repMatch = sqlUnsigned(row[2]);
 ret->nCount = sqlUnsigned(row[3]);
 ret->qNumInsert = sqlUnsigned(row[4]);
 ret->qBaseInsert = sqlSigned(row[5]);
 ret->tNumInsert = sqlUnsigned(row[6]);
 ret->tBaseInsert = sqlSigned(row[7]);
 strcpy(ret->strand, row[8]);
 ret->qName = lmCloneString(lm,row[9]);
 ret->qSize = sqlUnsigned(row[10]);
 ret->qStart = sqlUnsigned(row[11]);
 ret->qEnd = sqlUnsigned(row[12]);
 ret->tName = lmCloneString(lm, row[13]);
 ret->tSize = sqlUnsigned(row[14]);
 ret->tStart = sqlUnsigned(row[15]);
 ret->tEnd = sqlUnsigned(row[16]);
 ret->blockSizes = lmAlloc(lm, sizeof(ret->blockSizes[0]) * ret->blockCount);
 sqlUnsignedArray(row[18], ret->blockSizes, ret->blockCount);
 ret->qStarts = lmAlloc(lm, sizeof(ret->qStarts[0]) * ret->blockCount);
 sqlUnsignedArray(row[19], ret->qStarts, ret->blockCount);
 ret->tStarts = lmAlloc(lm, sizeof(ret->tStarts[0]) * ret->blockCount);
 sqlUnsignedArray(row[20], ret->tStarts, ret->blockCount);
 return ret;
 }
 
 boolean pslIsProtein(const struct psl *psl)
 /* is psl a protein psl (are it's blockSizes and scores in protein space) */
 {
 int lastBlock = psl->blockCount - 1;
 
 return  (((psl->strand[1] == '+' ) &&
     (psl->tEnd == psl->tStarts[lastBlock] + 3*psl->blockSizes[lastBlock])) ||
    ((psl->strand[1] == '-') && 
     (psl->tStart == (psl->tSize-(psl->tStarts[lastBlock] + 3*psl->blockSizes[lastBlock])))));
 }
 
 int pslCalcMilliBad(struct psl *psl, boolean isMrna)
 /* Calculate badness in parts per thousand. */
 {
 int sizeMul = pslIsProtein(psl) ? 3 : 1;
 int qAliSize, tAliSize, aliSize;
 int milliBad = 0;
 int sizeDif;
 int insertFactor;
 int total;
 
 qAliSize = sizeMul * (psl->qEnd - psl->qStart);
 tAliSize = psl->tEnd - psl->tStart;
 aliSize = min(qAliSize, tAliSize);
 if (aliSize <= 0)
     return 0;
 sizeDif = qAliSize - tAliSize;
 if (sizeDif < 0)
     {
     if (isMrna)
 	sizeDif = 0;
     else
 	sizeDif = -sizeDif;
     }
 insertFactor = psl->qNumInsert;
 if (!isMrna)
     insertFactor += psl->tNumInsert;
 
 total = (sizeMul * (psl->match + psl->repMatch + psl->misMatch));
 if (total != 0)
     milliBad = (1000 * (psl->misMatch*sizeMul + insertFactor + round(3*log(1+sizeDif)))) / total;
 return milliBad;
 }
 
 int pslScore(const struct psl *psl)
 /* Return score for psl. */
 {
 int sizeMul = pslIsProtein(psl) ? 3 : 1;
 
 return sizeMul * (psl->match + ( psl->repMatch>>1)) - 
 	sizeMul * psl->misMatch - psl->qNumInsert - psl->tNumInsert;
 }
 
 int pslCmpScoreDesc(const void *va, const void *vb)
 /* Compare to sort based on score. */
 {
 const struct psl *a = *((struct psl **)va);
 const struct psl *b = *((struct psl **)vb);
 return pslScore(b) - pslScore(a);
 }
 
 
 struct ffAli *pslToFakeFfAli(struct psl *psl, DNA *needle, DNA *haystack)
 /* Convert from psl to ffAli format.  In some cases you can pass NULL
  * for needle and haystack - depending what the post-processing is going
  * to be. */
 {
 struct ffAli *ffList = NULL, *ff;
 int blockCount = psl->blockCount;
 unsigned *blockSizes = psl->blockSizes;
 unsigned *qStarts = psl->qStarts;
 unsigned *tStarts = psl->tStarts;
 int size;
 int i;
 
 for (i=0; i<blockCount; ++i)
     {
     size = blockSizes[i];
     AllocVar(ff);
     ff->left = ffList;
     ffList = ff;
     ff->nStart = ff->nEnd = needle + qStarts[i];
     ff->nEnd += size;
     ff->hStart = ff->hEnd = haystack + tStarts[i];
     ff->hEnd += size;
     }
 ffList = ffMakeRightLinks(ffList);
 return ffList;
 }
 
 struct psl *pslFromFakeFfAli(struct ffAli *ff, 
 	DNA *needle, DNA *haystack, char strand,
 	char *qName, int qSize, char *tName, int tSize)
 /* This will create a basic psl structure from a sorted series of ffAli
  * blocks.  The fields that would need actual sequence to be filled in
  * are left zero however - fields including match, repMatch, mismatch. */
 {
 struct psl *psl;
 unsigned *blockSizes;
 unsigned *qStarts;
 unsigned *tStarts;
 int blockCount;
 int i;
 int nStart, hStart;
 int nEnd, hEnd;
 
 AllocVar(psl);
 psl->blockCount = blockCount = ffAliCount(ff);
 psl->blockSizes = AllocArray(blockSizes, blockCount);
 psl->qStarts = AllocArray(qStarts, blockCount);
 psl->tStarts = AllocArray(tStarts, blockCount);
 psl->qName = cloneString(qName);
 psl->qSize = qSize;
 psl->tName = cloneString(tName);
 psl->tSize = tSize;
 psl->strand[0] = strand;
 
 for (i=0; i<blockCount; ++i)
     {
     nStart = ff->nStart - needle;
     nEnd = ff->nEnd - needle;
     hStart = ff->hStart - haystack;
     hEnd = ff->hEnd - haystack;
     blockSizes[i] = nEnd - nStart;
     qStarts[i] = nStart;
     tStarts[i] = hStart;
     if (i == 0)
        {
        psl->qStart = nStart;
        psl->tStart = hStart;
        }
     if (i == blockCount-1)
        {
        psl->qEnd = nEnd;
        psl->tEnd = hEnd;
        }
     ff = ff->right;
     }
 if (strand == '-')
     {
     reverseIntRange(&psl->qStart, &psl->qEnd, psl->qSize);
     }
 return psl;
 }
 
 struct ffAli *pslToFfAli(struct psl *psl, struct dnaSeq *query, struct dnaSeq *target,
 	int targetOffset)
 /* Convert from psl to ffAli format.  Clip to parts that we actually
  * have sequence for. */
 {
 struct ffAli *ffList = NULL, *ff;
 DNA *needle = query->dna;
 DNA *haystack = target->dna;
 int blockCount = psl->blockCount;
 unsigned *blockSizes = psl->blockSizes;
 unsigned *qStarts = psl->qStarts;
 unsigned *tStarts = psl->tStarts;
 int size;
 int i;
 int tMin = targetOffset;
 int tMax = targetOffset + target->size;
 int tStart, tEnd;
 int clipStart, clipEnd, clipOffset, clipSize;
 
 for (i=0; i<blockCount; ++i)
     {
     clipStart = tStart = tStarts[i];
     size = blockSizes[i];
     clipEnd = tEnd = tStart + size;
     if (tStart < tMax && tEnd > tMin)
 	{
 	if (clipStart < tMin) clipStart = tMin;
 	if (clipEnd > tMax) clipEnd = tMax;
 	clipOffset = clipStart - tStart;
 	clipSize = clipEnd - clipStart;
 	AllocVar(ff);
 	ff->left = ffList;
 	ffList = ff;
 	ff->nStart = ff->nEnd = needle + qStarts[i] + clipOffset;
 	ff->nEnd += clipSize;
 	ff->hStart = ff->hEnd = haystack + clipStart - targetOffset;
 	ff->hEnd += clipSize;
 	}
     }
 ffList = ffMakeRightLinks(ffList);
 ffCountGoodEnds(ffList);
 return ffList;
 }
 
 int pslOrientation(struct psl *psl)
 /* Translate psl strand + or - to orientation +1 or -1 */
 {
 if (psl->strand[1] != '\0')
     {
     /* translated blat */
     if (psl->strand[0] != psl->strand[1])
         return -1;
     else
         return 1;
     }
 else
     {
     if (psl->strand[0] == '-')
         return -1;
     else
         return 1;
     }
 }
 
 int pslWeightedIntronOrientation(struct psl *psl, struct dnaSeq *genoSeq, int offset)
 /* Return >0 if introns make it look like alignment is on + strand,
  *        <0 if introns make it look like alignment is on - strand,
  *        0 if can't tell.  The absolute value of the return indicates
  * how many splice sites we've seen supporting the orientation.
  * Sequence should NOT be reverse complemented.  */
 {
 int intronDir = 0;
 int oneDir;
 int i;
 DNA *dna = genoSeq->dna;
 
 /* code below doesn't support negative target strand (translated blat) */
 if (psl->strand[1] == '-')
     errAbort("pslWeightedIntronOrientation doesn't support a negative target strand");
 
 for (i=1; i<psl->blockCount; ++i)
     {
     int iStart, iEnd, blockSize = psl->blockSizes[i-1];
     if (psl->qStarts[i-1] + blockSize == psl->qStarts[i])
 	{
 	iStart = psl->tStarts[i-1] + psl->blockSizes[i-1] - offset;
 	iEnd = psl->tStarts[i] - offset;
 	oneDir = intronOrientation(dna+iStart, dna+iEnd);
 	intronDir += oneDir;
 	}
     }
 return intronDir;
 }
 
 int pslIntronOrientation(struct psl *psl, struct dnaSeq *genoSeq, int offset)
 /* Return 1 if introns make it look like alignment is on + strand,
  *       -1 if introns make it look like alignment is on - strand,
  *        0 if can't tell.
  * Sequence should NOT be reverse complemented.  */
 {
 int intronDir = pslWeightedIntronOrientation(psl, genoSeq, offset);
 if (intronDir < 0)
     intronDir = -1;
 else if (intronDir > 0)
     intronDir = 1;
 return intronDir;
 }
 
 boolean pslHasIntron(struct psl *psl, struct dnaSeq *seq, int seqOffset)
 /* Return TRUE if there's a probable intron. Sequence should NOT be
  * reverse complemented.*/
 {
 int blockCount = psl->blockCount, i;
 unsigned *tStarts = psl->tStarts;
 unsigned *blockSizes = psl->blockSizes;
 unsigned *qStarts = psl->qStarts;
 int blockSize, start, end;
 DNA *dna = seq->dna;
 
 for (i=1; i<blockCount; ++i)
     {
     blockSize = blockSizes[i-1];
     start = qStarts[i-1]+blockSize;
     end = qStarts[i];
     if (start == end)
         {
         start = tStarts[i-1] + blockSize;
         end = tStarts[i];
         if (psl->strand[1] == '-')
             reverseIntRange(&start, &end, psl->tSize);
         start -= seqOffset;
         end -= seqOffset;
 	if (intronOrientation(dna+start, dna+end) != 0)
 	    return TRUE;
 	}
     }
 return FALSE;
 }
 
 void pslTailSizes(struct psl *psl, int *retStartTail, int *retEndTail)
 /* Find the length of "tails" (rather than extensions) implied by psl. */
 {
 int orientation = pslOrientation(psl);
 int qFloppyStart, qFloppyEnd;
 int tFloppyStart, tFloppyEnd;
 
 if (orientation > 0)
     {
     qFloppyStart = psl->qStart;
     qFloppyEnd = psl->qSize - psl->qEnd;
     }
 else
     {
     qFloppyStart = psl->qSize - psl->qEnd;
     qFloppyEnd = psl->qStart;
     }
 tFloppyStart = psl->tStart;
 tFloppyEnd = psl->tSize - psl->tEnd;
 *retStartTail = min(qFloppyStart, tFloppyStart);
 *retEndTail = min(qFloppyEnd, tFloppyEnd);
 }
 
 static void rcSeqs(char **seqs, unsigned blockCount, unsigned *blockSizes)
 /* reverses complement sequences in list, maintain property that all strings
  * are in one malloc block.   blockSizes should already be reversed. */
 {
 char *buf, *next;
 int i, memSz = 0;
 
 /* get a new memory block for strings */
 for (i = 0; i < blockCount; i++)
     memSz += blockSizes[i]+1;
 next = buf = needLargeMem(memSz);
 
 /* reverse compliment and copy to new memory block */
 for (i = blockCount-1; i >= 0; i--)
     {
     int len = strlen(seqs[i]);
     reverseComplement(seqs[i], len);
     memcpy(next, seqs[i], len+1);
     next += len+1;
     }
 
 /* swap memory and update pointers */
 freeMem(seqs[0]);
 seqs[0] = buf;
 next = buf;
 
 for (i = 0; i < blockCount; i++)
     {
     seqs[i] = next;
     next += blockSizes[i]+1;
     }
 }
 
 void pslRc(struct psl *psl)
 /* Reverse-complement a PSL alignment.  This makes the target strand explicit. */
 {
 unsigned tSize = psl->tSize, qSize = psl->qSize;
 unsigned blockCount = psl->blockCount, i;
 unsigned *tStarts = psl->tStarts, *qStarts = psl->qStarts, *blockSizes = psl->blockSizes;
 int mult = pslIsProtein(psl) ? 3 : 1;
 
 /* swap strand, forcing target to have an explict strand */
 psl->strand[0] = (psl->strand[0] != '-') ? '-' : '+';
 psl->strand[1] = (psl->strand[1] != '-') ? '-' : '+';
 psl->strand[2] = 0;
 
 for (i=0; i<blockCount; ++i)
     {
     tStarts[i] = tSize - (tStarts[i] + mult * blockSizes[i]);
     qStarts[i] = qSize - (qStarts[i] + blockSizes[i]);
     }
 reverseUnsigned(tStarts, blockCount);
 reverseUnsigned(qStarts, blockCount);
 reverseUnsigned(blockSizes, blockCount);
 if (psl->qSequence != NULL)
     {
     rcSeqs(psl->qSequence, blockCount, blockSizes);
     rcSeqs(psl->tSequence, blockCount, blockSizes);
     }
 }
 
 
 /* macro to swap to variables */
 #define swapVars(a, b, tmp) ((tmp) = (a), (a) = (b), (b) = (tmp))
 
 static void swapBlocks(struct psl *psl)
 /* Swap the blocks in a psl without reverse complementing them. */
 {
 int i;
 unsigned utmp;
 char *stmp; 
 for (i = 0; i < psl->blockCount; i++)
     {
     swapVars(psl->qStarts[i], psl->tStarts[i], utmp);
     if (psl->qSequence != NULL)
         swapVars(psl->qSequence[i], psl->tSequence[i], stmp);
     }
 }
 
 static void swapRCBlocks(struct psl *psl)
 /* Swap and reverse complement blocks in a psl. Other psl fields must
  * be modified first */
 {
 int i;
 unsigned *uatmp;
 char **satmp;
 reverseUnsigned(psl->tStarts, psl->blockCount);
 reverseUnsigned(psl->qStarts, psl->blockCount);
 reverseUnsigned(psl->blockSizes, psl->blockCount);
 swapVars(psl->tStarts, psl->qStarts, uatmp);
 
 /* qSize and tSize have already been swapped */
 for (i = 0; i < psl->blockCount; i++)
     {
     psl->qStarts[i] = psl->qSize - (psl->qStarts[i] + psl->blockSizes[i]);
     psl->tStarts[i] = psl->tSize - (psl->tStarts[i] + psl->blockSizes[i]);
     }
 if (psl->qSequence != NULL)
     {
     /* note: all block sequences are stored in one malloc block, which is
      * entry zero */
     rcSeqs(psl->qSequence, psl->blockCount, psl->blockSizes);
     rcSeqs(psl->tSequence, psl->blockCount, psl->blockSizes);
     swapVars(psl->qSequence, psl->tSequence, satmp);
     }
 }
 
 void pslSwap(struct psl *psl, boolean noRc)
 /* swap query and target in psl.  If noRc is TRUE, don't reverse-complement
  * PSL if needed, instead make target strand explict. */
 {
 int itmp;
 unsigned utmp;
 char ctmp, *stmp; 
 swapVars(psl->qBaseInsert, psl->tBaseInsert, utmp);
 swapVars(psl->tNumInsert, psl->qNumInsert, utmp);
 swapVars(psl->qName, psl->tName, stmp);
 swapVars(psl->qSize, psl->tSize, utmp);
 swapVars(psl->qStart, psl->tStart, itmp);
 swapVars(psl->qEnd, psl->tEnd, itmp);
 
 /* handle strand and block copy */
 if (psl->strand[1] != '\0')
     {
     /* translated */
     swapVars(psl->strand[0], psl->strand[1], ctmp);
     swapBlocks(psl);
     }
 else if (noRc)
     {
     /* untranslated with no reverse complement */
     psl->strand[1] = psl->strand[0];
     psl->strand[0] = '+';
     swapBlocks(psl);
     }
 else
     {
     /* untranslated */
     if (psl->strand[0] == '+')
         swapBlocks(psl);
     else
         swapRCBlocks(psl);
     }
 }
 
 void pslTargetOffset(struct psl *psl, int offset)
 /* Add offset to target positions in psl. */
 {
 int i, blockCount = psl->blockCount;
 unsigned *tStarts = psl->tStarts;
 psl->tStart += offset;
 psl->tEnd += offset;
 for (i=0; i<blockCount; ++i)
    tStarts[i] += offset;
 }
 
 void pslDump(struct psl *psl, FILE *f)
 /* Dump most of PSL to file - for debugging. */
 {
 int i;
 fprintf(f, "<PRE>\n");
 fprintf(f, "psl %s:%d-%d %s %s:%d-%d %d\n", 
 	psl->qName, psl->qStart, psl->qEnd, psl->strand,
 	psl->tName, psl->tStart, psl->tEnd, psl->blockCount);
 for (i=0; i<psl->blockCount; ++i)
     fprintf(f, "  size %d, qStart %d, tStart %d\n", 
     	psl->blockSizes[i], psl->qStarts[i], psl->tStarts[i]);
 fprintf(f, "</PRE>");
 }
 
 void pslRecalcBounds(struct psl *psl)
 /* Calculate qStart/qEnd tStart/tEnd at top level to be consistent
  * with blocks. */
 {
 int qStart, qEnd, tStart, tEnd, size;
 int last = psl->blockCount - 1;
 qStart = psl->qStarts[0];
 tStart = psl->tStarts[0];
 size = psl->blockSizes[last];
 qEnd = psl->qStarts[last] + size;
 tEnd = psl->tStarts[last] + size;
 if (psl->strand[0] == '-')
     reverseIntRange(&qStart, &qEnd, psl->qSize);
 if (psl->strand[1] == '-')
     reverseIntRange(&tStart, &tEnd, psl->tSize);
 psl->qStart = qStart;
 psl->qEnd = qEnd;
 psl->tStart = tStart;
 psl->tEnd = tEnd;
 }
 
 struct psl *pslTrimToTargetRange(struct psl *oldPsl, int tMin, int tMax)
 /* Return psl trimmed to fit inside tMin/tMax.  Note this does not
  * update the match/misMatch and related fields. */
 {
 int newSize;
 int oldBlockCount = oldPsl->blockCount;
 boolean tIsRc = (oldPsl->strand[1] == '-');
 int newBlockCount = 0, completeBlockCount = 0;
 int i;
 struct psl *newPsl = NULL;
 int tMn = tMin, tMx = tMax;   /* tMin/tMax adjusted for strand. */
 
 /* Deal with case where we're completely trimmed out quickly. */
 newSize = rangeIntersection(oldPsl->tStart, oldPsl->tEnd, tMin, tMax);
 if (newSize <= 0)
     return NULL;
 
 if (tIsRc)
     reverseIntRange(&tMn, &tMx, oldPsl->tSize);
 
 /* Count how many blocks will survive trimming. */
 oldBlockCount = oldPsl->blockCount;
 for (i=0; i<oldBlockCount; ++i)
     {
     int s = oldPsl->tStarts[i];
     int e = s + oldPsl->blockSizes[i];
     int sz = e - s;
     int overlap;
     if ((overlap = rangeIntersection(s, e, tMn, tMx)) > 0)
         ++newBlockCount;
     if (overlap == sz)
         ++completeBlockCount;
     }
 
 if (newBlockCount == 0)
     return NULL;
 
 /* Allocate new psl and fill in what we already know. */
 AllocVar(newPsl);
 strcpy(newPsl->strand, oldPsl->strand);
 newPsl->qName = cloneString(oldPsl->qName);
 newPsl->qSize = oldPsl->qSize;
 newPsl->tName = cloneString(oldPsl->tName);
 newPsl->tSize = oldPsl->tSize;
 newPsl->blockCount = newBlockCount;
 AllocArray(newPsl->blockSizes, newBlockCount);
 AllocArray(newPsl->qStarts, newBlockCount);
 AllocArray(newPsl->tStarts, newBlockCount);
 
 /* Fill in blockSizes, qStarts, tStarts with real data. */
 newBlockCount = completeBlockCount = 0;
 for (i=0; i<oldBlockCount; ++i)
     {
     int oldSz = oldPsl->blockSizes[i];
     int sz = oldSz;
     int tS = oldPsl->tStarts[i];
     int tE = tS + sz;
     int qS = oldPsl->qStarts[i];
     int qE = qS + sz;
     if (rangeIntersection(tS, tE, tMn, tMx) > 0)
         {
 	int diff;
 	if ((diff = (tMn - tS)) > 0)
 	    {
 	    tS += diff;
 	    qS += diff;
 	    sz -= diff;
 	    }
 	if ((diff = (tE - tMx)) > 0)
 	    {
 	    tE -= diff;
 	    qE -= diff;
 	    sz -= diff;
 	    }
 	newPsl->qStarts[newBlockCount] = qS;
 	newPsl->tStarts[newBlockCount] = tS;
 	newPsl->blockSizes[newBlockCount] = sz;
 	++newBlockCount;
 	if (sz == oldSz)
 	    ++completeBlockCount;
 	}
     }
 pslRecalcBounds(newPsl);
 return newPsl;
 }
 
 struct psl *pslTrimToQueryRange(struct psl *oldPsl, int qMin, int qMax)
 /* Return psl trimmed to fit inside qMin/qMax.  Note this does not
  * update the match/misMatch and related fields. */
 {
 int newSize;
 int oldBlockCount = oldPsl->blockCount;
 boolean qIsRc = (oldPsl->strand[0] == '-');
 int newBlockCount = 0, completeBlockCount = 0;
 int i;
 struct psl *newPsl = NULL;
 int qMn = qMin, qMx = qMax;   /* qMin/qMax adjusted for strand. */
 
 /* Deal with case where we're completely trimmed out quickly. */
 newSize = rangeIntersection(oldPsl->qStart, oldPsl->qEnd, qMin, qMax);
 if (newSize <= 0)
     return NULL;
 
 if (qIsRc)
     reverseIntRange(&qMn, &qMx, oldPsl->qSize);
 
 /* Count how many blocks will survive trimming. */
 oldBlockCount = oldPsl->blockCount;
 for (i=0; i<oldBlockCount; ++i)
     {
     int s = oldPsl->qStarts[i];
     int e = s + oldPsl->blockSizes[i];
     int sz = e - s;
     int overlap;
     if ((overlap = rangeIntersection(s, e, qMn, qMx)) > 0)
         ++newBlockCount;
     if (overlap == sz)
         ++completeBlockCount;
     }
 
 if (newBlockCount == 0)
     return NULL;
 
 /* Allocate new psl and fill in what we already know. */
 AllocVar(newPsl);
 strcpy(newPsl->strand, oldPsl->strand);
 newPsl->qName = cloneString(oldPsl->qName);
 newPsl->qSize = oldPsl->qSize;
 newPsl->tName = cloneString(oldPsl->tName);
 newPsl->tSize = oldPsl->tSize;
 newPsl->blockCount = newBlockCount;
 AllocArray(newPsl->blockSizes, newBlockCount);
 AllocArray(newPsl->qStarts, newBlockCount);
 AllocArray(newPsl->tStarts, newBlockCount);
 
 /* Fill in blockSizes, qStarts, tStarts with real data. */
 newBlockCount = completeBlockCount = 0;
 for (i=0; i<oldBlockCount; ++i)
     {
     int oldSz = oldPsl->blockSizes[i];
     int sz = oldSz;
     int qS = oldPsl->qStarts[i];
     int qE = qS + sz;
     int tS = oldPsl->tStarts[i];
     int tE = tS + sz;
     if (rangeIntersection(qS, qE, qMn, qMx) > 0)
         {
 	int diff;
 	if ((diff = (qMn - qS)) > 0)
 	    {
 	    tS += diff;
 	    qS += diff;
 	    sz -= diff;
 	    }
 	if ((diff = (qE - qMx)) > 0)
 	    {
 	    tE -= diff;
 	    qE -= diff;
 	    sz -= diff;
 	    }
 	newPsl->qStarts[newBlockCount] = qS;
 	newPsl->tStarts[newBlockCount] = tS;
 	newPsl->blockSizes[newBlockCount] = sz;
 	++newBlockCount;
 	if (sz == oldSz)
 	    ++completeBlockCount;
 	}
     }
 pslRecalcBounds(newPsl);
 return newPsl;
 }
 
 static void printPslDesc(char* pslDesc, FILE* out, struct psl* psl)
 /* print description of a PSL on first error */
 {
 fprintf(out, "Error: invalid PSL: %s:%u-%u %s:%u-%u %s %s\n",
         psl->qName, psl->qStart, psl->qEnd,
         psl->tName, psl->tStart, psl->tEnd,
         psl->strand, pslDesc);
 }
 
 
 static void chkError(char* pslDesc, FILE* out, struct psl* psl, int* errCount, char* format, ...)
 /* forward needed to specify printf signature for gcc checking */
 #if defined(__GNUC__)
 __attribute__((format(printf, 5, 6)))
 #endif
 ;
 
 static void chkError(char* pslDesc, FILE* out, struct psl* psl, int* errCount, char* format, ...)
 /* error handling on an pslCheck error, counting error and issuing description
  * of PSL on the first error. */
 {
 if (*errCount == 0)
     printPslDesc(pslDesc, out, psl);
 va_list args;
 va_start(args, format);
 vfprintf(out, format, args);
 va_end(args);
 (*errCount)++;
 }
 
 static void chkBlkRanges(char* pslDesc, FILE* out, struct psl* psl,
                          char* pName, char* pLabel, char pCLabel, char pStrand,
                          unsigned pSize, unsigned pStart, unsigned pEnd,
                          unsigned iBlk, unsigned* pBlockStarts, int* errCount)
 /* check the target or query ranges in a PSL incrementing errorCnt */
 {
 unsigned blkStart = pBlockStarts[iBlk];
 unsigned blkEnd = blkStart+psl->blockSizes[iBlk];
 /* translate stand to genomic coords */
 unsigned gBlkStart = (pStrand == '+') ? blkStart : (pSize - blkEnd);
 unsigned gBlkEnd = (pStrand == '+') ? blkEnd : (pSize - blkStart);
 
 if ((pSize > 0) && (blkEnd > pSize))
     chkError(pslDesc, out, psl, errCount,
              "\t%s %s block %u end %u > %cSize %u\n",
              pName, pLabel, iBlk, blkEnd, pCLabel, pSize);
 if (gBlkStart < pStart)
     chkError(pslDesc, out, psl, errCount,
              "\t%s %s block %u start %u < %cStart %u\n",
              pName, pLabel, iBlk, gBlkStart, pCLabel, pStart);
 if (gBlkStart >= pEnd)
     chkError(pslDesc, out, psl, errCount,
              "\t%s %s block %u start %u >= %cEnd %u\n",
              pName, pLabel, iBlk, gBlkStart, pCLabel, pEnd);
 if (gBlkEnd < pStart)
     chkError(pslDesc, out, psl, errCount,
              "\t%s %s block %u end %u < %cStart %u\n",
              pName, pLabel, iBlk, gBlkEnd, pCLabel, pStart);
 if (gBlkEnd > pEnd)
     chkError(pslDesc, out, psl, errCount,
              "\t%s %s block %u end %u > %cEnd %u\n",
              pName, pLabel, iBlk, gBlkEnd, pCLabel, pEnd);
 if (iBlk > 0)
     {
     unsigned prevBlkEnd = pBlockStarts[iBlk-1]+psl->blockSizes[iBlk-1];
     if (blkStart < prevBlkEnd)
         chkError(pslDesc, out, psl, errCount,
                  "\t%s %s block %u start %u < previous block end %u\n",
                  pName, pLabel, iBlk, blkStart, prevBlkEnd);
     }
 }
 
 static void chkRanges(char* pslDesc, FILE* out, struct psl* psl,
                       char* pName, char* pLabel, char pCLabel, char pStrand,
                       unsigned pSize, unsigned pStart, unsigned pEnd,
                       unsigned* pBlockStarts, int blockSizeMult, int* errCount)
 /* check the target or query ranges in a PSL, increment errorCnt */
 {
 unsigned iBlk;
 if (pStart >= pEnd)
     chkError(pslDesc, out, psl, errCount,
              "\t%s %cStart %u >= %cEnd %u\n",
              pName, pCLabel, pStart, pCLabel, pEnd);
 if (pEnd > pSize)
     chkError(pslDesc, out, psl, errCount,
              "\t%s %cEnd %u >= %cSize %u\n",
              pName, pCLabel, pEnd, pCLabel, pSize);
 // check that block start/end matches overall start end
 unsigned pStartStrand = pStart, pEndStrand = pEnd;
 if (pStrand != '+')
     reverseUnsignedRange(&pStartStrand, &pEndStrand, pSize);
 unsigned lastBlkEnd = pBlockStarts[psl->blockCount-1] + (blockSizeMult * psl->blockSizes[psl->blockCount-1]);
 if ((pStartStrand != pBlockStarts[0]) || (pEndStrand != lastBlkEnd))
     chkError(pslDesc, out, psl, errCount,
              "\t%s strand \"%c\" adjusted %cStart-%cEnd range %u-%u != block range %u-%u\n",
              pName, pStrand, pCLabel, pCLabel, pStartStrand, pEndStrand, pBlockStarts[0], lastBlkEnd);
 
 for (iBlk = 0; iBlk < psl->blockCount; iBlk++)
     chkBlkRanges(pslDesc, out, psl, pName, pLabel, pCLabel, pStrand,
                  pSize, pStart, pEnd, iBlk, pBlockStarts, errCount);
 }
 
 
 static void chkInsertCounts(char* pslDesc, FILE* out, struct psl* psl,
                             char* pName, char pCLabel, unsigned* pBlockStarts,
                             unsigned pNumInsert, unsigned pBaseInsert,
                             int* errCount)
 /* check the insert counts, incrementing errorCnt */
 {
 unsigned numInsert = 0, baseInsert = 0;
 int iBlk;
 
 for (iBlk = 1; iBlk < psl->blockCount; iBlk++)
     {
     unsigned gapSize = pBlockStarts[iBlk] - (pBlockStarts[iBlk-1]+psl->blockSizes[iBlk-1]);
     if (gapSize > 0)
         {
         numInsert++;
         baseInsert += gapSize;
         }
     }
 if (numInsert != pNumInsert)
     chkError(pslDesc, out, psl, errCount,
              "\t%s %cNumInsert %u != expected %u\n",
              pName, pCLabel, pNumInsert, numInsert);
 if (baseInsert != pBaseInsert)
     chkError(pslDesc, out, psl, errCount,
              "\t%s %cBaseInsert %u != expected %u\n",
              pName, pCLabel, pBaseInsert, baseInsert);
 }
 
 int pslCheck2(unsigned opts, char *pslDesc, FILE* out, struct psl* psl)
 /* Validate a PSL for consistency.  pslDesc is printed the error messages to
  * file out (open /dev/null to discard). Return count of errors.  Option
  * PSL_CHECK_IGNORE_INSERT_CNTS doesn't validate problems insert counts fields
  * in each PSL.  Useful because protein PSL doesn't seen to compute these in a
  * consistent way.
  */
 {
 static char* VALID_STRANDS[] = {
     "+", "-", "++", "+-", "-+", "--", NULL
 };
 int i, errCount = 0;
 int tBlockSizeMult = pslIsProtein(psl) ? 3 : 1;
 
 /* check strand value */
 for (i = 0; VALID_STRANDS[i] != NULL; i++)
     {
     if (strcmp(psl->strand, VALID_STRANDS[i]) == 0)
         break;
     }
 if (VALID_STRANDS[i] == NULL)
     chkError(pslDesc, out, psl, &errCount,
              "\tinvalid PSL strand: \"%s\"\n", psl->strand);
 
 /* check query */
 chkRanges(pslDesc, out, psl, psl->qName, "query", 'q', pslQStrand(psl), psl->qSize, psl->qStart, psl->qEnd,
           psl->qStarts, 1, &errCount);
 if ((opts & PSL_CHECK_IGNORE_INSERT_CNTS) == 0)
     chkInsertCounts(pslDesc, out, psl, psl->qName, 'q', psl->qStarts, psl->qNumInsert, psl->qBaseInsert, &errCount);
 
 /* check target */
 chkRanges(pslDesc, out, psl, psl->tName, "target", 't', pslTStrand(psl), psl->tSize, psl->tStart, psl->tEnd,
           psl->tStarts, tBlockSizeMult, &errCount);
 if ((opts & PSL_CHECK_IGNORE_INSERT_CNTS) == 0)
     chkInsertCounts(pslDesc, out, psl, psl->tName, 't', psl->tStarts, psl->tNumInsert, psl->tBaseInsert, &errCount);
 
 return errCount;
 }
 
 int pslCheck(char *pslDesc, FILE* out, struct psl* psl)
 /* Validate a PSL for consistency.  pslDesc is printed the error messages
  * to file out (open /dev/null to discard). Return count of errors. */
 {
 return pslCheck2(0, pslDesc, out, psl);
 }
 
 struct hash *readPslToBinKeeper(char *sizeFileName, char *pslFileName)
 /* read a list of psls and return results in hash of binKeeper structure for fast query*/
 {
 struct binKeeper *bk; 
 struct psl *psl;
 struct lineFile *sf = lineFileOpen(sizeFileName, TRUE);
 struct lineFile *pf = lineFileOpen(pslFileName , TRUE);
 struct hash *hash = newHash(0);
 char *chromRow[2];
 char *row[21] ;
 
 while (lineFileRow(sf, chromRow))
     {
     char *name = chromRow[0];
     int size = lineFileNeedNum(sf, chromRow, 1);
 
     if (hashLookup(hash, name) != NULL)
         warn("Duplicate %s, ignoring all but first\n", name);
     else
         {
         bk = binKeeperNew(0, size);
         assert(size > 1);
 	hashAdd(hash, name, bk);
         }
     }
 while (lineFileNextRow(pf, row, ArraySize(row)))
     {
     psl = pslLoad(row);
     bk = hashMustFindVal(hash, psl->tName);
     binKeeperAdd(bk, psl->tStart, psl->tEnd, psl);
     }
 lineFileClose(&pf);
 lineFileClose(&sf);
 return hash;
 }
 
 static boolean isDelChar(char c)
 /* is this a indel character? */
 {
 return (c == '-') || (c == '.') || (c == '=') || (c == '_');
 }
 
 static void trimAlignment(struct psl* psl, char** qStrPtr, char** tStrPtr,
                           int* aliSizePtr)
 /* remove leading or trailing indels from alignment */
 {
 char* qStr = *qStrPtr;
 char* tStr = *tStrPtr;
 int aliSize = *aliSizePtr;
 
 // skip lending indels
 while ((aliSize > 0) && (isDelChar(*qStr) || isDelChar(*tStr)))
     {
     if (!isDelChar(*qStr))
         psl->qStart++;
     else if (!isDelChar(*tStr))
         psl->tStart++;
     qStr++;
     tStr++;
     aliSize--;
     }
 
 // skip trailing indels
 while ((aliSize > 0) && (isDelChar(qStr[aliSize-1]) || isDelChar(tStr[aliSize-1])))
     {
     if (!isDelChar(qStr[aliSize-1]))
         psl->qEnd--;
     else if (!isDelChar(tStr[aliSize-1]))
         psl->tEnd--;
     aliSize--;
     }
 *qStrPtr = qStr;
 *tStrPtr = tStr;
 *aliSizePtr = aliSize;
 }
 
 static void addBlock(struct psl* psl, int qs, int qe, int ts, int te,
                      int *blockSpace)
 /* add a block to the psl, growing if necessary */
 {
 assert((qe-qs) == (te-ts));  /* same lengths? */
 if (psl->blockCount == *blockSpace)
     pslGrow(psl, blockSpace);
 psl->blockSizes[psl->blockCount] = qe - qs;
 psl->qStarts[psl->blockCount] = qs;
 psl->tStarts[psl->blockCount] = ts;
 psl->blockCount++;
 }
 
 static void accumCounts(struct psl *psl, char prevQ, char prevT,
                         char q, char t, unsigned options)
 /* accumulate block and base counts  */
 {
 if (!isDelChar(q) && !isDelChar(t))
     {
     /* aligned column. */
     char qu = toupper(q);
     char tu = toupper(t);
     if ((q == 'N') || (t == 'N'))
         psl->nCount++;
     else if (qu == tu)
         {
         if ((options & PSL_IS_SOFTMASK) && ((qu != q) || (tu != t)))
             psl->repMatch++;
         else
             psl->match++;
         }
     else
         psl->misMatch++;
     }
 else if (isDelChar(q) && !isDelChar(t))
     {
     /* target insert */
     psl->tBaseInsert++;
     if (!isDelChar(prevQ))
         psl->tNumInsert++;
     }
 else if (isDelChar(t) && !isDelChar(q))
     {
     /* query insert */
     psl->qBaseInsert++;
     if (!isDelChar(prevT))
         psl->qNumInsert++;
     }
 }
 
 struct psl* pslFromAlign(char *qName, int qSize, int qStart, int qEnd, char *qString,
                          char *tName, int tSize, int tStart, int tEnd, char *tString,
                          char* strand, unsigned options)
 /* Create a PSL from an alignment.  Options PSL_IS_SOFTMASK if lower case
  * bases indicate repeat masking.  Returns NULL if alignment is empty after
  * triming leading and trailing indels.*/
 {
 /* Note, the unit tests for these programs exercise this function:
  *   hg/embossToPsl
  *   hg/mouseStuff/axtToPsl
  *   hg/spideyToPsl
  *   utils/est2genomeToPsl
  */
 
 int blockSpace = 16;
 struct psl* psl = NULL;
 int aliSize = strlen(qString);
 boolean eitherInsert = FALSE;	/* True if either in insert state. */
 int i, qs,qe,ts,te;
 char prevQ = '\0',  prevT = '\0';
 AllocVar(psl);
 
 if (strlen(tString) != aliSize)
     errAbort("query and target alignment strings are different lengths");
 
 psl = pslNew(qName, qSize, qStart, qEnd, tName, tSize, tStart, tEnd,
              strand, blockSpace, 0);
 trimAlignment(psl, &qString, &tString, &aliSize);
 
 /* Don't create if either query or target is zero length after trim */
  if ((psl->qStart == psl->qEnd) || (psl->tStart == psl->tEnd))
      {
      pslFree(&psl);
      return NULL;
      }
 
 /* Get range of alignment in strand-specified coordinates */
 qs = psl->qStart;
 qe = psl->qEnd;
 if (strand[0] == '-')
     reverseIntRange(&qs, &qe, psl->qSize);
 ts = psl->tStart;
 te = psl->tEnd;
 if (strand[1] == '-')
     reverseIntRange(&ts, &te, psl->tSize);
 
 eitherInsert = FALSE;
 qe = qs;  /* current block coords */
 te = ts;
 for (i=0; i<aliSize; ++i)
     {
     char q = qString[i];
     char t = tString[i];
     if (isDelChar(q) && isDelChar(t))
         {
         continue; /* nothing in this column, just ignore it */
         }
     else if (isDelChar(q) || isDelChar(t))
         {
         /* insert in one of the columns */
 	if (!eitherInsert)
 	    {
             /* end of a block */
             addBlock(psl, qs, qe, ts, te, &blockSpace);
 	    eitherInsert = TRUE;
 	    }
 	if (!isDelChar(q))
             qe += 1;
 	if (!isDelChar(t))
             te += 1;
 	}
     else
         {
         /* columns aligned */
 	if (eitherInsert)
 	    {
             /* start new block */
 	    qs = qe;
 	    ts = te;
 	    eitherInsert = FALSE;
 	    }
 	qe += 1;
 	te += 1;
 	}
     accumCounts(psl, prevQ, prevT, q, t, options);
     prevQ = q; /* will not include skipped empty columns */
     prevT = t;
     }
 addBlock(psl, qs, qe, ts, te, &blockSpace);
 return psl;
 }
 
 struct psl* pslNew(char *qName, unsigned qSize, int qStart, int qEnd,
                    char *tName, unsigned tSize, int tStart, int tEnd,
                    char *strand, unsigned blockSpace, unsigned opts)
 /* create a new psl with space for the specified number of blocks allocated.
  * pslGrow maybe used to expand this space if needed.  Valid options are
  * PSL_XA_FORMAT. */
 {
 struct psl *psl;
 AllocVar(psl);
 assert(blockSpace > 0);
 psl->qName = cloneString(qName);
 psl->qSize = qSize;
 psl->qStart = qStart;
 psl->qEnd = qEnd;
 psl->tName = cloneString(tName);
 psl->tSize = tSize;
 psl->tStart = tStart;
 psl->tEnd = tEnd;
 strncpy(psl->strand, strand, 2);
 AllocArray(psl->blockSizes, blockSpace);
 AllocArray(psl->qStarts, blockSpace);
 AllocArray(psl->tStarts, blockSpace);
 if (opts & PSL_XA_FORMAT)
     {
     AllocArray(psl->qSequence, blockSpace);
     AllocArray(psl->tSequence, blockSpace);
     }
 return psl;
 }
 
 void pslGrow(struct psl *psl, int *blockSpacePtr)
 /* Increase memory allocated to a psl to hold more blocks.  blockSpacePtr
  * should point the the current maximum number of blocks and will be
  * updated to with the new amount of space. */
 {
 int blockSpace = *blockSpacePtr;
 int newSpace = 2 * blockSpace;
 ExpandArray(psl->blockSizes, blockSpace, newSpace);
 ExpandArray(psl->qStarts, blockSpace, newSpace);
 ExpandArray(psl->tStarts, blockSpace, newSpace);
 if (psl->qSequence != NULL)
     {
     ExpandArray(psl->qSequence, blockSpace, newSpace);
     ExpandArray(psl->tSequence, blockSpace, newSpace);
     }
 *blockSpacePtr = newSpace;
 }
 
 void pslComputeInsertCounts(struct psl *psl)
 /* compute numInsert and baseInsert fields from the blocks */
 {
 psl->qNumInsert = psl->qBaseInsert = 0;
 psl->tNumInsert = psl->tBaseInsert = 0;
 int iBlk;
 
 for (iBlk = 1; iBlk < psl->blockCount; iBlk++)
     {
     unsigned qGapSize = psl->qStarts[iBlk] - (psl->qStarts[iBlk-1]+psl->blockSizes[iBlk-1]);
     if (qGapSize != 0)
         {
         psl->qNumInsert++;
         psl->qBaseInsert += qGapSize;
         }
     unsigned tGapSize = psl->tStarts[iBlk] - (psl->tStarts[iBlk-1]+psl->blockSizes[iBlk-1]);
     if (tGapSize != 0)
         {
         psl->tNumInsert++;
         psl->tBaseInsert += tGapSize;
         }
     }
 }
 
 static boolean getNextCigarOp(char *startPtr, boolean reverse, char **ptr, char *op, int *size)
 /* gets one cigar op out of the CIGAR string.  Reverse the order if asked */
 {
 char *str = *ptr;
 
 if (str == NULL)
     return FALSE;
 
 if ((!reverse && (*str == 0)) || (reverse && (str == startPtr)))
     return FALSE;
 
 // between each cigar op there could be nothing, or a space, or a plus
 if (reverse)
     {
     char *end = str - 1;
     for(;*end ; end--)
 	{
 	if (isalpha(*end))
 	    break;
 	}
     str = end;
     *ptr = end;
     }
 else
     {
     char *end = str + 1;
     for(;*end ; end++)
 	{
 	if (! (isdigit(*end)  || (*end == ' ') || (*end == '+')))
 	    break;
 	}
 
     *ptr = end;
     }
 
 *op = *str++;
 *size = atoi(str);
 return TRUE;
 }
 
 struct psl* pslFromGff3Cigar(char *qName, int qSize, int qStart, int qEnd,
                              char *tName, int tSize, int tStart, int tEnd,
                              char* strand, char *cigar)
 /* create a PSL from a GFF3-style cigar formatted alignment */
 {
 // this is currently tested by the gff3ToPsl
 int blocksAlloced = 4;
 struct psl *psl = pslNew(qName, qSize, qStart, qEnd, tName, tSize, tStart, tEnd, strand, blocksAlloced, 0);
 
 char op;
 int size;
 int totalSize = 0;
 
 int qNext = qStart, qBlkEnd = qEnd;
 if (strand[0] == '-')
     reverseIntRange(&qNext, &qBlkEnd, qSize);
 int tNext = tStart, tBlkEnd = tEnd;
 if (strand[1] == '-')
     reverseIntRange(&tNext, &tBlkEnd, tSize);
 
 if (cigar == NULL)
     {
     // no cigar means one block
     size = qEnd - qStart;
     totalSize += size;
     psl->blockSizes[psl->blockCount] = size;
     psl->qStarts[psl->blockCount] = qNext;
     psl->tStarts[psl->blockCount] = tNext;
     psl->blockCount++;
     tNext += size;
     qNext += size;
     }
 else
     {
     if (strand[0] == '-' && strand[1] == '-')
         errAbort("GFF3 spec is vague about Gap when both strands are '-'; not implemented yet.");
     char cigarSpec[strlen(cigar)+1];  // copy since parsing is destructive
     safecpy(cigarSpec, sizeof cigarSpec, cigar);
     char *cigarNext = cigarSpec;
     while(getNextCigarOp(cigarSpec, FALSE, &cigarNext, &op, &size))
 	{
 	switch (op)
 	    {
 	    case 'M': // match or mismatch (gapless aligned block)
 		if (psl->blockCount == blocksAlloced)
 		    pslGrow(psl, &blocksAlloced);
 
 		totalSize += size;
 		psl->blockSizes[psl->blockCount] = size;
 		psl->qStarts[psl->blockCount] = qNext;
 		psl->tStarts[psl->blockCount] = tNext;
 		psl->blockCount++;
 		tNext += size;
 		qNext += size;
 		break;
 	    case 'I': // inserted in target
 		tNext += size;
 		break;
 	    case 'D': // deleted from target
 		qNext += size;
 		break;
 	    
 	    default:
 		errAbort("unrecognized CIGAR op %c in %s", op, cigar);
 	    }
 	}
     }
 
 /* CIGARs starting/ending with indels require adjusting of query/target ranges,
  * as PSL starts/ends with matches */
 psl->qStart = psl->qStarts[0];
 psl->qEnd = pslQEnd(psl, psl->blockCount-1);
 if (strand[0] == '-')
     reverseIntRange(&psl->qStart, &psl->qEnd, qSize);
 psl->tStart = psl->tStarts[0];
 psl->tEnd = pslTEnd(psl, psl->blockCount-1);
 if (strand[1] == '-')
     reverseIntRange(&psl->tStart, &psl->tEnd, tSize);
 
 /* sanity check */
 if (qNext != qBlkEnd)
     errAbort("CIGAR query length does not match specified query range %s:%d-%d", qName, qStart, qEnd);
 if (tNext != tBlkEnd)
     errAbort("CIGAR target length does not match specified target range %s:%d-%d", tName, tStart, tEnd);
 psl->match = totalSize;
 pslComputeInsertCounts(psl);
 return psl;
 }
 
 int pslRangeTreeOverlap(struct psl *psl, struct rbTree *rangeTree)
 /* Return amount that psl overlaps (on target side) with rangeTree. */
 {
 int i;
 int overlap = 0;
 boolean isRc = (psl->strand[1] == '-');
 for (i=0; i<psl->blockCount; ++i)
     {
     int start = psl->tStarts[i];
     int end = start + psl->blockSizes[i];
     if (isRc)
         reverseIntRange(&start, &end, psl->tSize);
     overlap += rangeTreeOverlapSize(rangeTree, start, end);
     }
 return overlap;
 }
 
 float pslIdent(struct psl *psl)
 /* computer fraction identity */
 {
 float aligned = psl->match + psl->misMatch + psl->repMatch;
 if (aligned == 0.0)
     return 0.0;
 else
     return ((float)(psl->match + psl->repMatch))/aligned;
 }
 
 float pslQueryAligned(struct psl *psl)
 /* compute fraction of query that was aligned */
 {
 float aligned = psl->match + psl->misMatch + psl->repMatch;
 return aligned/(float)psl->qSize;
 }
 
 struct psl* pslClone(struct psl *psl)
 /* clone a psl */
 {
 struct psl* pslCp = pslNew(psl->qName, psl->qSize, psl->qStart, psl->qEnd,
                            psl->tName, psl->tSize, psl->tStart, psl->tEnd,
                            psl->strand, psl->blockCount,
                            ((psl->tSequence != NULL) ? PSL_XA_FORMAT : 0));
 pslCp->match = psl->match;
 pslCp->misMatch = psl->misMatch;
 pslCp->repMatch = psl->repMatch;
 pslCp->nCount = psl->nCount;
 pslCp->qNumInsert = psl->qNumInsert;
 pslCp->qBaseInsert = psl->qBaseInsert;
 pslCp->tNumInsert = psl->tNumInsert;
 pslCp->tBaseInsert = psl->tBaseInsert;
 int iBlk;
 for (iBlk = 0; iBlk < psl->blockCount; iBlk++)
     {
     pslCp->blockSizes[iBlk] = psl->blockSizes[iBlk];
     pslCp->qStarts[iBlk] = psl->qStarts[iBlk];
     pslCp->tStarts[iBlk] = psl->tStarts[iBlk];
     if (psl->qSequence != NULL)
         pslCp->qSequence[iBlk] = cloneString(psl->qSequence[iBlk]);
     if (psl->tSequence != NULL)
         pslCp->tSequence[iBlk] = cloneString(psl->tSequence[iBlk]);
     pslCp->blockCount++;
     }
 return pslCp;
 }
 
 int cmpChrom(char *a, char *b)
 /* Compare two chromosomes. */
 {
 return cmpStringsWithEmbeddedNumbers(a, b);
 }
 
 
 int pslCmpTargetScore(const void *va, const void *vb)
 /* Compare to sort based on target then score. */
 {
 const struct psl *a = *((struct psl **)va);
 const struct psl *b = *((struct psl **)vb);
 int diff = cmpChrom(a->tName, b->tName);
 if (diff == 0)
     diff = pslScore(b) - pslScore(a);
 return diff;
 }
 
 int pslCmpTargetStart(const void *va, const void *vb)
 /* Compare to sort based on target start. */
 {
 const struct psl *a = *((struct psl **)va);
 const struct psl *b = *((struct psl **)vb);
 int diff = cmpChrom(a->tName, b->tName);
 if (diff == 0)
     diff = a->tStart - b->tStart;
 return diff;
 }
 
 char *pslSortList[] = {"query,score", "query,start", "chrom,score", "chrom,start", "score"};
 
 void pslSortListByVar(struct psl **pslList, char *sort)
 /* Sort a list of psls using the method definied in the sort string. */
 {
 if (sameString(sort, "query,start"))
     {
     slSort(pslList, pslCmpQuery);
     }
 else if (sameString(sort, "query,score"))
     {
     slSort(pslList, pslCmpQueryScore);
     }
 else if (sameString(sort, "score"))
     {
     slSort(pslList, pslCmpScore);
     }
 else if (sameString(sort, "chrom,start"))
     {
     slSort(pslList, pslCmpTargetStart);
     }
 else if (sameString(sort, "chrom,score"))
     {
     slSort(pslList, pslCmpTargetScore);
     }
 else
     {
     slSort(pslList, pslCmpQueryScore);
     }
 }
+
+void pslRemoveFrameShifts(struct psl *psl)
+/* Remove any frameshits if present. Changes in place, doesn't update statistics in first nine fields. */
+{
+unsigned prevEnd = psl->tStarts[0];
+int ii;
+
+for(ii = 0; ii < psl->blockCount; ii++)
+    {
+    int diff = prevEnd - psl->tStarts[ii];
+    if (diff > 0)
+        {
+        if (diff > psl->blockSizes[ii])
+            errAbort("frame shift (%d) larger than block size (%d)", diff, psl->blockSizes[ii]);
+        psl->blockSizes[ii] -= diff;
+        psl->tStarts[ii] += diff;
+        psl->qStarts[ii] += diff;
+        }
+    prevEnd = psl->tStarts[ii] + psl->blockSizes[ii];
+    }
+}