src/lib/psl.c 1.81

1.81 2009/05/31 07:28:33 markd
don't modify input sequences when converting alignments
Index: src/lib/psl.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/lib/psl.c,v
retrieving revision 1.80
retrieving revision 1.81
diff -b -B -U 1000000 -r1.80 -r1.81
--- src/lib/psl.c	19 May 2008 18:14:36 -0000	1.80
+++ src/lib/psl.c	31 May 2009 07:28:33 -0000	1.81
@@ -1,1992 +1,1947 @@
 /* psl.c was originally generated by the autoSql program, which also 
  * generated as_psl.h and as_psl.sql.  This module links the database and the RAM 
  * representation of objects. 
  *
  * This file is copyright 2002 Jim Kent, but license is hereby
  * granted for all use - public, private or commercial. */
 
 #include "common.h"
 #include "sqlNum.h"
 #include "sqlList.h"
 #include "localmem.h"
 #include "psl.h"
 #include "hash.h"
 #include "linefile.h"
 #include "dnaseq.h"
 #include "dystring.h"
 #include "fuzzyFind.h"
 #include "aliType.h"
 #include "binRange.h"
 #include "rangeTree.h"
 
 static char const rcsid[] = "$Id$";
 
 static char *createString = 
 "CREATE TABLE %s (\n"
     "%s"				/* Optional bin */
     "matches int unsigned not null,	# Number of bases that match that aren't repeats\n"
     "misMatches int unsigned not null,	# Number of bases that don't match\n"
     "repMatches int unsigned not null,	# Number of bases that match but are part of repeats\n"
     "nCount int unsigned not null,	# Number of 'N' bases\n"
     "qNumInsert int unsigned not null,	# Number of inserts in query\n"
     "qBaseInsert int unsigned not null,	# Number of bases inserted in query\n"
     "tNumInsert int unsigned not null,	# Number of inserts in target\n"
     "tBaseInsert int unsigned not null,	# Number of bases inserted in target\n"
     "strand char(2) not null,	# + or - for strand.  First character is query, second is target.\n"
     "qName varchar(255) not null,	# Query sequence name\n"
     "qSize int unsigned not null,	# Query sequence size\n"
     "qStart int unsigned not null,	# Alignment start position in query\n"
     "qEnd int unsigned not null,	# Alignment end position in query\n"
     "tName varchar(255) not null,	# Target sequence name\n"
     "tSize int unsigned not null,	# Target sequence size\n"
     "tStart int unsigned not null,	# Alignment start position in target\n"
     "tEnd int unsigned not null,	# Alignment end position in target\n"
     "blockCount int unsigned not null,	# Number of blocks in alignment\n"
     "blockSizes longblob not null,	# Size of each block\n"
     "qStarts longblob not null,	# Start of each block in query.\n"
     "tStarts longblob not null,	# Start of each block in target.\n";
 
 static char *indexString = 
 	  "#Indices\n"
     "%s"                            /* Optional bin. */
     "INDEX(qName(12))\n"
 ")\n";
 
 
 struct psl *pslxLoad(char **row)
 /* Load a psl from row fetched with select * from psl
  * from database.  Dispose of this with pslFree(). */
 {
 struct psl *ret = pslLoad(row);
 int retSize;
 sqlStringDynamicArray(row[21],&ret->qSequence, &retSize);
 sqlStringDynamicArray(row[22],&ret->tSequence, &retSize);
 return ret;
 }
 
 struct psl *pslLoad(char **row)
 /* Load a psl from row fetched with select * from psl
  * from database.  Dispose of this with pslFree(). */
 {
 struct psl *ret;
 int sizeOne;
 
 AllocVar(ret);
 ret->blockCount = sqlUnsigned(row[17]);
 ret->match = sqlUnsigned(row[0]);
 ret->misMatch = sqlUnsigned(row[1]);
 ret->repMatch = sqlUnsigned(row[2]);
 ret->nCount = sqlUnsigned(row[3]);
 ret->qNumInsert = sqlUnsigned(row[4]);
 ret->qBaseInsert = sqlSigned(row[5]);
 ret->tNumInsert = sqlUnsigned(row[6]);
 ret->tBaseInsert = sqlSigned(row[7]);
 strcpy(ret->strand, row[8]);
 ret->qName = cloneString(row[9]);
 ret->qSize = sqlUnsigned(row[10]);
 ret->qStart = sqlUnsigned(row[11]);
 ret->qEnd = sqlUnsigned(row[12]);
 ret->tName = cloneString(row[13]);
 ret->tSize = sqlUnsigned(row[14]);
 ret->tStart = sqlUnsigned(row[15]);
 ret->tEnd = sqlUnsigned(row[16]);
 sqlUnsignedDynamicArray(row[18], &ret->blockSizes, &sizeOne);
 if (sizeOne != ret->blockCount)
     {
     printf("sizeOne bloxksizes %d bs %d block=%s\n",sizeOne, ret->blockCount,row[18]);
     }
 assert(sizeOne == ret->blockCount);
 sqlUnsignedDynamicArray(row[19], &ret->qStarts, &sizeOne);
 if (sizeOne != ret->blockCount)
     {
     printf("sizeOne qStarts %d bs %d\n",sizeOne, ret->blockCount);
     }
 assert(sizeOne == ret->blockCount);
 sqlUnsignedDynamicArray(row[20], &ret->tStarts, &sizeOne);
 if (sizeOne != ret->blockCount)
     {
     printf("sizeOne tStarts %d bs %d\n",sizeOne, ret->blockCount);
     }
 assert(sizeOne == ret->blockCount);
 return ret;
 }
 
 struct psl *pslCommaIn(char **pS, struct psl *ret)
 /* Create a psl out of a comma separated string. 
  * This will fill in ret if non-null, otherwise will
  * return a new psl */
 {
 char *s = *pS;
 int i;
 
 if (ret == NULL)
     AllocVar(ret);
 ret->match = sqlUnsignedComma(&s);
 ret->misMatch = sqlUnsignedComma(&s);
 ret->repMatch = sqlUnsignedComma(&s);
 ret->nCount = sqlUnsignedComma(&s);
 ret->qNumInsert = sqlUnsignedComma(&s);
 ret->qBaseInsert = sqlSignedComma(&s);
 ret->tNumInsert = sqlUnsignedComma(&s);
 ret->tBaseInsert = sqlSignedComma(&s);
 sqlFixedStringComma(&s, ret->strand, sizeof(ret->strand));
 ret->qName = sqlStringComma(&s);
 ret->qSize = sqlUnsignedComma(&s);
 ret->qStart = sqlUnsignedComma(&s);
 ret->qEnd = sqlUnsignedComma(&s);
 ret->tName = sqlStringComma(&s);
 ret->tSize = sqlUnsignedComma(&s);
 ret->tStart = sqlUnsignedComma(&s);
 ret->tEnd = sqlUnsignedComma(&s);
 ret->blockCount = sqlUnsignedComma(&s);
 s = sqlEatChar(s, '{');
 AllocArray(ret->blockSizes, ret->blockCount);
 for (i=0; i<ret->blockCount; ++i)
     {
     ret->blockSizes[i] = sqlUnsignedComma(&s);
     }
 s = sqlEatChar(s, '}');
 s = sqlEatChar(s, ',');
 s = sqlEatChar(s, '{');
 AllocArray(ret->qStarts, ret->blockCount);
 for (i=0; i<ret->blockCount; ++i)
     {
     ret->qStarts[i] = sqlUnsignedComma(&s);
     }
 s = sqlEatChar(s, '}');
 s = sqlEatChar(s, ',');
 s = sqlEatChar(s, '{');
 AllocArray(ret->tStarts, ret->blockCount);
 for (i=0; i<ret->blockCount; ++i)
     {
     ret->tStarts[i] = sqlUnsignedComma(&s);
     }
 s = sqlEatChar(s, '}');
 s = sqlEatChar(s, ',');
 *pS = s;
 return ret;
 }
 
 void pslFree(struct psl **pEl)
 /* Free a single dynamically allocated psl such as created
  * with pslLoad(). */
 {
 struct psl *el;
 
 if ((el = *pEl) == NULL) return;
 freeMem(el->qName);
 freeMem(el->tName);
 freeMem(el->blockSizes);
 freeMem(el->qStarts);
 freeMem(el->tStarts);
 if (el->qSequence)
     {
     freeMem(el->qSequence[0]);
     freeMem(el->qSequence);
     }
 if (el->tSequence)
     {
     freeMem(el->tSequence[0]);
     freeMem(el->tSequence);
     }
 freez(pEl);
 }
 
 void pslFreeList(struct psl **pList)
 /* Free a list of dynamically allocated psl's */
 {
 struct psl *el, *next;
 
 for (el = *pList; el != NULL; el = next)
     {
     next = el->next;
     pslFree(&el);
     }
 *pList = NULL;
 }
 
 void pslOutput(struct psl *el, FILE *f, char sep, char lastSep) 
 /* Print out psl.  Separate fields with sep. Follow last field with lastSep. */
 {
 int i;
 fprintf(f, "%u", el->match);
 fputc(sep,f);
 fprintf(f, "%u", el->misMatch);
 fputc(sep,f);
 fprintf(f, "%u", el->repMatch);
 fputc(sep,f);
 fprintf(f, "%u", el->nCount);
 fputc(sep,f);
 fprintf(f, "%u", el->qNumInsert);
 fputc(sep,f);
 fprintf(f, "%d", el->qBaseInsert);
 fputc(sep,f);
 fprintf(f, "%u", el->tNumInsert);
 fputc(sep,f);
 fprintf(f, "%d", el->tBaseInsert);
 fputc(sep,f);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->strand);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->qName);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 fprintf(f, "%u", el->qSize);
 fputc(sep,f);
 fprintf(f, "%u", el->qStart);
 fputc(sep,f);
 fprintf(f, "%u", el->qEnd);
 fputc(sep,f);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->tName);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 fprintf(f, "%u", el->tSize);
 fputc(sep,f);
 fprintf(f, "%u", el->tStart);
 fputc(sep,f);
 fprintf(f, "%u", el->tEnd);
 fputc(sep,f);
 fprintf(f, "%u", el->blockCount);
 fputc(sep,f);
 if (sep == ',') fputc('{',f);
 for (i=0; i<el->blockCount; ++i)
     {
     fprintf(f, "%u", el->blockSizes[i]);
     fputc(',', f);
     }
 if (sep == ',') fputc('}',f);
 fputc(sep,f);
 if (sep == ',') fputc('{',f);
 for (i=0; i<el->blockCount; ++i)
     {
     fprintf(f, "%u", el->qStarts[i]);
     fputc(',', f);
     }
 if (sep == ',') fputc('}',f);
 fputc(sep,f);
 if (sep == ',') fputc('{',f);
 for (i=0; i<el->blockCount; ++i)
     {
     fprintf(f, "%u", el->tStarts[i]);
     fputc(',', f);
     }
 if (sep == ',') fputc('}',f);
 if (el->qSequence)
     {
     fputc(sep,f);
     if (sep == ',') fputc('{',f);
     for (i=0; i<el->blockCount; ++i)
 	{
 	fprintf(f, "%s", el->qSequence[i]);
 	fputc(',', f);
 	}
     if (sep == ',') fputc('}',f);
     fputc(sep,f);
     if (sep == ',') fputc('{',f);
     for (i=0; i<el->blockCount; ++i)
 	{
 	fprintf(f, "%s", el->tSequence[i]);
 	fputc(',', f);
 	}
     if (sep == ',') fputc('}',f);
     }
 
 fputc(lastSep,f);
 if (ferror(f))
     {
     perror("Error writing psl file\n");
     errAbort("\n");
     }
 }
 
 /* ----- end autoSql generated part --------------- */
 
 void pslOutputShort(struct psl *el, FILE *f, char sep, char lastSep) 
 /* Print out psl.  Separate fields with sep. Follow last field with lastSep. */
 {
 fprintf(f, "%u", el->match);
 fputc(sep,f);
 fprintf(f, "%u", el->misMatch);
 fputc(sep,f);
 fprintf(f, "%u", el->repMatch);
 fputc(sep,f);
 fprintf(f, "%u", el->qNumInsert);
 fputc(sep,f);
 fprintf(f, "%d", el->qBaseInsert);
 fputc(sep,f);
 fprintf(f, "%u", el->tNumInsert);
 fputc(sep,f);
 fprintf(f, "%d", el->tBaseInsert);
 fputc(sep,f);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->strand);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->qName);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 fprintf(f, "%u", el->qStart);
 fputc(sep,f);
 fprintf(f, "%u", abs(el->qEnd - el->qStart));
 fputc(sep,f);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->tName);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 fprintf(f, "%u", el->tStart);
 fputc(sep,f);
 fprintf(f, "%u", abs(el->tEnd - el->tStart));
 fputc(sep,f);
 fprintf(f, "%u", el->blockCount);
 fputc(sep,f);
 if (sep == ',') fputc('{',f);
 fputc(lastSep,f);
 if (ferror(f))
     {
     perror("Error writing psl file\n");
     errAbort("\n");
     }
 }
 
 void pslOutFormat(struct psl *el, FILE *f, char sep, char lastSep) 
 /* Print out selected psl values.  Separate fields with sep. Follow last field with lastSep. */
 /* Prints out a better format with bold field headings followed by value */
 /* Requires further upstream work to ensure that only the field headers */
 /* declared here are printed if replacing an existing psl print function*/
 {
 const char *headers[] = {"Matches", "Mismatches", "Matches in repeats", "Number of N bases", "Query name", "Size", "Start", "End", "Chromosome", "Strand", "Start", "End"};
 char *hformat = "<B>%s:</B> "; /* string for formatted print for headers */
 char *uformat = "<B>%s:</B> %u%c"; /* string for formatted print for unsigned variable */
 char *targName;
 
 fprintf(f, uformat, headers[0], el->match, sep);
 fprintf(f, uformat, headers[1], el->misMatch, sep);
 fprintf(f, uformat, headers[2], el->repMatch, sep);
 fprintf(f, uformat, headers[3], el->nCount, sep);
 
 fprintf(f, hformat, headers[4]);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->qName);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 
 fprintf(f, uformat, headers[5], el->qSize, sep);
 fprintf(f, uformat, headers[6], el->qStart, sep);
 fprintf(f, uformat, headers[7], el->qEnd, sep);
 
 fprintf(f, hformat, headers[8]);
 if (sep == ',') fputc('"',f);
 /* skip leading 'chr' in string to get only chromosome part */
 targName = el->tName;
 if (startsWith("chr", el->tName))
    targName += 3;
 fprintf(f, "%s", targName);
 
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 
 fprintf(f, hformat, headers[9]);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->strand);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 
 fprintf(f, uformat, headers[10], el->tStart, sep);
 fprintf(f, uformat, headers[11], el->tEnd, sep);
 
 fputc(lastSep,f);
 
 if (ferror(f))
     {
     perror("Error writing psl file\n");
     errAbort("\n");
     }
 
 }
 
 struct psl *pslLoadAll(char *fileName)
 /* Load all psl's in file. */
 {
 struct lineFile *lf = pslFileOpen(fileName);
 struct psl *pslList = NULL, *psl;
 while ((psl = pslNext(lf)) != NULL)
     {
     slAddHead(&pslList, psl);
     }
 slReverse(&pslList);
 lineFileClose(&lf);
 return pslList;
 }
 
 
 int pslCmpQuery(const void *va, const void *vb)
 /* Compare to sort based on query start. */
 {
 const struct psl *a = *((struct psl **)va);
 const struct psl *b = *((struct psl **)vb);
 int dif;
 dif = strcmp(a->qName, b->qName);
 if (dif == 0)
     dif = a->qStart - b->qStart;
 return dif;
 }
 
 int pslCmpTarget(const void *va, const void *vb)
 /* Compare to sort based on target start. */
 {
 const struct psl *a = *((struct psl **)va);
 const struct psl *b = *((struct psl **)vb);
 int dif;
 dif = strcmp(a->tName, b->tName);
 if (dif == 0)
     dif = a->tStart - b->tStart;
 return dif;
 }
 
 int pslCmpTargetAndStrand(const void *va, const void *vb)
 /* Compare to sort based on target, strand,  tStart. */
 {
 const struct psl *a = *((struct psl **)va);
 const struct psl *b = *((struct psl **)vb);
 int dif;
 dif = strcmp(a->tName, b->tName);
 if (dif == 0)
     dif = strcmp(a->strand, b->strand);
 if (dif == 0)
     dif = a->tStart - b->tStart;
 return dif;
 }
 
 
 int pslCmpScore(const void *va, const void *vb)
 /* Compare to sort based on score (descending). */
 {
 const struct psl *a = *((struct psl **)va);
 const struct psl *b = *((struct psl **)vb);
 return pslScore(b) - pslScore(a);
 }
 
 int pslCmpQueryScore(const void *va, const void *vb)
 /* Compare to sort based on query then score (descending). */
 {
 const struct psl *a = *((struct psl **)va);
 const struct psl *b = *((struct psl **)vb);
 int diff = strcmp(a->qName, b->qName);
 if (diff == 0)
     diff = pslScore(b) - pslScore(a);
 return diff;
 }
 
 int pslCmpMatch(const void *va, const void *vb)
 /* Compare to sort based on match */
 {
 const struct psl *a = *((struct psl **)va);
 const struct psl *b = *((struct psl **)vb);
 return b->match - a->match;
 }
 
 static void pslLabelColumns(FILE *f)
 /* Write column info. */
 {
 fputs("\n"
 "match\tmis- \trep. \tN's\tQ gap\tQ gap\tT gap\tT gap\tstrand\tQ        \tQ   \tQ    \tQ  \tT        \tT   \tT    \tT  \tblock\tblockSizes \tqStarts\t tStarts\n"
 "     \tmatch\tmatch\t   \tcount\tbases\tcount\tbases\t      \tname     \tsize\tstart\tend\tname     \tsize\tstart\tend\tcount\n" 
 "---------------------------------------------------------------------------------------------------------------------------------------------------------------\n",
 f);
 }
 
 void pslxWriteHead(FILE *f, enum gfType qType, enum gfType tType)
 /* Write header for extended (possibly protein) psl file. */
 {
 fprintf(f, "psLayout version 4 %s %s\n", gfTypeName(qType), gfTypeName(tType));
 pslLabelColumns(f);
 }
 
 void pslWriteHead(FILE *f)
 /* Write head of psl. */
 {
 fputs("psLayout version 3\n", f);
 pslLabelColumns(f);
 }
 
 void pslWriteAll(struct psl *pslList, char *fileName, boolean writeHeader)
 /* Write a psl file from list. */
 {
 FILE *f;
 struct psl *psl;
 
 f = mustOpen(fileName, "w");
 if (writeHeader)
     pslWriteHead(f);
 for (psl = pslList; psl != NULL; psl = psl->next)
     pslTabOut(psl, f);
 fclose(f);
 }
 
 void pslxFileOpen(char *fileName, enum gfType *retQueryType, enum gfType *retTargetType, struct lineFile **retLf)
 /* Read header part of psl and make sure it's right.  Return
  * sequence types and file handle. */
 {
 char *line;
 int lineSize;
 char *words[30];
 char *version;
 int wordCount;
 int i;
 enum gfType qt = gftRna,  tt = gftDna;
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 
 if (!lineFileNext(lf, &line, &lineSize))
     warn("%s is empty", fileName);
 else
     {
     if (startsWith("psLayout version", line))
 	{
 	wordCount = chopLine(line, words);
 	if (wordCount < 3)
 	    errAbort("%s is not a psLayout file", fileName);
 	version = words[2];
 	if (sameString(version, "3"))
 	    {
 	    }
 	else if (sameString(version, "4"))
 	    {
 	    qt = gfTypeFromName(words[3]);
 	    tt = gfTypeFromName(words[4]);
 	    }
 	else
 	    {
 	    errAbort("%s is version %s of psLayout, this program can only handle through version 4",
 		fileName,  version);
 	    }
 	for (i=0; i<4; ++i)
 	    {
 	    if (!lineFileNext(lf, &line, &lineSize))
 		errAbort("%s severely truncated", fileName);
 	    }
 	}
     else
         {
 	char *s = cloneString(line);
         while (line != NULL && line[0] == '#')
             {
             freeMem(s);
             lineFileNext(lf, &line, &lineSize);
             s = cloneString(line);
             }
 	wordCount = chopLine(s, words);
 	if (wordCount < 21 || wordCount > 23 || (words[8][0] != '+' && words[8][0] != '-'))
 	    errAbort("%s is not a psLayout file", fileName);
 	else
 	    lineFileReuse(lf); 
 	freeMem(s);
 	}
     }
 *retQueryType = qt;
 *retTargetType = tt;
 *retLf = lf;
 }
 
 static void pslxFileOpenWithMetaConfig(char *fileName, bool isMetaUnique, enum gfType *retQueryType, enum gfType *retTargetType, struct lineFile **retLf, FILE *f)
 /* Read header part of psl and make sure it's right.  Return
  * sequence types and file handle and send meta data to output file f */
 {
 char *line;
 int lineSize;
 char *words[30];
 char *version;
 int wordCount;
 int i;
 enum gfType qt = gftRna,  tt = gftDna;
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 
 lineFileSetMetaDataOutput(lf, f);
 if (isMetaUnique)
     lineFileSetUniqueMetaData(lf);
 if (!lineFileNext(lf, &line, &lineSize))
     warn("%s is empty", fileName);
 else
     {
     if (startsWith("psLayout version", line))
 	{
 	wordCount = chopLine(line, words);
 	if (wordCount < 3)
 	    errAbort("%s is not a psLayout file", fileName);
 	version = words[2];
 	if (sameString(version, "3"))
 	    {
 	    }
 	else if (sameString(version, "4"))
 	    {
 	    qt = gfTypeFromName(words[3]);
 	    tt = gfTypeFromName(words[4]);
 	    }
 	else
 	    {
 	    errAbort("%s is version %s of psLayout, this program can only handle through version 4",
 		fileName,  version);
 	    }
 	for (i=0; i<4; ++i)
 	    {
 	    if (!lineFileNext(lf, &line, &lineSize))
 		errAbort("%s severely truncated", fileName);
 	    }
 	}
     else
         {
 	char *s = cloneString(line);
         boolean eof = FALSE;
         while ((line[0] == '#') && (!eof))
             {
             freeMem(s);
             if (!lineFileNext(lf, &line, &lineSize))
                 eof = TRUE;
             s = cloneString(line);
             }
 	wordCount = chopLine(s, words);
 	if ((wordCount < 21 || wordCount > 23 || (words[8][0] != '+' && words[8][0] != '-')) && (!eof))
 	    errAbort("%s is not a psLayout file", fileName);
 	else
 	    lineFileReuse(lf); 
 	freeMem(s);
 	}
     }
 *retQueryType = qt;
 *retTargetType = tt;
 *retLf = lf;
 }
 
 void pslxFileOpenWithMeta(char *fileName, enum gfType *retQueryType, enum gfType *retTargetType, struct lineFile **retLf, FILE *f)
 /* Read header part of psl and make sure it's right.  Return
  * sequence types and file handle and send meta data to output file f */
 {
 pslxFileOpenWithMetaConfig(fileName, FALSE, retQueryType, retTargetType, retLf, f);
 }
 
 void pslxFileOpenWithUniqueMeta(char *fileName, enum gfType *retQueryType, enum gfType *retTargetType, struct lineFile **retLf, FILE *f)
 /* Read header part of psl and make sure it's right.  Return
  * sequence types and file handle and send only unique meta data to output f */
 {
 pslxFileOpenWithMetaConfig(fileName, TRUE, retQueryType, retTargetType, retLf, f);
 }
 
 struct lineFile *pslFileOpen(char *fileName)
 /* Read header part of psl and make sure it's right. 
  * Return line file handle to it. */
 {
 enum gfType qt, tt;
 struct lineFile *lf;
 pslxFileOpen(fileName, &qt, &tt, &lf);
 return lf;
 }
 
 struct lineFile *pslFileOpenWithMeta(char *fileName, FILE *f)
 /* Read header part of psl and make sure it's right. 
  * Return line file handle to it. */
 {
 enum gfType qt, tt;
 struct lineFile *lf;
 pslxFileOpenWithMeta(fileName, &qt, &tt, &lf, f);
 return lf;
 }
 
 struct lineFile *pslFileOpenWithUniqueMeta(char *fileName, FILE *f)
 /* Read header part of psl and make sure it's right. 
  * Set flag to suppress duplicate header comments.
  * Return line file handle to it. */
 {
 enum gfType qt, tt;
 struct lineFile *lf;
 pslxFileOpenWithUniqueMeta(fileName, &qt, &tt, &lf, f);
 return lf;
 }
 
 struct psl *pslNext(struct lineFile *lf)
 /* Read next line from file and convert it to psl.  Return
  * NULL at eof. */
 {
 char *line;
 int lineSize;
 char *words[32];
 int wordCount;
 static int lineAlloc = 0;
 static char *chopBuf = NULL;
 
 if (!lineFileNextReal(lf, &line))
     {
     return NULL;
     }
 lineSize = strlen(line);
 if (lineSize >= lineAlloc)
     {
     lineAlloc = lineSize+256;
     chopBuf = needMoreMem(chopBuf, 0, lineAlloc);
     }
 memcpy(chopBuf, line, lineSize+1);
 wordCount = chopLine(chopBuf, words);
 if (wordCount == 21)
     {
     return pslLoad(words);
     }
 if (wordCount == 23)
     {
     return pslxLoad(words);
     }
 else
     {
     errAbort("Bad line %d of %s wordCount is %d instead of 21 or 23\n", lf->lineIx, lf->fileName, wordCount);
     return NULL;
     }
 }
 
 struct psl *pslxLoadLm(char **row, struct lm *lm)
 /* Load row into local memory pslx. */
 {
 struct psl *ret = pslLoadLm(row, lm);
 ret->qSequence = lmAlloc(lm, sizeof(ret->qSequence[0]) * ret->blockCount);
 sqlStringArray(lmCloneString(lm,row[21]),ret->qSequence, ret->blockCount);
 ret->tSequence = lmAlloc(lm, sizeof(ret->tSequence[0]) * ret->blockCount);
 sqlStringArray(lmCloneString(lm,row[22]),ret->tSequence, ret->blockCount);
 return ret;
 }
 
 struct psl *pslLoadLm(char **row, struct lm *lm)
 /* Load row into local memory psl. */
 {
 struct psl *ret;
 
 ret = lmAlloc(lm, sizeof(*ret));
 ret->blockCount = sqlUnsigned(row[17]);
 ret->match = sqlUnsigned(row[0]);
 ret->misMatch = sqlUnsigned(row[1]);
 ret->repMatch = sqlUnsigned(row[2]);
 ret->nCount = sqlUnsigned(row[3]);
 ret->qNumInsert = sqlUnsigned(row[4]);
 ret->qBaseInsert = sqlSigned(row[5]);
 ret->tNumInsert = sqlUnsigned(row[6]);
 ret->tBaseInsert = sqlSigned(row[7]);
 strcpy(ret->strand, row[8]);
 ret->qName = lmCloneString(lm,row[9]);
 ret->qSize = sqlUnsigned(row[10]);
 ret->qStart = sqlUnsigned(row[11]);
 ret->qEnd = sqlUnsigned(row[12]);
 ret->tName = lmCloneString(lm, row[13]);
 ret->tSize = sqlUnsigned(row[14]);
 ret->tStart = sqlUnsigned(row[15]);
 ret->tEnd = sqlUnsigned(row[16]);
 ret->blockSizes = lmAlloc(lm, sizeof(ret->blockSizes[0]) * ret->blockCount);
 sqlUnsignedArray(row[18], ret->blockSizes, ret->blockCount);
 ret->qStarts = lmAlloc(lm, sizeof(ret->qStarts[0]) * ret->blockCount);
 sqlUnsignedArray(row[19], ret->qStarts, ret->blockCount);
 ret->tStarts = lmAlloc(lm, sizeof(ret->tStarts[0]) * ret->blockCount);
 sqlUnsignedArray(row[20], ret->tStarts, ret->blockCount);
 return ret;
 }
 
 boolean pslIsProtein(const struct psl *psl)
 /* is psl a protein psl (are it's blockSizes and scores in protein space) */
 {
 int lastBlock = psl->blockCount - 1;
 
 return  (((psl->strand[1] == '+' ) &&
     (psl->tEnd == psl->tStarts[lastBlock] + 3*psl->blockSizes[lastBlock])) ||
    ((psl->strand[1] == '-') && 
     (psl->tStart == (psl->tSize-(psl->tStarts[lastBlock] + 3*psl->blockSizes[lastBlock])))));
 }
 
 int pslCalcMilliBad(struct psl *psl, boolean isMrna)
 /* Calculate badness in parts per thousand. */
 {
 int sizeMul = pslIsProtein(psl) ? 3 : 1;
 int qAliSize, tAliSize, aliSize;
 int milliBad = 0;
 int sizeDif;
 int insertFactor;
 int total;
 
 qAliSize = sizeMul * (psl->qEnd - psl->qStart);
 tAliSize = psl->tEnd - psl->tStart;
 aliSize = min(qAliSize, tAliSize);
 if (aliSize <= 0)
     return 0;
 sizeDif = qAliSize - tAliSize;
 if (sizeDif < 0)
     {
     if (isMrna)
 	sizeDif = 0;
     else
 	sizeDif = -sizeDif;
     }
 insertFactor = psl->qNumInsert;
 if (!isMrna)
     insertFactor += psl->tNumInsert;
 
 total = (sizeMul * (psl->match + psl->repMatch + psl->misMatch));
 if (total != 0)
     milliBad = (1000 * (psl->misMatch*sizeMul + insertFactor + round(3*log(1+sizeDif)))) / total;
 return milliBad;
 }
 
 int pslScore(const struct psl *psl)
 /* Return score for psl. */
 {
 int sizeMul = pslIsProtein(psl) ? 3 : 1;
 
 return sizeMul * (psl->match + ( psl->repMatch>>1)) - 
 	sizeMul * psl->misMatch - psl->qNumInsert - psl->tNumInsert;
 }
 
 int pslCmpScoreDesc(const void *va, const void *vb)
 /* Compare to sort based on score. */
 {
 const struct psl *a = *((struct psl **)va);
 const struct psl *b = *((struct psl **)vb);
 return pslScore(b) - pslScore(a);
 }
 
 
 struct ffAli *pslToFakeFfAli(struct psl *psl, DNA *needle, DNA *haystack)
 /* Convert from psl to ffAli format.  In some cases you can pass NULL
  * for needle and haystack - depending what the post-processing is going
  * to be. */
 {
 struct ffAli *ffList = NULL, *ff;
 int blockCount = psl->blockCount;
 unsigned *blockSizes = psl->blockSizes;
 unsigned *qStarts = psl->qStarts;
 unsigned *tStarts = psl->tStarts;
 int size;
 int i;
 
 for (i=0; i<blockCount; ++i)
     {
     size = blockSizes[i];
     AllocVar(ff);
     ff->left = ffList;
     ffList = ff;
     ff->nStart = ff->nEnd = needle + qStarts[i];
     ff->nEnd += size;
     ff->hStart = ff->hEnd = haystack + tStarts[i];
     ff->hEnd += size;
     }
 ffList = ffMakeRightLinks(ffList);
 return ffList;
 }
 
 struct psl *pslFromFakeFfAli(struct ffAli *ff, 
 	DNA *needle, DNA *haystack, char strand,
 	char *qName, int qSize, char *tName, int tSize)
 /* This will create a basic psl structure from a sorted series of ffAli
  * blocks.  The fields that would need actual sequence to be filled in
  * are left zero however - fields including match, repMatch, mismatch. */
 {
 struct psl *psl;
 unsigned *blockSizes;
 unsigned *qStarts;
 unsigned *tStarts;
 int blockCount;
 int i;
 int nStart, hStart;
 int nEnd, hEnd;
 
 AllocVar(psl);
 psl->blockCount = blockCount = ffAliCount(ff);
 psl->blockSizes = AllocArray(blockSizes, blockCount);
 psl->qStarts = AllocArray(qStarts, blockCount);
 psl->tStarts = AllocArray(tStarts, blockCount);
 psl->qName = cloneString(qName);
 psl->qSize = qSize;
 psl->tName = cloneString(tName);
 psl->tSize = tSize;
 psl->strand[0] = strand;
 
 for (i=0; i<blockCount; ++i)
     {
     nStart = ff->nStart - needle;
     nEnd = ff->nEnd - needle;
     hStart = ff->hStart - haystack;
     hEnd = ff->hEnd - haystack;
     blockSizes[i] = nEnd - nStart;
     qStarts[i] = nStart;
     tStarts[i] = hStart;
     if (i == 0)
        {
        psl->qStart = nStart;
        psl->tStart = hStart;
        }
     if (i == blockCount-1)
        {
        psl->qEnd = nEnd;
        psl->tEnd = hEnd;
        }
     ff = ff->right;
     }
 if (strand == '-')
     {
     reverseIntRange(&psl->qStart, &psl->qEnd, psl->qSize);
     }
 return psl;
 }
 
 struct ffAli *pslToFfAli(struct psl *psl, struct dnaSeq *query, struct dnaSeq *target,
 	int targetOffset)
 /* Convert from psl to ffAli format.  Clip to parts that we actually
  * have sequence for. */
 {
 struct ffAli *ffList = NULL, *ff;
 DNA *needle = query->dna;
 DNA *haystack = target->dna;
 int blockCount = psl->blockCount;
 unsigned *blockSizes = psl->blockSizes;
 unsigned *qStarts = psl->qStarts;
 unsigned *tStarts = psl->tStarts;
 int size;
 int i;
 int tMin = targetOffset;
 int tMax = targetOffset + target->size;
 int tStart, tEnd;
 int clipStart, clipEnd, clipOffset, clipSize;
 
 for (i=0; i<blockCount; ++i)
     {
     clipStart = tStart = tStarts[i];
     size = blockSizes[i];
     clipEnd = tEnd = tStart + size;
     if (tStart < tMax && tEnd > tMin)
 	{
 	if (clipStart < tMin) clipStart = tMin;
 	if (clipEnd > tMax) clipEnd = tMax;
 	clipOffset = clipStart - tStart;
 	clipSize = clipEnd - clipStart;
 	AllocVar(ff);
 	ff->left = ffList;
 	ffList = ff;
 	ff->nStart = ff->nEnd = needle + qStarts[i] + clipOffset;
 	ff->nEnd += clipSize;
 	ff->hStart = ff->hEnd = haystack + clipStart - targetOffset;
 	ff->hEnd += clipSize;
 	}
     }
 ffList = ffMakeRightLinks(ffList);
 ffCountGoodEnds(ffList);
 return ffList;
 }
 
 int pslOrientation(struct psl *psl)
 /* Translate psl strand + or - to orientation +1 or -1 */
 {
 if (psl->strand[1] != '\0')
     {
     /* translated blat */
     if (psl->strand[0] != psl->strand[1])
         return -1;
     else
         return 1;
     }
 else
     {
     if (psl->strand[0] == '-')
         return -1;
     else
         return 1;
     }
 }
 
 int pslWeightedIntronOrientation(struct psl *psl, struct dnaSeq *genoSeq, int offset)
 /* Return >0 if introns make it look like alignment is on + strand,
  *        <0 if introns make it look like alignment is on - strand,
  *        0 if can't tell.  The absolute value of the return indicates
  * how many splice sites we've seen supporting the orientation.
  * Sequence should NOT be reverse complemented.  */
 {
 int intronDir = 0;
 int oneDir;
 int i;
 DNA *dna = genoSeq->dna;
 
 /* code below doesn't support negative target strand (translated blat) */
 if (psl->strand[1] == '-')
     errAbort("pslWeightedIntronOrientation doesn't support a negative target strand");
 
 for (i=1; i<psl->blockCount; ++i)
     {
     int iStart, iEnd, blockSize = psl->blockSizes[i-1];
     if (psl->qStarts[i-1] + blockSize == psl->qStarts[i])
 	{
 	iStart = psl->tStarts[i-1] + psl->blockSizes[i-1] - offset;
 	iEnd = psl->tStarts[i] - offset;
 	oneDir = intronOrientation(dna+iStart, dna+iEnd);
 	intronDir += oneDir;
 	}
     }
 return intronDir;
 }
 
 int pslIntronOrientation(struct psl *psl, struct dnaSeq *genoSeq, int offset)
 /* Return 1 if introns make it look like alignment is on + strand,
  *       -1 if introns make it look like alignment is on - strand,
  *        0 if can't tell.
  * Sequence should NOT be reverse complemented.  */
 {
 int intronDir = pslWeightedIntronOrientation(psl, genoSeq, offset);
 if (intronDir < 0)
     intronDir = -1;
 else if (intronDir > 0)
     intronDir = 1;
 return intronDir;
 }
 
 boolean pslHasIntron(struct psl *psl, struct dnaSeq *seq, int seqOffset)
 /* Return TRUE if there's a probable intron. Sequence should NOT be
  * reverse complemented.*/
 {
 int blockCount = psl->blockCount, i;
 unsigned *tStarts = psl->tStarts;
 unsigned *blockSizes = psl->blockSizes;
 unsigned *qStarts = psl->qStarts;
 int blockSize, start, end;
 DNA *dna = seq->dna;
 
 for (i=1; i<blockCount; ++i)
     {
     blockSize = blockSizes[i-1];
     start = qStarts[i-1]+blockSize;
     end = qStarts[i];
     if (start == end)
         {
         start = tStarts[i-1] + blockSize;
         end = tStarts[i];
         if (psl->strand[1] == '-')
             reverseIntRange(&start, &end, psl->tSize);
         start -= seqOffset;
         end -= seqOffset;
 	if (intronOrientation(dna+start, dna+end) != 0)
 	    return TRUE;
 	}
     }
 return FALSE;
 }
 
 void pslTailSizes(struct psl *psl, int *retStartTail, int *retEndTail)
 /* Find the length of "tails" (rather than extensions) implied by psl. */
 {
 int orientation = pslOrientation(psl);
 int qFloppyStart, qFloppyEnd;
 int tFloppyStart, tFloppyEnd;
 
 if (orientation > 0)
     {
     qFloppyStart = psl->qStart;
     qFloppyEnd = psl->qSize - psl->qEnd;
     }
 else
     {
     qFloppyStart = psl->qSize - psl->qEnd;
     qFloppyEnd = psl->qStart;
     }
 tFloppyStart = psl->tStart;
 tFloppyEnd = psl->tSize - psl->tEnd;
 *retStartTail = min(qFloppyStart, tFloppyStart);
 *retEndTail = min(qFloppyEnd, tFloppyEnd);
 }
 
 static void rcSeqs(char **seqs, unsigned blockCount, unsigned *blockSizes)
 /* reverses complement sequences in list, maintain property that all strings
  * are in one malloc block.   blockSizes should already be reversed. */
 {
 char *buf, *next;
 int i, memSz = 0;
 
 /* get a new memory block for strings */
 for (i = 0; i < blockCount; i++)
     memSz += blockSizes[i]+1;
 next = buf = needLargeMem(memSz);
 
 /* reverse compliment and copy to new memory block */
 for (i = blockCount-1; i >= 0; i--)
     {
     int len = strlen(seqs[i]);
     reverseComplement(seqs[i], len);
     memcpy(next, seqs[i], len+1);
     next += len+1;
     }
 
 /* swap memory and update pointers */
 freeMem(seqs[0]);
 seqs[0] = buf;
 next = buf;
 
 for (i = 0; i < blockCount; i++)
     {
     seqs[i] = next;
     next += blockSizes[i]+1;
     }
 }
 
 void pslRc(struct psl *psl)
 /* Reverse-complement a PSL alignment.  This makes the target strand explicit. */
 {
 unsigned tSize = psl->tSize, qSize = psl->qSize;
 unsigned blockCount = psl->blockCount, i;
 unsigned *tStarts = psl->tStarts, *qStarts = psl->qStarts, *blockSizes = psl->blockSizes;
 
 /* swap strand, forcing target to have an explict strand */
 psl->strand[0] = (psl->strand[0] != '-') ? '-' : '+';
 psl->strand[1] = (psl->strand[1] != '-') ? '-' : '+';
 psl->strand[2] = 0;
 
 for (i=0; i<blockCount; ++i)
     {
     tStarts[i] = tSize - (tStarts[i] + blockSizes[i]);
     qStarts[i] = qSize - (qStarts[i] + blockSizes[i]);
     }
 reverseUnsigned(tStarts, blockCount);
 reverseUnsigned(qStarts, blockCount);
 reverseUnsigned(blockSizes, blockCount);
 if (psl->qSequence != NULL)
     {
     rcSeqs(psl->qSequence, blockCount, blockSizes);
     rcSeqs(psl->tSequence, blockCount, blockSizes);
     }
 }
 
 
 /* macro to swap to variables */
 #define swapVars(a, b, tmp) ((tmp) = (a), (a) = (b), (b) = (tmp))
 
 static void swapBlocks(struct psl *psl)
 /* Swap the blocks in a psl without reverse complementing them. */
 {
 int i;
 unsigned utmp;
 char *stmp; 
 for (i = 0; i < psl->blockCount; i++)
     {
     swapVars(psl->qStarts[i], psl->tStarts[i], utmp);
     if (psl->qSequence != NULL)
         swapVars(psl->qSequence[i], psl->tSequence[i], stmp);
     }
 }
 
 static void swapRCBlocks(struct psl *psl)
 /* Swap and reverse complement blocks in a psl. Other psl fields must
  * be modified first */
 {
 int i;
 unsigned *uatmp;
 char **satmp;
 reverseUnsigned(psl->tStarts, psl->blockCount);
 reverseUnsigned(psl->qStarts, psl->blockCount);
 reverseUnsigned(psl->blockSizes, psl->blockCount);
 swapVars(psl->tStarts, psl->qStarts, uatmp);
 
 /* qSize and tSize have already been swapped */
 for (i = 0; i < psl->blockCount; i++)
     {
     psl->qStarts[i] = psl->qSize - (psl->qStarts[i] + psl->blockSizes[i]);
     psl->tStarts[i] = psl->tSize - (psl->tStarts[i] + psl->blockSizes[i]);
     }
 if (psl->qSequence != NULL)
     {
     /* note: all block sequences are stored in one malloc block, which is
      * entry zero */
     rcSeqs(psl->qSequence, psl->blockCount, psl->blockSizes);
     rcSeqs(psl->tSequence, psl->blockCount, psl->blockSizes);
     swapVars(psl->qSequence, psl->tSequence, satmp);
     }
 }
 
 void pslSwap(struct psl *psl, boolean noRc)
 /* swap query and target in psl.  If noRc is TRUE, don't reverse-complement
  * PSL if needed, instead make target strand explict. */
 {
 int itmp;
 unsigned utmp;
 char ctmp, *stmp; 
 swapVars(psl->qBaseInsert, psl->tBaseInsert, utmp);
 swapVars(psl->tNumInsert, psl->qNumInsert, utmp);
 swapVars(psl->qName, psl->tName, stmp);
 swapVars(psl->qSize, psl->tSize, utmp);
 swapVars(psl->qStart, psl->tStart, itmp);
 swapVars(psl->qEnd, psl->tEnd, itmp);
 
 /* handle strand and block copy */
 if (psl->strand[1] != '\0')
     {
     /* translated */
     swapVars(psl->strand[0], psl->strand[1], ctmp);
     swapBlocks(psl);
     }
 else if (noRc)
     {
     /* untranslated with no reverse complement */
     psl->strand[1] = psl->strand[0];
     psl->strand[0] = '+';
     swapBlocks(psl);
     }
 else
     {
     /* untranslated */
     if (psl->strand[0] == '+')
         swapBlocks(psl);
     else
         swapRCBlocks(psl);
     }
 }
 
 void pslTargetOffset(struct psl *psl, int offset)
 /* Add offset to target positions in psl. */
 {
 int i, blockCount = psl->blockCount;
 unsigned *tStarts = psl->tStarts;
 psl->tStart += offset;
 psl->tEnd += offset;
 for (i=0; i<blockCount; ++i)
    tStarts[i] += offset;
 }
 
 void pslDump(struct psl *psl, FILE *f)
 /* Dump most of PSL to file - for debugging. */
 {
 int i;
 fprintf(f, "<PRE>\n");
 fprintf(f, "psl %s:%d-%d %s %s:%d-%d %d\n", 
 	psl->qName, psl->qStart, psl->qEnd, psl->strand,
 	psl->tName, psl->tStart, psl->tEnd, psl->blockCount);
 for (i=0; i<psl->blockCount; ++i)
     fprintf(f, "  size %d, qStart %d, tStart %d\n", 
     	psl->blockSizes[i], psl->qStarts[i], psl->tStarts[i]);
 fprintf(f, "</PRE>");
 }
 
 static void pslRecalcBounds(struct psl *psl)
 /* Calculate qStart/qEnd tStart/tEnd at top level to be consistent
  * with blocks. */
 {
 int qStart, qEnd, tStart, tEnd, size;
 int last = psl->blockCount - 1;
 qStart = psl->qStarts[0];
 tStart = psl->tStarts[0];
 size = psl->blockSizes[last];
 qEnd = psl->qStarts[last] + size;
 tEnd = psl->tStarts[last] + size;
 if (psl->strand[0] == '-')
     reverseIntRange(&qStart, &qEnd, psl->qSize);
 if (psl->strand[1] == '-')
     reverseIntRange(&tStart, &tEnd, psl->tSize);
 psl->qStart = qStart;
 psl->qEnd = qEnd;
 psl->tStart = tStart;
 psl->tEnd = tEnd;
 }
 
 struct psl *pslTrimToTargetRange(struct psl *oldPsl, int tMin, int tMax)
 /* Return psl trimmed to fit inside tMin/tMax.  Note this does not
  * update the match/misMatch and related fields. */
 {
 int newSize;
 int oldBlockCount = oldPsl->blockCount;
 boolean tIsRc = (oldPsl->strand[1] == '-');
 int newBlockCount = 0, completeBlockCount = 0;
 int i;
 struct psl *newPsl = NULL;
 int tMn = tMin, tMx = tMax;   /* tMin/tMax adjusted for strand. */
 
 /* Deal with case where we're completely trimmed out quickly. */
 newSize = rangeIntersection(oldPsl->tStart, oldPsl->tEnd, tMin, tMax);
 if (newSize <= 0)
     return NULL;
 
 if (tIsRc)
     reverseIntRange(&tMn, &tMx, oldPsl->tSize);
 
 /* Count how many blocks will survive trimming. */
 oldBlockCount = oldPsl->blockCount;
 for (i=0; i<oldBlockCount; ++i)
     {
     int s = oldPsl->tStarts[i];
     int e = s + oldPsl->blockSizes[i];
     int sz = e - s;
     int overlap;
     if ((overlap = rangeIntersection(s, e, tMn, tMx)) > 0)
         ++newBlockCount;
     if (overlap == sz)
         ++completeBlockCount;
     }
 
 if (newBlockCount == 0)
     return NULL;
 
 /* Allocate new psl and fill in what we already know. */
 AllocVar(newPsl);
 strcpy(newPsl->strand, oldPsl->strand);
 newPsl->qName = cloneString(oldPsl->qName);
 newPsl->qSize = oldPsl->qSize;
 newPsl->tName = cloneString(oldPsl->tName);
 newPsl->tSize = oldPsl->tSize;
 newPsl->blockCount = newBlockCount;
 AllocArray(newPsl->blockSizes, newBlockCount);
 AllocArray(newPsl->qStarts, newBlockCount);
 AllocArray(newPsl->tStarts, newBlockCount);
 
 /* Fill in blockSizes, qStarts, tStarts with real data. */
 newBlockCount = completeBlockCount = 0;
 for (i=0; i<oldBlockCount; ++i)
     {
     int oldSz = oldPsl->blockSizes[i];
     int sz = oldSz;
     int tS = oldPsl->tStarts[i];
     int tE = tS + sz;
     int qS = oldPsl->qStarts[i];
     int qE = qS + sz;
     if (rangeIntersection(tS, tE, tMn, tMx) > 0)
         {
 	int diff;
 	if ((diff = (tMn - tS)) > 0)
 	    {
 	    tS += diff;
 	    qS += diff;
 	    sz -= diff;
 	    }
 	if ((diff = (tE - tMx)) > 0)
 	    {
 	    tE -= diff;
 	    qE -= diff;
 	    sz -= diff;
 	    }
 	newPsl->qStarts[newBlockCount] = qS;
 	newPsl->tStarts[newBlockCount] = tS;
 	newPsl->blockSizes[newBlockCount] = sz;
 	++newBlockCount;
 	if (sz == oldSz)
 	    ++completeBlockCount;
 	}
     }
 pslRecalcBounds(newPsl);
 return newPsl;
 }
 
 struct psl *pslTrimToQueryRange(struct psl *oldPsl, int qMin, int qMax)
 /* Return psl trimmed to fit inside qMin/qMax.  Note this does not
  * update the match/misMatch and related fields. */
 {
 int newSize;
 int oldBlockCount = oldPsl->blockCount;
 boolean qIsRc = (oldPsl->strand[0] == '-');
 int newBlockCount = 0, completeBlockCount = 0;
 int i;
 struct psl *newPsl = NULL;
 int qMn = qMin, qMx = qMax;   /* qMin/qMax adjusted for strand. */
 
 /* Deal with case where we're completely trimmed out quickly. */
 newSize = rangeIntersection(oldPsl->qStart, oldPsl->qEnd, qMin, qMax);
 if (newSize <= 0)
     return NULL;
 
 if (qIsRc)
     reverseIntRange(&qMn, &qMx, oldPsl->qSize);
 
 /* Count how many blocks will survive trimming. */
 oldBlockCount = oldPsl->blockCount;
 for (i=0; i<oldBlockCount; ++i)
     {
     int s = oldPsl->qStarts[i];
     int e = s + oldPsl->blockSizes[i];
     int sz = e - s;
     int overlap;
     if ((overlap = rangeIntersection(s, e, qMn, qMx)) > 0)
         ++newBlockCount;
     if (overlap == sz)
         ++completeBlockCount;
     }
 
 if (newBlockCount == 0)
     return NULL;
 
 /* Allocate new psl and fill in what we already know. */
 AllocVar(newPsl);
 strcpy(newPsl->strand, oldPsl->strand);
 newPsl->qName = cloneString(oldPsl->qName);
 newPsl->qSize = oldPsl->qSize;
 newPsl->tName = cloneString(oldPsl->tName);
 newPsl->tSize = oldPsl->tSize;
 newPsl->blockCount = newBlockCount;
 AllocArray(newPsl->blockSizes, newBlockCount);
 AllocArray(newPsl->qStarts, newBlockCount);
 AllocArray(newPsl->tStarts, newBlockCount);
 
 /* Fill in blockSizes, qStarts, tStarts with real data. */
 newBlockCount = completeBlockCount = 0;
 for (i=0; i<oldBlockCount; ++i)
     {
     int oldSz = oldPsl->blockSizes[i];
     int sz = oldSz;
     int qS = oldPsl->qStarts[i];
     int qE = qS + sz;
     int tS = oldPsl->tStarts[i];
     int tE = tS + sz;
     if (rangeIntersection(qS, qE, qMn, qMx) > 0)
         {
 	int diff;
 	if ((diff = (qMn - qS)) > 0)
 	    {
 	    tS += diff;
 	    qS += diff;
 	    sz -= diff;
 	    }
 	if ((diff = (qE - qMx)) > 0)
 	    {
 	    tE -= diff;
 	    qE -= diff;
 	    sz -= diff;
 	    }
 	newPsl->qStarts[newBlockCount] = qS;
 	newPsl->tStarts[newBlockCount] = tS;
 	newPsl->blockSizes[newBlockCount] = sz;
 	++newBlockCount;
 	if (sz == oldSz)
 	    ++completeBlockCount;
 	}
     }
 pslRecalcBounds(newPsl);
 return newPsl;
 }
 char* pslGetCreateSql(char* table, unsigned options, int tNameIdxLen)
 /* Get SQL required to create PSL table.  Options is a bit set consisting
  * of PSL_TNAMEIX, PSL_WITH_BIN, and PSL_XA_FORMAT.  tNameIdxLen is
  * the number of characters in target name to index.  If greater than
  * zero, must specify PSL_TNAMEIX.  If zero and PSL_TNAMEIX is specified,
  * to will default to 8. */
 {
 struct dyString *sqlCmd = newDyString(2048);
 char *sqlCmdStr;
 char binIx[32];
 
 binIx[0] = '\0';
 
 /* check and default tNameIdxLen */
 if ((tNameIdxLen > 0) && !(options & PSL_TNAMEIX))
     errAbort("pslGetCreateSql: must specify PSL_TNAMEIX with tNameIdxLen > 0");
 if ((options & PSL_TNAMEIX) && (tNameIdxLen == 0))
     tNameIdxLen = 8;
 
 /* setup tName and bin index fields */
 if (options & PSL_WITH_BIN)
     {
     if (options & PSL_TNAMEIX)
 	safef(binIx, sizeof(binIx), "INDEX(tName(%d),bin),\n", tNameIdxLen);
     else
 	safef(binIx, sizeof(binIx), "INDEX(bin),\n");
     }
 dyStringPrintf(sqlCmd, createString, table, 
     ((options & PSL_WITH_BIN) ? "bin smallint unsigned not null,\n" : ""));
 if (options & PSL_XA_FORMAT)
     {
     dyStringPrintf(sqlCmd, "qSeq longblob not null,\n");
     dyStringPrintf(sqlCmd, "tSeq longblob not null,\n");
     }
 dyStringPrintf(sqlCmd, indexString, binIx);
 sqlCmdStr = cloneString(sqlCmd->string);
 dyStringFree(&sqlCmd);
 return sqlCmdStr;
 }
 
 static void printPslDesc(char* pslDesc, FILE* out, struct psl* psl)
 /* print description of a PSL on first error */
 {
 fprintf(out, "Error: invalid PSL: %s:%u-%u %s:%u-%u %s %s\n",
         psl->qName, psl->qStart, psl->qEnd,
         psl->tName, psl->tStart, psl->tEnd,
         psl->strand, pslDesc);
 }
 
 static void chkRanges(char* pslDesc, FILE* out, struct psl* psl,
                       char* pName, char* pLabel, char pCLabel, char pStrand,
                       unsigned pSize, unsigned pStart, unsigned pEnd,
                       unsigned blockCount, unsigned* blockSizes,
                       unsigned* pBlockStarts, int* errCountPtr)
 /* check the target or query ranges in a PSL, increment errorCnt */
 {
 int errCount = *errCountPtr;
 unsigned iBlk, prevBlkEnd = 0;
 
 if (pStart >= pEnd)
     {
     if (errCount == 0)
         printPslDesc(pslDesc, out, psl);
     fprintf(out, "\t%s %cStart %u >= %cEnd %u\n",
             pName, pCLabel, pStart, pCLabel, pEnd);
     errCount++;
     }
 if (pEnd > pSize)
     {
     if (errCount == 0)
         printPslDesc(pslDesc, out, psl);
     fprintf(out, "\t%s %cEnd %u >= %cSize %u\n",
             pName, pCLabel, pEnd, pCLabel, pSize);
     errCount++;
     }
 for (iBlk = 0; iBlk < blockCount; iBlk++)
     {
     unsigned blkStart = pBlockStarts[iBlk];
     unsigned blkEnd = blkStart+blockSizes[iBlk];
     /* translate stand to genomic coords */
     unsigned gBlkStart = (pStrand == '+') ? blkStart : (pSize - blkEnd);
     unsigned gBlkEnd = (pStrand == '+') ? blkEnd : (pSize - blkStart);
 
     if ((pSize > 0) && (blkEnd > pSize))
         {
         if (errCount == 0)
             printPslDesc(pslDesc, out, psl);
         fprintf(out, "\t%s %s block %u end %u > %cSize %u\n",
                 pName, pLabel, iBlk, blkEnd, pCLabel, pSize);
         errCount++;
         }
     if (gBlkStart < pStart)
         {
         if (errCount == 0)
             printPslDesc(pslDesc, out, psl);
         fprintf(out, "\t%s %s block %u start %u < %cStart %u\n",
                 pName, pLabel, iBlk, gBlkStart, pCLabel, pStart);
         errCount++;
         }
     if (gBlkStart >= pEnd)
         {
         if (errCount == 0)
             printPslDesc(pslDesc, out, psl);
         fprintf(out, "\t%s %s block %u start %u >= %cEnd %u\n",
                 pName, pLabel, iBlk, gBlkStart, pCLabel, pEnd);
         errCount++;
         }
     if (gBlkEnd < pStart)
         {
         if (errCount == 0)
             printPslDesc(pslDesc, out, psl);
         fprintf(out, "\t%s %s block %u end %u < %cStart %u\n",
                 pName, pLabel, iBlk, gBlkEnd, pCLabel, pStart);
         errCount++;
         }
     if (gBlkEnd > pEnd)
         {
         if (errCount == 0)
             printPslDesc(pslDesc, out, psl);
         fprintf(out, "\t%s %s block %u end %u > %cEnd %u\n",
                 pName, pLabel, iBlk, gBlkEnd, pCLabel, pEnd);
         errCount++;
         }
     if ((iBlk > 0) && (blkStart < prevBlkEnd))
         {
         if (errCount == 0)
             printPslDesc(pslDesc, out, psl);
         fprintf(out, "\t%s %s block %u start %u < previous block end %u\n",
                 pName, pLabel, iBlk, blkStart, prevBlkEnd);
         errCount++;
         }
     prevBlkEnd = blkEnd;
     }
 *errCountPtr = errCount;
 }
 
 int pslCheck(char *pslDesc, FILE* out, struct psl* psl)
 /* Validate a PSL for consistency.  pslDesc is printed the error messages
  * to file out (open /dev/null to discard). Return count of errors. */
 {
 static char* VALID_STRANDS[] = {
     "+", "-", "++", "+-", "-+", "--", NULL
 };
 int i, errCount = 0;
 char strand;
 boolean isProt = FALSE;
 
 /* check strand value */
 for (i = 0; VALID_STRANDS[i] != NULL; i++)
     {
     if (strcmp(psl->strand, VALID_STRANDS[i]) == 0)
         break;
     }
 if (VALID_STRANDS[i] == NULL)
     {
     if (errCount == 0)
         printPslDesc(pslDesc, out, psl);
     fprintf(out, "\tinvalid PSL strand: \"%s\"\n", psl->strand);
     errCount++;
     }
 
 /* check target */
 if (pslIsProtein(psl))
     {
     isProt = TRUE;
     for (i = 0; i < psl->blockCount ; i++)
 	psl->blockSizes[i] *= 3;
     }
 
 strand = ((psl->strand[1] == '\0') ? '+' : psl->strand[1]);
 chkRanges(pslDesc, out, psl, psl->tName, "target", 't',
           strand, psl->tSize, psl->tStart, psl->tEnd,
           psl->blockCount, psl->blockSizes, psl->tStarts,
           &errCount);
 if (isProt)
     {
     for (i = 0; i < psl->blockCount ; i++)
 	psl->blockSizes[i] /= 3;
     }
 
 /* check query */
 strand = psl->strand[0];
 chkRanges(pslDesc, out, psl, psl->qName, "query", 'q',
           strand, psl->qSize, psl->qStart, psl->qEnd,
           psl->blockCount, psl->blockSizes, psl->qStarts,
           &errCount);
 
 return errCount;
 }
 
 struct hash *readPslToBinKeeper(char *sizeFileName, char *pslFileName)
 /* read a list of psls and return results in hash of binKeeper structure for fast query*/
 {
 struct binKeeper *bk; 
 struct psl *psl;
 struct lineFile *sf = lineFileOpen(sizeFileName, TRUE);
 struct lineFile *pf = lineFileOpen(pslFileName , TRUE);
 struct hash *hash = newHash(0);
 char *chromRow[2];
 char *row[21] ;
 
 while (lineFileRow(sf, chromRow))
     {
     char *name = chromRow[0];
     int size = lineFileNeedNum(sf, chromRow, 1);
 
     if (hashLookup(hash, name) != NULL)
         warn("Duplicate %s, ignoring all but first\n", name);
     else
         {
         bk = binKeeperNew(0, size);
         assert(size > 1);
 	hashAdd(hash, name, bk);
         }
     }
 while (lineFileNextRow(pf, row, ArraySize(row)))
     {
     psl = pslLoad(row);
     bk = hashMustFindVal(hash, psl->tName);
     binKeeperAdd(bk, psl->tStart, psl->tEnd, psl);
     }
 lineFileClose(&pf);
 lineFileClose(&sf);
 return hash;
 }
 
-static int countInitialChars(char *s, char c)
-/* Count number of initial chars in s that match c. */
+static boolean isDelChar(char c)
+/* is this a indel character? */
 {
-int count = 0;
-char d;
-while ((d = *s++) != 0)
-    {
-    if (c == d)
-        ++count;
-    else
-        break;
-    }
-return count;
+return (c == '-') || (c == '.') || (c == '=') || (c == '_');
 }
 
-static int countTerminalChars(char *s, char c)
-/* Count number of terminal chars in s that match c. */
-{
-int len = strlen(s), i;
-int count = 0;
-for (i=len-1; i>=0; --i)
-    {
-    if (c == s[i])
-        ++count;
-    else
-        break;
-    }
-return count;
-}
-
-static int countNonInsert(char *s, int size)
-/* Count number of characters in initial part of s that
- * are not '-'. */
-{
-int count = 0;
-int i;
-for (i=0; i<size; ++i)
-    if (*s++ != '-')
-        ++count;
-return count;
-}
-
-static void trimAlignment(struct psl* psl, char** qStringPtr, char** tStringPtr,
+static void trimAlignment(struct psl* psl, char** qStrPtr, char** tStrPtr,
                           int* aliSizePtr)
 /* remove leading or trailing indels from alignment */
 {
-char* qString = *qStringPtr;
-char* tString = *tStringPtr;
+char* qStr = *qStrPtr;
+char* tStr = *tStrPtr;
 int aliSize = *aliSizePtr;
-int qStartInsert = countInitialChars(qString, '-');
-int tStartInsert = countInitialChars(tString, '-');
-int qEndInsert = countTerminalChars(qString, '-');
-int tEndInsert = countTerminalChars(tString, '-');
-int startInsert = max(qStartInsert, tStartInsert);
-int endInsert = max(qEndInsert, tEndInsert);
-int qNonCount, tNonCount;
-
-if (startInsert > 0)
-    {
-    qNonCount = countNonInsert(qString, startInsert);
-    tNonCount = countNonInsert(tString, startInsert);
-    qString += startInsert;
-    tString += startInsert;
-    aliSize -= startInsert;
-    psl->qStart += qNonCount;
-    psl->tStart += tNonCount;
-    }
-if (endInsert > 0)
-    {
-    aliSize -= endInsert;
-    qNonCount = countNonInsert(qString+aliSize, endInsert);
-    tNonCount = countNonInsert(tString+aliSize, endInsert);
-    qString[aliSize] = 0;
-    tString[aliSize] = 0;
-    psl->qEnd -= qNonCount;
-    psl->tEnd -= tNonCount;
+
+// skip lending indels
+while ((aliSize > 0) && (isDelChar(*qStr) || isDelChar(*tStr)))
+    {
+    if (!isDelChar(*qStr))
+        psl->qStart++;
+    else if (!isDelChar(*tStr))
+        psl->tStart++;
+    qStr++;
+    tStr++;
+    aliSize--;
+    }
+
+// skip trailing indels
+while ((aliSize > 0) && (isDelChar(qStr[aliSize-1]) || isDelChar(tStr[aliSize-1])))
+    {
+    if (!isDelChar(qStr[aliSize-1]))
+        psl->qEnd--;
+    else if (!isDelChar(tStr[aliSize-1]))
+        psl->tEnd--;
+    aliSize--;
     }
-*qStringPtr = qString;
-*tStringPtr = tString;
+*qStrPtr = qStr;
+*tStrPtr = tStr;
 *aliSizePtr = aliSize;
-/* recursive call to handle double gap */
-if (startInsert > 0 || endInsert > 0)
-    trimAlignment(psl, qStringPtr, tStringPtr, aliSizePtr);
 }
 
 static void addBlock(struct psl* psl, int qs, int qe, int ts, int te,
                      int *blockSpace)
 /* add a block to the psl, growing if necessary */
 {
 assert((qe-qs) == (te-ts));  /* same lengths? */
 if (psl->blockCount == *blockSpace)
     pslGrow(psl, blockSpace);
 psl->blockSizes[psl->blockCount] = qe - qs;
 psl->qStarts[psl->blockCount] = qs;
 psl->tStarts[psl->blockCount] = ts;
 psl->blockCount++;
 }
 
 static void accumCounts(struct psl *psl, char prevQ, char prevT,
                         char q, char t, unsigned options)
 /* accumulate block and base counts  */
 {
-if ((q != '-') && (t != '-'))
+if (!isDelChar(q) && !isDelChar(t))
     {
     /* aligned column. */
     char qu = toupper(q);
     char tu = toupper(t);
     if ((q == 'N') || (t == 'N'))
         psl->nCount++;
     else if (qu == tu)
         {
         if ((options & PSL_IS_SOFTMASK) && ((qu != q) || (tu != t)))
             psl->repMatch++;
         else
             psl->match++;
         }
     else
         psl->misMatch++;
     }
-else if ((q == '-') && (t != '-'))
+else if (isDelChar(q) && !isDelChar(t))
     {
     /* target insert */
     psl->tBaseInsert++;
-    if (prevQ != '-')
+    if (!isDelChar(prevQ))
         psl->tNumInsert++;
     }
-else if ((t == '-') && (q != '-'))
+else if (isDelChar(t) && !isDelChar(q))
     {
     /* query insert */
     psl->qBaseInsert++;
-    if (prevT != '-')
+    if (!isDelChar(prevT))
         psl->qNumInsert++;
     }
 }
 
 struct psl* pslFromAlign(char *qName, int qSize, int qStart, int qEnd, char *qString,
                          char *tName, int tSize, int tStart, int tEnd, char *tString,
                          char* strand, unsigned options)
 /* Create a PSL from an alignment.  Options PSL_IS_SOFTMASK if lower case
  * bases indicate repeat masking.  Returns NULL if alignment is empty after
  * triming leading and trailing indels.*/
 {
 /* Note, the unit tests for these programs exercise this function:
  *   hg/embossToPsl
  *   hg/mouseStuff/axtToPsl
  *   hg/spideyToPsl
  *   utils/est2genomeToPsl
  */
 
 int blockSpace = 16;
 struct psl* psl = NULL;
 int aliSize = strlen(qString);
 boolean eitherInsert = FALSE;	/* True if either in insert state. */
 int i, qs,qe,ts,te;
 char prevQ = '\0',  prevT = '\0';
 AllocVar(psl);
 
 if (strlen(tString) != aliSize)
     errAbort("query and target alignment strings are different lengths");
 
 psl = pslNew(qName, qSize, qStart, qEnd, tName, tSize, tStart, tEnd,
              strand, blockSpace, 0);
 trimAlignment(psl, &qString, &tString, &aliSize);
 
 /* Don't create if either query or target is zero length after trim */
  if ((psl->qStart == psl->qEnd) || (psl->tStart == psl->tEnd))
      {
      pslFree(&psl);
      return NULL;
      }
 
 /* Get range of alignment in strand-specified coordinates */
 qs = psl->qStart;
 qe = psl->qEnd;
 if (strand[0] == '-')
     reverseIntRange(&qs, &qe, psl->qSize);
 ts = psl->tStart;
 te = psl->tEnd;
 if (strand[1] == '-')
     reverseIntRange(&ts, &te, psl->tSize);
 
 eitherInsert = FALSE;
 qe = qs;  /* current block coords */
 te = ts;
 for (i=0; i<aliSize; ++i)
     {
     char q = qString[i];
     char t = tString[i];
-    if ((q == '-') && (t == '-'))
+    if (isDelChar(q) && isDelChar(t))
         {
         continue; /* nothing in this column, just ignore it */
         }
-    else if ((q == '-') || (t == '-'))
+    else if (isDelChar(q) || isDelChar(t))
         {
         /* insert in one of the columns */
 	if (!eitherInsert)
 	    {
             /* end of a block */
             addBlock(psl, qs, qe, ts, te, &blockSpace);
 	    eitherInsert = TRUE;
 	    }
-	if (q != '-')
+	if (!isDelChar(q))
             qe += 1;
-	if (t != '-')
+	if (!isDelChar(t))
             te += 1;
 	}
     else
         {
         /* columns aligned */
 	if (eitherInsert)
 	    {
             /* start new block */
 	    qs = qe;
 	    ts = te;
 	    eitherInsert = FALSE;
 	    }
 	qe += 1;
 	te += 1;
 	}
     accumCounts(psl, prevQ, prevT, q, t, options);
     prevQ = q; /* will not include skipped empty columns */
     prevT = t;
     }
 addBlock(psl, qs, qe, ts, te, &blockSpace);
 return psl;
 }
 
 struct psl* pslNew(char *qName, unsigned qSize, int qStart, int qEnd,
                    char *tName, unsigned tSize, int tStart, int tEnd,
                    char *strand, unsigned blockSpace, unsigned opts)
 /* create a new psl with space for the specified number of blocks allocated.
  * pslGrow maybe used to expand this space if needed.  Valid options are
  * PSL_XA_FORMAT. */
 {
 struct psl *psl;
 AllocVar(psl);
 assert(blockSpace > 0);
 psl->qName = cloneString(qName);
 psl->qSize = qSize;
 psl->qStart = qStart;
 psl->qEnd = qEnd;
 psl->tName = cloneString(tName);
 psl->tSize = tSize;
 psl->tStart = tStart;
 psl->tEnd = tEnd;
 strncpy(psl->strand, strand, 2);
 AllocArray(psl->blockSizes, blockSpace);
 AllocArray(psl->qStarts, blockSpace);
 AllocArray(psl->tStarts, blockSpace);
 if (opts & PSL_XA_FORMAT)
     {
     AllocArray(psl->qSequence, blockSpace);
     AllocArray(psl->tSequence, blockSpace);
     }
 return psl;
 }
 
 void pslGrow(struct psl *psl, int *blockSpacePtr)
 /* Increase memory allocated to a psl to hold more blocks.  blockSpacePtr
  * should point the the current maximum number of blocks and will be
  * updated to with the new amount of space. */
 {
 int blockSpace = *blockSpacePtr;
 int newSpace = 2 * blockSpace;
 ExpandArray(psl->blockSizes, blockSpace, newSpace);
 ExpandArray(psl->qStarts, blockSpace, newSpace);
 ExpandArray(psl->tStarts, blockSpace, newSpace);
 if (psl->qSequence != NULL)
     {
     ExpandArray(psl->qSequence, blockSpace, newSpace);
     ExpandArray(psl->tSequence, blockSpace, newSpace);
     }
 *blockSpacePtr = newSpace;
 }
 
 int pslRangeTreeOverlap(struct psl *psl, struct rbTree *rangeTree)
 /* Return amount that psl overlaps (on target side) with rangeTree. */
 {
 int i;
 int overlap = 0;
 boolean isRc = (psl->strand[1] == '-');
 for (i=0; i<psl->blockCount; ++i)
     {
     int start = psl->tStarts[i];
     int end = start + psl->blockSizes[i];
     if (isRc)
         reverseIntRange(&start, &end, psl->tSize);
     overlap += rangeTreeOverlapSize(rangeTree, start, end);
     }
 return overlap;
 }
 
 float pslIdent(struct psl *psl)
 /* computer fraction identity */
 {
 float aligned = psl->match + psl->misMatch + psl->repMatch;
 if (aligned == 0.0)
     return 0.0;
 else
     return ((float)(psl->match + psl->repMatch))/aligned;
 }
 
 float pslQueryAligned(struct psl *psl)
 /* compute fraction of query that was aligned */
 {
 float aligned = psl->match + psl->misMatch + psl->repMatch;
 return aligned/(float)psl->qSize;
 }