d8352b0a8c16641c5af4f33112b4f241af931771
braney
  Wed Jul 25 16:00:15 2012 -0700
add some tests, make the code easier to understand, and fix a problem with no CDS being indicated by thickStart == chromStart && thickEnd == chromEnd, instead of thickStart==thickEnd==0 as should have been the case.   Thanks Tim for the code review!!!
diff --git src/hg/pslToBed/pslToBed.c src/hg/pslToBed/pslToBed.c
index a7af53c..0e25ea1 100644
--- src/hg/pslToBed/pslToBed.c
+++ src/hg/pslToBed/pslToBed.c
@@ -1,133 +1,145 @@
 /* pslToBed -- tranform a psl format file to a bed format file */
 
 #include "common.h"
 #include "psl.h"
 #include "bed.h"
 #include "options.h"
 
 
 void usage()
 /* print usage infomation and exit */
 {
 errAbort("pslToBed: tranform a psl format file to a bed format file.\n"
          "usage:\n"
          "    pslToBed psl bed\n"
 	 "options:\n"
 	 "    -cds=cdsFile\n"
 	 "cdsFile specifies a input cds tab-separated file which contains\n"
 	 "genbank-style CDS records showing cdsStart..cdsEnd\n"
 	 "e.g. NM_123456 34..305\n"
 	 "These coordinates are assumed to be in the query coordinate system\n"
 	 "of the psl, like those that are created from genePredToFakePsl\n");
 
 } 
 
 static struct optionSpec options[] = {
    {"cds", OPTION_STRING},
    {NULL, 0},
 };
 
 struct cds
 {
 int start, end;   // cds start and end 
 };
 
-static void setThick(struct psl *psl, struct bed *bed, struct cds *cds)
-// set thickStart and thickEnd based on CDS record
+static unsigned getTargetForQuery(struct psl *psl, int queryAddress)
+// get target address for query address from PSL
 {
-int ii;
-int thickStart, thickEnd;
+int  blockNum;
 
-thickStart = thickEnd = 0;
-for(ii=0; ii < psl->blockCount; ii++)
+for (blockNum=0; blockNum < psl->blockCount; blockNum++)
     {
-    if (psl->qStarts[ii] + psl->blockSizes[ii] > cds->start - 1)
+    if (psl->qStarts[blockNum] > queryAddress)
 	{
-	int offset = cds->start - psl->qStarts[ii];
-	if (offset < 0)
-	    offset = 0;
-	thickEnd = thickStart = psl->tStarts[ii] + offset - 1;
-	break;
-	}
+	// this block starts past the queryAddress
+	// just point to the beginning of this block
+	return psl->tStarts[blockNum];
     }
-
-for(; ii < psl->blockCount; ii++)
-    {
-    if (psl->qStarts[ii] + psl->blockSizes[ii] >= cds->end)
+    else if (psl->qStarts[blockNum] + psl->blockSizes[blockNum] > queryAddress)
 	{
-	int offset = cds->end - psl->qStarts[ii];
-	if (offset < 0)
-	    offset = 0;
-	thickEnd = psl->tStarts[ii] + offset;
-	break;
+	// since block addresses are always increasing we know if the end
+	// of the block is beyond our address that the address is 
+	// in this block
+	unsigned offsetInBlock = queryAddress - psl->qStarts[blockNum];
+
+	assert(offsetInBlock < psl->blockSizes[blockNum]);
+
+	return psl->tStarts[blockNum] + offsetInBlock;
 	}
     }
-if (ii >= psl->blockCount)
-    {
-    thickEnd = psl->tStarts[ii - 1] + psl->blockSizes[ii - 1];
+
+// we don't have any blocks with this query in it, just point
+// to the end
+return psl->tEnd;
     }
+
+static void setThick(struct psl *psl, struct bed *bed, struct cds *cds)
+// set thickStart and thickEnd based on CDS record
+{
+int thickStart, thickEnd;
+
+// we subtract one from start to convert to PSL coordinate system
+thickStart = getTargetForQuery(psl, cds->start - 1); 
+thickEnd = getTargetForQuery(psl, cds->end); 
+
+// if thickStart equals thickEnd, then there is no CDS
+if (thickStart == thickEnd)
+    thickStart = thickEnd = 0;
+
 bed->thickStart = thickStart;
 bed->thickEnd = thickEnd;
 }
 
 void pslToBed(char *pslFile, char *bedFile, struct hash *cdsHash)
 /* pslToBed -- tranform a psl format file to a bed format file */
 {
 struct lineFile *pslLf = pslFileOpen(pslFile);
 FILE *bedFh = mustOpen(bedFile, "w");
 struct psl *psl;
 
 while ((psl = pslNext(pslLf)) != NULL)
     {
     struct bed *bed = bedFromPsl(psl);
     if (cdsHash)
 	{
 	struct cds *cds = hashFindVal(cdsHash, psl->qName);
-	if (cds != NULL)
+	if (cds == NULL)
+	    bed->thickStart = bed->thickEnd = 0;
+	else
 	    setThick(psl, bed, cds);
 	}
     bedTabOutN(bed, 12, bedFh);
     bedFree(&bed);
     pslFree(&psl);
     }
 carefulClose(&bedFh);
 lineFileClose(&pslLf);
 }
 
 struct hash *getCdsHash(char *cdsFile)
 /* read CDS start and end for list of id's */
 {
 struct lineFile *lf = lineFileOpen(cdsFile, TRUE);
 char *row[2];
 struct hash *hash = newHash(0);
 while (lineFileRow(lf, row))
     {
     // lines are of the form of <start>..<end>
     struct cds *cds;
     AllocVar(cds);
     cds->start = atoi(row[1]);
     char *ptr = strchr(row[1], '.');	 // point to first '.'
     ptr+=2; // step past two '.'s
     cds->end = atoi(ptr);
     hashAdd(hash, row[0], cds);
     }
 lineFileClose(&lf);
 return hash;
 }
 
 int main(int argc, char* argv[])
 {
 optionInit(&argc, argv, options);
 if (argc != 3)
     usage();
 char *cdsFile = optionVal("cds", NULL);
 struct hash *cdsHash = NULL;
 
 if (cdsFile != NULL)
     cdsHash = getCdsHash(cdsFile);
 
 pslToBed(argv[1], argv[2], cdsHash);
 return 0;
 }