defea5ce53c69e33291c79fef25e9cbee73c4361
galt
  Mon Jul 1 13:53:12 2013 -0700
removing unneeded carefulCheckHeap calls since pushCarefulMemHandler was not being used and thus the check did absolutely nothing.
diff --git src/hg/pslReps/pslReps.c src/hg/pslReps/pslReps.c
index edc7ccd..9d76eac 100644
--- src/hg/pslReps/pslReps.c
+++ src/hg/pslReps/pslReps.c
@@ -1,429 +1,427 @@
 /* pslReps - analyse repeats and generate list of best alignments
  * from a genome wide, sorted by mRNA .psl alignment file.
  */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "memalloc.h"
 #include "psl.h"
 #include "options.h"
 #include "obscure.h"
 #include "sqlNum.h"
 
 
 /* command line */
 static struct optionSpec optionSpecs[] = {
     {"minAli", OPTION_FLOAT},
     {"nearTop", OPTION_FLOAT},
     {"minCover", OPTION_FLOAT},
     {"minNearTopSize", OPTION_INT},
     {"ignoreSize", OPTION_BOOLEAN},
     {"sizeMatters", OPTION_BOOLEAN},  /* opposite of ignore size, for compat */
     {"noIntrons", OPTION_BOOLEAN},
     {"singleHit", OPTION_BOOLEAN},
     {"nohead", OPTION_BOOLEAN},
     {"ignoreNs", OPTION_BOOLEAN},
     {"coverQSizes", OPTION_STRING},
     {NULL, 0}
 };
 
 double minAli = 0.93;
 double nearTop = 0.01;
 double minCover = 0.0;
 boolean ignoreNs = FALSE; /* Ignore the n's when calculating coverage score, other
 			     wise n's count as mismatches. */
 boolean ignoreSize = FALSE;
 boolean noIntrons = FALSE;
 boolean singleHit = FALSE;
 boolean noHead = FALSE;
 boolean quiet = FALSE;
 int minNearTopSize = 30;
 char *coverQSizeFile = NULL;
 
 void usage()
 /* Print usage instructions and exit. */
 {
 errAbort(
     "pslReps - analyse repeats and generate genome wide best\n"
     "alignments from a sorted set of local alignments\n"
     "usage:\n"
     "    pslReps in.psl out.psl out.psr\n"
     "where in.psl is an alignment file generated by psLayout and\n"
     "sorted by pslSort, out.psl is the best alignment output\n"
     "and out.psr contains repeat info\n"
     "options:\n"
     "    -nohead don't add PSL header\n"
     "    -ignoreSize Will not weigh in favor of larger alignments so much\n"
     "    -noIntrons Will not penalize for not having introns when calculating\n"
     "              size factor\n"
     "    -singleHit  Takes single best hit, not splitting into parts\n"
     "    -minCover=0.N minimum coverage to output.  Default is 0.\n"
     "    -ignoreNs Ignore 'N's when calculating minCover.\n"
     "    -minAli=0.N minimum alignment ratio\n"
     "               default is 0.93\n"
     "    -nearTop=0.N how much can deviate from top and be taken\n"
     "               default is 0.01\n"
     "    -minNearTopSize=N  Minimum size of alignment that is near top\n"
     "               for alignment to be kept.  Default 30.\n"
     "    -coverQSizes=file Tab-separate file with effective query sizes.\n"
     "                     When used with -minCover, this allows polyAs\n"
     "                     to be excluded from the coverage calculation\n");
 }
 
 /* hash table of query name to sizes */
 struct hash* coverQSizes = NULL;
 
 void loadCoverQSizes(char* coverQSizeFile)
 /* load coverage query sizes */
 {
 struct lineFile *lf = lineFileOpen(coverQSizeFile, TRUE);
 char *row[2];
 coverQSizes = hashNew(0);
 
 while (lineFileNextRowTab(lf, row, ArraySize(row)))
     {
     int qSize = sqlSigned(row[1]);
     hashAdd(coverQSizes, row[0], intToPt(qSize));
     }
 
 lineFileClose(&lf);
 }
 
 int calcMilliScore(struct psl *psl)
 /* Figure out percentage score. */
 {
 return 1000-pslCalcMilliBad(psl, TRUE);
 }
 
 int intronFactor(struct psl *psl)
 /* Figure bonus for having introns.  An intron is worth 3 bases... 
  * An intron in this case is just a gap of 0 bases in query and
  * 30 or more in target. */
 {
 int i, blockCount = psl->blockCount;
 int ts, qs, te, qe, sz;
 int bonus = 0;
 if (blockCount <= 1)
     return 0;
 sz = psl->blockSizes[0];
 qe = psl->qStarts[0] + sz;
 te = psl->tStarts[0] + sz;
 for (i=1; i<blockCount; ++i)
     {
     qs = psl->qStarts[i];
     ts = psl->tStarts[i];
     if (qs == qe && ts - te >= 30)
         bonus += 3;
     sz = psl->blockSizes[i];
     qe = qs + sz;
     te = ts + sz;
     }
 if (bonus > 10) bonus = 10;
 return bonus;
 }
 
 int sizeFactor(struct psl *psl)
 /* Return a factor that will favor longer alignments. */
 {
 int score;
 if (ignoreSize) return 0;
 score = 4*round(sqrt(psl->match + psl->repMatch/4));
 if (!noIntrons)
     score += intronFactor(psl);
 return score;
 }
 
 int calcSizedScore(struct psl *psl)
 /* Return score that includes base matches and size. */
 {
 int score = calcMilliScore(psl) + sizeFactor(psl);
 return score;
 }
 
 boolean uglyTarget(struct psl *psl)
 /* Return TRUE if it's the one we're snooping on. */
 {
 return sameString(psl->qName, "AF103731");
 }
 
 
 boolean closeToTop(struct psl *psl, int *scoreTrack)
 /* Returns TRUE if psl is near the top scorer for at least 20 bases. */
 {
 int milliScore = calcSizedScore(psl);
 int threshold = round(milliScore * (1.0+nearTop));
 int i, blockIx;
 int start, size, end;
 int topCount = 0;
 char strand = psl->strand[0];
 
 for (blockIx = 0; blockIx < psl->blockCount; ++blockIx)
     {
     start = psl->qStarts[blockIx];
     size = psl->blockSizes[blockIx];
     end = start+size;
     if (strand == '-')
 	reverseIntRange(&start, &end, psl->qSize);
     for (i=start; i<end; ++i)
 	{
 	if (scoreTrack[i] <= threshold)
 	    {
 	    if (++topCount >= minNearTopSize)
 		return TRUE;
 	    }
 	}
     }
 return FALSE;
 }
 
 boolean passMinCoverage(struct psl *psl)
 /* Does this psl have enough bases aligned to pass the minCover filter. */
 {
 int qSize = psl->qSize;
 if (coverQSizes != NULL)
     {
     struct hashEl *hel = hashLookup(coverQSizes, psl->qName);
     if (hel != NULL)
         qSize = ptToInt(hel->val);
     }
 
 if(ignoreNs)
     return (psl->match + psl->repMatch >= minCover * (qSize - psl->nCount));
 else
     return (psl->match + psl->repMatch >= minCover * qSize);
 }
 
 boolean passFilters(struct psl *psl, int *scoreTrack)
 /* Return TRUE if this psl passes the millScore, minCoverage and
    closeToTop thresholds.  If scoreTrack is NULL then closeToTop
    threshold is skipped. */
 {
 int milliMin = 1000*minAli;
 if (calcMilliScore(psl) >= milliMin && 
     (scoreTrack == NULL || closeToTop(psl, scoreTrack)) &&
     passMinCoverage(psl))
     return TRUE;
 return FALSE;
 }
 
 void processBestMulti(char *acc, struct psl *pslList, FILE *bestFile, FILE *repFile)
 /* Find psl's that are best anywhere along their length. */
 {
 struct psl *psl;
 int qSize;
 int *repTrack = NULL;
 int *scoreTrack = NULL;
 int milliScore;
 int goodAliCount = 0;
 int bestAliCount = 0;
 int milliMin = 1000*minAli;
 
 
 if (pslList == NULL)
     return;
 qSize = pslList->qSize;
 AllocArray(repTrack, qSize+1);
 AllocArray(scoreTrack, qSize+1);
 
 for (psl = pslList; psl != NULL; psl = psl->next)
     {
     int blockIx;
     char strand = psl->strand[0];
     milliScore = calcMilliScore(psl);
     if (milliScore >= milliMin)
 	{
 	++goodAliCount;
 	milliScore += sizeFactor(psl);
 	for (blockIx = 0; blockIx < psl->blockCount; ++blockIx)
 	    {
 	    int start = psl->qStarts[blockIx];
 	    int size = psl->blockSizes[blockIx];
 	    int end = start+size;
 	    int i;
 	    if (strand == '-')
 	        reverseIntRange(&start, &end, psl->qSize);
 	    if (start < 0 || end > qSize)
 		{
 		warn("Odd: qName %s tName %s qSize %d psl->qSize %d start %d end %d",
 		    psl->qName, psl->tName, qSize, psl->qSize, start, end);
 		if (start < 0)
 		    start = 0;
 		if (end > qSize)
 		    end = qSize;
 		}
 	    for (i=start; i<end; ++i)
 		{
 		repTrack[i] += 1;
 		if (milliScore > scoreTrack[i])
 		    scoreTrack[i] = milliScore;
 		}
 	    }
 	}
     }
 /* Print out any alignments that are within 2% of top score. */
 for (psl = pslList; psl != NULL; psl = psl->next)
     {
     if(passFilters(psl, scoreTrack))
 	{
 	pslTabOut(psl, bestFile);
 	++bestAliCount;
 	}
     }
 
 
 /* Print out run-length-encoded repeat info.  */
     {
     int runVal = repTrack[0];
     int curVal;
     int runSize = 1;
     int packetCount = 0;
     int *packetSize = NULL;
     int *packetVal = NULL;
     int i;
 
     AllocArray(packetSize, qSize);
     AllocArray(packetVal, qSize);
 
     repTrack[qSize] = -1;	/* Sentinal value to simplify RLC loop. */
     fprintf(repFile, "%s\t%d\t%d\t", acc, bestAliCount, goodAliCount);
 
     for (i=1; i<=qSize; ++i)
 	{
 	if ((curVal = repTrack[i]) != runVal)
 	    {
 	    packetSize[packetCount] = runSize;
 	    packetVal[packetCount] = runVal;
 	    ++packetCount;
 	    runSize = 1;
 	    runVal = curVal;
 	    }
 	else
 	    {
 	    ++runSize;
 	    }
 	}
     fprintf(repFile, "%d\t", packetCount);
     for (i=0; i<packetCount; ++i)
 	fprintf(repFile, "%d,", packetSize[i]);
     fprintf(repFile, "\t");
     for (i=0; i<packetCount; ++i)
 	fprintf(repFile, "%d,", packetVal[i]);
     fprintf(repFile, "\n");
 
-    carefulCheckHeap();
     freeMem(packetSize);
     freeMem(packetVal);
     }
 
-carefulCheckHeap();
 freeMem(repTrack);
 freeMem(scoreTrack);
 }
 
 void processBestSingle(char *acc, struct psl *pslList, FILE *bestFile, FILE *repFile)
 /* Find single best psl in list. */
 {
 struct psl *bestPsl = NULL, *psl;
 int bestScore = 0, score, threshold;
 
 for (psl = pslList; psl != NULL; psl = psl->next)
     {
     score = pslScore(psl);
     if (score > bestScore)
         {
 	bestScore = score;
 	bestPsl = psl;
 	}
     }
 threshold = round((1.0 - nearTop)*bestScore);
 for (psl = pslList; psl != NULL; psl = psl->next)
     {
     if (pslScore(psl) >= threshold && passFilters(psl, NULL))
         pslTabOut(psl, bestFile);
     }
 }
 
 void doOneAcc(char *acc, struct psl *pslList, FILE *bestFile, FILE *repFile)
 /* Process alignments of one piece of mRNA. */
 {
 if (singleHit)
     processBestSingle(acc, pslList, bestFile, repFile);
 else
     processBestMulti(acc, pslList, bestFile, repFile);
 }
 
 void pslReps(char *inName, char *bestAliName, char *repName)
 /* Analyse inName and put best alignments for eacmRNA in estAliName.
  * Put repeat info in repName. */
 {
 struct lineFile *in = pslFileOpen(inName);
 FILE *bestFile = mustOpen(bestAliName, "w");
 FILE *repFile = mustOpen(repName, "w");
 int lineSize;
 char *line;
 char *words[32];
 int wordCount;
 struct psl *pslList = NULL, *psl = NULL;
 char lastName[512];
 int aliCount = 0;
 quiet = sameString(bestAliName, "stdout") || sameString(repName, "stdout");
 if (coverQSizeFile != NULL)
     loadCoverQSizes(coverQSizeFile);
 
 if (!quiet)
     printf("Processing %s to %s and %s\n", inName, bestAliName, repName);
  if (!noHead)
      pslWriteHead(bestFile);
 strcpy(lastName, "");
 while (lineFileNext(in, &line, &lineSize))
     {
     if (((++aliCount & 0x1ffff) == 0) && !quiet)
         {
 	printf(".");
 	fflush(stdout);
 	}
     wordCount = chopTabs(line, words);
     if (wordCount == 21)
 	psl = pslLoad(words);
     else if (wordCount == 23)
 	psl = pslxLoad(words);
     else
 	errAbort("Bad line %d of %s\n", in->lineIx, in->fileName);
     if (!sameString(lastName, psl->qName))
 	{
 	doOneAcc(lastName, pslList, bestFile, repFile);
 	pslFreeList(&pslList);
 	safef(lastName, sizeof(lastName), "%s", psl->qName);
 	}
     slAddHead(&pslList, psl);
     }
 doOneAcc(lastName, pslList, bestFile, repFile);
 pslFreeList(&pslList);
 lineFileClose(&in);
 fclose(bestFile);
 fclose(repFile);
 if (!quiet)
     printf("Processed %d alignments\n", aliCount);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, optionSpecs);
 if (argc != 4)
     usage();
 minAli = optionFloat("minAli", minAli);
 nearTop = optionFloat("nearTop", nearTop);
 minCover = optionFloat("minCover", minCover);
 minNearTopSize = optionInt("minNearTopSize", minNearTopSize);
 ignoreSize = optionExists("ignoreSize");
 if (optionExists("sizeMatters"))
     warn("warning: -sizeMatters is deprecated and is now the default");
 noIntrons = optionExists("noIntrons");
 singleHit = optionExists("singleHit");
 noHead = optionExists("nohead");
 ignoreNs = optionExists("ignoreNs");
 coverQSizeFile = optionVal("coverQSizes", NULL);
 pslReps(argv[1], argv[2], argv[3]);
 return 0;
 }