e70152e44cc66cc599ff6b699eb8adc07f3e656a
kent
  Sat May 24 21:09:34 2014 -0700
Adding Copyright NNNN Regents of the University of California to all files I believe with reasonable certainty were developed under UCSC employ or as part of Genome Browser copyright assignment.
diff --git src/hg/pslToPileup/pslToPileup.c src/hg/pslToPileup/pslToPileup.c
index 5490741..087381e 100644
--- src/hg/pslToPileup/pslToPileup.c
+++ src/hg/pslToPileup/pslToPileup.c
@@ -1,156 +1,159 @@
 /** pslToPileup -- Extract pileup locations from PSL, output as BED. */
 
+/* Copyright (C) 2011 The Regents of the University of California 
+ * See README in this or parent directory for licensing information. */
+
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "psl.h"
 #include "bed.h"
 #include "bits.h"
 #include "options.h"
 
 
 int minHits = 50;
 int maxGap = 20;
 
 static struct optionSpec optionSpecs[] =  
 /* option for pslToPileup */
 {
     {"minHits", OPTION_INT},
     {"maxGap", OPTION_INT},
     {"help", OPTION_BOOLEAN},
     {NULL, 0}
 };
 
 void usage()
 /** provide usage information */
 {
 errAbort("pslToPileup: Extract pileup locations from PSL (target), output as BED 5+.\n"
 	 "usage:\n"
 	 "    pslToPileup [options] in.psl chrom.sizes out.bed\n"
 	 "options:\n"
 	 "    -minHits=N  Minimum number of alignments covering a given base in order for it\n"
 	 "                to be considered part of a pileup.  Default: %d.\n"
 	 "    -maxGap=N   Maximum number of consecutive bases within a pileup which may have\n"
 	 "                fewer than minHits.  Default: %d.\n"
 	 "    \n",
 	 minHits, maxGap
 	 );
 } 
 
 /** Allocating 4 bytes per base for entire chroms proved too big even for 
  * kolossus, so use short (and watch out for count overflow -- very unlikely, 
  * but just in case). */
 #define MAXSHORT 0x7fff
 
 short *getCountArr(struct hash *chromCounts, struct hash *chromSizes,
 		 char *chrom, int chromSize)
 /** Return the array of counts associated with chrom.  Hash sizes too. */
 {
 struct hashEl *el = hashLookup(chromCounts, chrom);
 short *arr = el ? el->val : NULL;
 if (arr == NULL)
     {
     arr = needLargeZeroedMem(chromSize * sizeof(short));
     hashAdd(chromCounts, chrom, arr);
     hashAddInt(chromSizes, chrom, chromSize);
     }
 return arr;
 }
 
 void pslToPileup(char *pslFile, char *chromSizeFile, char *outBedName)
 /** Extract pileup locations from PSL, output as BED. */
 {
 FILE *outBed = NULL;
 struct hash *chromCounts = hashNew(8);
 struct hash *chromSizes = hashNew(8);
 struct hashCookie cookie;
 struct hashEl *el = NULL;
 struct lineFile *lf = pslFileOpen(pslFile);
 struct psl *psl = NULL;
 
 outBed = mustOpen(outBedName, "w");
 
 while ((psl = pslNext(lf)) != NULL)
     {
     short *countArr = getCountArr(chromCounts, chromSizes, psl->tName,
 				psl->tSize);
     int i=0, j=0;
     for (i=0;  i < psl->blockCount;  i++)
 	{
 	for (j=0;  j < psl->blockSizes[i];  j++)
 	    {
 	    //#*** if tStrand (strand[1]) is -, do we need to flip??
 	    if (countArr[psl->tStarts[i]+j] < MAXSHORT)
 		countArr[psl->tStarts[i]+j]++;
 	    }
 	}
     }
 
 cookie = hashFirst(chromCounts);
 while ((el = hashNext(&cookie)) != NULL)
     {
     char *chrom = el->name;
     short *countArr = (short *)(el->val);
     int i=0;
     boolean inPileup = FALSE;
     int pStart=0, pEnd=0;
     short pMin=MAXSHORT, pMax=0;
     int pTotal=0;
     int gapCount = 0;
     int chromSize = hashIntVal(chromSizes, chrom);
     for (i=0;  i < chromSize;  i++)
 	{
 	if (countArr[i] >= minHits)
 	    {
 	    if (! inPileup)
 		pStart = i;
 	    pEnd = i+1;
 	    if (countArr[i] < pMin)
 		pMin = countArr[i];
 	    if (countArr[i] > pMax)
 		pMax = countArr[i];
 	    pTotal += countArr[i];
 	    inPileup = TRUE;
 	    }
 	else if (inPileup)
 	    {
 	    gapCount++;
 	    if (gapCount > maxGap)
 		{
 		fprintf(outBed, "%s\t%d\t%d\t%s:%d-%d\t%d\t%d\t%.1f\t%d\n",
 			chrom, pStart, pEnd, chrom, pStart+1, pEnd,
 			(int)(pTotal/(pEnd-pStart)),
 			pMin, (double)pTotal/(pEnd-pStart), pMax);
 		pStart = pEnd = pMax = pTotal = 0;
 		pMin = MAXSHORT;
 		gapCount = 0;
 		inPileup = FALSE;
 		}
 	    }
 	}
     if (inPileup)
 	fprintf(outBed, "%s\t%d\t%d\t%s:%d-%d\t%d\t%d\t%.1f\t%d\n",
 		chrom, pStart, pEnd, chrom, pStart+1, pEnd,
 		(int)(pTotal/(pEnd-pStart)),
 		pMin, (double)pTotal/(pEnd-pStart), pMax);
     }
 
 lineFileClose(&lf);
 carefulClose(&outBed);
 }
 
 
 int main(int argc, char* argv[])
 {
 optionInit(&argc, argv, optionSpecs);
 if (argc != 4)
     usage();
 if (optionExists("help"))
     usage();
 minHits = optionInt("minHits", minHits);
 maxGap = optionInt("maxGap", maxGap);
 pslToPileup(argv[1], argv[2], argv[3]);
 return 0;
 }