4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/makeDb/hgLoadWiggle/hgWiggle.c src/hg/makeDb/hgLoadWiggle/hgWiggle.c
index 95f8b8f..c9bc9fd 100644
--- src/hg/makeDb/hgLoadWiggle/hgWiggle.c
+++ src/hg/makeDb/hgLoadWiggle/hgWiggle.c
@@ -1,538 +1,538 @@
 /* hgWiggle - fetch wiggle data from database or .wig file */
 
 /* Copyright (C) 2012 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 #include "common.h"
 #include "options.h"
 #include "linefile.h"
 #include "obscure.h"
 #include "hash.h"
 #include "cheapcgi.h"
 #include "jksql.h"
 #include "dystring.h"
 #include "wiggle.h"
 #include "hdb.h"
 #include "portable.h"
 #include "memalloc.h"
 
 
 /* Command line switches. */
 static boolean doAscii = TRUE;	/*	do not output ascii data */
 static boolean doStats = FALSE;	/*	perform stats measurement */
 static boolean doBed = FALSE;	/*	output bed format */
 static boolean doHistogram = FALSE;	/*	output histogram of the data */
 static boolean silent = FALSE;	/*	no data points output */
 static boolean fetchNothing = FALSE;	/*  no ascii, bed, or stats returned */
 static boolean timing = FALSE;	/*	turn timing on	*/
 static boolean skipDataRead = FALSE;	/*	do not read the wib data */
 static boolean rawDataOut = FALSE;	/*	just the values, no positions */
 static boolean htmlOut = FALSE;	/*	stats and histogram output in HTML */
 static boolean help = FALSE;	/*	extended help message */
 static char *db = NULL;			/* database specification	*/
 static char *chr = NULL;		/* work on this chromosome only */
 static char *chromLst = NULL;	/*	file with list of chroms to process */
 static char *position = NULL;	/*	to specify a range on a chrom */
 static char *bedFile = NULL;	/*	to constrain via bedFile */
 static unsigned winStart = 0;	/*	from the position specification */
 static unsigned winEnd = 0;	/*	from the position specification */
 static float hBinSize = 1.0;	/*	histoGram bin size	*/
 static unsigned hBinCount = 25;	/*	histoGram bin count	*/
 static float hMinVal = 0.0;	/*	histoGram minimum value	*/
 static float hRange = 0.0;	/*	hRange = hBinSize * hBinCount; */
 static float hMax = 0.0;	/*	hMax = hMinVal + hRange;	*/
 static int lift = 0;		/*	offset to lift ascii positions out */
 
 #if defined(DEBUG)
 struct cart *cart;
 #endif
 
 /* command line option specifications */
 static struct optionSpec optionSpecs[] = {
     {"db", OPTION_STRING},
     {"chr", OPTION_STRING},
     {"chrom", OPTION_STRING},
     {"chromLst", OPTION_STRING},
     {"position", OPTION_STRING},
     {"bedFile", OPTION_STRING},
     {"dataConstraint", OPTION_STRING},
     {"doAscii", OPTION_BOOLEAN},
     {"doStats", OPTION_BOOLEAN},
     {"doBed", OPTION_BOOLEAN},
     {"doHistogram", OPTION_BOOLEAN},
     {"silent", OPTION_BOOLEAN},
     {"fetchNothing", OPTION_BOOLEAN},
     {"timing", OPTION_BOOLEAN},
     {"skipDataRead", OPTION_BOOLEAN},
     {"rawDataOut", OPTION_BOOLEAN},
     {"htmlOut", OPTION_BOOLEAN},
     {"help", OPTION_BOOLEAN},
     {"span", OPTION_INT},
     {"ll", OPTION_FLOAT},
     {"ul", OPTION_FLOAT},
     {"hBinSize", OPTION_STRING},
     {"hBinCount", OPTION_INT},
     {"hMinVal", OPTION_STRING},
     {"lift", OPTION_INT},
     {NULL, 0}
 };
 
 void usage(boolean moreHelp)
 /* Explain usage and exit. */
 {
 verbose(VERBOSE_ALWAYS_ON,
   "hgWiggle - fetch wiggle data from data base or file\n"
   "usage:\n"
   "   hgWiggle [options] <track names ...>\n"
   "options:\n"
   "   -db=<database> - use specified database\n"
   "   -chr=chrN - examine data only on chrN\n"
   "   -chrom=chrN - same as -chr option above\n"
   "   -position=[chrN:]start-end - examine data in window start-end (1-relative)\n"
   "             (the chrN: is optional)\n"
   "   -chromLst=<file> - file with list of chroms to examine\n"
   "   -doAscii - perform the default ascii output, in addition to other outputs\n"
   "            - Any of the other -do outputs turn off the default ascii output\n"
   "            - ***WARNING*** this ascii output is 0-relative offset which\n"
   "            - *** is *not* the normal wiggle input format.  Use the -lift\n"
   "            - *** argument -lift=1 to get 1-relative offset:\n"
   "   -lift=<D> - lift ascii output positions by D (0 default)\n"
   "   -rawDataOut - output just the data values, nothing else\n"
   "   -htmlOut - output stats or histogram in HTML instead of plain text\n"
   "   -doStats - perform stats measurement, default output text, see -htmlOut\n"
   "   -doBed - output bed format\n"
   "   -bedFile=<file> - constrain output to ranges specified in bed <file>\n"
   "   -dataConstraint='DC' - where DC is one of < = >= <= == != 'in range'\n"
   "   -ll=<F> - lowerLimit compare data values to F (float) (all but 'in range')\n"
   "   -ul=<F> - upperLimit compare data values to F (float)\n\t\t(need both ll and ul when 'in range')\n"
   "\n"
   "   -help - display more examples and extra options (to stderr)\n"
   "\n"
   "   When no database is specified, track names will refer to .wig files\n\n"
   "   example using the file chrM.wig:\n"
   "\thgWiggle chrM\n"
   "   example using the database table hg17.gc5Base:\n"
   "\thgWiggle -chr=chrM -db=hg17 gc5Base\n"
   );
 
 if (moreHelp)
   verbose(VERBOSE_ALWAYS_ON,
   "   example, stats on all .wig files in this directory:\n"
   "\thgWiggle -span=5 -doStats `ls *.wig | sed -e 's/.wig//'`\n"
   "   example using dataCompare limits, show values over 35.0:\n"
   "\thgWiggle -span=5 -dataConstraint='>' -ll=35.0 -chr=chrM -db=hg17 gc5Base\n"
   "   example, show values in range [ 50.0 : 80.0 ]\n"
   "\thgWiggle -span=5 -dataConstraint='in range' -ll=50.0 -ul=80.0 \\\n"
   "\t\t-chr=chrM -db=hg17 gc5Base\n"
   "\n"
   "   the case of multiple track names is most appropriate with .wig files\n"
   "\tit will work with a database, but doesn't make much sense.\n"
   "\n"
   "   -rawDataOut - output just the data values, nothing else, useful for\n"
   "\t\tsending data to other processes, e.g:\n"
   "\thgWiggle -span=5 -rawDataOut -chr=chrM -db=hg17 gc5Base | \\\n"
   "\t\ttextHistogram -real -binSize=20 -maxBinCount=13 -pValues stdin\n"
 /*"   -skipDataRead - do not read the .wib data (for no-read speed check)\n"*/
   "   -doHistogram - perform histogram on data, turns off all other outputs\n"
   "                - see also -htmlOut to output the histogram in html\n"
   "   -hBinSize=<F> - sets histogram bin size to <F> (float, default 1.0)\n"
   "   -hBinCount=<D> - sets histogram bin count to <D> (int, default 25)\n"
   "   -hMinVal=<F> - sets histogram minimum value to <F> (float, default 0.0)\n"
   "   -timing - display timing statistics to stderr\n"
   "   -silent - no output, scanning data only and prepares result (timing check)\n"
   "   -fetchNothing - scanning data only, *NOT* preparing result (timing check)\n"
   "   -verbose=2 - generic and per chrom information to stderr\n"
   "   -verbose=3 - per SQL row information to stderr\n"
   "   -verbose=4 - data block information to stderr\n"
   );
 
 exit(255);
 }
 
 static void hgWiggle(struct wiggleDataStream *wds, int trackCount,
 	char *tracks[])
 /* hgWiggle - dump wiggle data from database or .wig file */
 {
 struct slName *chromList = NULL;	/*	list of chroms to process */
 int i;
 long startClock;
 long endClock;
 unsigned long long totalMatched = 0;
 struct histoResult *histoGramResult = NULL;
 float *valuesArray = NULL;
 size_t valueCount = 0;
 
 if (chromLst)
     {
     struct lineFile *clf = lineFileOpen(chromLst, TRUE);
     char *row[1];
     while (lineFileRow(clf, row))
         {
 	struct slName *el;
 	el = newSlName(row[0]);
 	slAddHead(&chromList, el);
         }
     lineFileClose(&clf);
     slReverse(&chromList);
     }
 else if (chr)
     {
     struct slName *el;
     el = newSlName(chr);
     slAddHead(&chromList, el);
     }
 else
     {
     if (db && !bedFile)
 	{
 	chromList = hAllChromNames(db);
 	slReverse(&chromList);
 	}
     else
 	verbose(VERBOSE_CHR_LEVEL, "#\tno chrom specified for file read, do them all\n");
     }
 
 verbose(VERBOSE_CHR_LEVEL, "#\texamining tracks:");
 for (i=0; i<trackCount; ++i)
     verbose(VERBOSE_CHR_LEVEL, " %s", tracks[i]);
 verbose(VERBOSE_CHR_LEVEL, "\n");
 
 startClock = clock1000();
 for (i=0; i<trackCount; ++i)
     {
     int once = 1;	/*	at least once through even on null chromList */
     struct slName *chromPtr;
     unsigned long long chrBytesStart = 0;
     unsigned long long chrNoDataBytesStart = 0;
     unsigned long long chrRowsStart = 0;
     unsigned long long chrValuesMatchedStart = 0;
 
     /*	chromList may be empty, perform loop once in that case	*/
     for (chromPtr=chromList;  (once == 1) || (chromPtr != NULL); --once )
 	{
 	long chrStartClock = clock1000();
 	int operations = wigFetchAscii;	/*	default operation */
 	unsigned long long valuesMatched;
 
 	if (chromPtr)
 	    {
 	    wds->setChromConstraint(wds, chromPtr->name);
 	    verbose(VERBOSE_CHR_LEVEL,"#\tchrom: %s\n", chromPtr->name);
 	    }
 
 	if (!rawDataOut && !doAscii)
 		operations &= ~wigFetchAscii;
 	if (doStats)
 		operations |= wigFetchStats;
 	if (doBed)
 		operations |= wigFetchBed;
 	if (fetchNothing)
 		operations |= wigFetchNoOp;
 	if (doHistogram)
 		operations |= wigFetchAscii;
 
 	if (bedFile)
 	    {
 	    struct bed *bedList=bedLoadNAllChrom(bedFile, 3, wds->chrName);
 	    valuesMatched = wds->getDataViaBed(wds, db, tracks[i],
 		operations, &bedList);
 	    bedFreeList(&bedList);
 	    }
 	else
 	    valuesMatched = wds->getData(wds, db, tracks[i], operations);
 
 	if (doHistogram && (valuesMatched > 0))
 	    {
 	    /*  convert the ascii data listings to one giant float array */
 	    valuesArray=wds->asciiToDataArray(wds, valuesMatched, &valueCount);
 	    /* first time starts the histogram, next times accumulate into it */
 	    if (histoGramResult == NULL)
 		histoGramResult = histoGram(valuesArray, valueCount,
 		    hBinSize, hBinCount+1, hMinVal, hMinVal, hMax,
 			(struct histoResult *)NULL);
 	    else
 		(void) histoGram(valuesArray, valueCount,
 		    hBinSize, hBinCount+1, hMinVal, hMinVal, hMax,
 			histoGramResult);
 	    freeMem(valuesArray);
 	    }
 
 	totalMatched += valuesMatched;
 
 	if ((!silent && (valuesMatched > 0)) || (!silent && doStats))
 	    {
 	    /*	the TRUE means sort the results.  The FALSE case is
 	     *	possible if you are accumulating results via numerous calls
 	     *	and perhaps sorted them all at once with the
 	     *	sortResults() method elsewhere, then came to printout.
 	     *	No need to sort again.
 	     *	When working through a list of chroms or tracks,
 	     *	no need to print stats until all done.
 	     */
 	    if (doStats && (!chromPtr) && (trackCount == 1))
 		wds->statsOut(wds, db, "stdout", TRUE, htmlOut, TRUE, FALSE);
 	    if (doBed)
 		wds->bedOut(wds, "stdout", TRUE);
 	    if (rawDataOut || doAscii)
 		wds->asciiOut(wds, db, "stdout", TRUE, rawDataOut);
 	    }
 	wds->freeBed(wds);
 	wds->freeAscii(wds);
 	if (doStats && (!chromPtr) && (trackCount == 1))
 	    wds->freeStats(wds);
 	if (timing)
 	    {
 	    long et;
 	    long chrEndClock;
 	    unsigned long long chrBytesRead = wds->validPoints - chrBytesStart;
 	    unsigned long long chrNoDataBytes =
 		wds->noDataPoints - chrNoDataBytesStart;
 	    unsigned long long chrRowsRead = wds->rowsRead - chrRowsStart;
 	    unsigned long long chrValuesMatched =
 		wds->valuesMatched - chrValuesMatchedStart;
 
 	    chrEndClock = clock1000();
 	    et = chrEndClock - chrStartClock;
 	    verbose(VERBOSE_ALWAYS_ON,"#\t%s.%s %llu data bytes, %llu no-data bytes, %ld ms, %llu rows, %llu matched\n",
 			tracks[i], wds->currentChrom, chrBytesRead,
 			chrNoDataBytes, et, chrRowsRead, chrValuesMatched);
 	    chrNoDataBytes = chrBytesRead = 0;
 	    chrBytesStart = wds->validPoints;
 	    chrNoDataBytesStart = wds->noDataPoints;
 	    chrRowsStart = wds->rowsRead;
 	    chrValuesMatchedStart = wds->valuesMatched;
 	    chrStartClock = clock1000();
 	    }
 	if (chromPtr)
 	    chromPtr = chromPtr->next;
 	}
     }
 
 if (doHistogram)
     {
     printHistoGram(histoGramResult, htmlOut);
     freeHistoGram(&histoGramResult);
     }
 
 /* when working through a chrom list, or track list, stats only at the end */
 if (doStats && (chromList || (trackCount > 1)))
     {
     wds->statsOut(wds, db, "stdout", TRUE, htmlOut, TRUE, FALSE);
     wds->freeStats(wds);
     }
 endClock = clock1000();
 
 if ((!doStats) && (0 == totalMatched))
     verbose(VERBOSE_ALWAYS_ON,"#\tno values found with these constraints\n");
 
 if (timing)
     {
     long et;
     et = endClock - startClock;
     if (wds->validPoints > 0 )
 	{
     verbose(VERBOSE_ALWAYS_ON,"#\ttotal %llu valid bytes, %llu no-data bytes, %ld ms, %llu rows\n#\t%llu matched = %% %.2f, %llu wib bytes, %llu bytes skipped\n",
 	wds->validPoints, wds->noDataPoints, et, wds->rowsRead,
 	wds->valuesMatched,
 	100.0 * (float)wds->valuesMatched / (float)wds->validPoints,
 	wds->bytesRead, wds->bytesSkipped);
 	}
     else
 	{
     verbose(VERBOSE_ALWAYS_ON,"#\ttotal %llu valid bytes, %llu no-data bytes, %ld ms, %llu rows\n#\t%llu matched = %% 0.00, %llu wib bytes, %llu bytes skipped\n",
 	wds->validPoints, wds->noDataPoints, et, wds->rowsRead,
 	wds->valuesMatched, wds->bytesRead, wds->bytesSkipped);
 	}
     }
 }	/*	void hgWiggle()	*/
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 struct wiggleDataStream *wds = NULL;
 float lowerLimit = -1 * INFINITY;  /*	for constraint comparison	*/
 float upperLimit = INFINITY;	/*	for constraint comparison	*/
 unsigned span = 0;	/*	select for this span only	*/
 static char *dataConstraint;	/*	one of < = >= <= == != 'in range' */
 
 pushCarefulMemHandler(2147483647 * ((sizeof(size_t)/4)*(sizeof(size_t)/4)));
 	/*	== 2 Gb for 32 bit machines, 8 Gb for 64 bit machines */
 
 optionInit(&argc, argv, optionSpecs);
 
 
 db = optionVal("db", NULL);
 chr = optionVal("chr", NULL);
 if (NULL == chr)
     chr = optionVal("chrom", NULL);
 chromLst = optionVal("chromLst", NULL);
 position = optionVal("position", NULL);
 bedFile = optionVal("bedFile", NULL);
 dataConstraint = optionVal("dataConstraint", NULL);
 doStats = optionExists("doStats");
 doBed = optionExists("doBed");
 doHistogram = optionExists("doHistogram");
 silent = optionExists("silent");
 fetchNothing = optionExists("fetchNothing");
 timing = optionExists("timing");
 skipDataRead = optionExists("skipDataRead");
 rawDataOut = optionExists("rawDataOut");
 htmlOut = optionExists("htmlOut");
 help = optionExists("help");
 span = optionInt("span", 0);
 lowerLimit = optionFloat("ll", -1 * INFINITY);
 upperLimit = optionFloat("ul", INFINITY);
 hBinSize = optionFloat("hBinSize", 1.0);
 hMinVal = optionFloat("hMinVal", 0.0);
 hBinCount = optionInt("hBinCount", 25);
 lift = optionInt("lift", 0);
 
 if (help)
     usage(TRUE);
 
 if (doStats || doBed)
     doAscii = optionExists("doAscii");
 
 if (doHistogram)	/*	histogram turns off everything else */
     {
     doAscii = FALSE;
     doBed = FALSE;
     doStats = FALSE;
     rawDataOut = FALSE;
     hRange = hBinSize * hBinCount;
     if ( !(hRange > 0.0))
 	errAbort("ERROR: histogram range is not > 0.0. binSize: %g, binCount: %u, range: %g", hBinSize, hBinCount, hRange);
     hMax = hMinVal + hRange;
     }
 
 if (db)
     verbose(VERBOSE_CHR_LEVEL, "#\tdatabase: %s\n", db);
 else
     verbose(VERBOSE_CHR_LEVEL, "#\tno database specified, using .wig files\n");
 
 if (chromLst && chr)
     {
     warn("ERROR: both specified: -chr=%s and -chromLst=%s", chr, chromLst);
     errAbort("ERROR: specify only one of -chr or -chromLst but not both");
     }
 
 /*	create the object here to allow constraint settings	*/
 wds = wiggleDataStreamNew();
 
 if (chr)
     {
     wds->setChromConstraint(wds, chr);
     verbose(VERBOSE_CHR_LEVEL, "#\tchrom constraint: %s\n", wds->chrName);
     }
 if (position)
     {
     char *startEnd[2];
     char *clonePosition = cloneString(position);
     char *stripped = stripCommas(clonePosition);
     
     /*	allow chrN: or not	*/
     if (2 == chopByChar(stripped, ':', startEnd, 2))
 	{
 	char *freeThis;
 	if (chr)
 	    {
 	    if (differentString(chr, startEnd[0]))
 		{
 	warn("different chroms specified via -chr and -position arguments\n");
 	warn("uncertain which one it should be: %s vs. %s\n", chr, startEnd[0]);
 		usage(FALSE);
 		}
 	    }
 	    else
 		chr = cloneString(startEnd[0]);
 	freeThis = stripped;
 	stripped = stripCommas(startEnd[1]);
 	freeMem(freeThis);
 	}
     if (2 != chopByChar(stripped, '-', startEnd, 2))
 	errAbort("can not parse position: '%s' '%s'", position, stripped);
     if (sqlSigned(startEnd[0]) < 1)
 	errAbort("ERROR: illegal chromStart: %d in specified position: %s", sqlSigned(startEnd[0]), position);
     if (sqlSigned(startEnd[1]) < 1)
 	errAbort("ERROR: illegal chromEnd: %d in specified position: %s", sqlSigned(startEnd[1]), position);
     winStart = BASE_0(sqlUnsigned(startEnd[0])); /* !!! 1-relative coming in */
     winEnd = sqlUnsigned(startEnd[1]);
     freeMem(stripped);
     wds->setPositionConstraint(wds, winStart, winEnd);
     verbose(VERBOSE_CHR_LEVEL, "#\tposition specified: %u-%u\n", BASE_1(wds->winStart), wds->winEnd);
     freeMem(clonePosition);
     }
 wds->offset = lift;
 if (lift != 0)
     verbose(VERBOSE_CHR_LEVEL, "#\tlifting ascii positions on output by %d\n", lift);
 if (doAscii)
     verbose(VERBOSE_CHR_LEVEL, "#\tdoAscii option on, perform the default ascii output\n");
 if (doHistogram)
     verbose(VERBOSE_CHR_LEVEL, "#\tdoHistogram: min,max: %g:%g, range: %g, binCount: %u, binSize: %g\n", hMinVal, hMax, hRange, hBinCount, hBinSize);
 
 if (doBed)
     verbose(VERBOSE_CHR_LEVEL, "#\tdoBed option on, output bed format\n");
 if (doStats)
     verbose(VERBOSE_CHR_LEVEL, "#\tdoStats option on, perform statistics measurements\n");
 if (silent)
     verbose(VERBOSE_CHR_LEVEL, "#\tsilent option on, no data points output, data is scanned\n");
 if (fetchNothing)
     verbose(VERBOSE_CHR_LEVEL, "#\tfetchNothing option on, data will be scanned, no results prepared\n");
 if (timing)
     verbose(VERBOSE_CHR_LEVEL, "#\ttiming option on\n");
 if (skipDataRead)
     verbose(VERBOSE_CHR_LEVEL, "#\tskipDataRead option on, do not read .wib data\n");
 if (rawDataOut)
     verbose(VERBOSE_CHR_LEVEL, "#\trawDataOut option on, only data values are output\n");
 if (htmlOut)
     verbose(VERBOSE_CHR_LEVEL, "#\thtmlOut option on, output stats or histogram in HTML format\n");
 if (span)
     {
     wds->setSpanConstraint(wds, span);
     verbose(VERBOSE_CHR_LEVEL, "#\tspan constraint: %u\n", wds->spanLimit);
     }
 
 if (bedFile)
     verbose(VERBOSE_CHR_LEVEL, "#\twill constrain to ranges specified in bed file: %s\n",
 	bedFile);
 
 if (dataConstraint)
     {
     if (sameString(dataConstraint, "in range"))
 	{
 	if (!(optionExists("ll") && optionExists("ul")))
 	    {
 	    warn("ERROR: dataConstraint 'in range' specified without both -ll=<F> and -ul=<F>");
 	    usage(FALSE);
 	    }
 	}
     else if (!optionExists("ll"))
 	{
 	warn("ERROR: dataConstraint specified without -ll=<F>");
 	usage(FALSE);
 	}
 
     wds->setDataConstraint(wds, dataConstraint, lowerLimit, upperLimit);
 
     if (sameString(dataConstraint, "in range"))
 	verbose(VERBOSE_CHR_LEVEL, "#\tdataConstraint: %s [%f : %f]\n", wds->dataConstraint,
 		wds->limit_0, wds->limit_1);
     else
 	verbose(VERBOSE_CHR_LEVEL, "#\tdataConstraint: data values %s %f\n",
 		wds->dataConstraint, wds->limit_0);
     }
 else if (optionExists("ll") || optionExists("ul"))
     {
     warn("ERROR: ll or ul options specified without -dataConstraint");
     usage(FALSE);
     }
 
 if (argc < 2)
     usage(FALSE);
 
 hgWiggle(wds, argc-1, argv+1);
 
 wiggleDataStreamFree(&wds);
 if (verboseLevel() > 1)
     printVmPeak();
 return 0;
 }