4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/featureBits/featureBits.c src/hg/featureBits/featureBits.c
index a6a260a..363ec5c 100644
--- src/hg/featureBits/featureBits.c
+++ src/hg/featureBits/featureBits.c
@@ -1,1034 +1,1034 @@
 /* featureBits - Correlate tables via bitmap projections and booleans. */
 
 /* Copyright (C) 2014 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "jksql.h"
 #include "hdb.h"
 #include "fa.h"
 #include "bits.h"
 #include "bed.h"
 #include "psl.h"
 #include "portable.h"
 #include "featureBits.h"
 #include "agpGap.h"
 #include "chain.h"
 #include "chromInfo.h"
 
 
 static struct optionSpec optionSpecs[] =
 /* command line option specifications */
 {
     {"bed", OPTION_STRING},
     {"fa", OPTION_STRING},
     {"faMerge", OPTION_BOOLEAN},
     {"minSize", OPTION_INT},
     {"chrom", OPTION_STRING},
     {"or", OPTION_BOOLEAN},
     {"not", OPTION_BOOLEAN},
     {"countGaps", OPTION_BOOLEAN},
     {"noRandom", OPTION_BOOLEAN},
     {"noHap", OPTION_BOOLEAN},
     {"primaryChroms", OPTION_BOOLEAN},
     {"dots", OPTION_INT},
     {"minFeatureSize", OPTION_INT},
     {"enrichment", OPTION_BOOLEAN},
     {"where", OPTION_STRING},
     {"bin", OPTION_STRING},
     {"chromSize", OPTION_STRING},
     {"binSize", OPTION_INT},
     {"binOverlap", OPTION_INT},
     {"bedRegionIn", OPTION_STRING},
     {"bedRegionOut", OPTION_STRING},
     {"countBlocks", OPTION_BOOLEAN},
     {NULL, 0}
 };
 
 int minSize = 1;	/* Minimum size of feature. */
 char *clChrom = "all";	/* Which chromosome. */
 boolean orLogic = FALSE;  /* Do ors instead of ands? */
 boolean notResults = FALSE;   /* negate results? */
 char *where = NULL;		/* Extra selection info. */
 char *chromSizes = NULL;		/* read chrom sizes from file instead of database . */
 boolean countGaps = FALSE;	/* Count gaps in denominator? */
 boolean noRandom = FALSE;	/* Exclude _random chromosomes? */
 boolean noHap = FALSE;	/* Exclude _hap|_alt chromosomes? */
 boolean primaryChroms = FALSE; /* only include primary chroms */
 int dots = 0;	/* print dots every N chroms (scaffolds) processed */
 boolean calcEnrichment = FALSE;	/* Calculate coverage/enrichment? */
 int binSize = 500000;	/* Default bin size. */
 int binOverlap = 250000;	/* Default bin size. */
 boolean countBlocks = FALSE;	/* Count blocks in bed12 rather than extent. */
 
 /* to process chroms without constantly looking up in chromInfo, create
  * this list of them from the chromInfo once.
  */
 static struct chromInfo *chromInfoList = NULL;
 static struct hash *gapHash = NULL;
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "featureBits - Correlate tables via bitmap projections. \n"
   "usage:\n"
   "   featureBits database table(s)\n"
   "This will return the number of bits in all the tables bitwise ANDed together\n"
   "Pipe warning:  output goes to stderr.\n"
   "Options:\n"
   "   -bed=output.bed   Put intersection into bed format. Can use stdout.\n"
   "   -fa=output.fa     Put sequence in intersection into .fa file\n"
   "   -faMerge          For fa output merge overlapping features.\n"
   "   -minSize=N        Minimum size to output (default 1)\n"
   "   -chrom=chrN       Restrict to one chromosome\n"
   "   -chromSize=sizefile       Read chrom sizes from file instead of database. \n"
   "                             (chromInfo three column format)\n"
   "   -or               Bitwise OR tables together instead of ANDing them.\n"
   "   -not              Output negation of resulting bit set.\n"
   "   -countGaps        Count gaps in denominator\n"
   "   -countBlocks      Count blocks in bed12 files rather than entire extent.\n"
   "   -noRandom         Don't include _random (or Un) chromosomes\n"
   "   -noHap            Don't include _hap|_alt chromosomes\n"
   "   -primaryChroms    Primary assembly (chroms without '_' in name)\n"
   "   -dots=N           Output dot every N chroms (scaffolds) processed\n"
   "   -minFeatureSize=n Don't include bits of the track that are smaller than\n"
   "                     minFeatureSize, useful for differentiating between\n"
   "                     alignment gaps and introns.\n"
   "   -bin=output.bin   Put bin counts in output file\n"
   "   -binSize=N        Bin size for generating counts in bin file (default 500000)\n"
   "   -binOverlap=N     Bin overlap for generating counts in bin file (default 250000)\n"
   "   -bedRegionIn=input.bed    Read in a bed file for bin counts in specific regions \n"
   "                     and write to bedRegionsOut\n"
   "   -bedRegionOut=output.bed  Write a bed file of bin counts in specific regions \n"
   "                     from bedRegionIn\n"
   "   -enrichment       Calculates coverage and enrichment assuming first table\n"
   "                     is reference gene track and second track something else\n"
   "                     Enrichment is the amount of table1 that covers table2 vs. the\n"
   "                     amount of table1 that covers the genome. It's how much denser\n"
   "                     table1 is in table2 than it is genome-wide.\n"
   "   '-where=some sql pattern'  Restrict to features matching some sql pattern\n"
   "You can include a '!' before a table name to negate it.\n"
   "   To prevent your shell from interpreting the '!' you will need\n"
   "   to use the backslash \\!, for example the gap table: \\!gap\n"
   "Some table names can be followed by modifiers such as:\n"
   "    :exon:N          Break into exons and add N to each end of each exon\n"
   "    :cds             Break into coding exons\n"
   "    :intron:N        Break into introns, remove N from each end\n"
   "    :utr5, :utr3     Break into 5' or 3' UTRs\n" 
   "    :upstream:N      Consider the region of N bases before region\n"
   "    :end:N           Consider the region of N bases after region\n"
   "    :score:N         Consider records with score >= N \n"
   "    :upstreamAll:N   Like upstream, but doesn't filter out genes that \n"
   "                     have txStart==cdsStart or txEnd==cdsEnd\n"
   "    :endAll:N        Like end, but doesn't filter out genes that \n"
   "                     have txStart==cdsStart or txEnd==cdsEnd\n"
   "The tables can be bed, psl, or chain files, or a directory full of\n"
   "such files as well as actual database tables.  To count the bits\n"
   "used in dir/chrN_something*.bed you'd do:\n"
   "   featureBits database dir/_something.bed\n"
   "File types supported are BED, bigBed, PSL, and chain.  The suffix of the file \n"
   "is used to determine the type and MUST be .bed, .bb, .psl, or .chain respectively.\n"
   "NB: by default, featureBits omits gap regions from its calculation of the total\n"
   "number of bases.  This requires connecting to a database server using credentials\n"
   "from a .hg.conf file (or similar).  If such a connection is not available, you will\n"
   "need to specify -countGaps (which skips the database connection) in addition to\n"
   "providing all tables as files or directories.\n"
   );
 }
 
 static struct chromInfo *fbCreateChromInfoList(char *name, char *database)
 /* Load up all chromosome infos. */
 {
 struct sqlConnection *conn = sqlConnect(database);
 struct sqlResult *sr = NULL;
 char **row;
 int loaded=0;
 struct chromInfo *ret = NULL;
 unsigned totalSize = 0;
 
 if (sameWord(name, "all"))
     sr = sqlGetResult(conn, NOSQLINJ "select * from chromInfo");
 else
     {
     char select[256];
     sqlSafef(select, ArraySize(select), "select * from chromInfo where chrom='%s'",
 	name);
     sr = sqlGetResult(conn, select);
     }
 
 while ((row = sqlNextRow(sr)) != NULL)
     {
     struct chromInfo *el;
     struct chromInfo *ci;
     AllocVar(ci);
 
     el = chromInfoLoad(row);
     ci->chrom = cloneString(el->chrom);
     ci->size = el->size;
     totalSize += el->size;
     slAddHead(&ret, ci);
     ++loaded;
     }
 if (loaded < 1)
     errAbort("ERROR: can not find chrom name: '%s'\n", name);
 slReverse(&ret);
 if (sameWord(name, "all"))
     verbose(2, "#\tloaded size info for %d chroms, total size: %u\n",
 	loaded, totalSize);
 sqlFreeResult(&sr);
 sqlDisconnect(&conn);
 return ret;
 }
 
 static boolean hasSuffixCompress(char *file, char *suffix, char *compSuffix)
 /* determine if file ends with .suffix.compSuffix */
 {
 char sbuf[64];
 safef(sbuf, sizeof(sbuf), "%s.%s", suffix, compSuffix);
 return endsWith(file, sbuf);
 }
 
 boolean isFileType(char *file, char *suffix)
 /* determine if file ends with .suffix, or .suffix.{gz,Z,bz2} */
 {
 char cleaned[PATH_LEN], *p;
 
 /* remove any : qualifiers */
 strcpy(cleaned, file);
 p = strrchr (cleaned, '/');
 if (p == NULL)
     p = cleaned;
 p = strchr(p, ':');
 if (p != NULL)
     *p = '\0';
 
 if (endsWith(cleaned, suffix))
     return TRUE;
 else if (hasSuffixCompress(cleaned, suffix, "gz"))
     return TRUE;
 else if (hasSuffixCompress(cleaned, suffix, "Z"))
     return TRUE;
 else if (hasSuffixCompress(cleaned, suffix, "bz2"))
     return TRUE;
 else
     return FALSE;
 }
 
 bool inclChrom(char *name)
 /* check if a chromosome should be included */
 {
 return  !((noRandom && (endsWith(name, "_random")
                         || startsWith("chrUn", name)
                         || sameWord("chrNA", name) /* danRer */
                         || sameWord("chrU", name)))  /* dm */
           || (noHap && haplotype(name))
           || (primaryChroms && (strchr(name, '_') != NULL)));
 }
 
 void bitsToBins(Bits *bits, char *chrom, int chromSize, FILE *binFile, int binSize, int binOverlap)
 /* Write out binned counts of bits. */
 {
 int bin, count;
 
 if (!binFile)
     return;
 for (bin=0; bin+binSize<chromSize; bin=bin+binOverlap)
     {
     count = bitCountRange(bits, bin, binSize);
     fprintf(binFile, "%s\t%d\t%d\t%d\t%s.%d\n", chrom, bin, bin+binSize, count, chrom, bin/binOverlap+1);
     }
 count = bitCountRange(bits, bin, chromSize-bin);
 fprintf(binFile, "%s\t%d\t%d\t%d\t%s.%d\n", chrom, bin, chromSize, count, chrom, bin/binOverlap+1);
 }
 
 void bitsToRegions(Bits *bits, char *chrom, int chromSize, struct bed *bedList, 
 		   FILE *bedOutFile)
 /* Write out counts of bits in regions defined by bed elements. */
 {
 struct bed *bl=NULL;
 int count, i=0;
 
 if (!bedOutFile)
     return;
 for (bl=bedList; bl!=NULL; bl=bl->next)
     {
     if(differentString(bl->chrom,chrom))
 	continue;
     count = bitCountRange(bits, bl->chromStart, bl->chromEnd-bl->chromStart);
     fprintf(bedOutFile, "%s\t%d\t%d\t%d\t%s.%d\n", chrom, bl->chromStart, bl->chromEnd, count, chrom, ++i);
     }
 }
 
 void check(struct sqlConnection *conn, char *table)
 /* Check it's as planned. */
 {
 char query[256], **row;
 struct sqlResult *sr;
 int lastEnd = -1, lastStart = -1, start, end;
 sqlSafef(query, sizeof query, "select chromStart,chromEnd from %s", table);
 sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     start = atoi(row[0]);
     end = atoi(row[1]);
     if (start < lastStart)
         fprintf(stderr,"Out of order: %d,%d\n", lastStart, start);
     if (rangeIntersection(lastStart, lastEnd, start-1, end) > 0)
         fprintf(stderr,"Overlapping: (%d %d) (%d %d)\n", lastStart, lastEnd, start, end);
     lastStart = start;
     lastEnd = end;
     }
 sqlFreeResult(&sr);
 errAbort("All for now");
 }
 
 char *chromFileName(char *track, char *chrom, char fileName[512])
 /* Return chromosome specific version of file if it exists. */
 {
 if (fileExists(track))
     {
     strncpy(fileName, track, 512);
     }
 else
     {
     char dir[256], root[128], ext[65];
     int len;
     splitPath(track, dir, root, ext);
     /* Chop trailing / off of dir. */
     len = strlen(dir);
     if (len > 0 && dir[len-1] == '/')
         dir[len-1] = 0;
     safef(fileName, 512, "%s/%s%s%s", dir, chrom, root, ext);
     if (!fileExists(fileName))
 	{
         warn("Couldn't find %s or %s", track, fileName);
 	strncpy(fileName, track, 512);
 	}
     }
 return fileName;
 }
 
 void outOfRange(struct lineFile *lf, char *chrom, int chromSize)
 /* Complain that coordinate is out of range. */
 {
 errAbort("Coordinate out of allowed range [%d,%d) for %s near line %d of %s",
 	0, chromSize, chrom, lf->lineIx, lf->fileName);
 }
 
 void setPslBits(struct lineFile *lf, 
 	Bits *bits, struct psl *psl, int winStart, int winEnd)
 /* Set bits that are in psl. */
 {
 int i, s, e, w, blockCount = psl->blockCount;
 boolean isRev = (psl->strand[1] == '-');
 for (i=0; i<blockCount; ++i)
     {
     s = psl->tStarts[i];
     e = s + psl->blockSizes[i];
     if (isRev)
        {
        /* Use w as a temp variable to reverse coordinates s&e. */
        w = psl->tSize - e;
        e = psl->tSize - s;
        s = w;
        }
     /* Clip, and if anything left set it. */
     if (s < winStart) outOfRange(lf, psl->tName, psl->tSize);
     if (e > winEnd) outOfRange(lf, psl->tName, psl->tSize);
     w = e - s;
     if (w > 0)
 	bitSetRange(bits, s, w);
     }
 }
 
 void fbOrPsl(Bits *acc, char *track, char *chrom, int chromSize)
 /* Or in bits of psl file that correspond to chrom. */
 {
 struct lineFile *lf;
 char fileName[512];
 struct psl *psl;
 
 chromFileName(track, chrom, fileName);
 if (!fileExists(fileName))
     return;
 lf = pslFileOpen(fileName);
 while ((psl = pslNext(lf)) != NULL)
     {
     if (sameString(psl->tName, chrom))
 	setPslBits(lf, acc, psl, 0, chromSize);
     pslFree(&psl);
     }
 lineFileClose(&lf);
 }
 
 static int howManyFields(char *fileName)
 /* Figure out how many fields the first bed has in this file. */
 {
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 char *line, *row[256];
 unsigned numFields = 0;
 if (lineFileNextReal(lf, &line))
     numFields = chopByWhite(line, row, ArraySize(row));
 
 lineFileClose(&lf);
 return numFields;
 }
 
 static struct bed *cacheBeds(char *fileName)
 /* Read in the beds from this file and keep them in a cache. */
 {
 static char *cachedFile;
 static struct bed *cachedBeds;
 
 if ((cachedFile == NULL) || differentString(cachedFile, fileName))
     {
     if (cachedBeds)
         {
         bedFreeList(&cachedBeds);
         freez(&cachedFile);
         }
 
     cachedFile = cloneString(fileName);
     unsigned numFields = howManyFields(cachedFile);
     cachedBeds = bedLoadNAllChrom(fileName, numFields > 12 ? 12 : numFields, NULL);
     }
 
 return cachedBeds;
 }
 
 void fbOrBed(Bits *acc, char *track, char *chrom, int chromSize)
 /* Or in bits of bed file that correspond to chrom. */
 {
 char fileName[512];
 
 chromFileName(track, chrom, fileName);
 if (!fileExists(fileName))
     return;
 struct bed *bed, *beds = cacheBeds(fileName);
 
 for(bed = beds; bed; bed = bed->next)
     {
     if (sameString(bed->chrom, chrom))
         {
         if (countBlocks && (bed->blockCount > 0))
             {
             int ii;
             for(ii=0; ii < bed->blockCount; ii++)
                 bitSetRange(acc, bed->chromStart + bed->chromStarts[ii], bed->blockSizes[ii]);
             }
         else
             {
             int w;
             w = bed->chromEnd - bed->chromStart;
             if (w > 0)
                 bitSetRange(acc, bed->chromStart, w);
             }
 	}
     }
 }
 
 void fbOrBigBed(Bits *acc, char *track, char *chrom, int chromSize)
 /* Or in a bigBed file. */
 {
 char fileName[512];
 chromFileName(track, chrom, fileName);
 if (!fileExists(fileName))
     return;
 
 struct lm *lm = lmInit(0);
 struct bbiFile *bbi = bigBedFileOpen(fileName);
 unsigned fieldCount = bbi->definedFieldCount;
 char *bedRow[fieldCount];
 struct bigBedInterval *interval, *intervalList = bigBedIntervalQuery(bbi, chrom, 0, chromSize, 0, lm);
 
 for (interval = intervalList; interval != NULL; interval = interval->next)
     {
     if (countBlocks && (fieldCount >= 12))
         {
         char startBuf[16], endBuf[16];
         bigBedIntervalToRow(interval, chrom, startBuf, endBuf, bedRow, ArraySize(bedRow));
         struct bed *bed = bedLoadN(bedRow, fieldCount);
         int ii;
         for(ii=0; ii < bed->blockCount; ii++)
             bitSetRange(acc, bed->chromStart + bed->chromStarts[ii], bed->blockSizes[ii]);
         bedFree(&bed);
         }
     else
         {
         int w = interval->end - interval->start;
         if (w > 0)
             bitSetRange(acc, interval->start, w);
         }
     }
     
 lmCleanup(&lm);
 bbiFileClose(&bbi);
 }
 
 void fbOrChain(Bits *acc, char *track, char *chrom, int chromSize)
 /* Or in a chain file. */
 {
 struct lineFile *lf;
 char fileName[512];
 struct chain *chain;
 struct cBlock *b;
 
 chromFileName(track, chrom, fileName);
 if (!fileExists(fileName))
     return;
 lf = lineFileOpen(fileName, TRUE);
 while ((chain = chainRead(lf)) != NULL)
     {
     if (sameString(chrom, chain->tName))
         {
         for (b = chain->blockList; b != NULL; b = b->next)
             {
             int s = b->tStart, e = b->tEnd;
             if (s < 0) outOfRange(lf, chrom, chromSize);
             if (e > chromSize) outOfRange(lf, chrom, chromSize);
             bitSetRange(acc, b->tStart, b->tEnd - b->tStart);
             }
         }
     chainFree(&chain);
     }
 lineFileClose(&lf);
 }
 
 
 void isolateTrackPartOfSpec(char *spec, char track[512])
 /* Convert something like track:exon to just track */
 {
 char *s;
 strncpy(track, spec, 512);
 s = strchr(track, ':');
 if (s != NULL) *s = 0;
 }
 
 void orFile(Bits *acc, char *track, char *chrom, int chromSize)
 /* Or some sort of file into bits. */
 {
 if (isFileType(track, "psl"))
     {
     fbOrPsl(acc, track, chrom, chromSize);
     }
 else if (isFileType(track, "bed"))
     {
     fbOrBed(acc, track, chrom, chromSize);
     }
 else if (isFileType(track, "chain"))
     {
     fbOrChain(acc, track, chrom, chromSize);
     }
 else if (isFileType(track, "bb"))
     {
     fbOrBigBed(acc, track, chrom, chromSize);
     }
 else  
     errAbort("can't determine file type of: %s", track);
 }
 
 void orTable(char *database, Bits *acc, char *track, char *chrom, 
 	int chromSize, struct sqlConnection *conn)
 /* Or in table if it exists.  Else do nothing. */
 {
 char t[512], *s;
 char table[HDB_MAX_TABLE_STRING];
 
 isolateTrackPartOfSpec(track, t);
 s = strrchr(t, '.');
 if (s != NULL)
     {
     orFile(acc, track, chrom, chromSize);
     }
 else
     {
     boolean hasBin;
     int minFeatureSize = optionInt("minFeatureSize", 0);
     boolean isFound = hFindSplitTable(database, chrom, t, table, sizeof table, &hasBin);
     verbose(3,"orTable: db: %s isFound: %s %s %s %s\n", database,
 	isFound ? "TRUE" : "FALSE", chrom, t, table );
     if (isFound)
 	fbOrTableBitsQueryMinSize(database, acc, track, chrom, chromSize, conn, where,
 		   TRUE, TRUE, minFeatureSize);
     }
 }
 
 
 void chromFeatureBits(struct sqlConnection *conn,char *database, 
 	char *chrom, int tableCount, char *tables[],
 	FILE *bedFile, FILE *faFile, FILE *binFile,
         struct bed *bedRegionList, FILE *bedOutFile,
 	int chromSize, int *retChromBits,
 	int *retFirstTableBits, int *retSecondTableBits)
 /* featureBits - Correlate tables via bitmap projections and booleans
  * on one chromosome. */
 {
 int i;
 Bits *acc = NULL;
 Bits *bits = NULL;
 char *table;
 
 acc = bitAlloc(chromSize);
 bits = bitAlloc(chromSize);
 for (i=0; i<tableCount; ++i)
     {
     boolean not = FALSE;
     table = tables[i];
     if (table[0] == '!')
         {
 	not = TRUE;
 	++table;
 	}
     if (i == 0)
         {
 	orTable(database, acc, table, chrom, chromSize, conn);
 	if (not)
 	   bitNot(acc, chromSize);
 	if (retFirstTableBits != NULL)
 	   *retFirstTableBits = bitCountRange(acc, 0, chromSize);
 	}
     else
 	{
 	bitClear(bits, chromSize);
 	orTable(database, bits, table, chrom, chromSize, conn);
 	if (not)
 	   bitNot(bits, chromSize);
 	if (i == 1 && retSecondTableBits != NULL)
 	   *retSecondTableBits = bitCountRange(bits, 0, chromSize);
 	/* feature/bug - the above does not respect minSize */
 	if (orLogic)
 	    bitOr(acc, bits, chromSize);
 	else
 	    bitAnd(acc, bits, chromSize);
 	}
     }
 if (notResults)
     bitNot(acc, chromSize);    
 *retChromBits = bitCountRange(acc, 0, chromSize);
 if (bedFile != NULL || faFile != NULL)
     {
     minSize = optionInt("minSize", minSize);
     bitsToBed(database, acc, chrom, chromSize, bedFile, faFile, minSize);
     }
 if (binFile != NULL)
     {
     binSize = optionInt("binSize", binSize);
     binOverlap = optionInt("binOverlap", binOverlap);
     bitsToBins(acc, chrom, chromSize, binFile, binSize, binOverlap);
     }
 if (bedOutFile != NULL)
     bitsToRegions(acc, chrom, chromSize, bedRegionList, bedOutFile);
 bitFree(&acc);
 bitFree(&bits);
 }
 
 void chromFeatureSeq(struct sqlConnection *conn, 
 	char *database, char *chrom, char *trackSpec,
 	FILE *bedFile, FILE *faFile,
 	int *retItemCount, int *retBaseCount)
 /* Write out sequence file for features from one chromosome.
  * This separate routine handles the non-merged case.  It's
  * reason for being is so that the feature names get preserved. */
 {
 boolean hasBin;
 char t[512], *s = NULL;
 char table[HDB_MAX_TABLE_STRING];
 struct featureBits *fbList = NULL, *fb;
 
 if (trackSpec[0] == '!')
    errAbort("Sorry, '!' not available with fa output unless you use faMerge");
 isolateTrackPartOfSpec(trackSpec, t);
 s = strchr(t, '.');
 if (s != NULL)
     errAbort("Sorry, only database (not file) tracks allowed with "
              "fa output unless you use faMerge");
 if (!hFindSplitTable(database, chrom, t, table, sizeof table, &hasBin))
     errAbort("track %s not found", t);
 fbList = fbGetRangeQuery(database, trackSpec, chrom, 0, hChromSize(database, chrom),
 			 where, TRUE, TRUE);
 for (fb = fbList; fb != NULL; fb = fb->next)
     {
     int s = fb->start, e = fb->end;
     if (bedFile != NULL)
 	{
 	fprintf(bedFile, "%s\t%d\t%d\t%s", 
 	    fb->chrom, fb->start, fb->end, fb->name);
 	if (fb->strand != '?')
 	    fprintf(bedFile, "\t0\t%c", fb->strand);
 	fprintf(bedFile, "\n");
 	}
     if (faFile != NULL)
         {
 	struct dnaSeq *seq = hDnaFromSeq(database, chrom, s, e, dnaLower);
 	if (fb->strand == '-')
 	    reverseComplement(seq->dna, seq->size);
 	faWriteNext(faFile, fb->name, seq->dna, seq->size);
 	freeDnaSeq(&seq);
 	}
     }
 featureBitsFreeList(&fbList);
 }
 
 static struct hash *loadAllGaps(struct sqlConnection *conn, char *db)
 /*	working on all chroms, fetch all per-chrom gap counts at once
  *	returns hash by chrom name to gap counts for that chrom
  */
 { 
 struct chromInfo *cInfo;
 struct sqlResult *sr;
 char **row;
 struct hash *ret;
 int totalGapSize = 0;
 int gapCount = 0;
 
 ret = newHash(0);
 
 /*	If not split, read in whole gulp, create per-chrom hash of sizes */
 if (hTableExists(db, "gap"))
     {
     char *prevChrom = NULL;
     int totalGapsThisChrom = 0;
     
     sr = sqlGetResult(conn,
 	NOSQLINJ "select chrom,chromStart,chromEnd from gap order by chrom");
     while ((row = sqlNextRow(sr)) != NULL)
 	{
 	int gapSize = sqlUnsigned(row[2]) - sqlUnsigned(row[1]);
 	++gapCount;
 	if (prevChrom && sameWord(prevChrom,row[0]))
 	    {
 	    totalGapsThisChrom += gapSize;
 	    totalGapSize += gapSize;
 	    }
 	else
 	    {
 	    if (prevChrom)
 		{
 		hashAddInt(ret, prevChrom, totalGapsThisChrom);
 		freeMem(prevChrom);
 		prevChrom = cloneString(row[0]);
 		totalGapsThisChrom = gapSize;
 		totalGapSize += gapSize;
 		}
 	    else
 		{
 		prevChrom = cloneString(row[0]);
 		totalGapsThisChrom = gapSize;
 		totalGapSize += gapSize;
 		}
 	    }
 	}
 	/*	and the last one	*/
 	if (prevChrom && (totalGapsThisChrom > 0))
 	    {
 	    hashAddInt(ret, prevChrom, totalGapsThisChrom);
 	    freeMem(prevChrom);
 	    }
     sqlFreeResult(&sr);
     }
 else
     {
     /*	for each chrom name, fetch the gap count	*/
     for (cInfo = chromInfoList; cInfo != NULL; cInfo = cInfo->next)
 	{
 	int rowOffset;
 	int totalGapsThisChrom = 0;
 	sr = hChromQuery(conn, "gap", cInfo->chrom, NULL, &rowOffset);
 	while ((row = sqlNextRow(sr)) != NULL)
 	    {
 	    int gapSize;
 	    struct agpGap gap;
 	    ++gapCount;
 	    agpGapStaticLoad(row+rowOffset, &gap);
 	    gapSize = gap.chromEnd - gap.chromStart;
 	    totalGapsThisChrom += gapSize;
 	    totalGapSize += gapSize;
 	    }
 	sqlFreeResult(&sr);
 	hashAddInt(ret, cInfo->chrom, totalGapsThisChrom);
 	}
     }
 verbose(2,"#\tloaded %d gaps covering %d bases\n", gapCount, totalGapSize);
 return ret;
 }
 
 int countBases(struct sqlConnection *conn, char *chrom, int chromSize,
     char *database)
 /* Count bases, generally not including gaps, in chromosome. */
 {
 static boolean gapsLoaded = FALSE;
 struct sqlResult *sr;
 int totalGaps = 0;
 char **row;
 int rowOffset;
 
 if (countGaps)
     return chromSize;
 
 /*	If doing all chroms, then load up all the gaps and be done with
  *	it instead of re-reading the gap table for every chrom
  */
 if (sameWord(clChrom,"all"))
     {
     if (!gapsLoaded)
 	gapHash = loadAllGaps(conn, database);
     gapsLoaded = TRUE;
     totalGaps = hashIntValDefault(gapHash, chrom, 0);
     }
 else
     {
     sr = hChromQuery(conn, "gap", chrom, NULL, &rowOffset);
     while ((row = sqlNextRow(sr)) != NULL)
 	{
 	int gapSize;
 	struct agpGap gap;
 	agpGapStaticLoad(row+rowOffset, &gap);
 	gapSize = gap.chromEnd - gap.chromStart;
 	totalGaps += gapSize;
 	}
     sqlFreeResult(&sr);
     }
 return chromSize - totalGaps;
 }
 
 void checkInputExists(struct sqlConnection *conn,char *database, 
 	struct chromInfo *chromInfoList, int tableCount, char *tables[])
 /* check input tables/files exist, especially to handle split tables */
 {
 char *track=NULL;
 int i = 0, missing=0;
 char t[512], *s=NULL;
 char table[HDB_MAX_TABLE_STRING];
 char fileName[512];
 boolean found = FALSE;
 
 for (i=0; i<tableCount; ++i)
     {
     struct chromInfo *cInfo;
 
     track = tables[i];
     if (track[0] == '!')
 	{
 	++track;
 	}
     isolateTrackPartOfSpec(track, t);
     s = strrchr(t, '.');
     if (s)
 	{
 	if (fileExists(t))
 	    continue;
 	}
     else
 	{
 	if (NULL == conn) conn = hAllocConn(database);
 	if (sqlTableExists(conn, t))
 	    continue;
 	}
     found = FALSE;
     for (cInfo = chromInfoList; cInfo != NULL; cInfo = cInfo->next)
 	{
 	if (inclChrom(cInfo->chrom))
 	    {
 	    if (s)
 		{
 		chromFileName(t, cInfo->chrom, fileName);
 		if (fileExists(fileName))
 		    {
 		    found = TRUE;
 		    break;
 		    }
 		}
 	    else
 		{
 		boolean hasBin;
 		if (hFindSplitTable(database, cInfo->chrom, t, table, sizeof table, &hasBin))
 		    {
 		    found = TRUE;
 		    break;
 		    }
 		}
 	    }
 	}
     if (!found)
 	{
 	if (s)
 	    warn("file %s not found for any chroms", t);
 	else
 	    warn("table %s not found for any chroms", t);
 	++missing;	    
 	}
     }
 if (missing>0)
     errAbort("Error: %d input table(s)/file(s) do not exist for any of the chroms specified",missing);
 }
 
 
 void featureBits(char *database, int tableCount, char *tables[])
 /* featureBits - Correlate tables via bitmap projections and booleans. */
 {
 struct sqlConnection *conn = NULL;
 char *bedName = optionVal("bed", NULL), *faName = optionVal("fa", NULL);
 char *binName = optionVal("bin", NULL);
 char *bedRegionInName = optionVal("bedRegionIn", NULL);
 char *bedRegionOutName = optionVal("bedRegionOut", NULL);
 FILE *bedFile = NULL, *faFile = NULL, *binFile = NULL;
 FILE *bedRegionOutFile = NULL;
 struct bed *bedRegionList = NULL;
 boolean faIndependent = FALSE;
 struct chromInfo *cInfo;
 
 if (bedName)
     bedFile = mustOpen(bedName, "w");
 if (binName)
     binFile = mustOpen(binName, "w");
 if ((bedRegionInName && !bedRegionOutName) || (!bedRegionInName && bedRegionOutName))
     errAbort("bedRegionIn and bedRegionOut must both be specified");
 if (faName)
     {
     boolean faMerge = optionExists("faMerge");
     faFile = mustOpen(faName, "w");
     if (tableCount > 1)
         {
 	if (!faMerge)
 	    errAbort("For fa output of multiple tables you must use the "
 	             "faMerge option");
 	}
     faIndependent = (!faMerge);
     }
 
 if (chromSizes != NULL)
     chromInfoList = chromInfoLoadAll(chromSizes);
 else
     chromInfoList = fbCreateChromInfoList(clChrom, database);
 
 if (!countGaps)
     conn = hAllocConn(database);
 checkInputExists(conn, database, chromInfoList, tableCount, tables);
 
 if (!faIndependent)
     {
     double totalBases = 0, totalBits = 0;
     int firstTableBits = 0, secondTableBits = 0;
     int *pFirstTableBits = NULL, *pSecondTableBits = NULL;
     double totalFirstBits = 0, totalSecondBits = 0;
     static int dotClock = 1;
 
     if (calcEnrichment)
         {
 	pFirstTableBits = &firstTableBits;
 	pSecondTableBits = &secondTableBits;
 	}
     if (bedRegionInName)
 	{
 	struct lineFile *lf = lineFileOpen(bedRegionInName, TRUE);
 	struct bed *bed;
 	char *row[3];
 	
 	bedRegionOutFile = mustOpen(bedRegionOutName, "w");
 	while (lineFileRow(lf, row))
 	    {
 	    if (startsWith(row[0],"#")||startsWith(row[0],"chrom"))
 		continue;
 	    bed = bedLoad3(row);
 	    slAddHead(&bedRegionList, bed);
 	    }
 	lineFileClose(&lf);
 	slReverse(&bedRegionList);
 	}
     for (cInfo = chromInfoList; cInfo != NULL; cInfo = cInfo->next)
 	{
 	if (inclChrom(cInfo->chrom))
 	    {
 	    int chromBitSize;
 	    int chromSize = cInfo->size;
 	    verbose(3,"chromFeatureBits(%s)\n", cInfo->chrom);
 	    chromFeatureBits(conn, database, cInfo->chrom, tableCount, tables,
 		bedFile, faFile, binFile, bedRegionList, bedRegionOutFile, 
 		chromSize, &chromBitSize, pFirstTableBits, pSecondTableBits
 		);
 	    totalBases += countBases(conn, cInfo->chrom, chromSize, database);
 	    totalBits += chromBitSize;
 	    totalFirstBits += firstTableBits;
 	    totalSecondBits += secondTableBits;
 	    if (dots > 0)
 		{
 		if (--dotClock <= 0)
 		    {
 		    fputc('.', stdout);
 		    fflush(stdout);
 		    dotClock = dots;
 		    }
 		}
 	    }
 	}
 	if (dots > 0)
 	    {
 	    fputc('\n', stdout);
 	    fflush(stdout);
 	    }
     if (calcEnrichment)
         fprintf(stderr,"%s %5.3f%%, %s %5.3f%%, both %5.3f%%, cover %4.2f%%, enrich %4.2fx\n",
 		tables[0], 
 		100.0 * totalFirstBits/totalBases,
 		tables[1],
 		100.0 * totalSecondBits/totalBases,
 		100.0 * totalBits/totalBases,
 		100.0 * totalBits / totalFirstBits,
 		(totalBits/totalSecondBits) / (totalFirstBits/totalBases) );
     else
 	fprintf(stderr,"%1.0f bases of %1.0f (%4.3f%%) in intersection\n",
 	    totalBits, totalBases, 100.0*totalBits/totalBases);
     }
 else
     {
     int totalItems = 0;
     double totalBases = 0;
     int itemCount, baseCount;
     for (cInfo = chromInfoList; cInfo != NULL; cInfo = cInfo->next)
         {
 	if (inclChrom(cInfo->chrom))
 	    {
 	    chromFeatureSeq(conn, database, cInfo->chrom, tables[0],
 		    bedFile, faFile, &itemCount, &baseCount);
 	    totalBases += countBases(conn, cInfo->chrom, baseCount, database);
 	    totalItems += itemCount;
 	    }
 	}
     }
 hFreeConn(&conn);
 }
 
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, optionSpecs);
 if (argc < 3)
     usage();
 clChrom = optionVal("chrom", clChrom);
 chromSizes = optionVal("chromSize", NULL);
 orLogic = optionExists("or");
 notResults = optionExists("not");
 countGaps = optionExists("countGaps");
 countBlocks = optionExists("countBlocks");
 noRandom = optionExists("noRandom");
 noHap = optionExists("noHap");
 primaryChroms = optionExists("primaryChroms");
 dots = optionInt("dots", dots);
 where = optionVal("where", NULL);
 calcEnrichment = optionExists("enrichment");
 if (calcEnrichment && argc != 4)
     errAbort("You must specify two tables with -enrichment");
 if (calcEnrichment && orLogic)
     errAbort("You can't use -or with -enrichment");
 if (calcEnrichment && notResults)
     errAbort("You can't use -not with -enrichment");
 if (notResults && optionExists("fa") && !optionExists("faMerge"))
     errAbort("Must specify -faMerge if using -not with -fa");
 
 featureBits(argv[1], argc-2, argv+2);
 return 0;
 }