2306b0cbe3778fe985fa284367228b35fd9dae78
markd
  Tue Feb 23 11:27:38 2021 -0800
added featureBits option to only consider primary chromosomes using UCSC naming convention

diff --git src/hg/featureBits/featureBits.c src/hg/featureBits/featureBits.c
index c05d8e7..ca3a900 100644
--- src/hg/featureBits/featureBits.c
+++ src/hg/featureBits/featureBits.c
@@ -20,52 +20,54 @@
 
 
 static struct optionSpec optionSpecs[] =
 /* command line option specifications */
 {
     {"bed", OPTION_STRING},
     {"fa", OPTION_STRING},
     {"faMerge", OPTION_BOOLEAN},
     {"minSize", OPTION_INT},
     {"chrom", OPTION_STRING},
     {"or", OPTION_BOOLEAN},
     {"not", OPTION_BOOLEAN},
     {"countGaps", OPTION_BOOLEAN},
     {"noRandom", OPTION_BOOLEAN},
     {"noHap", OPTION_BOOLEAN},
+    {"primaryChroms", OPTION_BOOLEAN},
     {"dots", OPTION_INT},
     {"minFeatureSize", OPTION_INT},
     {"enrichment", OPTION_BOOLEAN},
     {"where", OPTION_STRING},
     {"bin", OPTION_STRING},
     {"chromSize", OPTION_STRING},
     {"binSize", OPTION_INT},
     {"binOverlap", OPTION_INT},
     {"bedRegionIn", OPTION_STRING},
     {"bedRegionOut", OPTION_STRING},
     {NULL, 0}
 };
 
 int minSize = 1;	/* Minimum size of feature. */
 char *clChrom = "all";	/* Which chromosome. */
 boolean orLogic = FALSE;  /* Do ors instead of ands? */
 boolean notResults = FALSE;   /* negate results? */
 char *where = NULL;		/* Extra selection info. */
 char *chromSizes = NULL;		/* read chrom sizes from file instead of database . */
 boolean countGaps = FALSE;	/* Count gaps in denominator? */
 boolean noRandom = FALSE;	/* Exclude _random chromosomes? */
 boolean noHap = FALSE;	/* Exclude _hap|_alt chromosomes? */
+boolean primaryChroms = FALSE; /* only include primary chroms */
 int dots = 0;	/* print dots every N chroms (scaffolds) processed */
 boolean calcEnrichment = FALSE;	/* Calculate coverage/enrichment? */
 int binSize = 500000;	/* Default bin size. */
 int binOverlap = 250000;	/* Default bin size. */
 
 /* to process chroms without constantly looking up in chromInfo, create
  * this list of them from the chromInfo once.
  */
 static struct chromInfo *chromInfoList = NULL;
 static struct hash *gapHash = NULL;
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
@@ -75,30 +77,31 @@
   "This will return the number of bits in all the tables anded together\n"
   "Pipe warning:  output goes to stderr.\n"
   "Options:\n"
   "   -bed=output.bed   Put intersection into bed format. Can use stdout.\n"
   "   -fa=output.fa     Put sequence in intersection into .fa file\n"
   "   -faMerge          For fa output merge overlapping features.\n"
   "   -minSize=N        Minimum size to output (default 1)\n"
   "   -chrom=chrN       Restrict to one chromosome\n"
   "   -chromSize=sizefile       Read chrom sizes from file instead of database. \n"
   "                             (chromInfo three column format)\n"
   "   -or               Or tables together instead of anding them\n"
   "   -not              Output negation of resulting bit set.\n"
   "   -countGaps        Count gaps in denominator\n"
   "   -noRandom         Don't include _random (or Un) chromosomes\n"
   "   -noHap            Don't include _hap|_alt chromosomes\n"
+  "   -primaryChroms    Primary assembly (chroms without '_' in name)\n"
   "   -dots=N           Output dot every N chroms (scaffolds) processed\n"
   "   -minFeatureSize=n Don't include bits of the track that are smaller than\n"
   "                     minFeatureSize, useful for differentiating between\n"
   "                     alignment gaps and introns.\n"
   "   -bin=output.bin   Put bin counts in output file\n"
   "   -binSize=N        Bin size for generating counts in bin file (default 500000)\n"
   "   -binOverlap=N     Bin overlap for generating counts in bin file (default 250000)\n"
   "   -bedRegionIn=input.bed    Read in a bed file for bin counts in specific regions \n"
   "                     and write to bedRegionsOut\n"
   "   -bedRegionOut=output.bed  Write a bed file of bin counts in specific regions \n"
   "                     from bedRegionIn\n"
   "   -enrichment       Calculates coverage and enrichment assuming first table\n"
   "                     is reference gene track and second track something else\n"
   "                     Enrichment is the amount of table1 that covers table2 vs. the\n"
   "                     amount of table1 that covers the genome. It's how much denser\n"
@@ -204,31 +207,32 @@
 else if (hasSuffixCompress(cleaned, suffix, "Z"))
     return TRUE;
 else if (hasSuffixCompress(cleaned, suffix, "bz2"))
     return TRUE;
 else
     return FALSE;
 }
 
 bool inclChrom(char *name)
 /* check if a chromosome should be included */
 {
 return  !((noRandom && (endsWith(name, "_random")
                         || startsWith("chrUn", name)
                         || sameWord("chrNA", name) /* danRer */
                         || sameWord("chrU", name)))  /* dm */
-          || (noHap && haplotype(name)));
+          || (noHap && haplotype(name))
+          || (primaryChroms && (strchr(name, '_') != NULL)));
 }
 
 void bitsToBins(Bits *bits, char *chrom, int chromSize, FILE *binFile, int binSize, int binOverlap)
 /* Write out binned counts of bits. */
 {
 int bin, count;
 
 if (!binFile)
     return;
 for (bin=0; bin+binSize<chromSize; bin=bin+binOverlap)
     {
     count = bitCountRange(bits, bin, binSize);
     fprintf(binFile, "%s\t%d\t%d\t%d\t%s.%d\n", chrom, bin, bin+binSize, count, chrom, bin/binOverlap+1);
     }
 count = bitCountRange(bits, bin, chromSize-bin);
@@ -914,30 +918,31 @@
 
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, optionSpecs);
 if (argc < 3)
     usage();
 clChrom = optionVal("chrom", clChrom);
 chromSizes = optionVal("chromSize", NULL);
 orLogic = optionExists("or");
 notResults = optionExists("not");
 countGaps = optionExists("countGaps");
 noRandom = optionExists("noRandom");
 noHap = optionExists("noHap");
+primaryChroms = optionExists("primaryChroms");
 dots = optionInt("dots", dots);
 where = optionVal("where", NULL);
 calcEnrichment = optionExists("enrichment");
 if (calcEnrichment && argc != 4)
     errAbort("You must specify two tables with -enrichment");
 if (calcEnrichment && orLogic)
     errAbort("You can't use -or with -enrichment");
 if (calcEnrichment && notResults)
     errAbort("You can't use -not with -enrichment");
 if (notResults && optionExists("fa") && !optionExists("faMerge"))
     errAbort("Must specify -faMerge if using -not with -fa");
 
 featureBits(argv[1], argc-2, argv+2);
 return 0;
 }