78e58e35abfe32f6d3a2708e07666af59454d159 hiram Fri Jan 24 08:45:45 2014 -0800 gather in the diaspora of haplotype check routines and add hg38 haplotype recognition refs #11681 diff --git src/hg/featureBits/featureBits.c src/hg/featureBits/featureBits.c index 4493075..5edf8cf 100644 --- src/hg/featureBits/featureBits.c +++ src/hg/featureBits/featureBits.c @@ -38,31 +38,31 @@ {"binSize", OPTION_INT}, {"binOverlap", OPTION_INT}, {"bedRegionIn", OPTION_STRING}, {"bedRegionOut", OPTION_STRING}, {NULL, 0} }; int minSize = 1; /* Minimum size of feature. */ char *clChrom = "all"; /* Which chromosome. */ boolean orLogic = FALSE; /* Do ors instead of ands? */ boolean notResults = FALSE; /* negate results? */ char *where = NULL; /* Extra selection info. */ char *chromSizes = NULL; /* read chrom sizes from file instead of database . */ boolean countGaps = FALSE; /* Count gaps in denominator? */ boolean noRandom = FALSE; /* Exclude _random chromosomes? */ -boolean noHap = FALSE; /* Exclude _hap chromosomes? */ +boolean noHap = FALSE; /* Exclude _hap|_alt chromosomes? */ int dots = 0; /* print dots every N chroms (scaffolds) processed */ boolean calcEnrichment = FALSE; /* Calculate coverage/enrichment? */ int binSize = 500000; /* Default bin size. */ int binOverlap = 250000; /* Default bin size. */ /* to process chroms without constantly looking up in chromInfo, create * this list of them from the chromInfo once. */ static struct chromInfo *chromInfoList = NULL; static struct hash *gapHash = NULL; void usage() /* Explain usage and exit. */ { errAbort( @@ -71,31 +71,31 @@ " featureBits database table(s)\n" "This will return the number of bits in all the tables anded together\n" "Pipe warning: output goes to stderr.\n" "Options:\n" " -bed=output.bed Put intersection into bed format. Can use stdout.\n" " -fa=output.fa Put sequence in intersection into .fa file\n" " -faMerge For fa output merge overlapping features.\n" " -minSize=N Minimum size to output (default 1)\n" " -chrom=chrN Restrict to one chromosome\n" " -chromSize=sizefile Read chrom sizes from file instead of database. \n" " (chromInfo three column format)\n" " -or Or tables together instead of anding them\n" " -not Output negation of resulting bit set.\n" " -countGaps Count gaps in denominator\n" " -noRandom Don't include _random (or Un) chromosomes\n" - " -noHap Don't include _hap chromosomes\n" + " -noHap Don't include _hap|_alt chromosomes\n" " -dots=N Output dot every N chroms (scaffolds) processed\n" " -minFeatureSize=n Don't include bits of the track that are smaller than\n" " minFeatureSize, useful for differentiating between\n" " alignment gaps and introns.\n" " -bin=output.bin Put bin counts in output file\n" " -binSize=N Bin size for generating counts in bin file (default 500000)\n" " -binOverlap=N Bin overlap for generating counts in bin file (default 250000)\n" " -bedRegionIn=input.bed Read in a bed file for bin counts in specific regions \n" " and write to bedRegionsOut\n" " -bedRegionOut=output.bed Write a bed file of bin counts in specific regions \n" " from bedRegionIn\n" " -enrichment Calculates coverage and enrichment assuming first table\n" " is reference gene track and second track something else\n" " Enrichment is the amount of table1 that covers table2 vs. the\n" " amount of table1 that covers the genome. It's how much denser\n" @@ -196,32 +196,31 @@ else if (hasSuffixCompress(cleaned, suffix, "Z")) return TRUE; else if (hasSuffixCompress(cleaned, suffix, "bz2")) return TRUE; else return FALSE; } bool inclChrom(char *name) /* check if a chromosome should be included */ { return !((noRandom && (endsWith(name, "_random") || startsWith("chrUn", name) || sameWord("chrNA", name) /* danRer */ || sameWord("chrU", name))) /* dm */ - || (noHap && stringIn( "_hap", name)) - || (noHap && stringIn( "_alt", name))); + || (noHap && haplotype(name))); } void bitsToBins(Bits *bits, char *chrom, int chromSize, FILE *binFile, int binSize, int binOverlap) /* Write out binned counts of bits. */ { int bin, count; if (!binFile) return; for (bin=0; bin+binSize