110f5e12a634db49ea9aa1ea23ff4965f1c2befe galt Tue Aug 14 13:48:57 2018 -0700 changing cse to soe in domains, sometimes gi. ref #21876 diff --git src/utils/bedClip/bedClip.c src/utils/bedClip/bedClip.c index 9b72f4b..c308bbe 100644 --- src/utils/bedClip/bedClip.c +++ src/utils/bedClip/bedClip.c @@ -1,115 +1,115 @@ /* bedClip - Remove lines from bed file that refer to off-chromosome places.. */ /* Copyright (C) 2011 The Regents of the University of California * See README in this or parent directory for licensing information. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "options.h" #include "bbiFile.h" #include "sqlNum.h" #include "obscure.h" void usage() /* Explain usage and exit. */ { errAbort( "bedClip - Remove lines from bed file that refer to off-chromosome locations.\n" "usage:\n" " bedClip [options] input.bed chrom.sizes output.bed\n" "chrom.sizes is a two-column file/URL: <chromosome name> <size in bases>\n" "If the assembly <db> is hosted by UCSC, chrom.sizes can be a URL like\n" - " http://hgdownload.cse.ucsc.edu/goldenPath/<db>/bigZips/<db>.chrom.sizes\n" + " http://hgdownload.soe.ucsc.edu/goldenPath/<db>/bigZips/<db>.chrom.sizes\n" "or you may use the script fetchChromSizes to download the chrom.sizes file.\n" "If not hosted by UCSC, a chrom.sizes file can be generated by running\n" "twoBitInfo on the assembly .2bit file.\n" "options:\n" " -truncate - truncate items that span ends of chrom instead of the\n" " default of dropping the items\n" " -verbose=2 - set to get list of lines clipped and why" ); } static struct optionSpec options[] = { {"truncate", OPTION_BOOLEAN}, {NULL, 0}, }; static boolean trim = FALSE; // the name truncate is already taken void bedClip(char *inFile, char *chromSizes, char *outFile) /* bedClip - Remove lines from bed file that refer to off-chromosome places.. */ { struct hash *chromSizesHash = bbiChromSizesFromFile(chromSizes); struct lineFile *lf = lineFileOpen(inFile, TRUE); FILE *f = mustOpen(outFile, "w"); char *line; while (lineFileNextReal(lf, &line)) { char *chrom = nextWord(&line); char *startString = nextWord(&line); char *endString = nextWord(&line); if (endString == NULL) errAbort("Need at least three fields line %d of %s", lf->lineIx, lf->fileName); if (startString[0] == '-') { if (trim) { verbose(2, "Truncating negative start line %d of %s: %s:%s-%s\n", lf->lineIx, lf->fileName, chrom, startString, endString); startString = "0"; } else { verbose(2, "Clipping negative line %d of %s: %s:%s-%s\n", lf->lineIx, lf->fileName, chrom, startString, endString); continue; // Clip off negatives } } if (!isdigit(startString[0])) errAbort("Expecting number got %s line %d of %s: %s:%s-%s", startString, lf->lineIx, lf->fileName, chrom, startString, endString); if (!isdigit(endString[0])) errAbort("Expecting number got %s line %d of %s: %s:%s-%s", endString, lf->lineIx, lf->fileName, chrom, startString, endString); int start = sqlUnsigned(startString); int end = sqlUnsigned(endString); if (start >= end) { verbose(2, "Clipping end <= start line %d of %s: %s:%s-%s\n", lf->lineIx, lf->fileName, chrom, startString, endString); continue; } struct hashEl *hel = hashLookup(chromSizesHash, chrom); if (hel == NULL) errAbort("Chromosome %s isn't in %s line %d of %s: %s:%s-%s\n", chrom, chromSizes, lf->lineIx, lf->fileName, chrom, startString, endString); int chromSize = ptToInt(hel->val); if (end > chromSize) { if (trim) { end = chromSize; verbose(2, "Truncating end > chromSize(%d) line %d of %s: %s:%s-%s\n", chromSize, lf->lineIx, lf->fileName, chrom, startString, endString); } else { verbose(2, "Clipping end > chromSize(%d) line %d of %s: %s:%s-%s\n", chromSize, lf->lineIx, lf->fileName, chrom, startString, endString); continue; } } fprintf(f, "%s\t%d\t%d", chrom, start, end); line = skipLeadingSpaces(line); if (line == NULL || line[0] == 0) fputc('\n', f); else fprintf(f, "\t%s\n", line); } carefulClose(&f); } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); if (argc != 4) usage(); trim = optionExists("truncate"); bedClip(argv[1], argv[2], argv[3]); return 0; }