2b7b5ede61bcbc412ea08e4110a726709b8a4d7c braney Wed Apr 24 11:49:44 2019 -0700
  add a -tabs option to bedToPsl to parse TSV files

diff --git src/hg/utils/bedToPsl/bedToPsl.c src/hg/utils/bedToPsl/bedToPsl.c
index ab026d6..5240f38 100644
--- src/hg/utils/bedToPsl/bedToPsl.c
+++ src/hg/utils/bedToPsl/bedToPsl.c
@@ -1,49 +1,52 @@
 /* bedToPsl - convert bed format files to psl format */
 
 /* Copyright (C) 2013 The Regents of the University of California
  * See README in this or parent directory for licensing information. */
 #include "common.h"
 #include "options.h"
 #include "bed.h"
 #include "psl.h"
 #include "sqlNum.h"
 #include "hash.h"
 #include "linefile.h"
 
 /* command line option specifications; every option listed is a boolean flag */
 static struct optionSpec optionSpecs[] = {
+    {"tabs", OPTION_BOOLEAN},
     {"keepQuery", OPTION_BOOLEAN},
     {NULL, 0}
 };
 
 /* command line options; main() sets each one when its flag is present */
 static boolean keepQuery = FALSE;
+static boolean doTabs = FALSE;  /* -tabs: cnvBedRec() splits lines with chopString() on tab instead of chopByWhite() */
 
 void usage(char *msg)
 /* Explain usage and exit.  msg is printed first, followed by the usage text. */
 {
 errAbort("%s:\n"
     "bedToPsl - convert bed format files to psl format\n"
     "usage:\n"
-    " bedToPsl chromSizes bedFile pslFile\n"
+    " bedToPsl [options] chromSizes bedFile pslFile\n"
     "\n"
     "Convert a BED file to a PSL file. This the result is an alignment.\n"  /* NOTE(review): second sentence is ungrammatical; fixing it changes user-facing text */
     " It is intended to allow processing by tools that operate on PSL.\n"
     "If the BED has at least 12 columns, then a PSL with blocks is created.\n"
     "Otherwise single-exon PSLs are created.\n\n"
     "Options:\n"
+    "-tabs - use tab as a separator\n"
     "-keepQuery - instead of creating a fake query, create PSL with identical query and\n"
     " target specs. Useful if bed features are to be lifted with pslMap and one \n"
     " wants to keep the source location in the lift result.\n"
     , msg);
 }
 
 static struct hash *loadChromSizes(char *chromSizesFile)
 /* Read the chromosome sizes file.  Each row supplies a name (words[0]) and a
  * size (words[1], parsed with sqlSigned()); the pairs are stored in the
  * returned hash. */
 {
 struct lineFile *lf = lineFileOpen(chromSizesFile, TRUE);
 struct hash *sizes = newHash(12);
 char *words[2];
 while (lineFileRow(lf, words))
     hashAddInt(sizes, words[0], sqlSigned(words[1]));
 lineFileClose(&lf);
 return sizes;
@@ -99,52 +102,58 @@
                  bed->chrom, hashIntVal(chromSizes, bed->chrom),
                  bed->chromStart, bed->chromEnd,
                  ((bed->strand[0] == '\0') ? "+" : bed->strand),
                  (bed->blockCount == 0) ? 1 : bed->blockCount, 0);
 
 psl->match = psl->qSize;
 if (bed->blockCount == 0)
     bedToPsl4(bed, psl);
 else
     bedToPsl12(bed, psl);
 return psl;
 }
 
 /* Convert one line read from a bed file to a PSL and write it to pslFh.
  * The line is split into at most 12 fields: with chopString() on tab when
  * doTabs is set, otherwise with chopByWhite().  Fewer than 4 fields is
  * reported through errAbort().  The bed and psl records built here are
  * freed before returning. */
 void cnvBedRec(char *line, struct hash *chromSizes, FILE *pslFh)
 {
 char *row[12];
-int numCols = chopByWhite(line, row, ArraySize(row));
+int numCols;
+if (doTabs)
+    numCols = chopString(line, "\t", row, ArraySize(row));  /* NOTE(review): confirm chopString() keeps empty fields between adjacent tabs; if it merges separators, later columns would shift */
+else
+    numCols = chopByWhite(line, row, ArraySize(row));
 if (numCols < 4)
     errAbort("bed must have at least 4 columns");
 struct bed *bed = bedLoadN(row, numCols);
 struct psl* psl = bedToPsl(bed, chromSizes);
 pslTabOut(psl, pslFh);
 pslFree(&psl);
 bedFree(&bed);
 }
 
 void cnvBedToPsl(char *chromSizesFile, char *bedFile, char *pslFile)
 /* Convert bed format files to PSL format: load sizes from chromSizesFile,
  * then pass every line returned by lineFileNextReal() on bedFile to
  * cnvBedRec(), which writes to pslFile. */
 {
 struct hash *chromSizes = loadChromSizes(chromSizesFile);
 struct lineFile *bedLf = lineFileOpen(bedFile, TRUE);
 FILE *pslFh = mustOpen(pslFile, "w");
 char *line;
 
 while (lineFileNextReal(bedLf, &line))
     cnvBedRec(line, chromSizes, pslFh);
 
 carefulClose(&pslFh);
 lineFileClose(&bedLf);
 }
 
 int main(int argc, char *argv[])
 /* Process command line.  After optionInit() the program requires argc == 4
  * (program name plus chromSizes, bedFile and pslFile). */
 {
 optionInit(&argc, argv, optionSpecs);
 if (argc != 4)
     usage("Too few arguments");  /* NOTE(review): also reached when too many arguments are given */
+if (optionExists("tabs"))
+    doTabs = TRUE;
 if (optionExists("keepQuery"))
     keepQuery = TRUE;
 cnvBedToPsl(argv[1], argv[2], argv[3]);
 return 0;
 }