d1aafb0904765d2abfbb8b7eb92826460962b0ed angie Fri May 1 16:26:50 2015 -0700 Libifying hgTables' code that parses user's region input, so I can use it to add support for user regions to hgIntegrator. refs #14579 diff --git src/hg/lib/userRegions.c src/hg/lib/userRegions.c new file mode 100644 index 0000000..5a335e9 --- /dev/null +++ src/hg/lib/userRegions.c @@ -0,0 +1,202 @@ +/* userRegions: parse user regions entered as BED3, BED4 or chr:start-end + * optionally followed by name. */ + +/* Copyright (C) 2015 The Regents of the University of California + * See README in this or parent directory for licensing information. */ + +#include "common.h" +#include "hui.h" +#include "linefile.h" +#include "trashDir.h" +#include "userRegions.h" + +static boolean illegalCoordinate(char *db, char *chrom, int start, int end, char *line, int lineIx, + struct dyString *dyWarn) +/* verify start and end are legal for this chrom */ +{ +int maxEnd = hChromSize(db, chrom); +if (start < 0) + { + dyStringPrintf(dyWarn, "line %d: '%s': chromStart (%d) less than zero\n", + lineIx, line, start); + return TRUE; + } +if (end > maxEnd) + { + dyStringPrintf(dyWarn, "line %d: '%s': chromEnd (%d) greater than chrom length (%s:%d)\n", + lineIx, line, end, chrom, maxEnd); + return TRUE; + } +if (start > end) + { + dyStringPrintf(dyWarn, "line %d: '%s': chromStart (%d) greater than chromEnd (%d)\n", + lineIx, line, start, end); + return TRUE; + } +return FALSE; +} + +static struct bed4 *parseRegionInput(char *db, char *inputString, int maxRegions, int maxErrs, + struct dyString *dyWarn) +/* scan the user region definition, turn into a bed list */ +{ +int regionCount = 0; +int errCount = 0; +struct bed4 *bedList = NULL; +struct lineFile *lf = lineFileOnString("userData", TRUE, inputString); +char *line = NULL; +while (lineFileNextReal(lf, &line)) + { + char *chromName = NULL; + int chromStart = 0; + int chromEnd = 0; + char *regionName = NULL; + // Chop a copy of line so we can display line if there's an error. + char copy[strlen(line)+1]; + safecpy(copy, sizeof(copy), line); + char *words[5]; + int wordCount = chopByWhite(copy, words, sizeof(words)); + boolean badFormat = FALSE; + boolean gotError = FALSE; + /* might be something of the form: chrom:start-end optionalRegionName */ + if (((1 == wordCount) || (2 == wordCount)) && + hgParseChromRange(NULL, words[0], &chromName, + &chromStart, &chromEnd)) + { + if (2 == wordCount) + regionName = cloneString(words[1]); + } + else if (!((3 == wordCount) || (4 == wordCount))) + { + dyStringPrintf(dyWarn, "line %d: '%s': " + "unrecognized format. Please enter 3- or 4-column BED or " + "a chr:start-end position range optionally followed by a name.\n", + lf->lineIx, line); + badFormat = TRUE; + gotError = TRUE; + } + else + { + chromName = words[0]; + // Make sure chromStart and chromEnd are numbers + if (!isNumericString(words[1])) + { + dyStringPrintf(dyWarn, "line %d: '%s': chromStart must be a number but is '%s'\n", + lf->lineIx, line, words[1]); + gotError = TRUE; + } + if (!isNumericString(words[2])) + { + dyStringPrintf(dyWarn, "line %d: '%s': chromEnd must be a number but is '%s'\n", + lf->lineIx, line, words[2]); + gotError = TRUE; + } + if (! gotError) + { + chromStart = atoi(words[1]); + chromEnd = atoi(words[2]); + if (wordCount > 3) + regionName = cloneString(words[3]); + } + } + char *officialChromName = chromName ? hgOfficialChromName(db, chromName) : NULL; + if (! badFormat) + { + if (NULL == officialChromName) + { + dyStringPrintf(dyWarn, + "line %d: '%s': chrom name '%s' not recognized in this assembly\n", + lf->lineIx, line, chromName ? chromName : words[0]); + gotError = TRUE; + } + else if (illegalCoordinate(db, officialChromName, chromStart, chromEnd, line, lf->lineIx, + dyWarn)) + { + gotError = TRUE; + } + } + if (gotError) + { + errCount++; + if (errCount > maxErrs && maxErrs > 0) + { + dyStringPrintf(dyWarn, "Exceeded maximum number of errors (%d), quitting\n", maxErrs); + break; + } + else + continue; + } + ++regionCount; + if (regionCount > maxRegions && maxRegions > 0) + { + dyStringPrintf(dyWarn, + "line %d: limit of %d region definitions exceeded, skipping the rest\n", + lf->lineIx, maxRegions); + break; + } + struct bed4 *bedEl = bed4New(officialChromName, chromStart, chromEnd, regionName); + slAddHead(&bedList, bedEl); + } +lineFileClose(&lf); +// Keep regions in same order as user entered them: +slReverse(&bedList); +return (bedList); +} + +char *userRegionsParse(char *db, char *regionsText, int maxRegions, int maxErrs, + int *retRegionCount, char **retWarnText) +/* Parse user regions entered as BED3, BED4 or chr:start-end optionally followed by name. + * Return name of trash file containing BED for parsed regions if regionsText contains + * valid regions; otherwise NULL. + * If maxRegions <= 0, it is ignored; likewise for maxErrs. + * If retRegionCount is non-NULL, it will be set to the number of valid parsed regions + * in the trash file. + * If retWarnText is non-NULL, it will be set to a string containing warning and error + * messages encountered while parsing input. */ + +{ +char *trashFileName = NULL; +if (isNotEmpty(regionsText)) + { + char *copy = cloneString(regionsText); + struct dyString *dyWarn = dyStringNew(0); + struct bed4 *bedList = parseRegionInput(db, copy, maxRegions, maxErrs, dyWarn); + if (retWarnText != NULL) + { + if (dyWarn->stringSize > 0) + *retWarnText = dyStringCannibalize(&dyWarn); + else + { + *retWarnText = NULL; + dyStringFree(&dyWarn); + } + } + int regionCount = slCount(bedList); + if (retRegionCount != NULL) + *retRegionCount = regionCount; + if (regionCount > 0) + { + struct tempName tn; + trashDirFile(&tn, "hgtData", "user", ".region"); + trashFileName = cloneString(tn.forCgi); + FILE *f = mustOpen(trashFileName, "w"); + struct bed4 *bed; + for (bed = bedList; bed; bed = bed->next ) + { + char *name = bed->name ? bed->name : ""; + fprintf(f, "%s\t%d\t%d\t%s\n", + bed->chrom, bed->chromStart, bed->chromEnd, name); + } + carefulClose(&f); + } + freeMem(copy); + } +else + { + if (retRegionCount != NULL) + *retRegionCount = 0; + if (retWarnText != NULL) + *retWarnText = NULL; + } +return trashFileName; +}