02882e0cda463b6bee5f2061eb89b26a437e42e0 hiram Mon Aug 24 12:26:49 2020 -0700 allow genbank/refseq/ensembl chrom names from chromAlias table for input on bed file custom tracks refs #24396 diff --git src/hg/lib/customFactory.c src/hg/lib/customFactory.c index c38d3e6..fc95602 100644 --- src/hg/lib/customFactory.c +++ src/hg/lib/customFactory.c @@ -74,30 +74,42 @@ char *customFactoryNextRealTilTrack(struct customPp *cpp) /* Return next "real" line (not blank, not comment). * Return NULL at end of input or at line starting with * "track." */ { char *line = customPpNextReal(cpp); if (line != NULL && startsWithWord("track", line)) { customPpReuse(cpp, line); line = NULL; } return line; } +static char *customFactoryCheckChromNameAliasDb(char *genomeDb, char *word, struct lineFile *lf) +/* Abort if word is not a valid sequence name for genomeDb. If word is a recognizable alias + * or case-sensitive variant of a valid sequence, return the name hgOfficialChromName reports for it. */ +{ +static char *aliasName = NULL; +aliasName = hgOfficialChromName(genomeDb, word); +verbose(1,"# DBG: alias name check '%s' returned '%s'\n", word, aliasName); +if (! aliasName) + lineFileAbort(lf, "'%s' is not a valid sequence name in %s", word, genomeDb); +return aliasName; +} + void customFactoryCheckChromNameDb(char *genomeDb, char *word, struct lineFile *lf) /* Abort if word is not a valid sequence name for genomeDb. If word is a recognizable alias * or case-sensitive variant of a valid sequence, suggest that to the user. */ { char *officialChrom = hgOfficialChromName(genomeDb, word); if (! officialChrom) lineFileAbort(lf, "'%s' is not a valid sequence name in %s", word, genomeDb); else if (differentString(word, officialChrom)) lineFileAbort(lf, "'%s' is not a valid sequence name in %s (perhaps you mean '%s'?)", word, genomeDb, officialChrom); freeMem(officialChrom); } void customFactorySetupDbTrack(struct customTrack *track) /* Fill in fields most database-resident custom tracks need. 
*/ @@ -238,38 +250,40 @@ } if (wordCount > bedKnownFields) { if (reason) dyStringPrintf(reason, "Too many fields (expected at most %d, got %d)", bedKnownFields, wordCount); return FALSE; } char *officialChrom = hgOfficialChromName(db, row[0]); if (! officialChrom) { if (reason) dyStringPrintf(reason, "'%s' is not a valid sequence name in %s", row[0], db); return FALSE; } +#ifdef OBSOLETE_TO_BE_REMOVED else if (differentString(row[0], officialChrom)) { if (reason) dyStringPrintf(reason, "'%s' is not a valid sequence name in %s (perhaps you mean '%s'?)", row[0], db, officialChrom); freeMem(officialChrom); return FALSE; } +#endif freeMem(officialChrom); if (! isAllDigits(row[1])) { if (reason) dyStringPrintf(reason, "Second column needs to be a number but is '%s'", row[1]); return FALSE; } if (! isAllDigits(row[2])) { if (reason) dyStringPrintf(reason, "Third column needs to be a number but is '%s'", row[2]); return FALSE; } return TRUE; } @@ -447,37 +461,37 @@ static struct bed *customTrackBed(char *row[64], int wordCount, int chromSize, struct lineFile *lf) /* Convert a row of strings to a bed. * Intended to replace old customTrackBed, * currently new code is activated by hg.conf switch */ { struct bed * bed; AllocVar(bed); loadAndValidateBed(row, wordCount, wordCount, lf, bed, NULL, TRUE); if (bed->chromEnd > chromSize) lineFileAbort(lf, "chromEnd larger than chrom %s size (%d > %d)", bed->chrom, bed->chromEnd, chromSize); return bed; } static struct bed *customTrackBedOld(char *db, char *row[13], int wordCount, - struct hash *chromHash, struct lineFile *lf) + struct hash *chromHash, struct lineFile *lf, char *aliasName) /* Convert a row of strings to a bed. 
*/ { struct bed * bed; int count; AllocVar(bed); -bed->chrom = hashStoreName(chromHash, row[0]); +bed->chrom = hashStoreName(chromHash, aliasName); customFactoryCheckChromNameDb(db, bed->chrom, lf); bed->chromStart = lineFileNeedNum(lf, row, 1); bed->chromEnd = lineFileNeedNum(lf, row, 2); if (bed->chromEnd < 1) lineFileAbort(lf, "chromEnd less than 1 (%d)", bed->chromEnd); if (bed->chromEnd < bed->chromStart) lineFileAbort(lf, "chromStart after chromEnd (%d > %d)", bed->chromStart, bed->chromEnd); int chromSize = hChromSize(db, bed->chrom); if (bed->chromEnd > chromSize) lineFileAbort(lf, "chromEnd larger than chrom %s size (%d > %d)", bed->chrom, bed->chromEnd, chromSize); if (wordCount > 3) bed->name = cloneString(row[3]); @@ -589,59 +603,60 @@ lineFileAbort(lf, "expecting %d elements in expScores array (bed field 15)", bed->expCount); } } return bed; } static struct customTrack *bedLoader(struct customFactory *fac, struct hash *chromHash, struct customPp *cpp, struct customTrack *track, boolean dbRequested) /* Load up bed data until get next track line. 
*/ { char *line; char *db = ctGenomeOrCurrent(track); char *lastChrom = NULL; +char *aliasName = NULL; int chromSize = -1; boolean newCustomTrackValidate = sameOk(cfgOption("newCustomTrackValidate"), "on"); while ((line = customFactoryNextRealTilTrack(cpp)) != NULL) { char *row[bedKnownFields]; int wordCount = chopLine(line, row); struct lineFile *lf = cpp->fileStack; lineFileExpectAtLeast(lf, track->fieldCount, wordCount); /* since rows are often sorted, we can reduce repetitive checking */ if (differentStringNullOk(row[0], lastChrom)) { - customFactoryCheckChromNameDb(db, row[0], lf); - chromSize = hChromSize(db, row[0]); + aliasName = customFactoryCheckChromNameAliasDb(db, row[0], lf); + chromSize = hChromSize(db, aliasName); freez(&lastChrom); lastChrom = cloneString(row[0]); } struct bed *bed = NULL; /* Intended to replace old customTrackBed */ if (newCustomTrackValidate) { bed = customTrackBed(row, wordCount, chromSize, lf); - bed->chrom = hashStoreName(chromHash, row[0]); + bed->chrom = hashStoreName(chromHash, aliasName); } else { - bed = customTrackBedOld(db, row, wordCount, chromHash, lf); + bed = customTrackBedOld(db, row, wordCount, chromHash, lf, aliasName); } slAddHead(&track->bedList, bed); } slReverse(&track->bedList); return bedFinish(track, dbRequested); } static struct customTrack *bedGraphLoader(struct customFactory *fac, struct hash *chromHash, struct customPp *cpp, struct customTrack *track, boolean dbRequested) /* Load up bedGraph data until get next track line. */ { char buf[20]; bedLoader(fac, chromHash, cpp, track, dbRequested);