99d93d87504dd96cb48f7730ca1162c2c94ffc82 braney Wed May 28 09:23:28 2025 -0700 bedMethyl custom track factory diff --git src/hg/lib/customFactory.c src/hg/lib/customFactory.c index bddb0d1fbf2..fecc83a35e4 100644 --- src/hg/lib/customFactory.c +++ src/hg/lib/customFactory.c @@ -40,30 +40,31 @@ #include "makeItemsItem.h" #include "bedDetail.h" #include "pgSnp.h" #include "regexHelper.h" #include "chromInfo.h" #include "grp.h" #include "trackHub.h" #include "bedTabix.h" #include "barChartBed.h" #include "barChartUi.h" #include "interact.h" #include "interactUi.h" #include "hic.h" #include "cgiApoptosis.h" #include "chromAlias.h" +#include "bedMethyl.h" // placeholder when custom track uploaded file name is not known #define CT_NO_FILE_NAME "custom track" static boolean doExtraChecking = FALSE; /*** Utility routines used by many factories. ***/ char *customFactoryNextTilTrack(struct customPp *cpp) /* Return next line. Return NULL at end of input or at line starting with * "track." */ { char *line = customPpNext(cpp); if (line != NULL && startsWithWord("track", line)) { @@ -326,30 +327,71 @@ struct customPp *cpp, char *type, struct customTrack *track) /* Return TRUE if looks like we're handling a microarray track */ { return bedRecognizer(fac, cpp, type, track) && (track->fieldCount == 15); } static boolean coloredExonRecognizer(struct customFactory *fac, struct customPp *cpp, char *type, struct customTrack *track) /* Return TRUE if looks like we're handling a colored-exon track */ { return bedRecognizer(fac, cpp, type, track) && (track->fieldCount >= 14); } + +static struct pipeline *bedMethylLoaderPipe(struct customTrack *track) +/* Set up the loader/hgLoadBed pipeline (using -sqlTable=loader/bedMethyl.sql) that will load bedMethyl data into the database.
*/ +{ +/* running the single command: + * hgLoadBed -customTrackLoader -sqlTable=loader/bedMethyl.sql -renameSqlTable -trimSqlTable -tmpDir=/data/tmp + * -maxChromNameLength=${nameLength} customTrash tableName stdin + * -customTrackLoader turns on options: -noNameIx -noHistory -ignoreEmpty + * -allowStartEqualEnd -allowNegativeScores -verbose=0 + */ +struct dyString *tmpDy = dyStringNew(0); +int index = 5; /* verify this references the first NULL as cmd1[index] */ +char *cmd1[] = {"loader/hgLoadBed", "-customTrackLoader", "-sqlTable=loader/bedMethyl.sql", + "-renameSqlTable", "-trimSqlTable", + NULL, NULL, NULL, NULL, NULL, NULL}; +char *tmpDir = cfgOptionDefault("customTracks.tmpdir", "/data/tmp"); +struct stat statBuf; + +if (stat(tmpDir,&statBuf)) + errAbort("can not find custom track tmp load directory: '%s'<BR>\n" + "create directory or specify in hg.conf customTracks.tmpdir", tmpDir); +dyStringPrintf(tmpDy, "-tmpDir=%s", tmpDir); +cmd1[index++] = dyStringCannibalize(&tmpDy); tmpDy = dyStringNew(0); +dyStringPrintf(tmpDy, "-maxChromNameLength=%d", track->maxChromName); +cmd1[index++] = dyStringCannibalize(&tmpDy); tmpDy = dyStringNew(0); +cmd1[index++] = CUSTOM_TRASH; +cmd1[index++] = track->dbTableName; +cmd1[index++] = "stdin"; +assert(index <= ArraySize(cmd1)); + +/* the "/dev/null" file isn't actually used for anything, but it is used + * in the pipelineOpen1 call to properly get a pipe started that isn't simply + * to STDOUT which is what a NULL would do here instead of this name. + * This function exits if it can't get the pipe created + * The dbStderrFile will get stderr messages from hgLoadBed into + * our private error log so we can send them back to the user + */ +return pipelineOpen1(cmd1, pipelineWrite | pipelineNoAbort, + "/dev/null", track->dbStderrFile, 0); +} + static struct pipeline *bedLoaderPipe(struct customTrack *track) /* Set up pipeline that will load wig into database.
*/ { /* running the single command: * hgLoadBed -customTrackLoader -tmpDir=/data/tmp * -maxChromNameLength=${nameLength} customTrash tableName stdin * -customTrackLoader turns on options: -noNameIx -noHistory -ignoreEmpty * -allowStartEqualEnd -allowNegativeScores -verbose=0 */ struct dyString *tmpDy = dyStringNew(0); int index = 3; /* verify this references the first NULL as cmd1[index] */ char *cmd1[] = {"loader/hgLoadBed", "-customTrackLoader", "-lineLimit=50000000", NULL, NULL, NULL, NULL, NULL, NULL, NULL}; char *tmpDir = cfgOptionDefault("customTracks.tmpdir", "/data/tmp"); struct stat statBuf; @@ -396,30 +438,116 @@ while( (i < 10) && lineFileNext(lf, &line, NULL)) { dyStringPrintf(errDy, "%s<BR>\n", line); ++i; // break out of loop after wibSizeLimit msg to avoid printing stuff from other commands in the pipe. if(strstr(line, "wibSizeLimit")) break; } lineFileClose(&lf); if (i < 1) dyStringPrintf(errDy, "unknown failure<BR>\n"); unlink(track->dbStderrFile); errAbort("%s",dyStringCannibalize(&errDy)); } +static boolean bedMethylRecognizer(struct customFactory *fac, + struct customPp *cpp, char *type, + struct customTrack *track) +/* Return TRUE if type matches bedMethyl (per sameType), setting track->dbTrackType; the row checks after that return are inside #ifdef NOTNOW. */ +{ +if (!sameType(type, "bedMethyl")) + return FALSE; + +track->dbTrackType = cloneString("bedMethyl"); +return TRUE; +#ifdef NOTNOW +char *line = customFactoryNextRealTilTrack(cpp); +char *dupe = cloneString(line); +char *row[1024]; +int wordCount = chopLine(dupe, row); +struct dyString *whyNotBed = dyStringNew(0); +char *ctDb = ctGenomeOrCurrent(track); +boolean isBed = rowIsBed(row, 9, ctDb, whyNotBed); +struct lineFile *lf = cpp->fileStack; +if (!isBed && type != NULL) + lineFileAbort(lf, "%s", whyNotBed->string); +dyStringFree(&whyNotBed); +freeMem(dupe); +lineFileExpectAtLeast(lf, 18, wordCount); +track->fieldCount = 18; +track->dbTrackType = cloneString("bedMethyl"); +customPpReuse(cpp, line); +return TRUE; +#endif +} + +static struct customTrack
*bedMethylFinish(struct customTrack *track, + boolean dbRequested) +/* Finish up bedMethyl tracks: when dbRequested, write track->bedList through bedMethylLoaderPipe as bedMethyl rows. */ +{ +/* If necessary load database */ +if (dbRequested) + { + if (! fileExists("loader/hgLoadBed") ) + { + errAbort("loading custom tracks: can not find " + "'cgi-bin/loader/hgLoadBed' command\n"); + } + customFactorySetupDbTrack(track); +//struct pipeline *bedLoaderPipe(struct customTrack *track); + struct pipeline *dataPipe = bedMethylLoaderPipe(track); + FILE *out = pipelineFile(dataPipe); + struct bed *bed; + for (bed = track->bedList; bed != NULL; bed = bed->next) + { + bedMethylOutput((struct bedMethyl *)bed, out, '\t', '\n'); + } + fflush(out); /* help see error from loader failure */ + if(ferror(out) || pipelineWait(dataPipe)) + pipelineFailExit(track); /* prints error and exits */ + unlink(track->dbStderrFile); /* no errors, not used */ + pipelineFree(&dataPipe); + } +return track; +} + +static struct customTrack *bedMethylLoader(struct customFactory *fac, + struct hash *chromHash, + struct customPp *cpp, struct customTrack *track, boolean dbRequested) +/* Load up bedMethyl data until get next track line. NOTE(review): the errAbort text below still names encodePeak, and the column check uses track->fieldCount, which bedMethylRecognizer returns without setting (its assignment is under #ifdef NOTNOW); confirm. */ +{ +char *line; +//char *db = ctGenomeOrCurrent(track); +struct bedMethylBed *bedList = NULL; +if (!dbRequested) + errAbort("encodePeak custom track type unavailable without custom trash database. 
Please set that up in your hg.conf"); +while ((line = customFactoryNextRealTilTrack(cpp)) != NULL) + { + char *row[1024]; + int wordCount = chopLine(line, row); + struct lineFile *lf = cpp->fileStack; + lineFileExpectAtLeast(lf, track->fieldCount, wordCount); + //struct encodePeak *peak = customTrackEncodePeak(db, row, pt, chromHash, lf); + struct bedMethyl *bedMethyl = bedMethylLoad(row); + slAddHead(&bedList, bedMethyl); + } +slReverse(&bedList); +track->bedList = (struct bed *)bedList; +return bedMethylFinish(track, dbRequested); +} + static struct customTrack *bedFinish(struct customTrack *track, boolean dbRequested) /* Finish up bed tracks (and others that create track->bedList). */ { /* Add type based on field count */ char buf[20]; safef(buf, sizeof(buf), "%s %d .", track->tdb->type != NULL && startsWithWord("bedGraph", track->tdb->type) ? "bedGraph" : "bed", track->fieldCount); track->tdb->type = cloneString(buf); track->dbTrackType = cloneString(buf); safef(buf, sizeof(buf), "%d", track->fieldCount); ctAddToSettings(track, "fieldCount", cloneString(buf)); /* If necessary add track offsets.
*/ int offset = track->offset; if (offset != 0) @@ -718,30 +846,39 @@ NULL, "array", microarrayRecognizer, microarrayLoader, }; static struct customFactory coloredExonFactory = /* Factory for bed tracks */ { NULL, "coloredExon", coloredExonRecognizer, coloredExonLoader, }; +static struct customFactory bedMethylFactory = +/* Factory for bedMethyl tracks */ + { + NULL, + "bedMethyl", + bedMethylRecognizer, + bedMethylLoader, + }; + /**** ENCODE PEAK Factory - closely related to BED but not quite ***/ static boolean encodePeakRecognizer(struct customFactory *fac, struct customPp *cpp, char *type, struct customTrack *track) /* Return TRUE if looks like we're handling an encodePeak track */ { enum encodePeakType pt = 0; // type is required if (type == NULL || (!sameType(type, fac->name) && !sameString(type, "narrowPeak") && !sameString(type, "broadPeak") && !sameString(type, "gappedPeak"))) return FALSE; char *line = customFactoryNextRealTilTrack(cpp); if (line == NULL) @@ -3589,30 +3726,31 @@ slAddTail(&factoryList, &coloredExonFactory); slAddTail(&factoryList, &encodePeakFactory); slAddTail(&factoryList, &bedDetailFactory); slAddTail(&factoryList, &adjacencyFactory); slAddTail(&factoryList, &bamFactory); slAddTail(&factoryList, &vcfTabixFactory); slAddTail(&factoryList, &makeItemsFactory); slAddTail(&factoryList, &bigDataOopsFactory); slAddTail(&factoryList, &barChartFactory); slAddTail(&factoryList, &bigBarChartFactory); slAddTail(&factoryList, &interactFactory); slAddTail(&factoryList, &bigInteractFactory); slAddTail(&factoryList, &hicFactory); slAddTail(&factoryList, &bigRmskFactory); slAddTail(&factoryList, &bigLollyFactory); + slAddTail(&factoryList, &bedMethylFactory); } } struct customFactory *customFactoryFind(char *genomeDb, struct customPp *cpp, char *type, struct customTrack *track) /* Figure out factory that can handle this track. The track is * loaded from the track line if any, and type is the type element * if any from that track.
*/ { struct customFactory *fac; customFactoryInit(); for (fac = factoryList; fac != NULL; fac = fac->next) if (fac->recognizer(fac, cpp, type, track)) break; return fac;