99d93d87504dd96cb48f7730ca1162c2c94ffc82
braney
Wed May 28 09:23:28 2025 -0700
bedMethyl custom track factory
diff --git src/hg/lib/customFactory.c src/hg/lib/customFactory.c
index bddb0d1fbf2..fecc83a35e4 100644
--- src/hg/lib/customFactory.c
+++ src/hg/lib/customFactory.c
@@ -40,30 +40,31 @@
#include "makeItemsItem.h"
#include "bedDetail.h"
#include "pgSnp.h"
#include "regexHelper.h"
#include "chromInfo.h"
#include "grp.h"
#include "trackHub.h"
#include "bedTabix.h"
#include "barChartBed.h"
#include "barChartUi.h"
#include "interact.h"
#include "interactUi.h"
#include "hic.h"
#include "cgiApoptosis.h"
#include "chromAlias.h"
+#include "bedMethyl.h"
// placeholder when custom track uploaded file name is not known
#define CT_NO_FILE_NAME "custom track"
static boolean doExtraChecking = FALSE;
/*** Utility routines used by many factories. ***/
char *customFactoryNextTilTrack(struct customPp *cpp)
/* Return next line. Return NULL at end of input or at line starting with
* "track." */
{
char *line = customPpNext(cpp);
if (line != NULL && startsWithWord("track", line))
{
@@ -326,30 +327,71 @@
struct customPp *cpp, char *type,
struct customTrack *track)
/* Return TRUE if looks like we're handling a microarray track */
{
return bedRecognizer(fac, cpp, type, track) && (track->fieldCount == 15);
}
static boolean coloredExonRecognizer(struct customFactory *fac,
struct customPp *cpp, char *type,
struct customTrack *track)
/* Return TRUE if looks like we're handling a colored-exon track */
{
return bedRecognizer(fac, cpp, type, track) && (track->fieldCount >= 14);
}
+
+static struct pipeline *bedMethylLoaderPipe(struct customTrack *track)
+/* Set up pipeline that will load bedMethyl rows into the database.
+ * The returned pipeline is opened for writing; rows written to it are fed to
+ * hgLoadBed's stdin and loaded into table track->dbTableName.  stderr of the
+ * loader goes to track->dbStderrFile.  Aborts if the custom track tmp
+ * directory cannot be stat'ed. */
+{
+/* running the single command:
+ *	hgLoadBed -customTrackLoader -sqlTable=loader/bedMethyl.sql
+ *		-renameSqlTable -trimSqlTable -tmpDir=/data/tmp
+ *		-maxChromNameLength=${nameLength} customTrash tableName stdin
+ * -customTrackLoader turns on options: -noNameIx -noHistory -ignoreEmpty
+ *	-allowStartEqualEnd -allowNegativeScores -verbose=0
+ */
+char *cmd1[] = {"loader/hgLoadBed", "-customTrackLoader", "-sqlTable=loader/bedMethyl.sql",
+    "-renameSqlTable", "-trimSqlTable",
+    NULL, NULL, NULL, NULL, NULL, NULL};
+int index = 5;	/* verify this references the first NULL as cmd1[index] */
+char *tmpDir = cfgOptionDefault("customTracks.tmpdir", "/data/tmp");
+struct stat statBuf;
+struct dyString *tmpDy = NULL;
+
+if (stat(tmpDir,&statBuf))
+    errAbort("can not find custom track tmp load directory: '%s'<br>\n"
+	"create directory or specify in hg.conf customTracks.tmpdir", tmpDir);
+
+/* Each formatted option gets its own dyString; cannibalizing hands the
+ * string over to cmd1, so no buffer is left allocated and unused. */
+tmpDy = dyStringNew(0);
+dyStringPrintf(tmpDy, "-tmpDir=%s", tmpDir);
+cmd1[index++] = dyStringCannibalize(&tmpDy);
+tmpDy = dyStringNew(0);
+dyStringPrintf(tmpDy, "-maxChromNameLength=%d", track->maxChromName);
+cmd1[index++] = dyStringCannibalize(&tmpDy);
+cmd1[index++] = CUSTOM_TRASH;
+cmd1[index++] = track->dbTableName;
+cmd1[index++] = "stdin";
+/* strictly less than: the last slot must stay NULL to terminate the
+ * argument vector */
+assert(index < ArraySize(cmd1));
+
+/* the "/dev/null" file isn't actually used for anything, but it is used
+ * in the pipeLineOpen to properly get a pipe started that isn't simply
+ * to STDOUT which is what a NULL would do here instead of this name.
+ *	This function exits if it can't get the pipe created
+ *	The dbStderrFile will get stderr messages from hgLoadBed into
+ *	our private error log so we can send it back to the user
+ */
+return pipelineOpen1(cmd1, pipelineWrite | pipelineNoAbort,
+	"/dev/null", track->dbStderrFile, 0);
+}
+
static struct pipeline *bedLoaderPipe(struct customTrack *track)
/* Set up pipeline that will load wig into database. */
{
/* running the single command:
* hgLoadBed -customTrackLoader -tmpDir=/data/tmp
* -maxChromNameLength=${nameLength} customTrash tableName stdin
* -customTrackLoader turns on options: -noNameIx -noHistory -ignoreEmpty
* -allowStartEqualEnd -allowNegativeScores -verbose=0
*/
struct dyString *tmpDy = dyStringNew(0);
int index = 3; /* verify this references the first NULL as cmd1[index] */
char *cmd1[] = {"loader/hgLoadBed", "-customTrackLoader",
"-lineLimit=50000000", NULL, NULL, NULL, NULL, NULL, NULL, NULL};
char *tmpDir = cfgOptionDefault("customTracks.tmpdir", "/data/tmp");
struct stat statBuf;
@@ -396,30 +438,116 @@
while( (i < 10) && lineFileNext(lf, &line, NULL))
{
dyStringPrintf(errDy, "%s
\n", line);
++i;
// break out of loop after wibSizeLimit msg to avoid printing stuff from other commands in the pipe.
if(strstr(line, "wibSizeLimit"))
break;
}
lineFileClose(&lf);
if (i < 1)
dyStringPrintf(errDy, "unknown failure
\n");
unlink(track->dbStderrFile);
errAbort("%s",dyStringCannibalize(&errDy));
}
+static boolean bedMethylRecognizer(struct customFactory *fac,
+	struct customPp *cpp, char *type,
+	struct customTrack *track)
+/* Return TRUE if looks like we're handling a bedMethyl track.
+ * The decision is based only on the declared type: no line is read from
+ * cpp and no column-level validation of the data happens here.  On a match
+ * track->dbTrackType is set to a fresh copy of "bedMethyl". */
+{
+if (sameType(type, "bedMethyl"))
+    {
+    track->dbTrackType = cloneString("bedMethyl");
+    return TRUE;
+    }
+return FALSE;
+}
+
+static struct customTrack *bedMethylFinish(struct customTrack *track,
+	boolean dbRequested)
+/* Finish up a bedMethyl track whose items are on track->bedList.
+ * When dbRequested is set, every item is written through the hgLoadBed
+ * pipeline into the track's database table; otherwise nothing is done.
+ * Returns track in both cases. */
+{
+/* Nothing to load unless the database was asked for */
+if (!dbRequested)
+    return track;
+
+if (! fileExists("loader/hgLoadBed") )
+    errAbort("loading custom tracks: can not find "
+	"'cgi-bin/loader/hgLoadBed' command\n");
+
+customFactorySetupDbTrack(track);
+struct pipeline *loaderPipe = bedMethylLoaderPipe(track);
+FILE *loaderIn = pipelineFile(loaderPipe);
+
+/* The list is stored as struct bed * on the track but its elements are
+ * bedMethyl records, hence the cast when writing each one out. */
+struct bed *item = track->bedList;
+while (item != NULL)
+    {
+    bedMethylOutput((struct bedMethyl *)item, loaderIn, '\t', '\n');
+    item = item->next;
+    }
+
+fflush(loaderIn);		/* help see error from loader failure */
+if (ferror(loaderIn) || pipelineWait(loaderPipe))
+    pipelineFailExit(track);	/* prints error and exits */
+unlink(track->dbStderrFile);	/* no errors, not used */
+pipelineFree(&loaderPipe);
+return track;
+}
+
+static struct customTrack *bedMethylLoader(struct customFactory *fac,
+	struct hash *chromHash,
+	struct customPp *cpp, struct customTrack *track, boolean dbRequested)
+/* Load up bedMethyl data until get next track line.
+ * Each data line is split on white space, checked for a sufficient number
+ * of columns and converted with bedMethylLoad().  The resulting list is
+ * stored on track->bedList in input order and then handed to
+ * bedMethylFinish().  Aborts if dbRequested is FALSE.
+ * fac and chromHash are not used. */
+{
+/* Number of columns in a bedMethyl row.
+ * NOTE(review): if bedMethyl.h provides a column-count macro, prefer it
+ * over this literal. */
+const int bedMethylFieldCount = 18;
+char *line;
+struct bedMethyl *bedList = NULL;
+if (!dbRequested)
+    errAbort("bedMethyl custom track type unavailable without custom trash database. Please set that up in your hg.conf");
+
+/* track->fieldCount may still be zero here, so never require fewer
+ * columns than bedMethylLoad() is going to read from row[]. */
+int minFields = (track->fieldCount > bedMethylFieldCount) ?
+	track->fieldCount : bedMethylFieldCount;
+while ((line = customFactoryNextRealTilTrack(cpp)) != NULL)
+    {
+    char *row[1024];
+    int wordCount = chopLine(line, row);
+    struct lineFile *lf = cpp->fileStack;
+    /* row[] is uninitialized beyond wordCount; reject short lines before
+     * they reach the loader */
+    lineFileExpectAtLeast(lf, minFields, wordCount);
+    struct bedMethyl *bedMethyl = bedMethylLoad(row);
+    slAddHead(&bedList, bedMethyl);
+    }
+slReverse(&bedList);
+track->bedList = (struct bed *)bedList;
+return bedMethylFinish(track, dbRequested);
+}
+
static struct customTrack *bedFinish(struct customTrack *track,
boolean dbRequested)
/* Finish up bed tracks (and others that create track->bedList). */
{
/* Add type based on field count */
char buf[20];
safef(buf, sizeof(buf), "%s %d .", track->tdb->type != NULL && startsWithWord("bedGraph", track->tdb->type) ? "bedGraph" : "bed", track->fieldCount);
track->tdb->type = cloneString(buf);
track->dbTrackType = cloneString(buf);
safef(buf, sizeof(buf), "%d", track->fieldCount);
ctAddToSettings(track, "fieldCount", cloneString(buf));
/* If necessary add track offsets. */
int offset = track->offset;
if (offset != 0)
@@ -718,30 +846,39 @@
NULL,
"array",
microarrayRecognizer,
microarrayLoader,
};
static struct customFactory coloredExonFactory =
/* Factory for colored-exon tracks.  Positional initializer; the field
 * order is set by struct customFactory, which is declared elsewhere. */
{
NULL,				/* presumably the list link, filled in when registered */
"coloredExon",			/* factory name */
coloredExonRecognizer,		/* accepts BED input with at least 14 fields */
coloredExonLoader,		/* loader paired with the recognizer */
};
+static struct customFactory bedMethylFactory =
+/* Factory for bedMethyl tracks.  Positional initializer; the field
+ * order is set by struct customFactory, which is declared elsewhere. */
+ {
+ NULL,				/* presumably the list link, filled in when registered */
+ "bedMethyl",			/* factory name */
+ bedMethylRecognizer,		/* accepts tracks whose type is bedMethyl */
+ bedMethylLoader,		/* reads the rows and loads them via hgLoadBed */
+ };
+
/**** ENCODE PEAK Factory - closely related to BED but not quite ***/
static boolean encodePeakRecognizer(struct customFactory *fac,
struct customPp *cpp, char *type,
struct customTrack *track)
/* Return TRUE if looks like we're handling an encodePeak track */
{
enum encodePeakType pt = 0;
// type is required
if (type == NULL ||
(!sameType(type, fac->name) && !sameString(type, "narrowPeak") &&
!sameString(type, "broadPeak") && !sameString(type, "gappedPeak")))
return FALSE;
char *line = customFactoryNextRealTilTrack(cpp);
if (line == NULL)
@@ -3589,30 +3726,31 @@
slAddTail(&factoryList, &coloredExonFactory);
slAddTail(&factoryList, &encodePeakFactory);
slAddTail(&factoryList, &bedDetailFactory);
slAddTail(&factoryList, &adjacencyFactory);
slAddTail(&factoryList, &bamFactory);
slAddTail(&factoryList, &vcfTabixFactory);
slAddTail(&factoryList, &makeItemsFactory);
slAddTail(&factoryList, &bigDataOopsFactory);
slAddTail(&factoryList, &barChartFactory);
slAddTail(&factoryList, &bigBarChartFactory);
slAddTail(&factoryList, &interactFactory);
slAddTail(&factoryList, &bigInteractFactory);
slAddTail(&factoryList, &hicFactory);
slAddTail(&factoryList, &bigRmskFactory);
slAddTail(&factoryList, &bigLollyFactory);
+ slAddTail(&factoryList, &bedMethylFactory);
}
}
struct customFactory *customFactoryFind(char *genomeDb, struct customPp *cpp,
char *type, struct customTrack *track)
/* Figure out factory that can handle this track. The track is
* loaded from the track line if any, and type is the type element
* if any from that track. */
{
struct customFactory *fac;
customFactoryInit();
for (fac = factoryList; fac != NULL; fac = fac->next)
if (fac->recognizer(fac, cpp, type, track))
break;
return fac;