533112afe2a2005e80cdb1f82904ea65032d4302 braney Sat Oct 2 11:37:34 2021 -0700 split hg/lib into two separate libaries, one only used by the cgis diff --git src/hg/cgilib/annoStreamDbFactorSource.c src/hg/cgilib/annoStreamDbFactorSource.c new file mode 100644 index 0000000..ce03287 --- /dev/null +++ src/hg/cgilib/annoStreamDbFactorSource.c @@ -0,0 +1,213 @@ +/* annoStreamDbFactorSource -- factorSource track w/related tables */ + +#include "annoStreamDbFactorSource.h" +#include "annoStreamDb.h" +#include "factorSource.h" +#include "hdb.h" +#include "sqlNum.h" + +static char *asfsAutoSqlString = +"table factorSourcePlus" +"\"factorSourcePlus: Peaks clustered by factor w/normalized scores plus cell type and treatment\"" +" (" +" string chrom; \"Chromosome\"" +" uint chromStart; \"Start position in chrom\"" +" uint chromEnd; \"End position in chrom\"" +" string name; \"factor that has a peak here\"" +" uint score; \"Score from 0-1000\"" +" uint expCount; \"Number of experiment values\"" +" uint[expCount] expNums; \"Comma separated list of experiment numbers\"" +" float[expCount] expScores; \"Comma separated list of experiment scores\"" +" string[expCount] cellType; \"Comma separated list of experiment cell types\"" +" string[expCount] treatment; \"Comma separated list of experiment treatments\"" +" )"; + +#define FACTORSOURCEPLUS_NUM_COLS 10 + +struct annoStreamDbFactorSource +{ + struct annoStreamer streamer; // Parent class members & methods (external interface) + // Private members + struct annoStreamer *mySource; // Internal source of track table rows + // Data from related tables + int expCount; // Number of experiments whose results were clustered + char **expCellType; // Array[expCount] of cellType used in each experiment + char **expTreatment; // Array[expCount] of treatment used in each experiment +}; + +struct asObject *annoStreamDbFactorSourceAsObj() +/* Return an autoSql object that describs fields of a joining query on a factorSource table + * and its inputs. */ +{ +return asParseText(asfsAutoSqlString); +} + +static void asdfsSetRegion(struct annoStreamer *sSelf, char *chrom, uint rStart, uint rEnd) +/* Pass setRegion down to internal source. */ +{ +annoStreamerSetRegion(sSelf, chrom, rStart, rEnd); +struct annoStreamDbFactorSource *self = (struct annoStreamDbFactorSource *)sSelf; +self->mySource->setRegion(self->mySource, chrom, rStart, rEnd); +} + +static char *commaSepFromExpData(char **expAttrs, int *expNums, uint expCount, struct lm *lm) +/* Look up experiment attribute strings by experiment numbers; return a comma-separated string + * of experiment attributes, allocated using lm. */ +{ +int i; +int len = 0, offset = 0; +for (i = 0; i < expCount; i++) + len += (strlen(expAttrs[expNums[i]]) + 1); +char *str = lmAlloc(lm, len + 1); +for (i = 0; i < expCount; i++) + { + char *attr = expAttrs[expNums[i]]; + safef(str + offset, len + 1 - offset, "%s,", attr); + offset += strlen(attr) + 1; + } +return str; +} + +INLINE void getCommaSepInts(char *commaSep, int *values, int expectedCount) +/* Parse comma-separated ints into values[]. This is like sqlSignedStaticArray, + * but we give it an expected count and it's thread-safe because it doesn't use + * static variables. */ +{ +char *s = commaSep, *e = NULL; +int count; +for (count = 0; isNotEmpty(s); count++, s = e) + { + e = strchr(s, ','); + if (e != NULL) + *e++ = 0; + if (count < expectedCount) + values[count] = sqlSigned(s); + } +if (count != expectedCount) + errAbort("getCommaSepInts: expected %d values but found %d", expectedCount, count); +} + +static void factorSourceToFactorSourcePlus(struct annoStreamDbFactorSource *self, + char **fsWords, char **fspWords, struct lm *lm) +/* Copy fsWords into fspWords and add columns for cellTypes and treatments corresponding to + * expNums. */ +{ +// Parse out experiment IDs from expNums column +uint expCount = sqlUnsigned(fsWords[5]); +int expNums[expCount]; +getCommaSepInts(fsWords[6], expNums, expCount); +// Copy factorSource columns, then add experiment attribute columns. +int i; +for (i = 0; i < FACTORSOURCE_NUM_COLS; i++) + fspWords[i] = fsWords[i]; +fspWords[i++] = commaSepFromExpData(self->expCellType, expNums, expCount, lm); +fspWords[i++] = commaSepFromExpData(self->expTreatment, expNums, expCount, lm); +if (i != FACTORSOURCEPLUS_NUM_COLS) + errAbort("annoStreamDbFactorSource %s: expected to make %d columns but made %d", + self->streamer.name, FACTORSOURCEPLUS_NUM_COLS, i); +} + +static struct annoRow *asdfsNextRow(struct annoStreamer *sSelf, char *minChrom, uint minEnd, + struct lm *lm) +/* Join experiment data with expNums from track table and apply filters. */ +{ +struct annoStreamDbFactorSource *self = (struct annoStreamDbFactorSource *)sSelf; +char **fspWords; +lmAllocArray(lm, fspWords, FACTORSOURCEPLUS_NUM_COLS); +struct annoRow *fsRow; +boolean rightJoinFail = FALSE; +while ((fsRow = self->mySource->nextRow(self->mySource, minChrom, minEnd, lm)) != NULL) + { + char **fsWords = fsRow->data; + factorSourceToFactorSourcePlus(self, fsWords, fspWords, lm); + // If there are filters on experiment attributes, apply them, otherwise just return aRow. + if (sSelf->filters) + { + boolean fails = annoFilterRowFails(sSelf->filters, fspWords, FACTORSOURCEPLUS_NUM_COLS, + &rightJoinFail); + // If this row passes the filter, or fails but is rightJoin, then we're done looking. + if (!fails || rightJoinFail) + break; + } + else + // no filtering to do, just use this row + break; + } +if (fsRow != NULL) + return annoRowFromStringArray(fsRow->chrom, fsRow->start, fsRow->end, rightJoinFail, + fspWords, FACTORSOURCEPLUS_NUM_COLS, lm); +else + return NULL; +} + +static void getExperimentData(struct annoStreamDbFactorSource *self, char *db, + char *sourceTable, char *inputsTable) +/* Join two small tables to relate experiment IDs from the track tables expNums column + * to experiment attributes cellType and treatment. */ +{ +struct sqlConnection *conn = hAllocConn(db); +self->expCount = sqlRowCount(conn, sourceTable); +AllocArray(self->expCellType, self->expCount); +AllocArray(self->expTreatment, self->expCount); +struct dyString *query = sqlDyStringCreate("select id, cellType, treatment " + "from %s, %s where %s.description = %s.source", + sourceTable, inputsTable, sourceTable, inputsTable); +struct sqlResult *sr = sqlGetResult(conn, query->string); +char **row; +while ((row = sqlNextRow(sr)) != NULL) + { + int id = sqlSigned(row[0]); + if (id < 0 || id >= self->expCount) + errAbort("annoStreamDbFactorSource %s: found out-of-range id %d in %s (expected [0-%d])", + ((struct annoStreamer *)self)->name, id, sourceTable, self->expCount - 1); + self->expCellType[id] = cloneString(row[1]); + self->expTreatment[id] = cloneString(row[2]); + } +sqlFreeResult(&sr); +hFreeConn(&conn); +} + +static void asdfsClose(struct annoStreamer **pSSelf) +/* Free up state. */ +{ +if (pSSelf == NULL) + return; +struct annoStreamDbFactorSource *self = *(struct annoStreamDbFactorSource **)pSSelf; +self->mySource->close(&(self->mySource)); +int i; +for (i = 0; i < self->expCount; i++) + { + freeMem(self->expCellType[i]); + freeMem(self->expTreatment[i]); + } +freez(&self->expCellType); +freez(&self->expTreatment); +annoStreamerFree(pSSelf); +} + +struct annoStreamer *annoStreamDbFactorSourceNew(char *db, char *trackTable, char *sourceTable, + char *inputsTable, struct annoAssembly *aa, + int maxOutRows) +/* Create an annoStreamer (subclass) object using three database tables: + * trackTable: the table for a track with type factorSource (bed5 + exp{Count,Nums,Scores}) + * sourceTable: trackTable's tdb setting sourceTable; expNums -> source name "cellType+lab+antibody" + * inputsTable: trackTable's tdb setting inputTrackTable; source name -> cellType, treatment, etc. + */ +{ +struct annoStreamDbFactorSource *self; +AllocVar(self); +struct annoStreamer *streamer = &(self->streamer); +// Set up external streamer interface +annoStreamerInit(streamer, aa, annoStreamDbFactorSourceAsObj(), trackTable); +streamer->rowType = arWords; +// Get internal streamer for trackTable +self->mySource = annoStreamDbNew(db, trackTable, aa, maxOutRows, NULL); +// Slurp in data from small related tables +getExperimentData(self, db, sourceTable, inputsTable); +// Override methods that need to pass through to internal source: +streamer->setRegion = asdfsSetRegion; +streamer->nextRow = asdfsNextRow; +streamer->close = asdfsClose; + +return (struct annoStreamer *)self; +}