2ffc9d13a7475fbf6eb60a6b50d463ee11e7ff41 kate Wed Oct 25 18:06:54 2017 -0700 Implement V2 GTEx eQTL clusters as described in RM as 'permissive with ID'. This adds Ensembl gene ID to schemas. Also adding summary columns to cluster track (maxEffect, maxPvalue, effectType) to provide alternative to parsing comma-sep fields for table filtering. refs #15646 diff --git src/hg/lib/gtexEqtlCluster.c src/hg/lib/gtexEqtlCluster.c index b69b44c..157064e 100644 --- src/hg/lib/gtexEqtlCluster.c +++ src/hg/lib/gtexEqtlCluster.c @@ -1,306 +1,327 @@ /* gtexEqtlCluster.c was originally generated by the autoSql program, which also * generated gtexEqtlCluster.h and gtexEqtlCluster.sql. This module links the database and * the RAM representation of objects. */ #include "common.h" #include "linefile.h" #include "dystring.h" #include "jksql.h" #include "gtexEqtlCluster.h" -char *gtexEqtlClusterCommaSepFieldNames = "chrom,chromStart,chromEnd,name,score,target,distance,expCount,expNames,expScores,expPvals,expProbs"; +char *gtexEqtlClusterCommaSepFieldNames = "chrom,chromStart,chromEnd,name,score,targetId,target,distance,maxEffect,effectType,maxPvalue,expCount,expNames,expScores,expPvals,expProbs"; struct gtexEqtlCluster *gtexEqtlClusterLoadByQuery(struct sqlConnection *conn, char *query) /* Load all gtexEqtlCluster from table that satisfy the query given. * Where query is of the form 'select * from example where something=something' * or 'select example.* from example, anotherTable where example.something = * anotherTable.something'. * Dispose of this with gtexEqtlClusterFreeList(). 
*/ { struct gtexEqtlCluster *list = NULL, *el; struct sqlResult *sr; char **row; sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { el = gtexEqtlClusterLoad(row); slAddHead(&list, el); } slReverse(&list); sqlFreeResult(&sr); return list; } void gtexEqtlClusterSaveToDb(struct sqlConnection *conn, struct gtexEqtlCluster *el, char *tableName, int updateSize) /* Save gtexEqtlCluster as a row to the table specified by tableName. * As blob fields may be arbitrary size updateSize specifies the approx size * of a string that would contain the entire query. Arrays of native types are * converted to comma separated strings and loaded as such, User defined types are * inserted as NULL. This function automatically escapes quoted strings for mysql. */ { struct dyString *update = newDyString(updateSize); char *expNamesArray, *expScoresArray, *expPvalsArray, *expProbsArray; expNamesArray = sqlStringArrayToString(el->expNames, el->expCount); expScoresArray = sqlFloatArrayToString(el->expScores, el->expCount); expPvalsArray = sqlFloatArrayToString(el->expPvals, el->expCount); expProbsArray = sqlFloatArrayToString(el->expProbs, el->expCount); -sqlDyStringPrintf(update, "insert into %s values ( '%s',%u,%u,'%s',%u,'%s',%d,%u,'%s','%s','%s','%s')", - tableName, el->chrom, el->chromStart, el->chromEnd, el->name, el->score, el->target, el->distance, el->expCount, expNamesArray , expScoresArray , expPvalsArray , expProbsArray ); +sqlDyStringPrintf(update, "insert into %s values ( '%s',%u,%u,'%s',%u,'%s','%s',%d,%g,'%s',%g,%u,'%s','%s','%s','%s')", + tableName, el->chrom, el->chromStart, el->chromEnd, el->name, el->score, el->targetId, el->target, el->distance, el->maxEffect, el->effectType, el->maxPvalue, el->expCount, expNamesArray , expScoresArray , expPvalsArray , expProbsArray ); sqlUpdate(conn, update->string); freeDyString(&update); freez(&expNamesArray); freez(&expScoresArray); freez(&expPvalsArray); freez(&expProbsArray); } struct gtexEqtlCluster 
*gtexEqtlClusterLoad(char **row) /* Load a gtexEqtlCluster from row fetched with select * from gtexEqtlCluster * from database. Dispose of this with gtexEqtlClusterFree(). */ { struct gtexEqtlCluster *ret; AllocVar(ret); -ret->expCount = sqlUnsigned(row[7]); +ret->expCount = sqlUnsigned(row[11]); ret->chrom = cloneString(row[0]); ret->chromStart = sqlUnsigned(row[1]); ret->chromEnd = sqlUnsigned(row[2]); ret->name = cloneString(row[3]); ret->score = sqlUnsigned(row[4]); -ret->target = cloneString(row[5]); -ret->distance = sqlSigned(row[6]); +ret->targetId = cloneString(row[5]); +ret->target = cloneString(row[6]); +ret->distance = sqlSigned(row[7]); +ret->maxEffect = sqlFloat(row[8]); +safecpy(ret->effectType, sizeof(ret->effectType), row[9]); +ret->maxPvalue = sqlFloat(row[10]); { int sizeOne; -sqlStringDynamicArray(row[8], &ret->expNames, &sizeOne); +sqlStringDynamicArray(row[12], &ret->expNames, &sizeOne); assert(sizeOne == ret->expCount); } { int sizeOne; -sqlFloatDynamicArray(row[9], &ret->expScores, &sizeOne); +sqlFloatDynamicArray(row[13], &ret->expScores, &sizeOne); assert(sizeOne == ret->expCount); } { int sizeOne; -sqlFloatDynamicArray(row[10], &ret->expPvals, &sizeOne); +sqlFloatDynamicArray(row[14], &ret->expPvals, &sizeOne); assert(sizeOne == ret->expCount); } { int sizeOne; -sqlFloatDynamicArray(row[11], &ret->expProbs, &sizeOne); +sqlFloatDynamicArray(row[15], &ret->expProbs, &sizeOne); assert(sizeOne == ret->expCount); } return ret; } struct gtexEqtlCluster *gtexEqtlClusterLoadAll(char *fileName) /* Load all gtexEqtlCluster from a whitespace-separated file. * Dispose of this with gtexEqtlClusterFreeList(). 
*/ { struct gtexEqtlCluster *list = NULL, *el; struct lineFile *lf = lineFileOpen(fileName, TRUE); -char *row[12]; +char *row[16]; while (lineFileRow(lf, row)) { el = gtexEqtlClusterLoad(row); slAddHead(&list, el); } lineFileClose(&lf); slReverse(&list); return list; } struct gtexEqtlCluster *gtexEqtlClusterLoadAllByChar(char *fileName, char chopper) /* Load all gtexEqtlCluster from a chopper separated file. * Dispose of this with gtexEqtlClusterFreeList(). */ { struct gtexEqtlCluster *list = NULL, *el; struct lineFile *lf = lineFileOpen(fileName, TRUE); -char *row[12]; +char *row[16]; while (lineFileNextCharRow(lf, chopper, row, ArraySize(row))) { el = gtexEqtlClusterLoad(row); slAddHead(&list, el); } lineFileClose(&lf); slReverse(&list); return list; } struct gtexEqtlCluster *gtexEqtlClusterCommaIn(char **pS, struct gtexEqtlCluster *ret) /* Create a gtexEqtlCluster out of a comma separated string. * This will fill in ret if non-null, otherwise will * return a new gtexEqtlCluster */ { char *s = *pS; if (ret == NULL) AllocVar(ret); ret->chrom = sqlStringComma(&s); ret->chromStart = sqlUnsignedComma(&s); ret->chromEnd = sqlUnsignedComma(&s); ret->name = sqlStringComma(&s); ret->score = sqlUnsignedComma(&s); +ret->targetId = sqlStringComma(&s); ret->target = sqlStringComma(&s); ret->distance = sqlSignedComma(&s); +ret->maxEffect = sqlFloatComma(&s); +sqlFixedStringComma(&s, ret->effectType, sizeof(ret->effectType)); +ret->maxPvalue = sqlFloatComma(&s); ret->expCount = sqlUnsignedComma(&s); { int i; s = sqlEatChar(s, '{'); AllocArray(ret->expNames, ret->expCount); for (i=0; i<ret->expCount; ++i) { ret->expNames[i] = sqlStringComma(&s); } s = sqlEatChar(s, '}'); s = sqlEatChar(s, ','); } { int i; s = sqlEatChar(s, '{'); AllocArray(ret->expScores, ret->expCount); for (i=0; i<ret->expCount; ++i) { ret->expScores[i] = sqlFloatComma(&s); } s = sqlEatChar(s, '}'); s = sqlEatChar(s, ','); } { int i; s = sqlEatChar(s, '{'); AllocArray(ret->expPvals, ret->expCount); for (i=0; i<ret->expCount; 
++i) { ret->expPvals[i] = sqlFloatComma(&s); } s = sqlEatChar(s, '}'); s = sqlEatChar(s, ','); } { int i; s = sqlEatChar(s, '{'); AllocArray(ret->expProbs, ret->expCount); for (i=0; i<ret->expCount; ++i) { ret->expProbs[i] = sqlFloatComma(&s); } s = sqlEatChar(s, '}'); s = sqlEatChar(s, ','); } *pS = s; return ret; } void gtexEqtlClusterFree(struct gtexEqtlCluster **pEl) /* Free a single dynamically allocated gtexEqtlCluster such as created * with gtexEqtlClusterLoad(). */ { struct gtexEqtlCluster *el; if ((el = *pEl) == NULL) return; freeMem(el->chrom); freeMem(el->name); +freeMem(el->targetId); freeMem(el->target); /* All strings in expNames are allocated at once, so only need to free first. */ if (el->expNames != NULL) freeMem(el->expNames[0]); freeMem(el->expNames); freeMem(el->expScores); freeMem(el->expPvals); freeMem(el->expProbs); freez(pEl); } void gtexEqtlClusterFreeList(struct gtexEqtlCluster **pList) /* Free a list of dynamically allocated gtexEqtlCluster's */ { struct gtexEqtlCluster *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; gtexEqtlClusterFree(&el); } *pList = NULL; } void gtexEqtlClusterOutput(struct gtexEqtlCluster *el, FILE *f, char sep, char lastSep) /* Print out gtexEqtlCluster. Separate fields with sep. Follow last field with lastSep. 
*/ { if (sep == ',') fputc('"',f); fprintf(f, "%s", el->chrom); if (sep == ',') fputc('"',f); fputc(sep,f); fprintf(f, "%u", el->chromStart); fputc(sep,f); fprintf(f, "%u", el->chromEnd); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->name); if (sep == ',') fputc('"',f); fputc(sep,f); fprintf(f, "%u", el->score); fputc(sep,f); if (sep == ',') fputc('"',f); +fprintf(f, "%s", el->targetId); +if (sep == ',') fputc('"',f); +fputc(sep,f); +if (sep == ',') fputc('"',f); fprintf(f, "%s", el->target); if (sep == ',') fputc('"',f); fputc(sep,f); fprintf(f, "%d", el->distance); fputc(sep,f); +fprintf(f, "%g", el->maxEffect); +fputc(sep,f); +if (sep == ',') fputc('"',f); +fprintf(f, "%s", el->effectType); +if (sep == ',') fputc('"',f); +fputc(sep,f); +fprintf(f, "%g", el->maxPvalue); +fputc(sep,f); fprintf(f, "%u", el->expCount); fputc(sep,f); { int i; if (sep == ',') fputc('{',f); for (i=0; i<el->expCount; ++i) { if (sep == ',') fputc('"',f); fprintf(f, "%s", el->expNames[i]); if (sep == ',') fputc('"',f); fputc(',', f); } if (sep == ',') fputc('}',f); } fputc(sep,f); { int i; if (sep == ',') fputc('{',f); for (i=0; i<el->expCount; ++i) { fprintf(f, "%g", el->expScores[i]); fputc(',', f); } if (sep == ',') fputc('}',f); } fputc(sep,f); { int i; if (sep == ',') fputc('{',f); for (i=0; i<el->expCount; ++i) { fprintf(f, "%g", el->expPvals[i]); fputc(',', f); } if (sep == ',') fputc('}',f); } fputc(sep,f); { int i; if (sep == ',') fputc('{',f); for (i=0; i<el->expCount; ++i) { fprintf(f, "%g", el->expProbs[i]); fputc(',', f); } if (sep == ',') fputc('}',f); } fputc(lastSep,f); } /* -------------------------------- End autoSql Generated Code -------------------------------- */