2ffc9d13a7475fbf6eb60a6b50d463ee11e7ff41 kate Wed Oct 25 18:06:54 2017 -0700 Implement V2 GTEx eQTL clusters as described in RM as 'permissive with ID'. This adds Ensembl gene ID to schemas. Also adding summary columns to cluster track (maxEffect, maxPvalue, effectType) to provide alternative to parsing comma-sep fields for table filtering. refs #15646 diff --git src/hg/inc/gtexEqtlCluster.h src/hg/inc/gtexEqtlCluster.h index 8d5cac0..01bd1ff 100644 --- src/hg/inc/gtexEqtlCluster.h +++ src/hg/inc/gtexEqtlCluster.h @@ -1,87 +1,91 @@ /* gtexEqtlCluster.h was originally generated by the autoSql program, which also * generated gtexEqtlCluster.c and gtexEqtlCluster.sql. This header links the database and * the RAM representation of objects. */ #ifndef GTEXEQTLCLUSTER_H #define GTEXEQTLCLUSTER_H #include "jksql.h" -#define GTEXEQTLCLUSTER_NUM_COLS 12 +#define GTEXEQTLCLUSTER_NUM_COLS 16 extern char *gtexEqtlClusterCommaSepFieldNames; struct gtexEqtlCluster -/* BED5+ of eQTLs (variants affecting gene expression) with a target (gene or tissue), and lists of secondary targets (e.g. tissues or genes) */ +/* BED5+ of eQTLs (variants affecting gene expression) with a target (gene or tissue), and lists of values related to combined factors (e.g. tissues or genes) */ { struct gtexEqtlCluster *next; /* Next in singly linked list. 
*/ char *chrom; /* Reference sequence chromosome or scaffold */ unsigned chromStart; /* Start position in chromosome */ unsigned chromEnd; /* End position in chromosome */ char *name; /* Name of variant (rsID or GTEx identifier if none) */ unsigned score; /* Score from 0-1000 */ + char *targetId; /* Identifier of target (gene or tissue) */ char *target; /* Name of target (gene or tissue) */ int distance; /* Distance from TSS */ + float maxEffect; /* Maximum absolute value effect size in cluster */ + char effectType[2]; /* +, -, 0 (for mixed) */ + float maxPvalue; /* Maximum -log10 pValue in cluster */ unsigned expCount; /* Number of experiment values */ char **expNames; /* Comma separated list of experiment names (e.g. tissue or gene) */ float *expScores; /* Comma separated list of effect size values */ float *expPvals; /* Comma separated list of -log10 transformed p-values */ - float *expProbs; /* Comma separated list of probability variant is causal */ + float *expProbs; /* Comma separated list of probabilities variant is in high confidence causal set */ }; struct gtexEqtlCluster *gtexEqtlClusterLoadByQuery(struct sqlConnection *conn, char *query); /* Load all gtexEqtlCluster from table that satisfy the query given. * Where query is of the form 'select * from example where something=something' * or 'select example.* from example, anotherTable where example.something = * anotherTable.something'. * Dispose of this with gtexEqtlClusterFreeList(). */ void gtexEqtlClusterSaveToDb(struct sqlConnection *conn, struct gtexEqtlCluster *el, char *tableName, int updateSize); /* Save gtexEqtlCluster as a row to the table specified by tableName. * As blob fields may be arbitrary size updateSize specifies the approx size * of a string that would contain the entire query. Arrays of native types are * converted to comma separated strings and loaded as such. User defined types are * inserted as NULL. This function automatically escapes quoted strings for mysql. 
*/ struct gtexEqtlCluster *gtexEqtlClusterLoad(char **row); /* Load a gtexEqtlCluster from row fetched with select * from gtexEqtlCluster * from database. Dispose of this with gtexEqtlClusterFree(). */ struct gtexEqtlCluster *gtexEqtlClusterLoadAll(char *fileName); /* Load all gtexEqtlCluster from whitespace-separated file. * Dispose of this with gtexEqtlClusterFreeList(). */ struct gtexEqtlCluster *gtexEqtlClusterLoadAllByChar(char *fileName, char chopper); /* Load all gtexEqtlCluster from chopper separated file. * Dispose of this with gtexEqtlClusterFreeList(). */ #define gtexEqtlClusterLoadAllByTab(a) gtexEqtlClusterLoadAllByChar(a, '\t'); /* Load all gtexEqtlCluster from tab separated file. * Dispose of this with gtexEqtlClusterFreeList(). */ struct gtexEqtlCluster *gtexEqtlClusterCommaIn(char **pS, struct gtexEqtlCluster *ret); /* Create a gtexEqtlCluster out of a comma separated string. * This will fill in ret if non-null, otherwise will * return a new gtexEqtlCluster */ void gtexEqtlClusterFree(struct gtexEqtlCluster **pEl); /* Free a single dynamically allocated gtexEqtlCluster such as created * with gtexEqtlClusterLoad(). */ void gtexEqtlClusterFreeList(struct gtexEqtlCluster **pList); /* Free a list of dynamically allocated gtexEqtlCluster's */ void gtexEqtlClusterOutput(struct gtexEqtlCluster *el, FILE *f, char sep, char lastSep); /* Print out gtexEqtlCluster. Separate fields with sep. Follow last field with lastSep. */ #define gtexEqtlClusterTabOut(el,f) gtexEqtlClusterOutput(el,f,'\t','\n'); /* Print out gtexEqtlCluster as a line in a tab-separated file. */ #define gtexEqtlClusterCommaOut(el,f) gtexEqtlClusterOutput(el,f,',',','); /* Print out gtexEqtlCluster as a comma separated list including final comma. */ /* -------------------------------- End autoSql Generated Code -------------------------------- */ #define GTEX_EQTL_GENE_FIELD "target" #endif /* GTEXEQTLCLUSTER_H */