12431b42d1616cb02adead491a305b8dc163ca71 kate Mon Mar 28 11:59:03 2016 -0700 Add support for filter on score. Show total median expression (from which score is computed) on details page. refs #15645 diff --git src/hg/lib/gtexGeneBed.c src/hg/lib/gtexGeneBed.c index fff2145..5f0d4ad 100644 --- src/hg/lib/gtexGeneBed.c +++ src/hg/lib/gtexGeneBed.c @@ -1,279 +1,289 @@ /* gtexGeneBed.c was originally generated by the autoSql program, which also * generated gtexGeneBed.h and gtexGeneBed.sql. This module links the database and * the RAM representation of objects. */ #include "common.h" #include "linefile.h" #include "dystring.h" #include "jksql.h" #include "gtexGeneBed.h" char *gtexGeneBedCommaSepFieldNames = "chrom,chromStart,chromEnd,name,score,strand,geneId,geneType,expCount,expScores"; struct gtexGeneBed *gtexGeneBedLoadByQuery(struct sqlConnection *conn, char *query) /* Load all gtexGeneBed from table that satisfy the query given. * Where query is of the form 'select * from example where something=something' * or 'select example.* from example, anotherTable where example.something = * anotherTable.something'. * Dispose of this with gtexGeneBedFreeList(). */ { struct gtexGeneBed *list = NULL, *el; struct sqlResult *sr; char **row; sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { el = gtexGeneBedLoad(row); slAddHead(&list, el); } slReverse(&list); sqlFreeResult(&sr); return list; } void gtexGeneBedSaveToDb(struct sqlConnection *conn, struct gtexGeneBed *el, char *tableName, int updateSize) /* Save gtexGeneBed as a row to the table specified by tableName. * As blob fields may be arbitrary size updateSize specifies the approx size * of a string that would contain the entire query. Arrays of native types are * converted to comma separated strings and loaded as such, User defined types are * inserted as NULL. This function automatically escapes quoted strings for mysql. 
*/ { struct dyString *update = newDyString(updateSize); char *expScoresArray; expScoresArray = sqlFloatArrayToString(el->expScores, el->expCount); sqlDyStringPrintf(update, "insert into %s values ( '%s',%u,%u,'%s',%u,'%s','%s','%s',%u,'%s')", tableName, el->chrom, el->chromStart, el->chromEnd, el->name, el->score, el->strand, el->geneId, el->geneType, el->expCount, expScoresArray ); sqlUpdate(conn, update->string); freeDyString(&update); freez(&expScoresArray); } struct gtexGeneBed *gtexGeneBedLoad(char **row) /* Load a gtexGeneBed from row fetched with select * from gtexGeneBed * from database. Dispose of this with gtexGeneBedFree(). * row[0..9] are read in the column order listed in gtexGeneBedCommaSepFieldNames; * asserts that the number of values parsed from row[9] (expScores) equals expCount. */ { struct gtexGeneBed *ret; AllocVar(ret); ret->expCount = sqlUnsigned(row[8]); ret->chrom = cloneString(row[0]); ret->chromStart = sqlUnsigned(row[1]); ret->chromEnd = sqlUnsigned(row[2]); ret->name = cloneString(row[3]); ret->score = sqlUnsigned(row[4]); safecpy(ret->strand, sizeof(ret->strand), row[5]); ret->geneId = cloneString(row[6]); ret->geneType = cloneString(row[7]); { int sizeOne; sqlFloatDynamicArray(row[9], &ret->expScores, &sizeOne); assert(sizeOne == ret->expCount); } return ret; } struct gtexGeneBed *gtexGeneBedLoadAll(char *fileName) /* Load all gtexGeneBed from a whitespace-separated file. * Dispose of this with gtexGeneBedFreeList(). */ { struct gtexGeneBed *list = NULL, *el; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[10]; while (lineFileRow(lf, row)) { el = gtexGeneBedLoad(row); slAddHead(&list, el); } lineFileClose(&lf); slReverse(&list); return list; } struct gtexGeneBed *gtexGeneBedLoadAllByChar(char *fileName, char chopper) /* Load all gtexGeneBed from a chopper separated file. * Dispose of this with gtexGeneBedFreeList(). 
*/ { struct gtexGeneBed *list = NULL, *el; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[10]; while (lineFileNextCharRow(lf, chopper, row, ArraySize(row))) { el = gtexGeneBedLoad(row); slAddHead(&list, el); } lineFileClose(&lf); slReverse(&list); return list; } struct gtexGeneBed *gtexGeneBedCommaIn(char **pS, struct gtexGeneBed *ret) /* Create a gtexGeneBed out of a comma separated string. * This will fill in ret if non-null, otherwise will * return a new gtexGeneBed */ { char *s = *pS; if (ret == NULL) AllocVar(ret); ret->chrom = sqlStringComma(&s); ret->chromStart = sqlUnsignedComma(&s); ret->chromEnd = sqlUnsignedComma(&s); ret->name = sqlStringComma(&s); ret->score = sqlUnsignedComma(&s); sqlFixedStringComma(&s, ret->strand, sizeof(ret->strand)); ret->geneId = sqlStringComma(&s); ret->geneType = sqlStringComma(&s); ret->expCount = sqlUnsignedComma(&s); { int i; s = sqlEatChar(s, '{'); AllocArray(ret->expScores, ret->expCount); for (i=0; i<ret->expCount; ++i) { ret->expScores[i] = sqlFloatComma(&s); } s = sqlEatChar(s, '}'); s = sqlEatChar(s, ','); } *pS = s; return ret; } void gtexGeneBedFree(struct gtexGeneBed **pEl) /* Free a single dynamically allocated gtexGeneBed such as created * with gtexGeneBedLoad(). */ { struct gtexGeneBed *el; if ((el = *pEl) == NULL) return; freeMem(el->chrom); freeMem(el->name); freeMem(el->geneId); freeMem(el->geneType); freeMem(el->expScores); freez(pEl); } void gtexGeneBedFreeList(struct gtexGeneBed **pList) /* Free a list of dynamically allocated gtexGeneBed's */ { struct gtexGeneBed *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; gtexGeneBedFree(&el); } *pList = NULL; } void gtexGeneBedOutput(struct gtexGeneBed *el, FILE *f, char sep, char lastSep) /* Print out gtexGeneBed. Separate fields with sep. Follow last field with lastSep. 
*/ { if (sep == ',') fputc('"',f); fprintf(f, "%s", el->chrom); if (sep == ',') fputc('"',f); fputc(sep,f); fprintf(f, "%u", el->chromStart); fputc(sep,f); fprintf(f, "%u", el->chromEnd); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->name); if (sep == ',') fputc('"',f); fputc(sep,f); fprintf(f, "%u", el->score); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->strand); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->geneId); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->geneType); if (sep == ',') fputc('"',f); fputc(sep,f); fprintf(f, "%u", el->expCount); fputc(sep,f); { int i; if (sep == ',') fputc('{',f); for (i=0; i<el->expCount; ++i) { fprintf(f, "%g", el->expScores[i]); fputc(',', f); } if (sep == ',') fputc('}',f); } fputc(lastSep,f); } /* -------------------------------- End autoSql Generated Code -------------------------------- */ void gtexGeneBedCreateTable(struct sqlConnection *conn, char *table) /* Create expression record format table of given name. * Formats the CREATE TABLE statement below with the table name and passes it to * sqlRemakeTable(); geneId is declared as the primary key. 
*/ { char query[1024]; sqlSafef(query, sizeof(query), "CREATE TABLE %s (\n" " chrom varchar(255) not null, # Reference sequence chromosome or scaffold\n" " chromStart int unsigned not null, # Start position in chromosome\n" " chromEnd int unsigned not null, # End position in chromosome\n" " name varchar(255) not null, # Gene symbol\n" " score int unsigned not null, # Score from 0-1000\n" " strand char(1) not null, # + or - for strand\n" " geneId varchar(255) not null, # Ensembl gene ID, referenced in GTEx data tables\n" " geneType varchar(255) not null, # GENCODE gene biotype\n" " expCount int unsigned not null, # Number of experiment values\n" " expScores longblob not null, # Comma separated list of experiment scores\n" "#Indices\n" " PRIMARY KEY(geneId)\n" ")\n", table); sqlRemakeTable(conn, table, query); } char *gtexGeneClass(struct gtexGeneBed *geneBed) /* Return gene "class" (analogous to GENCODE transcriptClass) for a GENCODE gene biotype * Mapped as follows: * coding: IG_C_gene, IG_D_gene, IG_J_gene, IG_V_gene, TR_C_gene, TR_D_gene, TR_J_gene, TR_V_gene, polymorphic_pseudogene, protein_coding * pseudo: IG_C_pseudogene, IG_J_pseudogene, IG_V_pseudogene, TR_J_pseudogene, TR_V_pseudogene, pseudogene * nonCoding: 3prime_overlapping_ncrna, Mt_rRNA, Mt_tRNA, antisense, lincRNA, miRNA, misc_RNA, processed_transcript, rRNA, sense_intronic, sense_overlapping, snRNA, snoRNA * (MarkD request out for approval). * The code matches by exact name or by the suffixes "_gene" / "_pseudogene"; the values * "coding" and "pseudo" themselves are also accepted. Any other biotype is returned as * "nonCoding", and a NULL geneType returns "unknown". The returned string is a literal; * do not free it. 
*/ { char *geneType = geneBed->geneType; if (geneType == NULL) return "unknown"; if (sameString(geneType, "coding") || sameString(geneType, "protein_coding") || sameString(geneType, "polymorphic_pseudogene") || endsWith(geneType, "_gene")) return "coding"; if (sameString(geneType, "pseudo") || sameString(geneType, "pseudogene") || endsWith(geneType, "_pseudogene")) return "pseudo"; // A bit of a cheat here -- better a mapping table return "nonCoding"; } boolean gtexGeneIsCoding(struct gtexGeneBed *geneBed) /* Return TRUE if biotype indicates this is a protein coding gene, * i.e. gtexGeneClass() returns "coding" for it */ { return sameString("coding", gtexGeneClass(geneBed)); } + +float gtexGeneTotalMedianExpression(struct gtexGeneBed *geneBed) +/* Return total of all tissue medians: the sum of the first expCount entries of geneBed->expScores (0.0 when expCount is 0) */ +{ +int i; +float sum = 0.0; +for (i=0; i<geneBed->expCount; i++) + sum += geneBed->expScores[i]; +return sum; +}