12431b42d1616cb02adead491a305b8dc163ca71
kate
  Mon Mar 28 11:59:03 2016 -0700
Add support for filter on score.  Show total median expression (from which score is computed) on details page. refs #15645

diff --git src/hg/lib/gtexGeneBed.c src/hg/lib/gtexGeneBed.c
index fff2145..5f0d4ad 100644
--- src/hg/lib/gtexGeneBed.c
+++ src/hg/lib/gtexGeneBed.c
@@ -1,279 +1,289 @@
 /* gtexGeneBed.c was originally generated by the autoSql program, which also 
  * generated gtexGeneBed.h and gtexGeneBed.sql.  This module links the database and
  * the RAM representation of objects. */
 
 #include "common.h"
 #include "linefile.h"
 #include "dystring.h"
 #include "jksql.h"
 #include "gtexGeneBed.h"
 
 
 
 /* Names of the gtexGeneBed fields as one comma-separated string.  The order matches
  * the row indices read by gtexGeneBedLoad() (chrom is row[0], expScores is row[9]). */
 char *gtexGeneBedCommaSepFieldNames = "chrom,chromStart,chromEnd,name,score,strand,geneId,geneType,expCount,expScores";
 
 struct gtexGeneBed *gtexGeneBedLoadByQuery(struct sqlConnection *conn, char *query)
 /* Run query on conn and convert every returned row into a gtexGeneBed.
  * The query should yield whole gtexGeneBed rows, for instance
  * 'select * from example where something=something' or
  * 'select example.* from example, anotherTable where example.something =
  * anotherTable.something'.
  * Dispose of the returned list with gtexGeneBedFreeList(). */
 {
 struct gtexGeneBed *beds = NULL;
 struct sqlResult *result = sqlGetResult(conn, query);
 char **fields;
 for (fields = sqlNextRow(result); fields != NULL; fields = sqlNextRow(result))
     {
     struct gtexGeneBed *bed = gtexGeneBedLoad(fields);
     slAddHead(&beds, bed);
     }
 /* Each row was pushed onto the head of the list, so reverse it before returning. */
 slReverse(&beds);
 sqlFreeResult(&result);
 return beds;
 }
 
 void gtexGeneBedSaveToDb(struct sqlConnection *conn, struct gtexGeneBed *el, char *tableName, int updateSize)
 /* Insert el as a single row of the table named tableName.
  * updateSize is the approximate length of the complete insert statement; it is
  * needed because blob fields can be of arbitrary size.  The expScores array is
  * written as one comma separated string.  Quoted strings are escaped for mysql
  * automatically. */
 {
 struct dyString *insert = newDyString(updateSize);
 char *scoreList = sqlFloatArrayToString(el->expScores, el->expCount);
 sqlDyStringPrintf(insert, "insert into %s values ( '%s',%u,%u,'%s',%u,'%s','%s','%s',%u,'%s')",
 	tableName,
 	el->chrom, el->chromStart, el->chromEnd,
 	el->name, el->score, el->strand,
 	el->geneId, el->geneType,
 	el->expCount, scoreList);
 sqlUpdate(conn, insert->string);
 freeDyString(&insert);
 freez(&scoreList);
 }
 
 struct gtexGeneBed *gtexGeneBedLoad(char **row)
 /* Build a newly allocated gtexGeneBed from one row, as fetched with
  * select * from gtexGeneBed.  Dispose of this with gtexGeneBedFree(). */
 {
 struct gtexGeneBed *bed;
 int scoreCount;
 AllocVar(bed);
 /* expCount is read first; the other columns follow in row order. */
 bed->expCount = sqlUnsigned(row[8]);
 bed->chrom = cloneString(row[0]);
 bed->chromStart = sqlUnsigned(row[1]);
 bed->chromEnd = sqlUnsigned(row[2]);
 bed->name = cloneString(row[3]);
 bed->score = sqlUnsigned(row[4]);
 safecpy(bed->strand, sizeof(bed->strand), row[5]);
 bed->geneId = cloneString(row[6]);
 bed->geneType = cloneString(row[7]);
 sqlFloatDynamicArray(row[9], &bed->expScores, &scoreCount);
 /* The number of parsed scores has to agree with the expCount column. */
 assert(scoreCount == bed->expCount);
 return bed;
 }
 
 struct gtexGeneBed *gtexGeneBedLoadAll(char *fileName)
 /* Read every row of the whitespace-separated file fileName into a list of gtexGeneBed.
  * Dispose of this with gtexGeneBedFreeList(). */
 {
 char *fields[10];
 struct gtexGeneBed *beds = NULL;
 struct lineFile *in = lineFileOpen(fileName, TRUE);
 while (lineFileRow(in, fields))
     {
     struct gtexGeneBed *bed = gtexGeneBedLoad(fields);
     slAddHead(&beds, bed);
     }
 lineFileClose(&in);
 /* Rows were added at the head of the list; reverse it before returning. */
 slReverse(&beds);
 return beds;
 }
 
 struct gtexGeneBed *gtexGeneBedLoadAllByChar(char *fileName, char chopper)
 /* Read every row of fileName, whose fields are separated by the character chopper,
  * into a list of gtexGeneBed.  Dispose of this with gtexGeneBedFreeList(). */
 {
 char *fields[10];
 struct gtexGeneBed *beds = NULL;
 struct lineFile *in = lineFileOpen(fileName, TRUE);
 while (lineFileNextCharRow(in, chopper, fields, ArraySize(fields)))
     {
     struct gtexGeneBed *bed = gtexGeneBedLoad(fields);
     slAddHead(&beds, bed);
     }
 lineFileClose(&in);
 /* Rows were added at the head of the list; reverse it before returning. */
 slReverse(&beds);
 return beds;
 }
 
 struct gtexGeneBed *gtexGeneBedCommaIn(char **pS, struct gtexGeneBed *ret)
 /* Parse one gtexGeneBed out of the comma separated text at *pS.
  * The record is stored in ret when ret is non-null; otherwise a new gtexGeneBed
  * is allocated.  The filled-in record is returned, and *pS is moved past the
  * text that was consumed. */
 {
 char *cursor = *pS;
 int ix;
 if (ret == NULL)
     AllocVar(ret);
 ret->chrom = sqlStringComma(&cursor);
 ret->chromStart = sqlUnsignedComma(&cursor);
 ret->chromEnd = sqlUnsignedComma(&cursor);
 ret->name = sqlStringComma(&cursor);
 ret->score = sqlUnsignedComma(&cursor);
 sqlFixedStringComma(&cursor, ret->strand, sizeof(ret->strand));
 ret->geneId = sqlStringComma(&cursor);
 ret->geneType = sqlStringComma(&cursor);
 ret->expCount = sqlUnsignedComma(&cursor);
 /* The scores are a brace-enclosed list holding expCount values. */
 cursor = sqlEatChar(cursor, '{');
 AllocArray(ret->expScores, ret->expCount);
 for (ix = 0; ix < ret->expCount; ++ix)
     ret->expScores[ix] = sqlFloatComma(&cursor);
 cursor = sqlEatChar(cursor, '}');
 cursor = sqlEatChar(cursor, ',');
 *pS = cursor;
 return ret;
 }
 
 void gtexGeneBedFree(struct gtexGeneBed **pEl)
 /* Release one dynamically allocated gtexGeneBed, such as one made by
  * gtexGeneBedLoad(), together with the strings and score array it holds.
  * Does nothing when *pEl is NULL. */
 {
 struct gtexGeneBed *bed = *pEl;
 if (bed == NULL)
     return;
 freeMem(bed->chrom);
 freeMem(bed->name);
 freeMem(bed->geneId);
 freeMem(bed->geneType);
 freeMem(bed->expScores);
 freez(pEl);
 }
 
 void gtexGeneBedFreeList(struct gtexGeneBed **pList)
 /* Release every gtexGeneBed on a dynamically allocated list and set *pList to NULL. */
 {
 struct gtexGeneBed *cur = *pList;
 while (cur != NULL)
     {
     /* Remember the successor before the current element is freed. */
     struct gtexGeneBed *following = cur->next;
     gtexGeneBedFree(&cur);
     cur = following;
     }
 *pList = NULL;
 }
 
 void gtexGeneBedOutput(struct gtexGeneBed *el, FILE *f, char sep, char lastSep)
 /* Write el to f.  Fields are separated by sep and the final field is followed by
  * lastSep.  When sep is a comma, string fields are wrapped in double quotes and the
  * score list is wrapped in braces.  Each score is followed by a comma whatever sep is. */
 {
 int commaSep = (sep == ',');
 int ix;
 /* chrom, chromStart, chromEnd */
 if (commaSep) fputc('"', f);
 fprintf(f, "%s", el->chrom);
 if (commaSep) fputc('"', f);
 fprintf(f, "%c%u%c%u%c", sep, el->chromStart, sep, el->chromEnd, sep);
 /* name, score */
 if (commaSep) fputc('"', f);
 fprintf(f, "%s", el->name);
 if (commaSep) fputc('"', f);
 fprintf(f, "%c%u%c", sep, el->score, sep);
 /* strand */
 if (commaSep) fputc('"', f);
 fprintf(f, "%s", el->strand);
 if (commaSep) fputc('"', f);
 fputc(sep, f);
 /* geneId */
 if (commaSep) fputc('"', f);
 fprintf(f, "%s", el->geneId);
 if (commaSep) fputc('"', f);
 fputc(sep, f);
 /* geneType, expCount */
 if (commaSep) fputc('"', f);
 fprintf(f, "%s", el->geneType);
 if (commaSep) fputc('"', f);
 fprintf(f, "%c%u%c", sep, el->expCount, sep);
 /* expScores */
 if (commaSep) fputc('{', f);
 for (ix = 0; ix < el->expCount; ++ix)
     fprintf(f, "%g,", el->expScores[ix]);
 if (commaSep) fputc('}', f);
 fputc(lastSep, f);
 }
 
 /* -------------------------------- End autoSql Generated Code -------------------------------- */
 
 void gtexGeneBedCreateTable(struct sqlConnection *conn, char *table)
 /* Build the CREATE TABLE statement for a gtexGeneBed table called table and
  * hand it to sqlRemakeTable() on conn. */
 {
 char createSql[1024];
 sqlSafef(createSql, sizeof(createSql),
     "CREATE TABLE %s (\n"
     "   chrom varchar(255) not null,	# Reference sequence chromosome or scaffold\n"
     "   chromStart int unsigned not null,	# Start position in chromosome\n"
     "   chromEnd int unsigned not null,	# End position in chromosome\n"
     "   name varchar(255) not null,	# Gene symbol\n"
     "   score int unsigned not null,	# Score from 0-1000\n"
     "   strand char(1) not null,	# + or - for strand\n"
     "   geneId varchar(255) not null,	# Ensembl gene ID, referenced in GTEx data tables\n"
     "   geneType varchar(255) not null,	# GENCODE gene biotype\n"
     "   expCount int unsigned not null,	# Number of experiment values\n"
     "   expScores longblob not null,	# Comma separated list of experiment scores\n"
     "#Indices\n"
     "   PRIMARY KEY(geneId)\n"
     ")\n",
     table);
 sqlRemakeTable(conn, table, createSql);
 }
 
 char *gtexGeneClass(struct gtexGeneBed *geneBed)
 /* Map the GENCODE gene biotype of geneBed to a gene "class" (analogous to the
  * GENCODE transcriptClass).  Returns one of the constant strings "coding",
  * "pseudo", "nonCoding", or "unknown" when the biotype is NULL.
  * Intended mapping:
  *   coding:    IG_C_gene, IG_D_gene, IG_J_gene, IG_V_gene,
  *              TR_C_gene, TR_D_gene, TR_J_gene, TR_V_gene,
  *              polymorphic_pseudogene, protein_coding
  *   pseudo:    IG_C_pseudogene, IG_J_pseudogene, IG_V_pseudogene, TR_J_pseudogene,
  *              TR_V_pseudogene, pseudogene
  *   nonCoding: 3prime_overlapping_ncrna, Mt_rRNA, Mt_tRNA, antisense, lincRNA, miRNA,
  *              misc_RNA, processed_transcript, rRNA, sense_intronic, sense_overlapping,
  *              snRNA, snoRNA
  * (MarkD request out for approval). */
 {
 char *biotype = geneBed->geneType;
 if (biotype == NULL)
     return "unknown";
 /* Coding: the class name itself, two explicit biotypes, or any "..._gene" biotype. */
 if (sameString(biotype, "coding"))
     return "coding";
 if (sameString(biotype, "protein_coding"))
     return "coding";
 if (sameString(biotype, "polymorphic_pseudogene"))
     return "coding";
 if (endsWith(biotype, "_gene"))
     return "coding";
 /* Pseudo: the class name itself, "pseudogene", or any "..._pseudogene" biotype. */
 if (sameString(biotype, "pseudo"))
     return "pseudo";
 if (sameString(biotype, "pseudogene"))
     return "pseudo";
 if (endsWith(biotype, "_pseudogene"))
     return "pseudo";
 /* A bit of a cheat: everything else is treated as non-coding; a mapping table would be better. */
 return "nonCoding";
 }
 
 boolean gtexGeneIsCoding(struct gtexGeneBed *geneBed)
 /* Return TRUE when gtexGeneClass() places this gene in the "coding" class. */
 {
 char *geneClass = gtexGeneClass(geneBed);
 return sameString(geneClass, "coding");
 }
+
+float gtexGeneTotalMedianExpression(struct gtexGeneBed *geneBed)
+/* Add up the expCount tissue median values held in expScores and return the total. */
+{
+float total = 0.0;
+int ix = 0;
+while (ix < geneBed->expCount)
+    {
+    total += geneBed->expScores[ix];
+    ix++;
+    }
+return total;
+}