df66e5b7936616048e5503a03196ef1684a63a07
kate
  Wed Mar 16 12:07:50 2016 -0700
New schema for GTEx gene bed file.  Removes bogus transcriptId and replaces transcriptClass with geneType. refs #15645

diff --git src/hg/lib/gtexGeneBed.c src/hg/lib/gtexGeneBed.c
index 78e9239..02f773d 100644
--- src/hg/lib/gtexGeneBed.c
+++ src/hg/lib/gtexGeneBed.c
@@ -1,221 +1,252 @@
 /* gtexGeneBed.c was originally generated by the autoSql program, which also 
  * generated gtexGeneBed.h and gtexGeneBed.sql.  This module links the database and
  * the RAM representation of objects. */
 
 #include "common.h"
 #include "linefile.h"
 #include "dystring.h"
 #include "jksql.h"
 #include "gtexGeneBed.h"
 
 
 
-char *gtexGeneBedCommaSepFieldNames = "chrom,chromStart,chromEnd,name,score,strand,geneId,transcriptId,transcriptClass,expCount,expScores";
+char *gtexGeneBedCommaSepFieldNames = "chrom,chromStart,chromEnd,name,score,strand,geneId,geneType,expCount,expScores";
+
+struct gtexGeneBed *gtexGeneBedLoadByQuery(struct sqlConnection *conn, char *query)
+/* Load all gtexGeneBed from table that satisfy the query given.
+ * The query must return whole gtexGeneBed rows, for example
+ * 'select * from example where something=something' or 'select example.* from
+ * example, anotherTable where example.something = anotherTable.something'.
+ * Dispose of this with gtexGeneBedFreeList(). */
+{
+struct gtexGeneBed *list = NULL, *el;
+struct sqlResult *sr;
+char **row;
+
+sr = sqlGetResult(conn, query);
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    el = gtexGeneBedLoad(row);
+    slAddHead(&list, el);
+    }
+slReverse(&list);  /* elements were added at the head, so put them back in query order */
+sqlFreeResult(&sr);
+return list;
+}
+
+void gtexGeneBedSaveToDb(struct sqlConnection *conn, struct gtexGeneBed *el, char *tableName, int updateSize)
+/* Save gtexGeneBed as a row to the table specified by tableName.  The insert names no
+ * columns, so the values are matched to the table's columns by position.  As blob fields
+ * may be of arbitrary size, updateSize specifies the approximate size of a string that would
+ * contain the entire query.  Arrays of native types are converted to comma separated strings
+ * and loaded as such; user defined types are inserted as NULL.  Quoted strings are escaped for mysql. */
+{
+struct dyString *update = newDyString(updateSize);
+char  *expScoresArray;
+expScoresArray = sqlFloatArrayToString(el->expScores, el->expCount);
+sqlDyStringPrintf(update, "insert into %s values ( '%s',%u,%u,'%s',%u,'%s','%s','%s',%u,'%s')", 
+	tableName,  el->chrom,  el->chromStart,  el->chromEnd,  el->name,  el->score,  el->strand,  el->geneId,  el->geneType,  el->expCount,  expScoresArray );
+sqlUpdate(conn, update->string);
+freeDyString(&update);
+freez(&expScoresArray);  /* the string built from expScores above is released here */
+}
 
 struct gtexGeneBed *gtexGeneBedLoad(char **row)
 /* Load a gtexGeneBed from row fetched with select * from gtexGeneBed
  * from database.  Dispose of this with gtexGeneBedFree(). */
 {
 struct gtexGeneBed *ret;
 
 AllocVar(ret);
-ret->expCount = sqlUnsigned(row[9]);
+ret->expCount = sqlUnsigned(row[8]);  /* read ahead of the other fields; compared with the parsed expScores size below */
 ret->chrom = cloneString(row[0]);
 ret->chromStart = sqlUnsigned(row[1]);
 ret->chromEnd = sqlUnsigned(row[2]);
 ret->name = cloneString(row[3]);
 ret->score = sqlUnsigned(row[4]);
 safecpy(ret->strand, sizeof(ret->strand), row[5]);
 ret->geneId = cloneString(row[6]);
-ret->transcriptId = cloneString(row[7]);
-ret->transcriptClass = cloneString(row[8]);
+ret->geneType = cloneString(row[7]);
 {
 int sizeOne;
-sqlFloatDynamicArray(row[10], &ret->expScores, &sizeOne);
+sqlFloatDynamicArray(row[9], &ret->expScores, &sizeOne);  /* row[9] is the comma separated expScores column */
 assert(sizeOne == ret->expCount);
 }
 return ret;
 }
 
 struct gtexGeneBed *gtexGeneBedLoadAll(char *fileName) 
 /* Load all gtexGeneBed from a whitespace-separated file.
  * Dispose of this with gtexGeneBedFreeList(). */
 {
 struct gtexGeneBed *list = NULL, *el;
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
-char *row[11];
+char *row[10];  /* one slot per field listed in gtexGeneBedCommaSepFieldNames */
 
 while (lineFileRow(lf, row))
     {
     el = gtexGeneBedLoad(row);
     slAddHead(&list, el);
     }
 lineFileClose(&lf);
 slReverse(&list);
 return list;
 }
 
 struct gtexGeneBed *gtexGeneBedLoadAllByChar(char *fileName, char chopper) 
 /* Load all gtexGeneBed from a chopper separated file.
  * Dispose of this with gtexGeneBedFreeList(). */
 {
 struct gtexGeneBed *list = NULL, *el;
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
-char *row[11];
+char *row[10];  /* one slot per field listed in gtexGeneBedCommaSepFieldNames */
 
 while (lineFileNextCharRow(lf, chopper, row, ArraySize(row)))
     {
     el = gtexGeneBedLoad(row);
     slAddHead(&list, el);
     }
 lineFileClose(&lf);
 slReverse(&list);
 return list;
 }
 
 struct gtexGeneBed *gtexGeneBedCommaIn(char **pS, struct gtexGeneBed *ret)
 /* Create a gtexGeneBed out of a comma separated string. 
  * This will fill in ret if non-null, otherwise will
  * return a new gtexGeneBed */
 {
 char *s = *pS;
 
 if (ret == NULL)
     AllocVar(ret);
 ret->chrom = sqlStringComma(&s);
 ret->chromStart = sqlUnsignedComma(&s);
 ret->chromEnd = sqlUnsignedComma(&s);
 ret->name = sqlStringComma(&s);
 ret->score = sqlUnsignedComma(&s);
 sqlFixedStringComma(&s, ret->strand, sizeof(ret->strand));
 ret->geneId = sqlStringComma(&s);
-ret->transcriptId = sqlStringComma(&s);
-ret->transcriptClass = sqlStringComma(&s);
+ret->geneType = sqlStringComma(&s);
 ret->expCount = sqlUnsignedComma(&s);
 {
 int i;
 s = sqlEatChar(s, '{');
 AllocArray(ret->expScores, ret->expCount);
 for (i=0; i<ret->expCount; ++i)
     {
     ret->expScores[i] = sqlFloatComma(&s);
     }
 s = sqlEatChar(s, '}');
 s = sqlEatChar(s, ',');
 }
 *pS = s;
 return ret;
 }
 
 void gtexGeneBedFree(struct gtexGeneBed **pEl)
 /* Free a single dynamically allocated gtexGeneBed such as created
  * with gtexGeneBedLoad(). */
 {
 struct gtexGeneBed *el;
 
 if ((el = *pEl) == NULL) return;
 freeMem(el->chrom);
 freeMem(el->name);
 freeMem(el->geneId);
-freeMem(el->transcriptId);
-freeMem(el->transcriptClass);
+freeMem(el->geneType);
 freeMem(el->expScores);
 freez(pEl);
 }
 
 void gtexGeneBedFreeList(struct gtexGeneBed **pList)
 /* Free a list of dynamically allocated gtexGeneBed's */
 {
 struct gtexGeneBed *el, *next;
 
 for (el = *pList; el != NULL; el = next)
     {
     next = el->next;
     gtexGeneBedFree(&el);
     }
 *pList = NULL;
 }
 
 void gtexGeneBedOutput(struct gtexGeneBed *el, FILE *f, char sep, char lastSep) 
 /* Print out gtexGeneBed.  Separate fields with sep. Follow last field with lastSep. */
 {
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->chrom);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 fprintf(f, "%u", el->chromStart);
 fputc(sep,f);
 fprintf(f, "%u", el->chromEnd);
 fputc(sep,f);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->name);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 fprintf(f, "%u", el->score);
 fputc(sep,f);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->strand);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->geneId);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 if (sep == ',') fputc('"',f);
-fprintf(f, "%s", el->transcriptId);
-if (sep == ',') fputc('"',f);
-fputc(sep,f);
-if (sep == ',') fputc('"',f);
-fprintf(f, "%s", el->transcriptClass);
+fprintf(f, "%s", el->geneType);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 fprintf(f, "%u", el->expCount);
 fputc(sep,f);
 {
 int i;
 if (sep == ',') fputc('{',f);
 for (i=0; i<el->expCount; ++i)
     {
     fprintf(f, "%g", el->expScores[i]);
     fputc(',', f);
     }
 if (sep == ',') fputc('}',f);
 }
 fputc(lastSep,f);
 }
 
 /* -------------------------------- End autoSql Generated Code -------------------------------- */
 
 void gtexGeneBedCreateTable(struct sqlConnection *conn, char *table)
 /* Create expression record format table of given name. */
 {
 char query[1024];
 
 sqlSafef(query, sizeof(query),
 "CREATE TABLE %s (\n"
 "   chrom varchar(255) not null,	# Reference sequence chromosome or scaffold\n"
 "   chromStart int unsigned not null,	# Start position in chromosome\n"
 "   chromEnd int unsigned not null,	# End position in chromosome\n"
 "   name varchar(255) not null,	# Gene symbol\n"
 "   score int unsigned not null,	# Score from 0-1000\n"
 "   strand char(1) not null,	# + or - for strand\n"
 "   geneId varchar(255) not null,	# Ensembl gene ID, referenced in GTEx data tables\n"
-"   transcriptId varchar(255) not null,	# Ensembl ID of Canonical transcript; determines genomic position\n"
-"   transcriptClass varchar(255) not null,	# GENCODE transcript class (coding, nonCoding, pseudo)\n"
+"   geneType varchar(255) not null,	# GENCODE gene biotype\n"
 "   expCount int unsigned not null,	# Number of experiment values\n"
 "   expScores longblob not null,	# Comma separated list of experiment scores\n"
           "#Indices\n"
 "   PRIMARY KEY(geneId)\n"
 ")\n",
     table);
 sqlRemakeTable(conn, table, query);
 }
 
 char *gtexVersionSuffix(char *table)
 /* Return version string for a GTEx track table.  For now, just supporting V4 and V6 (default, no suffix )*/
 {
 if (endsWith(table, "V4"))
     return("V4");
 return("");
 }