src/hg/instinct/bioInt2/populateDb.c 1.8

1.8 2009/04/27 18:13:34 jsanborn
fixed memory leak
Index: src/hg/instinct/bioInt2/populateDb.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/bioInt2/populateDb.c,v
retrieving revision 1.7
retrieving revision 1.8
diff -b -B -U 1000000 -r1.7 -r1.8
--- src/hg/instinct/bioInt2/populateDb.c	27 Apr 2009 06:15:49 -0000	1.7
+++ src/hg/instinct/bioInt2/populateDb.c	27 Apr 2009 18:13:34 -0000	1.8
@@ -1,1214 +1,1216 @@
 /* populateDb - Populate the bioInt database (datasets, samples, clinical data, analysis values and genesets) from a bed 15 microarray table. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "jksql.h"
 #include "bed.h"
 #include "genePred.h"
 #include "hPrint.h"
 #include "hdb.h"  
 #include "microarray.h"
 #include "ra.h"
 #include "featuresLib.h"
 #include "hgHeatmapLib.h"
 #include "cprob.h"
 #include "hgStatsLib.h" 
 #include "bioIntDriver.h"
 #include "bioIntDb.h"
 
 /* Name of the database that populateDb() opens its hgConn connection against. */
 char *hgDb = "hg18";
 /* Organism name; not referenced anywhere else in this file. */
 char *genome = "Human";
 
 void usage()
 /* Show the command-line synopsis for populateDb and abort via errAbort. */
 {
 errAbort("populateDb \n"
          "   populateDb [OPTIONS] db table tissue\n"
          "options:\n"
          "   -dropAll   Drop/recreate any table\n"
          "   -tcga      handles TCGA ids\n"
          "\n");
 }
 
 boolean dropTable = FALSE;   // If true (-dropAll), existing analysisVals and geneset tables are dropped and rebuilt
 boolean isTCGA    = FALSE;   // If true (-tcga), sample/patient names are taken as prefixes of the array names
 
 /* Command-line options handed to optionInit() in main(). */
 static struct optionSpec options[] = {
     {"dropAll", OPTION_BOOLEAN},
     {"tcga", OPTION_BOOLEAN},
     {NULL, 0},
 };
 
 
 char *getId(struct sqlConnection *conn, char *table, char *key, char *sample, char *value)
 /* Select column 'key' from 'table' for the row whose 'value' column equals
  * 'sample', and return whatever sqlQuickString() yields for that query. */
 {
 char sql[512];
 safef(sql, sizeof(sql), "select %s from %s where %s = \"%s\" ", key, table, value, sample);
 char *result = sqlQuickString(conn, sql);
 return result;
 }
 
 
 struct slName *getProbesFromTable(struct sqlConnection *hgConn, char *tableName)
 /* Return the distinct values of the "name" column of tableName as an slName
  * list, in the order the query returned them. */
 {
 char sql[512];
 safef(sql, sizeof(sql), "select DISTINCT %s from %s ", "name", tableName);
 
 struct slName *probes = NULL;
 struct sqlResult *result = sqlGetResult(hgConn, sql);
 char **fields;
 for (fields = sqlNextRow(result); fields != NULL; fields = sqlNextRow(result))
     {
     struct slName *probe = slNameNew(fields[0]);
     slAddHead(&probes, probe);
     }
 
 /* Elements were pushed on the head, so flip back to query order. */
 slReverse(&probes);
 sqlFreeResult(&result);
 return probes;
 }
 
 
 struct maGrouping *getMaGrouping(struct sqlConnection *hgConn, char *tableName)
 {
 /*microarray specific settings*/
 struct trackDb *tdb = hMaybeTrackInfo(hgConn, tableName);  
 struct microarrayGroups *maGs = maGroupings("hg18", tableName);
 trackDbFreeList(&tdb);
 if (!maGs)
     return NULL;
 return maGs->allArrays;
 }
 
 struct hash *getSettings(char *tableName)
 /* Return the settings hash of the first "datasets.ra" column whose name is
  * tableName.  Aborts when no such column exists or its settings are NULL. */
 {
 struct column *entry = getColumns(NULL, "datasets.ra", NULL);
 
 /* Advance to the first entry named tableName, or off the end of the list. */
 while (entry != NULL && !sameString(entry->name, tableName))
     entry = entry->next;
 
 struct hash *settings = (entry != NULL) ? entry->settings : NULL;
 if (settings == NULL)
     errAbort("Couldn't find datasets.ra listing for %s", tableName);
 
 return settings;
 }
 
 /* A probe and the genes recorded for it; built by getAliases(). */
 struct geneAlias {
     struct geneAlias *next;   /* list link */
 
     char *probe;              /* probe name (first column of the alias table) */
     struct slName *genes;     /* gene names (second column) seen for this probe */
 }; 
  
 struct hash *getAliases(struct sqlConnection *hgConn, char *tableName)
 /* Read every row of tableName and return a hash keyed by probe name
  * (column 0).  Each value is a struct geneAlias holding the genes (column 1)
  * seen for that probe, most recently read first.  Returns NULL when either
  * argument is NULL. */
 {
 if (!hgConn || !tableName)
     return NULL;
 
 char query[512];
 safef(query, sizeof(query), "select * from %s", tableName);
 struct sqlResult *sr = sqlGetResult(hgConn, query);
 
 struct hash *gaHash = hashNew(0);
 char **row;
 while ((row = sqlNextRow(sr)) != NULL)
     {
     /* The row fields are used directly.  Everything that keeps a string
      * past this iteration copies it: ga->probe via cloneString(), and (in
      * the kent library) hashAdd() and slNameAddHead() copy their string
      * argument.  Cloning the fields up front only leaked memory. */
     char *probe = row[0];
     char *gene = row[1];
 
     struct geneAlias *ga;
     struct hashEl *el = hashLookup(gaHash, probe);
     if (el)
         ga = el->val;
     else
         {
         ga = AllocA(struct geneAlias);
         ga->probe = cloneString(probe);
         ga->genes = NULL;
         hashAdd(gaHash, probe, ga);
         }
 
     slNameAddHead(&ga->genes, gene);
     }
 
 sqlFreeResult(&sr);
 return gaHash;
 }
 
 struct dataTypes *findDataType(struct sqlConnection *biConn, char *type, char *platform)
 /* Load the DT_TABLE rows whose format is "analysisVals" and whose name is
  * platform.  Aborts unless type is exactly "bed 15". */
 {
 if (!sameString(type, "bed 15"))
     errAbort("populateDb only runs on bed 15 files.");
 
 char sql[256];
 safef(sql, sizeof(sql),
       "select * from %s where format = \"%s\" and name = \"%s\"",
       DT_TABLE, "analysisVals", platform);
 
 struct dataTypes *found = dataTypesLoadByQuery(biConn, sql);
 return found;
 }
 
 struct dataTypes *createDataType(struct sqlConnection *biConn, char *type, char *platform)
 /* Build a dataTypes record named platform with format "analysisVals", using
  * the current row count of DT_TABLE as its id, save it to DT_TABLE and return
  * it.  The type argument is not consulted. */
 {
 struct dataTypes *newType;
 AllocVar(newType);
 newType->id = sqlTableSize(biConn, DT_TABLE);
 newType->format = cloneString("analysisVals");
 newType->name = cloneString(platform);
 
 dataTypesSaveToDb(biConn, newType, DT_TABLE, 100);
 return newType;
 } 
 
 struct dataTypes *setupDataType(struct sqlConnection *biConn, 
 				char *type, char *platform)
 {
 if (!sqlTableExists(biConn, DT_TABLE))
     {
     fprintf(stderr, "Tables dataTypes doesn't exist, creating...\n");
     createDataTypesTable(biConn, DT_TABLE);
     }
 
 struct dataTypes *dt = findDataType(biConn, type, platform);
 if (!dt)
     dt = createDataType(biConn, type, platform);
 
 return dt;
 }
 
 
 struct tissues *findTissue(struct sqlConnection *biConn, char *tissue)
 /* Load the TI_TABLE rows whose name equals tissue. */
 {
 char sql[256];
 safef(sql, sizeof(sql),
       "select * from %s where name = \"%s\";",
       TI_TABLE, tissue);
 struct tissues *found = tissuesLoadByQuery(biConn, sql);
 return found;
 }
 
 struct tissues *createTissue(struct sqlConnection *biConn, char *tissue)
 /* Build a tissues record named tissue, using the current row count of
  * TI_TABLE as its id, save it to TI_TABLE and return it. */
 {
 struct tissues *newTissue;
 AllocVar(newTissue);
 newTissue->id = sqlTableSize(biConn, TI_TABLE);
 newTissue->name = cloneString(tissue);
 
 tissuesSaveToDb(biConn, newTissue, TI_TABLE, 100);
 return newTissue;
 } 
 
 struct tissues *setupTissue(struct sqlConnection *biConn, char *tissue)
 {
 if (!sqlTableExists(biConn, TI_TABLE))
     {
     fprintf(stderr, "Tables tissues doesn't exist, creating...\n");
     createTissuesTable(biConn,  TI_TABLE);
     }
 
 struct tissues *ti = findTissue(biConn, tissue);
 if (!ti)
     ti = createTissue(biConn, tissue);
 
 return ti;
 }
 
 
 struct datasets *findDataset(struct sqlConnection *biConn, char *name)
 /* Load the DA_TABLE rows whose data_table column equals name. */
 {
 char sql[256];
 safef(sql, sizeof(sql),
       "select * from %s where data_table = \"%s\";",
       DA_TABLE, name);
 struct datasets *found = datasetsLoadByQuery(biConn, sql);
 return found;
 }
 
 
 struct datasets *createDataset(struct sqlConnection *biConn, 
                                char *tableName, char *tissue, int numSamples)
 /* Build a datasets record for tableName from its datasets.ra settings
  * (shortLabel, name, dataType, optional platform), save it to DA_TABLE and
  * return it.  The data type and tissue entries are set up on demand.  The new
  * id is the current row count of DA_TABLE.  Aborts when a required setting is
  * missing. */
 {
 struct hash *settings = getSettings(tableName);
 
 /* shortLabel and name are cloned because the returned record owns them. */
 struct hashEl *el = hashLookup(settings, "shortLabel");
 if (!el)
     errAbort("No shortLabel");
 char *shortLabel = cloneString(el->val);
 
 el = hashLookup(settings, "name");
 if (!el)
     errAbort("No name");
 char *dataTable = cloneString(el->val);
 
 /* platform and dataType are only read during setupDataType(), which clones
  * what it stores, so they are used in place rather than cloned and leaked. */
 el = hashLookup(settings, "platform");
 char *platform = el ? (char *)el->val : "Expression";
 
 el = hashLookup(settings, "dataType");
 if (!el)
     errAbort("No dataType");
 char *dataType = el->val;
 
 struct dataTypes *dt = setupDataType(biConn, dataType, platform);
 struct tissues *ti = setupTissue(biConn, tissue);
 
 int nextId = sqlTableSize(biConn, DA_TABLE);
 
 struct datasets *da;
 AllocVar(da);
 da->id = nextId;
 da->tissue_id = ti->id;
 da->type_id = dt->id;
 da->num_samples = numSamples;
 da->name = shortLabel;
 da->data_table = dataTable;
 
 dataTypesFree(&dt);
 tissuesFree(&ti);
 
 /* Write datasets */
 datasetsSaveToDbEscaped(biConn, da, DA_TABLE, 100); 
 
 return da;
 }
 
 struct datasets *setupDataset(struct sqlConnection *biConn, 
                               char *tableName, char *tissue, int numSamples)
 /* Make sure DA_TABLE exists, then return the datasets entry whose data_table
  * is tableName, creating and saving a new one when none is found. */
 {
 if (!sqlTableExists(biConn, DA_TABLE))
     {
     /* Newline added so this message ends its line like the other
      * "doesn't exist, creating..." messages in this file. */
     fprintf(stderr, "Tables datasets doesn't exist, creating...\n");
     createDatasetsTable(biConn, DA_TABLE);
     }
 
 struct datasets *da = findDataset(biConn, tableName);
 if (!da)
     da = createDataset(biConn, tableName, tissue, numSamples);
 
 return da;
 }
 
 
 char *findPatientName(struct sqlConnection *pdConn, char *pTable, 
                       char *pField, char *sField, char *sName)
 /* Select pField from pTable for the row whose sField equals sName and return
  * whatever sqlQuickString() yields for that query. */
 {
 char sql[256];
 safef(sql, sizeof(sql),
       "select %s from %s where %s = \"%s\"",
       pField, pTable, sField, sName);
 
 char *patient = sqlQuickString(pdConn, sql);
 return patient;
 }
 
 int findId(struct sqlConnection *biConn, char *idField, char *sField, char *name)
 /* Pick the idField value to use for name in SA_TABLE: 0 when the table is
  * empty, the existing value when a row with sField == name is present, and
  * otherwise one more than the current maximum of idField. */
 {
 if (sqlTableSize(biConn, SA_TABLE) == 0)
     return 0;
 
 char sql[256];
 safef(sql, sizeof(sql), 
       "select DISTINCT %s from %s where %s = \"%s\";",
       idField, SA_TABLE, sField, name);
 
 if (!sqlExists(biConn, sql))
     {
     /* Name not seen before: allocate the next id after the largest one. */
     safef(sql, sizeof(sql),
           "select max(%s) from %s;", 
           idField, SA_TABLE);
     return sqlQuickNum(biConn, sql) + 1;
     }
 
 /* Name already present: reuse its id. */
 return sqlQuickNum(biConn, sql);
 }
 
 boolean sampleExists(struct sqlConnection *biConn, struct samples *sa)
 /* Return TRUE when SA_TABLE already holds a row matching every field of sa
  * (id, name, patient_id, patient_name, dataset_id, tissue_id). */
 {
 char sql[256];
 safef(sql, sizeof(sql),
       "select * from %s where id = %d "
       "and name = \"%s\" "
       "and patient_id = %d "
       "and patient_name = \"%s\" "
       "and dataset_id = %d "
       "and tissue_id = %d ",
       SA_TABLE,
       sa->id,
       sa->name,
       sa->patient_id,
       sa->patient_name,
       sa->dataset_id,
       sa->tissue_id);
 
 boolean found = sqlExists(biConn, sql);
 return found;
 }
 
 void createSamples(struct sqlConnection *biConn, struct datasets *da, struct maGrouping *allA)
 /* Add one SA_TABLE row per array name in allA for dataset da, skipping rows
  * that already exist.  With -tcga the sample and patient names are the first
  * 16 and 12 characters of the array name.  Otherwise the sample name is the
  * array name and the patient name is looked up in the patient database named
  * by the dataset's datasets.ra settings (patDb, patTable, patField,
  * sampleField).  Aborts when one of those settings is missing. */
 {
 int datasetId = da->id;
 int tissueId = da->tissue_id;
 
 struct hash *settings = getSettings(da->data_table);
 
 /* The setting values are only read within this function, so they are used
  * in place.  Cloning them, as was done before, leaked four strings a call. */
 struct hashEl *el = hashLookup(settings, "patDb");
 if (!el)
     errAbort("No patDb!");
 char *patDb = el->val;
 
 el = hashLookup(settings, "patTable");
 if (!el)
     errAbort("No patTable");
 char *patTable = el->val;
 
 el = hashLookup(settings, "patField");
 if (!el)
     errAbort("No patField");
 char *patField = el->val;
 
 el = hashLookup(settings, "sampleField");
 if (!el)
     errAbort("No sampleField");
 char *sampleField = el->val;
 
 struct sqlConnection *pdConn = hAllocConnProfile("localDb", patDb);
 
 int i;
 struct samples *sa;
 for (i = 0; i < allA->size; i++)
     {
     char *sampleName, *patientName;
     if (isTCGA)
         {
         sampleName = cloneStringZ(allA->names[i], 16);
         patientName = cloneStringZ(allA->names[i], 12);
         }
     else
         {
         sampleName = cloneString(allA->names[i]);
         /* NOTE(review): patientName is used unchecked below; confirm the
          * lookup cannot come back NULL for a sample missing from patTable. */
         patientName = findPatientName(pdConn, patTable, patField, sampleField, sampleName); 
         }
 
     int sampleId = findId(biConn, "id", "name", sampleName);
     int patientId = findId(biConn, "patient_id", "patient_name", patientName);
 
     AllocVar(sa);
     sa->id = sampleId;
     sa->name = sampleName;
     sa->patient_id = patientId;
     sa->patient_name = patientName;
     sa->dataset_id = datasetId;
     sa->tissue_id = tissueId;
 
     if (!sampleExists(biConn, sa))
         samplesSaveToDb(biConn, sa, SA_TABLE, 100);
 
     samplesFree(&sa);
     }
 
 hFreeConn(&pdConn);
 }
 
 struct samples *getSamples(struct sqlConnection *biConn, struct datasets *da)
 /* Load the SA_TABLE rows belonging to dataset da, ordered by id. */
 {
 char sql[256];
 safef(sql, sizeof(sql),
       "select * from %s where dataset_id = %d order by id;",
       SA_TABLE, da->id);
 
 struct samples *loaded = samplesLoadByQuery(biConn, sql);
 return loaded;
 }
 
 struct samples *setupSamples(struct sqlConnection *biConn, struct datasets *da, 
 			     struct maGrouping *allA)
 {
 if (!sqlTableExists(biConn, SA_TABLE))
     {
     fprintf(stderr, "Table samples doesn't exist, creating...\n");
     createSamplesTable(biConn, SA_TABLE);
     }
 
 createSamples(biConn, da, allA);
 struct samples *saList = getSamples(biConn, da);
 
 //if (slCount(saList) != allA->size)
 //    errAbort("Sample count from microarrayGroups and database don't match!");
 
 return saList;
 }
 
 
 int getFeatureId(struct sqlConnection *biConn, char *name)
 /* Pick the id to use for feature name in FE_TABLE: 0 when the table is
  * empty, the stored id when the name is already present, and otherwise the
  * current row count of the table. */
 {
 if (sqlTableSize(biConn, FE_TABLE) == 0)
     return 0;
 
 char sql[256];
 safef(sql, sizeof(sql), 
       "select id from %s where name = \"%s\";",
       FE_TABLE, name);
 
 if (!sqlExists(biConn, sql))
     return sqlTableSize(biConn, FE_TABLE);
 
 /* Feature already present: reuse its id. */
 return sqlQuickNum(biConn, sql);
 }
 
 struct features *getFeature(struct sqlConnection *biConn, char *name)
 /* Load the FE_TABLE rows whose name column equals name. */
 {
 char sql[256];
 safef(sql, sizeof(sql),
       "select * from %s where name = \"%s\";",
       FE_TABLE, name);
 
 struct features *loaded = featuresLoadByQuery(biConn, sql);
 return loaded;
 }
 
 boolean featureExists(struct sqlConnection *biConn, struct features *fs)
 /* Return TRUE when FE_TABLE has a row whose name equals fs->name. */
 {
 char sql[256];
 safef(sql, sizeof(sql),
       "select * from %s where name = \"%s\";",
       FE_TABLE, fs->name);
 
 boolean found = sqlExists(biConn, sql);
 return found;
 }
 
 boolean clinicalDataExists(struct sqlConnection *biConn, struct clinicalData *cd)
 /* Return FALSE when CD_TABLE has no row for (cd->sample_id, cd->feature_id).
  * When a row exists, check that it is unique and that its val (and its code,
  * when both codes are set) matches cd, aborting on any mismatch, then return
  * TRUE.  The row loaded for the comparison is freed before returning. */
 {
 char query[256];
 safef(query, sizeof(query),
       "select * from %s where sample_id = %d "
       "and feature_id = %d; ",
       CD_TABLE, cd->sample_id, cd->feature_id);
 
 if (!sqlExists(biConn, query))  /* entry doesn't exist, report */
     return FALSE;
 
 /* Make sure entry has same values, if not there is a problem 
  * (sample_id, feature_id) should be unique */
 
 struct clinicalData *cd2 = clinicalDataLoadByQuery(biConn, query);
 if (slCount(cd2) != 1)
     errAbort("clinicalData entries not unique, sample_id = %d, feature_id = %d",
              cd->sample_id, cd->feature_id);
 
 if (cd->val != cd2->val)
     errAbort("clinicalData values don't match, sample_id = %d, feature_id = %d, "
              "%f != %f",
              cd->sample_id, cd->feature_id, cd->val, cd2->val);
 
 if (cd->code && cd2->code)
     if (!sameString(cd->code, cd2->code))	
         errAbort("clinicalData codes don't match, sample_id = %d, feature_id = %d",
                  cd->sample_id, cd->feature_id);
 
 /* cd2 is known to be a single element here, so the single-element free
  * releases all of it. */
 clinicalDataFree(&cd2);
 return TRUE;
 }
 
 void setupClinicalInfo(struct sqlConnection *biConn, struct datasets *da, struct samples *saList)
 {
 if (!saList)
     return;
 
 if (!sqlTableExists(biConn, FE_TABLE))
     {
     fprintf(stderr, "Table features doesn't exist, creating...\n");
     createFeaturesTable(biConn, FE_TABLE);
     }
 
 if (!sqlTableExists(biConn, CD_TABLE))
     {
     fprintf(stderr, "Table clinicalData doesn't exist, creating...\n");
     createClinicalDataTable(biConn, CD_TABLE);
     }
 
 struct hash *settings = getSettings(da->data_table);
 
 struct hashEl *el = hashLookup(settings, "raFile");
 if (!el)
     errAbort("No raFile");
 char *raFile = cloneString(el->val);
 
 el = hashLookup(settings, "patDb");
 if (!el)
     errAbort("No patDb");
 char *patDb = cloneString(el->val);
 
 el = hashLookup(settings, "patTable");
 if (!el)
     errAbort("No patTable");
 char *patTable = cloneString(el->val);
 
 el = hashLookup(settings, "patField");
 if (!el)
     errAbort("No patField");
 char *patField = cloneString(el->val);
 
 el = hashLookup(settings, "sampleField");
 if (!el)
     errAbort("No sampleField");
 char *sampleField = cloneString(el->val);
 
 if (!raFile || !patDb || !patTable || !patField || !sampleField)
     errAbort("Incomplete ra entry for %s.", da->data_table);
 
 struct sqlConnection *pdConn = hAllocConnProfile("localDb", patDb); //connection to patient data
 
 if (DEBUG)
     fprintf(stderr, "Getting columns of clinical data...\n");
 struct column *col, *colList = getColumns(pdConn, raFile, patDb);
 
 /* Set up features */
 struct features *fs;
 for (col = colList; col; col = col->next)
     {
     char *name = col->name;
     char *shortLabel = col->shortLabel;
     char *longLabel = col->longLabel;
 
     int id = getFeatureId(biConn, name);
     AllocVar(fs);
     fs->id = id;
     fs->name = cloneString(name);
     fs->shortLabel = cloneString(shortLabel);
     fs->longLabel= cloneString(longLabel);
 
     if (!featureExists(biConn, fs))
 	featuresSaveToDbEscaped(biConn, fs, FE_TABLE, 100);
     featuresFree(&fs);
 
     fs = getFeature(biConn, name);
     if (!fs)
 	errAbort("Could not find feature %s.", name);
     
     if (slCount(fs) != 1)
 	errAbort("Could not find unique feature by name = %s.", name);
 
     /* Loop through all samples, putting data in database */
     struct samples *sa; 
     struct clinicalData *cd;
     for (sa = saList; sa; sa = sa->next)
 	{
 	struct slName *id = slNameNew(getId(pdConn, patTable, patField, sa->name, sampleField));
 
 	char *cellVal = col->cellVal(col, id, pdConn);
 	if (!cellVal)
 	    continue; 
 
 	AllocVar(cd);
 	cd->sample_id = sa->id;
 	cd->feature_id = fs->id;
 
 	cd->val = atof(cellVal);
 	cd->code = NULL;
 	if (col->cellCoded(col, pdConn))
 	    cd->code = cloneString(col->cellCodedVal(col, id, pdConn));
 	
 	if (!clinicalDataExists(biConn, cd))
 	    clinicalDataSaveToDb(biConn, cd, CD_TABLE, 100);
 
 	clinicalDataFree(&cd);
 	slNameFree(&id);
 	}
     featuresFree(&fs);
     }
 
 hFreeConn(&pdConn);
 }
 
 
 struct analysisFeatures *getAnalysisFeatures(struct sqlConnection *biConn, 
                                              char *names, char *type)
 /* Load the AF_TABLE rows of the given type whose feature_name is one of the
  * comma-separated names.  Returns NULL when names is NULL or contains no
  * names.  The parsed name list and the query text are freed before
  * returning. */
 {
 if (!names)
     return NULL;
 struct slName *sl, *slList = slNameListFromComma(names);
 if (!slList)
     return NULL;    /* an empty "in ()" list is not valid SQL */
 
 struct dyString *dy = newDyString(100);
 dyStringPrintf(dy, 
                "select * from %s where type = \"%s\" "
                "and feature_name in (", AF_TABLE, type);
 for (sl = slList; sl; sl = sl->next)
     {
     dyStringPrintf(dy, "\"%s\"", sl->name);
     if (sl->next)
         dyStringPrintf(dy, ",");
     }
 dyStringPrintf(dy, ");");
 char *query = dyStringCannibalize(&dy);
 
 struct analysisFeatures *afList = analysisFeaturesLoadByQuery(biConn, query);
 
 freeMem(query);
 slNameFreeList(&slList);
 return afList;
 }
 
 
 
 void addDataToHash(struct hash *dataHash, char *gene, 
                    unsigned int expCount, float *expScores, struct maGrouping *allA)
 /* Record one probe's expression scores under gene.  dataHash maps a gene
  * name to a per-gene hash, which maps a sample name to an slDouble list of
  * every score seen for that gene and sample.  Score i belongs to the sample
  * named allA->names[i] (its first 16 characters with -tcga). */
 {
 struct hash *hash;
 struct hashEl *el = hashLookup(dataHash, gene);
 if (!el)
     {
     hash = hashNew(0);
     hashAdd(dataHash, gene, hash);
     }
 else
     hash = el->val;
 
 unsigned int i;
 for (i = 0; i < expCount; i++)
     {
     float val = expScores[i];
     char *name;
     if (isTCGA)
         name = cloneStringZ(allA->names[i], 16);
     else
         name = cloneString(allA->names[i]);
 
     struct slDouble *sd = slDoubleNew(val);
     
     el = hashLookup(hash, name);
     if (!el)
         hashAdd(hash, name, sd);
     else
         {
         /* The list is non-empty, so appending never changes its head and
          * el->val stays valid. */
         struct slDouble *sdList = el->val;
         slAddTail(&sdList, sd);
         }
     /* freeMem() takes the pointer to release, not its address.  Passing
      * &name handed the allocator the address of a stack variable. */
+    freeMem(name);
     }
 }
 
 boolean reduceDataHash(struct hash *dataHash, double *retMed, double *retStd)
 {
 struct slDouble *sd, *allSd, *allSdList = NULL;
 struct hashEl *outEl;
 struct hashCookie cookie = hashFirst(dataHash);
 while ((outEl = hashNext(&cookie)) != NULL)
     {
     struct hash *hash = outEl->val;
 
     struct hashEl *inEl, *elList = hashElListHash(hash);
     for (inEl = elList; inEl != NULL; inEl = inEl->next)
 	{
 	char *sample = inEl->name;
 	struct slDouble *sdList = inEl->val;
 	double med = slDoubleMedian(sdList);
 	sd = slDoubleNew(med);
 	allSd = slDoubleNew(med);
 
 	slAddHead(&allSdList, allSd);
 	
 	hashRemove(hash, sample);
 	slFreeList(&sdList);
 
 	hashAdd(hash, sample, sd);
 	}
     hashElFreeList(&elList);
     }
 
 double count = (double) slCount(allSdList);
 double allMedian = slDoubleMedian(allSdList);
 
 slSort(allSdList, slDoubleCmp);
 int low = round(count * (0.0015));
 int high = round(count * (1.0 - 0.0015));
 
 sd = slElementFromIx(allSdList, low);
 double lowVal = sd->val;
 
 sd = slElementFromIx(allSdList, high);
 double highVal = sd->val;
 
 double mad = max(fabs(lowVal - allMedian), fabs(highVal - allMedian))/3.0;
 double std = mad * 1.43;
 
 *retStd = std;
 *retMed = allMedian;
 
+slFreeList(&allSdList);
 return TRUE;
 }
 
 struct analysisVals *getAnalysisVals(struct sqlConnection *biConn, struct hash *dataHash, 
                                      double med, double std)
 /* Turn the reduced dataHash (gene -> sample -> single slDouble) into a list
  * of analysisVals.  Genes without a "gene" entry in the analysis features
  * table are skipped.  For each remaining (gene, sample) pair, val is the
  * stored value and conf is a signed -log10 of ndtr(-|z|), where
  * z = (val - med)/std, capped at 88 and negated when z is negative.  Aborts
  * when a sample name has no id in SA_TABLE.  The list is returned in
  * head-insertion order. */
 {
 struct hash *sampleHash = createIdHash(biConn, SA_TABLE, "name");
 
 double z, p, val;
 double maxLogP = 88.0;
 struct hashEl *inEl, *outEl;
 struct analysisVals *av, *avList = NULL;
 struct hashCookie cookie = hashFirst(dataHash);
 while ((outEl = hashNext(&cookie)) != NULL)
     {
     char *gene = outEl->name;
     struct hash *hash = outEl->val;
 
     struct analysisFeatures *af = getAnalysisFeatures(biConn, gene, "gene");
 
     if (!af)
         continue;
 
     struct hashEl *elList = hashElListHash(hash);
     for (inEl = elList; inEl != NULL; inEl = inEl->next)
         {
         char *sample = inEl->name;
         struct slDouble *sd = inEl->val;
 
         int sample_id = hashIntValDefault(sampleHash, sample, -1);
         if (sample_id == -1)
             errAbort("No sample by name of %s\n", sample);
 
         AllocVar(av);
         av->sample_id = sample_id;
         av->feature_id = af->id;
         av->val = sd->val;
 
         z = (av->val - med)/std;
         p = ndtr(-1.0*fabs(z));
         if (p > 0)
             val = min(-log(p)/log(10.0), maxLogP);
         else
             val = maxLogP;
 	
         if (z < 0.0)
             val = -1.0*val;  // signed log(p-value)
         av->conf = val; 
 
         slAddHead(&avList, av);
         }
     /* The query may return more than one row, so free the whole list. */
+    analysisFeaturesFreeList(&af);
     hashElFreeList(&elList);
     }
 
 /* NOTE(review): assumes createIdHash() returns a hash this caller owns --
  * confirm against its definition. */
 hashFree(&sampleHash);
 return avList;
 }
 
 void setupProbeData(struct sqlConnection *hgConn, struct sqlConnection *biConn, 
 		    struct datasets *da, struct maGrouping *allA)
 /* Build the analysisVals table named da->data_table in biConn from the
  * table of the same name read through hgConn.  With dropTable set, an
  * existing table is dropped first; without it, an existing table makes this
  * function return without doing anything.  Probes are mapped to genes through
  * the dataset's aliasTable setting, scores are gathered per gene and sample,
  * reduced by reduceDataHash(), converted by getAnalysisVals() and stored. */
 {
 char *dataTable = da->data_table;
 
 if (!dataTable)
     errAbort("datasets entry not complete, data_table not set.");
 
 boolean inputProbeVals = FALSE;
 if (sqlTableExists(biConn, dataTable) && dropTable)
     {
     fprintf(stderr, "analysisVals table %s already exists in db, dropping...\n", dataTable);
     sqlDropTable(biConn, dataTable);
     }
 
 /* Only a missing table (never there, or just dropped) is filled in. */
 if (!sqlTableExists(biConn, dataTable))
     {
     fprintf(stderr, "Creating analysisVals table %s...\n", dataTable);
     inputProbeVals = TRUE;  // empty table, input
     }
 
 if (!inputProbeVals)
     return;
 
 struct hash *settings = getSettings(dataTable);
 struct hashEl *el = hashLookup(settings, "aliasTable");
 if (!el)
     errAbort("No aliasTable.\n");
 char *aliasTable = cloneString(el->val);
 /* gaHash: probe name -> struct geneAlias carrying that probe's genes. */
 struct hash *gaHash = getAliases(hgConn, aliasTable);
 
 char query[256];
 safef(query, sizeof(query), "select * from %s;", dataTable);
 
 /* Get bed15 data from hg18 database */
 struct sqlResult *sr = sqlGetResult(hgConn, query);
 
 /* dataHash: gene -> (sample -> list of scores), filled by addDataToHash(). */
 struct hash *dataHash = hashNew(0);
 
 char **row = NULL;
 while ((row = sqlNextRow(sr)) != NULL)
     {
     /* row+1 skips the first column of the table before loading 15 bed
      * fields.  NOTE(review): presumably a leading bin column -- confirm. */
     struct bed *nb = bedLoadN(row+1, 15);
-
-    char *name        = nb->name; // row[4];
-    unsigned expCount = nb->expCount; //sqlUnsigned(row[13]);
-    float *expScores  = nb->expScores; // row[15];
-
-    struct hashEl *el = hashLookup(gaHash, name);
-    if (!el)
-	continue;
+    struct hashEl *el = hashLookup(gaHash, nb->name);
+    if (el)
+	{
     struct geneAlias *ga = el->val;
     struct slName *sl;
     for (sl = ga->genes; sl; sl = sl->next)
-	addDataToHash(dataHash, sl->name, expCount, expScores, allA);
+	    addDataToHash(dataHash, sl->name, nb->expCount, nb->expScores, allA);
+	}
+    bedFree(&nb);
     }
+sqlFreeResult(&sr);
 
 if (hashNumEntries(dataHash) == 0)
     errAbort("no entries in hash\n");
 
 fprintf(stderr, "\treducing hash...\n");
 double med, std;
 if (!reduceDataHash(dataHash, &med, &std))
     errAbort("problem reducing hash\n");
 
 fprintf(stderr, "\tconverting hash to analysisVals...\n");
 struct analysisVals *avList = getAnalysisVals(biConn, dataHash, med, std);
 
 fprintf(stderr, "\tstoring analysisVals...\n");
 storeAnalysisValsInDb(biConn, dataTable, avList);
 analysisValsFreeList(&avList);
+hashFree(&dataHash);
 }
 
 
 int getProbeId(struct sqlConnection *biConn, char *tableName, char *name)
 /* Return the id stored in tableName for the row called name, or -1 when no
  * such row exists. */
 {
 char sql[128];
 safef(sql, sizeof(sql),
       "select id from %s where name = \"%s\"",
       tableName, name);
 
 if (sqlExists(biConn, sql))
     return sqlQuickNum(biConn, sql);
 return -1;
 }
 
 
 char *getPathwayDescription(struct sqlConnection *pdConn, char *name)
 {
 if (!sqlTableExists(pdConn, name))
     return NULL;
 
 char query[128];
 safef(query, sizeof(query), 
       "select description from descriptions where name = \"%s\";",
       name);
 
 return sqlQuickString(pdConn, query);
 }
 
 void setupGenesets(struct sqlConnection *biConn)
 /* Create and fill the geneset tables (GE_TABLE, GG_TABLE, GI_TABLE) in biConn
  * from the "genesets" table of the "pathway" database.  With dropTable set,
  * existing tables are dropped first.  Only tables created during this call
  * are filled; if none was created the function returns early.  A geneset is
  * skipped when it has no "geneset" analysis feature or none of its member
  * genes has a "gene" analysis feature.  Every resource acquired here (the
  * pathway connection, the name/member lists, the per-geneset feature lists)
  * is released on all paths. */
 {
 boolean inputGenesets = FALSE;
 boolean inputGenesetInfo = FALSE;
 boolean inputGenesetGenes = FALSE;
 
 struct sqlConnection *pdConn = hAllocConnProfile("localDb", "pathway");
 if (!pdConn)
     errAbort("Could not connect to pathways database.\n");
 
 if (sqlTableExists(biConn, GE_TABLE) && dropTable)
     {
     fprintf(stderr, "%s table already exists, dropping and recreating.\n", GE_TABLE);
     sqlDropTable(biConn, GE_TABLE);
     }
 if (!sqlTableExists(biConn, GE_TABLE))
     {
     fprintf(stderr, "Creating %s table.\n", GE_TABLE);
     createGenesetsTable(biConn, GE_TABLE);
     inputGenesets = TRUE;
     }
 
 if (sqlTableExists(biConn, GG_TABLE) && dropTable)
     {
     fprintf(stderr, "%s table already exists, dropping and recreating.\n", GG_TABLE);
     sqlDropTable(biConn, GG_TABLE);
     }
 if (!sqlTableExists(biConn, GG_TABLE))
     {
     fprintf(stderr, "Creating %s table.\n", GG_TABLE);
     createGenesetGenesTable(biConn, GG_TABLE);
     inputGenesetGenes = TRUE;
     }
 
 if (sqlTableExists(biConn, GI_TABLE) && dropTable)
     {
     fprintf(stderr, "%s table already exists, dropping and recreating.\n", GI_TABLE);
     sqlDropTable(biConn, GI_TABLE);
     }
 if (!sqlTableExists(biConn, GI_TABLE))
     {
     fprintf(stderr, "Creating %s table.\n", GI_TABLE);
     createGenesetInfoTable(biConn, GI_TABLE);
     inputGenesetInfo = TRUE;
     }
 
 if (!inputGenesets && !inputGenesetInfo && !inputGenesetGenes)
     {
     fprintf(stderr, "Nothing to do for geneset tables.\n");
     hFreeConn(&pdConn);
     return;
     }
 
 /* Setting up pathways table */
 char query[128];
 safef(query, sizeof(query), "select * from genesets;");
 
 struct sqlResult *sr = sqlGetResult(pdConn, query);
 char **row = NULL;
 
 /* Save all data in lists to avoid "out of sync" error when attempting
  * query inside of a running query on same db */
 struct slName *na, *names = NULL;
 struct slName *ge, *genes = NULL;
 while ((row = sqlNextRow(sr)) != NULL)
     {
     slNameAddHead(&names, row[0]);
     slNameAddHead(&genes, row[1]);
     }
 slReverse(&names);
 slReverse(&genes);
 sqlFreeResult(&sr);
 
 for (na = names, ge = genes; na && ge; na = na->next, ge = ge->next)
     {
     char *name = na->name;
     char *members = ge->name;
 
     struct analysisFeatures *gsAf = getAnalysisFeatures(biConn, name, "geneset");
     struct analysisFeatures *af, *afList = getAnalysisFeatures(biConn, members, "gene"); 
 
     /* Both lookups must succeed.  The frees below run either way, so a
      * skipped geneset no longer leaks the lookup that did succeed. */
     if (gsAf && afList)
         {
         if (inputGenesets)
             {
             struct genesets *gs;
             AllocVar(gs);
             gs->id = gsAf->id;
             gs->name = cloneString(name);
             gs->source = cloneString("N/A");
             genesetsSaveToDb(biConn, gs, GE_TABLE, 100);
             genesetsFree(&gs);
             }
 
         if (inputGenesetGenes)
             {
             /* One record is reused for every member gene of this geneset. */
             struct genesetGenes *gg;
             AllocVar(gg);
             gg->id = gsAf->id;
             for (af = afList; af; af = af->next)
                 {
                 gg->gene_id = af->id;
                 genesetGenesSaveToDb(biConn, gg, GG_TABLE, 100);
                 }
             genesetGenesFree(&gg);
             }
 
         if (inputGenesetInfo)
             {
             char *desc = getPathwayDescription(pdConn, name);
             if (desc)
                 {
                 struct genesetInfo *gi;
                 AllocVar(gi);
                 gi->id = gsAf->id;
                 gi->description = desc;   /* ownership passes to gi */
                 genesetInfoSaveToDbEscaped(biConn, gi, GI_TABLE, 200);
                 genesetInfoFree(&gi);
                 }
             }
         }
 
     analysisFeaturesFreeList(&gsAf);
     analysisFeaturesFreeList(&afList);
     }
 
 slNameFreeList(&names);
 slNameFreeList(&genes);
 hFreeConn(&pdConn);
 }
 
 boolean analysisFeatureExists(struct sqlConnection *biConn, struct analysisFeatures *af)
 /* Return TRUE when AF_TABLE already holds a row with the id, feature_name
  * and type of af. */
 {
 char sql[256];
 safef(sql, sizeof(sql),
       "select * from %s where id = %d "
       "and feature_name = \"%s\" "
       "and type = \"%s\"",
       AF_TABLE,
       af->id,
       af->feature_name,
       af->type);
 
 boolean found = sqlExists(biConn, sql);
 return found;
 }   
 
 int findIdForAnalysisFeature(struct sqlConnection *biConn, char *tableName,
                              struct analysisFeatures *af)
 /* Pick the id to use for af in tableName: 0 when the table is empty, the
  * stored id when a row with the same feature_name and type exists, and
  * otherwise one more than the largest id in the table. */
 {
 if (sqlTableSize(biConn, tableName) == 0)
     return 0;
 
 char sql[256];
 safef(sql, sizeof(sql),
       "select DISTINCT id from %s where feature_name = \"%s\" "
       "and type = \"%s\";",
       tableName, af->feature_name, af->type);
 
 if (!sqlExists(biConn, sql))
     {
     /* Feature not stored yet: take the next id after the current maximum. */
     safef(sql, sizeof(sql),
           "select max(id) from %s;",
           tableName);
     return sqlQuickNum(biConn, sql) + 1;
     }
 
 /* Feature already stored: reuse its id. */
 return sqlQuickNum(biConn, sql);
 }   
 
 void setupAnalysisFeatures(struct sqlConnection *biConn)
 /* Create AF_TABLE if needed and, only when it is empty, fill it with one
  * "gene" feature per distinct geneSymbol in KX_TABLE followed by one
  * "geneset" feature per name in the pathway database's genesets table.  Ids
  * are assigned by findIdForAnalysisFeature() as the rows are saved.  A
  * non-empty table is left untouched. */
 {
 if (!sqlTableExists(biConn, AF_TABLE))
     createAnalysisFeaturesTable(biConn, AF_TABLE);
 
 if (sqlTableSize(biConn, AF_TABLE) != 0)
     {
     fprintf(stderr, "%s table is not empty, doing nothing\n", AF_TABLE);
     return;
     }
 
 /* set up gene features */
 char query[256];
 safef(query, sizeof(query), 
       "select DISTINCT geneSymbol from %s;", KX_TABLE);
 struct slName *sl, *slList = sqlQuickList(biConn, query);
 
 struct analysisFeatures *af, *afList = NULL;
 
 for (sl = slList; sl; sl = sl->next)
     {
     AllocVar(af);
     af->id = 0;
     af->feature_name = cloneString(sl->name);
     af->type = cloneString("gene");
    
     slAddHead(&afList, af);
     }
 slNameFreeList(&slList);
 
 /* set up geneset features */
 struct sqlConnection *pdConn = hAllocConnProfile("localDb", "pathway");
 safef(query, sizeof(query), 
       "select name from genesets;");
 slList = sqlQuickList(pdConn, query);
 
 for (sl = slList; sl; sl = sl->next)
     {
     AllocVar(af);
     af->id = 0;
     af->feature_name = cloneString(sl->name);
     af->type = cloneString("geneset");
     
     slAddHead(&afList, af);
     }
 hFreeConn(&pdConn);
 slNameFreeList(&slList);
 
 /* Features were pushed on the head; restore genes-then-genesets order. */
 slReverse(&afList);
 
 /* set up pathway features (TODO) */
 
 
 /* save features to db */
 for (af = afList; af; af = af->next)
     {
     int feature_id = findIdForAnalysisFeature(biConn, AF_TABLE, af);
     af->id = feature_id;
     if (!analysisFeatureExists(biConn, af))
         analysisFeaturesSaveToDb(biConn, af, AF_TABLE, 10);
     }
 
 /* Free the whole list; the single-element free released only its head. */
 analysisFeaturesFreeList(&afList);
 }
 
 
 void populateDb(char *db, char *tableName, char *tissue)
 /* Run the whole population pipeline for microarray table tableName.  Opens
  * connections to db (the database being filled) and hgDb (the source), then
  * in order sets up analysis features, geneset tables, the datasets entry,
  * samples, clinical data and the probe-derived analysisVals table, timing
  * each step with uglyTime().  tissue is lower-cased in place.  Aborts when no
  * maGrouping is found for tableName. */
 {
 tissue = strLower(tissue);
 struct sqlConnection *biConn = hAllocConnProfile("localDb", db);
 struct sqlConnection *hgConn = hAllocConnProfile("localDb", hgDb);
 
 /* Create analysis features (if necessary) */
 uglyTime(NULL);
 fprintf(stderr, "Setting up analysis features...\n");
 setupAnalysisFeatures(biConn);
 uglyTime("Time");
 
 /* Create geneLookup table (if necessary) */
 //uglyTime(NULL);
 //fprintf(stderr, "Setting up geneLookup table...\n");
 //createGeneLookup(biConn);
 //uglyTime("Time");
 
 /* Set up pathways */
 uglyTime(NULL);
 fprintf(stderr, "Setting up pathways tables...\n");
 setupGenesets(biConn);
 uglyTime("Time");
 
 /* Set up datasets entry */
 struct maGrouping *allA = getMaGrouping(hgConn, tableName);
 if (!allA)
     errAbort("Could not find maGrouping for %s!", tableName);
 
 uglyTime(NULL);
 int numSamples = allA->size;
 fprintf(stderr, "Adding datasets entry...\n");
 struct datasets *da = setupDataset(biConn, tableName, tissue, numSamples);
 uglyTime("Time");
 
 /* Set up samples entries */
 uglyTime(NULL);
 fprintf(stderr, "Adding to samples table...\n");
 struct samples *saList = setupSamples(biConn, da, allA);
 uglyTime("Time");
 
 /* Set up features and clinicalData */
 uglyTime(NULL);
 fprintf(stderr, "Setting up clinical data tables...\n");
 setupClinicalInfo(biConn, da, saList);
 uglyTime("Time");
 
 /* Set up probeInfo table (if necessary) and probeVals table */
 uglyTime(NULL);
 fprintf(stderr, "Setting up probe data tables (be patient!)...\n");
 setupProbeData(hgConn, biConn, da, allA);
 uglyTime("Time");
 
 /* Newline added so "Done!" no longer runs into the next message. */
 fprintf(stderr, "Done!\n");
 fprintf(stderr, "Please run 'setCohort' to find datasets that have overlapping samples.\n");
 
 hFreeConn(&biConn);
 hFreeConn(&hgConn);
 }
 
 
 int main(int argc, char *argv[])
 /* Parse options, require exactly three positional arguments (db, table,
  * tissue), record the -dropAll and -tcga flags and run populateDb(). */
 {
 optionInit(&argc, argv, options);
 if (argc != 4)
     usage();
 
 dropTable = optionExists("dropAll") ? TRUE : FALSE;
 if (optionExists("tcga"))
     isTCGA = TRUE;
 
 char *db = argv[1];
 char *table = argv[2];
 char *tissue = argv[3];
 populateDb(db, table, tissue);
 return 0;
 }