src/hg/instinct/raToDb/raToDb.c 1.6

1.6 2010/04/11 03:32:45 jsanborn
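updated: added -alpha option (writes to the raDb table); each RA file is now folded into its own hash before being added to the list; checkClinicalData reports a missing clinical db, disconnects on failure paths, and prints sampleField (not patField) in the sampleField error; validation now prints pass/fail counts.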
Index: src/hg/instinct/raToDb/raToDb.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/raToDb/raToDb.c,v
retrieving revision 1.5
retrieving revision 1.6
diff -b -B -U 4 -r1.5 -r1.6
--- src/hg/instinct/raToDb/raToDb.c	10 Apr 2010 03:53:54 -0000	1.5
+++ src/hg/instinct/raToDb/raToDb.c	11 Apr 2010 03:32:45 -0000	1.6
@@ -24,19 +24,24 @@
 {
 errAbort(
   "raToDb - RA to database table converter RA to database converter\n"
   "usage:\n"
-  "   raToDb db tableName file.ra\n"
+  "   raToDb -alpha db file.ra\n"
   "\n"
   "    db = database to put ra_username table\n"
   "    file.ra = root ra file\n"
+  "options:\n"
+  "   -alpha  =  Make alpha database table (raDb)\n"
   );
 }
 
 static struct optionSpec options[] = {
+    {"alpha", OPTION_BOOLEAN},
    {NULL, 0},
 };
 
+boolean alpha = FALSE;
+
 static char *rootDir        = "../hgHeatmap2/hgHeatmapData";
 static char *hgCgiDir       = "../../makeDb/hgCgiData";
 static char *maGroupsFile   = "microarrayGroups.ra";
 static char *localDbProfile = "localDb";
@@ -183,23 +188,37 @@
 safef(fileName, sizeof(fileName), "%s/%s", rootDir, rootName);
 struct slName *sl, *slList = getRaIncludes(fileName);
 
 raFoldIn(fileName, hashOfHash);
+helList = hashElListHash(hashOfHash);
+for (hel = helList; hel != NULL; hel = hel->next)
+    {
+    ra = hel->val;
+    slAddHead(&raList, ra);
+    hel->val = NULL;
+    }
+hashElFreeList(&helList);
+hashFree(&hashOfHash);
+
 for (sl = slList; sl; sl = sl->next)
     {
     safef(fileName, sizeof(fileName), "%s/%s", rootDir, sl->name);
+    hashOfHash = newHash(10);
     raFoldIn(fileName, hashOfHash);
-    }
 
-/* Create list. */
-helList = hashElListHash(hashOfHash);
-for (hel = helList; hel != NULL; hel = hel->next)
+    helList = hashElListHash(hashOfHash);
+    for (hel = helList; hel != NULL; hel = hel->next)
     {
     ra = hel->val;
     slAddHead(&raList, ra);
     hel->val = NULL;
     }
-hashElFreeList(&helList);
+    hashElFreeList(&helList);
+    hashFree(&hashOfHash);
+    }
+
+/* Create list. */
+
 return raList;
 }
 
 struct microarrayGroups *maGroupingsForRa(char *database, char *table)
@@ -306,27 +325,35 @@
 int count = sqlQuickNum(conn, query);
 
 if (count != *(ra->expCount))
     {
-    printf("FAIL\t%s\tMicroarray groups count != count in dataset (%d != %d)\n", 
+    printf("  FAIL\t%s\tmaGroups count != count in data table (%d != %d)\n", 
 	   ra->name, count, *(ra->expCount));
     return 0;
     }
 return 1;
 }
 
 int checkClinicalData(struct raDb *ra)
 {
+if (!ra->patDb)
+    {
+    printf("  FAIL\t%s\tMissing clinical db in RA file.\n", ra->name);
+    return 0;
+    }
+
 struct sqlConnection *conn = sqlMayConnectProfile(ra->profile, ra->patDb);
 if (!conn)
     {
-    printf("FAIL\t%s\tClinical database '%s' does not exist.\n", ra->name, ra->patDb);
+    printf("  FAIL\t%s\tClinical db '%s' doesn't exist.\n", ra->name, ra->patDb);
+    sqlDisconnect(&conn);
     return 0;
     }
 
 if (!sqlTableExists(conn, ra->patTable))
     {
-    printf("FAIL\t%s\tClinical table '%s' does not exist.\n", ra->name, ra->patTable);
+    printf("  FAIL\t%s\tClinical table '%s' doesn't exist.\n", ra->name, ra->patTable);
+    sqlDisconnect(&conn);
     return 0;
     }
 
 struct slName *sl, *slList = sqlFieldNames(conn, ra->patTable);
@@ -334,23 +361,25 @@
 boolean pfExists = FALSE;
 boolean sfExists = FALSE;
 for (sl = slList; sl; sl = sl->next)
     {
-    if (sameString(sl->name, ra->patField))
+    if (sameWord(sl->name, ra->patField))
 	pfExists = TRUE;
-    if (sameString(sl->name, ra->sampleField))
+    if (sameWord(sl->name, ra->sampleField))
 	sfExists = TRUE;
     }
 if (!pfExists)
     {
-    printf("FAIL\t%s\tPatient Field '%s' not in table '%s'.\n", 
+    printf("  FAIL\t%s\tpatField '%s' not in table '%s'.\n", 
 	   ra->name, ra->patField, ra->patTable);
+    sqlDisconnect(&conn);
     return 0;
     }
 if (!sfExists)
     {
-    printf("FAIL\t%s\tSample Field '%s' not in table '%s'.\n", 
-	   ra->name, ra->patField, ra->patTable);
+    printf("  FAIL\t%s\tsampleField '%s' not in table '%s'.\n", 
+	   ra->name, ra->sampleField, ra->patTable);
+    sqlDisconnect(&conn);
     return 0;
     }
 
 struct hash *raHash, *raHashList = readRaFile(ra->raFile);
@@ -367,15 +396,23 @@
     char *table    = cloneString(nextWord(&queryType));
     char *keyField = cloneString(nextWord(&queryType));
     char *valField = cloneString(nextWord(&queryType));
 
+    
+    if (!sqlTableExists(conn, table))
+	{
+	printf("  WARN\t%s\tTable '%s' does not exist in clinical db '%s'.\n", 
+	       ra->name, table, ra->patDb);
+	continue;
+	}
+
     safef(query, sizeof(query), "select %s,%s from %s limit 1", 
 	  keyField, valField, table);
-
     if (!sqlExists(conn, query))
 	{
-	printf("FAIL\t%s\tValue field '%s' for column %s does not exist.\n", 
+	printf("  FAIL\t%s\tvalField '%s' for feature '%s' doesn't exist.\n", 
 	       ra->name, valField, name);
+	sqlDisconnect(&conn);
 	return 0;
 	}
     }
 
@@ -392,9 +429,9 @@
 
 struct sqlConnection *conn = hAllocConnProfile(ra->profile, genomicDb);
 if (!sqlTableExists(conn, ra->name))
     {
-    printf("FAIL\t%s\tGenomic data not in hg18 (%s).\n", ra->name, ra->profile);
+    printf("  FAIL\t%s\tData table not in hg18 db (%s).\n", ra->name, ra->profile);
     hFreeConn(&conn);  // No longer need connection
     return NULL;
     }
 ra->dataType = mustGetString(raHash, "dataType");
@@ -405,9 +442,9 @@
     {
     struct microarrayGroups *maGs = maGroupingsForRa(genomicDb, ra->name);
     if (!maGs)
 	{
-	printf("FAIL\t%s\tBad microarray groups\n", ra->name);
+	printf("  FAIL\t%s\tBad maGroups.\n", ra->name);
 	hFreeConn(&conn);  // No longer need connection
 	return NULL;
 	}
     struct maGrouping *allA = maGs->allArrays;
@@ -422,23 +459,23 @@
 
 ra->accessTable = getOptionalString(raHash, "accessTable", ra->name);
 if (!sqlTableExists(conn, ra->accessTable))
     {
-    printf("WARN\t%s\tDown-sampled table '%s' not in database.\n", ra->name, ra->accessTable);
+    printf("  WARN\t%s\taccessTable '%s' not in db.\n", ra->name, ra->accessTable);
     ra->accessTable = cloneString(ra->name);
     }
 
 ra->aliasTable  = getOptionalString(raHash, "aliasTable", NULL);
 if (ra->aliasTable && !sqlTableExists(conn, ra->aliasTable))
     {
-    printf("WARN\t%s\tProbe->Gene Alias table '%s' not in database.\n", ra->name, ra->aliasTable);
+    printf("  WARN\t%s\taliasTable '%s' not in db.\n", ra->name, ra->aliasTable);
     ra->aliasTable = NULL;
     }
 
 ra->displayNameTable = getOptionalString(raHash, "displayNameTable", NULL);
 if (ra->displayNameTable && !sqlTableExists(conn, ra->displayNameTable))
     {
-    printf("WARN\t%s\tDisplay Name table '%s' not in database.\n", ra->name, ra->displayNameTable);
+    printf("  WARN\t%s\tdisplayNameTable '%s' not in db.\n", ra->name, ra->displayNameTable);
     ra->displayNameTable = NULL;
     }
 
 hFreeConn(&conn);  // No longer need connection
@@ -455,12 +492,9 @@
 ra->patField    = getOptionalString(raHash, "patField", NULL);
 ra->sampleField = getOptionalString(raHash, "sampleField", NULL);
 
 if (!checkClinicalData(ra))
-    {
-    printf("FAIL\t%s\tClinical data incorrect or missing.\n", ra->name);
     return NULL;
-    }
     
 /* Platform setting, currently defaults to expression */
 ra->platform   = getOptionalString(raHash, "platform", "expression");
 
@@ -500,9 +534,9 @@
     *(ra->height) = atoi(((char *) hashFindVal(raHash, "height")));
 else
     *(ra->height) = 0;
 			     
-printf("PASS\t%s\n", ra->name);
+printf("  pass\t%s\n", ra->name);
 return ra;
 }
 
 
@@ -540,21 +574,37 @@
 
 if (raHashList == NULL)
     errAbort("Couldn't find anything from %s", raName);
 
+int numPass = 0, numFail = 0;
+
 struct raDb *ra, *raList = NULL;
 for (raHash = raHashList; raHash; raHash = raHash->next)
     {
     if ((ra = validateRa(raHash)) == NULL)
+	{
+	numFail += 1;
 	continue;
+	}
     slAddHead(&raList, ra); 
+    numPass += 1;
     }
 
 struct sqlConnection *conn = hAllocConnProfile(localDbProfile, db);
 
-char *raDbName = raDbPath();
+char *raDbName;
+if (alpha)
+    raDbName = cloneString("raDb");
+else
+    raDbName = raDbPath();
+
 putDataToTable(conn, raDbName, raList);
 
+printf("\n");
+printf("raToDb Validation:\n");
+printf("\t%d passed\n\t%d failed\n", numPass, numFail);
+printf("\n");
+
 hFreeConn(&conn);
 }
 
 int main(int argc, char *argv[])
@@ -562,7 +612,11 @@
 {
 optionInit(&argc, argv, options);
 if (argc != 3)
     usage();
+
+if (optionExists("alpha"))
+    alpha = TRUE;
+
 raToDb(argv[1], argv[2]);
 return 0;
 }