src/hg/instinct/raToDb/raToDb.c 1.6
1.6 2010/04/11 03:32:45 jsanborn
updated
Index: src/hg/instinct/raToDb/raToDb.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/raToDb/raToDb.c,v
retrieving revision 1.5
retrieving revision 1.6
diff -b -B -U 4 -r1.5 -r1.6
--- src/hg/instinct/raToDb/raToDb.c 10 Apr 2010 03:53:54 -0000 1.5
+++ src/hg/instinct/raToDb/raToDb.c 11 Apr 2010 03:32:45 -0000 1.6
@@ -24,19 +24,24 @@
{
errAbort(
"raToDb - RA to database table converter RA to database converter\n"
"usage:\n"
- " raToDb db tableName file.ra\n"
+ " raToDb -alpha db file.ra\n"
"\n"
" db = database to put ra_username table\n"
" file.ra = root ra file\n"
+ "options:\n"
+ " -alpha = Make alpha database table (raDb)\n"
);
}
static struct optionSpec options[] = {
+ {"alpha", OPTION_BOOLEAN},
{NULL, 0},
};
+boolean alpha = FALSE;
+
static char *rootDir = "../hgHeatmap2/hgHeatmapData";
static char *hgCgiDir = "../../makeDb/hgCgiData";
static char *maGroupsFile = "microarrayGroups.ra";
static char *localDbProfile = "localDb";
@@ -183,23 +188,37 @@
safef(fileName, sizeof(fileName), "%s/%s", rootDir, rootName);
struct slName *sl, *slList = getRaIncludes(fileName);
raFoldIn(fileName, hashOfHash);
+helList = hashElListHash(hashOfHash);
+for (hel = helList; hel != NULL; hel = hel->next)
+ {
+ ra = hel->val;
+ slAddHead(&raList, ra);
+ hel->val = NULL;
+ }
+hashElFreeList(&helList);
+hashFree(&hashOfHash);
+
for (sl = slList; sl; sl = sl->next)
{
safef(fileName, sizeof(fileName), "%s/%s", rootDir, sl->name);
+ hashOfHash = newHash(10);
raFoldIn(fileName, hashOfHash);
- }
-/* Create list. */
-helList = hashElListHash(hashOfHash);
-for (hel = helList; hel != NULL; hel = hel->next)
+ helList = hashElListHash(hashOfHash);
+ for (hel = helList; hel != NULL; hel = hel->next)
{
ra = hel->val;
slAddHead(&raList, ra);
hel->val = NULL;
}
-hashElFreeList(&helList);
+ hashElFreeList(&helList);
+ hashFree(&hashOfHash);
+ }
+
+/* Create list. */
+
return raList;
}
struct microarrayGroups *maGroupingsForRa(char *database, char *table)
@@ -306,27 +325,35 @@
int count = sqlQuickNum(conn, query);
if (count != *(ra->expCount))
{
- printf("FAIL\t%s\tMicroarray groups count != count in dataset (%d != %d)\n",
+ printf(" FAIL\t%s\tmaGroups count != count in data table (%d != %d)\n",
ra->name, count, *(ra->expCount));
return 0;
}
return 1;
}
int checkClinicalData(struct raDb *ra)
{
+if (!ra->patDb)
+ {
+ printf(" FAIL\t%s\tMissing clinical db in RA file.\n", ra->name);
+ return 0;
+ }
+
struct sqlConnection *conn = sqlMayConnectProfile(ra->profile, ra->patDb);
if (!conn)
{
- printf("FAIL\t%s\tClinical database '%s' does not exist.\n", ra->name, ra->patDb);
+ printf(" FAIL\t%s\tClinical db '%s' doesn't exist.\n", ra->name, ra->patDb);
+ sqlDisconnect(&conn);
return 0;
}
if (!sqlTableExists(conn, ra->patTable))
{
- printf("FAIL\t%s\tClinical table '%s' does not exist.\n", ra->name, ra->patTable);
+ printf(" FAIL\t%s\tClinical table '%s' doesn't exist.\n", ra->name, ra->patTable);
+ sqlDisconnect(&conn);
return 0;
}
struct slName *sl, *slList = sqlFieldNames(conn, ra->patTable);
@@ -334,23 +361,25 @@
boolean pfExists = FALSE;
boolean sfExists = FALSE;
for (sl = slList; sl; sl = sl->next)
{
- if (sameString(sl->name, ra->patField))
+ if (sameWord(sl->name, ra->patField))
pfExists = TRUE;
- if (sameString(sl->name, ra->sampleField))
+ if (sameWord(sl->name, ra->sampleField))
sfExists = TRUE;
}
if (!pfExists)
{
- printf("FAIL\t%s\tPatient Field '%s' not in table '%s'.\n",
+ printf(" FAIL\t%s\tpatField '%s' not in table '%s'.\n",
ra->name, ra->patField, ra->patTable);
+ sqlDisconnect(&conn);
return 0;
}
if (!sfExists)
{
- printf("FAIL\t%s\tSample Field '%s' not in table '%s'.\n",
- ra->name, ra->patField, ra->patTable);
+ printf(" FAIL\t%s\tsampleField '%s' not in table '%s'.\n",
+ ra->name, ra->sampleField, ra->patTable);
+ sqlDisconnect(&conn);
return 0;
}
struct hash *raHash, *raHashList = readRaFile(ra->raFile);
@@ -367,15 +396,23 @@
char *table = cloneString(nextWord(&queryType));
char *keyField = cloneString(nextWord(&queryType));
char *valField = cloneString(nextWord(&queryType));
+
+ if (!sqlTableExists(conn, table))
+ {
+ printf(" WARN\t%s\tTable '%s' does not exist in clinical db '%s'.\n",
+ ra->name, table, ra->patDb);
+ continue;
+ }
+
safef(query, sizeof(query), "select %s,%s from %s limit 1",
keyField, valField, table);
-
if (!sqlExists(conn, query))
{
- printf("FAIL\t%s\tValue field '%s' for column %s does not exist.\n",
+ printf(" FAIL\t%s\tvalField '%s' for feature '%s' doesn't exist.\n",
ra->name, valField, name);
+ sqlDisconnect(&conn);
return 0;
}
}
@@ -392,9 +429,9 @@
struct sqlConnection *conn = hAllocConnProfile(ra->profile, genomicDb);
if (!sqlTableExists(conn, ra->name))
{
- printf("FAIL\t%s\tGenomic data not in hg18 (%s).\n", ra->name, ra->profile);
+ printf(" FAIL\t%s\tData table not in hg18 db (%s).\n", ra->name, ra->profile);
hFreeConn(&conn); // No longer need connection
return NULL;
}
ra->dataType = mustGetString(raHash, "dataType");
@@ -405,9 +442,9 @@
{
struct microarrayGroups *maGs = maGroupingsForRa(genomicDb, ra->name);
if (!maGs)
{
- printf("FAIL\t%s\tBad microarray groups\n", ra->name);
+ printf(" FAIL\t%s\tBad maGroups.\n", ra->name);
hFreeConn(&conn); // No longer need connection
return NULL;
}
struct maGrouping *allA = maGs->allArrays;
@@ -422,23 +459,23 @@
ra->accessTable = getOptionalString(raHash, "accessTable", ra->name);
if (!sqlTableExists(conn, ra->accessTable))
{
- printf("WARN\t%s\tDown-sampled table '%s' not in database.\n", ra->name, ra->accessTable);
+ printf(" WARN\t%s\taccessTable '%s' not in db.\n", ra->name, ra->accessTable);
ra->accessTable = cloneString(ra->name);
}
ra->aliasTable = getOptionalString(raHash, "aliasTable", NULL);
if (ra->aliasTable && !sqlTableExists(conn, ra->aliasTable))
{
- printf("WARN\t%s\tProbe->Gene Alias table '%s' not in database.\n", ra->name, ra->aliasTable);
+ printf(" WARN\t%s\taliasTable '%s' not in db.\n", ra->name, ra->aliasTable);
ra->aliasTable = NULL;
}
ra->displayNameTable = getOptionalString(raHash, "displayNameTable", NULL);
if (ra->displayNameTable && !sqlTableExists(conn, ra->displayNameTable))
{
- printf("WARN\t%s\tDisplay Name table '%s' not in database.\n", ra->name, ra->displayNameTable);
+ printf(" WARN\t%s\tdisplayNameTable '%s' not in db.\n", ra->name, ra->displayNameTable);
ra->displayNameTable = NULL;
}
hFreeConn(&conn); // No longer need connection
@@ -455,12 +492,9 @@
ra->patField = getOptionalString(raHash, "patField", NULL);
ra->sampleField = getOptionalString(raHash, "sampleField", NULL);
if (!checkClinicalData(ra))
- {
- printf("FAIL\t%s\tClinical data incorrect or missing.\n", ra->name);
return NULL;
- }
/* Platform setting, currently defaults to expression */
ra->platform = getOptionalString(raHash, "platform", "expression");
@@ -500,9 +534,9 @@
*(ra->height) = atoi(((char *) hashFindVal(raHash, "height")));
else
*(ra->height) = 0;
-printf("PASS\t%s\n", ra->name);
+printf(" pass\t%s\n", ra->name);
return ra;
}
@@ -540,21 +574,37 @@
if (raHashList == NULL)
errAbort("Couldn't find anything from %s", raName);
+int numPass = 0, numFail = 0;
+
struct raDb *ra, *raList = NULL;
for (raHash = raHashList; raHash; raHash = raHash->next)
{
if ((ra = validateRa(raHash)) == NULL)
+ {
+ numFail += 1;
continue;
+ }
slAddHead(&raList, ra);
+ numPass += 1;
}
struct sqlConnection *conn = hAllocConnProfile(localDbProfile, db);
-char *raDbName = raDbPath();
+char *raDbName;
+if (alpha)
+ raDbName = cloneString("raDb");
+else
+ raDbName = raDbPath();
+
putDataToTable(conn, raDbName, raList);
+printf("\n");
+printf("raToDb Validation:\n");
+printf("\t%d passed\n\t%d failed\n", numPass, numFail);
+printf("\n");
+
hFreeConn(&conn);
}
int main(int argc, char *argv[])
@@ -562,7 +612,11 @@
{
optionInit(&argc, argv, options);
if (argc != 3)
usage();
+
+if (optionExists("alpha"))
+ alpha = TRUE;
+
raToDb(argv[1], argv[2]);
return 0;
}