src/hg/hubApi/getData.c ec2ad305db739d810e35ad11800b01736ee21071

ec2ad305db739d810e35ad11800b01736ee21071
hiram
  Thu Apr 18 13:29:46 2019 -0700
handle illegal db names and split chrom tables refs #18869

diff --git src/hg/hubApi/getData.c src/hg/hubApi/getData.c
index d829129..d45f568 100644
--- src/hg/hubApi/getData.c
+++ src/hg/hubApi/getData.c
@@ -51,168 +51,247 @@
     jsonWriteNumber(jw, name, sqlLongLong(val));
 else
     jsonWriteString(jw, name, val);
 }
 
 static void wigColumnTypes(struct jsonWrite *jw)
 /* output column headers for a wiggle data output schema */
 {
 jsonWriteListStart(jw, "columnTypes");
 jsonWriteString(jw, NULL, "start - int - number");
 jsonWriteString(jw, NULL, "end - int - number");
 jsonWriteString(jw, NULL, "value - float - number");
 jsonWriteListEnd(jw);
 }	/* static void wigColumnTypes(struct jsonWrite jw) */
 
+static unsigned sqlQueryJsonOutput(struct sqlConnection *conn,
+    struct jsonWrite *jw, char *query, int columnCount, char **columnNames,
+	int *jsonTypes, unsigned itemsDone)
+/* run 'query' on conn and write each result row to jw, one JSON list or object per row; stops when itemsDone plus the rows written here reaches maxItemsOutput; returns the number of rows written by this call */
+{
+struct sqlResult *sr = sqlGetResult(conn, query);
+char **row = NULL;
+unsigned itemCount = 0;	/* rows written by this call only */
+while ((itemCount+itemsDone) < maxItemsOutput && (row = sqlNextRow(sr)) != NULL)	/* limit includes rows from earlier calls */
+    {
+    int i = 0;
+    if (jsonOutputArrays)	/* row becomes an unnamed list of column values */
+	{
+	jsonWriteListStart(jw, NULL);
+	for (i = 0; i < columnCount; ++i)
+	    jsonDatumOut(jw, NULL, row[i], jsonTypes[i]);
+	jsonWriteListEnd(jw);
+	}
+    else	/* row becomes an object, each value keyed by its column name */
+	{
+	jsonWriteObjectStart(jw, NULL);
+	for (i = 0; i < columnCount; ++i)
+	    jsonDatumOut(jw, columnNames[i], row[i], jsonTypes[i]);
+	jsonWriteObjectEnd(jw);
+	}
+    ++itemCount;
+    }
+sqlFreeResult(&sr);
+return itemCount;
+}
+
 static void tableDataOutput(char *db, struct trackDb *tdb,
     struct sqlConnection *conn, struct jsonWrite *jw, char *track,
     char *chrom, unsigned start, unsigned end)
 /* output the SQL table data for given track */
 {
-char query[4096];
-
 /* for MySQL select statements, name for 'chrom' 'start' 'end' to use
  *     for a table which has different names than that
  */
 char chromName[256];
 char startName[256];
 char endName[256];
 
 /* defaults, normal stuff */
 safef(chromName, sizeof(chromName), "chrom");
 safef(startName, sizeof(startName), "chromStart");
 safef(endName, sizeof(endName), "chromEnd");
 
-/* might have a specific table defined instead of the track name */
+/* start by assuming the SQL table has the same name as the track */
 char *sqlTable = cloneString(track);
+/* a trackDb 'table' setting, when present, overrides the track name */
 char *tableName = trackDbSetting(tdb, "table");
 if (isNotEmpty(tableName))
     {
     freeMem(sqlTable);
     sqlTable = cloneString(tableName);
     jsonWriteString(jw, "sqlTable", sqlTable);
     }
 
+/* table name actually queried; replaced by <chrom>_<rootName> below when the table is split */
+char *splitSqlTable = cloneString(sqlTable);
+
+/* hFindTableInfoWithConn() knows how to deal with tables split by
+ * chromosome; the NULL passed for the chrom name means to use the first chrom name in chromInfo
+ */
+struct hTableInfo *hti = hFindTableInfoWithConn(conn, NULL, sqlTable);	/* NOTE(review): hti is dereferenced below without a NULL check - confirm it can not be NULL here */
+if (debug)
+    {
+    jsonWriteBoolean(jw, "isPos", hti->isPos);
+    jsonWriteBoolean(jw, "isSplit", hti->isSplit);
+    jsonWriteBoolean(jw, "hasBin", hti->hasBin);
+    }
+/* split table: switch to the per-chrom table name, <chrom>_<rootName> */
+if (hti->isSplit)
+    {
+    if (isNotEmpty(chrom))
+	{
+	char fullTableName[256];
+	safef(fullTableName, sizeof(fullTableName), "%s_%s", chrom, hti->rootName);
+	freeMem(splitSqlTable);
+	splitSqlTable = cloneString(fullTableName);
+	if (debug)
+	    jsonWriteString(jw, "splitSqlTable", splitSqlTable);
+	}
+    else	/* no chrom requested: the default chrom's table stands in for the column lookups below */
+	{
+	char *defaultChrom = hDefaultChrom(db);
+	char fullTableName[256];
+	safef(fullTableName, sizeof(fullTableName), "%s_%s", defaultChrom, hti->rootName);
+	freeMem(splitSqlTable);
+	splitSqlTable = cloneString(fullTableName);
+	if (debug)
+	    jsonWriteString(jw, "splitSqlTable", splitSqlTable);
+	}
+    }
+
 /* determine name for 'chrom' in table select */
-if (! sqlColumnExists(conn, sqlTable, "chrom"))
+if (! sqlColumnExists(conn, splitSqlTable, "chrom"))
     {
-    if (sqlColumnExists(conn, sqlTable, "tName"))	// track type psl
+    if (sqlColumnExists(conn, splitSqlTable, "tName"))	// track type psl
 	{
 	safef(chromName, sizeof(chromName), "tName");
 	safef(startName, sizeof(startName), "tStart");
 	safef(endName, sizeof(endName), "tEnd");
 	}
-    else if (sqlColumnExists(conn, sqlTable, "genoName"))// track type rmsk
+    else if (sqlColumnExists(conn, splitSqlTable, "genoName"))// track type rmsk
 	{
 	safef(chromName, sizeof(chromName), "genoName");
 	safef(startName, sizeof(startName), "genoStart");
 	safef(endName, sizeof(endName), "genoEnd");
 	}
     }
 
-if (sqlColumnExists(conn, sqlTable, "txStart"))	// track type genePred
+if (sqlColumnExists(conn, splitSqlTable, "txStart"))	// track type genePred
     {
     safef(startName, sizeof(startName), "txStart");
     safef(endName, sizeof(endName), "txEnd");
     }
 
+struct dyString *query = dyStringNew(64);	/* freed at the end of this function; NOTE(review): not freed on the early wig return below */
+
 /* no chrom specified, return entire table */
 if (isEmpty(chrom))
-    sqlSafef(query, sizeof(query), "select * from %s", sqlTable);
+    {
+    /* this query serves the non-split case; for a split table it is
+     * discarded below and rebuilt once for each chrom's table
+     */
+    sqlDyStringPrintf(query, "select * from %s", splitSqlTable);
+    }
 else if (0 == (start + end))	/* have chrom, no start,end == full chr */
     {
-    if (! sqlColumnExists(conn, sqlTable, chromName))
+    if (! sqlColumnExists(conn, splitSqlTable, chromName))
 	apiErrAbort(err400, err400Msg, "track '%s' is not a position track, request track without chrom specification, genome: '%s'", track, db);
 
     jsonWriteString(jw, "chrom", chrom);
     struct chromInfo *ci = hGetChromInfo(db, chrom);
     jsonWriteNumber(jw, "start", (long long)0);
     jsonWriteNumber(jw, "end", (long long)ci->size);
-    sqlSafef(query, sizeof(query), "select * from %s where %s='%s'", sqlTable, chromName, chrom);
+    sqlDyStringPrintf(query, "select * from %s where %s='%s'", splitSqlTable, chromName, chrom);
     }
 else	/* fully specified chrom:start-end */
     {
     jsonWriteString(jw, "chrom", chrom);
     jsonWriteNumber(jw, "start", (long long)start);
     jsonWriteNumber(jw, "end", (long long)end);
     if (jsonOutputArrays || debug)
 	wigColumnTypes(jw);
     if (startsWith("wig", tdb->type))
 	{
-        wigTableDataOutput(jw, db, sqlTable, chrom, start, end);
+        wigTableDataOutput(jw, db, splitSqlTable, chrom, start, end);
         return;	/* DONE */
 	}
     else
 	{
-	sqlSafef(query, sizeof(query), "select * from %s where %s='%s' AND %s > %u AND %s < %u", sqlTable, chromName, chrom, endName, start, startName, end);
+	sqlDyStringPrintf(query, "select * from %s where ", splitSqlTable);
+        hAddBinToQuery(start, end, query);	/* NOTE(review): called regardless of hti->hasBin - confirm every table reaching here has a bin column */
+	sqlDyStringPrintf(query, "%s='%s' AND %s > %u AND %s < %u", chromName, chrom, endName, start, startName, end);	/* overlap test: item end > start AND item start < end */
 	}
     }
 
 if (debug)
-    jsonWriteString(jw, "select", query);
+    jsonWriteString(jw, "select", query->string);
 
 /* continuing, not a wiggle output */
 char **columnNames = NULL;
 char **columnTypes = NULL;
 int *jsonTypes = NULL;
-struct asObject *as = asForTable(conn, sqlTable, tdb);
+struct asObject *as = asForTable(conn, splitSqlTable, tdb);
 struct asColumn *columnEl = as->columnList;
 int asColumnCount = slCount(columnEl);
-int columnCount = tableColumns(conn, jw, sqlTable, &columnNames, &columnTypes, &jsonTypes);
+int columnCount = tableColumns(conn, jw, splitSqlTable, &columnNames, &columnTypes, &jsonTypes);
 if (jsonOutputArrays || debug)
     {
     jsonWriteListStart(jw, "columnTypes");
     int i = 0;
     for (i = 0; i < columnCount; ++i)
 	{
 	char typeString[1024];
 	if ((0 == i) && (asColumnCount == (columnCount - 1)) && (sameWord("bin", columnNames[i])))
 	    safef(typeString, sizeof(typeString), "%s - %s - %s - Indexing field to speed chromosome range queries", columnNames[i], columnTypes[i], jsonTypeStrings[jsonTypes[i]]);
 	else if (columnEl && isNotEmpty(columnEl->comment))
 	    safef(typeString, sizeof(typeString), "%s - %s - %s - %s", columnNames[i], columnTypes[i], jsonTypeStrings[jsonTypes[i]], columnEl->comment);
 	else
 	    safef(typeString, sizeof(typeString), "%s - %s - %s", columnNames[i], columnTypes[i], jsonTypeStrings[jsonTypes[i]]);
 	jsonWriteString(jw, NULL, typeString);
         if (columnEl && ! ((0 == i) && (sameWord("bin", columnNames[i]))))
 	    columnEl = columnEl->next;
 	}
     jsonWriteListEnd(jw);
     }
 jsonWriteListStart(jw, track);
-struct sqlResult *sr = sqlGetResult(conn, query);
-char **row = NULL;
-unsigned itemCount = 0;
-while (itemCount < maxItemsOutput && (row = sqlNextRow(sr)) != NULL)
+
+unsigned itemsDone = 0;	/* rows written so far, summed over every query run below */
+
+/* split table and no chrom requested: query each chrom's table in turn until maxItemsOutput rows are written */
+if (hti->isSplit && isEmpty(chrom))
     {
-    int i = 0;
-    if (jsonOutputArrays)
+    struct chromInfo *ciList = createChromInfoList(NULL, db);	/* NOTE(review): this list is not freed in this function */
+    slSort(ciList, chromInfoCmp);
+    struct chromInfo *el = ciList;
+    char fullTableName[256];
+    for ( ; itemsDone < maxItemsOutput && el != NULL; el = el->next )
 	{
-	jsonWriteListStart(jw, NULL);
-	for (i = 0; i < columnCount; ++i)
-	    jsonDatumOut(jw, NULL, row[i], jsonTypes[i]);
-	jsonWriteListEnd(jw);
+	freeDyString(&query);	/* drop the previous query text */
+	query = dyStringNew(64);
+	safef(fullTableName, sizeof(fullTableName), "%s_%s", el->chrom, hti->rootName);
+	sqlDyStringPrintf(query, "select * from %s", fullTableName);	/* NOTE(review): table existence is not checked here - confirm every chrom in the list has a <chrom>_<rootName> table */
+	itemsDone += sqlQueryJsonOutput(conn, jw, query->string, columnCount,
+	    columnNames, jsonTypes, itemsDone);
+	}
     }
 else
     {
-	jsonWriteObjectStart(jw, NULL);
-	for (i = 0; i < columnCount; ++i)
-	    jsonDatumOut(jw, columnNames[i], row[i], jsonTypes[i]);
-	jsonWriteObjectEnd(jw);
+    itemsDone += sqlQueryJsonOutput(conn, jw, query->string, columnCount,
+	columnNames, jsonTypes, itemsDone);
     }
-    ++itemCount;
-    }
-sqlFreeResult(&sr);
+freeDyString(&query);	/* NOTE(review): the sqlTable and splitSqlTable clones are not freed in this function */
 jsonWriteListEnd(jw);
 }	/*  static void tableDataOutput(char *db, struct trackDb *tdb, ... ) */
 
 static boolean typedBig9Plus(struct trackDb *tdb)
 /* check if track type is 'bed 9+ ...' to determine itemRgb for big* types */
 {
 if (isNotEmpty(tdb->type) && (allowedBigBedType(tdb->type)))
     {
     char *words[8];
     int wordCount;
     wordCount = chopLine(cloneString(tdb->type), words);
     if ( (wordCount > 1) && startsWith("bigBed", words[0]))
 	{
 	if (isAllDigits(words[1]))
 	   if (8 < sqlUnsigned(words[1]))
@@ -459,33 +538,34 @@
     jsonWriteObjectEnd(jw);
     }
 bbiFileClose(&bbi);
 apiFinishOutput(0, NULL, jw);
 }	/*	static void getHubTrackData(char *hubUrl)	*/
 
 static void getTrackData()
 /* return data from a track, optionally just one chrom data,
  *  optionally just one section of that chrom data
  */
 {
 char *db = cgiOptionalString("db");
 char *chrom = cgiOptionalString("chrom");
 char *start = cgiOptionalString("start");
 char *end = cgiOptionalString("end");
-/* 'track' name in trackDb refers to a SQL 'table' */
+/* 'track' name in trackDb often refers to a SQL 'table' */
 char *track = cgiOptionalString("track");
-char *sqlTable = cloneString(track);
+char *sqlTable = cloneString(track); /* might be something else */
+     /* depends upon 'table' setting in track db, or split table business */
 
 unsigned chromSize = 0;	/* maybe set later */
 unsigned uStart = 0;
 unsigned uEnd = chromSize;	/* maybe set later */
 if ( ! (isEmpty(start) || isEmpty(end)) )
     {
     uStart = sqlUnsigned(start);
     uEnd = sqlUnsigned(end);
     if (uEnd < uStart)
 	apiErrAbort(err400, err400Msg, "given start coordinate %u is greater than given end coordinate", uStart, uEnd);
     }
 
 if (isEmpty(db))
     apiErrAbort(err400, err400Msg, "missing URL variable db=<ucscDb> name for endpoint '/getData/track");
 if (isEmpty(track))
@@ -495,55 +575,83 @@
 if (NULL == thisTrack)
     apiErrAbort(err400, err400Msg, "can not find track=%s name for endpoint '/getData/track", track);
 
 /* might be a big* track with no table */
 char *bigDataUrl = trackDbSetting(thisTrack, "bigDataUrl");
 boolean tableTrack = TRUE;
 
 /* might have a specific table defined instead of the track name */
 char *tableName = trackDbSetting(thisTrack, "table");
 if (isNotEmpty(tableName))
     {
     freeMem(sqlTable);
     sqlTable = cloneString(tableName);
     }
 
-struct sqlConnection *conn = hAllocConn(db);
-if (! sqlTableExists(conn, sqlTable))
+struct sqlConnection *conn = hAllocConnMaybe(db);
+if (NULL == conn)
+    apiErrAbort(err400, err400Msg, "can not find database 'db=%s' for endpoint '/getData/track", db);
+
+struct hTableInfo *hti = hFindTableInfoWithConn(conn, NULL, sqlTable);
+
+char *splitSqlTable = NULL;
+
+if (hti->isSplit)
+    {
+    if (isNotEmpty(chrom))
+	{
+	char fullTableName[256];
+	safef(fullTableName, sizeof(fullTableName), "%s_%s", chrom, hti->rootName);
+	splitSqlTable = cloneString(fullTableName);
+	}
+    else
+	{
+	char *defaultChrom = hDefaultChrom(db);
+	char fullTableName[256];
+	safef(fullTableName, sizeof(fullTableName), "%s_%s", defaultChrom, hti->rootName);
+	splitSqlTable = cloneString(fullTableName);
+	}
+    }
+
+if (! hTableOrSplitExists(db, sqlTable))
     {
     if (! bigDataUrl)
 	apiErrAbort(err400, err400Msg, "can not find specified 'track=%s' for endpoint: /getData/track?db=%s;track=%s", track, db, track);
     else
 	tableTrack = FALSE;
     }
 
 struct jsonWrite *jw = apiStartOutput();
 jsonWriteString(jw, "db", db);
 if (tableTrack)
     {
-    char *dataTime = sqlTableUpdate(conn, sqlTable);
+    char *dataTime = NULL;
+    if (hti->isSplit)
+	dataTime = sqlTableUpdate(conn, splitSqlTable);
+    else
+	dataTime = sqlTableUpdate(conn, sqlTable);
     time_t dataTimeStamp = sqlDateToUnixTime(dataTime);
     replaceChar(dataTime, ' ', 'T');	/*	ISO 8601	*/
     jsonWriteString(jw, "dataTime", dataTime);
     jsonWriteNumber(jw, "dataTimeStamp", (long long)dataTimeStamp);
     if (differentStringNullOk(sqlTable,track))
 	jsonWriteString(jw, "sqlTable", sqlTable);
     }
 jsonWriteString(jw, "trackType", thisTrack->type);
 jsonWriteString(jw, "track", track);
 if (debug)
-    jsonWriteString(jw, "jsonOutputArrays", jsonOutputArrays ? "TRUE":"FALSE");
+    jsonWriteBoolean(jw, "jsonOutputArrays", jsonOutputArrays);
 
 char query[4096];
 struct bbiFile *bbi = NULL;
 struct bbiChromInfo *chromList = NULL;
 
 if (startsWith("big", thisTrack->type))
     {
     if (bigDataUrl)
 	bbi = bigFileOpen(thisTrack->type, bigDataUrl);
     else
 	{
 	char quickReturn[2048];
         sqlSafef(query, sizeof(query), "select fileName from %s", sqlTable);
         if (sqlQuickQuery(conn, query, quickReturn, sizeof(quickReturn)))
 	    {
@@ -599,31 +707,30 @@
     jsonWriteListEnd(jw);
     }
 else if (startsWith("bigWig", thisTrack->type))
     {
     if (jsonOutputArrays || debug)
 	wigColumnTypes(jw);
 
     jsonWriteObjectStart(jw, track);
     wigData(jw, bbi, chrom, uStart, uEnd);
     jsonWriteObjectEnd(jw);
     bbiFileClose(&bbi);
     }
 else
     tableDataOutput(db, thisTrack, conn, jw, track, chrom, uStart, uEnd);
 
-
 apiFinishOutput(0, NULL, jw);
 hFreeConn(&conn);
 }
 
 static void getSequenceData(char *db, char *hubUrl)
 /* return DNA sequence, given at least a db=name and chrom=chr,
    optionally start and end, might be a track hub for UCSC database  */
 {
 char *chrom = cgiOptionalString("chrom");
 char *start = cgiOptionalString("start");
 char *end = cgiOptionalString("end");
 
 long timeStart = clock1000();
 
 if (isEmpty(chrom))