d1e16597846fc15d4388fc0625ce265aec2cd490 hiram Tue Jul 2 16:02:00 2019 -0700 adding new list function to show schema for a table with row count refs #23739 diff --git src/hg/hubApi/list.c src/hg/hubApi/list.c index fb53e03..02b5113 100644 --- src/hg/hubApi/list.c +++ src/hg/hubApi/list.c @@ -31,31 +31,31 @@ apiErrAbort(err400, err400Msg, "extraneous arguments found for function /list/publicHubs '%s'", extraArgs); struct sqlConnection *conn = hConnectCentral(); char *dataTime = sqlTableUpdate(conn, hubPublicTableName()); time_t dataTimeStamp = sqlDateToUnixTime(dataTime); replaceChar(dataTime, ' ', 'T'); /* ISO 8601 */ struct hubPublic *el = hubPublicDbLoadAll(); struct jsonWrite *jw = apiStartOutput(); jsonWriteString(jw, "dataTime", dataTime); jsonWriteNumber(jw, "dataTimeStamp", (long long)dataTimeStamp); freeMem(dataTime); // redundant: jsonWriteString(jw, "tableName", hubPublicTableName()); char **columnNames = NULL; char **columnTypes = NULL; int *jsonTypes = NULL; -int columnCount = tableColumns(conn, jw, hubPublicTableName(), &columnNames, +int columnCount = tableColumns(conn, hubPublicTableName(), &columnNames, &columnTypes, &jsonTypes); jsonWriteListStart(jw, "publicHubs"); for ( ; el != NULL; el = el->next ) { hubPublicJsonData(jw, el, columnCount, columnNames); } jsonWriteListEnd(jw); apiFinishOutput(0, NULL, jw); hDisconnectCentral(&conn); } static void dbDbJsonData(struct jsonWrite *jw, struct dbDb *el, int columnCount, char **columnNames) /* Print out dbDb table element in JSON format. * must be same as was stated in the columnName header element @@ -90,31 +90,31 @@ apiErrAbort(err400, err400Msg, "extraneous arguments found for function /list/ucscGenomes '%s'", extraArgs); struct sqlConnection *conn = hConnectCentral(); char *dataTime = sqlTableUpdate(conn, "dbDb"); time_t dataTimeStamp = sqlDateToUnixTime(dataTime); replaceChar(dataTime, ' ', 'T'); /* ISO 8601 */ struct dbDb *dbList = ucscDbDb(); struct dbDb *el; struct jsonWrite *jw = apiStartOutput(); jsonWriteString(jw, "dataTime", dataTime); jsonWriteNumber(jw, "dataTimeStamp", (long long)dataTimeStamp); freeMem(dataTime); char **columnNames = NULL; char **columnTypes = NULL; int *jsonTypes = NULL; -int columnCount = tableColumns(conn, jw, "dbDb", &columnNames, &columnTypes, +int columnCount = tableColumns(conn, "dbDb", &columnNames, &columnTypes, &jsonTypes); jsonWriteObjectStart(jw, "ucscGenomes"); for ( el=dbList; el != NULL; el = el->next ) { dbDbJsonData(jw, el, columnCount, columnNames); } jsonWriteObjectEnd(jw); apiFinishOutput(0, NULL, jw); hDisconnectCentral(&conn); } static void hubChromInfoJsonOutput(FILE *f, char *hubUrl, char *genome) /* for given hubUrl list the chromosomes in the sequence for specified genome */ { @@ -244,30 +244,206 @@ { *splitTableName = sqlTableName; /* return to caller */ } /* may need to extend this in the future for other track types */ if (sqlColumnExists(conn, sqlTableName, "chrom")) /* standard bed tables */ returnChrom = cloneString("chrom"); else if (sqlColumnExists(conn, sqlTableName, "tName")) /* track type psl */ returnChrom = cloneString("tName"); else if (sqlColumnExists(conn, sqlTableName, "genoName")) /* track type rmsk */ returnChrom = cloneString("genoName"); return returnChrom; } +static long long bbiItemCount(char *bigDataUrl, char *type, char *indexFileOrUrl) +/* check the bigDataUrl to see what the itemCount is there */ +{ +long long itemCount = 0; +struct errCatch *errCatch = errCatchNew(); +if (errCatchStart(errCatch)) + { + if (allowedBigBedType(type)) + { + struct bbiFile *bbi = NULL; + bbi = bigBedFileOpen(bigDataUrl); + itemCount = bigBedItemCount(bbi); + bbiFileClose(&bbi); + } + else if (startsWithWord("bigWig", type)) + { + struct bbiFile *bwf = bigWigFileOpen(bigDataUrl); + struct bbiSummaryElement sum = bbiTotalSummary(bwf); + itemCount = sum.validCount; + bbiFileClose(&bwf); + } + else if (sameString("bam", type)) + { + itemCount = bamFileItemCount(bigDataUrl, indexFileOrUrl); + } + else if (sameString("vcfTabix", type)) + { + itemCount = vcfTabixItemCount(bigDataUrl, indexFileOrUrl); + } + } +errCatchEnd(errCatch); +if (isNotEmpty(errCatch->message->string)) + fprintf(stderr, "%s", errCatch->message->string); +errCatchFree(&errCatch); +return itemCount; +} + +static void hubSchemaJsonOutput(FILE *f, char *hubUrl, char *genome, char *track) +/* for given hubUrl and track, output the schema for the hub track */ +{ +struct trackHub *hub = errCatchTrackHubOpen(hubUrl); +struct trackHubGenome *ge = NULL; + +if (isEmpty(genome)) + apiErrAbort(err400, err400Msg, "must specify a 'genome=name' with hubUrl for endpoint: /list/schema?hubUrl=%s;genome=<empty>", hubUrl); + +struct trackHubGenome *foundGenome = NULL; + +for (ge = hub->genomeList; ge; ge = ge->next) + { + if (sameOk(genome, ge->name)) + { + foundGenome = ge; + continue; /* found genome */ + } + } + +if (NULL == foundGenome) + apiErrAbort(err400, err400Msg, "can not find specified 'genome=%s' for endpoint: /list/schema?hubUrl=%s;genome=%s", genome, hubUrl, genome); + +struct jsonWrite *jw = apiStartOutput(); +jsonWriteString(jw, "hubUrl", hubUrl); +jsonWriteString(jw, "genome", genome); +jsonWriteString(jw, "track", track); + +struct trackDb *tdb = obtainTdb(foundGenome, NULL); +if (NULL == tdb) + apiErrAbort(err400, err400Msg, "failed to find a track hub definition in genome=%s track=%s for endpoint '/list/schema' given hubUrl=%s'", genome, track, hubUrl); + +struct trackDb *thisTrack = findTrackDb(track, tdb); +if (NULL == thisTrack) + apiErrAbort(err400, err400Msg, "failed to find specified track=%s in genome=%s for endpoint '/list/schema' given hubUrl='%s'", track, genome, hubUrl); + +char *bigDataUrl = hReplaceGbdb(trackDbSetting(thisTrack, "bigDataUrl")); +if (NULL == bigDataUrl) + apiErrAbort(err400, err400Msg, "failed to find bigDataUrl for specified track=%s in genome=%s for endpoint '/list/schema' given hubUrl='%s'", track, genome, hubUrl); +char *indexFileOrUrl = hReplaceGbdb(trackDbSetting(tdb, "bigDataIndex")); +struct bbiFile *bbi = bigFileOpen(thisTrack->type, bigDataUrl); +long long itemCount = bbiItemCount(bigDataUrl, thisTrack->type, indexFileOrUrl); + +jsonWriteNumber(jw, "itemCount", itemCount); + +struct asObject *as = bigBedAsOrDefault(bbi); +struct sqlFieldType *fiList = sqlFieldTypesFromAs(as); +bigColumnTypes(jw, fiList, as); + +apiFinishOutput(0, NULL, jw); +} /* static void hubSchemaJsonOutput(FILE *f, char *hubUrl, + * char *genome, char *track) */ + +static void schemaJsonOutput(FILE *f, char *db, char *track) +/* for given db and track, output the schema for the associated table */ +{ +struct sqlConnection *conn = hAllocConnMaybe(db); +if (NULL == conn) + apiErrAbort(err400, err400Msg, "can not find 'genome=%s' for endpoint '/list/schema", db); + +struct trackDb *tdb = obtainTdb(NULL, db); +struct trackDb *thisTrack = findTrackDb(track, tdb); +if (NULL == thisTrack) + apiErrAbort(err400, err400Msg, "failed to find specified track=%s in genome=%s for endpoint '/list/schema'", track, db); +if (thisTrack && (tdbIsComposite(thisTrack) || tdbIsCompositeView(thisTrack))) + apiErrAbort(err400, err400Msg, "container track '%s' does not contain data, use the children of this container for data access", track); + + +char *sqlTableName = cloneString(track); +/* the trackDb might have a specific table defined instead */ +char *tableName = trackDbSetting(thisTrack, "table"); +if (isNotEmpty(tableName)) + { + freeMem(sqlTableName); + sqlTableName = cloneString(tableName); + } + +/* this function knows how to deal with split chromosomes, the NULL + * here for the chrom name means to use the first chrom name in chromInfo + */ +struct hTableInfo *hti = hFindTableInfoWithConn(conn, NULL, sqlTableName); +/* check if table name needs to be modified */ +char *splitTableName = NULL; +if (hti && hti->isSplit) + { + char *defaultChrom = hDefaultChrom(db); + char fullTableName[256]; + safef(fullTableName, sizeof(fullTableName), "%s_%s", defaultChrom, hti->rootName); + freeMem(sqlTableName); + sqlTableName = cloneString(fullTableName); + splitTableName = cloneString(fullTableName); + } +else + { + splitTableName = sqlTableName; + } + +char **columnNames = NULL; +char **columnTypes = NULL; +int *jsonTypes = NULL; +int columnCount = tableColumns(conn, splitTableName, &columnNames, &columnTypes, &jsonTypes); +struct asObject *as = asForTable(conn, splitTableName, thisTrack); +struct asColumn *columnEl = as->columnList; +int asColumnCount = slCount(columnEl); + +char *dataTime = sqlTableUpdate(conn, splitTableName); + +time_t dataTimeStamp = sqlDateToUnixTime(dataTime); +replaceChar(dataTime, ' ', 'T'); /* ISO 8601 */ +struct jsonWrite *jw = apiStartOutput(); +jsonWriteString(jw, "genome", db); +jsonWriteString(jw, "track", track); +jsonWriteString(jw, "dataTime", dataTime); +jsonWriteNumber(jw, "dataTimeStamp", (long long)dataTimeStamp); +freeMem(dataTime); + +long long itemCount = 0; +char query[2048]; +sqlSafef(query, sizeof(query), "select count(*) from %s", splitTableName); +itemCount = sqlQuickNum(conn, query); +if (hti && hti->isSplit) /* punting on split table item count */ + itemCount = 0; +else + { + itemCount = sqlQuickNum(conn, query); + jsonWriteNumber(jw, "itemCount", itemCount); + } +hFreeConn(&conn); + +if (hti && (hti->isSplit || debug)) + jsonWriteBoolean(jw, "splitTable", hti->isSplit); + +outputSchema(thisTrack, jw, columnNames, columnTypes, jsonTypes, hti, + columnCount, asColumnCount, columnEl); + +apiFinishOutput(0, NULL, jw); + +} /* static void schemaJsonOutput(FILE *f, char *db, char *track) */ + static void chromInfoJsonOutput(FILE *f, char *db) /* for given db, if there is a track, list the chromosomes in that track, * for no track, simply list the chromosomes in the sequence */ { char *splitSqlTable = NULL; struct hTableInfo *tableInfo = NULL; char *chromName = NULL; char *table = cgiOptionalString("track"); struct sqlConnection *conn = hAllocConnMaybe(db); if (NULL == conn) apiErrAbort(err400, err400Msg, "can not find 'genome=%s' for endpoint '/list/chromosomes", db); if (table) chromName = validChromName(conn, db, table, &splitSqlTable, &tableInfo); @@ -331,67 +507,30 @@ jsonWriteString(jw, "track", table); jsonWriteNumber(jw, "dataTimeStamp", (long long)dataTimeStamp); freeMem(dataTime); jsonWriteNumber(jw, "chromCount", (long long)slCount(ciList)); jsonWriteObjectStart(jw, "chromosomes"); for ( ; el != NULL; el = el->next ) { jsonWriteNumber(jw, el->chrom, (long long)el->size); } jsonWriteObjectEnd(jw); /* chromosomes */ apiFinishOutput(0, NULL, jw); } hFreeConn(&conn); } -static long long bbiItemCount(char *bigDataUrl, char *type, char *indexFileOrUrl) -/* check the bigDataUrl to see what the itemCount is there */ -{ -long long itemCount = 0; -struct errCatch *errCatch = errCatchNew(); -if (errCatchStart(errCatch)) - { - if (allowedBigBedType(type)) - { - struct bbiFile *bbi = NULL; - bbi = bigBedFileOpen(bigDataUrl); - itemCount = bigBedItemCount(bbi); - bbiFileClose(&bbi); - } - else if (startsWithWord("bigWig", type)) - { - struct bbiFile *bwf = bigWigFileOpen(bigDataUrl); - struct bbiSummaryElement sum = bbiTotalSummary(bwf); - itemCount = sum.validCount; - bbiFileClose(&bwf); - } - else if (sameString("bam", type)) - { - itemCount = bamFileItemCount(bigDataUrl, indexFileOrUrl); - } - else if (sameString("vcfTabix", type)) - { - itemCount = vcfTabixItemCount(bigDataUrl, indexFileOrUrl); - } - } -errCatchEnd(errCatch); -if (isNotEmpty(errCatch->message->string)) - fprintf(stderr, "%s", errCatch->message->string); -errCatchFree(&errCatch); -return itemCount; -} - static long long bbiTableItemCount(struct sqlConnection *conn, char *type, char *tableName) /* Given a tableName that has a fileName column pointing to big*, bam or vcfTabix files, return the * total itemCount from all rows (BAM and VCF tables may have one row per chrom). */ { long long itemCount = 0; char query[2048]; sqlSafef(query, sizeof query, "select fileName from %s", tableName); struct sqlResult *sr = sqlGetResult(conn, query); char **row; while ((row = sqlNextRow(sr)) != NULL) { itemCount += bbiItemCount(hReplaceGbdb(row[0]), type, NULL); } sqlFreeResult(&sr); return itemCount; @@ -655,18 +794,55 @@ } if (isEmpty(hubUrl) && isEmpty(db)) apiErrAbort(err400, err400Msg, "must supply hubUrl or genome name for endpoint '/list/chromosomes", hubUrl, db); if (isEmpty(hubUrl)) // missing hubUrl implies UCSC database { chromInfoJsonOutput(stdout, db); return; } else { hubChromInfoJsonOutput(stdout, hubUrl, genome); return; } } +else if (sameWord("schema", words[1])) + { + char *extraArgs = verifyLegalArgs(argListSchema); + if (extraArgs) + apiErrAbort(err400, err400Msg, "extraneous arguments found for function /list/schema '%s'", extraArgs); + + char *hubUrl = cgiOptionalString("hubUrl"); + char *genome = cgiOptionalString("genome"); + char *db = cgiOptionalString("genome"); + char *track = cgiOptionalString("track"); + + if (isEmpty(track)) + apiErrAbort(err400, err400Msg, "missing track=<name> for endpoint '/list/schema'"); + + if (isEmpty(hubUrl) && isNotEmpty(db)) + { + struct sqlConnection *conn = hAllocConnMaybe(db); + if (NULL == conn) + apiErrAbort(err400, err400Msg, "can not find 'genome=%s' for endpoint '/list/schema", db); + else + hFreeConn(&conn); + } + + if (isEmpty(hubUrl) && isEmpty(db)) + apiErrAbort(err400, err400Msg, "must supply hubUrl or genome name for endpoint '/list/schema", hubUrl, db); + + if (isEmpty(hubUrl)) // missing hubUrl implies UCSC database + { + schemaJsonOutput(stdout, db, track); + return; + } + else + { + hubSchemaJsonOutput(stdout, hubUrl, genome, track); + return; + } + } else apiErrAbort(err400, err400Msg, "do not recognize endpoint function: '/%s/%s'", words[0], words[1]); } /* void apiList(char *words[MAX_PATH_INFO]) */