b6ea7e02e7c946b1cb700d3cab07decce272d4c7 hiram Tue Feb 26 16:00:40 2019 -0800 better counting of legitimate tracks refs #18869 diff --git src/hg/hubApi/hubApi.c src/hg/hubApi/hubApi.c index dd0eddd..45a44b1 100644 --- src/hg/hubApi/hubApi.c +++ src/hg/hubApi/hubApi.c @@ -47,101 +47,162 @@ } static struct hubPublic *hubPublicLoad(char **row) /* Load a hubPublic from row fetched with select * from hubPublic * from database. Dispose of this with hubPublicFree(). */ { struct hubPublic *ret; AllocVar(ret); ret->hubUrl = cloneString(row[0]); ret->shortLabel = cloneString(row[1]); ret->longLabel = cloneString(row[2]); ret->registrationTime = cloneString(row[3]); ret->dbCount = sqlUnsigned(row[4]); ret->dbList = cloneString(row[5]); -// if (row[6]) ret->descriptionUrl = cloneString(row[6]); -// else -// ret->descriptionUrl = cloneString(""); return ret; } struct hubPublic *hubPublicLoadAll() +/* read entire hubPublic table in hgcentral and return resulting list */ { char query[1024]; struct hubPublic *list = NULL; struct sqlConnection *conn = hConnectCentral(); sqlSafef(query, sizeof(query), "select * from %s", hubPublicTableName()); struct sqlResult *sr = sqlGetResult(conn, query); char **row; while ((row = sqlNextRow(sr)) != NULL) { struct hubPublic *el = hubPublicLoad(row); slAddHead(&list, el); } sqlFreeResult(&sr); hDisconnectCentral(&conn); publicHubSortCase(&list); int listSize = slCount(list); AllocArray(shortLabels, listSize); struct hubPublic *el = list; int i = 0; for ( ; el != NULL; el = el->next ) { shortLabels[i++] = el->shortLabel; ++publicHubCount; } return list; } static boolean timeOutReached() +/* see if the timeout has been reached to determine if an exit + * is appropriate at this time + */ { long nowTime = clock1000(); timedOut = FALSE; if ((nowTime - enteredMainTime) > (1000 * timeOutSeconds)) timedOut= TRUE; return timedOut; } -static void trackSettings(struct trackDb *tdb) -/* process the settingsHash for a trackDb, recursive if subtracks */ +#ifdef NOT +static void showCounts(struct hash *countTracks) +{ +if (countTracks->elCount) + { + hPrintf(" <ul>\n"); + struct hashEl *hel; + struct hashCookie hc = hashFirst(countTracks); + while ((hel = hashNext(&hc)) != NULL) + hPrintf(" <li>%d - %s</li>\n", ptToInt(hel->val), hel->name); + hPrintf(" </ul>\n"); + } +} +#endif + +static void hashCountTrack(struct trackDb *tdb, struct hash *countTracks) +/* this is counting up track types into the hash countTracks */ +{ +char *stripType = cloneString(tdb->type); +if (startsWith("chain ", tdb->type)) + stripType = cloneString("chain"); +else if (startsWith("netAlign ", tdb->type)) + stripType = cloneString("netAlign"); +else if (startsWith("genePred ", tdb->type)) + stripType = cloneString("genePred"); +else if (startsWith("bigWig ", tdb->type)) + stripType = cloneString("bigWig"); +else if (startsWith("wigMaf ", tdb->type)) + stripType = cloneString("wigMaf"); +else if (startsWith("wig ", tdb->type)) + stripType = cloneString("wig"); +else + stripType = cloneString(tdb->type); +// char *compositeTrack = trackDbLocalSetting(tdb, "compositeTrack"); +boolean compositeContainer = tdbIsComposite(tdb); +boolean compositeView = tdbIsCompositeView(tdb); +// char *superTrack = trackDbLocalSetting(tdb, "superTrack"); +boolean superChild = tdbIsSuperTrackChild(tdb); +if (compositeContainer) + hashIncInt(countTracks, "composite container"); +else if (compositeView) + hashIncInt(countTracks, "composite view"); +else if (superChild) + { + hashIncInt(countTracks, "superTrack child"); + hashIncInt(countTracks, stripType); + hashIncInt(countTracks, "track count"); + } +else if (isEmpty(tdb->type)) + hashIncInt(countTracks, "no type specified"); +else + { + hashIncInt(countTracks, stripType); + hashIncInt(countTracks, "track count"); + } +freeMem(stripType); +// showCounts(countTracks); +} + +static void trackSettings(struct trackDb *tdb, struct hash *countTracks) +/* process the settingsHash for a trackDb, recursive when subtracks */ { hPrintf(" <ul>\n"); // if (tdb->children) haven't yet seen a track with children ? // hPrintf(" <li>%s: has children</li>\n", tdb->track); // else // hPrintf(" <li>%s: NO children</li>\n", tdb->track); struct hashEl *hel; struct hashCookie hc = hashFirst(tdb->settingsHash); while ((hel = hashNext(&hc)) != NULL) { if (sameWord("track", hel->name)) continue; // already output in header if (isEmpty((char *)hel->val)) hPrintf(" <li>%s : <empty></li>\n", hel->name); else hPrintf(" <li>%s : '%s'</li>\n", hel->name, (char *)hel->val); } if (tdb->subtracks) { struct trackDb *tdbEl = tdb->subtracks; hPrintf(" <li>has %d subtrack(s)</li>\n", slCount(tdb->subtracks)); for (tdbEl = tdb->subtracks; tdbEl; tdbEl = tdbEl->next) { - hPrintf("<li>subtrack: %s of parent: %s</li>\n", tdbEl->track, tdbEl->parent->track); - trackSettings(tdbEl); + hPrintf("<li>subtrack: %s of parent: %s : type: '%s'</li>\n", tdbEl->track, tdbEl->parent->track, tdbEl->type); + hashCountTrack(tdbEl, countTracks); + trackSettings(tdbEl, countTracks); } } hPrintf(" </ul>\n"); } static int bbiBriefMeasure(char *type, char *bigDataUrl, char *bigDataIndex, long *chromCount, long *itemCount, struct dyString *errors) /* check a bigDataUrl to find chrom count and item count */ { int retVal = 0; *chromCount = 0; *itemCount = 0; struct errCatch *errCatch = errCatchNew(); if (errCatchStart(errCatch)) { if (startsWithWord("bigNarrowPeak", type) @@ -218,140 +279,146 @@ retVal = 1; } } errCatchEnd(errCatch); if (errCatch->gotError) { retVal = 1; dyStringPrintf(errors, "%s", errCatch->message->string); } errCatchFree(&errCatch); return retVal; } /* static int bbiBriefMeasure() */ -static void hubTrackList(struct trackDb *topTrackDb, struct trackHubGenome *genome) -/* process the track list to show all tracks, return trackDb list */ -{ -if (topTrackDb) - { - struct hash *countTracks = hashNew(0); - hPrintf(" <ul>\n"); - struct trackDb *tdb = NULL; - for ( tdb = topTrackDb; tdb; tdb = tdb->next ) +static void countOneTdb(struct trackDb *tdb, char *bigDataIndex, + struct hash *countTracks) { char *bigDataUrl = trackDbSetting(tdb, "bigDataUrl"); - char *compositeTrack = trackDbSetting(tdb, "compositeTrack"); - char *superTrack = trackDbSetting(tdb, "superTrack"); +// char *compositeTrack = trackDbSetting(tdb, "compositeTrack"); +boolean compositeContainer = tdbIsComposite(tdb); +boolean compositeView = tdbIsCompositeView(tdb); +// char *superTrack = trackDbSetting(tdb, "superTrack"); +boolean superChild = tdbIsSuperTrackChild(tdb); boolean depthSearch = cartUsualBoolean(cart, "depthSearch", FALSE); - if (compositeTrack) - hashIncInt(countTracks, "composite container"); - else if (superTrack) - hashIncInt(countTracks, "superTrack child"); - else if (isEmpty(tdb->type)) - hashIncInt(countTracks, "no type specified"); - else - hashIncInt(countTracks, tdb->type); +hashCountTrack(tdb, countTracks); + if (depthSearch && bigDataUrl) { - char *bigDataIndex = NULL; - char *relIdxUrl = trackDbSetting(topTrackDb, "bigDataIndex"); - if (relIdxUrl != NULL) - bigDataIndex = trackHubRelativeUrl(genome->trackDbFile, relIdxUrl); - long chromCount = 0; long itemCount = 0; struct dyString *errors = newDyString(1024); int retVal = bbiBriefMeasure(tdb->type, bigDataUrl, bigDataIndex, &chromCount, &itemCount, errors); if (retVal) { hPrintf(" <li>%s : %s : <font color='red'>ERROR: %s</font></li>\n", tdb->track, tdb->type, errors->string); } else { if (startsWithWord("bigBed", tdb->type)) hPrintf(" <li>%s : %s : %ld chroms : %ld item count</li>\n", tdb->track, tdb->type, chromCount, itemCount); else if (startsWithWord("bigWig", tdb->type)) hPrintf(" <li>%s : %s : %ld chroms : %ld bases covered</li>\n", tdb->track, tdb->type, chromCount, itemCount); else hPrintf(" <li>%s : %s : %ld chroms : %ld count</li>\n", tdb->track, tdb->type, chromCount, itemCount); } } else { - if (compositeTrack) + if (compositeContainer) hPrintf(" <li>%s : %s : composite track container</li>\n", tdb->track, tdb->type); - else if (superTrack) + else if (compositeView) + hPrintf(" <li>%s : %s : composite view</li>\n", tdb->track, tdb->type); + else if (superChild) hPrintf(" <li>%s : %s : superTrack child</li>\n", tdb->track, tdb->type); else if (! depthSearch) hPrintf(" <li>%s : %s : %s</li>\n", tdb->track, tdb->type, bigDataUrl); else hPrintf(" <li>%s : %s</li>\n", tdb->track, tdb->type); } if (allTrackSettings) { hPrintf(" <ul>\n"); - trackSettings(tdb); /* show all settings */ + trackSettings(tdb, countTracks); /* show all settings */ hPrintf(" </ul>\n"); } +return; +} /* static void countOneTdb(struct trackDb *tdb, + * char *bigDataIndex, struct hash *countTracks) + */ + +static void hubTrackList(struct trackDb *topTrackDb, struct trackHubGenome *genome) +/* process the track list in a hub to show all tracks */ +{ +if (topTrackDb) + { + struct hash *countTracks = hashNew(0); + hPrintf(" <ul>\n"); + struct trackDb *tdb = NULL; + for ( tdb = topTrackDb; tdb; tdb = tdb->next ) + { + char *bigDataIndex = NULL; + char *relIdxUrl = trackDbSetting(topTrackDb, "bigDataIndex"); + if (relIdxUrl != NULL) + bigDataIndex = trackHubRelativeUrl(genome->trackDbFile, relIdxUrl); + countOneTdb(tdb, bigDataIndex, countTracks); if (timeOutReached()) break; } /* for ( tdb = topTrackDb; tdb; tdb = tdb->next ) */ hPrintf(" <li>%d different track types</li>\n", countTracks->elCount); + /* add this single genome count to the overall multi-genome counts */ if (countTracks->elCount) { - hPrintf(" <ul>\n"); - struct hashEl *hel; - struct hashCookie hc = hashFirst(countTracks); - while ((hel = hashNext(&hc)) != NULL) + hPrintf(" <ol>\n"); + struct hashEl *hel, *helList = hashElListHash(countTracks); + slSort(&helList, hashElCmpIntValDesc); + for (hel = helList; hel; hel = hel->next) { int prevCount = ptToInt(hashFindVal(trackCounter, hel->name)); + if (differentStringNullOk("track count", hel->name)) totalTracks += ptToInt(hel->val); hashReplace(trackCounter, hel->name, intToPt(prevCount + ptToInt(hel->val))); hPrintf(" <li>%d - %s</li>\n", ptToInt(hel->val), hel->name); } - hPrintf(" </ul>\n"); + hPrintf(" </ol>\n"); } hPrintf(" </ul>\n"); } else hPrintf(" <li>no trackTopDb</li>\n"); } /* static struct trackDb *hubTrackList() */ static void assemblySettings(struct trackHubGenome *genome) /* display all the assembly 'settingsHash' */ { struct trackDb *tdb = trackHubTracksForGenome(genome->trackHub, genome); tdb = trackDbLinkUpGenerations(tdb); -int elCount = 0; hPrintf(" <ul>\n"); struct hashEl *hel; struct hashCookie hc = hashFirst(genome->settingsHash); while ((hel = hashNext(&hc)) != NULL) { - ++elCount; hPrintf(" <li>%s : %s</li>\n", hel->name, (char *)hel->val); if (sameWord("trackDb", hel->name)) /* examine the trackDb structure */ { hubTrackList(tdb, genome); } if (timeOutReached()) break; } -hPrintf(" <li>elCount: %d</li>\n", elCount); hPrintf(" </ul>\n"); } struct slName *genomeList(struct trackHub *hubTop, struct trackDb **dbTrackList, char *selectGenome) /* follow the pointers from the trackHub to trackHubGenome and around * in a circle from one to the other to find all hub resources * return slName list of the genomes in this track hub * optionally, return the trackList from this hub for the specified genome */ { struct slName *retList = NULL; long totalAssemblyCount = 0; struct trackHubGenome *genome = hubTop->genomeList; @@ -377,38 +444,38 @@ hPrintf("<li>%s</li>\n", genome->name); } assemblySettings(genome); if (measureTiming) { long thisTime = clock1000(); hPrintf("<em>processing time %s: %ld millis</em><br>\n", genome->name, thisTime - lastTime); } if (timeOutReached()) break; } if (trackCounter->elCount) { hPrintf(" <li>total genome assembly count: %ld</li>\n", totalAssemblyCount); hPrintf(" <li>%ld total tracks counted, %d different track types:</li>\n", totalTracks, trackCounter->elCount); - hPrintf(" <ul>\n"); - struct hashEl *hel; - struct hashCookie hc = hashFirst(trackCounter); - while ((hel = hashNext(&hc)) != NULL) + hPrintf(" <ol>\n"); + struct hashEl *hel, *helList = hashElListHash(trackCounter); + slSort(&helList, hashElCmpIntValDesc); + for (hel = helList; hel; hel = hel->next) { hPrintf(" <li>%d - %s - total</li>\n", ptToInt(hel->val), hel->name); } - hPrintf(" </ul>\n"); + hPrintf(" </ol>\n"); } hPrintf("</ul>\n"); return retList; } /* static struct slName *genomeList () */ static char *urlFromShortLabel(char *shortLabel) /* this is not a fair way to get the URL since shortLabel's are not * necessarily unique. This is temporary. TBD: need to always use URL * and then get the shortLabel */ { char hubUrl[1024]; char query[1024]; struct sqlConnection *conn = hConnectCentral(); // Build a query to select the hubUrl for the given shortLabel @@ -474,112 +541,60 @@ char *words[MAX_PATH_INFO];/*expect no more than MAX_PATH_INFO number of words*/ int wordCount = chopByChar(pathInfo, '/', words, ArraySize(words)); if (wordCount < 2) apiErrAbort("unknown endpoint command: '/%s'", pathInfo); struct hashEl *hel = hashLookup(apiFunctionHash, words[0]); if (hel == NULL) apiErrAbort("no such command: '%s' for endpoint '/%s'", words[0], pathInfo); void (*apiFunction)(char **) = hel->val; // void (*apiFunction)(char **) = hashMustFindVal(apiFunctionHash, words[0]); (*apiFunction)(words); } /* static void apiFunctionSwitch(char *pathInfo) */ -static void tracksForUcscDb(char * ucscDb) +static void tracksForUcscDb(char *db) +/* scan the specified database for all tracks */ { struct hash *countTracks = hashNew(0); -struct sqlConnection *conn = hAllocConn(ucscDb); -hPrintf("<p>Tracks in UCSC genome: '%s'<br>\n", ucscDb); -struct trackDb *tdbList = hTrackDb(ucscDb); +hPrintf("<p>Tracks in UCSC genome: '%s'<br>\n", db); +struct trackDb *tdbList = hTrackDb(db); struct trackDb *tdb; hPrintf("<ul>\n"); for (tdb = tdbList; tdb != NULL; tdb = tdb->next ) { - char *bigDataUrl = trackDbSetting(tdb, "bigDataUrl"); - char *compositeTrack = trackDbSetting(tdb, "compositeTrack"); - char *superTrack = trackDbSetting(tdb, "superTrack"); - boolean depthSearch = cartUsualBoolean(cart, "depthSearch", FALSE); - if (compositeTrack) - hashIncInt(countTracks, "composite container"); - else if (superTrack) - hashIncInt(countTracks, "superTrack child"); - else if (isEmpty(tdb->type)) - hashIncInt(countTracks, "no type specified"); - else - hashIncInt(countTracks, tdb->type); - if (depthSearch && bigDataUrl) - { - long chromCount = 0; - long itemCount = 0; - struct dyString *errors = newDyString(1024); - int retVal = bbiBriefMeasure(tdb->type, bigDataUrl, NULL, &chromCount, &itemCount, errors); - if (retVal) - { - hPrintf(" <li>%s : %s : <font color='red'>ERROR: %s</font></li>\n", tdb->track, tdb->type, errors->string); - } - else - { - if (startsWithWord("bigBed", tdb->type)) - hPrintf(" <li>%s : %s : %ld chroms : %ld item count</li>\n", tdb->track, tdb->type, chromCount, itemCount); - else if (startsWithWord("bigWig", tdb->type)) - hPrintf(" <li>%s : %s : %ld chroms : %ld bases covered</li>\n", tdb->track, tdb->type, chromCount, itemCount); - else - hPrintf(" <li>%s : %s : %ld chroms : %ld count</li>\n", tdb->track, tdb->type, chromCount, itemCount); - } - } - else - { - if (isEmpty(bigDataUrl)) - { - int rowCount = 0; - if (sqlTableExists(conn, tdb->track)) - { - rowCount = sqlRowCount(conn, tdb->track); - hPrintf("<li>%s : %s : %d item count</li>\n", tdb->track, tdb->type, rowCount); - } - else - { - if (compositeTrack) - hPrintf("<li>%s : %s : composite container </li>\n", tdb->track, tdb->type); - else - hPrintf("<li>%s : %s</li>\n", tdb->track, tdb->type); - } - } - else - hPrintf("<li>%s : %s : %s</li>\n", tdb->track,tdb->type,bigDataUrl); - } - if (allTrackSettings) - trackSettings(tdb); /* show all settings */ + countOneTdb(tdb, NULL, countTracks); + if (timeOutReached()) + break; } -hPrintf(" <li>%d different track types</li>\n", countTracks->elCount); +int trackCount = ptToInt(hashFindVal(countTracks, "track count")); +hPrintf(" <li>%d total tracks counted, %d different track types</li>\n", trackCount, countTracks->elCount); if (countTracks->elCount) { - hPrintf(" <ul>\n"); - struct hashEl *hel; - struct hashCookie hc = hashFirst(countTracks); - while ((hel = hashNext(&hc)) != NULL) + hPrintf(" <ol>\n"); + struct hashEl *hel, *helList = hashElListHash(countTracks); + slSort(&helList, hashElCmpIntValDesc); + for (hel = helList; hel; hel = hel->next) { hPrintf(" <li>%d - %s</li>\n", ptToInt(hel->val), hel->name); } - hPrintf(" </ul>\n"); + hPrintf(" </ol>\n"); } hPrintf("</ul>\n"); hPrintf("</p>\n"); -hFreeConn(&conn); -} // static void tracksForUcscDb(char * ucscDb) +} // static void tracksForUcscDb(char * db) static void showExamples(char *url, struct trackHubGenome *hubGenome, char *ucscDb) { hPrintf("<h2>Example URLs to return json data structures:</h2>\n"); hPrintf("<ol>\n"); hPrintf("<li><a href='/cgi-bin/hubApi/list/publicHubs' target=_blank>list public hubs</a> <em>/cgi-bin/hubApi/list/publicHubs</em></li>\n"); hPrintf("<li><a href='/cgi-bin/hubApi/list/ucscGenomes' target=_blank>list database genomes</a> <em>/cgi-bin/hubApi/list/ucscGenomes</em></li>\n"); hPrintf("<li><a href='/cgi-bin/hubApi/list/hubGenomes?hubUrl=%s' target=_blank>list genomes from specified hub</a> <em>/cgi-bin/hubApi/list/hubGenomes?hubUrl=%s</em></li>\n", url, url); hPrintf("<li><a href='/cgi-bin/hubApi/list/tracks?hubUrl=%s&hubUrl=%s&genome=%s' target=_blank>list tracks from specified hub and genome</a> <em>/cgi-bin/hubApi/list/tracks?hubUrl=%s&genome=%s</em></li>\n", url, url, hubGenome->name, url, hubGenome->name); hPrintf("<li><a href='/cgi-bin/hubApi/list/tracks?db=%s' target=_blank>list tracks from specified UCSC database</a> <em>/cgi-bin/hubApi/list/tracks?db=%s</em></li>\n", ucscDb, ucscDb); hPrintf("<li><a href='/cgi-bin/hubApi/list/chromosomes?db=%s' target=_blank>list chromosomes from specified UCSC database</a> <em>/cgi-bin/hubApi/list/chromosomes?db=%s</em></li>\n", ucscDb, ucscDb); hPrintf("<li><a href='/cgi-bin/hubApi/list/chromosomes?db=%s&track=gap' target=_blank>list chromosomes from specified track from UCSC databaset</a> <em>/cgi-bin/hubApi/list/chromosomes?db=%s&track=gap</em></li>\n", ucscDb, ucscDb); hPrintf("<li><a href='/cgi-bin/hubApi/getData/sequence?db=%s&chrom=chrM' target=_blank>get sequence from specified database and chromosome</a> <em>/cgi-bin/hubApi/getData/sequence?db=%s&chrom=chrM</em></li>\n", ucscDb, ucscDb); hPrintf("<li><a href='/cgi-bin/hubApi/getData/sequence?db=%s&chrom=chrM&start=0&end=128' target=_blank>get sequence from specified database, chromosome with start,end coordinates</a> <em>/cgi-bin/hubApi/getData/sequence?db=%s&chrom=chrM&start=0&end=128</em></li>\n", ucscDb, ucscDb);