b6ea7e02e7c946b1cb700d3cab07decce272d4c7
hiram
Tue Feb 26 16:00:40 2019 -0800
better counting of legitimate tracks refs #18869
diff --git src/hg/hubApi/hubApi.c src/hg/hubApi/hubApi.c
index dd0eddd..45a44b1 100644
--- src/hg/hubApi/hubApi.c
+++ src/hg/hubApi/hubApi.c
@@ -47,101 +47,162 @@
}
static struct hubPublic *hubPublicLoad(char **row)
/* Load a hubPublic from row fetched with select * from hubPublic
* from database. Dispose of this with hubPublicFree(). */
{
struct hubPublic *ret;
AllocVar(ret);
ret->hubUrl = cloneString(row[0]);
ret->shortLabel = cloneString(row[1]);
ret->longLabel = cloneString(row[2]);
ret->registrationTime = cloneString(row[3]);
ret->dbCount = sqlUnsigned(row[4]);
ret->dbList = cloneString(row[5]);
-// if (row[6])
ret->descriptionUrl = cloneString(row[6]);
-// else
-// ret->descriptionUrl = cloneString("");
return ret;
}
struct hubPublic *hubPublicLoadAll()
+/* Read every row of the hubPublic table in hgcentral and return the rows
+ * as a list ordered by publicHubSortCase().  As a side effect this
+ * allocates the shortLabels array (declared outside this function) with
+ * one slot per row, points each slot at that entry's own shortLabel
+ * string (no copy is made), and increments publicHubCount once per row. */
{
char query[1024];
struct hubPublic *list = NULL;
struct sqlConnection *conn = hConnectCentral();
sqlSafef(query, sizeof(query), "select * from %s", hubPublicTableName());
struct sqlResult *sr = sqlGetResult(conn, query);
char **row;
while ((row = sqlNextRow(sr)) != NULL)
{
struct hubPublic *el = hubPublicLoad(row);
slAddHead(&list, el);
}
sqlFreeResult(&sr);
hDisconnectCentral(&conn);
publicHubSortCase(&list);
int listSize = slCount(list);
AllocArray(shortLabels, listSize);
struct hubPublic *el = list;
int i = 0;
for ( ; el != NULL; el = el->next )
{
shortLabels[i++] = el->shortLabel;
++publicHubCount;
}
return list;
}
static boolean timeOutReached()
+/* Report whether the time limit has been passed: TRUE once the clock1000()
+ * reading exceeds enteredMainTime by more than 1000 * timeOutSeconds.
+ * The answer is also stored in timedOut, which is declared outside this
+ * function; callers use the return value to break out of their loops.
+ */
{
long nowTime = clock1000();
timedOut = FALSE;
if ((nowTime - enteredMainTime) > (1000 * timeOutSeconds))
timedOut= TRUE;
return timedOut;
}
-static void trackSettings(struct trackDb *tdb)
-/* process the settingsHash for a trackDb, recursive if subtracks */
+#ifdef NOT
+static void showCounts(struct hash *countTracks)
+{
+if (countTracks->elCount)
+ {
+ hPrintf("
\n");
+ struct hashEl *hel;
+ struct hashCookie hc = hashFirst(countTracks);
+ while ((hel = hashNext(&hc)) != NULL)
+ hPrintf(" - %d - %s
\n", ptToInt(hel->val), hel->name);
+ hPrintf("
\n");
+ }
+}
+#endif
+
+static void hashCountTrack(struct trackDb *tdb, struct hash *countTracks)
+/* Tally one track into countTracks, a hash of category name -> int count.
+ *   tdb         - the track to classify
+ *   countTracks - counts are incremented in place with hashIncInt()
+ * Composite containers and composite views are tallied only under their
+ * own category.  A superTrack child is tallied as "superTrack child",
+ * under its type, and under "track count".  Any other track is tallied
+ * under its type and "track count", or under "no type specified" alone
+ * when it has no type.  For chain, netAlign, genePred, bigWig, wigMaf and
+ * wig the trailing parameters are dropped first, so "bigWig 0 100" is
+ * counted as "bigWig". */
+{
+/* a NULL type is handled like an empty one (isEmpty() below makes no
+ * distinction), so none of the calls below is handed a NULL string */
+char *type = (tdb->type != NULL) ? tdb->type : "";
+/* No cloneString() copies are made.  The earlier version freed its copy
+ * right after the hashIncInt() calls while the names are still printed
+ * from the hash later on, so the hash evidently keeps its own copy of
+ * each name.  That version also overwrote its first copy in the if/else
+ * chain, leaking it on every call. */
+char *stripType = type;
+if (startsWith("chain ", type))
+    stripType = "chain";
+else if (startsWith("netAlign ", type))
+    stripType = "netAlign";
+else if (startsWith("genePred ", type))
+    stripType = "genePred";
+else if (startsWith("bigWig ", type))
+    stripType = "bigWig";
+else if (startsWith("wigMaf ", type))
+    stripType = "wigMaf";
+else if (startsWith("wig ", type))
+    stripType = "wig";
+
+if (tdbIsComposite(tdb))
+    hashIncInt(countTracks, "composite container");
+else if (tdbIsCompositeView(tdb))
+    hashIncInt(countTracks, "composite view");
+else if (tdbIsSuperTrackChild(tdb))
+    {
+    /* a superTrack child is itself a real track, count it both ways */
+    hashIncInt(countTracks, "superTrack child");
+    hashIncInt(countTracks, stripType);
+    hashIncInt(countTracks, "track count");
+    }
+else if (isEmpty(type))
+    hashIncInt(countTracks, "no type specified");
+else
+    {
+    hashIncInt(countTracks, stripType);
+    hashIncInt(countTracks, "track count");
+    }
+}
+
+static void trackSettings(struct trackDb *tdb, struct hash *countTracks)
+/* process the settingsHash for a trackDb, recursive when subtracks */
{
hPrintf(" \n");
// if (tdb->children) haven't yet seen a track with children ?
// hPrintf(" - %s: has children
\n", tdb->track);
// else
// hPrintf(" - %s: NO children
\n", tdb->track);
struct hashEl *hel;
struct hashCookie hc = hashFirst(tdb->settingsHash);
while ((hel = hashNext(&hc)) != NULL)
{
if (sameWord("track", hel->name))
continue; // already output in header
if (isEmpty((char *)hel->val))
hPrintf(" - %s : <empty>
\n", hel->name);
else
hPrintf(" - %s : '%s'
\n", hel->name, (char *)hel->val);
}
if (tdb->subtracks)
{
struct trackDb *tdbEl = tdb->subtracks;
hPrintf(" - has %d subtrack(s)
\n", slCount(tdb->subtracks));
for (tdbEl = tdb->subtracks; tdbEl; tdbEl = tdbEl->next)
{
- hPrintf("- subtrack: %s of parent: %s
\n", tdbEl->track, tdbEl->parent->track);
- trackSettings(tdbEl);
+ hPrintf("- subtrack: %s of parent: %s : type: '%s'
\n", tdbEl->track, tdbEl->parent->track, tdbEl->type);
+ hashCountTrack(tdbEl, countTracks);
+ trackSettings(tdbEl, countTracks);
}
}
hPrintf("
\n");
}
static int bbiBriefMeasure(char *type, char *bigDataUrl, char *bigDataIndex, long *chromCount, long *itemCount, struct dyString *errors)
/* check a bigDataUrl to find chrom count and item count */
{
int retVal = 0;
*chromCount = 0;
*itemCount = 0;
struct errCatch *errCatch = errCatchNew();
if (errCatchStart(errCatch))
{
if (startsWithWord("bigNarrowPeak", type)
@@ -218,140 +279,146 @@
retVal = 1;
}
}
errCatchEnd(errCatch);
if (errCatch->gotError)
{
retVal = 1;
dyStringPrintf(errors, "%s", errCatch->message->string);
}
errCatchFree(&errCatch);
return retVal;
} /* static int bbiBriefMeasure() */
-static void hubTrackList(struct trackDb *topTrackDb, struct trackHubGenome *genome)
-/* process the track list to show all tracks, return trackDb list */
-{
-if (topTrackDb)
- {
- struct hash *countTracks = hashNew(0);
- hPrintf(" \n");
- struct trackDb *tdb = NULL;
- for ( tdb = topTrackDb; tdb; tdb = tdb->next )
+static void countOneTdb(struct trackDb *tdb, char *bigDataIndex,
+ struct hash *countTracks)
{
char *bigDataUrl = trackDbSetting(tdb, "bigDataUrl");
- char *compositeTrack = trackDbSetting(tdb, "compositeTrack");
- char *superTrack = trackDbSetting(tdb, "superTrack");
+// char *compositeTrack = trackDbSetting(tdb, "compositeTrack");
+boolean compositeContainer = tdbIsComposite(tdb);
+boolean compositeView = tdbIsCompositeView(tdb);
+// char *superTrack = trackDbSetting(tdb, "superTrack");
+boolean superChild = tdbIsSuperTrackChild(tdb);
boolean depthSearch = cartUsualBoolean(cart, "depthSearch", FALSE);
- if (compositeTrack)
- hashIncInt(countTracks, "composite container");
- else if (superTrack)
- hashIncInt(countTracks, "superTrack child");
- else if (isEmpty(tdb->type))
- hashIncInt(countTracks, "no type specified");
- else
- hashIncInt(countTracks, tdb->type);
+hashCountTrack(tdb, countTracks);
+
if (depthSearch && bigDataUrl)
{
- char *bigDataIndex = NULL;
- char *relIdxUrl = trackDbSetting(topTrackDb, "bigDataIndex");
- if (relIdxUrl != NULL)
- bigDataIndex = trackHubRelativeUrl(genome->trackDbFile, relIdxUrl);
-
long chromCount = 0;
long itemCount = 0;
struct dyString *errors = newDyString(1024);
int retVal = bbiBriefMeasure(tdb->type, bigDataUrl, bigDataIndex, &chromCount, &itemCount, errors);
if (retVal)
{
hPrintf(" - %s : %s : ERROR: %s
\n", tdb->track, tdb->type, errors->string);
}
else
{
if (startsWithWord("bigBed", tdb->type))
hPrintf(" - %s : %s : %ld chroms : %ld item count
\n", tdb->track, tdb->type, chromCount, itemCount);
else if (startsWithWord("bigWig", tdb->type))
hPrintf(" - %s : %s : %ld chroms : %ld bases covered
\n", tdb->track, tdb->type, chromCount, itemCount);
else
hPrintf(" - %s : %s : %ld chroms : %ld count
\n", tdb->track, tdb->type, chromCount, itemCount);
}
}
else
{
- if (compositeTrack)
+ if (compositeContainer)
hPrintf(" - %s : %s : composite track container
\n", tdb->track, tdb->type);
- else if (superTrack)
+ else if (compositeView)
+ hPrintf(" - %s : %s : composite view
\n", tdb->track, tdb->type);
+ else if (superChild)
hPrintf(" - %s : %s : superTrack child
\n", tdb->track, tdb->type);
else if (! depthSearch)
hPrintf(" - %s : %s : %s
\n", tdb->track, tdb->type, bigDataUrl);
else
hPrintf(" - %s : %s
\n", tdb->track, tdb->type);
}
if (allTrackSettings)
{
hPrintf(" \n");
- trackSettings(tdb); /* show all settings */
+ trackSettings(tdb, countTracks); /* show all settings */
hPrintf("
\n");
}
+return;
+} /* static void countOneTdb(struct trackDb *tdb,
+ * char *bigDataIndex, struct hash *countTracks)
+ */
+
+static void hubTrackList(struct trackDb *topTrackDb, struct trackHubGenome *genome)
+/* process the track list in a hub to show all tracks */
+{
+if (topTrackDb)
+ {
+ struct hash *countTracks = hashNew(0);
+ hPrintf(" \n");
+ struct trackDb *tdb = NULL;
+ for ( tdb = topTrackDb; tdb; tdb = tdb->next )
+ {
+ char *bigDataIndex = NULL;
+ char *relIdxUrl = trackDbSetting(topTrackDb, "bigDataIndex");
+ if (relIdxUrl != NULL)
+ bigDataIndex = trackHubRelativeUrl(genome->trackDbFile, relIdxUrl);
+ countOneTdb(tdb, bigDataIndex, countTracks);
if (timeOutReached())
break;
} /* for ( tdb = topTrackDb; tdb; tdb = tdb->next ) */
hPrintf(" - %d different track types
\n", countTracks->elCount);
+ /* add this single genome count to the overall multi-genome counts */
if (countTracks->elCount)
{
- hPrintf(" \n");
- struct hashEl *hel;
- struct hashCookie hc = hashFirst(countTracks);
- while ((hel = hashNext(&hc)) != NULL)
+ hPrintf(" \n");
+ struct hashEl *hel, *helList = hashElListHash(countTracks);
+ slSort(&helList, hashElCmpIntValDesc);
+ for (hel = helList; hel; hel = hel->next)
{
int prevCount = ptToInt(hashFindVal(trackCounter, hel->name));
+ if (differentStringNullOk("track count", hel->name))
totalTracks += ptToInt(hel->val);
hashReplace(trackCounter, hel->name, intToPt(prevCount + ptToInt(hel->val)));
hPrintf(" - %d - %s
\n", ptToInt(hel->val), hel->name);
}
- hPrintf("
\n");
+ hPrintf(" \n");
}
hPrintf("
\n");
}
else
hPrintf(" - no trackTopDb
\n");
} /* static struct trackDb *hubTrackList() */
static void assemblySettings(struct trackHubGenome *genome)
/* display all the assembly 'settingsHash' */
{
struct trackDb *tdb = trackHubTracksForGenome(genome->trackHub, genome);
tdb = trackDbLinkUpGenerations(tdb);
-int elCount = 0;
hPrintf(" \n");
struct hashEl *hel;
struct hashCookie hc = hashFirst(genome->settingsHash);
while ((hel = hashNext(&hc)) != NULL)
{
- ++elCount;
hPrintf(" - %s : %s
\n", hel->name, (char *)hel->val);
if (sameWord("trackDb", hel->name)) /* examine the trackDb structure */
{
hubTrackList(tdb, genome);
}
if (timeOutReached())
break;
}
-hPrintf(" - elCount: %d
\n", elCount);
hPrintf("
\n");
}
struct slName *genomeList(struct trackHub *hubTop, struct trackDb **dbTrackList, char *selectGenome)
/* follow the pointers from the trackHub to trackHubGenome and around
* in a circle from one to the other to find all hub resources
* return slName list of the genomes in this track hub
* optionally, return the trackList from this hub for the specified genome
*/
{
struct slName *retList = NULL;
long totalAssemblyCount = 0;
struct trackHubGenome *genome = hubTop->genomeList;
@@ -377,38 +444,38 @@
hPrintf("- %s
\n", genome->name);
}
assemblySettings(genome);
if (measureTiming)
{
long thisTime = clock1000();
hPrintf("processing time %s: %ld millis
\n", genome->name, thisTime - lastTime);
}
if (timeOutReached())
break;
}
if (trackCounter->elCount)
{
hPrintf(" - total genome assembly count: %ld
\n", totalAssemblyCount);
hPrintf(" - %ld total tracks counted, %d different track types:
\n", totalTracks, trackCounter->elCount);
- hPrintf(" \n");
- struct hashEl *hel;
- struct hashCookie hc = hashFirst(trackCounter);
- while ((hel = hashNext(&hc)) != NULL)
+ hPrintf(" \n");
+ struct hashEl *hel, *helList = hashElListHash(trackCounter);
+ slSort(&helList, hashElCmpIntValDesc);
+ for (hel = helList; hel; hel = hel->next)
{
hPrintf(" - %d - %s - total
\n", ptToInt(hel->val), hel->name);
}
- hPrintf("
\n");
+ hPrintf(" \n");
}
hPrintf("
\n");
return retList;
} /* static struct slName *genomeList () */
static char *urlFromShortLabel(char *shortLabel)
/* this is not a fair way to get the URL since shortLabel's are not
* necessarily unique. This is temporary. TBD: need to always use URL
* and then get the shortLabel
*/
{
char hubUrl[1024];
char query[1024];
struct sqlConnection *conn = hConnectCentral();
// Build a query to select the hubUrl for the given shortLabel
@@ -474,112 +541,60 @@
char *words[MAX_PATH_INFO];/*expect no more than MAX_PATH_INFO number of words*/
int wordCount = chopByChar(pathInfo, '/', words, ArraySize(words));
if (wordCount < 2)
apiErrAbort("unknown endpoint command: '/%s'", pathInfo);
struct hashEl *hel = hashLookup(apiFunctionHash, words[0]);
if (hel == NULL)
apiErrAbort("no such command: '%s' for endpoint '/%s'", words[0], pathInfo);
void (*apiFunction)(char **) = hel->val;
// void (*apiFunction)(char **) = hashMustFindVal(apiFunctionHash, words[0]);
(*apiFunction)(words);
} /* static void apiFunctionSwitch(char *pathInfo) */
-static void tracksForUcscDb(char * ucscDb)
+static void tracksForUcscDb(char *db)
+/* scan the specified database for all tracks */
{
struct hash *countTracks = hashNew(0);
-struct sqlConnection *conn = hAllocConn(ucscDb);
-hPrintf("Tracks in UCSC genome: '%s'
\n", ucscDb);
-struct trackDb *tdbList = hTrackDb(ucscDb);
+hPrintf("
Tracks in UCSC genome: '%s'
\n", db);
+struct trackDb *tdbList = hTrackDb(db);
struct trackDb *tdb;
hPrintf("
\n");
for (tdb = tdbList; tdb != NULL; tdb = tdb->next )
{
- char *bigDataUrl = trackDbSetting(tdb, "bigDataUrl");
- char *compositeTrack = trackDbSetting(tdb, "compositeTrack");
- char *superTrack = trackDbSetting(tdb, "superTrack");
- boolean depthSearch = cartUsualBoolean(cart, "depthSearch", FALSE);
- if (compositeTrack)
- hashIncInt(countTracks, "composite container");
- else if (superTrack)
- hashIncInt(countTracks, "superTrack child");
- else if (isEmpty(tdb->type))
- hashIncInt(countTracks, "no type specified");
- else
- hashIncInt(countTracks, tdb->type);
- if (depthSearch && bigDataUrl)
- {
- long chromCount = 0;
- long itemCount = 0;
- struct dyString *errors = newDyString(1024);
- int retVal = bbiBriefMeasure(tdb->type, bigDataUrl, NULL, &chromCount, &itemCount, errors);
- if (retVal)
- {
- hPrintf(" - %s : %s : ERROR: %s
\n", tdb->track, tdb->type, errors->string);
- }
- else
- {
- if (startsWithWord("bigBed", tdb->type))
- hPrintf(" - %s : %s : %ld chroms : %ld item count
\n", tdb->track, tdb->type, chromCount, itemCount);
- else if (startsWithWord("bigWig", tdb->type))
- hPrintf(" - %s : %s : %ld chroms : %ld bases covered
\n", tdb->track, tdb->type, chromCount, itemCount);
- else
- hPrintf(" - %s : %s : %ld chroms : %ld count
\n", tdb->track, tdb->type, chromCount, itemCount);
- }
- }
- else
- {
- if (isEmpty(bigDataUrl))
- {
- int rowCount = 0;
- if (sqlTableExists(conn, tdb->track))
- {
- rowCount = sqlRowCount(conn, tdb->track);
- hPrintf("- %s : %s : %d item count
\n", tdb->track, tdb->type, rowCount);
- }
- else
- {
- if (compositeTrack)
- hPrintf("- %s : %s : composite container
\n", tdb->track, tdb->type);
- else
- hPrintf("- %s : %s
\n", tdb->track, tdb->type);
- }
- }
- else
- hPrintf("- %s : %s : %s
\n", tdb->track,tdb->type,bigDataUrl);
- }
- if (allTrackSettings)
- trackSettings(tdb); /* show all settings */
+ countOneTdb(tdb, NULL, countTracks);
+ if (timeOutReached())
+ break;
}
-hPrintf(" - %d different track types
\n", countTracks->elCount);
+int trackCount = ptToInt(hashFindVal(countTracks, "track count"));
+hPrintf(" - %d total tracks counted, %d different track types
\n", trackCount, countTracks->elCount);
if (countTracks->elCount)
{
- hPrintf(" \n");
- struct hashEl *hel;
- struct hashCookie hc = hashFirst(countTracks);
- while ((hel = hashNext(&hc)) != NULL)
+ hPrintf(" \n");
+ struct hashEl *hel, *helList = hashElListHash(countTracks);
+ slSort(&helList, hashElCmpIntValDesc);
+ for (hel = helList; hel; hel = hel->next)
{
hPrintf(" - %d - %s
\n", ptToInt(hel->val), hel->name);
}
- hPrintf("
\n");
+ hPrintf(" \n");
}
hPrintf("
\n");
hPrintf("\n");
-hFreeConn(&conn);
-} // static void tracksForUcscDb(char * ucscDb)
+} // static void tracksForUcscDb(char * db)
static void showExamples(char *url, struct trackHubGenome *hubGenome, char *ucscDb)
{
hPrintf("Example URLs to return json data structures:
\n");
hPrintf("\n");
hPrintf("- list public hubs /cgi-bin/hubApi/list/publicHubs
\n");
hPrintf("- list database genomes /cgi-bin/hubApi/list/ucscGenomes
\n");
hPrintf("- list genomes from specified hub /cgi-bin/hubApi/list/hubGenomes?hubUrl=%s
\n", url, url);
hPrintf("- list tracks from specified hub and genome /cgi-bin/hubApi/list/tracks?hubUrl=%s&genome=%s
\n", url, url, hubGenome->name, url, hubGenome->name);
hPrintf("- list tracks from specified UCSC database /cgi-bin/hubApi/list/tracks?db=%s
\n", ucscDb, ucscDb);
hPrintf("- list chromosomes from specified UCSC database /cgi-bin/hubApi/list/chromosomes?db=%s
\n", ucscDb, ucscDb);
hPrintf("- list chromosomes from specified track from UCSC databaset /cgi-bin/hubApi/list/chromosomes?db=%s&track=gap
\n", ucscDb, ucscDb);
hPrintf("- get sequence from specified database and chromosome /cgi-bin/hubApi/getData/sequence?db=%s&chrom=chrM
\n", ucscDb, ucscDb);
hPrintf("- get sequence from specified database, chromosome with start,end coordinates /cgi-bin/hubApi/getData/sequence?db=%s&chrom=chrM&start=0&end=128
\n", ucscDb, ucscDb);