9d4635d2821b3d8a73396728cbd9cd56ad842f7c wisulliv Fri Feb 9 15:54:43 2024 -0800 Updated Home and Data Images. diff --git src/hg/cirm/cdw/cdwWebBrowse/cdwWebBrowse.c src/hg/cirm/cdw/cdwWebBrowse/cdwWebBrowse.c index 448051a..eee196c 100644 --- src/hg/cirm/cdw/cdwWebBrowse/cdwWebBrowse.c +++ src/hg/cirm/cdw/cdwWebBrowse/cdwWebBrowse.c @@ -1,2374 +1,2374 @@ /* cdwWebBrowse - Browse CIRM data warehouse.. */ /* Copyright (C) 2014 The Regents of the University of California * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "options.h" #include "obscure.h" #include "cheapcgi.h" #include "sqlSanity.h" #include "trix.h" #include "htmshell.h" #include "fieldedTable.h" #include "portable.h" #include "paraFetch.h" #include "tagStorm.h" #include "rql.h" #include "intValTree.h" #include "cart.h" #include "cartDb.h" #include "jksql.h" #include "cdw.h" #include "cdwLib.h" #include "cdwValid.h" #include "hui.h" #include "hgConfig.h" #include "hgColors.h" #include "rainbow.h" #include "web.h" #include "tablesTables.h" #include "jsHelper.h" #include "wikiLink.h" #include "cdwFlowCharts.h" #include "cdwStep.h" #include "facetField.h" #include "rqlToSql.h" /* Global vars */ struct cart *cart; // User variables saved from click to click struct hash *oldVars; // Previous cart, before current round of CGI vars folded in struct cdwUser *user; // Our logged in user if any static char *accessibleFilesToken = NULL; // Token for file access if any boolean isPublicSite = FALSE; char *excludeVars[] = {"cdwCommand", "submit", "DownloadFormat", NULL}; void usage() /* Explain usage and exit. */ { errAbort( "cdwWebBrowse is a cgi script not meant to be run from command line.\n" ); } void printHash(char *label, struct hash *hash) /* Print out keys in hash alphabetically. */ { struct hashEl *list, *el; list = hashElListHash(hash); slSort(&list, hashElCmp); printf("%s:\n", label); for (el = list; el != NULL; el = el->next) printf(" %s\n", el->name); hashElFreeList(&list); } // fields/columns of the browse file table char *fileTableFields = NULL; char *visibleFacetFields = NULL; #define FILEFIELDS "file_name,file_size,ucsc_db" #define FILEFACETFIELDS "species,assay,format,lab,data_set_id" struct dyString *printPopularTags(struct hash *hash, int maxSize) /* Get all hash elements, sorted by count, and print all the ones that fit */ { maxSize -= 3; // Leave room for ... struct dyString *dy = dyStringNew(0); struct hashEl *hel, *helList = hashElListHash(hash); slSort(&helList, hashElCmpIntValDesc); for (hel = helList; hel != NULL; hel = hel->next) { int oldSize = dy->stringSize; if (oldSize != 0) dyStringAppend(dy, ", "); dyStringPrintf(dy, "%s (%d)", hel->name, ptToInt(hel->val)); if (dy->stringSize >= maxSize) { dy->string[oldSize] = 0; dy->stringSize = oldSize; dyStringAppend(dy, "..."); break; } } hashElFreeList(&helList); return dy; } static long long sumCounts(struct hash *hash) /* Figuring hash is integer valued, return sum of all vals in hash */ { long long total = 0; struct hashEl *hel, *helList = hashElListHash(hash); for (hel = helList; hel != NULL; hel = hel->next) { int val = ptToInt(hel->val); total += val; } hashElFreeList(&helList); return total; } int labCount(struct tagStorm *tags) /* Return number of different labs in tags */ { struct hash *hash = tagStormCountTagVals(tags, "lab", "accession"); int count = hash->elCount; hashFree(&hash); return count; } void wrapFileName(struct fieldedTable *table, struct fieldedRow *row, char *field, char *val, char *shortVal, void *context) /* Write out wrapper that links us to metadata display */ { printf("", field, val, cartSidUrlString(cart)); printf("%s", shortVal); } void wrapTagField(struct fieldedTable *table, struct fieldedRow *row, char *field, char *val, char *shortVal, void *context) /* Write out wrapper that links us to something nice */ { printf("", val, cartSidUrlString(cart)); printf("%s", shortVal); } void wrapTagValueInFiles(struct fieldedTable *table, struct fieldedRow *row, char *field, char *val, char *shortVal, void *context) /* Write out wrapper that links us to something nice */ { printf("%s", shortVal); } void wrapFileSize(struct fieldedTable *table, struct fieldedRow *row, char * field, char *val, char *shortVal, void *context) /* Write out wrapper that displays file sizes in human-readable format */ { if (!isdigit(val[0])) warn("Warning: expected a number for file_size, but got %s", val); double fVal = atof(val); char* valQual = ""; int intVal = 0; if (fVal>=1E12) { intVal = round(fVal/1E12); valQual = "TB"; } else if (fVal>=1E9) { intVal = round(fVal/1E9); valQual = "GB"; } else if (fVal>=1E6) { intVal = round(fVal/1E6); valQual = "MB"; } else if (fVal>=1E3) { intVal = round(fVal/1E3); valQual = "KB"; } else { intVal = round(fVal); valQual = "B"; } printf("%d %s", intVal, valQual); } static char *mustFindFieldInRow(char *field, struct slName *fieldList, char **row) /* Assuming field is in list, which is ordered same as row, return row cell * corrsepondint to field */ { int fieldIx = 0; struct slName *el; for (el = fieldList; el != NULL; el = el->next) { if (sameString(el->name, field)) { return row[fieldIx]; } ++fieldIx; } errAbort("Couldn't find field %s in row", field); return NULL; } char *tagDescription(char *tag) /* Return tag description given tag name. */ { char *unparsed[] = { #include "tagDescriptions.h" }; int unparsedCount = ArraySize(unparsed); int i; for (i=0; i%s tag description: %s
\n", tag, description); /* Print out some summary stats */ struct hash *hash = tagStormCountTagVals(tags, tag, "accession"); printf("The %s tag has %d distinct values and is used on %lld files. ", tag, hash->elCount, sumCounts(hash)); /* Initially sort from most popular to least popular */ struct hashEl *hel, *helList = hashElListHash(hash); slSort(&helList, hashElCmpIntValDesc); /* Create fielded table containing tag values */ char *labels[] = {"files", tag}; int fieldCount = ArraySize(labels); struct fieldedTable *table = fieldedTableNew("Tag Values", labels, fieldCount); for (hel = helList; hel != NULL; hel = hel->next) { char numBuf[16]; safef(numBuf, sizeof(numBuf), "%d", ptToInt(hel->val)); char *row[2] = {numBuf, hel->name}; fieldedTableAdd(table, row, fieldCount, 0); } /* Draw sortable table */ char returnUrl[PATH_LEN*2]; safef(returnUrl, sizeof(returnUrl), "../cgi-bin/cdwWebBrowse?cdwCommand=oneTag&cdwTagName=%s&%s", tag, cartSidUrlString(cart) ); struct hash *outputWrappers = hashNew(0); hashAdd(outputWrappers, tag, wrapTagValueInFiles); webSortableFieldedTable(cart, table, returnUrl, "cdwOneTag", 0, outputWrappers, NULL); fieldedTableFree(&table); } void generateTableRow(struct slName *list,char **row, char *idTag , char *idVal) { struct slName *el; static char *outputFields[] = {"tag", "value"}; struct fieldedTable *table = fieldedTableNew("File Tags", outputFields,ArraySize(outputFields)); int fieldIx = 0; char *accession = NULL; for (el = list; el != NULL; el = el->next) { char *outRow[2]; char *val = row[fieldIx]; if (val != NULL) { outRow[0] = el->name; outRow[1] = row[fieldIx]; // add a link to the accession row if (sameWord(el->name, "accession")) { char link[1024]; safef(link, sizeof(link), "%s download", outRow[1], outRow[1]); accession = cloneString(outRow[1]); outRow[1] = cloneString(link); } // add a link to the submit_file_name row if (sameWord(el->name, "submit_file_name")) { char link[1024]; safef(link, sizeof(link), "%s download", outRow[1], accession); outRow[1] = cloneString(link); } fieldedTableAdd(table, outRow, 2, fieldIx); } ++fieldIx; } char returnUrl[PATH_LEN*2]; safef(returnUrl, sizeof(returnUrl), "../cgi-bin/cdwWebBrowse?cdwCommand=oneFile&cdwFileTag=%s&cdwFileVal=%s&%s", idTag, idVal, cartSidUrlString(cart) ); struct hash *outputWrappers = hashNew(0); hashAdd(outputWrappers, "tag", wrapTagField); webSortableFieldedTable(cart, table, returnUrl, "cdwOneFile", 0, outputWrappers, NULL); fieldedTableFree(&table); freeMem(accession); } char *unquotedCartString(struct cart *cart, char *varName) /* Return unquoted cart variable */ { char *val = cartOptionalString(cart, varName); if (val == NULL) return ""; stripChar(val, '"'); stripChar(val, '\''); return val; } char *getCdwSetting(char *setting, char *deflt) /* Get string cdw. */ { char cdwSetting[1024]; safef(cdwSetting, sizeof cdwSetting, "cdw.%s", setting); return cfgOptionDefault(cdwSetting, deflt); } char *getCdwTableSetting(char *setting) /* Get string cdw. * Allows us to use non-default settings for tables */ { return getCdwSetting(setting, setting); // table name is its own default } void doFileFlowchart(struct sqlConnection *conn) /* Put up a page with info on one file */ { char *idTag = cartUsualString(cart, "cdwFileTag", "accession"); char *idVal = cartString(cart, "cdwFileVal"); char query[512]; sqlSafef(query, sizeof(query), "select * from %s where %s='%s'", getCdwTableSetting("cdwFileTags"), idTag, idVal); struct sqlResult *sr = sqlGetResult(conn, query); struct slName *list = sqlResultFieldList(sr); char **row; struct dyString *dy = dyStringNew(1024); while ((row = sqlNextRow(sr)) != NULL) { char *fileId = mustFindFieldInRow("file_id", list, row); printf("Click on a box in the flow chart to navigate to that file."); dy = makeCdwFlowchart(sqlSigned(fileId), cart); printf("Remove flow chart"); generateTableRow(list, row, idTag, idVal); } jsInline(dy->string); dyStringFree(&dy); sqlFreeResult(&sr); } void doOneFile(struct sqlConnection *conn) /* Put up a page with info on one file */ { struct sqlConnection *conn2 = cdwConnect(); char *idTag = cartUsualString(cart, "cdwFileTag", "accession"); char *idVal = cartString(cart, "cdwFileVal"); char query[512]; sqlSafef(query, sizeof(query), "select * from %s where %s='%s'", getCdwTableSetting("cdwFileTags"), idTag, idVal); struct sqlResult *sr = sqlGetResult(conn, query); struct slName *list = sqlResultFieldList(sr); char **row; while ((row = sqlNextRow(sr)) != NULL) { char *fileName = mustFindFieldInRow("file_name", list, row); char *fileSize = mustFindFieldInRow("file_size", list, row); char *format = mustFindFieldInRow("format", list, row); char *fileId = mustFindFieldInRow("file_id", list, row); long long size = sqlLongLongInList(&fileSize); printf("Tags associated with %s a %s format file of size ", fileName, format); printLongWithCommas(stdout, size); printf("
\n"); /* Figure out number of file inputs and outputs, and put up link to flowcharts */ sqlSafef(query, sizeof(query), "select * from cdwStepIn where fileId = %s", fileId); struct cdwStepIn *cSI = cdwStepInLoadByQuery(conn2, query), *stepIter; int outFiles = 0, stepRunId; for (stepIter = cSI; stepIter != NULL; stepIter=stepIter->next) { sqlSafef(query, sizeof(query), "select count(*) from cdwStepOut where stepRunId = %i", stepIter->stepRunId); outFiles += sqlQuickNum(conn2, query); } sqlSafef(query, sizeof(query), "select stepRunId from cdwStepOut where fileId = %s", fileId); stepRunId = sqlQuickNum(conn2, query); sqlSafef(query, sizeof(query), "select count(*) from cdwStepIn where stepRunId = %i", stepRunId); int inFiles = sqlQuickNum(conn2, query); if (inFiles > 0 || outFiles > 0) { printf("File relationships: %d inputs, %d outputs ", inFiles, outFiles); printf("flow chart"); } generateTableRow(list, row, idTag, idVal); } sqlFreeResult(&sr); sqlDisconnect(&conn2); } struct dyString *customTextForFile(struct sqlConnection *conn, struct cdwTrackViz *viz) /* Create custom track text */ { struct dyString *dy = dyStringNew(0); dyStringPrintf(dy, "track name=\"%s\" ", viz->shortLabel); dyStringPrintf(dy, "description=\"%s\" ", viz->longLabel); //char *host = hHttpHost(); dyStringPrintf(dy, "bigDataUrl=https://localhost/cgi-bin/cdwGetFile?acc=%s", viz->shortLabel); if (accessibleFilesToken != NULL) dyStringPrintf(dy, "&token=%s", accessibleFilesToken); dyStringPrintf(dy, " "); char *indexExt = NULL; if (sameWord(viz->type, "bam")) indexExt = ".bai"; else if (sameWord(viz->type, "vcfTabix")) indexExt = ".tbi"; if (indexExt != NULL) { dyStringPrintf(dy, "bigDataIndex=http://localhost/cgi-bin/cdwGetFile?addExt=%s&acc=%s", indexExt, viz->shortLabel); if (accessibleFilesToken != NULL) dyStringPrintf(dy, "&token=%s", accessibleFilesToken); } dyStringPrintf(dy, " type=%s", viz->type); return dy; } struct cdwTrackViz *cdwTrackVizFromFileId(struct sqlConnection *conn, long long fileId) /* Return cdwTrackViz if any associated with file ID */ { char query[256]; sqlSafef(query, sizeof(query), "select * from cdwTrackViz where fileId=%lld", fileId); return cdwTrackVizLoadByQuery(conn, query); } void wrapFileVis(struct sqlConnection *conn, char *acc, char *unwrapped) /* Wrap hyperlink link to file around unwrapped text. Link goes to file in vf. */ { char *host = hHttpHost(); printf(""); printf("%s", unwrapped); } boolean wrapTrackVis(struct sqlConnection *conn, struct cdwValidFile *vf, char *unwrapped) /* Attempt to wrap genome browser link around unwrapped text. Link goes to file in vf. */ { if (vf == NULL) return FALSE; struct cdwTrackViz *viz = cdwTrackVizFromFileId(conn, vf->fileId); if (viz == NULL) return FALSE; struct dyString *track = customTextForFile(conn, viz); char *encoded = cgiEncode(track->string); printf("ucscDb); printf("&hgt.customText="); printf("%s", encoded); printf("\">"); // Finish HREF quote and A tag printf("%s", unwrapped); freez(&encoded); dyStringFree(&track); return TRUE; } void wrapTrackNearFileName(struct fieldedTable *table, struct fieldedRow *row, char *tag, char *val, char *shortVal, void *context) /* Construct wrapper to UCSC if row actually is a track */ { struct sqlConnection *conn = context; int fileNameIx = stringArrayIx("file_name", table->fields, table->fieldCount); boolean printed = FALSE; if (fileNameIx >= 0) { char *fileName = row->row[fileNameIx]; char acc[FILENAME_LEN]; safef(acc, sizeof(acc), "%s", fileName); char *dot = strchr(acc, '.'); if (dot != NULL) *dot = 0; struct cdwValidFile *vf = cdwValidFileFromLicensePlate(conn, acc); struct cdwFile *ef = cdwFileFromId(conn, vf->fileId); if (cdwCheckAccess(conn, ef, user, cdwAccessRead)) printed = wrapTrackVis(conn, vf, shortVal); } if (!printed) printf("%s", shortVal); } void wrapTrackNearAccession(struct fieldedTable *table, struct fieldedRow *row, char *tag, char *val, char *shortVal, void *context) /* Construct wrapper that can link to Genome Browser if any field in table * is an accession */ { struct sqlConnection *conn = context; int accIx = stringArrayIx("accession", table->fields, table->fieldCount); boolean printed = FALSE; if (accIx >= 0) { char *acc = row->row[accIx]; if (acc != NULL) { struct cdwValidFile *vf = cdwValidFileFromLicensePlate(conn, acc); struct cdwFile *ef = cdwFileFromId(conn, vf->fileId); if (cdwCheckAccess(conn, ef, user, cdwAccessRead)) printed = wrapTrackVis(conn, vf, shortVal); } } if (!printed) printf("%s", shortVal); } boolean isWebBrowsableFormat(char *format) /* Return TRUE if it's one of the web-browseable formats */ { char *formats[] = {"html", "jpg", "pdf", "png", "text", "tsv"}; return stringArrayIx(format, formats, ArraySize(formats)) >= 0; } void wrapFormat(struct fieldedTable *table, struct fieldedRow *row, char *field, char *val, char *shortVal, void *context) /* Write out wrapper that links us to something nice */ { struct sqlConnection *conn = context; char *format = val; if (isWebBrowsableFormat(format)) { /* Get file name out of table */ int fileNameIx = stringArrayIx("file_name", table->fields, table->fieldCount); if (fileNameIx < 0) errAbort("Expecting a file_name in this table"); char *fileName = row->row[fileNameIx]; /* Convert file to accession by chopping off at first dot */ char *acc = cloneString(fileName); char *dot = strchr(acc, '.'); if (dot != NULL) *dot = 0; struct cdwValidFile *vf = cdwValidFileFromLicensePlate(conn, acc); struct cdwFile *ef = cdwFileFromId(conn, vf->fileId); if (cdwCheckAccess(conn, ef, user, cdwAccessRead)) wrapFileVis(conn, acc, shortVal); freez(&acc); } else printf("%s", format); } void wrapMetaNearAccession(struct fieldedTable *table, struct fieldedRow *row, char *field, char *val, char *shortVal, void *context) /* Write out wrapper on a column that looks for accession in same table and uses * that to link us to oneFile display. */ { struct sqlConnection *conn = context; int accIx = stringArrayIx("accession", table->fields, table->fieldCount); boolean wrapped = FALSE; if (accIx >= 0) { char *acc = row->row[accIx]; if (acc != NULL) { struct cdwValidFile *vf = cdwValidFileFromLicensePlate(conn, acc); if (vf != NULL) { wrapped = TRUE; printf("", acc, cartSidUrlString(cart)); printf("%s", shortVal); } } } if (!wrapped) printf("%s", shortVal); } void wrapExternalUrl(struct fieldedTable *table, struct fieldedRow *row, char *field, char *val, char *shortVal, void *context) /* Attempt to wrap genome browser link around unwrapped text. Link goes to file in vf. */ { printf("%s", val, shortVal); } static void rSumLocalMatching(struct tagStanza *list, char *field, int *pSum) /* Recurse through tree adding matches to *pSum */ { struct tagStanza *stanza; for (stanza = list; stanza != NULL; stanza = stanza->next) { if (tagFindLocalVal(stanza, field)) *pSum += 1; if (stanza->children != NULL) rSumLocalMatching(stanza->children, field, pSum); } } int tagStormCountStanzasWithLocal(struct tagStorm *tags, char *localField) /* Return count of all stanzas that include locally a given field */ { int sum = 0; rSumLocalMatching(tags->forest, localField, &sum); return sum; } int hashElCmpIntValDescNameAsc(const void *va, const void *vb) /* Compare two hashEl from a hashInt type hash, with highest integer values * comingFirst. */ { struct hashEl *a = *((struct hashEl **)va); struct hashEl *b = *((struct hashEl **)vb); int diff = b->val - a->val; if (diff == 0) diff = strcmp(b->name, a->name); return diff; } struct suggestBuilder /* A structure to help build a list of suggestions for each file */ { struct suggestBuilder *next; char *name; /* Field name */ struct hash *hash; /* Keyed by field values, values are # of times seen */ }; struct hash *accessibleSuggestHash(struct sqlConnection *conn, char *fields, struct cdwFile *efList) /* Create hash keyed by field name and with values the distinct values of this * field. Only do this on fields where it looks like suggest would be useful. */ { struct hash *suggestHash = hashNew(0); int totalFiles = slCount(efList); /* Make up list of helper structures */ struct slName *name, *nameList = slNameListFromComma(fields); struct suggestBuilder *field, *fieldList = NULL; for (name = nameList; name != NULL; name = name->next) { AllocVar(field); field->name = name->name; field->hash = hashNew(0); slAddHead(&fieldList, field); } slReverse(&fieldList); /* Build up sql query to fetch all our fields */ struct dyString *query = dyStringNew(0); sqlDyStringPrintf(query, "select "); for (field = fieldList; field != NULL; field = field->next) { if (field != fieldList) // not first one sqlDyStringPrintf(query, ","); sqlDyStringPrintf(query, "%s", field->name); } /* Put where on it to limit it to accessible files */ sqlDyStringPrintf(query, " from %s where file_id in (", getCdwTableSetting("cdwFileTags")); struct cdwFile *ef; for (ef = efList; ef != NULL; ef = ef->next) { if (ef != efList) // not first one sqlDyStringPrintf(query, ","); sqlDyStringPrintf(query, "%u", ef->id); } sqlDyStringPrintf(query, ")"); struct sqlResult *sr = sqlGetResult(conn, query->string); char **row; while ((row = sqlNextRow(sr)) != NULL) { int fieldIx = 0; for (field = fieldList; field != NULL; field = field->next, ++fieldIx) { char *val = row[fieldIx]; if (val != NULL) hashIncInt(field->hash, val); } } sqlFreeResult(&sr); /* Loop through fields making suggestion hash entries where appropriate */ for (field = fieldList; field != NULL; field = field->next) { struct hash *valHash = field->hash; if (valHash->elCount < 20 || valHash->elCount < totalFiles/3) { struct hashEl *hel, *helList = hashElListHash(valHash); slSort(&helList, hashElCmpIntValDescNameAsc); struct slName *valList = NULL; int limit = 200; for (hel = helList ; hel != NULL; hel = hel->next) { if (--limit < 0) break; slNameAddHead(&valList, hel->name); } slReverse(&valList); hashAdd(suggestHash, field->name, valList); } hashFree(&field->hash); } slFreeList(&fieldList); slFreeList(&nameList); dyStringFree(&query); return suggestHash; } static struct hash *sqlHashFields(struct sqlConnection *conn, char *table) /* Return a hash containing all fields in table */ { struct slName *field, *fieldList = sqlListFields(conn, table); struct hash *hash = hashNew(7); for (field = fieldList; field != NULL; field = field->next) hashAdd(hash, field->name, NULL); slFreeList(&fieldList); return hash; } static char *filterFieldsToJustThoseInTable(struct sqlConnection *conn, char *fields, char *table) /* Return subset of all fields just containing those that exist in table */ { struct hash *hash = sqlHashFields(conn, table); struct slName *name, *nameList = slNameListFromComma(fields); struct dyString *dy = dyStringNew(strlen(fields)); char *separator = ""; for (name = nameList; name != NULL; name = name->next) { char *s = name->name; if (hashLookup(hash, s)) { dyStringPrintf(dy, "%s%s", separator, s); separator = ","; } } hashFree(&hash); slFreeList(&nameList); return dyStringCannibalize(&dy); } void searchFilesWithAccess(struct sqlConnection *conn, char *searchString, char *allFields, char* initialWhere, struct cdwFile **retList, struct dyString **retWhere, char **retFields, boolean securityColumnsInTable) { /* Get list of files that we are authorized to see and that match searchString in the trix file * Returns: retList of matching files, retWhere with sql where expression for these files, retFields * If nothing to see, retList is NULL * DO NOT Convert to safef V2 since the where clause is checked by gbSanity in tablesTables.c * */ char *fields = filterFieldsToJustThoseInTable(conn, allFields, getCdwTableSetting("cdwFileTags")); struct cdwFile *efList = NULL; if (!securityColumnsInTable) efList = cdwAccessibleFileList(conn, user); struct cdwFile *ef; if (!securityColumnsInTable && !efList) { *retList = NULL; return; } struct rbTree *searchPassTree = NULL; if (!isEmpty(searchString)) { searchPassTree = intValTreeNew(0); char *lowered = cloneString(searchString); tolowers(lowered); char *words[128]; int wordCount = chopLine(lowered, words); char *trixPath = "/gbdb/cdw/cdw.ix"; struct trix *trix = trixOpen(trixPath); struct trixSearchResult *tsr, *tsrList = trixSearch(trix, wordCount, words, tsmExpand); for (tsr = tsrList; tsr != NULL; tsr = tsr->next) { if (securityColumnsInTable) // creates a list with all found items file ids on it. { AllocVar(ef); ef->id = sqlUnsigned(tsr->itemId); slAddHead(&efList, ef); } else { intValTreeAdd(searchPassTree, sqlUnsigned(tsr->itemId), tsr); } } if (securityColumnsInTable) slReverse(&efList); } /* Loop through all files constructing a SQL where clause that restricts us * to just the ones that we're authorized to hit, and that also pass initial where clause * if any. */ struct dyString *where = dyStringNew(0); if (!isEmpty(initialWhere)) sqlDyStringPrintf(where, "(%-s)", initialWhere); // trust if (securityColumnsInTable) { if (user) { // get all groupIds belonging to this user char query[256]; if (!user->isAdmin) { sqlSafef(query, sizeof(query), "select groupId from cdwGroupUser " " where cdwGroupUser.userId = %d", user->id); struct sqlResult *sr = sqlGetResult(conn, query); char **row; if (!isEmpty(where->string)) sqlDyStringPrintf(where, " and "); sqlDyStringPrintf(where, "(allAccess > 0"); while ((row = sqlNextRow(sr)) != NULL) { int groupId = sqlUnsigned(row[0]); sqlDyStringPrintf(where, " or FIND_IN_SET('%u', groupIds)", groupId); } sqlFreeResult(&sr); sqlDyStringPrintf(where, ")"); } } else { if (!isEmpty(where->string)) sqlDyStringPrintf(where, " and "); sqlDyStringPrintf(where, "allAccess > 0"); } } if (efList || (securityColumnsInTable && (!isEmpty(searchString)))) // have search terms but nothing was found { if (!isEmpty(where->string)) sqlDyStringPrintf(where, " and "); sqlDyStringPrintf(where, "file_id in (0"); // initial 0 never found, just makes code smaller for (ef = efList; ef != NULL; ef = ef->next) { if (searchPassTree == NULL || securityColumnsInTable || intValTreeFind(searchPassTree, ef->id) != NULL) { sqlDyStringPrintf(where, ",%u", ef->id); } } sqlDyStringPrintf(where, ")"); } rbTreeFree(&searchPassTree); // return three variables *retWhere = where; *retList = efList; *retFields = fields; } struct cdwFile* findDownloadableFiles(struct sqlConnection *conn, struct cart *cart, char* initialWhere, char *searchString) /* return list of files that we are allowed to see and that match current filters */ { // get query of files that match and where we have access struct cdwFile *efList = NULL; struct dyString *accWhere; char *fields; searchFilesWithAccess(conn, searchString, fileTableFields, initialWhere, &efList, &accWhere, &fields, FALSE); // reduce query to those that match our filters struct dyString *dummy; struct dyString *filteredWhere; char *table = isEmpty(initialWhere) ? getCdwTableSetting("cdwFileFacets") : getCdwTableSetting("cdwFileTags"); webTableBuildQuery(cart, table, accWhere->string, "cdwBrowseFiles", fileTableFields, FALSE, &dummy, &filteredWhere); // Selected Facet Values Filtering char *selectedFacetValues=cartUsualString(cart, "cdwBrowseFiles_facet_selList", ""); struct facetField *selectedList = deLinearizeFacetValString(selectedFacetValues); struct facetField *sff = NULL; struct dyString *facetedWhere = dyStringNew(1024); for (sff = selectedList; sff; sff=sff->next) { if (slCount(sff->valList)>0) { sqlDyStringPrintf(facetedWhere, " and "); // use Frag to prevent NOSQLINJ tag sqlDyStringPrintf(facetedWhere, "ifnull(%s,'n/a') in (", sff->fieldName); struct facetVal *el; for (el=sff->valList; el; el=el->next) { sqlDyStringPrintf(facetedWhere, "'%s'", el->val); if (el->next) sqlDyStringPrintf(facetedWhere, ","); } sqlDyStringPrintf(facetedWhere, ")"); } } // get their fileIds struct dyString *tagQuery = sqlDyStringCreate("SELECT file_id from %s %-s", table, filteredWhere->string); // trust if (!isEmpty(facetedWhere->string)) sqlDyStringPrintf(tagQuery, "%-s", facetedWhere->string); // trust because it was created safely struct slName *fileIds = sqlQuickList(conn, tagQuery->string); // retrieve the cdwFiles objects for these struct dyString *fileQuery = sqlDyStringCreate("SELECT * FROM cdwFile WHERE id IN ("); sqlDyStringPrintValuesList(fileQuery, fileIds); sqlDyStringPrintf(fileQuery, ")"); return cdwFileLoadByQuery(conn, fileQuery->string); } static void continueSearchVars() /* print out hidden forms variables for the current search */ { cgiContinueHiddenVar("cdwFileSearch"); char *fieldNames[128]; char *tempFileTableFields = cloneString(fileTableFields); // cannot modify string literals int fieldCount = chopString(tempFileTableFields, ",", fieldNames, ArraySize(fieldNames)); int i; for (i = 0; iDownload %d File%s", count, count>1?"s":""); printf("

\n"); } char *createTokenForUser() /* create a random token and add it to the cdwDownloadToken table with the current username. * Returns token, should be freed.*/ { struct sqlConnection *conn = hConnectCentral(); // r/w access -> has to be in hgcentral char query[4096]; if (!sqlTableExists(conn, "cdwDownloadToken")) { sqlSafef(query, sizeof(query), "CREATE TABLE cdwDownloadToken (token varchar(255) NOT NULL PRIMARY KEY, " "userId int NOT NULL, createTime datetime DEFAULT NOW())"); sqlUpdate(conn, query); } char *token = makeRandomKey(80); sqlSafef(query, sizeof(query), "INSERT INTO cdwDownloadToken (token, userId) VALUES ('%s', %d)", token, user->id); sqlUpdate(conn, query); hDisconnectCentral(&conn); return token; } void accessibleFilesTable(struct cart *cart, struct sqlConnection *conn, char *searchString, char *allFields, char *fromTable, char *initialWhere, char *returnUrl, char *varPrefix, int maxFieldWidth, struct hash *tagOutWrappers, void *wrapperContext, boolean withFilters, char *itemPlural, int pageSize, char *visibleFacetList, boolean securityColumnsInTable) { struct cdwFile *efList = NULL; struct dyString *where; char *fields; searchFilesWithAccess(conn, searchString, allFields, initialWhere, &efList, &where, &fields, securityColumnsInTable); if (!securityColumnsInTable && !efList) { if (user != NULL && user->isAdmin) printf("
The file database is empty."); else printf("
Unfortunately there are no %s you are authorized to see.", itemPlural); return; } if (user!=NULL) accessibleFilesToken = createTokenForUser(); /* Let the sql system handle the rest. Might be one long 'in' clause.... */ struct hash *suggestHash = NULL; if (!securityColumnsInTable) suggestHash = accessibleSuggestHash(conn, fields, efList); webFilteredSqlTable(cart, conn, fields, fromTable, where->string, returnUrl, varPrefix, maxFieldWidth, tagOutWrappers, wrapperContext, withFilters, itemPlural, pageSize, 15, suggestHash, visibleFacetList, makeDownloadAllButtonForm); /* Clean up and go home. */ cdwFileFreeList(&efList); dyStringFree(&where); } char *showSearchControl(char *varName, char *itemPlural) /* Put up the search control text and stuff. Returns current search string. */ { /* Get cart variable and clean it up some removing quotes and the like */ char *varVal = unquotedCartString(cart, varName); printf("Search ", varName, varName, varVal); printf(" "); printf("\n"); jsInlineF( "$(function () {\n" " $('#%s').watermark(\"type in words or starts of words to find specific %s\");\n" " $('form').delegate('#%s','change keyup paste',function(e){\n" " $('[name=cdwBrowseFiles_page]').val('1');\n" " });\n" "});\n", varName, itemPlural, varName); return varVal; } void doDownloadUrls() /* serve textfile with file URLs and ask user's internet browser to save it to disk */ { struct sqlConnection *conn = sqlConnect(cdwDatabase); user = cdwCurrentUser(conn); // on non-public cirm (and development vhosts) users must be logged in to use download tokens. if (user==NULL && !isPublicSite) { // this should never happen through normal UI use puts("Content-type: text/html\n\n"); puts("Error: user is not logged in"); return; } // on public cirm we have users not logged in. user=NULL and download tokens are not required char *token = NULL; if (!isPublicSite) { token = createTokenForUser(); } // if we recreate the submission dir structure, we need to create a shell script boolean createSubdirs = FALSE; if (sameOk(cgiOptionalString("cdwDownloadName"), "subAndDir")) createSubdirs = TRUE; cart = cartAndCookieWithHtml(hUserCookie(), excludeVars, oldVars, FALSE); if (createSubdirs) puts("Content-disposition: attachment; filename=downloadCirm.sh\n"); else puts("Content-disposition: attachment; filename=fileUrls.txt\n"); char *searchString = unquotedCartString(cart, "cdwFileSearch"); char *initialWhere = cartUsualString(cart, "cdwBrowseFiles_filter", ""); if (!sameString(initialWhere, "")) { struct dyString *safeWhere = dyStringNew(0); sqlSanityCheckWhere(initialWhere, safeWhere); initialWhere = dyStringCannibalize(&safeWhere); } struct cdwFile *efList = findDownloadableFiles(conn, cart, initialWhere, searchString); char *host = hHttpHost(); // user may want to download with original submitted filename, not with format . char *optArg = ""; if (sameOk(cgiOptionalString("cdwDownloadName"), "sub")) optArg = "&useSubmitFname=1"; struct cdwFile *ef; for (ef = efList; ef != NULL; ef = ef->next) { struct cdwValidFile *vf = cdwValidFileFromFileId(conn, ef->id); if (createSubdirs) { struct cdwFile *cf = cdwFileFromId(conn, vf->fileId); // if we have an absolute pathname in our DB, strip the leading '/' // so if someone runs the script as root, it will not start to write // files in strange directories char* submitFname = cf->submitFileName; if ( (submitFname!=NULL) && (!isEmpty(submitFname)) && (*submitFname=='/') ) submitFname += 1; printf("curl "); if (!isPublicSite) printf("--netrc-file cirm_credentials "); printf("'https://%s/cgi-bin/cdwGetFile?acc=%s", host, vf->licensePlate); if (!isPublicSite) printf("&token=%s", token); printf("' --create-dirs -o %s\n", submitFname); } else { printf("https://%s/cgi-bin/cdwGetFile?acc=%s", host, vf->licensePlate); if (!isPublicSite) printf("&token=%s", token); printf("%s\n", optArg); } } } void doDownloadFileConfirmation(struct sqlConnection *conn) /* show overview page of download files */ { if (user==NULL && !isPublicSite) { printf("Sorry, you have to log in before you can download files."); return; } printf("
\n"); cartSaveSession(cart); cgiMakeHiddenVar("cdwCommand", "downloadUrls"); continueSearchVars(); char *searchString = unquotedCartString(cart, "cdwFileSearch"); char *initialWhere = cartUsualString(cart, "cdwBrowseFiles_filter", ""); if (!sameString(initialWhere, "")) { struct dyString *safeWhere = dyStringNew(0); sqlSanityCheckWhere(initialWhere, safeWhere); initialWhere = dyStringCannibalize(&safeWhere); } struct cdwFile *efList = findDownloadableFiles(conn, cart, initialWhere, searchString); // get total size struct cdwFile *ef; long long size = 0; for (ef = efList; ef != NULL; ef = ef->next) size += ef->size; int fCount = slCount(efList); char sizeStr[4096]; sprintWithGreekByte(sizeStr, sizeof(sizeStr), size); printf("

Data Download Options

\n"); printf("Number of files: %d
\n", fCount); printf("Total size: %s

\n", sizeStr); puts("\n"); //cgiMakeRadioButton("cdwDownloadName", "acc", TRUE); puts("Name files by accession, one single directory
"); //cgiMakeRadioButton("cdwDownloadName", "sub", FALSE); puts("\n"); puts("Name files as submitted, one single directory
"); //cgiMakeRadioButton("cdwDownloadName", "subAndDir", FALSE); puts("\n"); puts("Name files as submitted and put into subdirectories

"); printf("\n"); printf("

\n"); jsInline ( "$('.scriptButton').change( function() {$('#urlListDoc').hide(); $('#scriptDoc').show()} );\n" "$('.urlListButton').change( function() {$('#urlListDoc').show(); $('#scriptDoc').hide()} );\n" ); puts("
\n"); puts("When you click 'submit', a text file with the URLs of the files will get downloaded.\n"); puts("The URLs are valid for one week.

\n"); puts("To download the files:\n"); puts("

    \n"); puts("
  • With Firefox and DownThemAll: Click Tools - DownThemAll! - Manager. Right click - Advanced - Import from file. Right-click - Select All. Right-click - Toogle All\n"); if (isPublicSite) { puts("
  • OSX/Linux: With curl and a single thread: xargs -n1 curl -JO < fileUrls.txt\n"); puts("
  • Linux: With wget and a single thread: wget --content-disposition -i fileUrls.txt\n"); puts("
  • With wget and 4 threads: xargs -n 1 -P 4 wget --content-disposition -q < fileUrls.txt\n"); puts("
  • With aria2c, 16 threads and two threads per file: aria2c -x 16 -s 2 -i fileUrls.txt\n"); } else { puts("
  • OSX/Linux: With curl and a single thread: xargs -n1 curl -JO --user YOUREMAIL:YOURPASS < fileUrls.txt\n"); puts("
  • Linux: With wget and a single thread: wget --content-disposition -i fileUrls.txt --user YOUREMAIL --password YOURPASS\n"); puts("
  • With wget and 4 threads: xargs -n 1 -P 4 wget --content-disposition -q --user YOUREMAIL --password YOURPASS < fileUrls.txt\n"); puts("
  • With aria2c, 16 threads and two threads per file: aria2c --http-user YOUREMAIL --http-password YOURPASS -x 16 -s 2 -i fileUrls.txt\n"); } puts("
\n"); puts("
\n"); puts("