90aa101ee4f9e47a77b2eac496eca213a787d513 jcasper Mon Jul 22 01:53:32 2019 -0700 Adding table browser support for hic tracks, refs #22316 diff --git src/hg/hgTables/hic.c src/hg/hgTables/hic.c new file mode 100644 index 0000000..290444d --- /dev/null +++ src/hg/hgTables/hic.c @@ -0,0 +1,397 @@ +/* hic - stuff to handle Hi-C stuff in table browser. */ + +/* Copyright (C) 2019 The Regents of the University of California + * See README in this or parent directory for licensing information. */ + +#include "common.h" +#include "hash.h" +#include "linefile.h" +#include "dystring.h" +#include "localmem.h" +#include "jksql.h" +#include "cheapcgi.h" +#include "cart.h" +#include "web.h" +#include "bed.h" +#include "hdb.h" +#include "trackDb.h" +#include "obscure.h" +#include "hmmstats.h" +#include "correlate.h" +#include "asParse.h" +#include "bbiFile.h" +#include "bigBed.h" +#include "hubConnect.h" +#include "trackHub.h" +#include "hgTables.h" +#include "asFilter.h" +#include "xmlEscape.h" +#include "hic.h" +#include "interact.h" +#include "hgConfig.h" +#include "chromAlias.h" +#include "hicUi.h" + +boolean isHicTable(char *table) +/* Return TRUE if table corresponds to a hic file. */ +{ +if (isHubTrack(table)) + { + struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table); + return sameWord("hic", tdb->type); + } +else + return trackIsType(database, table, curTrack, "hic", ctLookupName); +} + +char *hicFileName(char *table, struct sqlConnection *conn) +/* Return file name associated with hic. This handles differences whether it's + * a custom or built-in track. Do a freeMem on returned string when done. */ +{ +char *fileName = bigFileNameFromCtOrHub(table, conn); +// NB: Not planning to do hic tracks via filenames in tables. If this changes, +// add a check and a call to hicFileNameFromTable(), which will have to be written. +return fileName; +} + +struct hTableInfo *hicToHti(char *table) +/* Get standard fields of hic into hti structure. */ +{ +struct hTableInfo *hti; +AllocVar(hti); +hti->rootName = cloneString(table); +hti->isPos= TRUE; +safecpy(hti->chromField, sizeof(hti->chromField), "chrom"); +safecpy(hti->startField, sizeof(hti->startField), "chromStart"); +safecpy(hti->endField, sizeof(hti->endField), "chromEnd"); +safecpy(hti->scoreField, sizeof(hti->scoreField), "value"); +hti->type = cloneString("hic"); +return hti; +} + +struct slName *hicGetFields() +/* Get fields of hic as simple name list. We represent hic with an interact structure, so + * this is really just an interact as object. */ +{ +struct asObject *as = interactAsObj(); +return asColNames(as); +} + +struct sqlFieldType *hicListFieldsAndTypes() +/* Get fields of hic as list of sqlFieldType (again, this is really just the list of interact fields. */ +{ +struct asObject *as = interactAsObj(); +return sqlFieldTypesFromAs(as); +} + +#define HIC_NUM_BUF_SIZE 4096 + +void hicRecordToRow(struct interact *sample, char *numBuf, char *row[INTERACT_NUM_COLS]) +/* Convert samAlignment data structure to an array of strings, using numBuf to store + * ascii versions of numbers temporarily */ +{ +char *numPt = numBuf; +char *numBufEnd = numBuf + HIC_NUM_BUF_SIZE; +row[0] = sample->chrom; +row[1] = numPt; numPt += sprintf(numPt, "%u", sample->chromStart); numPt += 1; +row[2] = numPt; numPt += sprintf(numPt, "%u", sample->chromEnd); numPt += 1; +row[3] = sample->name; +row[4] = numPt; numPt += sprintf(numPt, "%u", sample->score); numPt += 1; +row[5] = numPt; numPt += sprintf(numPt, "%f", sample->value); numPt += 1; +row[6] = sample->exp; +row[7] = numPt; numPt += sprintf(numPt, "%u", sample->color); numPt += 1; +row[8] = sample->sourceChrom; +row[9] = numPt; numPt += sprintf(numPt, "%u", sample->sourceStart); numPt += 1; +row[10] = numPt; numPt += sprintf(numPt, "%u", sample->sourceEnd); numPt += 1; +row[11] = sample->sourceName; +row[12] = sample->sourceStrand; +row[13] = sample->targetChrom; +row[14] = numPt; numPt += sprintf(numPt, "%u", sample->targetStart); numPt += 1; +row[15] = numPt; numPt += sprintf(numPt, "%u", sample->targetEnd); numPt += 1; +row[16] = sample->targetName; +row[17] = sample->targetStrand; +assert(numPt < numBufEnd); +} + +int hicCompare(const void *elem1, const void *elem2) +/* Simple comparison function for sorting hic data in interact structures, + * since it is not always returned in sorted order. */ +{ +const struct interact *first = *((struct interact **)elem1); +const struct interact *second = *((struct interact **)elem2); +int dif = strcmp(first->chrom, second->chrom); +if (dif == 0) + dif = first->chromStart - second->chromStart; +return dif; +} + +void hicTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f) +/* Print out selected fields from hic. If fields is NULL, then print out all fields. */ +{ +if (f == NULL) + f = stdout; + +/* Convert comma separated list of fields to array. */ +int fieldCount = chopByChar(fields, ',', NULL, 0); +char **fieldArray; +AllocArray(fieldArray, fieldCount); +chopByChar(fields, ',', fieldArray, fieldCount); + +/* Get list of all fields in hic and turn it into a hash of column indexes keyed by + * column name. */ +struct hash *fieldHash = hashNew(0); +struct slName *field, *fieldList = hicGetFields(); +int i; +for (field = fieldList, i=0; field != NULL; field = field->next, ++i) + { + hashAddInt(fieldHash, field->name, i); + } + +/* Create an array of column indexes corresponding to the selected field list. */ +int *columnArray; +AllocArray(columnArray, fieldCount); +for (i=0; icolumnList)); + } + } + +/* Loop through outputting each region */ +struct region *region, *regionList = getRegions(); + +int maxOut = bigFileMaxOutput(); + +char *fileName = hicFileName(table, conn); +struct hicMeta *fileInfo; +char *errMsg = hicLoadHeader(fileName, &fileInfo); +if (errMsg != NULL) + errAbort("%s", errMsg); + +char *norm = hicUiFetchNormalization(cart, table, fileInfo); + +for (region = regionList; region != NULL && (maxOut > 0); region = region->next) + { + struct interact *results = NULL, *result = NULL; + int res = hicUiFetchResolutionAsInt(cart, table, fileInfo, region->end-region->start); + char *errMsg = hicLoadData(fileInfo, res, norm, region->chrom, region->start, region->end, + region->chrom, region->start, region->end, &results); + if (errMsg != NULL) + warn("%s", errMsg); + slSort(&results, &hicCompare); + char *row[INTERACT_NUM_COLS]; + char numBuf[HIC_NUM_BUF_SIZE]; + for (result = results; result != NULL && (maxOut > 0); result = result->next) + { + hicRecordToRow(result, numBuf, row); + if (asFilterOnRow(filter, row)) + { + int i; + fprintf(f, "%s", row[columnArray[0]]); + for (i=1; iend-region->start); + char *norm = hicUiFetchNormalization(cart, table, fileInfo); + + struct interact *results = NULL, *result = NULL; + errMsg = hicLoadData(fileInfo, res, norm, region->chrom, region->start, region->end, + region->chrom, region->start, region->end, &results); + if (errMsg != NULL) + warn("%s", errMsg); + slSort(&results, &hicCompare); + char *row[INTERACT_NUM_COLS]; + char numBuf[HIC_NUM_BUF_SIZE]; + for (result=results; results != NULL; result = result->next) + { + hicRecordToRow(result, numBuf, row); + if (asFilterOnRow(filter, row)) + { + struct bed *bed; + lmAllocVar(bedLm, bed); + bed->chrom = lmCloneString(bedLm, result->chrom); + bed->chromStart = result->chromStart; + bed->chromEnd = result->chromEnd; + bed->name = NULL; + slAddHead(pBedList, bed); + } + (*pMaxOut)--; + if (*pMaxOut <= 0) + break; + } + } +} + +struct bed *hicGetFilteredBedsOnRegions(struct sqlConnection *conn, + char *db, char *table, struct region *regionList, struct lm *lm, + int *retFieldCount) +/* Get list of beds from HIC, in all regions, that pass filtering. */ +{ +int maxOut = bigFileMaxOutput(); +/* Figure out hic file name get column info and filter. */ +struct asObject *as = interactAsObj(); +struct asFilter *filter = asFilterFromCart(cart, db, table, as); +struct hash *idHash = identifierHash(db, table); + +/* Get beds a region at a time. */ +struct bed *bedList = NULL; +struct region *region; +for (region = regionList; region != NULL; region = region->next) + { + char *fileName = hicFileName(table, conn); + addFilteredBedsOnRegion(fileName, region, table, filter, lm, &bedList, idHash, &maxOut); + freeMem(fileName); + if (maxOut <= 0) + { + errAbort("Reached output limit of %d data values, please make region smaller,\n" + "\tor set a higher output line limit with the filter settings.", bigFileMaxOutput()); + } + } +slReverse(&bedList); +return bedList; +} + + +void showSchemaHic(char *table, struct trackDb *tdb) +/* Show schema on hic. */ +{ +struct sqlConnection *conn = NULL; +if (!trackHubDatabase(database)) + conn = hAllocConn(database); +char *fileName = hicFileName(table, conn); + +struct asObject *as = interactAsObj(); +hPrintf("Database: %s", database); +hPrintf("    Primary Table: %s
", table); +hPrintf("HI-C File: %s", fileName); +hPrintf("
\n"); +hPrintf("Format description: %s
", as->comment); +hPrintf("See the Interact Format Specification for more details on " + "the interact format, which is available for output here. For details on the .hic format by " + "the Aiden Lab, please see this page.
\n", + "../goldenPath/help/interact.html", "https://www.aidenlab.org/documentation.html"); + +/* Put up table that describes fields. */ +hTableStart(); +hPrintf("field"); +hPrintf("description "); +puts("\n"); +struct asColumn *col; +int colCount = 0; +for (col = as->columnList; col != NULL; col = col->next) + { + hPrintf("%s", col->name); + hPrintf("%s", col->comment); + ++colCount; + } +hTableEnd(); + +/* Put up another section with sample rows. */ +webNewSection("Sample Rows"); +hTableStart(); + +/* Print field names as column headers for example */ +hPrintf(""); +int colIx = 0; +for (col = as->columnList; col != NULL; col = col->next) + { + hPrintf("%s", col->name); + ++colIx; + } +hPrintf("\n"); + +struct hicMeta *fileInfo = NULL; +char *errMsg = hicLoadHeader(fileName, &fileInfo); + +if (errMsg != NULL) + { + warn("%s", errMsg); + } +else + { + /* Print sample lines. */ + struct hash *chromAliasHash = chromAliasMakeLookupTable(database); + char *chrName = fileInfo->chromNames[1]; // Skip 0, which is "All" + struct chromAlias *a = hashFindVal(chromAliasHash, chrName); + char *ucscChrName = NULL; + if (a != NULL) + ucscChrName = a->chrom; + else + ucscChrName = chrName; + + int maxRes = atoi(fileInfo->resolutions[0]); + + struct interact *sampleList = NULL, *sample = NULL; + errMsg = hicLoadData(fileInfo, maxRes, "NONE", ucscChrName, 0, hChromSize(database, ucscChrName), + ucscChrName, 0, hChromSize(database, ucscChrName), &sampleList); + if (errMsg != NULL) + warn("%s", errMsg); + slSort(&sampleList, &hicCompare); + char *row[INTERACT_NUM_COLS]; + char numBuf[HIC_NUM_BUF_SIZE]; + int count = 0; + for (sample=sampleList; sample != NULL && count < 10; sample = sample->next, count++) + { + hicRecordToRow(sample, numBuf, row); + hPrintf(""); + for (colIx=0; colIx"); + xmlEscapeStringToFile(row[colIx], stdout); + hPrintf(""); + } + hPrintf("\n"); + } + } +hTableEnd(); +printTrackHtml(tdb); + +/* Clean up and go home. */ +freeMem(fileName); +hFreeConn(&conn); +} +