src/hg/hgTables/bigBed.c 1.5
1.5 2009/03/17 17:24:50 kent
Simplifying some logic where the code was unnecessarily distinguishing between hPrintf and fprintf(stdout, ...) - which is an issue in the Genome Browser, but not here in the Table Browser. Also enabling the select fields user interface.
Index: src/hg/hgTables/bigBed.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/hgTables/bigBed.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -b -B -U 1000000 -r1.4 -r1.5
--- src/hg/hgTables/bigBed.c 17 Mar 2009 04:28:38 -0000 1.4
+++ src/hg/hgTables/bigBed.c 17 Mar 2009 17:24:50 -0000 1.5
@@ -1,185 +1,295 @@
/* bigBed - stuff to handle bigBed in the Table Browser. */
#include "common.h"
#include "hash.h"
#include "linefile.h"
#include "dystring.h"
#include "localmem.h"
#include "jksql.h"
#include "cheapcgi.h"
#include "cart.h"
#include "web.h"
#include "bed.h"
#include "hdb.h"
#include "trackDb.h"
#include "obscure.h"
#include "hmmstats.h"
#include "correlate.h"
#include "asParse.h"
#include "bbiFile.h"
#include "bigBed.h"
#include "hgTables.h"
/* Revision identification string for this file; the "$Id$" text is presumably a
 * keyword placeholder filled in by the revision-control system - confirm. */
static char const rcsid[] = "$Id$";
boolean isBigBed(char *table)
/* Report whether the named table is a track of type "bigBed" (TRUE if so). */
{
boolean typeMatches = trackIsType(table, "bigBed");
return typeMatches;
}
char *bigBedFileName(char *table, struct sqlConnection *conn)
/* Look up the name of the file behind a bigBed table, whether the track is a custom
 * one or a built-in one.  The caller should freeMem the returned string when done. */
{
/* bigBed and bigWig share the same file-name lookup, so delegate to the bigWig routine. */
char *fileName = bigWigFileName(table, conn);
return fileName;
}
struct hash *asColumnHash(struct asObject *as)
/* Build and return a hash holding each of the object's columns, keyed by column name. */
{
struct hash *columnsByName = hashNew(6);
struct asColumn *column = as->columnList;
while (column != NULL)
    {
    hashAdd(columnsByName, column->name, column);
    column = column->next;
    }
return columnsByName;
}
static void fillField(struct hash *colHash, char *key, char output[HDB_MAX_FIELD_STRING])
/* If key is in colHash, then copy key to output, truncating if need be so that the
 * result, terminator included, fits in HDB_MAX_FIELD_STRING bytes.  Output is left
 * untouched when key is not in colHash. */
{
if (hashLookup(colHash, key) == NULL)
    return;
strncpy(output, key, HDB_MAX_FIELD_STRING-1);
/* strncpy writes no terminator when key fills the whole copy window, so do not rely
 * on the caller having zeroed the buffer beforehand. */
output[HDB_MAX_FIELD_STRING-1] = 0;
}
+static struct asObject *bigBedAsOrDefault(struct bbiFile *bbi)
+/* Return the asObject that bigBedAs finds for the file.  When it finds none, build
+ * one by parsing the bed definition for the file's defined and total field counts. */
+{
+struct asObject *fromFile = bigBedAs(bbi);
+if (fromFile != NULL)
+    return fromFile;
+return asParseText(bedAsDef(bbi->definedFieldCount, bbi->fieldCount));
+}
+
+struct asObject *bigBedAsForTable(char *table, struct sqlConnection *conn)
+/* Open the bigBed file behind table just long enough to get its asObject, and
+ * return that asObject. */
+{
+char *path = bigBedFileName(table, conn);
+struct bbiFile *bigBed = bigBedFileOpen(path);
+struct asObject *description = bigBedAsOrDefault(bigBed);
+
+/* Only the description goes back to the caller; the file and its name are done with. */
+bbiFileClose(&bigBed);
+freeMem(path);
+return description;
+}
+
struct hTableInfo *bigBedToHti(char *table, struct sqlConnection *conn)
/* Get fields of bigBed into hti structure.  The hTableInfo is allocated here and
 * returned; each of its field-name slots is filled in only when the matching
 * standard BED column name appears in the bigBed's column description.
 * NOTE(review): the asObject obtained below is not released in this function. */
{
+/* Get columns in asObject format. */
char *fileName = bigBedFileName(table, conn);
-/* Get columns in asObject format, from file or failing that as bed-standard. */
struct bbiFile *bbi = bigBedFileOpen(fileName);
-struct asObject *as = bigBedAs(bbi);
-if (as == NULL)
- as = asParseText(bedAsDef(bbi->definedFieldCount, bbi->fieldCount));
+struct asObject *as = bigBedAsOrDefault(bbi);
/* Allocate hTableInfo structure and fill in info about bed fields. */
struct hash *colHash = asColumnHash(as);
struct hTableInfo *hti;
AllocVar(hti);
hti->rootName = cloneString(table);
hti->isPos= TRUE;
/* Each call copies the column name into the hti slot only if that column exists. */
fillField(colHash, "chrom", hti->chromField);
fillField(colHash, "chromStart", hti->startField);
fillField(colHash, "chromEnd", hti->endField);
fillField(colHash, "name", hti->nameField);
fillField(colHash, "score", hti->scoreField);
fillField(colHash, "strand", hti->strandField);
fillField(colHash, "thickStart", hti->cdsStartField);
fillField(colHash, "thickEnd", hti->cdsEndField);
fillField(colHash, "blockCount", hti->countField);
fillField(colHash, "chromStarts", hti->startsField);
fillField(colHash, "blockSizes", hti->endsSizesField);
/* CDS and block flags are decided from the file's defined field count alone. */
hti->hasCDS = (bbi->definedFieldCount >= 8);
hti->hasBlocks = (bbi->definedFieldCount >= 12);
/* Type string is "bed N" followed by '.' when definedFieldCount equals fieldCount,
 * or by '+' when the two counts differ. */
char type[256];
safef(type, sizeof(type), "bed %d %c", bbi->definedFieldCount,
(bbi->definedFieldCount == bbi->fieldCount ? '.' : '+'));
hti->type = cloneString(type);
+freeMem(fileName);
hashFree(&colHash);
bbiFileClose(&bbi);
return hti;
}
+struct slName *asColNames(struct asObject *as)
+/* Return the names of the object's columns as an slName list, in column order. */
+{
+struct slName *names = NULL;
+struct asColumn *column = as->columnList;
+while (column != NULL)
+    {
+    /* Keep the new node in a variable before handing it to slAddHead. */
+    struct slName *node = slNameNew(column->name);
+    slAddHead(&names, node);
+    column = column->next;
+    }
+/* Nodes were pushed on the front, so flip the list back into column order. */
+slReverse(&names);
+return names;
+}
+
+struct slName *bigBedGetFields(char *table, struct sqlConnection *conn)
+/* Get fields of bigBed as simple name list, in column order.
+ * NOTE(review): the asObject fetched here is not released before returning. */
+{
+/* bigBedAsForTable already does the look-up-name / open / describe / close sequence,
+ * so reuse it rather than repeating those steps here. */
+struct asObject *as = bigBedAsForTable(table, conn);
+return asColNames(as);
+}
+
+void bigBedTabOut(char *table, struct sqlConnection *conn, char *fields, FILE *f)
+/* Print out selected fields from Big Bed as tab-separated text: first a label row
+ * starting with '#', then one line per item found in each region from getRegions().
+ * Fields is a comma separated list of column names.  If fields is NULL or empty, then
+ * print out all fields.  If f is NULL output goes to stdout.  The fields string passed
+ * in is not modified. */
+{
+if (f == NULL)
+    f = stdout;
+
+/* Get list of all fields in big bed and turn it into a hash of column indexes keyed by
+ * column name. */
+struct hash *fieldHash = hashNew(0);
+struct slName *bb, *bbList = bigBedGetFields(table, conn);
+int i;
+for (bb = bbList, i=0; bb != NULL; bb = bb->next, ++i)
+    hashAddInt(fieldHash, bb->name, i);
+
+/* Build array of selected field names.  With no selection every field is used, in
+ * file order.  Otherwise the comma separated list is chopped up; chopByChar splits
+ * its input in place, so work on a private copy and leave the caller's string alone. */
+char *fieldsDupe = NULL;
+char **fieldArray;
+int fieldCount;
+if (fields == NULL || fields[0] == 0)
+    {
+    fieldCount = slCount(bbList);
+    AllocArray(fieldArray, fieldCount);
+    for (bb = bbList, i=0; bb != NULL; bb = bb->next, ++i)
+        fieldArray[i] = bb->name;
+    }
+else
+    {
+    fieldsDupe = cloneString(fields);
+    fieldCount = chopByChar(fieldsDupe, ',', NULL, 0);
+    AllocArray(fieldArray, fieldCount);
+    chopByChar(fieldsDupe, ',', fieldArray, fieldCount);
+    }
+
+/* Create an array of column indexes corresponding to the selected field list. */
+int *columnArray;
+AllocArray(columnArray, fieldCount);
+for (i=0; i<fieldCount; ++i)
+    columnArray[i] = hashIntVal(fieldHash, fieldArray[i]);
+
+/* Output row of labels */
+fprintf(f, "#%s", fieldArray[0]);
+for (i=1; i<fieldCount; ++i)
+    fprintf(f, "\t%s", fieldArray[i]);
+fprintf(f, "\n");
+
+/* Open up bigBed file. */
+char *fileName = bigBedFileName(table, conn);
+struct bbiFile *bbi = bigBedFileOpen(fileName);
+
+/* Row buffers are the same size for every region, so declare them once. */
+char *row[bbi->fieldCount];
+char startBuf[16], endBuf[16];
+
+/* Loop through outputting each region */
+struct region *region, *regionList = getRegions();
+for (region = regionList; region != NULL; region = region->next)
+    {
+    /* Each region's intervals live in their own local memory pool. */
+    struct lm *lm = lmInit(0);
+    struct bigBedInterval *iv, *ivList = bigBedIntervalQuery(bbi, region->chrom,
+        region->start, region->end, 0, lm);
+    for (iv = ivList; iv != NULL; iv = iv->next)
+        {
+        bigBedIntervalToRow(iv, region->chrom, startBuf, endBuf, row, bbi->fieldCount);
+        fprintf(f, "%s", row[columnArray[0]]);
+        for (i=1; i<fieldCount; ++i)
+            fprintf(f, "\t%s", row[columnArray[i]]);
+        fprintf(f, "\n");
+        }
+    lmCleanup(&lm);
+    }
+
+/* Clean up and exit.  The name list goes last since fieldArray may point into it. */
+bbiFileClose(&bbi);
+freeMem(fileName);
+hashFree(&fieldHash);
+freeMem(fieldArray);
+freeMem(columnArray);
+freeMem(fieldsDupe);
+slFreeList(&bbList);
+}
void showSchemaBigBed(char *table)
/* Show schema on bigBed. */
{
/* Figure out bigBed file name and open it. Get contents for first chromosome as an example. */
struct sqlConnection *conn = hAllocConn(database);
char *fileName = bigBedFileName(table, conn);
struct bbiFile *bbi = bigBedFileOpen(fileName);
struct bbiChromInfo *chromList = bbiChromList(bbi);
struct lm *lm = lmInit(0);
struct bigBedInterval *ivList = bigBedIntervalQuery(bbi, chromList->name, 0,
chromList->size, 10, lm);
/* Get description of columns, making it up from BED records if need be. */
struct asObject *as = bigBedAs(bbi);
if (as == NULL)
as = asParseText(bedAsDef(bbi->definedFieldCount, bbi->fieldCount));
hPrintf("<B>Big Bed File:</B> %s", fileName);
hPrintf(" <B>Item Count:</B> ");
printLongWithCommas(stdout, bigBedItemCount(bbi));
hPrintf("<BR>\n");
hPrintf("<B>Format description:</B> %s<BR>", as->comment);
/* Put up table that describes fields. */
hTableStart();
hPrintf("<TR><TH>field</TH>");
hPrintf("<TH>example</TH>");
hPrintf("<TH>description</TH> ");
puts("</TR>\n");
struct asColumn *col;
int colCount = 0;
char *row[bbi->fieldCount];
char startBuf[16], endBuf[16];
char *dupeRest = lmCloneString(lm, ivList->rest); /* Manage rest-stomping side-effect */
bigBedIntervalToRow(ivList, chromList->name, startBuf, endBuf, row, bbi->fieldCount);
ivList->rest = dupeRest;
for (col = as->columnList; col != NULL; col = col->next)
{
hPrintf("<TR><TD><TT>%s</TT></TD>", col->name);
hPrintf("<TD>%s</TD>", row[colCount]);
hPrintf("<TD>%s</TD></TR>", col->comment);
++colCount;
}
/* If more fields than descriptions put up minimally helpful info (at least has example). */
for ( ; colCount < bbi->fieldCount; ++colCount)
{
hPrintf("<TR><TD><TT>column%d</TT></TD>", colCount+1);
hPrintf("<TD>%s</TD>", row[colCount]);
hPrintf("<TD>n/a</TD></TR>\n");
}
hTableEnd();
/* Put up another section with sample rows. */
webNewSection("Sample Rows");
hTableStart();
/* Print field names as column headers for example */
hPrintf("<TR>");
int colIx = 0;
for (col = as->columnList; col != NULL; col = col->next)
{
hPrintf("<TH>%s</TH>", col->name);
++colIx;
}
for (; colIx < colCount; ++colIx)
hPrintf("<TH>column%d</TH>", colIx+1);
hPrintf("</TR>\n");
/* Print sample lines. */
struct bigBedInterval *iv;
for (iv=ivList; iv != NULL; iv = iv->next)
{
bigBedIntervalToRow(iv, chromList->name, startBuf, endBuf, row, bbi->fieldCount);
hPrintf("<TR>");
for (colIx=0; colIx<colCount; ++colIx)
{
writeHtmlCell(row[colIx]);
}
hPrintf("</TR>\n");
}
hTableEnd();
/* Clean up and go home. */
lmCleanup(&lm);
bbiFileClose(&bbi);
freeMem(fileName);
hFreeConn(&conn);
}