src/inc/bigBed.h 1.13
1.13 2009/04/29 17:59:33 mikep
Split the bigBedFileCreate() logic so that loading bed records from a file is separate from calculating the summary and writing files.
Index: src/inc/bigBed.h
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/inc/bigBed.h,v
retrieving revision 1.12
retrieving revision 1.13
diff -b -B -U 4 -r1.12 -r1.13
--- src/inc/bigBed.h 20 Apr 2009 23:16:18 -0000 1.12
+++ src/inc/bigBed.h 29 Apr 2009 17:59:33 -0000 1.13
@@ -17,8 +17,19 @@
bits32 start, end; /* Range inside chromosome - half open zero based. */
char *rest; /* Rest of line. May be NULL*/
};
+struct ppBed
+/* A partially parsed out bed record plus some extra fields. */
+ {
+ struct ppBed *next; /* Next in list. */
+ char *chrom; /* Chromosome name (not allocated here). */
+ bits32 start, end; /* Range inside chromosome - half open zero based. */
+ char *rest; /* The rest of the bed line. */
+ bits64 fileOffset; /* File offset. NOTE(review): which file is not stated here - confirm. */
+ bits32 chromId; /* Chromosome ID. */
+ };
+
struct bbiFile *bigBedFileOpen(char *fileName);
/* Open up big bed file. Free this up with bbiFileFree */
struct bigBedInterval *bigBedIntervalQuery(struct bbiFile *bbi, char *chrom,
@@ -62,18 +73,53 @@
char *asFileName, /* If non-null points to a .as file that describes fields. */
char *outName); /* BigBed output file name. */
/* Convert tab-separated bed file to binary indexed, zoomed bigBed version. */
-void bigBedFileCreateDetailed(
+void bigBedFileCreateReadInfile(
char *inName, /* Input file in a tabular bed format <chrom><start><end> + whatever. */
- boolean sorted, /* Input is already sorted */
char *chromSizes, /* Two column tab-separated file: <chromosome> <size>. */
int blockSize, /* Number of items to bundle in r-tree. 1024 is good. */
int itemsPerSlot, /* Number of items in lowest level of tree. 64 is good. */
bits16 definedFieldCount, /* Number of defined bed fields - 3-16 or so. 0 means all fields
* are the defined bed ones. */
char *asFileName, /* If non-null points to a .as file that describes fields. */
+ char *outName, /* BigBed output file name. */
+ struct ppBed **ppbList, /* Bed data list, passed by address; will be sorted. */
+ bits64 *count, /* Size of the *ppbList list. */
+ double *averageSize, /* Average size of elements in *ppbList. */
+ struct hash **pChromHash, /* Hash containing sizes of all chroms. */
+ bits16 *fieldCount, /* Actual field count from input data. */
+ struct asObject **pAs, /* If non-null, contains the asObject that describes fields. */
+ bits64 *fullSize); /* Full size of ppBed on disk. */
+/* Load data to prepare bigBed. NOTE(review): pointer arguments look like outputs - confirm. */
+
+void bigBedFileCreateDetailed(
+ struct ppBed *pbList, /* Input bed data. Must be sorted. */
+ bits64 pbCount, /* Size of input pbList. */
+ double pbAverageSize, /* Average size of elements in pbList. */
+ char *inName, /* Input file name (for error message reporting). */
+ struct hash *chromHash, /* Hash containing sizes of all chroms. */
+ int blockSize, /* Number of items to bundle in r-tree. 1024 is good. */
+ int itemsPerSlot, /* Number of items in lowest level of tree. 64 is good. */
+ bits16 definedFieldCount, /* Number of defined bed fields - 3-16 or so. 0 means all fields
+ * are the defined bed ones. */
+ bits16 fieldCount, /* Actual field count from input data. */
+ char *asFileName, /* If non-null points to a .as file that describes fields. */
+ struct asObject *as, /* If non-null, contains the asObject that describes fields. */
+ bits64 fullSize, /* Full size of ppBed on disk. */
char *outName); /* BigBed output file name. */
-/* Convert tab-separated bed file to binary indexed, zoomed bigBed version. */
+/* Create the zoomed bigBed version from an already sorted ppBed list. */
+
+struct ppBed *ppBedLoadOne(char **row, int fieldCount, struct lineFile *lf, struct hash *chromHash, struct lm *lm, struct asObject *as, bits64 *diskSize);
+/* Return a ppBed record built from one line of the bed file in lf.
+ The disk size the record would occupy is returned in *diskSize.
+ row is a preallocated array of pointers to the individual fields of the row to load.
+ fieldCount is the number of fields.
+ lf is the lineFile the row comes from; it is used for error messages and parsing fields.
+ chromHash is a hash of the chromosome sizes.
+ lm is the localMem that ppBed memory is allocated from, so do not call
+ ppBedFreeList or slFree on the list!
+ as is the autoSql object describing this bed file, or NULL for a standard bed.
+ */
#endif /* BIGBED_H */