src/lib/bbiWrite.c 1.4
1.4 2009/08/12 21:35:45 kent
Moving some functions from bedGraphToBigBed to library.
Index: src/lib/bbiWrite.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/lib/bbiWrite.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -B -U 4 -r1.3 -r1.4
--- src/lib/bbiWrite.c 27 Jul 2009 18:02:01 -0000 1.3
+++ src/lib/bbiWrite.c 12 Aug 2009 21:35:45 -0000 1.4
@@ -81,8 +81,80 @@
const struct bbiChromInfo *a = ((struct bbiChromInfo *)va);
return (void*)(&a->id);
}
+struct bbiChromUsage *bbiChromUsageFromBedFile(struct lineFile *lf,
+	struct hash *chromSizesHash, int *retMinDiff, double *retAveSize)
+/* Go through bed file and collect chromosomes and statistics.
+ *   lf - open file of three-word rows: chrom, start, end.  Rows must be
+ *        grouped by chrom and, within a chrom, ordered by ascending start.
+ *   chromSizesHash - chromosome sizes keyed by name; each chrom seen in the
+ *        file is looked up here with hashIntVal.
+ *   retMinDiff - set to the smallest gap between the starts of two consecutive
+ *        rows on the same chrom, or left at BIGNUM if there is no such pair.
+ *   retAveSize - set to the mean of (end - start) over all rows, or 0 if the
+ *        file holds no rows.
+ * Returns a list with one bbiChromUsage per chrom, in order of first
+ * appearance, with name, id (numbered from 0), size and itemCount filled in.
+ * Aborts via errAbort on unsorted input or on a row whose end precedes its
+ * start. */
+{
+char *row[3];
+struct hash *uniqHash = hashNew(0);	/* Chroms already seen, to detect regrouping. */
+struct bbiChromUsage *usage = NULL, *usageList = NULL;
+int lastStart = -1;
+bits32 id = 0;
+bits64 totalBases = 0, bedCount = 0;
+int minDiff = BIGNUM;
+for (;;)
+    {
+    int rowSize = lineFileChopNext(lf, row, ArraySize(row));
+    if (rowSize == 0)
+        break;
+    lineFileExpectWords(lf, 3, rowSize);
+    char *chrom = row[0];
+    int start = lineFileNeedNum(lf, row, 1);
+    int end = lineFileNeedNum(lf, row, 2);
+    /* A negative size would wrap the unsigned totalBases accumulator below. */
+    if (start > end)
+        errAbort("end (%d) before start (%d) line %d of %s",
+	    end, start, lf->lineIx, lf->fileName);
+    ++bedCount;
+    totalBases += (end - start);
+    if (usage == NULL || differentString(usage->name, chrom))
+        {
+	/* Starting a new chrom.  Seeing one a second time means the rows for
+	 * that chrom were not contiguous. */
+	if (hashLookup(uniqHash, chrom))
+	    {
+	    errAbort("%s is not sorted at line %d. Please use \"sort -k1,1 -k2,2n\" or bedSort and try again.",
+		lf->fileName, lf->lineIx);
+	    }
+	hashAdd(uniqHash, chrom, NULL);
+	AllocVar(usage);
+	usage->name = cloneString(chrom);
+	usage->id = id++;
+	usage->size = hashIntVal(chromSizesHash, chrom);
+	slAddHead(&usageList, usage);
+	lastStart = -1;		/* No previous row on this chrom yet. */
+	}
+    usage->itemCount += 1;
+    if (lastStart >= 0)
+        {
+	int diff = start - lastStart;
+	if (diff < minDiff)
+	    {
+	    if (diff < 0)
+		errAbort("%s is not sorted at line %d. Please use \"sort -k1,1 -k2,2n\" or bedSort and try again.",
+		    lf->fileName, lf->lineIx);
+	    minDiff = diff;
+	    }
+	}
+    lastStart = start;
+    }
+slReverse(&usageList);	/* slAddHead built the list backwards. */
+*retMinDiff = minDiff;
+/* Avoid 0/0 (NaN) when the file contained no rows. */
+*retAveSize = (bedCount > 0) ? (double)totalBases/bedCount : 0;
+freeHash(&uniqHash);
+return usageList;
+}
+
+int bbiCountSectionsNeeded(struct bbiChromUsage *usageList, int itemsPerSlot)
+/* Return the total number of sections needed to hold every item on usageList
+ * when a section holds at most itemsPerSlot items.  Sections are tallied per
+ * list element, so a partly filled last section of one element still counts
+ * as a whole section. */
+{
+int total = 0;
+struct bbiChromUsage *el = usageList;
+while (el != NULL)
+    {
+    /* Round itemCount/itemsPerSlot up to the next whole section. */
+    int sections = (el->itemCount + itemsPerSlot - 1)/itemsPerSlot;
+    verbose(2, "%s %d, %d blocks of %d\n", el->name, el->itemCount, sections, itemsPerSlot);
+    total += sections;
+    el = el->next;
+    }
+return total;
+}
+
+
void bbiAddToSummary(bits32 chromId, bits32 chromSize, bits32 start, bits32 end,
bits32 validCount, double minVal, double maxVal, double sumData, double sumSquares,
int reduction, struct bbiSummary **pOutList)
/* Add data range to summary - putting it onto top of list if possible, otherwise