src/hg/bedSplitOnChrom/bedSplitOnChrom.c 1.3
1.3 2009/09/29 00:54:16 braney
Add a small optimization: skip the hash lookup when a line's chromosome is the same as on the previous line.
Index: src/hg/bedSplitOnChrom/bedSplitOnChrom.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/bedSplitOnChrom/bedSplitOnChrom.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 4 -r1.2 -r1.3
--- src/hg/bedSplitOnChrom/bedSplitOnChrom.c 29 Sep 2009 00:17:04 -0000 1.2
+++ src/hg/bedSplitOnChrom/bedSplitOnChrom.c 29 Sep 2009 00:54:16 -0000 1.3
@@ -42,8 +42,12 @@
/* Open file and figure out how many fields there are. */
struct lineFile *lf = lineFileOpen(inFile, TRUE);
char *row[100];
int numFields = lineFileChopNext(lf, row, ArraySize(row));
+char lastChrom[2048];
+
+lastChrom[0] = 0;
+
if (numFields == 0)
return; /* Empty file, nothing to do. */
if (numFields >= ArraySize(row))
errAbort("Too many fields (%d) in bed file %s. Max is %d",
@@ -54,8 +58,9 @@
/* Output as needed, creating a hash of open files. */
char path[PATH_LEN];
struct hash *fileHash = hashNew(8);
char buffer[4096];
+FILE *f = NULL;
for (;;)
{
/* Look up file in hash, creating a new file if need be. */
@@ -72,9 +77,15 @@
*ptr++ = 0;
chrom = buffer;
}
- FILE *f = hashFindVal(fileHash, chrom);
+
+ if (differentString(chrom, lastChrom))
+ {
+ f = hashFindVal(fileHash, chrom);
+ strcpy(lastChrom, chrom);
+ }
+
if (f == NULL)
{
if (fileHash->elCount >= maxChromCount)
errAbort("%s is the %dth chromosome, which is too many. "
@@ -96,10 +107,10 @@
safef(path, sizeof(path), "%s/%s.bed", outDir, chrom);
errnoAbort("Couldn't write to %s.", path);
}
- /* Fetch next line, breaking loop if it's there, and insuring that it has the
- * usual number of fields. */
+ /* Fetch next line, breaking loop if it's not there,
+ * and maybe insuring that it has the usual number of fields. */
int fieldsInLine = lineFileChopNext(lf, row, ArraySize(row));
if (fieldsInLine == 0)
break;
if (nfCheck && (fieldsInLine != numFields))