src/hg/bedSplitOnChrom/bedSplitOnChrom.c 1.3

1.3 2009/09/29 00:54:16 braney
Add a small optimization: remember the previous line's chromosome so the file hash is searched only when the chromosome changes, rather than on every line.
Index: src/hg/bedSplitOnChrom/bedSplitOnChrom.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/bedSplitOnChrom/bedSplitOnChrom.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 4 -r1.2 -r1.3
--- src/hg/bedSplitOnChrom/bedSplitOnChrom.c	29 Sep 2009 00:17:04 -0000	1.2
+++ src/hg/bedSplitOnChrom/bedSplitOnChrom.c	29 Sep 2009 00:54:16 -0000	1.3
@@ -42,8 +42,12 @@
 /* Open file and figure out how many fields there are. */
 struct lineFile *lf = lineFileOpen(inFile, TRUE);
 char *row[100];
 int numFields = lineFileChopNext(lf, row, ArraySize(row));
+char lastChrom[2048];
+
+lastChrom[0] = 0;
+
 if (numFields == 0)
     return;	/* Empty file, nothing to do. */
 if (numFields >= ArraySize(row))
     errAbort("Too many fields (%d) in bed file %s.  Max is %d", 
@@ -54,8 +58,9 @@
 /* Output as needed, creating a hash of open files. */
 char path[PATH_LEN];
 struct hash *fileHash = hashNew(8);
 char buffer[4096];
+FILE *f = NULL;
 
 for (;;)
     {
     /* Look up file in hash, creating a new file if need be. */
@@ -72,9 +77,15 @@
 	*ptr++ = 0;
 
 	chrom = buffer;
 	}
-    FILE *f = hashFindVal(fileHash, chrom);
+
+    if (differentString(chrom, lastChrom))
+	{
+	f = hashFindVal(fileHash, chrom);
+	strcpy(lastChrom, chrom);
+	}
+
     if (f == NULL)
         {
 	if (fileHash->elCount >= maxChromCount)
 	    errAbort("%s is the %dth chromosome, which is too many. "
@@ -96,10 +107,10 @@
 	safef(path, sizeof(path), "%s/%s.bed", outDir, chrom);
         errnoAbort("Couldn't write to %s.", path);
 	}
 
-    /* Fetch next line, breaking loop if it's there, and insuring that it has the
-     * usual number of fields. */
+    /* Fetch next line, breaking loop if it's not there, 
+     * and maybe insuring that it has the usual number of fields. */
     int fieldsInLine = lineFileChopNext(lf, row, ArraySize(row));
     if (fieldsInLine == 0)
         break;
     if (nfCheck && (fieldsInLine != numFields))