a66b3d61a232fc46f17a33eef83163603f86f2a3
kent
  Mon Mar 14 17:27:11 2011 -0700
Removing removeOverlap flag since it didn't work under larger scale testing, and it looked to be a scary pipeline mode problem.
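
For context, the dropped option wrapped each input file in a "bedRemoveOverlap stdin stdout" child process via the kent pipeline library, and the removed comments note that the wrapped lineFile could not be closed, so it leaked file handles. Below is a minimal standalone sketch of the same idea (streaming a bed file through an external filter before parsing) using plain POSIX popen()/pclose(), which can be closed per file. This is not kent library code: the helper name openDedupedBed, the shell redirection, and the lack of file-name quoting are illustration-only assumptions; bedRemoveOverlap still expects sorted input, per the removed usage text.

#include <stdio.h>
#include <stdlib.h>

FILE *openDedupedBed(char *fileName)
/* Hypothetical helper: open fileName for reading through an external
 * bedRemoveOverlap filter.  Unlike the removed openNonoverlapBed()
 * shortcut, the returned FILE can (and should) be closed with pclose(). */
{
char cmd[1024];
/* Mirror the removed command {"bedRemoveOverlap", "stdin", "stdout"},
 * feeding the file to stdin via the shell; quoting is skipped for brevity. */
snprintf(cmd, sizeof(cmd), "bedRemoveOverlap stdin stdout < %s", fileName);
FILE *f = popen(cmd, "r");
if (f == NULL)
    {
    perror("popen");
    exit(1);
    }
return f;
}

int main(int argc, char *argv[])
/* Demo driver: copy the deduplicated bed lines to stdout. */
{
if (argc != 2)
    {
    fprintf(stderr, "usage: dedupDemo file.bed\n");
    return 1;
    }
FILE *f = openDedupedBed(argv[1]);
char line[4096];
while (fgets(line, sizeof(line), f) != NULL)
    fputs(line, stdout);
pclose(f);    /* reaps the child, so no leaked handles or zombie processes */
return 0;
}

With the flag gone, the same effect is available by running bedRemoveOverlap over each input before calling bedCommonRegions; the sketch above is just an in-process variant of that.
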
diff --git src/utils/bedCommonRegions/bedCommonRegions.c src/utils/bedCommonRegions/bedCommonRegions.c
index 20862aa..1ad277d 100644
--- src/utils/bedCommonRegions/bedCommonRegions.c
+++ src/utils/bedCommonRegions/bedCommonRegions.c
@@ -7,109 +7,89 @@
 #include "options.h"
 #include "obscure.h"
 #include "pipeline.h"
 
 static char const rcsid[] = "$Id: newProg.c,v 1.30 2010/03/24 21:18:33 hiram Exp $";
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "bedCommonRegions - Create a bed file (just bed3) that contains the regions common to all inputs.\n"
   "Regions are common only if exactly the same chromosome, starts, and end.  Overlap is not enough.\n"
   "Each region must be in each input at most once. Output is stdout.\n"
   "usage:\n"
   "   bedCommonRegions file1 file2 file3 ... fileN\n"
-  "options:\n"
-  "   -removeOverlap - instead of treating overlap within a file as error, remove it\n"
-  "                    Files must be sorted for this one\n"
   );
 }
 
 static struct optionSpec options[] = {
-   {"removeOverlap", OPTION_BOOLEAN},
    {NULL, 0},
 };
 
 #define BED_STRING_SIZE 256
 
 char *bedString(char *chrom, char *start, char *end, char result[BED_STRING_SIZE])
 /* Return space delimited concatenation: chrom start end */
 {
 safef(result, BED_STRING_SIZE, "%s\t%s\t%s", chrom, start, end);
 return result;
 }
 
-struct lineFile *openNonoverlapBed(char *fileName)
-/* Wrap deduplication pipeline around file and return it. 
- * Careful - can not close this, so will leak file handles.
- * If you need to run this on lots of files then you'll have
- * to fix.*/
-{
-if (optionExists("removeOverlap"))
-    {
-    static char *cmd[4] = {"bedRemoveOverlap", "stdin", "stdout", NULL};
-    struct pipeline *pl = pipelineOpen1(cmd, pipelineRead, fileName, NULL);
-    return pipelineLineFile(pl);
-    }
-else
-    return lineFileOpen(fileName, TRUE);
-}
-
 struct hash *readFileIntoHashCountOfOne(char *fileName, struct slRef **pRetList)
 /* Read in a bed file.  Return an integer hash keyed by bedString.
  * The returned list is the hashEls of the hash, but in the same order
  * as they appear as lines in the file. */
 {
 /* Add each bed item to hash, and list, checking uniqueness */
 struct hash *hash = hashNew(21);
-struct lineFile *lf = openNonoverlapBed(fileName);
+struct lineFile *lf = lineFileOpen(fileName, TRUE);
 struct slRef *refList = NULL;
 char *row[3];
 while (lineFileRow(lf, row))
     {
     char key[BED_STRING_SIZE];
     bedString(row[0], row[1], row[2], key);
     if (hashLookup(hash, key))
         errAbort("Got %s:%s-%s twice (second time line %d of %s), key not unique",
 		row[0], row[1], row[2], lf->lineIx, lf->fileName);
     struct hashEl *hel = hashAddInt(hash, key, 1);
     refAdd(&refList, hel);
     }
 
 /* Clean up and go home. */
-// lineFileClose(&lf); Actually really don't want to close because of pipeline shortcut
+lineFileClose(&lf);
 slReverse(&refList);
 *pRetList = refList;
 return hash;
 }
 
 void addFileToCountHash(char *fileName, struct hash *countHash)
 /* Add bedStrings from file to countHash, just ignoring the ones
  * that don't appear. */
 {
-struct lineFile *lf = openNonoverlapBed(fileName);
+struct lineFile *lf = lineFileOpen(fileName, TRUE);
 char *row[3];
 while (lineFileRow(lf, row))
     {
     char key[BED_STRING_SIZE];
     bedString(row[0], row[1], row[2], key);
     struct hashEl *hel = hashLookup(countHash, key);
     if (hel != NULL)
         hel->val = ((char *)hel->val)+1;
     }
-// lineFileClose(&lf); Actually really don't want to close because of pipeline shortcut
+lineFileClose(&lf);
 }
 
 void bedCommonRegions(int fileCount, char *files[])
 /* Create a bed file (just bed3) that contains the regions common to all
 * input beds.  Regions are common only if they have exactly the same chromosome, start,
  * and end.  Mere overlap is not enough. */
 {
 /* Build up hash with counts of usage */
 struct slRef *ref, *refList = NULL;
 struct hash *countHash = readFileIntoHashCountOfOne(files[0], &refList);
 int i;
 for (i=1; i<fileCount; ++i)
     addFileToCountHash(files[i], countHash);
 
 /* Loop through and output the ones where count indicates they are once in