src/hg/bedItemOverlapCount/bedItemOverlapCount.c 1.12
1.12 2009/04/21 23:02:25 larrym
support -chromSize option
Index: src/hg/bedItemOverlapCount/bedItemOverlapCount.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/bedItemOverlapCount/bedItemOverlapCount.c,v
retrieving revision 1.11
retrieving revision 1.12
diff -b -B -U 4 -r1.11 -r1.12
--- src/hg/bedItemOverlapCount/bedItemOverlapCount.c 16 Apr 2009 20:38:04 -0000 1.11
+++ src/hg/bedItemOverlapCount/bedItemOverlapCount.c 21 Apr 2009 23:02:25 -0000 1.12
@@ -21,11 +21,13 @@
static struct hash *chromHash = NULL;
static char *host = NULL;
static char *user = NULL;
static char *password = NULL;
+char *chromSizes = NULL; /* -chromSize option: file to read chrom sizes from; NULL means load them from the database. */
/* command line option specifications */
static struct optionSpec optionSpecs[] = {
+ {"chromSize", OPTION_STRING},
{"host", OPTION_STRING},
{"user", OPTION_STRING},
{"password", OPTION_STRING},
{"strand", OPTION_STRING},
@@ -44,17 +46,18 @@
" sort -k1,1 -k2,2n bedFile.bed \\\n"
" | bedItemOverlapCount [options] <database> stdin \\\n"
" | wigEncode stdin data.wig data.wib\n"
"options:\n"
- " -host\tmysql host\n"
- " -user\tmysql user\n"
- " -password\tmysql password\n\n"
+ " -chromSize=sizefile\tRead chrom sizes from file instead of database\n"
+ " -host=hostname\tmysql host used to get chrom sizes\n"
+ " -user=username\tmysql user\n"
+ " -password=password\tmysql password\n\n"
+ "\tchromSize file is three white space separated fields per line: chrom name, size, and dummy value\n"
"\tYou will want to separate your + and - strand\n"
"\titems before sending into this program as it only looks at\n"
"\tthe chrom, start and end columns of the bed file.\n"
- "\tIt wants a <database> connection to lookup chrom sizes for a sanity\n"
- "\tcheck of the incoming data. This should be redone to take a chrom.sizes\n"
- "\targument instead to be independent of the database.\n"
+ "\tIt requires a <database> connection to lookup chrom sizes for a sanity\n"
+ "\tcheck of the incoming data (unless you use -chromSize argument).\n\n"
"The bed file must be sorted at least by chrom since the processing is\n"
"\tgoing to be chrom by chrom with no going back.\n"
" *** AND *** this is only for simple bed files without multiple blocks. ***"
);
@@ -151,9 +154,25 @@
char *prevChrom = (char *)NULL;
boolean outputToDo = FALSE;
unsigned chromSize = 0;
-chromHash = loadAllChromInfo(database, &maxChromSize);
+if (chromSizes != NULL)
+ {
+ chromHash = newHash(0);
+ // Unfortunately, chromInfoLoadAll requires each line of the file to have three fields (reason unknown),
+ // which is why the -chromSize file must carry a dummy third column.
+ struct chromInfo *el = chromInfoLoadAll(chromSizes);
+ for(;el != NULL;el=el->next)
+ {
+ if (el->size > maxChromSize) maxChromSize = el->size;
+ verbose(4, "Add hash %s value %u (%#lx)\n", el->chrom, el->size, (unsigned long)&el->size);
+ hashAdd(chromHash, el->chrom, (void *)(& el->size));
+ }
+ }
+else
+ {
+ chromHash = loadAllChromInfo(database, &maxChromSize);
+ }
verbose(2,"#\tmaxChromSize: %u\n", maxChromSize);
if (maxChromSize < 1)
errAbort("maxChromSize is zero ?");
@@ -237,8 +256,9 @@
usage();
host = optionVal("host", NULL);
user = optionVal("user", NULL);
password = optionVal("password", NULL);
+chromSizes = optionVal("chromSize", NULL);
verbose(2, "#\tworking on database: %s\n", argv[1]);
bedItemOverlapCount(argv[1], argc-2, argv+2);
return 0;
}