src/hg/utils/pslUniq/pslUniq.c 1.3
1.3 2004/02/07 20:28:23 braney
added option to allow for N entries (dumb name then eh?)
Index: src/hg/utils/pslUniq/pslUniq.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/utils/pslUniq/pslUniq.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 1000000 -r1.2 -r1.3
--- src/hg/utils/pslUniq/pslUniq.c 5 Nov 2003 05:03:50 -0000 1.2
+++ src/hg/utils/pslUniq/pslUniq.c 7 Feb 2004 20:28:23 -0000 1.3
@@ -1,46 +1,59 @@
/* pslUniq - strip out all but first record found */
#include "common.h"
#include "linefile.h"
#include "hash.h"
#include "options.h"
#include "psl.h"
static char const rcsid[] = "$Id$";
+int numAllow = 1;
+
void usage()
/* Explain usage and exit. */
{
-errAbort("usage: pslUniq in.psl out.psl\n");
+errAbort("usage: pslUniq in.psl out.psl\n"
+" -numAllow=N how many of each identifier to keep (default 1)\n"
+);
}
-void pslUniq( char *pslName, char *outName)
+void pslUniq( char *pslName, char *outName, int numAllow)
{
int size;
char *name;
struct psl *psl;
struct hash *pslHash = newHash(0);
+struct hashEl *hel;
char *start;
FILE *out = mustOpen(outName, "w");
struct lineFile *list;
struct lineFile *pslF = pslFileOpen(pslName);
while ( psl = pslNext(pslF))
{
- if ( !hashLookup(pslHash, psl->qName))
+ if ( (hel = hashLookup(pslHash, psl->qName)) == NULL)
{
- hashAdd(pslHash, psl->qName, psl);
+ hashAdd(pslHash, psl->qName, 1);
pslTabOut(psl, out);
}
+ else
+ {
+ hel->val++;
+ if (hel->val <= numAllow)
+ pslTabOut(psl, out);
+ }
+
}
lineFileClose(&pslF);
}
int main(int argc, char *argv[])
/* Process command line. */
{
optionHash(&argc, argv);
+numAllow = optionInt("numAllow", 1);
if (argc != 3)
usage();
-pslUniq(argv[1], argv[2]);
+pslUniq(argv[1], argv[2], numAllow);
return 0;
}