src/utils/raMerge/raMerge.c 1.3
1.3 2009/11/23 07:38:17 kent
Adding -firstOnly option.
Index: src/utils/raMerge/raMerge.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/utils/raMerge/raMerge.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 1000000 -r1.2 -r1.3
--- src/utils/raMerge/raMerge.c 13 Mar 2007 01:41:20 -0000 1.2
+++ src/utils/raMerge/raMerge.c 23 Nov 2009 07:38:17 -0000 1.3
@@ -1,98 +1,106 @@
/* raMerge - Merge together info in two ra files, doing a record-by-record concatenation for the most part.. */
#include "common.h"
#include "linefile.h"
#include "hash.h"
#include "options.h"
#include "localmem.h"
#include "ra.h"
static char const rcsid[] = "$Id$";
-boolean dupeOk = FALSE, uniqDupeOk = FALSE;
+boolean dupeOk = FALSE, firstOnly = FALSE;
void usage()
/* Explain usage and exit. */
{
errAbort(
"raMerge - Merge together info in two ra files, producing records that are a\n"
"concatenation of all the input records\n"
"usage:\n"
" raMerge keyField file1.ra file2.ra ... fileN.ra\n"
"options:\n"
" -dupeOk - will pass through multiple instances of fields with same name.\n"
+ " -firstOnly - only keep records that are started in the first file\n"
);
}
static struct optionSpec options[] = {
{"dupeOk", OPTION_BOOLEAN},
+ {"firstOnly", OPTION_BOOLEAN},
{NULL, 0},
};
void raMerge(char *keyField, int raCount, char *raFiles[])
/* raMerge - Merge together info in two ra files, doing a record-by-record
* concatenation for the most part.. */
{
struct hash *outerHash = hashNew(20);
int i;
+boolean isFirstFile = TRUE;
for (i=0; i<raCount; ++i)
{
struct lineFile *lf = lineFileOpen(raFiles[i], TRUE);
struct hash *newRa;
while ((newRa = raNextRecord(lf)) != NULL)
{
char *id = hashFindVal(newRa, keyField);
if (id == NULL)
errAbort("Missing %s field in record ending line %d of %s", keyField,
lf->lineIx, lf->fileName);
struct hash *oldRa = hashFindVal(outerHash, id);
if (oldRa == NULL)
+ {
+ if (isFirstFile || !firstOnly)
hashAdd(outerHash, id, newRa);
+ }
else
{
struct hashCookie cookie = hashFirst(newRa);
struct hashEl *hel;
while ((hel = hashNext(&cookie)) != NULL)
{
if (!sameString(hel->name, keyField))
{
if (!dupeOk && hashLookup(oldRa, hel->name))
errAbort("Field %s duplicated in %s", hel->name, id);
hashAdd(oldRa, hel->name, lmCloneString(oldRa->lm, hel->val));
}
}
hashFree(&newRa);
}
}
lineFileClose(&lf);
+ isFirstFile = FALSE;
}
struct hashEl *outerHel, *outerList = hashElListHash(outerHash);
slSort(&outerList, hashElCmp);
FILE *f = stdout;
for (outerHel = outerList; outerHel != NULL; outerHel = outerHel->next)
{
struct hash *ra = outerHel->val;
char *id = hashMustFindVal(ra, keyField);
fprintf(f, "%s %s\n", keyField, id);
struct hashEl *raHel, *raList = hashElListHash(ra);
slSort(&raList, hashElCmp);
for (raHel = raList; raHel != NULL; raHel = raHel->next)
{
if (!sameString(raHel->name, keyField))
fprintf(f, "%s %s\n", raHel->name, (char *)raHel->val);
}
fprintf(f, "\n");
slFreeList(&raList);
}
}
int main(int argc, char *argv[])
/* Process command line. */
{
optionInit(&argc, argv, options);
if (argc < 3)
usage();
dupeOk = optionExists("dupeOk");
+firstOnly = optionExists("firstOnly");
raMerge(argv[1], argc-2, argv+2);
return 0;
}