2a7d8d36d651b483f7624b95c494086d5595ebf5 hiram Mon Apr 25 10:20:41 2016 -0700 hashVal is not unique for different strings, try using md5sum refs #17168 diff --git src/utils/twoBitDup/twoBitDup.c src/utils/twoBitDup/twoBitDup.c index 200b0cf..973201c 100644 --- src/utils/twoBitDup/twoBitDup.c +++ src/utils/twoBitDup/twoBitDup.c @@ -1,27 +1,28 @@ /* twoBitDup - check to see if a twobit file has any identical sequences in it. */ /* Copyright (C) 2013 The Regents of the University of California * See README in this or parent directory for licensing information. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "options.h" #include "twoBit.h" #include "dnaseq.h" #include "math.h" #include "udc.h" +#include "md5.h" static char const rcsid[] = "$Id: newProg.c,v 1.30 2010/03/24 21:18:33 hiram Exp $"; void usage() /* Explain usage and exit: hands the usage text below to errAbort. */ { errAbort( "twoBitDup - check to see if a twobit file has any identical sequences in it\n" "usage:\n" " twoBitDup file.2bit\n" "options:\n" " -keyList=file - file to write a key list, two columns: key and sequenceName\n" " -udcDir=/dir/to/cache - place to put cache for remote bigBed/bigWigs\n" ); } @@ -52,32 +53,35 @@ verbose(2, "writing key list to %s\n", keyList); keyListFile = mustOpen(keyList, "w"); } for (index = tbf->indexList; index != NULL; index = index->next) { verbose(2,"grabbing seq %s\n", index->name); int size; struct dnaSeq *seq = twoBitReadSeqFragExt(tbf, index->name, 0, 0, FALSE, &size); struct hashEl *hel; if ((hel = hashLookup(seqHash, seq->dna)) != NULL) printf("%s and %s are identical\n", index->name, (char *)hel->val); else hel = hashAdd(seqHash, seq->dna, index->name); - if (keyListFile) - fprintf(keyListFile, "%x\t%s\n", hel->hashVal, index->name); + if (keyListFile) { + char *md5Sum = md5HexForString(seq->dna); /* key column: md5HexForString of seq->dna; released with freeMem below */ + fprintf(keyListFile, "%s\t%s\n", md5Sum, index->name); + freeMem(md5Sum); + } freeDnaSeq(&seq); } } int main(int argc, char *argv[]) /* Process command line: run optionInit, call usage() unless argc is 2 afterwards,
 * read the keyList and udcDir options, then call twoBitDup on argv[1]. */ { optionInit(&argc, argv, options); if (argc != 2) usage(); keyList = optionVal("keyList", NULL); udcSetDefaultDir(optionVal("udcDir", udcDefaultDir())); twoBitDup(argv[1]); return 0; }