8f2ecd4892ee20712e9bd069edc632eed32eb4b0 braney Mon Jan 14 13:22:05 2013 -0800 add an option to pslDiff to check for same locus #7568 diff --git src/hg/pslDiff/pslDiff.c src/hg/pslDiff/pslDiff.c index d75e585..415d977 100644 --- src/hg/pslDiff/pslDiff.c +++ src/hg/pslDiff/pslDiff.c @@ -1,49 +1,52 @@ /* pslDiff - compare psl files */ #include "common.h" #include "psl.h" #include "pslTbl.h" #include "pslSets.h" #include "options.h" /* command line options and values */ static struct optionSpec optionSpecs[] = { {"details", OPTION_STRING}, {"setNames", OPTION_BOOLEAN}, + {"sameLocus", OPTION_BOOLEAN}, {NULL, 0} }; boolean gHaveSetNames = FALSE; /* set names in args */ boolean gNumAligns = FALSE; /* compare by number of alignments */ +boolean gSameLocus = FALSE; /* just check to see if range overlaps */ void usage(char *msg, ...) /* usage msg and exit */ { va_list ap; va_start(ap, msg); vfprintf(stderr, msg, ap); errAbort("\n%s", "pslDiff [options] psl1 psl2 ...\n" "pslDiff [options] -setNames setName1 pslFile1 setName2 pslFile2 ...\n" "\n" "Compare queries in two or more psl files \n" "\n" " -setNames - commmand line specifies name to use for a set of alignments\n" " found in a psl file. If this is not specified, the set names are the\n" " base names of the psl files\n" " -details=file - write details of psls that differ to this file\n" + " -sameLocus - just check to see if alignments overlap\n" "\n" "The program matches psls in sets by exon structure. The output\n" "list the that are not the same in all sets. A psl is identified\n" "by it's qName and target location. There is then a column per\n" "input set that indicates which sets have the psl and where they\n" "match. A set not contain the psl has a '-' in it's column.\n" "Otherwise, there is a letter indicating which set has matching\n" "psls. All columns with the same letter have the same psl. The\n" "letters are assigned independently for each row. For example:\n" "\n" "#qName tName tStart tEnd reps cdnafilt\n" "NM_001001944.1 chr12 23614031 23614102 - A\n" "NM_001006007.1 chrUn 142900969 142902487 A -\n" "NM_001024180.1 chr14 540798 559130 A B\n" ); @@ -65,34 +68,42 @@ for (i = 0; i < numPslSpecs; i++) { splitPath(pslSpecs[i], NULL, setName, NULL); pslSetsLoadSet(ps, i, pslSpecs[i], setName); } } return ps; } boolean pslSame(struct psl *psl1, struct psl *psl2) /* determine if two psls (with same query and target) are the same */ { int iBlk; if ((psl1 == NULL) || (psl2 == NULL)) return FALSE; -if (psl1->blockCount != psl2->blockCount) - return FALSE; if (!sameString(psl1->strand, psl2->strand)) return FALSE; +if (gSameLocus) + { + if (positiveRangeIntersection(psl1->tStart, psl1->tEnd, + psl2->tStart, psl2->tEnd)) + return TRUE; + return FALSE; + } + +if (psl1->blockCount != psl2->blockCount) + return FALSE; for(iBlk = 0; iBlk < psl1->blockCount; iBlk++) { if ((psl1->qStarts[iBlk] != psl2->qStarts[iBlk]) || (psl1->tStarts[iBlk] != psl2->tStarts[iBlk]) || (psl1->blockSizes[iBlk] != psl2->blockSizes[iBlk])) return FALSE; } return TRUE; } boolean allMatchesSame(struct pslMatches *matches) /* determine if all sets have matches and are same */ { int iSet; if (matches->psls[0] == NULL) @@ -220,30 +231,31 @@ prHeader(stdout, ps); for (query = queries; query != NULL; query = query->next) diffQuery(stdout, detailsFh, ps, query->name); slFreeList(&queries); pslSetsFree(&ps); carefulClose(&detailsFh); } int main(int argc, char *argv[]) /* Process command line */ { char *detailsFile; optionInit(&argc, argv, optionSpecs); +gSameLocus = optionExists("sameLocus"); gHaveSetNames = optionExists("setNames"); if (((gHaveSetNames) && (argc < 5)) || ((!gHaveSetNames) && (argc < 3))) usage("wrong # of args:"); if (gHaveSetNames && ((argc-1)&1)) /* must have even number */ usage("-setNames requires pairs of setName and pslFile"); detailsFile = optionVal("details", NULL); pslDiff(argc-1, argv+1, detailsFile); return 0; } /* * Local Variables: