8f2ecd4892ee20712e9bd069edc632eed32eb4b0
braney
  Mon Jan 14 13:22:05 2013 -0800
add an option to pslDiff to check for same locus #7568
diff --git src/hg/pslDiff/pslDiff.c src/hg/pslDiff/pslDiff.c
index d75e585..415d977 100644
--- src/hg/pslDiff/pslDiff.c
+++ src/hg/pslDiff/pslDiff.c
@@ -1,49 +1,52 @@
 /* pslDiff - compare psl files */
 #include "common.h"
 #include "psl.h"
 #include "pslTbl.h"
 #include "pslSets.h"
 #include "options.h"
 
 /* command line options and values */
 static struct optionSpec optionSpecs[] =
 {
     {"details", OPTION_STRING},
     {"setNames", OPTION_BOOLEAN},
+    {"sameLocus", OPTION_BOOLEAN},
     {NULL, 0}
 };
 
 boolean gHaveSetNames = FALSE;  /* set names in args */
 boolean gNumAligns = FALSE;     /* compare by number of alignments */
+boolean gSameLocus = FALSE;     /* -sameLocus: pslSame() treats same-strand psls as same when their target ranges overlap */
 
 void usage(char *msg, ...)
 /* usage msg and exit */
 {
 va_list ap;
 va_start(ap, msg);
 vfprintf(stderr, msg, ap);
 errAbort("\n%s",
          "pslDiff [options] psl1 psl2 ...\n"
          "pslDiff [options] -setNames setName1 pslFile1 setName2 pslFile2 ...\n"
          "\n"
          "Compare queries in two or more psl files \n"
          "\n"
          "   -setNames - commmand line specifies name to use for a set of alignments\n"
          "    found in a psl file.  If this is not specified, the set names are the\n"
          "    base names of the psl files\n"
          "   -details=file - write details of psls that differ to this file\n"
+	 "   -sameLocus - just check to see if alignments overlap\n"
          "\n"
          "The program matches psls in sets by exon structure.  The output\n"
          "list the that are not the same in all sets.  A psl is identified\n"
          "by it's qName and target location.  There is then a column per\n"
          "input set that indicates which sets have the psl and where they\n"
          "match.  A set not contain the psl has a '-' in it's column.\n"
          "Otherwise, there is a letter indicating which set has matching\n"
          "psls.  All columns with the same letter have the same psl.  The\n"
          "letters are assigned independently for each row.  For example:\n"
          "\n"
          "#qName          tName  tStart     tEnd       reps  cdnafilt\n"
          "NM_001001944.1  chr12  23614031   23614102   -     A\n"
          "NM_001006007.1  chrUn  142900969  142902487  A     -\n"
          "NM_001024180.1  chr14  540798     559130     A     B\n"
          );
@@ -65,34 +68,42 @@
     for (i = 0; i < numPslSpecs; i++)
         {
         splitPath(pslSpecs[i], NULL, setName, NULL);
         pslSetsLoadSet(ps, i, pslSpecs[i], setName);
         }
     }
 return ps;
 }
 
 boolean pslSame(struct psl *psl1, struct psl *psl2)
 /* determine if two psls (with same query and target) are the same */
 {
 int iBlk;
 if ((psl1 == NULL) || (psl2 == NULL))
     return FALSE;
-if (psl1->blockCount != psl2->blockCount)
-    return FALSE;
 if (!sameString(psl1->strand, psl2->strand))
     return FALSE;
+if (gSameLocus)  /* -sameLocus mode: strand already matched above; decide on target range alone, skipping the per-block comparison */
+    {
+    if (positiveRangeIntersection(psl1->tStart, psl1->tEnd, 
+				  psl2->tStart, psl2->tEnd))
+	return TRUE;  /* helper returned nonzero for the two tStart/tEnd ranges: treated as overlapping */
+    return FALSE;  /* helper returned zero: not considered the same locus */
+    }
+
+if (psl1->blockCount != psl2->blockCount)  /* exact mode: block counts must agree before the block-by-block comparison below */
+    return FALSE;
 for(iBlk = 0; iBlk < psl1->blockCount; iBlk++)
     {
     if ((psl1->qStarts[iBlk] != psl2->qStarts[iBlk])
         || (psl1->tStarts[iBlk] != psl2->tStarts[iBlk])
         || (psl1->blockSizes[iBlk] != psl2->blockSizes[iBlk]))
         return FALSE;
     }
 return TRUE;
 }
 
 boolean allMatchesSame(struct pslMatches *matches)
 /* determine if all sets have matches and are same */
 {
 int iSet;
 if (matches->psls[0] == NULL)
@@ -220,30 +231,31 @@
 prHeader(stdout, ps);
 
 for (query = queries; query != NULL; query = query->next)
     diffQuery(stdout, detailsFh, ps, query->name);
 
 slFreeList(&queries);
 pslSetsFree(&ps);
 carefulClose(&detailsFh);
 }
 
 int main(int argc, char *argv[])
 /* Process command line */
 {
 char *detailsFile;
 optionInit(&argc, argv, optionSpecs);
+gSameLocus = optionExists("sameLocus");  /* boolean flag consulted by pslSame(); set before the argument-count checks */
 gHaveSetNames = optionExists("setNames");
 if (((gHaveSetNames) && (argc < 5))
     || ((!gHaveSetNames) && (argc < 3)))
     usage("wrong # of args:");
 
 if (gHaveSetNames && ((argc-1)&1)) /* must have even number */
     usage("-setNames requires pairs of setName and pslFile");
 
 detailsFile = optionVal("details", NULL);
 pslDiff(argc-1, argv+1, detailsFile);
 return 0;
 }
 
 /*
  * Local Variables: