a97f8a0fffb31a2b3521e573a03f454fb60275d3
markd
  Tue May 30 21:30:12 2023 -0700
Add mechanism to fix problem with mapping prot-prot via prot-na alignment by explicitly specifying the type of alignments

diff --git src/utils/pslMap/pslMap.c src/utils/pslMap/pslMap.c
index 2477cda..783c48e 100644
--- src/utils/pslMap/pslMap.c
+++ src/utils/pslMap/pslMap.c
@@ -13,67 +13,85 @@
 #include "dnautil.h"
 #include "chain.h"
 #include "verbose.h"
 
 
 /* command line option specifications */
 static struct optionSpec optionSpecs[] = {
     {"suffix", OPTION_STRING},
     {"keepTranslated", OPTION_BOOLEAN},
     {"mapFileWithInQName", OPTION_BOOLEAN},
     {"chainMapFile", OPTION_BOOLEAN},
     {"swapMap", OPTION_BOOLEAN},
     {"swapIn", OPTION_BOOLEAN},
     {"mapInfo", OPTION_STRING},
     {"mappingPsls", OPTION_STRING},
+    {"inType", OPTION_STRING},
+    {"mapType", OPTION_STRING},
     {"simplifyMappingIds", OPTION_BOOLEAN},
     {NULL, 0}
 };
 
 /* Values parsed from command line */
 static char* suffix = NULL;
 static unsigned mapOpts = pslTransMapNoOpts;
 static boolean mapFileWithInQName = FALSE;
 static boolean chainMapFile = FALSE;
 static boolean swapMap = FALSE;
 static boolean swapIn = FALSE;
 static boolean simplifyMappingIds = FALSE;
 static char* mapInfoFile = NULL;
 static char* mappingPslFile = NULL;
+static enum pslType inPslType = pslTypeUnspecified;
+static enum pslType mapPslType = pslTypeUnspecified;
 
 static char *mapInfoHdr =
     "#srcQName\t" "srcQStart\t" "srcQEnd\t" "srcQSize\t"
     "srcTName\t" "srcTStart\t" "srcTEnd\t"
     "srcStrand\t" "srcAligned\t"
     "mappingQName\t" "mappingQStart\t" "mappingQEnd\t"
     "mappingTName\t" "mappingTStart\t" "mappingTEnd\t"
     "mappingStrand\t" "mappingId\t"
     "mappedQName\t" "mappedQStart\t" "mappedQEnd\t"
     "mappedTName\t" "mappedTStart\t" "mappedTEnd\t"
     "mappedStrand\t"
     "mappedAligned\t" "qStartTrunc\t" "qEndTrunc\t" "mappedPslLine\n";
 
 static void usage()
 /* usage msg and exit */
 {
 /* message got huge, so it's in a generate file */
 static char *usageMsg =
 #include "usage.msg"
     ;
 errAbort("%s", usageMsg);
 }
 
+static enum pslType parsePslType(char *typeStr)
+/* Convert a -inType/-mapType option value ('prot_prot', 'prot_na', or 'na_na') to the matching enum pslType; errAbort on any other string */
+{
+if (sameString(typeStr, "prot_prot"))
+    return pslTypeProtProt;
+else if (sameString(typeStr, "prot_na"))
+    return pslTypeProtNa;
+else if (sameString(typeStr, "na_na"))
+    return pslTypeNaNa;
+else
+    errAbort("invalid value for PSL type '%s', expected 'prot_prot', 'prot_na', or 'na_na'", typeStr);
+return pslTypeUnspecified;  /* presumably never reached (errAbort is expected not to return); gives every path a return value */
+}
+
 static void verbosePslNl(int verbosity, char *msg, struct psl *psl)
 /* Verbose logging of  msg, if not null, followed by a psl if not null, followed by a new line */
 {
 if (verboseLevel() >= verbosity)
     {
     if (msg != NULL)
         verbose(verbosity, "%s ", msg);
     if (psl != NULL)
         pslTabOut(psl, verboseLogFile());
     else
         verbose(verbosity, "NULL\n");
     }
 }
 
 struct mapAln
@@ -296,31 +314,31 @@
     pslTabOut(mapAln->psl, mappingPslFh);
 }
 
 static boolean mapPslPair(struct psl *inPsl, struct mapAln *mapAln,
                           FILE* outPslFh, FILE *mapInfoFh, FILE *mappingPslFh,
                           unsigned* outPslLineRef)
 /* map one pair of query and target PSL */
 {
 struct psl* mappedPsl;
 if (inPsl->tSize != mapAln->psl->qSize)
     errAbort("Error: inPsl %s tSize (%d) != mapping alignment %s qSize (%d) (perhaps you need to specify -swapMap?)\n",
              inPsl->tName, inPsl->tSize, mapAln->psl->qName, mapAln->psl->qSize);
 verbosePslNl(2, "inAln", inPsl);
 verbosePslNl(2, "mapAln", mapAln->psl);
 
-mappedPsl = pslTransMap(mapOpts, inPsl, mapAln->psl);
+mappedPsl = pslTransMap(mapOpts, inPsl, inPslType, mapAln->psl, mapPslType);
 
 verbosePslNl(2, "mappedAln", mappedPsl);
 
 /* only output if blocks were actually mapped */
 boolean wasMapped = mappedPsl != NULL;
 if (wasMapped)
     {
     mappedPslOutput(inPsl, mapAln, mappedPsl, outPslFh, mapInfoFh, mappingPslFh, *outPslLineRef);
     (*outPslLineRef)++;
     }
 pslFree(&mappedPsl);
 return wasMapped;
 }
 
 static void mapQueryPsl(struct psl* inPsl, struct genomeRangeTree *mapAlns,
@@ -387,27 +405,32 @@
 /* Process command line. */
 {
 optionInit(&argc, argv, optionSpecs);
 if (argc != 4)
     usage();
 suffix = optionVal("suffix", NULL);
 if (optionExists("keepTranslated"))
     mapOpts |= pslTransMapKeepTrans;
 mapFileWithInQName = optionExists("mapFileWithInQName");
 chainMapFile = optionExists("chainMapFile");
 if (mapFileWithInQName && chainMapFile)
     errAbort("can't specify -mapFileWithInQName with -chainMapFile");
 swapMap = optionExists("swapMap");
 swapIn = optionExists("swapIn");
 simplifyMappingIds = optionExists("simplifyMappingIds");
+char *typeStr;
+if ((typeStr = optionVal("inType", NULL)) != NULL)
+    inPslType = parsePslType(typeStr);
+if ((typeStr = optionVal("mapType", NULL)) != NULL)
+    mapPslType = parsePslType(typeStr);
+
 mapInfoFile = optionVal("mapInfo", NULL);
 mappingPslFile = optionVal("mappingPsls", NULL);
 pslMap(argv[1], argv[2], argv[3]);
 
 return 0;
 }
 /*
  * Local Variables:
  * c-file-style: "jkent-c"
  * End:
  */
-