6557cfc305545ee5c43930fb7548b3310061c2d6
kent
  Wed Jun 22 20:32:53 2022 -0700
First full genome functioning build of dynamic programming version to pick best chains to use for liftOver purposes out of a a pairwise genome alignment to allow best liftovers from query to target maximizing coverage of target while minimizing coverage of query.  An improvement over the greedy algorithm used in netChain/chainNetSubset.

diff --git src/hg/oneShot/testLiftRechainer/testLiftRechainer.c src/hg/oneShot/testLiftRechainer/testLiftRechainer.c
index 1cb65435..29bd0fa 100644
--- src/hg/oneShot/testLiftRechainer/testLiftRechainer.c
+++ src/hg/oneShot/testLiftRechainer/testLiftRechainer.c
@@ -1,79 +1,95 @@
 /* testLiftRechainer - Work out which chains to use for lift-over purposes using dynamic 
  * programming rather than greedy fill-in-the-gaps method of nets.. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "dlist.h"
 #include "options.h"
 #include "sqlNum.h"
 #include "chain.h"
 #include "twoBit.h"
 #include "obscure.h"
 #include "dnaseq.h"
 
+boolean noAlt = FALSE, sameChrom = FALSE;
+
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "testLiftRechainer - Work out which chains to use for lift-over purposes using dynamic \n"
   "programming rather than greedy fill-in-the-gaps method of nets.\n"
   "usage:\n"
   "   testLiftRechainer in.chain target.2bit query.2bit out.chain\n"
   "options:\n"
-  "   -xxx=XXX\n"
+  "   -noAlt - don't include chains involving _alt or _fix chromosome fragments\n"
+  "   -sameChrom - only include chains from same chromosome\n"
   );
 }
 
 /* Command line validation table. */
 static struct optionSpec options[] = {
+   {"noAlt", OPTION_BOOLEAN},
+   {"sameChrom", OPTION_BOOLEAN},
    {NULL, 0},
 };
 
 struct chainTarget
 /* All the chains associated with a target chromosome. */
     {
     struct chainTarget *next;
     char *name;	    /* Target sequence name */
     struct chain *chainList;	    /* Sorted by tStart */
+    struct chain *bigChain;	    /* Biggest scoring chain on list */
     };
 
+boolean isAltName(char *name)
+/* Return true if ends with _alt or _fix */
+{
+return endsWith(name, "_alt") || endsWith(name, "_fix");
+}
+
 struct chainTarget *readChainTargets(char *fileName, struct hash *tHash, struct hash *qHash)
 /* Return list of chainTargets read from file.  Returned hash will be keyed by
  * target name */
 {
 struct hash *hash = hashNew(0);
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 struct chainTarget *targetList = NULL, *target;
 
 struct chain *chain;
 while ((chain = chainRead(lf)) != NULL)
     {
+    if (noAlt && (isAltName(chain->qName) || isAltName(chain->tName)))
+        continue;
     char *tName = chain->tName;
     target = hashFindVal(hash, tName);
     if (target == NULL)
          {
 	 if (hashLookup(tHash, tName) == NULL)
 	     errAbort("Target sequence %s is in chain but not target.2bit", tName);
 	 AllocVar(target);
 	 target->name = cloneString(tName);
 	 hashAdd(hash, tName, target);
 	 slAddHead(&targetList, target);
 	 }
     if (hashLookup(qHash, chain->qName) == NULL)
 	 errAbort("Query sequence %s is in chain but not target.2bit", chain->qName);
     slAddHead(&target->chainList, chain);
+    if (target->bigChain == NULL || chain->score > target->bigChain->score)
+        target->bigChain = chain;
     }
 
 /* Sort chains within all targets */
 for (target = targetList; target != NULL; target = target->next)
     slSort(&target->chainList, chainCmpTarget);
 
 /* Clean up and return */
 hashFree(&hash);
 lineFileClose(&lf);
 slReverse(&targetList);
 return targetList;
 }
 
 struct crossover
 /* List of positions where one chain flips to another */
@@ -267,45 +283,48 @@
 	next = el->next;
 	struct rechain *rechain = el->val;
 	if (tPos >= rechain->tEnd)
 	    {
 	    dlRemove(el);
 	    dlAddTail(archiveList, el);
 	    }
 	}
 
     /* Add new chains */
     struct rechain *bestChain = NULL;
     long long bestScore = -BIGNUM;
     /* Add new chains to active list */
     while (chain != NULL && chain->tStart == tPos)
 	{
+	if (!sameChrom || sameString(chain->qName, target->bigChain->qName))
+	    {
 	    if (bestChain == NULL)
 		{
 		struct dlNode *el;
 		for (el = activeList->head; !dlEnd(el); el = el->next)
 		    {
 		    struct rechain *rechain = el->val;
 		    if (rechain->score > bestScore)
 			{
 			bestScore = rechain->score;
 			bestChain = rechain;
 			}
 		    }
 		}
 
 	    addNewRechain(chain, tSeq, qSeqHash, qRcSeqHash, bestChain, bestScore, activeList);
+	    }
 	chain = chain->next;
 	}
 
     /* Go through active list updating scores */
     for (el = activeList->head; !dlEnd(el); el = el->next)
         {
 	/* Get rechain structure and point block to relevant one for current tPos */
 	struct rechain *rechain = el->val;
 	struct cBlock *block = rechain->curBlock;
 	while (block != NULL && tPos >= block->tEnd)
 	     {
 	     block = block->next;
 	     }
 	rechain->curBlock = block;
 
@@ -442,33 +461,35 @@
     toLowerN(qSeq->dna, qSeq->size);
     hashAdd(qSeqHash, qSeq->name, qSeq);
     qTotal += qSeq->size;
     }
 verbose(1, "Read %lld query bases in %d sequences\n", qTotal, qSeqHash->elCount);
 
 /* Open output now that all input has been at least checked */
 FILE *f = mustOpen(out, "w");
 
 struct twoBitFile *targetTwoBit = twoBitOpen(tTwoBit);
 struct chainTarget *target;
 for (target = targetList; target != NULL; target = target->next)
     {
     int tSize = hashIntVal(tSizeHash, target->name);
     struct dnaSeq *tSeq = twoBitReadSeqFragLower(targetTwoBit, target->name, 0, tSize);
-    verbose(1, "%s has %d bases and %d chains\n", 
-	target->name, tSeq->size, slCount(target->chainList));
+    verbose(1, "%s has %d bases and %d chains, biggest going to %s\n", 
+	target->name, tSeq->size, slCount(target->chainList), target->bigChain->qName);
     rechainOneTarget(target, tSeq, qSeqHash, qRcSeqHash, f);
     dnaSeqFree(&tSeq);
     }
 
 carefulClose(&f);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 if (argc != 5)
     usage();
+noAlt = optionExists("noAlt");
+sameChrom = optionExists("sameChrom");
 testLiftRechainer(argv[1], argv[2], argv[3], argv[4]);
 return 0;
 }