433d9b4fce034247a83b60de96fea913fa4631d9
markd
  Thu Apr 30 08:40:54 2026 -0700
added chainToMaf command

diff --git src/hg/mouseStuff/chainToMaf/chainToMaf.c src/hg/mouseStuff/chainToMaf/chainToMaf.c
new file mode 100644
index 00000000000..95565b19c13
--- /dev/null
+++ src/hg/mouseStuff/chainToMaf/chainToMaf.c
@@ -0,0 +1,194 @@
+/* chainToMaf - Convert from chain to maf format. */
+
+/* Copyright (C) 2026 The Regents of the University of California
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
+#include "common.h"
+#include "linefile.h"
+#include "hash.h"
+#include "options.h"
+#include "dnaseq.h"
+#include "chain.h"
+#include "nibTwo.h"
+#include "axt.h"
+#include "chainToAxt.h"
+#include "maf.h"
+
+#undef BIGNUM
+#define BIGNUM 0x7fffffff
+
+int maxGap = 100;
+double minIdRatio = 0.0;
+double minScore = 0;
+char *qPrefix = "";
+char *tPrefix = "";
+boolean rescore = FALSE;
+boolean rescoreZero = FALSE;
+
+void usage()
+/* Explain usage and exit. */
+{
+errAbort(
+  "chainToMaf - Convert from chain to maf format\n"
+  "usage:\n"
+  "   chainToMaf in.chain tNibDirOr2bit qNibDirOr2bit out.maf\n"
+  "options:\n"
+  "   -maxGap=N      maximum gap allowed without breaking, default %d\n"
+  "   -minScore=N    minimum chain score\n"
+  "   -minId=N       minimum percentage ID within blocks (0-100)\n"
+  "   -qPrefix=XX.   prepend XX. to query sequence name in maf\n"
+  "   -tPrefix=YY.   prepend YY. to target sequence name in maf\n"
+  "   -score         recalculate score for each maf block\n"
+  "   -scoreZero     recalculate score only when chain score is zero\n"
+  , maxGap
+  );
+}
+
+static struct optionSpec options[] = {
+   {"maxGap", OPTION_INT},
+   {"minScore", OPTION_FLOAT},
+   {"minId", OPTION_FLOAT},
+   {"qPrefix", OPTION_STRING},
+   {"tPrefix", OPTION_STRING},
+   {"score", OPTION_BOOLEAN},
+   {"scoreZero", OPTION_BOOLEAN},
+   {NULL, 0},
+};
+
+static struct dnaSeq *loadSeqStrand(struct nibTwoCache *ntc, char *seqName, int start, int end,
+                                    char strand)
+/* Load mixed case sequence, reverse-complemented if strand is '-'. */
+{
+struct dnaSeq *seq;
+int size = end - start;
+assert(size >= 0);
+if (strand == '-')
+    {
+    reverseIntRange(&start, &end, nibTwoGetSize(ntc, seqName));
+    seq = nibTwoCacheSeqPartExt(ntc, seqName, start, size, TRUE, NULL);
+    reverseComplement(seq->dna, seq->size);
+    }
+else
+    {
+    seq = nibTwoCacheSeqPartExt(ntc, seqName, start, size, TRUE, NULL);
+    }
+return seq;
+}
+
+static double axtIdRatio(struct axt *axt)
+/* Return percentage ID ignoring indels. */
+{
+int match = 0, ali = 0;
+int i, symCount = axt->symCount;
+for (i = 0; i < symCount; ++i)
+    {
+    char q = toupper(axt->qSym[i]);
+    char t = toupper(axt->tSym[i]);
+    if (q != '-' && t != '-')
+        {
+        ++ali;
+        if (q == t)
+            ++match;
+        }
+    }
+if (match == 0)
+    return 0.0;
+return (double)match / (double)ali;
+}
+
+static struct mafAli *axtToMafAli(struct axt *axt, int tSize, int qSize)
+/* Build a mafAli from an axt. Steals qSym/tSym from axt. */
+{
+struct mafAli *ali;
+struct mafComp *comp;
+char srcName[256];
+
+AllocVar(ali);
+ali->score = axt->score;
+if ((ali->score == 0 && rescoreZero) || rescore)
+    ali->score = axtScoreDnaDefault(axt);
+
+AllocVar(comp);
+safef(srcName, sizeof(srcName), "%s%s", qPrefix, axt->qName);
+comp->src = cloneString(srcName);
+comp->srcSize = qSize;
+comp->strand = axt->qStrand;
+comp->start = axt->qStart;
+comp->size = axt->qEnd - axt->qStart;
+comp->text = axt->qSym;
+axt->qSym = NULL;
+slAddHead(&ali->components, comp);
+
+AllocVar(comp);
+safef(srcName, sizeof(srcName), "%s%s", tPrefix, axt->tName);
+comp->src = cloneString(srcName);
+comp->srcSize = tSize;
+comp->strand = axt->tStrand;
+comp->start = axt->tStart;
+comp->size = axt->tEnd - axt->tStart;
+comp->text = axt->tSym;
+axt->tSym = NULL;
+slAddHead(&ali->components, comp);
+
+return ali;
+}
+
+static void doAChain(struct chain *chain, struct nibTwoCache *tSeqCache,
+                     struct nibTwoCache *qSeqCache, FILE *f)
+/* Convert one chain to one or more maf blocks. */
+{
+struct dnaSeq *qSeq = loadSeqStrand(qSeqCache, chain->qName, chain->qStart, chain->qEnd,
+                                    chain->qStrand);
+struct dnaSeq *tSeq = loadSeqStrand(tSeqCache, chain->tName, chain->tStart, chain->tEnd, '+');
+struct axt *axtList = chainToAxt(chain, qSeq, chain->qStart, tSeq, chain->tStart, maxGap, BIGNUM);
+struct axt *axt;
+
+for (axt = axtList; axt != NULL; axt = axt->next)
+    {
+    if (minIdRatio > 0.0 && axtIdRatio(axt) < minIdRatio)
+        continue;
+    struct mafAli *ali = axtToMafAli(axt, chain->tSize, chain->qSize);
+    mafWrite(f, ali);
+    mafAliFree(&ali);
+    }
+axtFreeList(&axtList);
+freeDnaSeq(&qSeq);
+freeDnaSeq(&tSeq);
+}
+
+static void chainToMaf(char *inName, char *tNibDirOr2bit, char *qNibDirOr2bit, char *outName)
+/* chainToMaf - Convert from chain to maf format. */
+{
+struct lineFile *lf = lineFileOpen(inName, TRUE);
+struct nibTwoCache *tSeqCache = nibTwoCacheNew(tNibDirOr2bit);
+struct nibTwoCache *qSeqCache = nibTwoCacheNew(qNibDirOr2bit);
+FILE *f = mustOpen(outName, "w");
+struct chain *chain;
+
+mafWriteStart(f, "blastz");
+while ((chain = chainRead(lf)) != NULL)
+    {
+    if (chain->score >= minScore)
+        doAChain(chain, tSeqCache, qSeqCache, f);
+    chainFree(&chain);
+    }
+mafWriteEnd(f);
+carefulClose(&f);
+lineFileClose(&lf);
+}
+
+int main(int argc, char *argv[])
+/* Process command line. */
+{
+optionInit(&argc, argv, options);
+maxGap = optionInt("maxGap", maxGap);
+minIdRatio = optionFloat("minId", 0.0) / 100.0;
+minScore = optionFloat("minScore", minScore);
+qPrefix = optionVal("qPrefix", qPrefix);
+tPrefix = optionVal("tPrefix", tPrefix);
+rescore = optionExists("score");
+rescoreZero = optionExists("scoreZero");
+if (argc != 5)
+    usage();
+chainToMaf(argv[1], argv[2], argv[3], argv[4]);
+return 0;
+}