a94ab9b578fa7ff2bad82cefb1742985c55b6a76
braney
  Thu May 9 15:03:10 2024 -0700
ongoing work on pair browser

diff --git src/hg/utils/buildPairAssembly/buildPairAssembly.c src/hg/utils/buildPairAssembly/buildPairAssembly.c
index ec6fc83..9f6fc08 100644
--- src/hg/utils/buildPairAssembly/buildPairAssembly.c
+++ src/hg/utils/buildPairAssembly/buildPairAssembly.c
@@ -1,23 +1,24 @@
 /* buildPairAssembly - From a single coverage chain build a pairwise assembly including all sequence from both query and target. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "chain.h"
 #include "twoBit.h"
 #include "dnaseq.h"
+#include "psl.h"
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "buildPairAssembly - From a single coverage chain build a pairwise assembly including all sequence from both query and target\n"
   "usage:\n"
   "   buildPairAssembly input.chain tSequenceName tSeq.2bit qSequenceName qSeq.2bit out.fa query.psl target.psl map.bed\n"
   "options:\n"
   "   -xxx=XXX\n"
   );
 }
 
 /* Command line validation table. */
 static struct optionSpec options[] = {
@@ -45,63 +46,64 @@
 {
 const struct linearBlock *a = *((struct linearBlock **)va);
 const struct linearBlock *b = *((struct linearBlock **)vb);
 return a->tStart - b->tStart;
 }
 
 void buildPairAssembly(char *inputChain, char *tSequenceName, char *t2bitName, char *qSequenceName, char *q2bitName, char *outFaName, char *outQueryPsl, char *outTargetPsl, char *outMapBed)
 /* buildPairAssembly - From a single coverage chain build a pairwise assembly including all sequence from both query and target. */
 {
 struct lineFile *lf = lineFileOpen(inputChain, TRUE);
 struct chain *chain;
 struct cBlock *blockList = NULL, *cBlock, *nextBlock;
 struct twoBitFile *q2bit = twoBitOpen(q2bitName);
 struct twoBitFile *t2bit = twoBitOpen(t2bitName);
 FILE *outFa = mustOpen(outFaName, "w");
+FILE *outMap = mustOpen(outMapBed, "w");
 
-fprintf(outFa, ">chr1\n");
+fprintf(outFa, ">buildPair\n");
 
 while ((chain = chainRead(lf)) != NULL)
     {
     if (!sameString(tSequenceName, chain->tName) || !sameString(qSequenceName, chain->qName))
         continue;
 
     for (cBlock = chain->blockList; cBlock; cBlock = nextBlock)
         {
         nextBlock = cBlock->next;
         if (chain->qStrand == '-')
             {
             unsigned qStart = cBlock->qStart;
             cBlock->qStart = chain->qSize - cBlock->qEnd;
             cBlock->qEnd = chain->qSize - qStart;
             cBlock->score = 1;
             }
         else
             cBlock->score = 0;
 
         slAddHead(&blockList, cBlock);
         }
     }
 
 slSort(&blockList, cBlockCmpQuery);
 unsigned prevBase = blockList->qEnd;
 struct linearBlock *linearBlock;
 for (cBlock = blockList->next; cBlock; cBlock = cBlock->next)
     {
     if (cBlock->qStart < prevBase)
         {
-        printf("qBlock overlap %d %d\n", cBlock->qStart, prevBase);
+        //printf("qBlock overlap %d %d\n", cBlock->qStart, prevBase);
         cBlock->score = 2;
         continue;
         }
     if (cBlock->qStart > prevBase)
         {
         AllocVar(linearBlock);
         linearBlock->qStart = prevBase;
         linearBlock->qEnd = cBlock->qStart;
         if (cBlock->score == 1)
             linearBlock->isFlipped = TRUE;
         cBlock->data = (void *)linearBlock;
         }
     prevBase = cBlock->qEnd;
     }
 
@@ -138,53 +140,124 @@
         firstBlock = FALSE;
 
     AllocVar(linearBlock);
     linearBlock->qStart = cBlock->qStart;
     linearBlock->qEnd = cBlock->qEnd;
     linearBlock->tStart = cBlock->tStart;
     linearBlock->tEnd = cBlock->tEnd;
     if (cBlock->score == 1)
         linearBlock->isFlipped = TRUE;
     slAddHead(&linearBlockList, linearBlock);
 
     prevBase = cBlock->tEnd;
     }
 
 slReverse(&linearBlockList);
+
 unsigned startAddress = 0, size;
 for(linearBlock = linearBlockList; linearBlock; linearBlock = linearBlock->next)
     {
     if (linearBlock->qStart == linearBlock->qEnd)
         {
         struct dnaSeq *dna = twoBitReadSeqFrag(t2bit, tSequenceName, linearBlock->tStart, linearBlock->tEnd);
         fprintf(outFa, "%s\n", (char *)dna->dna);
         size = linearBlock->tEnd - linearBlock->tStart;
-        printf("chr1 %d %d %d refOnly %d %d\n", startAddress, startAddress + size, linearBlock->isFlipped, linearBlock->tStart, linearBlock->tEnd);
+        //printf("chr1 %d %d refOnly 0 + %d %d 255,255,0\n", startAddress, startAddress + size, linearBlock->isFlipped, linearBlock->tStart, linearBlock->tEnd);
+        fprintf(outMap,"buildPair %d %d refOnly 0 + %d %d 255,255,0\n", startAddress, startAddress + size, startAddress, startAddress + size);
         }
     else if (linearBlock->tStart == linearBlock->tEnd)
         {
         struct dnaSeq *dna = twoBitReadSeqFrag(q2bit, qSequenceName, linearBlock->qStart, linearBlock->qEnd);
         fprintf(outFa, "%s\n", (char *)dna->dna);
         size = linearBlock->qEnd - linearBlock->qStart;
-        printf("chr1 %d %d %d queryOnly %d %d\n", startAddress, startAddress + size, linearBlock->isFlipped, linearBlock->qStart, linearBlock->qEnd);
+        char strand = linearBlock->isFlipped ? '-' : '+';
+        //printf("chr1 %d %d %d queryOnly %d %d\n", startAddress, startAddress + size, linearBlock->isFlipped, linearBlock->qStart, linearBlock->qEnd);
+        fprintf(outMap,"buildPair %d %d queryOnly 0 %c %d %d 170,255,60\n", startAddress, startAddress + size, strand, startAddress, startAddress + size);
         }
     else
         {
         struct dnaSeq *dna = twoBitReadSeqFrag(t2bit, tSequenceName, linearBlock->tStart, linearBlock->tEnd);
         fprintf(outFa, "%s\n", (char *)dna->dna);
         size = linearBlock->qEnd - linearBlock->qStart;
-        printf("chr1 %d %d %d both %d %d %d %d\n", startAddress, startAddress + size, linearBlock->isFlipped,linearBlock->tStart, linearBlock->tEnd, linearBlock->qStart, linearBlock->qEnd);
+        char strand = linearBlock->isFlipped ? '-' : '+';
+        //printf("chr1 %d %d both 0 %c  %d %d %d %d\n", startAddress, startAddress + size, linearBlock->isFlipped,linearBlock->tStart, linearBlock->tEnd, linearBlock->qStart, linearBlock->qEnd);
+        fprintf(outMap,"buildPair %d %d both 0 %c  %d %d 20,40,140\n", startAddress, startAddress + size, strand,  startAddress, startAddress + size); 
+        }
+    startAddress += size;
+    }
+
+unsigned sumSize = startAddress;
+unsigned tSize = twoBitSeqSize(t2bit, tSequenceName);
+unsigned qSize = twoBitSeqSize(q2bit, qSequenceName);
+printf("qSize %d\n", qSize);
+FILE *outQuery = mustOpen(outQueryPsl, "w");
+FILE *outTarget = mustOpen(outTargetPsl, "w");
+startAddress = 0;
+for(linearBlock = linearBlockList; linearBlock; linearBlock = linearBlock->next)
+    {
+    if (linearBlock->qStart == linearBlock->qEnd)
+        {
+        size = linearBlock->tEnd - linearBlock->tStart;
+        struct psl *psl = pslNew(tSequenceName, tSize, linearBlock->tStart, linearBlock->tEnd,
+            "buildPair", sumSize,  startAddress, startAddress + size,
+            "+", 1, 0);
+        psl->qStarts[0] = linearBlock->tStart;
+        psl->tStarts[0] = startAddress;
+        psl->blockSizes[0] = size;
+        psl->blockCount = 1;
+        pslTabOut(psl, outTarget);
+        }
+    else if (linearBlock->tStart == linearBlock->tEnd)
+        {
+        size = linearBlock->qEnd - linearBlock->qStart;
+        struct psl *psl = pslNew(qSequenceName, qSize, linearBlock->qStart, linearBlock->qEnd,
+            "buildPair", sumSize,  startAddress, startAddress + size,
+            linearBlock->isFlipped ? "-" : "+", 1, 0);
+        if (linearBlock->isFlipped)
+            psl->qStarts[0] = qSize - linearBlock->qEnd;
+        else
+            psl->qStarts[0] = linearBlock->qStart;
+        psl->tStarts[0] = startAddress;
+        psl->blockSizes[0] = size;
+        psl->blockCount = 1;
+        pslTabOut(psl, outQuery);
+        }
+    else
+        {
+        size = linearBlock->qEnd - linearBlock->qStart;
+        struct psl *psl = pslNew(tSequenceName, tSize, linearBlock->tStart, linearBlock->tEnd,
+            "buildPair", sumSize,  startAddress, startAddress + size,
+            "+", 1, 0);
+        psl->qStarts[0] = linearBlock->tStart;
+        psl->tStarts[0] = startAddress;
+        psl->blockSizes[0] = size;
+        psl->blockCount = 1;
+        pslTabOut(psl, outTarget);
+
+        {
+        struct psl *psl = pslNew(qSequenceName, qSize, linearBlock->qStart, linearBlock->qEnd,
+            "buildPair", sumSize,  startAddress, startAddress + size,
+            linearBlock->isFlipped ? "-" : "+", 1, 0);
+        if (linearBlock->isFlipped)
+            psl->qStarts[0] = qSize - linearBlock->qEnd;
+        else
+            psl->qStarts[0] = linearBlock->qStart;
+        psl->tStarts[0] = startAddress;
+        psl->blockSizes[0] = size;
+        psl->blockCount = 1;
+        pslTabOut(psl, outQuery);
+        }
         }
     startAddress += size;
     }
 
 // validate
 slSort(&linearBlockList, lbCmpTarget);
 firstBlock = TRUE;
 for(linearBlock = linearBlockList; linearBlock; linearBlock = linearBlock->next)
     {
     if (linearBlock->tStart == linearBlock->tEnd)
         continue;
     if (!firstBlock)
         {
         if (linearBlock->tStart != startAddress)
             errAbort("tStart %d %d", startAddress, linearBlock->tStart);