48ff31d7c310549df1111e86dcd3abd66abe6586
angie
  Wed Oct 24 16:37:39 2018 -0700
Alt haplotype and fix patch alignment tracks with links to multi-region mode (refs #18854)
* Adding altSeqLiftOverPsl and fixSeqListOverPsl as top-level tracks (release alpha).
* New 'db' option for baseColorUseSequence makes it fetch sequence from the current database
(or other database if specified) -- we don't need seq/extFile for alts and patches now that
they have been added to the .2bit.
* Special hgc handler for {alt,fix}SeqLiftOverPsl has links to view the alt/fix in multi-region
mode, to see the alignment details in the current position range, and to jump to the
corresponding position range in the browser.

diff --git src/hg/hgc/hgc.c src/hg/hgc/hgc.c
index 8bbdee4..be40dfe 100644
--- src/hg/hgc/hgc.c
+++ src/hg/hgc/hgc.c
@@ -4301,31 +4301,31 @@
 	errAbort("bigItemImagePath setting for %s track incorrect. Needs to be \"itemImagePath <path> <suffix>\".", tdb->track);
     printf("<A HREF=\"%s/%s.%s\">Download Original Image</A><BR>\n", bothWords[0], item, bothWords[1]);
     }
 
 if ((sameString(tdb->table,"altLocations") || sameString(tdb->table,"fixLocations")) &&
     strchr(item,'_'))
     {
     // Truncate item (alt/fix sequence name) at colon if found:
     char itemCpy[strlen(item)+1];
     safecpy(itemCpy, sizeof(itemCpy), item);
     char *p = strchr(itemCpy, ':');
     if (p)
         *p = '\0';
     char *hgsid = cartSessionId(cart);
     char *desc = sameString(tdb->table, "altLocations") ? "alternate haplotype" : "fix patch";
-    printf("<A HREF=\"/cgi-bin/hgTracks?hgsid=%s&virtModeType=singleAltHaplo&singleAltHaploId=%s\">"
+    printf("<A HREF=\"hgTracks?hgsid=%s&virtModeType=singleAltHaplo&singleAltHaploId=%s\">"
            "Show this %s placed on its chromosome</A><BR>\n",
            hgsid, itemCpy, desc);
     }
 
 printTrackHtml(tdb);
 freez(&dupe);
 hFreeConn(&conn);
 }
 
 void genericClickHandler(struct trackDb *tdb, char *item, char *itemForUrl)
 /* Put up generic track info */
 {
 #ifdef OLD /* Called now by cartWebStart... */
 jsIncludeFile("jquery.js", NULL);
 jsIncludeFile("utils.js",NULL);
@@ -5943,31 +5943,35 @@
 {
 printAlignmentsExtra(pslList, startFirst, hgcCommand, "htcCdnaAliInWindow", tableName, itemIn);
 }
 
 struct psl *getAlignments(struct sqlConnection *conn, char *table, char *acc)
 /* get the list of alignments for the specified acc */
 {
 struct sqlResult *sr = NULL;
 char **row;
 struct psl *psl, *pslList = NULL;
 boolean hasBin;
 char splitTable[64];
 char query[256];
 if (!hFindSplitTable(database, seqName, table, splitTable, &hasBin))
     errAbort("can't find table %s or %s_%s", table, seqName, table);
-
+char *chr = cartOptionalString(cart, "c");
+if (isNotEmpty(chr))
+    sqlSafef(query, sizeof(query), "select * from %s where qName = '%s' and tName = '%s'",
+             splitTable, acc, chr);
+else
     sqlSafef(query, sizeof(query), "select * from %s where qName = '%s'", splitTable, acc);
 sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     psl = pslLoad(row+hasBin);
     slAddHead(&pslList, psl);
     }
 sqlFreeResult(&sr);
 slReverse(&pslList);
 return pslList;
 }
 
 struct psl *loadPslRangeT(char *table, char *qName, char *tName, int tStart, int tEnd)
 /* Load a list of psls given qName tName tStart tEnd */
 {
@@ -7357,35 +7361,43 @@
 else
     partPsl = pslTrimToTargetRange(wholePsl, winStart, winEnd);
 struct dnaSeq *rnaSeq = newDnaSeq(seq, strlen(seq), acc);
 showSomePartialDnaAlignment(partPsl, wholePsl, rnaSeq,
                             NULL, cdsStart, cdsEnd);
 }
 
 static struct dnaSeq *getBaseColorSequence(char *itemName, char *table)
 /* Grab sequence using the sequence and extFile table names out of BASE_COLOR_USE_SEQUENCE. */
 {
 struct trackDb *tdb = hashMustFindVal(trackHash, table);
 char *spec = trackDbRequiredSetting(tdb, BASE_COLOR_USE_SEQUENCE);
 char *specCopy = cloneString(spec);
 
 // value is: extFile seqTbl extFileTbl
+// or:       db [dddBbb1]
 char *words[3];
 int nwords = chopByWhite(specCopy, words, ArraySize(words));
-if ((nwords != ArraySize(words)) || !sameString(words[0], "extFile"))
-    errAbort("invalid %s track setting: %s", BASE_COLOR_USE_SEQUENCE, spec);
+if (sameString(words[0], "extFile") && (nwords == ArraySize(words)))
     return hDnaSeqGet(database, itemName, words[1], words[2]);
+else if (sameString(words[0], "db"))
+    {
+    char *db = (nwords == 2) ? words[1] : database;
+    return hChromSeq(db, itemName, 0, 0);
+    }
+else
+    errAbort("invalid %s track setting: %s", BASE_COLOR_USE_SEQUENCE, spec);
+return NULL; // make compiler happy
 }
 
 void htcCdnaAli(char *acc)
 /* Show alignment for accession. */
 {
 char query[256];
 char table[64];
 char accTmp[64];
 struct sqlConnection *conn;
 struct sqlResult *sr;
 char **row;
 struct psl *psl;
 struct dnaSeq *rnaSeq;
 char *aliTable;
 int start;
@@ -7420,31 +7432,32 @@
 if (sameString("mrnaBlastz", aliTable) || sameString("pseudoMrna", aliTable))
     {
     struct sqlConnection *conn = hAllocConn(database);
     unsigned retId = 0;
     safef(accTmp, sizeof accTmp, "bz-%s", acc);
     if (hRnaSeqAndIdx(accTmp, &rnaSeq, &retId, conn) == -1)
         rnaSeq = hRnaSeq(database, acc);
     hFreeConn(&conn);
     }
 else if (sameString("HInvGeneMrna", aliTable))
     {
     /* get RNA accession for the gene id in the alignment */
     sqlSafef(query, sizeof query, "select mrnaAcc from HInv where geneId='%s'", acc);
     rnaSeq = hRnaSeq(database, sqlQuickString(conn, query));
     }
-else if (sameString("ncbiRefSeqPsl", aliTable) || startsWith("altSeqLiftOverPsl", aliTable))
+else if (sameString("ncbiRefSeqPsl", aliTable) || startsWith("altSeqLiftOverPsl", aliTable) ||
+         startsWith("fixSeqLiftOverPsl", aliTable))
     {
     rnaSeq = getBaseColorSequence(acc, aliTable);
     }
 else
     {
     char *cdnaTable = NULL;
     struct trackDb *tdb = hashFindVal(trackHash, aliTable);
     if (tdb != NULL)
 	cdnaTable = trackDbSetting(tdb, "cdnaTable");
     if (isNotEmpty(cdnaTable) && hTableExists(database, cdnaTable))
 	rnaSeq = hGenBankGetMrna(database, acc, cdnaTable);
     else
 	rnaSeq = hRnaSeq(database, acc);
     }
 
@@ -7548,31 +7561,33 @@
     rnaSeq = oSeq;
     }
 else
     {
     /* Look up alignments in database */
     hFindSplitTable(database, seqName, aliTable, table, &hasBin);
     sqlSafef(query, sizeof(query),
          "select * from %s where qName = '%s' and tName=\"%s\" and tStart=%d", 
          table, acc, seqName, start);
     sr = sqlGetResult(conn, query);
     if ((row = sqlNextRow(sr)) == NULL)
 	errAbort("Couldn't find alignment for %s at %d", acc, start);
     wholePsl = pslLoad(row+hasBin);
     sqlFreeResult(&sr);
 
-    if (startsWith("ucscRetroAli", aliTable) || startsWith("retroMrnaAli", aliTable) || sameString("pseudoMrna", aliTable) || startsWith("altSeqLiftOverPsl", aliTable) || startsWith("ncbiRefSeqPsl", aliTable))
+    if (startsWith("ucscRetroAli", aliTable) || startsWith("retroMrnaAli", aliTable) ||
+        sameString("pseudoMrna", aliTable) || startsWith("altSeqLiftOverPsl", aliTable) ||
+        startsWith("fixSeqLiftOverPsl", aliTable) || startsWith("ncbiRefSeqPsl", aliTable))
 	{
         rnaSeq = getBaseColorSequence(acc, aliTable);
 	}
     else if (sameString("HInvGeneMrna", aliTable))
 	{
 	/* get RNA accession for the gene id in the alignment */
 	sqlSafef(query, sizeof(query), "select mrnaAcc from HInv where geneId='%s'",
 	      acc);
 	rnaSeq = hRnaSeq(database, sqlQuickString(conn, query));
 	}
     else
 	{
 	char *cdnaTable = NULL;
 	struct trackDb *tdb = hashFindVal(trackHash, aliTable);
 	if (tdb != NULL)
@@ -8846,73 +8861,136 @@
 struct sqlConnection *conn = hAllocConn(database);
 int start = cartInt(cart, "o");
 struct psl *pslList = NULL;
 
 cartWebStart(cart, database, "Viral Gene");
 printf("<H2>Viral Gene %s</H2>\n", geneName);
 printCustomUrl(tdb, geneName, TRUE);
 
 pslList = getAlignments(conn, "chr1_viralProt", geneName);
 htmlHorizontalLine();
 printf("<H3>Protein Alignments</H3>");
 printAlignments(pslList, start, "htcProteinAli", "chr1_viralProt", geneName);
 printTrackHtml(tdb);
 }
 
+static boolean pslTrimListToTargetRange(struct psl *pslList, int winStart, int winEnd,
+                                        int *retQRangeStart, int *retQRangeEnd)
+/* If the current window overlaps the target coords of any psl(s) in pslList, then return TRUE and
+ * set *retQRange{Start,End} to span all query coord ranges corresponding to target coord ranges.
+ * errAbort if qName is not consistent across all psls.
+ * Otherwise return FALSE and set *retQRange{Start,End} to -1. */
+{
+boolean foundOverlap = FALSE;
+char *qName = NULL;
+int qRangeStart = -1, qRangeEnd = -1;
+struct psl *psl;
+for (psl = pslList;  psl != NULL;  psl = psl->next)
+    {
+    if (qName == NULL)
+        qName = psl->qName;
+    else if (differentString(qName, psl->qName))
+        errAbort("pslTrimListToTargetRange: inconsistent qName: got both '%s' and '%s'",
+                 qName, psl->qName);
+    if (psl->tStart >= winStart && psl->tEnd <= winEnd)
+        {
+        foundOverlap = TRUE;
+        if (qRangeStart < 0 || psl->qStart < qRangeStart)
+            qRangeStart = psl->qStart;
+        if (qRangeEnd < 0 || psl->qEnd > qRangeEnd)
+            qRangeEnd = psl->qEnd;
+        }
+    else
+        {
+        struct psl *partPsl = pslTrimToTargetRange(psl, winStart, winEnd);
+        if (partPsl)
+            {
+            foundOverlap = TRUE;
+            if (qRangeStart < 0 || partPsl->qStart < qRangeStart)
+                qRangeStart = partPsl->qStart;
+            if (qRangeEnd < 0 || partPsl->qEnd > qRangeEnd)
+                qRangeEnd = partPsl->qEnd;
+            }
+        }
+    }
+*retQRangeStart = qRangeStart;
+*retQRangeEnd = qRangeEnd;
+return foundOverlap;
+}
+
 void doPslAltSeq(struct trackDb *tdb, char *item)
-/* Fairly generic PSL handler -- print out some more details about the
- * alignment. */
+/* Details for alignments between chromosomes and alt haplogtype or fix patch sequences. */
 {
 int start = cartInt(cart, "o");
-int total = 0, i = 0;
-struct psl *pslList = NULL;
 struct sqlConnection *conn = hAllocConn(database);
-// char *otherDb = trackDbSetting(tdb, "otherDb");
-// int altSize = hChromSize(otherDb, item);
 
 genericHeader(tdb, item);
 printCustomUrl(tdb, item, TRUE);
 
 puts("<P>");
-puts("<B>Alignment Summary:</B><BR>\n");
-// char strBuf[64];
-// sprintLongWithCommas(strBuf, altSize);
-// printf("<B>Alignment Summary: '%s' %s</B><BR>\n", item, strBuf);
-pslList = getAlignments(conn, tdb->table, item);
+struct psl *pslList = getAlignments(conn, tdb->table, item);
+if (pslList)
+    {
+    printf("<B>Alignment of %s to %s:</B><BR>\n", item, pslList->tName);
     printAlignments(pslList, start, "htcCdnaAli", tdb->table, item);
 
-puts("<P>");
-total = 0;
-for (i=0;  i < pslList -> blockCount;  i++)
+    char *hgsid = cartSessionId(cart);
+    int rangeStart = 0, rangeEnd = 0;
+    if (pslTrimListToTargetRange(pslList, winStart, winEnd, &rangeStart, &rangeEnd))
+        {
+        printf("<A HREF='hgTracks?hgsid=%s&position=%s:%d-%d'>"
+               "View corresponding position range on %s</A><BR>\n",
+               hgsid, item, rangeStart+1, rangeEnd, item);
+        }
+    char *altFix = item;
+    if (!endsWith(altFix, "alt") && !endsWith(altFix, "fix"))
+        altFix = pslList->tName;
+    printf("<A HREF=\"hgTracks?hgsid=%s&virtModeType=singleAltHaplo&singleAltHaploId=%s\">"
+           "Show %s placed on its chromosome</A><BR>\n",
+           hgsid, altFix, altFix);
+
+    puts("<P><B>Alignment stats:</B><BR>");
+    // Sometimes inversions cause alignments to be split up; just sum up all the stats.
+    int totalBlocks = 0, totalSize = 0, totalMatch = 0, totalMismatch = 0, totalN = 0;
+    int totalTIns = 0, totalQIns = 0;
+    struct psl *psl;
+    for (psl = pslList;  psl != NULL;  psl = psl->next)
         {
-    total += pslList->blockSizes[i];
+        totalBlocks += psl->blockCount;
+        int i;
+        for (i=0;  i < psl->blockCount;  i++)
+            totalSize += psl->blockSizes[i];
+        totalMatch += (psl->match + psl->repMatch);
+        totalMismatch += psl->misMatch;
+        totalN += psl->nCount;
+        totalTIns += psl->tBaseInsert;
+        totalQIns += psl->qBaseInsert;
         }
     printf("%d block(s) covering %d bases<BR>\n"
-       "%d matching bases<BR>\n"
-       "%d mismatching bases<BR>\n"
-       "%d N bases<BR>\n"
+           "%d matching bases (%.2f%%)<BR>\n"
+           "%d mismatching bases (%.2f%%)<BR>\n"
+           "%d N bases(%.2f%%)<BR>\n"
            "%d bases inserted in %s<BR>\n"
-       "%d bases inserted in %s<BR>\n"
-       "score: %d<BR>\n",
-       pslList->blockCount, total,
-       pslList->match,
-       pslList->misMatch,
-       pslList->nCount,
-       pslList->tBaseInsert, hOrganism(database),
-       pslList->qBaseInsert, item,
-       pslScore(pslList));
-
+           "%d bases inserted in %s<BR>\n",
+           totalBlocks, totalSize,
+           totalMatch, 100.0 * (totalMatch / (double)totalSize),
+           totalMismatch, 100.0 * (totalMismatch / (double)totalSize),
+           totalN, 100.0 * (totalN / (double)totalSize),
+           totalTIns, pslList->tName, totalQIns, item);
+    }
+else
+    warn("Unable to find alignment for '%s' in table %s", item, tdb->table);
 printTrackHtml(tdb);
 hFreeConn(&conn);
 }
 
 void doPslDetailed(struct trackDb *tdb, char *item)
 /* Fairly generic PSL handler -- print out some more details about the
  * alignment. */
 {
 int start = cartInt(cart, "o");
 int total = 0, i = 0;
 struct psl *pslList = NULL;
 struct sqlConnection *conn = hAllocConn(database);
 
 genericHeader(tdb, item);
 printCustomUrl(tdb, item, TRUE);
@@ -8924,31 +9002,31 @@
 
 puts("<P>");
 total = 0;
 for (i=0;  i < pslList -> blockCount;  i++)
     {
     total += pslList->blockSizes[i];
     }
 printf("%d block(s) covering %d bases<BR>\n"
        "%d matching bases<BR>\n"
        "%d mismatching bases<BR>\n"
        "%d N bases<BR>\n"
        "%d bases inserted in %s<BR>\n"
        "%d bases inserted in %s<BR>\n"
        "score: %d<BR>\n",
        pslList->blockCount, total,
-       pslList->match,
+       pslList->match + pslList->repMatch,
        pslList->misMatch,
        pslList->nCount,
        pslList->tBaseInsert, hOrganism(database),
        pslList->qBaseInsert, item,
        pslScore(pslList));
 
 printTrackHtml(tdb);
 hFreeConn(&conn);
 }
 
 void printEnsemblCustomUrl(struct trackDb *tdb, char *itemName, boolean encode,
     char *archive)
 /* Print Ensembl Gene URL. */
 {
 char *shortItemName;
@@ -25853,31 +25931,31 @@
 else if (startsWith("atom", table) && !startsWith("atomMaf", table))
     {
     doAtom(tdb, item);
     }
 else if (sameWord(table, "firstEF"))
     {
     firstEF(tdb, item);
     }
 else if ( sameWord(table, "blastHg16KG") ||  sameWord(table, "blatHg16KG" ) ||
         startsWith("blastDm",  table) || sameWord(table, "blastMm6KG") ||
         sameWord(table, "blastSacCer1SG") || sameWord(table, "blastHg17KG") ||
         startsWith("blastCe", table) || sameWord(table, "blastHg18KG") )
     {
     blastProtein(tdb, item);
     }
-else if (startsWith("altSeqLiftOverPsl", table))
+else if (startsWith("altSeqLiftOverPsl", table) || startsWith("fixSeqLiftOverPsl", table))
     {
     doPslAltSeq(tdb, item);
     }
 else if (sameWord(table, "chimpSimpleDiff"))
     {
     doSimpleDiff(tdb, "Chimp");
     }
 /* This is a catch-all for blastz/blat tracks -- any special cases must be
  * above this point! */
 else if (startsWith("map", table) ||startsWith("blastz", table) || startsWith("blat", table) || startsWith("tblast", table) || endsWith(table, "Blastz"))
     {
     char *genome = "Unknown";
     if (startsWith("tblast", table))
         genome = &table[6];
     if (startsWith("map", table))