004d56eff1d9b74e312104b30ea872201e92cf97
braney
  Thu Dec 10 15:50:57 2020 -0800
add the ability for bigPsl tracks to show query sequences like genePred
if they have the sequence and CDS.   #24672

diff --git src/hg/hgc/hgc.c src/hg/hgc/hgc.c
index 4fea33b..4649d02 100644
--- src/hg/hgc/hgc.c
+++ src/hg/hgc/hgc.c
@@ -3096,40 +3096,73 @@
             {
             char **extraFields = (restFields + restBedFields);
             int extraFieldCount = restCount - restBedFields;
             int printCount = extraFieldsPrint(tdb,NULL,extraFields, extraFieldCount);
             printCount += 0;
             }
         }
     }
 
 char *bedRow[32];
 char startBuf[16], endBuf[16];
 
 int lastChromId = -1;
 char chromName[bbi->chromBpt->keySize+1];
 
+boolean firstTime = TRUE;
 for (bb = bbList; bb != NULL; bb = bb->next)
     {
     bbiCachedChromLookup(bbi, bb->chromId, lastChromId, chromName, sizeof(chromName));
 
     lastChromId=bb->chromId;
     bigBedIntervalToRow(bb, chromName, startBuf, endBuf, bedRow, 4);
     if (showEvery || sameString(bedRow[3], item))
 	{
-        struct psl *psl= pslFromBigPsl(chromName, bb, seqTypeField, NULL, NULL);
+        char *cdsStr, *seq;
+        struct psl *psl= pslFromBigPsl(chromName, bb, seqTypeField, &seq, &cdsStr);
         slAddHead(&pslList, psl);
+
+        // we're assuming that if there are multiple psl's with the same id that
+        // they are the same query sequence so we only put out one set of sequences
+        if (firstTime)  
+            {
+            firstTime = FALSE;
+            printf("<H3>Links to sequence:</H3>\n");
+            printf("<UL>\n");
+
+            if (!isEmpty(seq))    // if there is a query sequence
+                {
+                if (!isEmpty(cdsStr))  // if we have CDS 
+                    {
+                    puts("<LI>\n");
+                    hgcAnchorSomewhere("htcTranslatedBigPsl", item, "translate", seqName);
+                    printf("Translated Amino Acid Sequence</A> from Query Sequence\n");
+                    puts("</LI>\n");
+                    }
+
+                puts("<LI>\n");
+                hgcAnchorSomewhere("htcBigPslSequence", item, tdb->track, seqName);
+                printf("Query Sequence</A> \n");
+                puts("</LI>\n");
+                }
+
+            puts("<LI>\n");
+            hgcAnchorSomewhere("htcGeneInGenome", item, tdb->track, seqName);
+            printf("Genomic Sequence</A> from assembly\n");
+            puts("</LI>\n");
+            printf("</UL>\n");
+            }
 	}
     }
 
 char *sort = cartUsualString(cart, "sort", pslSortList[0]);
 pslSortListByVar(&pslList, sort);
 
 if (showEvery)
     printf("<H3>Genomic Alignments</H3>");
 else
     printf("<H3>%s/Genomic Alignments</H3>", item);
 if (showEvery || pslIsProtein(pslList))
     printAlignmentsSimple(pslList, start, "htcBigPslAli", tdb->table, item);
 else
     printAlignmentsExtra(pslList, start, "htcBigPslAli", "htcBigPslAliInWindow",
         tdb->table, item);
@@ -7336,52 +7369,55 @@
         sqlSafef(query, sizeof query, "select name from %s where id = '%s'", cdsTable, cdsId);
         char *cdsString = sqlQuickString(conn, query);
         if (isNotEmpty(cdsString))
             genbankParseCds(cdsString, retCdsStart, retCdsEnd);
         }
     }
 }
 
 void htcBigPslAli(char *acc)
 /* Show alignment for accession in bigPsl file. */
 {
 struct psl *psl;
 char *aliTable;
 int start;
 unsigned int cdsStart = 0, cdsEnd = 0;
+struct sqlConnection *conn = NULL;
 
+if (!trackHubDatabase(database))
+    conn = hAllocConnTrack(database, tdb);
 
 aliTable = cartString(cart, "aliTable");
 if (isCustomTrack(aliTable))
     {
     struct customTrack *ct = lookupCt(aliTable);
     tdb = ct->tdb;
     }
 else
     tdb = hashFindVal(trackHash, aliTable);
 char title[1024];
 safef(title, sizeof title, "%s vs Genomic [%s]", acc, aliTable);
 htmlFramesetStart(title);
 
 /* Get some environment vars. */
 start = cartInt(cart, "l");
 int end = cartInt(cart, "r");
 char *chrom = cartString(cart, "c");
 
 char *seq, *cdsString = NULL;
 struct lm *lm = lmInit(0);
-char *fileName = bbiNameFromSettingOrTable(tdb, NULL, tdb->table);
+char *fileName = bbiNameFromSettingOrTable(tdb, conn, tdb->table);
 struct bbiFile *bbi = bigBedFileOpen(fileName);
 struct bigBedInterval *bb, *bbList = bigBedIntervalQuery(bbi, chrom, start, end, 0, lm);
 char *bedRow[32];
 char startBuf[16], endBuf[16];
 for (bb = bbList; bb != NULL; bb = bb->next)
     {
     bigBedIntervalToRow(bb, seqName, startBuf, endBuf, bedRow, ArraySize(bedRow));
     struct bed *bed = bedLoadN(bedRow, 12);
     if (sameString(bed->name, acc) && (bb->start == start) && (bb->end == end))
 	{
 	bb->next = NULL;
 	break;
 	}
     }
 if (bb == NULL)
@@ -8590,30 +8626,111 @@
 gp = getGenePredForPosition(table, geneName);
 
 if (gp == NULL)
     errAbort("%s not found in %s when translating to protein",
              geneName, table);
 else if (gp->cdsStart == gp->cdsEnd)
     errAbort("No CDS defined: no protein translation for %s", geneName);
 prot = getPredMRnaProtSeq(gp);
 safef(protName, sizeof(protName), "%s_prot", geneName);
 displayProteinPrediction(protName, prot);
 
 freez(&prot);
 genePredFree(&gp);
 }
 
+void htcBigPsl( char *acc, boolean translate)
+/* Output bigPsl query sequence, translated to amino acid sequence if requested. */
+{
+struct sqlConnection *conn = NULL;
+
+if (!trackHubDatabase(database))
+    conn = hAllocConn(database);
+
+int start = cartInt(cart, "l");
+int end = cartInt(cart, "r");
+char *table = cartString(cart, "table");
+
+struct trackDb *tdb ;
+if (isHubTrack(table))
+    tdb = hubConnectAddHubForTrackAndFindTdb( database, table, NULL, trackHash);
+else
+    tdb = hashFindVal(trackHash, table);
+char *fileName = bbiNameFromSettingOrTable(tdb, conn, tdb->table);
+struct bbiFile *bbi = bigBedFileOpen(fileName);
+struct lm *lm = lmInit(0);
+int ivStart = start, ivEnd = end;
+if (start == end)
+    {  
+    // item is an insertion; expand the search range from 0 bases to 2 so we catch it:
+    ivStart = max(0, start-1);
+    ivEnd++;
+    }  
+
+unsigned seqTypeField =  bbExtraFieldIndex(bbi, "seqType");
+struct bigBedInterval *bb, *bbList = NULL;
+
+bbList = bigBedIntervalQuery(bbi, seqName, ivStart, ivEnd, 0, lm);
+
+char *bedRow[32];
+char startBuf[16], endBuf[16];
+
+int lastChromId = -1;
+char chromName[bbi->chromBpt->keySize+1];
+
+for (bb = bbList; bb != NULL; bb = bb->next)
+    {
+    bbiCachedChromLookup(bbi, bb->chromId, lastChromId, chromName, sizeof(chromName));
+
+    lastChromId=bb->chromId;
+    bigBedIntervalToRow(bb, chromName, startBuf, endBuf, bedRow, 4);
+
+    // we're assuming that if there are multiple psl's with the same id that
+    // they are the same query sequence so we only put out one sequence
+    if (sameString(bedRow[3], acc))
+	{
+        char *cdsStr, *seq;
+        pslFromBigPsl(chromName, bb, seqTypeField, &seq, &cdsStr);
+
+        if (translate)
+            {
+            struct genbankCds cds;
+            if (!genbankCdsParse(cdsStr, &cds))
+                errAbort("can't parse CDS for %s: %s", acc, cdsStr);
+            int protBufSize = ((cds.end-cds.start)/3)+4;
+            char *prot = needMem(protBufSize);
+
+            seq[cds.end] = '\0';
+            dnaTranslateSome(seq+cds.start, prot, protBufSize);
+
+            cartHtmlStart("Protein Translation of query sequence");
+            displayProteinPrediction(acc, prot);
+            return;
+            }
+        else
+            {
+            cartHtmlStart("Query sequence");
+            printf("<PRE><TT>");
+            printf(">%s length=%d\n", acc,(int)strlen(seq));
+            printLines(stdout, seq, 50);
+            printf("</TT></PRE>");
+            return;
+            }
+	}
+    }
+}
+
 void htcTranslatedMRna(struct trackDb *tdb, char *acc)
 /* Translate mRNA to protein and display it. */
 {
 struct sqlConnection *conn = hAllocConn(database);
 struct genbankCds cds = getCds(conn, acc);
 struct dnaSeq *mrna = hGenBankGetMrna(database, acc, NULL);
 if (mrna == NULL)
     errAbort("mRNA sequence %s not found", acc);
 if (cds.end > mrna->size)
     errAbort("CDS bounds exceed length of mRNA for %s", acc);
 
 int protBufSize = ((cds.end-cds.start)/3)+4;
 char *prot = needMem(protBufSize);
 
 mrna->dna[cds.end] = '\0';
@@ -8958,31 +9075,35 @@
 struct trackDb *tdb = NULL;
 
 if (isHubTrack(table))
     {
     tdb = hubConnectAddHubForTrackAndFindTdb( database, table, NULL, trackHash);
     itemCount = getSeqForBigGene(tdb, geneName);
     }
 else if (isCustomTrack(table))
     {
     tdb = getCustomTrackTdb(table);
     itemCount = getSeqForBigGene(tdb, geneName);
     }
 else
     {
     tdb = hashFindVal(trackHash, table);
-    char *bigDataUrl = trackDbSetting(tdb, "bigDataUrl");
+    struct sqlConnection *conn = NULL;
+
+    if (!trackHubDatabase(database))
+        conn = hAllocConnTrack(database, tdb);
+    char *bigDataUrl = bbiNameFromSettingOrTable(tdb, conn, tdb->table);
     if (bigDataUrl)
         {
         itemCount = getSeqForBigGene(tdb, geneName);
         }
     else
         {
         char constraints[256];
         safef(constraints, sizeof(constraints), "name = %s", quotedItem);
         itemCount = hgSeqItemsInRange(database, table, seqName, winStart, winEnd, constraints);
         }
     }
 if (itemCount == 0)
     printf("\n# No results returned from query.\n\n");
 puts("</PRE>");
 freeMem(quotedItem);
@@ -26386,30 +26507,38 @@
     {
     htcProteinAli(item, cartString(cart, "aliTable"));
     }
 else if (sameWord(table, "htcBlatXeno"))
     {
     htcBlatXeno(item, cartString(cart, "aliTable"));
     }
 else if (sameWord(table, "htcExtSeq"))
     {
     htcExtSeq(item);
     }
 else if (sameWord(table, "htcTranslatedProtein"))
     {
     htcTranslatedProtein(item);
     }
+else if (sameWord(table, "htcBigPslSequence"))
+    {
+    htcBigPsl(item, FALSE);
+    }
+else if (sameWord(table, "htcTranslatedBigPsl"))
+    {
+    htcBigPsl(item, TRUE);
+    }
 else if (sameWord(table, "htcTranslatedPredMRna"))
     {
     htcTranslatedPredMRna(item);
     }
 else if (sameWord(table, "htcTranslatedMRna"))
     {
     htcTranslatedMRna(tdbForTableArg(), item);
     }
 else if (sameWord(table, "ncbiRefSeqSequence"))
     {
     ncbiRefSeqSequence(item);
     }
 else if (sameWord(table, "htcGeneMrna"))
     {
     htcGeneMrna(item);