48ff31d7c310549df1111e86dcd3abd66abe6586
angie
Wed Oct 24 16:37:39 2018 -0700
Alt haplotype and fix patch alignment tracks with links to multi-region mode (refs #18854)
* Adding altSeqLiftOverPsl and fixSeqListOverPsl as top-level tracks (release alpha).
* New 'db' option for baseColorUseSequence makes it fetch sequence from the current database
(or other database if specified) -- we don't need seq/extFile for alts and patches now that
they have been added to the .2bit.
* Special hgc handler for {alt,fix}SeqLiftOverPsl has links to view the alt/fix in multi-region
mode, to see the alignment details in the current position range, and to jump to the
corresponding position range in the browser.
diff --git src/hg/hgc/hgc.c src/hg/hgc/hgc.c
index 8bbdee4..be40dfe 100644
--- src/hg/hgc/hgc.c
+++ src/hg/hgc/hgc.c
@@ -4301,31 +4301,31 @@
errAbort("bigItemImagePath setting for %s track incorrect. Needs to be \"itemImagePath <path> <suffix>\".", tdb->track);
printf("<A HREF=\"%s/%s.%s\">Download Original Image</A><BR>\n", bothWords[0], item, bothWords[1]);
}
if ((sameString(tdb->table,"altLocations") || sameString(tdb->table,"fixLocations")) &&
strchr(item,'_'))
{
// Truncate item (alt/fix sequence name) at colon if found:
char itemCpy[strlen(item)+1];
safecpy(itemCpy, sizeof(itemCpy), item);
char *p = strchr(itemCpy, ':');
if (p)
*p = '\0';
char *hgsid = cartSessionId(cart);
char *desc = sameString(tdb->table, "altLocations") ? "alternate haplotype" : "fix patch";
- printf("<A HREF=\"/cgi-bin/hgTracks?hgsid=%s&virtModeType=singleAltHaplo&singleAltHaploId=%s\">"
+ printf("<A HREF=\"hgTracks?hgsid=%s&virtModeType=singleAltHaplo&singleAltHaploId=%s\">"
"Show this %s placed on its chromosome</A><BR>\n",
hgsid, itemCpy, desc);
}
printTrackHtml(tdb);
freez(&dupe);
hFreeConn(&conn);
}
void genericClickHandler(struct trackDb *tdb, char *item, char *itemForUrl)
/* Put up generic track info */
{
#ifdef OLD /* Called now by cartWebStart... */
jsIncludeFile("jquery.js", NULL);
jsIncludeFile("utils.js",NULL);
@@ -5943,31 +5943,35 @@
{
printAlignmentsExtra(pslList, startFirst, hgcCommand, "htcCdnaAliInWindow", tableName, itemIn);
}
struct psl *getAlignments(struct sqlConnection *conn, char *table, char *acc)
/* get the list of alignments for the specified acc */
{
struct sqlResult *sr = NULL;
char **row;
struct psl *psl, *pslList = NULL;
boolean hasBin;
char splitTable[64];
char query[256];
if (!hFindSplitTable(database, seqName, table, splitTable, &hasBin))
errAbort("can't find table %s or %s_%s", table, seqName, table);
-
+char *chr = cartOptionalString(cart, "c");
+if (isNotEmpty(chr))
+ sqlSafef(query, sizeof(query), "select * from %s where qName = '%s' and tName = '%s'",
+ splitTable, acc, chr);
+else
sqlSafef(query, sizeof(query), "select * from %s where qName = '%s'", splitTable, acc);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
{
psl = pslLoad(row+hasBin);
slAddHead(&pslList, psl);
}
sqlFreeResult(&sr);
slReverse(&pslList);
return pslList;
}
struct psl *loadPslRangeT(char *table, char *qName, char *tName, int tStart, int tEnd)
/* Load a list of psls given qName tName tStart tEnd */
{
@@ -7357,35 +7361,43 @@
else
partPsl = pslTrimToTargetRange(wholePsl, winStart, winEnd);
struct dnaSeq *rnaSeq = newDnaSeq(seq, strlen(seq), acc);
showSomePartialDnaAlignment(partPsl, wholePsl, rnaSeq,
NULL, cdsStart, cdsEnd);
}
static struct dnaSeq *getBaseColorSequence(char *itemName, char *table)
/* Grab sequence using the sequence and extFile table names out of BASE_COLOR_USE_SEQUENCE. */
{
struct trackDb *tdb = hashMustFindVal(trackHash, table);
char *spec = trackDbRequiredSetting(tdb, BASE_COLOR_USE_SEQUENCE);
char *specCopy = cloneString(spec);
// value is: extFile seqTbl extFileTbl
+// or: db [dddBbb1]
char *words[3];
int nwords = chopByWhite(specCopy, words, ArraySize(words));
-if ((nwords != ArraySize(words)) || !sameString(words[0], "extFile"))
- errAbort("invalid %s track setting: %s", BASE_COLOR_USE_SEQUENCE, spec);
+if (sameString(words[0], "extFile") && (nwords == ArraySize(words)))
return hDnaSeqGet(database, itemName, words[1], words[2]);
+else if (sameString(words[0], "db"))
+ {
+ char *db = (nwords == 2) ? words[1] : database;
+ return hChromSeq(db, itemName, 0, 0);
+ }
+else
+ errAbort("invalid %s track setting: %s", BASE_COLOR_USE_SEQUENCE, spec);
+return NULL; // make compiler happy
}
void htcCdnaAli(char *acc)
/* Show alignment for accession. */
{
char query[256];
char table[64];
char accTmp[64];
struct sqlConnection *conn;
struct sqlResult *sr;
char **row;
struct psl *psl;
struct dnaSeq *rnaSeq;
char *aliTable;
int start;
@@ -7420,31 +7432,32 @@
if (sameString("mrnaBlastz", aliTable) || sameString("pseudoMrna", aliTable))
{
struct sqlConnection *conn = hAllocConn(database);
unsigned retId = 0;
safef(accTmp, sizeof accTmp, "bz-%s", acc);
if (hRnaSeqAndIdx(accTmp, &rnaSeq, &retId, conn) == -1)
rnaSeq = hRnaSeq(database, acc);
hFreeConn(&conn);
}
else if (sameString("HInvGeneMrna", aliTable))
{
/* get RNA accession for the gene id in the alignment */
sqlSafef(query, sizeof query, "select mrnaAcc from HInv where geneId='%s'", acc);
rnaSeq = hRnaSeq(database, sqlQuickString(conn, query));
}
-else if (sameString("ncbiRefSeqPsl", aliTable) || startsWith("altSeqLiftOverPsl", aliTable))
+else if (sameString("ncbiRefSeqPsl", aliTable) || startsWith("altSeqLiftOverPsl", aliTable) ||
+ startsWith("fixSeqLiftOverPsl", aliTable))
{
rnaSeq = getBaseColorSequence(acc, aliTable);
}
else
{
char *cdnaTable = NULL;
struct trackDb *tdb = hashFindVal(trackHash, aliTable);
if (tdb != NULL)
cdnaTable = trackDbSetting(tdb, "cdnaTable");
if (isNotEmpty(cdnaTable) && hTableExists(database, cdnaTable))
rnaSeq = hGenBankGetMrna(database, acc, cdnaTable);
else
rnaSeq = hRnaSeq(database, acc);
}
@@ -7548,31 +7561,33 @@
rnaSeq = oSeq;
}
else
{
/* Look up alignments in database */
hFindSplitTable(database, seqName, aliTable, table, &hasBin);
sqlSafef(query, sizeof(query),
"select * from %s where qName = '%s' and tName=\"%s\" and tStart=%d",
table, acc, seqName, start);
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) == NULL)
errAbort("Couldn't find alignment for %s at %d", acc, start);
wholePsl = pslLoad(row+hasBin);
sqlFreeResult(&sr);
- if (startsWith("ucscRetroAli", aliTable) || startsWith("retroMrnaAli", aliTable) || sameString("pseudoMrna", aliTable) || startsWith("altSeqLiftOverPsl", aliTable) || startsWith("ncbiRefSeqPsl", aliTable))
+ if (startsWith("ucscRetroAli", aliTable) || startsWith("retroMrnaAli", aliTable) ||
+ sameString("pseudoMrna", aliTable) || startsWith("altSeqLiftOverPsl", aliTable) ||
+ startsWith("fixSeqLiftOverPsl", aliTable) || startsWith("ncbiRefSeqPsl", aliTable))
{
rnaSeq = getBaseColorSequence(acc, aliTable);
}
else if (sameString("HInvGeneMrna", aliTable))
{
/* get RNA accession for the gene id in the alignment */
sqlSafef(query, sizeof(query), "select mrnaAcc from HInv where geneId='%s'",
acc);
rnaSeq = hRnaSeq(database, sqlQuickString(conn, query));
}
else
{
char *cdnaTable = NULL;
struct trackDb *tdb = hashFindVal(trackHash, aliTable);
if (tdb != NULL)
@@ -8846,73 +8861,136 @@
struct sqlConnection *conn = hAllocConn(database);
int start = cartInt(cart, "o");
struct psl *pslList = NULL;
cartWebStart(cart, database, "Viral Gene");
printf("<H2>Viral Gene %s</H2>\n", geneName);
printCustomUrl(tdb, geneName, TRUE);
pslList = getAlignments(conn, "chr1_viralProt", geneName);
htmlHorizontalLine();
printf("<H3>Protein Alignments</H3>");
printAlignments(pslList, start, "htcProteinAli", "chr1_viralProt", geneName);
printTrackHtml(tdb);
}
+static boolean pslTrimListToTargetRange(struct psl *pslList, int winStart, int winEnd,
+ int *retQRangeStart, int *retQRangeEnd)
+/* If the current window overlaps the target coords of any psl(s) in pslList, then return TRUE and
+ * set *retQRange{Start,End} to span all query coord ranges corresponding to target coord ranges.
+ * errAbort if qName is not consistent across all psls.
+ * Otherwise return FALSE and set *retQRange{Start,End} to -1. */
+{
+boolean foundOverlap = FALSE;
+char *qName = NULL;
+int qRangeStart = -1, qRangeEnd = -1;
+struct psl *psl;
+for (psl = pslList; psl != NULL; psl = psl->next)
+ {
+ if (qName == NULL)
+ qName = psl->qName;
+ else if (differentString(qName, psl->qName))
+ errAbort("pslTrimListToTargetRange: inconsistent qName: got both '%s' and '%s'",
+ qName, psl->qName);
+ if (psl->tStart >= winStart && psl->tEnd <= winEnd)
+ {
+ foundOverlap = TRUE;
+ if (qRangeStart < 0 || psl->qStart < qRangeStart)
+ qRangeStart = psl->qStart;
+ if (qRangeEnd < 0 || psl->qEnd > qRangeEnd)
+ qRangeEnd = psl->qEnd;
+ }
+ else
+ {
+ struct psl *partPsl = pslTrimToTargetRange(psl, winStart, winEnd);
+ if (partPsl)
+ {
+ foundOverlap = TRUE;
+ if (qRangeStart < 0 || partPsl->qStart < qRangeStart)
+ qRangeStart = partPsl->qStart;
+ if (qRangeEnd < 0 || partPsl->qEnd > qRangeEnd)
+ qRangeEnd = partPsl->qEnd;
+ }
+ }
+ }
+*retQRangeStart = qRangeStart;
+*retQRangeEnd = qRangeEnd;
+return foundOverlap;
+}
+
void doPslAltSeq(struct trackDb *tdb, char *item)
-/* Fairly generic PSL handler -- print out some more details about the
- * alignment. */
+/* Details for alignments between chromosomes and alt haplogtype or fix patch sequences. */
{
int start = cartInt(cart, "o");
-int total = 0, i = 0;
-struct psl *pslList = NULL;
struct sqlConnection *conn = hAllocConn(database);
-// char *otherDb = trackDbSetting(tdb, "otherDb");
-// int altSize = hChromSize(otherDb, item);
genericHeader(tdb, item);
printCustomUrl(tdb, item, TRUE);
puts("<P>");
-puts("<B>Alignment Summary:</B><BR>\n");
-// char strBuf[64];
-// sprintLongWithCommas(strBuf, altSize);
-// printf("<B>Alignment Summary: '%s' %s</B><BR>\n", item, strBuf);
-pslList = getAlignments(conn, tdb->table, item);
+struct psl *pslList = getAlignments(conn, tdb->table, item);
+if (pslList)
+ {
+ printf("<B>Alignment of %s to %s:</B><BR>\n", item, pslList->tName);
printAlignments(pslList, start, "htcCdnaAli", tdb->table, item);
-puts("<P>");
-total = 0;
-for (i=0; i < pslList -> blockCount; i++)
+ char *hgsid = cartSessionId(cart);
+ int rangeStart = 0, rangeEnd = 0;
+ if (pslTrimListToTargetRange(pslList, winStart, winEnd, &rangeStart, &rangeEnd))
+ {
+ printf("<A HREF='hgTracks?hgsid=%s&position=%s:%d-%d'>"
+ "View corresponding position range on %s</A><BR>\n",
+ hgsid, item, rangeStart+1, rangeEnd, item);
+ }
+ char *altFix = item;
+ if (!endsWith(altFix, "alt") && !endsWith(altFix, "fix"))
+ altFix = pslList->tName;
+ printf("<A HREF=\"hgTracks?hgsid=%s&virtModeType=singleAltHaplo&singleAltHaploId=%s\">"
+ "Show %s placed on its chromosome</A><BR>\n",
+ hgsid, altFix, altFix);
+
+ puts("<P><B>Alignment stats:</B><BR>");
+ // Sometimes inversions cause alignments to be split up; just sum up all the stats.
+ int totalBlocks = 0, totalSize = 0, totalMatch = 0, totalMismatch = 0, totalN = 0;
+ int totalTIns = 0, totalQIns = 0;
+ struct psl *psl;
+ for (psl = pslList; psl != NULL; psl = psl->next)
{
- total += pslList->blockSizes[i];
+ totalBlocks += psl->blockCount;
+ int i;
+ for (i=0; i < psl->blockCount; i++)
+ totalSize += psl->blockSizes[i];
+ totalMatch += (psl->match + psl->repMatch);
+ totalMismatch += psl->misMatch;
+ totalN += psl->nCount;
+ totalTIns += psl->tBaseInsert;
+ totalQIns += psl->qBaseInsert;
}
printf("%d block(s) covering %d bases<BR>\n"
- "%d matching bases<BR>\n"
- "%d mismatching bases<BR>\n"
- "%d N bases<BR>\n"
+ "%d matching bases (%.2f%%)<BR>\n"
+ "%d mismatching bases (%.2f%%)<BR>\n"
+ "%d N bases(%.2f%%)<BR>\n"
"%d bases inserted in %s<BR>\n"
- "%d bases inserted in %s<BR>\n"
- "score: %d<BR>\n",
- pslList->blockCount, total,
- pslList->match,
- pslList->misMatch,
- pslList->nCount,
- pslList->tBaseInsert, hOrganism(database),
- pslList->qBaseInsert, item,
- pslScore(pslList));
-
+ "%d bases inserted in %s<BR>\n",
+ totalBlocks, totalSize,
+ totalMatch, 100.0 * (totalMatch / (double)totalSize),
+ totalMismatch, 100.0 * (totalMismatch / (double)totalSize),
+ totalN, 100.0 * (totalN / (double)totalSize),
+ totalTIns, pslList->tName, totalQIns, item);
+ }
+else
+ warn("Unable to find alignment for '%s' in table %s", item, tdb->table);
printTrackHtml(tdb);
hFreeConn(&conn);
}
void doPslDetailed(struct trackDb *tdb, char *item)
/* Fairly generic PSL handler -- print out some more details about the
* alignment. */
{
int start = cartInt(cart, "o");
int total = 0, i = 0;
struct psl *pslList = NULL;
struct sqlConnection *conn = hAllocConn(database);
genericHeader(tdb, item);
printCustomUrl(tdb, item, TRUE);
@@ -8924,31 +9002,31 @@
puts("<P>");
total = 0;
for (i=0; i < pslList -> blockCount; i++)
{
total += pslList->blockSizes[i];
}
printf("%d block(s) covering %d bases<BR>\n"
"%d matching bases<BR>\n"
"%d mismatching bases<BR>\n"
"%d N bases<BR>\n"
"%d bases inserted in %s<BR>\n"
"%d bases inserted in %s<BR>\n"
"score: %d<BR>\n",
pslList->blockCount, total,
- pslList->match,
+ pslList->match + pslList->repMatch,
pslList->misMatch,
pslList->nCount,
pslList->tBaseInsert, hOrganism(database),
pslList->qBaseInsert, item,
pslScore(pslList));
printTrackHtml(tdb);
hFreeConn(&conn);
}
void printEnsemblCustomUrl(struct trackDb *tdb, char *itemName, boolean encode,
char *archive)
/* Print Ensembl Gene URL. */
{
char *shortItemName;
@@ -25853,31 +25931,31 @@
else if (startsWith("atom", table) && !startsWith("atomMaf", table))
{
doAtom(tdb, item);
}
else if (sameWord(table, "firstEF"))
{
firstEF(tdb, item);
}
else if ( sameWord(table, "blastHg16KG") || sameWord(table, "blatHg16KG" ) ||
startsWith("blastDm", table) || sameWord(table, "blastMm6KG") ||
sameWord(table, "blastSacCer1SG") || sameWord(table, "blastHg17KG") ||
startsWith("blastCe", table) || sameWord(table, "blastHg18KG") )
{
blastProtein(tdb, item);
}
-else if (startsWith("altSeqLiftOverPsl", table))
+else if (startsWith("altSeqLiftOverPsl", table) || startsWith("fixSeqLiftOverPsl", table))
{
doPslAltSeq(tdb, item);
}
else if (sameWord(table, "chimpSimpleDiff"))
{
doSimpleDiff(tdb, "Chimp");
}
/* This is a catch-all for blastz/blat tracks -- any special cases must be
* above this point! */
else if (startsWith("map", table) ||startsWith("blastz", table) || startsWith("blat", table) || startsWith("tblast", table) || endsWith(table, "Blastz"))
{
char *genome = "Unknown";
if (startsWith("tblast", table))
genome = &table[6];
if (startsWith("map", table))