48ff31d7c310549df1111e86dcd3abd66abe6586 angie Wed Oct 24 16:37:39 2018 -0700 Alt haplotype and fix patch alignment tracks with links to multi-region mode (refs #18854) * Adding altSeqLiftOverPsl and fixSeqListOverPsl as top-level tracks (release alpha). * New 'db' option for baseColorUseSequence makes it fetch sequence from the current database (or other database if specified) -- we don't need seq/extFile for alts and patches now that they have been added to the .2bit. * Special hgc handler for {alt,fix}SeqLiftOverPsl has links to view the alt/fix in multi-region mode, to see the alignment details in the current position range, and to jump to the corresponding position range in the browser. diff --git src/hg/hgc/hgc.c src/hg/hgc/hgc.c index 8bbdee4..be40dfe 100644 --- src/hg/hgc/hgc.c +++ src/hg/hgc/hgc.c @@ -4301,31 +4301,31 @@ errAbort("bigItemImagePath setting for %s track incorrect. Needs to be \"itemImagePath <path> <suffix>\".", tdb->track); printf("<A HREF=\"%s/%s.%s\">Download Original Image</A><BR>\n", bothWords[0], item, bothWords[1]); } if ((sameString(tdb->table,"altLocations") || sameString(tdb->table,"fixLocations")) && strchr(item,'_')) { // Truncate item (alt/fix sequence name) at colon if found: char itemCpy[strlen(item)+1]; safecpy(itemCpy, sizeof(itemCpy), item); char *p = strchr(itemCpy, ':'); if (p) *p = '\0'; char *hgsid = cartSessionId(cart); char *desc = sameString(tdb->table, "altLocations") ? "alternate haplotype" : "fix patch"; - printf("<A HREF=\"/cgi-bin/hgTracks?hgsid=%s&virtModeType=singleAltHaplo&singleAltHaploId=%s\">" + printf("<A HREF=\"hgTracks?hgsid=%s&virtModeType=singleAltHaplo&singleAltHaploId=%s\">" "Show this %s placed on its chromosome</A><BR>\n", hgsid, itemCpy, desc); } printTrackHtml(tdb); freez(&dupe); hFreeConn(&conn); } void genericClickHandler(struct trackDb *tdb, char *item, char *itemForUrl) /* Put up generic track info */ { #ifdef OLD /* Called now by cartWebStart... */ jsIncludeFile("jquery.js", NULL); jsIncludeFile("utils.js",NULL); @@ -5943,31 +5943,35 @@ { printAlignmentsExtra(pslList, startFirst, hgcCommand, "htcCdnaAliInWindow", tableName, itemIn); } struct psl *getAlignments(struct sqlConnection *conn, char *table, char *acc) /* get the list of alignments for the specified acc */ { struct sqlResult *sr = NULL; char **row; struct psl *psl, *pslList = NULL; boolean hasBin; char splitTable[64]; char query[256]; if (!hFindSplitTable(database, seqName, table, splitTable, &hasBin)) errAbort("can't find table %s or %s_%s", table, seqName, table); - +char *chr = cartOptionalString(cart, "c"); +if (isNotEmpty(chr)) + sqlSafef(query, sizeof(query), "select * from %s where qName = '%s' and tName = '%s'", + splitTable, acc, chr); +else sqlSafef(query, sizeof(query), "select * from %s where qName = '%s'", splitTable, acc); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { psl = pslLoad(row+hasBin); slAddHead(&pslList, psl); } sqlFreeResult(&sr); slReverse(&pslList); return pslList; } struct psl *loadPslRangeT(char *table, char *qName, char *tName, int tStart, int tEnd) /* Load a list of psls given qName tName tStart tEnd */ { @@ -7357,35 +7361,43 @@ else partPsl = pslTrimToTargetRange(wholePsl, winStart, winEnd); struct dnaSeq *rnaSeq = newDnaSeq(seq, strlen(seq), acc); showSomePartialDnaAlignment(partPsl, wholePsl, rnaSeq, NULL, cdsStart, cdsEnd); } static struct dnaSeq *getBaseColorSequence(char *itemName, char *table) /* Grab sequence using the sequence and extFile table names out of BASE_COLOR_USE_SEQUENCE. */ { struct trackDb *tdb = hashMustFindVal(trackHash, table); char *spec = trackDbRequiredSetting(tdb, BASE_COLOR_USE_SEQUENCE); char *specCopy = cloneString(spec); // value is: extFile seqTbl extFileTbl +// or: db [dddBbb1] char *words[3]; int nwords = chopByWhite(specCopy, words, ArraySize(words)); -if ((nwords != ArraySize(words)) || !sameString(words[0], "extFile")) - errAbort("invalid %s track setting: %s", BASE_COLOR_USE_SEQUENCE, spec); +if (sameString(words[0], "extFile") && (nwords == ArraySize(words))) return hDnaSeqGet(database, itemName, words[1], words[2]); +else if (sameString(words[0], "db")) + { + char *db = (nwords == 2) ? words[1] : database; + return hChromSeq(db, itemName, 0, 0); + } +else + errAbort("invalid %s track setting: %s", BASE_COLOR_USE_SEQUENCE, spec); +return NULL; // make compiler happy } void htcCdnaAli(char *acc) /* Show alignment for accession. */ { char query[256]; char table[64]; char accTmp[64]; struct sqlConnection *conn; struct sqlResult *sr; char **row; struct psl *psl; struct dnaSeq *rnaSeq; char *aliTable; int start; @@ -7420,31 +7432,32 @@ if (sameString("mrnaBlastz", aliTable) || sameString("pseudoMrna", aliTable)) { struct sqlConnection *conn = hAllocConn(database); unsigned retId = 0; safef(accTmp, sizeof accTmp, "bz-%s", acc); if (hRnaSeqAndIdx(accTmp, &rnaSeq, &retId, conn) == -1) rnaSeq = hRnaSeq(database, acc); hFreeConn(&conn); } else if (sameString("HInvGeneMrna", aliTable)) { /* get RNA accession for the gene id in the alignment */ sqlSafef(query, sizeof query, "select mrnaAcc from HInv where geneId='%s'", acc); rnaSeq = hRnaSeq(database, sqlQuickString(conn, query)); } -else if (sameString("ncbiRefSeqPsl", aliTable) || startsWith("altSeqLiftOverPsl", aliTable)) +else if (sameString("ncbiRefSeqPsl", aliTable) || startsWith("altSeqLiftOverPsl", aliTable) || + startsWith("fixSeqLiftOverPsl", aliTable)) { rnaSeq = getBaseColorSequence(acc, aliTable); } else { char *cdnaTable = NULL; struct trackDb *tdb = hashFindVal(trackHash, aliTable); if (tdb != NULL) cdnaTable = trackDbSetting(tdb, "cdnaTable"); if (isNotEmpty(cdnaTable) && hTableExists(database, cdnaTable)) rnaSeq = hGenBankGetMrna(database, acc, cdnaTable); else rnaSeq = hRnaSeq(database, acc); } @@ -7548,31 +7561,33 @@ rnaSeq = oSeq; } else { /* Look up alignments in database */ hFindSplitTable(database, seqName, aliTable, table, &hasBin); sqlSafef(query, sizeof(query), "select * from %s where qName = '%s' and tName=\"%s\" and tStart=%d", table, acc, seqName, start); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) == NULL) errAbort("Couldn't find alignment for %s at %d", acc, start); wholePsl = pslLoad(row+hasBin); sqlFreeResult(&sr); - if (startsWith("ucscRetroAli", aliTable) || startsWith("retroMrnaAli", aliTable) || sameString("pseudoMrna", aliTable) || startsWith("altSeqLiftOverPsl", aliTable) || startsWith("ncbiRefSeqPsl", aliTable)) + if (startsWith("ucscRetroAli", aliTable) || startsWith("retroMrnaAli", aliTable) || + sameString("pseudoMrna", aliTable) || startsWith("altSeqLiftOverPsl", aliTable) || + startsWith("fixSeqLiftOverPsl", aliTable) || startsWith("ncbiRefSeqPsl", aliTable)) { rnaSeq = getBaseColorSequence(acc, aliTable); } else if (sameString("HInvGeneMrna", aliTable)) { /* get RNA accession for the gene id in the alignment */ sqlSafef(query, sizeof(query), "select mrnaAcc from HInv where geneId='%s'", acc); rnaSeq = hRnaSeq(database, sqlQuickString(conn, query)); } else { char *cdnaTable = NULL; struct trackDb *tdb = hashFindVal(trackHash, aliTable); if (tdb != NULL) @@ -8846,73 +8861,136 @@ struct sqlConnection *conn = hAllocConn(database); int start = cartInt(cart, "o"); struct psl *pslList = NULL; cartWebStart(cart, database, "Viral Gene"); printf("<H2>Viral Gene %s</H2>\n", geneName); printCustomUrl(tdb, geneName, TRUE); pslList = getAlignments(conn, "chr1_viralProt", geneName); htmlHorizontalLine(); printf("<H3>Protein Alignments</H3>"); printAlignments(pslList, start, "htcProteinAli", "chr1_viralProt", geneName); printTrackHtml(tdb); } +static boolean pslTrimListToTargetRange(struct psl *pslList, int winStart, int winEnd, + int *retQRangeStart, int *retQRangeEnd) +/* If the current window overlaps the target coords of any psl(s) in pslList, then return TRUE and + * set *retQRange{Start,End} to span all query coord ranges corresponding to target coord ranges. + * errAbort if qName is not consistent across all psls. + * Otherwise return FALSE and set *retQRange{Start,End} to -1. */ +{ +boolean foundOverlap = FALSE; +char *qName = NULL; +int qRangeStart = -1, qRangeEnd = -1; +struct psl *psl; +for (psl = pslList; psl != NULL; psl = psl->next) + { + if (qName == NULL) + qName = psl->qName; + else if (differentString(qName, psl->qName)) + errAbort("pslTrimListToTargetRange: inconsistent qName: got both '%s' and '%s'", + qName, psl->qName); + if (psl->tStart >= winStart && psl->tEnd <= winEnd) + { + foundOverlap = TRUE; + if (qRangeStart < 0 || psl->qStart < qRangeStart) + qRangeStart = psl->qStart; + if (qRangeEnd < 0 || psl->qEnd > qRangeEnd) + qRangeEnd = psl->qEnd; + } + else + { + struct psl *partPsl = pslTrimToTargetRange(psl, winStart, winEnd); + if (partPsl) + { + foundOverlap = TRUE; + if (qRangeStart < 0 || partPsl->qStart < qRangeStart) + qRangeStart = partPsl->qStart; + if (qRangeEnd < 0 || partPsl->qEnd > qRangeEnd) + qRangeEnd = partPsl->qEnd; + } + } + } +*retQRangeStart = qRangeStart; +*retQRangeEnd = qRangeEnd; +return foundOverlap; +} + void doPslAltSeq(struct trackDb *tdb, char *item) -/* Fairly generic PSL handler -- print out some more details about the - * alignment. */ +/* Details for alignments between chromosomes and alt haplogtype or fix patch sequences. */ { int start = cartInt(cart, "o"); -int total = 0, i = 0; -struct psl *pslList = NULL; struct sqlConnection *conn = hAllocConn(database); -// char *otherDb = trackDbSetting(tdb, "otherDb"); -// int altSize = hChromSize(otherDb, item); genericHeader(tdb, item); printCustomUrl(tdb, item, TRUE); puts("<P>"); -puts("<B>Alignment Summary:</B><BR>\n"); -// char strBuf[64]; -// sprintLongWithCommas(strBuf, altSize); -// printf("<B>Alignment Summary: '%s' %s</B><BR>\n", item, strBuf); -pslList = getAlignments(conn, tdb->table, item); +struct psl *pslList = getAlignments(conn, tdb->table, item); +if (pslList) + { + printf("<B>Alignment of %s to %s:</B><BR>\n", item, pslList->tName); printAlignments(pslList, start, "htcCdnaAli", tdb->table, item); -puts("<P>"); -total = 0; -for (i=0; i < pslList -> blockCount; i++) + char *hgsid = cartSessionId(cart); + int rangeStart = 0, rangeEnd = 0; + if (pslTrimListToTargetRange(pslList, winStart, winEnd, &rangeStart, &rangeEnd)) + { + printf("<A HREF='hgTracks?hgsid=%s&position=%s:%d-%d'>" + "View corresponding position range on %s</A><BR>\n", + hgsid, item, rangeStart+1, rangeEnd, item); + } + char *altFix = item; + if (!endsWith(altFix, "alt") && !endsWith(altFix, "fix")) + altFix = pslList->tName; + printf("<A HREF=\"hgTracks?hgsid=%s&virtModeType=singleAltHaplo&singleAltHaploId=%s\">" + "Show %s placed on its chromosome</A><BR>\n", + hgsid, altFix, altFix); + + puts("<P><B>Alignment stats:</B><BR>"); + // Sometimes inversions cause alignments to be split up; just sum up all the stats. + int totalBlocks = 0, totalSize = 0, totalMatch = 0, totalMismatch = 0, totalN = 0; + int totalTIns = 0, totalQIns = 0; + struct psl *psl; + for (psl = pslList; psl != NULL; psl = psl->next) { - total += pslList->blockSizes[i]; + totalBlocks += psl->blockCount; + int i; + for (i=0; i < psl->blockCount; i++) + totalSize += psl->blockSizes[i]; + totalMatch += (psl->match + psl->repMatch); + totalMismatch += psl->misMatch; + totalN += psl->nCount; + totalTIns += psl->tBaseInsert; + totalQIns += psl->qBaseInsert; } printf("%d block(s) covering %d bases<BR>\n" - "%d matching bases<BR>\n" - "%d mismatching bases<BR>\n" - "%d N bases<BR>\n" + "%d matching bases (%.2f%%)<BR>\n" + "%d mismatching bases (%.2f%%)<BR>\n" + "%d N bases(%.2f%%)<BR>\n" "%d bases inserted in %s<BR>\n" - "%d bases inserted in %s<BR>\n" - "score: %d<BR>\n", - pslList->blockCount, total, - pslList->match, - pslList->misMatch, - pslList->nCount, - pslList->tBaseInsert, hOrganism(database), - pslList->qBaseInsert, item, - pslScore(pslList)); - + "%d bases inserted in %s<BR>\n", + totalBlocks, totalSize, + totalMatch, 100.0 * (totalMatch / (double)totalSize), + totalMismatch, 100.0 * (totalMismatch / (double)totalSize), + totalN, 100.0 * (totalN / (double)totalSize), + totalTIns, pslList->tName, totalQIns, item); + } +else + warn("Unable to find alignment for '%s' in table %s", item, tdb->table); printTrackHtml(tdb); hFreeConn(&conn); } void doPslDetailed(struct trackDb *tdb, char *item) /* Fairly generic PSL handler -- print out some more details about the * alignment. */ { int start = cartInt(cart, "o"); int total = 0, i = 0; struct psl *pslList = NULL; struct sqlConnection *conn = hAllocConn(database); genericHeader(tdb, item); printCustomUrl(tdb, item, TRUE); @@ -8924,31 +9002,31 @@ puts("<P>"); total = 0; for (i=0; i < pslList -> blockCount; i++) { total += pslList->blockSizes[i]; } printf("%d block(s) covering %d bases<BR>\n" "%d matching bases<BR>\n" "%d mismatching bases<BR>\n" "%d N bases<BR>\n" "%d bases inserted in %s<BR>\n" "%d bases inserted in %s<BR>\n" "score: %d<BR>\n", pslList->blockCount, total, - pslList->match, + pslList->match + pslList->repMatch, pslList->misMatch, pslList->nCount, pslList->tBaseInsert, hOrganism(database), pslList->qBaseInsert, item, pslScore(pslList)); printTrackHtml(tdb); hFreeConn(&conn); } void printEnsemblCustomUrl(struct trackDb *tdb, char *itemName, boolean encode, char *archive) /* Print Ensembl Gene URL. */ { char *shortItemName; @@ -25853,31 +25931,31 @@ else if (startsWith("atom", table) && !startsWith("atomMaf", table)) { doAtom(tdb, item); } else if (sameWord(table, "firstEF")) { firstEF(tdb, item); } else if ( sameWord(table, "blastHg16KG") || sameWord(table, "blatHg16KG" ) || startsWith("blastDm", table) || sameWord(table, "blastMm6KG") || sameWord(table, "blastSacCer1SG") || sameWord(table, "blastHg17KG") || startsWith("blastCe", table) || sameWord(table, "blastHg18KG") ) { blastProtein(tdb, item); } -else if (startsWith("altSeqLiftOverPsl", table)) +else if (startsWith("altSeqLiftOverPsl", table) || startsWith("fixSeqLiftOverPsl", table)) { doPslAltSeq(tdb, item); } else if (sameWord(table, "chimpSimpleDiff")) { doSimpleDiff(tdb, "Chimp"); } /* This is a catch-all for blastz/blat tracks -- any special cases must be * above this point! */ else if (startsWith("map", table) ||startsWith("blastz", table) || startsWith("blat", table) || startsWith("tblast", table) || endsWith(table, "Blastz")) { char *genome = "Unknown"; if (startsWith("tblast", table)) genome = &table[6]; if (startsWith("map", table))