bc21bd3d27fe3d29971231955b3fc544fa1c3d1e angie Wed Oct 16 11:51:39 2013 -0700 Two new tracks for Locus Reference Genomic (LRG) (#11863) with customhandlers: LRG Regions and LRG Transcripts. LRGs are frozen reference sequences for a particular gene plus some upstream and downstream sequence. They are intended to provide a stable coordinate system for gene annotations that won't change with every new genome assembly, but can be mapped to each genome assembly. Since there is a lot of metadata associated with each region, I made LRG Regions a bigBed 12 + with fields describing mismatches and indels, so that PSL can be derived from the bigBed and the original LRG sequence can be reconstructed using genome assembly sequence and the mismatch/indel info. hgTracks shows differences and LRG insertions into the reference assembly using the cds.c baseColor code. (LRG deletions from the reference appear as gaps, which we get for free with bed12 info). For LRG Transcripts, I found the genePred codon-coloring code inadequate for showing an insertion into hg19 (or even mismatches), so instead of genePred I ended up using PSL + sequence, more like the mRNA track representation and display. diff --git src/hg/hgc/hgc.c src/hg/hgc/hgc.c index 3a8d37f..9204c60 100644 --- src/hg/hgc/hgc.c +++ src/hg/hgc/hgc.c @@ -1463,48 +1463,30 @@ char dbOnly[4096]; diff = comp2->start - (comp1->start + comp1->size); safef(dbOnly, sizeof(dbOnly), "%s", comp1->src); chopPrefix(dbOnly); printf("%-20s %d\n",hOrganism(dbOnly), diff); } printf("<BR>"); } } } } -struct hash* hashFromString(char* string) -/* parse a whitespace-separated string with tuples in the format name=val or - * name="val" to a hash name->val */ -{ -if (string==NULL) - return NULL; - -struct slPair *keyVals = slPairListFromString(string, TRUE); -if (keyVals==NULL) - return NULL; - -struct hash *nameToVal = newHash(0); -struct slPair *kv; -for (kv = keyVals; kv != NULL; kv = kv->next) - hashAdd(nameToVal, kv->name, kv->val); -return nameToVal; -} - void printIdOrLinks(struct asColumn *col, struct hash *fieldToUrl, struct trackDb *tdb, char *idList) /* if trackDb does not contain a "urls" entry for current column name, just print idList as it is. * Otherwise treat idList as a comma-sep list of IDs and print one row per id, with a link to url, * ($$ in url is OK, wildcards like $P, $p, are also OK) * */ { // try to find a fieldName=url setting in the "urls" tdb statement, print id if not found char *url = NULL; if (fieldToUrl!=NULL) url = (char*)hashFindVal(fieldToUrl, col->name); if (url==NULL) { printf("<td>%s</td></tr>\n", idList); return; } @@ -2658,30 +2640,31 @@ puts("</LI>\n"); } } void geneShowPosAndLinksPal(char *geneName, char *pepName, struct trackDb *tdb, char *pepTable, char *pepClick, char *mrnaClick, char *genomicClick, char *mrnaDescription, struct palInfo *palInfo) /* Show parts of gene common to everything. If pepTable is not null, * it's the old table name, but will check gbSeq first. */ { char *geneTable = tdb->table; boolean foundPep = FALSE; showGenePos(geneName, tdb); + if (startsWith("ENCODE Gencode",tdb->longLabel)) { char *yaleTable = trackDbSetting(tdb, "yalePseudoAssoc"); if ((yaleTable != NULL) && (hTableExists(database, yaleTable))) { struct sqlConnection *conn = hAllocConn(database); char query[512]; sqlSafef(query, sizeof(query), "select * from %s where transcript = '%s'", yaleTable, geneName); char buffer[512]; struct sqlResult *sr = sqlGetResult(conn, query); char *yaleUrl = trackDbSetting(tdb, "yaleUrl"); char **row; while ((row = sqlNextRow(sr)) != NULL) @@ -6901,103 +6884,127 @@ /* Write (to stdout) the main html page containing just the frame info. */ if (partPsl != wholePsl) printf("<FRAMESET COLS = \"13%%,87%% \" " "ONLOAD=\"body.location.href = '%s#cDNAStart';\">\n", bodyTn.forCgi); else puts("<FRAMESET COLS = \"13%,87% \" >"); printf(" <FRAME SRC=\"%s\" NAME=\"index\">\n", indexTn.forCgi); printf(" <FRAME SRC=\"%s\" NAME=\"body\">\n", bodyTn.forCgi); puts("<NOFRAMES><BODY></BODY></NOFRAMES>"); puts("</FRAMESET>"); puts("</HTML>\n"); exit(0); /* Avoid cartHtmlEnd. */ } +static void getCdsStartAndStop(struct sqlConnection *conn, char *acc, char *trackTable, + uint *retCdsStart, uint *retCdsEnd) +/* Get cds start and stop, if available */ +{ +char query[256]; +if (sqlTableExists(conn, "gbCdnaInfo")) + { + sqlSafef(query, sizeof query, "select cds from gbCdnaInfo where acc = '%s'", acc); + char *cdsId = sqlQuickString(conn, query); + if (isNotEmpty(cdsId)) + { + sqlSafef(query, sizeof query, "select name from cds where id = '%s'", cdsId); + char *cdsString = sqlQuickString(conn, query); + if (isNotEmpty(cdsString)) + genbankParseCds(cdsString, retCdsStart, retCdsEnd); + } + } +else + { + struct trackDb *tdb = hashMustFindVal(trackHash, trackTable); + char *cdsTable = trackDbSetting(tdb, "cdsTable"); + if (isNotEmpty(cdsTable) && hTableExists(database, cdsTable)) + { + sqlSafef(query, sizeof(query), "select cds from %s where id = '%s'", cdsTable, acc); + char *cdsString = sqlQuickString(conn, query); + if (isNotEmpty(cdsString)) + genbankParseCds(cdsString, retCdsStart, retCdsEnd); + } + } +} + void htcCdnaAli(char *acc) /* Show alignment for accession. */ { char query[256]; char table[64]; char accTmp[64]; struct sqlConnection *conn; struct sqlResult *sr; char **row; struct psl *psl; struct dnaSeq *rnaSeq; char *aliTable; int start; unsigned int cdsStart = 0, cdsEnd = 0; boolean hasBin; char accChopped[512] ; safef(accChopped, sizeof(accChopped), "%s",acc); chopSuffix(accChopped); /* Print start of HTML. */ writeFramesetType(); puts("<HTML>"); aliTable = cartString(cart, "aliTable"); printf("<HEAD>\n<TITLE>%s vs Genomic [%s]</TITLE>\n</HEAD>\n\n", accChopped, aliTable); /* Get some environment vars. */ start = cartInt(cart, "o"); -/* Get cds start and stop, if available */ conn = hAllocConn(database); -if (sqlTableExists(conn, "gbCdnaInfo")) - { - sqlSafef(query, sizeof query, "select cds from gbCdnaInfo where acc = '%s'", accChopped); - sr = sqlGetResult(conn, query); - if ((row = sqlNextRow(sr)) != NULL) - { - sqlSafef(query, sizeof query, "select name from cds where id = '%d'", atoi(row[0])); - sqlFreeResult(&sr); - sr = sqlGetResult(conn, query); - if ((row = sqlNextRow(sr)) != NULL) - genbankParseCds(row[0], &cdsStart, &cdsEnd); - } - sqlFreeResult(&sr); - } +getCdsStartAndStop(conn, accChopped, aliTable, &cdsStart, &cdsEnd); /* Look up alignments in database */ hFindSplitTable(database, seqName, aliTable, table, &hasBin); sqlSafef(query, sizeof query, "select * from %s where qName like '%s%%' and tName=\"%s\" and tStart=%d", table, acc, seqName, start); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) == NULL) errAbort("Couldn't find alignment for %s at %d", acc, start); psl = pslLoad(row+hasBin); sqlFreeResult(&sr); /* get bz rna snapshot for blastz alignments */ if (sameString("mrnaBlastz", aliTable) || sameString("pseudoMrna", aliTable)) { struct sqlConnection *conn = hAllocConn(database); unsigned retId = 0; safef(accTmp, sizeof accTmp, "bz-%s", acc); if (hRnaSeqAndIdx(accTmp, &rnaSeq, &retId, conn) == -1) rnaSeq = hRnaSeq(database, acc); hFreeConn(&conn); } else if (sameString("HInvGeneMrna", aliTable)) { /* get RNA accession for the gene id in the alignment */ sqlSafef(query, sizeof query, "select mrnaAcc from HInv where geneId='%s'", acc); rnaSeq = hRnaSeq(database, sqlQuickString(conn, query)); } else + { + struct trackDb *tdb = hashMustFindVal(trackHash, aliTable); + char *cdnaTable = trackDbSetting(tdb, "cdnaTable"); + if (isNotEmpty(cdnaTable) && hTableExists(database, cdnaTable)) + rnaSeq = hGenBankGetMrna(database, acc, cdnaTable); + else rnaSeq = hRnaSeq(database, acc); + } if (startsWith("xeno", aliTable)) showSomeAlignment(psl, rnaSeq, gftDnaX, 0, rnaSeq->size, NULL, cdsStart, cdsEnd); else showSomeAlignment(psl, rnaSeq, gftDna, 0, rnaSeq->size, NULL, cdsStart, cdsEnd); hFreeConn(&conn); } void htcCdnaAliInWindow(char *acc) /* Show part of alignment in browser window for accession. */ { char query[256]; char table[64]; struct sqlConnection *conn; struct sqlResult *sr; @@ -7010,48 +7017,32 @@ boolean hasBin; char accChopped[512] ; safef(accChopped, sizeof(accChopped), "%s",acc); chopSuffix(accChopped); /* Get some environment vars. */ aliTable = cartString(cart, "aliTable"); start = cartInt(cart, "o"); /* Print start of HTML. */ writeFramesetType(); puts("<HTML>"); printf("<HEAD>\n<TITLE>%s vs Genomic [%s]</TITLE>\n</HEAD>\n\n", accChopped, aliTable); -/* Get cds start and stop, if available */ conn = hAllocConn(database); -if (sqlTableExists(conn, "gbCdnaInfo")) - { - sqlSafef(query, sizeof(query), "select cds from gbCdnaInfo where acc = '%s'", - accChopped); - sr = sqlGetResult(conn, query); - if ((row = sqlNextRow(sr)) != NULL) - { - sqlSafef(query, sizeof(query), "select name from cds where id = '%d'", - atoi(row[0])); - sqlFreeResult(&sr); - sr = sqlGetResult(conn, query); - if ((row = sqlNextRow(sr)) != NULL) - genbankParseCds(row[0], &cdsStart, &cdsEnd); - } - sqlFreeResult(&sr); - } +getCdsStartAndStop(conn, accChopped, aliTable, &cdsStart, &cdsEnd); if (startsWith("user", aliTable)) { char *pslName, *faName, *qName; struct lineFile *lf; bioSeq *oSeqList = NULL, *oSeq = NULL; struct psl *psl; int start; enum gfType tt, qt; boolean isProt; char *ss = cartOptionalString(cart, "ss"); if ((ss != NULL) && !ssFilesExist(ss)) { ss = NULL; @@ -7113,32 +7104,39 @@ // value is: extFile seqTbl extFileTbl char *words[3]; int nwords = chopByWhite(specCopy, words, ArraySize(words)); if ((nwords != ArraySize(words)) || !sameString(words[0], "extFile")) errAbort("invalid %s track setting: %s", BASE_COLOR_USE_SEQUENCE, spec); rnaSeq = hDnaSeqGet(database, acc, words[1], words[2]); } else if (sameString("HInvGeneMrna", aliTable)) { /* get RNA accession for the gene id in the alignment */ sqlSafef(query, sizeof(query), "select mrnaAcc from HInv where geneId='%s'", acc); rnaSeq = hRnaSeq(database, sqlQuickString(conn, query)); } else + { + struct trackDb *tdb = hashMustFindVal(trackHash, aliTable); + char *cdnaTable = trackDbSetting(tdb, "cdnaTable"); + if (isNotEmpty(cdnaTable) && hTableExists(database, cdnaTable)) + rnaSeq = hGenBankGetMrna(database, acc, cdnaTable); + else rnaSeq = hRnaSeq(database, acc); } + } /* Get partial psl for part of alignment in browser window: */ if (wholePsl->tStart >= winStart && wholePsl->tEnd <= winEnd) partPsl = wholePsl; else partPsl = pslTrimToTargetRange(wholePsl, winStart, winEnd); if (startsWith("xeno", aliTable)) errAbort("htcCdnaAliInWindow does not support translated alignments."); else showSomePartialDnaAlignment(partPsl, wholePsl, rnaSeq, NULL, cdsStart, cdsEnd); hFreeConn(&conn); } void htcChainAli(char *item) @@ -25305,30 +25303,42 @@ { doNumtS(tdb, item); } else if (startsWith("cosmic", table)) { doCosmic(tdb, item); } else if (sameString("geneReviews", table)) { doGeneReviews(tdb, item); } else if (startsWith("qPcrPrimers", table)) { doQPCRPrimers(tdb, item); } +else if (sameString("lrg", table)) + { + doLrg(tdb, item); + } +else if (sameString("lrgTranscriptAli", table)) + { + doLrgTranscriptPsl(tdb, item); + } +else if (sameWord(table, "htcLrgCdna")) + { + htcLrgCdna(item); + } else if (isHubTrack(table) && startsWith("snake", trackHubSkipHubName(table))) { doSnakeClick(tdb, item); } else if (tdb != NULL) { genericClickHandler(tdb, item, NULL); } else { cartWebStart(cart, database, "%s", track); printf("Sorry, clicking there doesn't do anything yet (%s).", track); } /* End of 1000+ line dispatch on table involving 100+ if/elses. */