004d56eff1d9b74e312104b30ea872201e92cf97 braney Thu Dec 10 15:50:57 2020 -0800 add the ability for bigPsl tracks to show query sequences like genePred if they have the sequence and CDS. #24672 diff --git src/hg/hgc/hgc.c src/hg/hgc/hgc.c index 4fea33b..4649d02 100644 --- src/hg/hgc/hgc.c +++ src/hg/hgc/hgc.c @@ -3096,40 +3096,73 @@ { char **extraFields = (restFields + restBedFields); int extraFieldCount = restCount - restBedFields; int printCount = extraFieldsPrint(tdb,NULL,extraFields, extraFieldCount); printCount += 0; } } } char *bedRow[32]; char startBuf[16], endBuf[16]; int lastChromId = -1; char chromName[bbi->chromBpt->keySize+1]; +boolean firstTime = TRUE; for (bb = bbList; bb != NULL; bb = bb->next) { bbiCachedChromLookup(bbi, bb->chromId, lastChromId, chromName, sizeof(chromName)); lastChromId=bb->chromId; bigBedIntervalToRow(bb, chromName, startBuf, endBuf, bedRow, 4); if (showEvery || sameString(bedRow[3], item)) { - struct psl *psl= pslFromBigPsl(chromName, bb, seqTypeField, NULL, NULL); + char *cdsStr, *seq; + struct psl *psl= pslFromBigPsl(chromName, bb, seqTypeField, &seq, &cdsStr); slAddHead(&pslList, psl); + + // we're assuming that if there are multiple psl's with the same id that + // they are the same query sequence so we only put out one set of sequences + if (firstTime) + { + firstTime = FALSE; + printf("<H3>Links to sequence:</H3>\n"); + printf("<UL>\n"); + + if (!isEmpty(seq)) // if there is a query sequence + { + if (!isEmpty(cdsStr)) // if we have CDS + { + puts("<LI>\n"); + hgcAnchorSomewhere("htcTranslatedBigPsl", item, "translate", seqName); + printf("Translated Amino Acid Sequence</A> from Query Sequence\n"); + puts("</LI>\n"); + } + + puts("<LI>\n"); + hgcAnchorSomewhere("htcBigPslSequence", item, tdb->track, seqName); + printf("Query Sequence</A> \n"); + puts("</LI>\n"); + } + + puts("<LI>\n"); + hgcAnchorSomewhere("htcGeneInGenome", item, tdb->track, seqName); + printf("Genomic Sequence</A> from assembly\n"); + puts("</LI>\n"); + printf("</UL>\n"); + } } } char *sort = cartUsualString(cart, "sort", pslSortList[0]); pslSortListByVar(&pslList, sort); if (showEvery) printf("<H3>Genomic Alignments</H3>"); else printf("<H3>%s/Genomic Alignments</H3>", item); if (showEvery || pslIsProtein(pslList)) printAlignmentsSimple(pslList, start, "htcBigPslAli", tdb->table, item); else printAlignmentsExtra(pslList, start, "htcBigPslAli", "htcBigPslAliInWindow", tdb->table, item); @@ -7336,52 +7369,55 @@ sqlSafef(query, sizeof query, "select name from %s where id = '%s'", cdsTable, cdsId); char *cdsString = sqlQuickString(conn, query); if (isNotEmpty(cdsString)) genbankParseCds(cdsString, retCdsStart, retCdsEnd); } } } void htcBigPslAli(char *acc) /* Show alignment for accession in bigPsl file. */ { struct psl *psl; char *aliTable; int start; unsigned int cdsStart = 0, cdsEnd = 0; +struct sqlConnection *conn = NULL; +if (!trackHubDatabase(database)) + conn = hAllocConnTrack(database, tdb); aliTable = cartString(cart, "aliTable"); if (isCustomTrack(aliTable)) { struct customTrack *ct = lookupCt(aliTable); tdb = ct->tdb; } else tdb = hashFindVal(trackHash, aliTable); char title[1024]; safef(title, sizeof title, "%s vs Genomic [%s]", acc, aliTable); htmlFramesetStart(title); /* Get some environment vars. */ start = cartInt(cart, "l"); int end = cartInt(cart, "r"); char *chrom = cartString(cart, "c"); char *seq, *cdsString = NULL; struct lm *lm = lmInit(0); -char *fileName = bbiNameFromSettingOrTable(tdb, NULL, tdb->table); +char *fileName = bbiNameFromSettingOrTable(tdb, conn, tdb->table); struct bbiFile *bbi = bigBedFileOpen(fileName); struct bigBedInterval *bb, *bbList = bigBedIntervalQuery(bbi, chrom, start, end, 0, lm); char *bedRow[32]; char startBuf[16], endBuf[16]; for (bb = bbList; bb != NULL; bb = bb->next) { bigBedIntervalToRow(bb, seqName, startBuf, endBuf, bedRow, ArraySize(bedRow)); struct bed *bed = bedLoadN(bedRow, 12); if (sameString(bed->name, acc) && (bb->start == start) && (bb->end == end)) { bb->next = NULL; break; } } if (bb == NULL) @@ -8590,30 +8626,111 @@ gp = getGenePredForPosition(table, geneName); if (gp == NULL) errAbort("%s not found in %s when translating to protein", geneName, table); else if (gp->cdsStart == gp->cdsEnd) errAbort("No CDS defined: no protein translation for %s", geneName); prot = getPredMRnaProtSeq(gp); safef(protName, sizeof(protName), "%s_prot", geneName); displayProteinPrediction(protName, prot); freez(&prot); genePredFree(&gp); } +void htcBigPsl( char *acc, boolean translate) +/* Output bigPsl query sequence, translated to amino acid sequence if requested. */ +{ +struct sqlConnection *conn = NULL; + +if (!trackHubDatabase(database)) + conn = hAllocConn(database); + +int start = cartInt(cart, "l"); +int end = cartInt(cart, "r"); +char *table = cartString(cart, "table"); + +struct trackDb *tdb ; +if (isHubTrack(table)) + tdb = hubConnectAddHubForTrackAndFindTdb( database, table, NULL, trackHash); +else + tdb = hashFindVal(trackHash, table); +char *fileName = bbiNameFromSettingOrTable(tdb, conn, tdb->table); +struct bbiFile *bbi = bigBedFileOpen(fileName); +struct lm *lm = lmInit(0); +int ivStart = start, ivEnd = end; +if (start == end) + { + // item is an insertion; expand the search range from 0 bases to 2 so we catch it: + ivStart = max(0, start-1); + ivEnd++; + } + +unsigned seqTypeField = bbExtraFieldIndex(bbi, "seqType"); +struct bigBedInterval *bb, *bbList = NULL; + +bbList = bigBedIntervalQuery(bbi, seqName, ivStart, ivEnd, 0, lm); + +char *bedRow[32]; +char startBuf[16], endBuf[16]; + +int lastChromId = -1; +char chromName[bbi->chromBpt->keySize+1]; + +for (bb = bbList; bb != NULL; bb = bb->next) + { + bbiCachedChromLookup(bbi, bb->chromId, lastChromId, chromName, sizeof(chromName)); + + lastChromId=bb->chromId; + bigBedIntervalToRow(bb, chromName, startBuf, endBuf, bedRow, 4); + + // we're assuming that if there are multiple psl's with the same id that + // they are the same query sequence so we only put out one sequence + if (sameString(bedRow[3], acc)) + { + char *cdsStr, *seq; + pslFromBigPsl(chromName, bb, seqTypeField, &seq, &cdsStr); + + if (translate) + { + struct genbankCds cds; + if (!genbankCdsParse(cdsStr, &cds)) + errAbort("can't parse CDS for %s: %s", acc, cdsStr); + int protBufSize = ((cds.end-cds.start)/3)+4; + char *prot = needMem(protBufSize); + + seq[cds.end] = '\0'; + dnaTranslateSome(seq+cds.start, prot, protBufSize); + + cartHtmlStart("Protein Translation of query sequence"); + displayProteinPrediction(acc, prot); + return; + } + else + { + cartHtmlStart("Query sequence"); + printf("<PRE><TT>"); + printf(">%s length=%d\n", acc,(int)strlen(seq)); + printLines(stdout, seq, 50); + printf("</TT></PRE>"); + return; + } + } + } +} + void htcTranslatedMRna(struct trackDb *tdb, char *acc) /* Translate mRNA to protein and display it. */ { struct sqlConnection *conn = hAllocConn(database); struct genbankCds cds = getCds(conn, acc); struct dnaSeq *mrna = hGenBankGetMrna(database, acc, NULL); if (mrna == NULL) errAbort("mRNA sequence %s not found", acc); if (cds.end > mrna->size) errAbort("CDS bounds exceed length of mRNA for %s", acc); int protBufSize = ((cds.end-cds.start)/3)+4; char *prot = needMem(protBufSize); mrna->dna[cds.end] = '\0'; @@ -8958,31 +9075,35 @@ struct trackDb *tdb = NULL; if (isHubTrack(table)) { tdb = hubConnectAddHubForTrackAndFindTdb( database, table, NULL, trackHash); itemCount = getSeqForBigGene(tdb, geneName); } else if (isCustomTrack(table)) { tdb = getCustomTrackTdb(table); itemCount = getSeqForBigGene(tdb, geneName); } else { tdb = hashFindVal(trackHash, table); - char *bigDataUrl = trackDbSetting(tdb, "bigDataUrl"); + struct sqlConnection *conn = NULL; + + if (!trackHubDatabase(database)) + conn = hAllocConnTrack(database, tdb); + char *bigDataUrl = bbiNameFromSettingOrTable(tdb, conn, tdb->table); if (bigDataUrl) { itemCount = getSeqForBigGene(tdb, geneName); } else { char constraints[256]; safef(constraints, sizeof(constraints), "name = %s", quotedItem); itemCount = hgSeqItemsInRange(database, table, seqName, winStart, winEnd, constraints); } } if (itemCount == 0) printf("\n# No results returned from query.\n\n"); puts("</PRE>"); freeMem(quotedItem); @@ -26386,30 +26507,38 @@ { htcProteinAli(item, cartString(cart, "aliTable")); } else if (sameWord(table, "htcBlatXeno")) { htcBlatXeno(item, cartString(cart, "aliTable")); } else if (sameWord(table, "htcExtSeq")) { htcExtSeq(item); } else if (sameWord(table, "htcTranslatedProtein")) { htcTranslatedProtein(item); } +else if (sameWord(table, "htcBigPslSequence")) + { + htcBigPsl(item, FALSE); + } +else if (sameWord(table, "htcTranslatedBigPsl")) + { + htcBigPsl(item, TRUE); + } else if (sameWord(table, "htcTranslatedPredMRna")) { htcTranslatedPredMRna(item); } else if (sameWord(table, "htcTranslatedMRna")) { htcTranslatedMRna(tdbForTableArg(), item); } else if (sameWord(table, "ncbiRefSeqSequence")) { ncbiRefSeqSequence(item); } else if (sameWord(table, "htcGeneMrna")) { htcGeneMrna(item);