0564395ec363631f2ff8d295da6f6b50f873fda4 braney Mon Jan 24 17:01:53 2022 -0800 more chromAlias work: some name changes and support for the new genark chromAlias format diff --git src/hg/hgc/transMapClick.c src/hg/hgc/transMapClick.c index c55dd83..6c8bae3 100644 --- src/hg/hgc/transMapClick.c +++ src/hg/hgc/transMapClick.c @@ -1,442 +1,442 @@ /* transMapClick - transMap click handling */ /* Copyright (C) 2010 The Regents of the University of California * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ /* FIXME: the two version support should be be delete once support * for table version is fully dropped. */ /* * For the older table-based version of transMap, the following * tables are used. These are mapped into the bigTransMap objects. * * Per-genome tables associated with transMap: * transMapAliXxx - mapped PSLs alignments * transMapInfoXxx - information for mapped alignments * hgFixed tables associated with transMap: * transMapSrcXxx - information about source alignments * transMapGeneXxx - gene information * transMapSeqXxx - seq table for accessing sequence * transMapExtFileXxx - extFile table * * Xxx is: * - UcscGene - UCSC genes * - RefSeq - RefSeq mRNAs * - MRna - GenBank mRNAs * - SplicedEst - GenBank spliced ESTs */ #include "common.h" #include "hgc.h" #include "hui.h" #include "hCommon.h" #include "transMapClick.h" #include "bigTransMap.h" #include "bigBed.h" #include "bigPsl.h" #include "transMapStuff.h" #include "transMapInfo.h" #include "transMapSrc.h" #include "transMapGene.h" #include "genbank.h" #include "chromAlias.h" enum geneSrcSetType /* constants for source sets */ { ENSEMBL_SRC_SET, REFSEQ_SRC_SET, RNA_SRC_SET, EST_SRC_SET, UCSC_GENES_SRC_SET, UNKNOWN_SRC_SET }; struct transMapBag /* object contain collected information on a specific transMap mapping * this has both PSL and bigTransMap objects */ { struct psl *psl; // transMap alignment struct bigTransMap *meta; // bigTransMap record for metadata enum geneSrcSetType srcSet; // source data set (not owned) boolean srcDbIsActive; // source database is active }; static char *guessTranscriptType(struct transMapGene *gene) /* guess the transcript type when not known */ { if (gene == NULL) return "unknown"; // EST else if (sameString(gene->cds, "n/a") || sameString(gene->cds, "")) return "non_coding"; else return "protein_coding"; } static enum geneSrcSetType guessGeneSrcSet(struct trackDb *tdb) /* guess the the source set from the table name for table * based-transmap */ { if (stringIn("UcscGenes", tdb->table)) return UCSC_GENES_SRC_SET; else if (stringIn("RefSeq", tdb->table)) return REFSEQ_SRC_SET; else if (stringIn("MRna", tdb->table)) return RNA_SRC_SET; else if (stringIn("SplicedEst", tdb->table)) return EST_SRC_SET; else return UNKNOWN_SRC_SET; } static char *chainSubsetToBigStr(enum transMapInfoChainSubset cs) /* convert chain subset type to string used in bigTransMap */ { switch (cs) { case transMapInfoUnknown: assert(FALSE); return "unknown"; case transMapInfoAll: return "all"; case transMapInfoSyn: return "syn"; case transMapInfoRbest: return "rbest"; } assert(FALSE); return NULL; } static struct bigTransMap *buildFakeBigTransMapRec(struct transMapInfo *info, struct transMapSrc *src, struct transMapGene *gene) /* build a partial bigTransMap record from the tables; only metadata fields * are filled in. */ { struct bigTransMap *bigTransMap; AllocVar(bigTransMap); bigTransMap->seqType = 1; bigTransMap->name = cloneString(src->id); bigTransMap->srcDb = cloneString(src->db); bigTransMap->srcChrom = cloneString(src->chrom); bigTransMap->srcChromStart = src->chromStart; bigTransMap->srcChromEnd = src->chromEnd; bigTransMap->srcIdent = (int)(1000.0*src->ident); bigTransMap->srcAligned = (int)(1000.0*src->aligned); if (gene != NULL) { bigTransMap->geneName = cloneString(gene->geneName); bigTransMap->geneId = cloneString(gene->geneId); bigTransMap->oCDS = cloneString(gene->cds); } bigTransMap->geneType = cloneString(guessTranscriptType(gene)); // set for ESTs bigTransMap->transcriptType = cloneString(guessTranscriptType(gene)); bigTransMap->chainType = cloneString(chainSubsetToBigStr(info->chainSubset)); bigTransMap->commonName = hOrganism(info->srcDb); if (bigTransMap->commonName == NULL) bigTransMap->commonName = cloneString(""); bigTransMap->scientificName = hScientificName(info->srcDb); if (bigTransMap->scientificName == NULL) bigTransMap->scientificName = cloneString(""); bigTransMap->orgAbbrev = cloneString(emptyForNull(hOrgShortForDb(info->srcDb))); return bigTransMap; } static struct transMapBag *transMapBagLoadDb(struct trackDb *tdb, char *mappedId) /* load information from various tables for database version of transmap */ { struct sqlConnection *conn = hAllocConn(database); struct transMapBag *bag; AllocVar(bag); bag->psl = getAlignments(conn, tdb->table, mappedId); char *transMapInfoTbl = trackDbRequiredSetting(tdb, transMapInfoTblSetting); struct transMapInfo *info = transMapInfoQuery(conn, transMapInfoTbl, mappedId); char *transMapSrcTbl = trackDbRequiredSetting(tdb, transMapSrcTblSetting); struct transMapSrc *src = transMapSrcQuery(conn, transMapSrcTbl, info->srcDb, info->srcId); struct transMapGene *gene = NULL; char *transMapGeneTbl = trackDbSetting(tdb, transMapGeneTblSetting); if (transMapGeneTbl != NULL) gene = transMapGeneQuery(conn, transMapGeneTbl, info->srcDb, transMapIdToSeqId(info->srcId)); bag->srcSet = guessGeneSrcSet(tdb); bag->srcDbIsActive = hDbIsActive(info->srcDb); bag->meta = buildFakeBigTransMapRec(info, src, gene); transMapInfoFree(&info); transMapSrcFree(&src); transMapGeneFree(&gene); hFreeConn(&conn); return bag; } static enum geneSrcSetType getGeneSrcSet(struct trackDb *tdb) /* get the geneSrcSetType from trackDb */ { char *srcSet = trackDbRequiredSetting(tdb, "transMapSrcSet"); if (sameString(srcSet, "ensembl")) return ENSEMBL_SRC_SET; else if (sameString(srcSet, "refseq")) return REFSEQ_SRC_SET; else if (sameString(srcSet, "rna")) return RNA_SRC_SET; else if (sameString(srcSet, "est")) return EST_SRC_SET; else return UNKNOWN_SRC_SET; } static char *formatGeneSrcSet(enum geneSrcSetType srcSet) /* get display version of source set */ { switch (srcSet) { case ENSEMBL_SRC_SET: return "Ensembl"; case REFSEQ_SRC_SET: return "RefSeq RNA"; case RNA_SRC_SET: return "GenBank RNA"; case EST_SRC_SET: return "GenBank EST"; case UCSC_GENES_SRC_SET: return "UCSC Genes"; case UNKNOWN_SRC_SET: return "Unknown"; } return "Unknown"; } static struct transMapBag *transMapBagLoadBig(struct trackDb *tdb, char *mappedId) /* load information from bigTransMap file */ { struct sqlConnection *conn = hAllocConn(database); struct transMapBag *bag; AllocVar(bag); char *fileName = bbiNameFromSettingOrTable(tdb, conn, tdb->table); char *chrom = cartString(cart, "c"); -struct bbiFile *bbi = bigBedFileOpenAlias(fileName, chromAliasGetHash(database)); +struct bbiFile *bbi = bigBedFileOpenAlias(fileName, chromAliasChromToAliasHash(database)); struct lm *lm = lmInit(0); int fieldIx; struct bptFile *bpt = bigBedOpenExtraIndex(bbi, "name", &fieldIx); struct bigBedInterval *bb = bigBedNameQuery(bbi, bpt, fieldIx, mappedId, lm); if (slCount(bb) != 1) errAbort("expected 1 item named \"%s\", got %d from %s", mappedId, slCount(bb), fileName); char *fields[bbi->fieldCount]; char startBuf[32], endBuf[32]; int bbFieldCount = bigBedIntervalToRow(bb, chrom, startBuf, endBuf, fields, bbi->fieldCount); if (bbFieldCount != BIGTRANSMAP_NUM_COLS) errAbort("expected %d fields in bigTransMap record, got %d in %s", BIGTRANSMAP_NUM_COLS, bbFieldCount, fileName); bag->psl = pslFromBigPsl(chrom, bb, 0, NULL, NULL); bag->meta = bigTransMapLoad(fields); bag->srcSet = getGeneSrcSet(tdb); bag->srcDbIsActive = hDbIsActive(bag->meta->srcDb); bigBedFileClose(&bbi); lmCleanup(&lm); hFreeConn(&conn); return bag; } static void transMapBagFree(struct transMapBag **bagPtr) /* free the bag */ { struct transMapBag *bag = *bagPtr; if (bag != NULL) { pslFree(&bag->psl); bigTransMapFree(&bag->meta); freez(bagPtr); } } static void prOrgScientificDb(char *db) /* print organism and scientific name for a database. */ { char *org = hOrganism(db); char *sciName = hScientificName(db); if ((org != NULL) && (sciName != NULL)) printf("%s (%s)", org, sciName); else printf("n/a"); freeMem(org); freeMem(sciName); } static void displayMapped(struct transMapBag *bag) /* display information about the mapping alignment */ { printf("<TABLE class=\"transMap\">\n"); printf("<CAPTION>TransMap Alignment</CAPTION>\n"); printf("<TBODY>\n"); // organism/assembly printf("<TR CLASS=\"transMapLeft\"><TD>Organism<TD>"); prOrgScientificDb(database); printf("</TR>\n"); printf("<TR CLASS=\"transMapLeft\"><TD>Genome<TD>%s</TR>\n", database); // position printf("<TR CLASS=\"transMapLeft\">"); printf("<TD>Position<TD CLASS=\"transMapNoWrap\">"); printf("<A HREF=\"%s&position=%s:%d-%d\">", hgTracksPathAndSettings(), bag->psl->tName, bag->psl->tStart, bag->psl->tEnd); printf("%s:%d-%d</A>", bag->psl->tName, bag->psl->tStart, bag->psl->tEnd); printf("</TR>\n"); // % identity and % aligned printf("<TR CLASS=\"transMapLeft\"><TD>Identity<TD>%0.1f%%</TR>\n", 100.0*pslIdent(bag->psl)); printf("<TR CLASS=\"transMapLeft\"><TD>Aligned<TD>%0.1f%%</TR>\n", 100.0*pslQueryAligned(bag->psl)); // chain type used in mapping printf("<TR CLASS=\"transMapLeft\"><TD>Chain subset<TD>%s</TR>\n", bag->meta->chainType); printf("</TBODY></TABLE>\n"); } static void displaySource(struct transMapBag *bag) /* display information about the source gene that was mapped */ { printf("<TABLE class=\"transMap\">\n"); printf("<CAPTION>Source Alignment</CAPTION>\n"); printf("<TBODY>\n"); // organism/assembly printf("<TR CLASS=\"transMapLeft\"><TD>Organism<TD>%s (%s)</TR>\n", bag->meta->commonName, bag->meta->scientificName); printf("<TR CLASS=\"transMapLeft\"><TD>Genome<TD>%s</TR>\n", bag->meta->srcDb); printf("<TR CLASS=\"transMapLeft\"><TD>Source<TD>%s</TR>\n", formatGeneSrcSet(bag->srcSet)); // position printf("<TR CLASS=\"transMapLeft\"><TD>Position\n"); printf("<TD CLASS=\"transMapNoWrap\">"); if (bag->srcDbIsActive) printf("<A HREF=\"%s?db=%s&position=%s:%d-%d\" target=_blank>" "%s:%d-%d</A>", hgTracksName(), bag->meta->srcDb, bag->meta->srcChrom, bag->meta->srcChromStart, bag->meta->srcChromEnd, bag->meta->srcChrom, bag->meta->srcChromStart, bag->meta->srcChromEnd); else printf("%s:%d-%d", bag->meta->srcChrom, bag->meta->srcChromStart, bag->meta->srcChromEnd); printf("</TR>\n"); // % identity and % aligned, values stored as 1000*fraction ident printf("<TR CLASS=\"transMapLeft\"><TD>Identity<TD>%0.1f%%</TR>\n", bag->meta->srcIdent/10.0); printf("<TR CLASS=\"transMapLeft\"><TD>Aligned<TD>%0.1f%%</TR>\n", bag->meta->srcAligned/10.0); // gene and CDS printf("<TR CLASS=\"transMapLeft\"><TD>Gene<TD>%s</TR>\n", strOrNbsp(bag->meta->geneName)); printf("<TR CLASS=\"transMapLeft\"><TD>Gene Id<TD>%s</TR>\n", strOrNbsp(bag->meta->geneId)); printf("<TR CLASS=\"transMapLeft\"><TD>Gene Type<TD>%s</TR>\n", strOrNbsp(bag->meta->geneType)); printf("<TR CLASS=\"transMapLeft\"><TD>Transcript Id<TD>%s</TR>\n", transMapIdToAcc(bag->meta->name)); printf("<TR CLASS=\"transMapLeft\"><TD>Transcript Type<TD>%s</TR>\n", strOrNbsp(bag->meta->transcriptType)); printf("<TR CLASS=\"transMapLeft\"><TD>CDS<TD>%s</TR>\n", strOrNbsp(bag->meta->oCDS)); printf("</TBODY></TABLE>\n"); } static void displayAligns(struct trackDb *tdb, struct transMapBag *bag) /* display cDNA alignments */ { int start = cartInt(cart, "o"); printf("<H3>mRNA/Genomic Alignments</H3>"); printAlignmentsSimple(bag->psl, start, "hgcTransMapCdnaAli", tdb->table, bag->psl->qName); } void transMapClickHandler(struct trackDb *tdb, char *mappedId) /* Handle click on a transMap tracks */ { if (tdb == NULL) errAbort("transMapClickHandler called without trackDb"); if (mappedId == NULL) errAbort("transMapClickHandler called without mappedId"); struct transMapBag *bag = (trackDbSetting(tdb, "bigDataUrl") == NULL) ? transMapBagLoadDb(tdb, mappedId) : transMapBagLoadBig(tdb, mappedId); genericHeader(tdb, mappedId); printf("<TABLE class=\"transMapLayout\">\n"); printf("<TR><TD>\n"); displayMapped(bag); printf("<TD>\n"); displaySource(bag); printf("<TD> \n"); printf("</TR>\n"); printf("<TR><TD COLSPAN=3>\n"); displayAligns(tdb, bag); printf("</TR>\n"); printf("</TABLE>\n"); printTrackHtml(tdb); transMapBagFree(&bag); } static struct dnaSeq *getCdnaSeqDb(struct trackDb *tdb, char *name) /* look up sequence name in seq and extFile tables specified * for base coloring. */ { /* FIXME: this is really a rip off of some of the code in * hgTracks/cds.c; really need to centralize it somewhere */ char *spec = trackDbRequiredSetting(tdb, BASE_COLOR_USE_SEQUENCE); char *specCopy = cloneString(spec); // value is: extFile seqTbl extFileTbl char *words[3]; int nwords = chopByWhite(specCopy, words, ArraySize(words)); if ((nwords != ArraySize(words)) || !sameString(words[0], "extFile")) errAbort("invalid %s track setting: %s", BASE_COLOR_USE_SEQUENCE, spec); struct dnaSeq *seq = hDnaSeqMustGet(NULL, name, words[1], words[2]); freeMem(specCopy); return seq; } void transMapShowCdnaAli(struct trackDb *tdb, char *mappedId) /* Show alignment for mappedId, mostly ripped off from htcCdnaAli */ { if (tdb == NULL) errAbort("transMapShowCdnaAli called without trackDb"); if (mappedId == NULL) errAbort("transMapShowCdnaAli called without mappedId"); struct transMapBag *bag = NULL; struct dnaSeq *seq = NULL; if (trackDbSetting(tdb, "bigDataUrl") == NULL) { bag = transMapBagLoadDb(tdb, mappedId); seq = getCdnaSeqDb(tdb, transMapIdToSeqId(mappedId)); } else { bag = transMapBagLoadBig(tdb, mappedId); seq = newDnaSeq(cloneString(bag->meta->oSequence), strlen(bag->meta->oSequence), mappedId); } struct genbankCds cds; if (isEmpty(bag->meta->oCDS) || !genbankCdsParse(bag->meta->oCDS, &cds)) ZeroVar(&cds); /* can't get or parse CDS, so zero it */ char title[1024]; safef(title, sizeof title, "%s vs Genomic", mappedId); htmlFramesetStart(title); showSomeAlignment(bag->psl, seq, gftDna, 0, seq->size, NULL, cds.start, cds.end); dnaSeqFree(&seq); transMapBagFree(&bag); }