82c4da4047101ecc794214c6d630e14e52c06fa8 markd Sun Apr 16 13:40:42 2017 -0700 fixed incorrect documentation in bigTransMap; gave one field a more meaninful name diff --git src/hg/hgc/transMapClick.c src/hg/hgc/transMapClick.c index d8dbafa..a768b36 100644 --- src/hg/hgc/transMapClick.c +++ src/hg/hgc/transMapClick.c @@ -1,441 +1,441 @@ /* transMapClick - transMap click handling */ /* Copyright (C) 2010 The Regents of the University of California * See README in this or parent directory for licensing information. */ /* FIXME: the two version support should be be delete once support * for table version is fully dropped. */ /* * For the older table-based version of transMap, the following * tables are used. These are mapped into the bigTransMap objects. * * Per-genome tables associated with transMap: * transMapAliXxx - mapped PSLs alignments * transMapInfoXxx - information for mapped alignments * hgFixed tables associated with transMap: * transMapSrcXxx - information about source alignments * transMapGeneXxx - gene information * transMapSeqXxx - seq table for accessing sequence * transMapExtFileXxx - extFile table * * Xxx is: * - UcscGene - UCSC genes * - RefSeq - RefSeq mRNAs * - MRna - GenBank mRNAs * - SplicedEst - GenBank spliced ESTs */ #include "common.h" #include "hgc.h" #include "hui.h" #include "hCommon.h" #include "transMapClick.h" #include "bigTransMap.h" #include "bigBed.h" #include "bigPsl.h" #include "transMapStuff.h" #include "transMapInfo.h" #include "transMapSrc.h" #include "transMapGene.h" #include "genbank.h" enum geneSrcSetType /* constants for source sets */ { ENSEMBL_SRC_SET, REFSEQ_SRC_SET, RNA_SRC_SET, EST_SRC_SET, UCSC_GENES_SRC_SET, UNKNOWN_SRC_SET }; struct transMapBag /* object contain collected information on a specific transMap mapping * this has both PSL and bigTransMap objects */ { struct psl *psl; // transMap alignment struct bigTransMap *meta; // bigTransMap record for metadata enum geneSrcSetType srcSet; // source data set (not owned) boolean srcDbIsActive; // source database is active }; static char *guessTranscriptType(struct transMapGene *gene) /* guess the transcript type when not known */ { if (gene == NULL) return "unknown"; // EST else if (sameString(gene->cds, "n/a") || sameString(gene->cds, "")) return "non_coding"; else return "protein_coding"; } static enum geneSrcSetType guessGeneSrcSet(struct trackDb *tdb) /* guess the the source set from the table name for table * based-transmap */ { if (stringIn("UcscGenes", tdb->table)) return UCSC_GENES_SRC_SET; else if (stringIn("RefSeq", tdb->table)) return REFSEQ_SRC_SET; else if (stringIn("MRna", tdb->table)) return RNA_SRC_SET; else if (stringIn("SplicedEst", tdb->table)) return EST_SRC_SET; else return UNKNOWN_SRC_SET; } static char *chainSubsetToBigStr(enum transMapInfoChainSubset cs) /* convert chain subset type to string used in bigTransMap */ { switch (cs) { case transMapInfoUnknown: assert(FALSE); return "unknown"; case transMapInfoAll: return "all"; case transMapInfoSyn: return "syn"; case transMapInfoRbest: return "rbest"; } assert(FALSE); return NULL; } static struct bigTransMap *buildFakeBigTransMapRec(struct transMapInfo *info, struct transMapSrc *src, struct transMapGene *gene) /* build a partial bigTransMap record from the tables; only metadata fields * are filled in. */ { struct bigTransMap *bigTransMap; AllocVar(bigTransMap); bigTransMap->seqType = 1; bigTransMap->name = cloneString(src->id); bigTransMap->srcDb = cloneString(src->db); bigTransMap->srcChrom = cloneString(src->chrom); bigTransMap->srcChromStart = src->chromStart; bigTransMap->srcChromEnd = src->chromEnd; -bigTransMap->srcScore = (int)(1000.0*src->ident); +bigTransMap->srcIdent = (int)(1000.0*src->ident); bigTransMap->srcAligned = (int)(1000.0*src->aligned); if (gene != NULL) { bigTransMap->geneName = cloneString(gene->geneName); bigTransMap->geneId = cloneString(gene->geneId); bigTransMap->oCDS = cloneString(gene->cds); } bigTransMap->geneType = cloneString(guessTranscriptType(gene)); // set for ESTs bigTransMap->transcriptType = cloneString(guessTranscriptType(gene)); bigTransMap->chainType = cloneString(chainSubsetToBigStr(info->chainSubset)); bigTransMap->commonName = hOrganism(info->srcDb); if (bigTransMap->commonName == NULL) bigTransMap->commonName = cloneString(""); bigTransMap->scientificName = hScientificName(info->srcDb); if (bigTransMap->scientificName == NULL) bigTransMap->scientificName = cloneString(""); bigTransMap->orgAbbrev = cloneString(emptyForNull(hOrgShortForDb(info->srcDb))); return bigTransMap; } static struct transMapBag *transMapBagLoadDb(struct trackDb *tdb, char *mappedId) /* load information from various tables for database version of transmap */ { struct sqlConnection *conn = hAllocConn(database); struct transMapBag *bag; AllocVar(bag); bag->psl = getAlignments(conn, tdb->table, mappedId); char *transMapInfoTbl = trackDbRequiredSetting(tdb, transMapInfoTblSetting); struct transMapInfo *info = transMapInfoQuery(conn, transMapInfoTbl, mappedId); char *transMapSrcTbl = trackDbRequiredSetting(tdb, transMapSrcTblSetting); struct transMapSrc *src = transMapSrcQuery(conn, transMapSrcTbl, info->srcDb, info->srcId); struct transMapGene *gene = NULL; char *transMapGeneTbl = trackDbSetting(tdb, transMapGeneTblSetting); if (transMapGeneTbl != NULL) gene = transMapGeneQuery(conn, transMapGeneTbl, info->srcDb, transMapIdToSeqId(info->srcId)); bag->srcSet = guessGeneSrcSet(tdb); bag->srcDbIsActive = hDbIsActive(info->srcDb); bag->meta = buildFakeBigTransMapRec(info, src, gene); transMapInfoFree(&info); transMapSrcFree(&src); transMapGeneFree(&gene); hFreeConn(&conn); return bag; } static enum geneSrcSetType getGeneSrcSet(struct trackDb *tdb) /* get the geneSrcSetType from trackDb */ { char *srcSet = trackDbRequiredSetting(tdb, "transMapSrcSet"); if (sameString(srcSet, "ensembl")) return ENSEMBL_SRC_SET; else if (sameString(srcSet, "refseq")) return REFSEQ_SRC_SET; else if (sameString(srcSet, "rna")) return RNA_SRC_SET; else if (sameString(srcSet, "est")) return EST_SRC_SET; else return UNKNOWN_SRC_SET; } static char *formatGeneSrcSet(enum geneSrcSetType srcSet) /* get display version of source set */ { switch (srcSet) { case ENSEMBL_SRC_SET: return "Ensembl"; case REFSEQ_SRC_SET: return "RefSeq RNA"; case RNA_SRC_SET: return "GenBank RNA"; case EST_SRC_SET: return "GenBank EST"; case UCSC_GENES_SRC_SET: return "UCSC Genes"; case UNKNOWN_SRC_SET: return "Unknown"; } return "Unknown"; } static struct transMapBag *transMapBagLoadBig(struct trackDb *tdb, char *mappedId) /* load information from bigTransMap file */ { struct sqlConnection *conn = hAllocConn(database); struct transMapBag *bag; AllocVar(bag); char *fileName = bbiNameFromSettingOrTable(tdb, conn, tdb->table); char *chrom = cartString(cart, "c"); struct bbiFile *bbi = bigBedFileOpen(fileName); struct lm *lm = lmInit(0); int fieldIx; struct bptFile *bpt = bigBedOpenExtraIndex(bbi, "name", &fieldIx); struct bigBedInterval *bb = bigBedNameQuery(bbi, bpt, fieldIx, mappedId, lm); if (slCount(bb) != 1) errAbort("expected 1 item named \"%s\", got %d from %s", mappedId, slCount(bb), fileName); char *fields[bbi->fieldCount]; char startBuf[32], endBuf[32]; int bbFieldCount = bigBedIntervalToRow(bb, chrom, startBuf, endBuf, fields, bbi->fieldCount); if (bbFieldCount != BIGTRANSMAP_NUM_COLS) errAbort("expected %d fields in bigTransMap record, got %d in %s", BIGTRANSMAP_NUM_COLS, bbFieldCount, fileName); bag->psl = pslFromBigPsl(chrom, bb, 0, NULL, NULL); bag->meta = bigTransMapLoad(fields); bag->srcSet = getGeneSrcSet(tdb); bag->srcDbIsActive = hDbIsActive(bag->meta->srcDb); bigBedFileClose(&bbi); lmCleanup(&lm); hFreeConn(&conn); return bag; } static void transMapBagFree(struct transMapBag **bagPtr) /* free the bag */ { struct transMapBag *bag = *bagPtr; if (bag != NULL) { pslFree(&bag->psl); bigTransMapFree(&bag->meta); freez(bagPtr); } } static void prOrgScientificDb(char *db) /* print organism and scientific name for a database. */ { char *org = hOrganism(db); char *sciName = hScientificName(db); if ((org != NULL) && (sciName != NULL)) printf("%s (%s)", org, sciName); else printf("n/a"); freeMem(org); freeMem(sciName); } static void displayMapped(struct transMapBag *bag) /* display information about the mapping alignment */ { printf("\n"); printf("\n"); printf("\n"); // organism/assembly printf("\n"); printf("\n", database); // position printf(""); printf("\n"); // % identity and % aligned printf("\n", 100.0*pslIdent(bag->psl)); printf("\n", 100.0*pslQueryAligned(bag->psl)); // chain type used in mapping printf("\n", bag->meta->chainType); printf("
TransMap Alignment
Organism"); prOrgScientificDb(database); printf("
Genome%s
Position"); printf("", hgTracksPathAndSettings(), bag->psl->tName, bag->psl->tStart, bag->psl->tEnd); printf("%s:%d-%d", bag->psl->tName, bag->psl->tStart, bag->psl->tEnd); printf("
Identity%0.1f%%
Aligned%0.1f%%
Chain subset%s
\n"); } static void displaySource(struct transMapBag *bag) /* display information about the source gene that was mapped */ { printf("\n"); printf("\n"); printf("\n"); // organism/assembly printf("\n", bag->meta->commonName, bag->meta->scientificName); printf("\n", bag->meta->srcDb); printf("\n", formatGeneSrcSet(bag->srcSet)); // position printf("\n"); // % identity and % aligned, values stored as 1000*fraction ident printf("\n", - bag->meta->srcScore/10.0); + bag->meta->srcIdent/10.0); printf("\n", bag->meta->srcAligned/10.0); // gene and CDS printf("\n", strOrNbsp(bag->meta->geneName)); printf("\n", strOrNbsp(bag->meta->geneId)); printf("\n", strOrNbsp(bag->meta->geneType)); printf("\n", transMapIdToAcc(bag->meta->name)); printf("\n", strOrNbsp(bag->meta->transcriptType)); printf("\n", strOrNbsp(bag->meta->oCDS)); printf("
Source Alignment
Organism%s (%s)
Genome%s
Source%s
Position\n"); printf(""); if (bag->srcDbIsActive) printf("" "%s:%d-%d", hgTracksName(), bag->meta->srcDb, bag->meta->srcChrom, bag->meta->srcChromStart, bag->meta->srcChromEnd, bag->meta->srcChrom, bag->meta->srcChromStart, bag->meta->srcChromEnd); else printf("%s:%d-%d", bag->meta->srcChrom, bag->meta->srcChromStart, bag->meta->srcChromEnd); printf("
Identity%0.1f%%
Aligned%0.1f%%
Gene%s
Gene Id%s
Gene Type%s
Transcript Id%s
Transcript Type%s
CDS%s
\n"); } static void displayAligns(struct trackDb *tdb, struct transMapBag *bag) /* display cDNA alignments */ { int start = cartInt(cart, "o"); printf("

mRNA/Genomic Alignments

"); printAlignmentsSimple(bag->psl, start, "hgcTransMapCdnaAli", tdb->table, bag->psl->qName); } void transMapClickHandler(struct trackDb *tdb, char *mappedId) /* Handle click on a transMap tracks */ { if (tdb == NULL) errAbort("transMapClickHandler called without trackDb"); if (mappedId == NULL) errAbort("transMapClickHandler called without mappedId"); struct transMapBag *bag = (trackDbSetting(tdb, "bigDataUrl") == NULL) ? transMapBagLoadDb(tdb, mappedId) : transMapBagLoadBig(tdb, mappedId); genericHeader(tdb, mappedId); printf("\n"); printf("\n"); printf("\n"); printf("
\n"); displayMapped(bag); printf("\n"); displaySource(bag); printf(" \n"); printf("
\n"); displayAligns(tdb, bag); printf("
\n"); printTrackHtml(tdb); transMapBagFree(&bag); } static struct dnaSeq *getCdnaSeqDb(struct trackDb *tdb, char *name) /* look up sequence name in seq and extFile tables specified * for base coloring. */ { /* FIXME: this is really a rip off of some of the code in * hgTracks/cds.c; really need to centralize it somewhere */ char *spec = trackDbRequiredSetting(tdb, BASE_COLOR_USE_SEQUENCE); char *specCopy = cloneString(spec); // value is: extFile seqTbl extFileTbl char *words[3]; int nwords = chopByWhite(specCopy, words, ArraySize(words)); if ((nwords != ArraySize(words)) || !sameString(words[0], "extFile")) errAbort("invalid %s track setting: %s", BASE_COLOR_USE_SEQUENCE, spec); struct dnaSeq *seq = hDnaSeqMustGet(NULL, name, words[1], words[2]); freeMem(specCopy); return seq; } void transMapShowCdnaAli(struct trackDb *tdb, char *mappedId) /* Show alignment for mappedId, mostly ripped off from htcCdnaAli */ { if (tdb == NULL) errAbort("transMapShowCdnaAli called without trackDb"); if (mappedId == NULL) errAbort("transMapShowCdnaAli called without mappedId"); struct transMapBag *bag = NULL; struct dnaSeq *seq = NULL; if (trackDbSetting(tdb, "bigDataUrl") == NULL) { bag = transMapBagLoadDb(tdb, mappedId); seq = getCdnaSeqDb(tdb, transMapIdToSeqId(mappedId)); } else { bag = transMapBagLoadBig(tdb, mappedId); seq = newDnaSeq(cloneString(bag->meta->oSequence), strlen(bag->meta->oSequence), mappedId); } struct genbankCds cds; if (isEmpty(bag->meta->oCDS) || !genbankCdsParse(bag->meta->oCDS, &cds)) ZeroVar(&cds); /* can't get or parse CDS, so zero it */ writeFramesetType(); puts(""); printf("\n%s vs Genomic\n\n\n", mappedId); showSomeAlignment(bag->psl, seq, gftDna, 0, seq->size, NULL, cds.start, cds.end); dnaSeqFree(&seq); transMapBagFree(&bag); }