491bae89e94aedf11666a9e662d2d80012cc312d
markd
Wed Jan 25 07:38:10 2017 -0800
add more information to display of transmap source gene
diff --git src/hg/hgc/transMapClick.c src/hg/hgc/transMapClick.c
index fdff8a3..bc7f82e 100644
--- src/hg/hgc/transMapClick.c
+++ src/hg/hgc/transMapClick.c
@@ -1,354 +1,428 @@
/* transMapClick - transMap click handling */
/* Copyright (C) 2010 The Regents of the University of California
* See README in this or parent directory for licensing information. */
/* FIXME: the two-version support should be deleted once support
* for the table version is fully dropped.
*/
/*
* For the older table-based version of transMap, the following
* tables are used. These are mapped into the bigTransMap objects.
*
* Per-genome tables associated with transMap:
* transMapAliXxx - mapped PSLs alignments
* transMapInfoXxx - information for mapped alignments
* hgFixed tables associated with transMap:
* transMapSrcXxx - information about source alignments
* transMapGeneXxx - gene information
* transMapSeqXxx - seq table for accessing sequence
* transMapExtFileXxx - extFile table
*
* Xxx is:
* - UcscGene - UCSC genes
* - RefSeq - RefSeq mRNAs
* - MRna - GenBank mRNAs
* - SplicedEst - GenBank spliced ESTs
*/
#include "common.h"
#include "hgc.h"
#include "hui.h"
#include "hCommon.h"
#include "transMapClick.h"
#include "bigTransMap.h"
#include "bigBed.h"
#include "bigPsl.h"
#include "transMapStuff.h"
#include "transMapInfo.h"
#include "transMapSrc.h"
#include "transMapGene.h"
#include "genbank.h"
+enum geneSrcSetType
+/* Constants identifying the source set a transMap alignment came from.
+ * formatGeneSrcSet() converts a value to its display label. */
+{
+ ENSEMBL_SRC_SET,     // displayed as "Ensembl"
+ REFSEQ_SRC_SET,      // displayed as "RefSeq RNA"
+ RNA_SRC_SET,         // displayed as "GenBank RNA"
+ EST_SRC_SET,         // displayed as "GenBank EST"
+ UCSC_GENES_SRC_SET,  // displayed as "UCSC Genes"; only returned by guessGeneSrcSet()
+ UNKNOWN_SRC_SET      // displayed as "Unknown"; fallback when nothing matches
+};
struct transMapBag
/* object containing the information collected for a specific transMap
* mapping; this has both PSL and bigTransMap objects.  psl and meta are
* released by transMapBagFree(). */
{
struct psl *psl; // transMap alignment
struct bigTransMap *meta; // bigTransMap record for metadata
+ enum geneSrcSetType srcSet; // source data set (plain enum value, nothing to free)
boolean srcDbIsActive; // source database is active
};
static char *guessTranscriptType(struct transMapGene *gene)
/* guess the transcript type when not known */
{
if (gene == NULL)
return "unknown"; // EST
else if (sameString(gene->cds, "n/a") || sameString(gene->cds, ""))
return "non_coding";
else
return "protein_coding";
}
+static enum geneSrcSetType guessGeneSrcSet(struct trackDb *tdb)
+/* Guess the source set from the table name for table-based transMap.
+ * Checks tdb->table for a known name fragment and returns
+ * UNKNOWN_SRC_SET when none is found. */
+{
+if (stringIn("UcscGenes", tdb->table))
+ return UCSC_GENES_SRC_SET;
+else if (stringIn("RefSeq", tdb->table))
+ return REFSEQ_SRC_SET;
+else if (stringIn("MRna", tdb->table))
+ return RNA_SRC_SET;
+else if (stringIn("SplicedEst", tdb->table))
+ return EST_SRC_SET;
+else
+ return UNKNOWN_SRC_SET;
+}
+
static char *chainSubsetToBigStr(enum transMapInfoChainSubset cs)
/* Convert a chain subset type to the string used in bigTransMap.
 * Known subsets map to "all", "syn" or "rbest".  transMapInfoUnknown and
 * any unrecognized value trip an assert; when asserts are disabled they
 * yield "unknown" and NULL respectively. */
{
if (cs == transMapInfoAll)
    return "all";
if (cs == transMapInfoSyn)
    return "syn";
if (cs == transMapInfoRbest)
    return "rbest";
// only the unknown subset or an out-of-range value reaches this point
assert(FALSE);
return (cs == transMapInfoUnknown) ? "unknown" : NULL;
}
static struct bigTransMap *buildFakeBigTransMapRec(struct transMapInfo *info,
                                                   struct transMapSrc *src,
                                                   struct transMapGene *gene)
/* Build a partial bigTransMap record from the table-based transMap rows;
 * only metadata fields are filled in.  gene may be NULL, in which case
 * geneName and geneId are not assigned and the gene/transcript types are
 * guessed as "unknown".  All string fields assigned here are allocated
 * copies, so the result can be released with bigTransMapFree(). */
{
struct bigTransMap *bigTransMap;
AllocVar(bigTransMap);
bigTransMap->seqType = 1;
bigTransMap->srcDb = cloneString(src->db);
bigTransMap->srcChrom = cloneString(src->chrom);
bigTransMap->srcChromStart = src->chromStart;
bigTransMap->srcChromEnd = src->chromEnd;
// bigTransMap stores identity and aligned as 1000*fraction
bigTransMap->srcScore = (int)(1000.0*src->ident);
bigTransMap->srcAligned = (int)(1000.0*src->aligned);
if (gene != NULL)
    {
    bigTransMap->geneName = cloneString(gene->geneName);
    bigTransMap->geneId = cloneString(gene->geneId);
    }
bigTransMap->geneType = cloneString(guessTranscriptType(gene));  // set for ESTs
bigTransMap->transcriptType = cloneString(guessTranscriptType(gene));
bigTransMap->chainType = cloneString(chainSubsetToBigStr(info->chainSubset));
bigTransMap->commonName = hOrganism(info->srcDb);
if (bigTransMap->commonName == NULL)
    bigTransMap->commonName = cloneString("");
// displaySource() prints scientificName with %s and no NULL guard, so it
// must always be a valid string, just like commonName
bigTransMap->scientificName = hScientificName(info->srcDb);
if (bigTransMap->scientificName == NULL)
    bigTransMap->scientificName = cloneString("");
bigTransMap->orgAbbrev = cloneString(emptyForNull(hOrgShortForDb(info->srcDb)));
return bigTransMap;
}
static struct transMapBag *transMapBagLoadDb(struct trackDb *tdb, char *mappedId)
/* Load information from various tables for the database version of transMap.
* Reads the alignment from tdb->table, the info and source rows from the
* tables named by the required trackDb settings, and the gene row when the
* optional gene table setting is present.  The rows are converted into a
* partial bigTransMap record and then freed.  Caller releases the result
* with transMapBagFree(). */
{
struct sqlConnection *conn = hAllocConn(database);
struct transMapBag *bag;
AllocVar(bag);
bag->psl = getAlignments(conn, tdb->table, mappedId);
char *transMapInfoTbl = trackDbRequiredSetting(tdb, transMapInfoTblSetting);
struct transMapInfo *info = transMapInfoQuery(conn, transMapInfoTbl, mappedId);
char *transMapSrcTbl = trackDbRequiredSetting(tdb, transMapSrcTblSetting);
struct transMapSrc *src = transMapSrcQuery(conn, transMapSrcTbl, info->srcDb, info->srcId);
struct transMapGene *gene = NULL;
char *transMapGeneTbl = trackDbSetting(tdb, transMapGeneTblSetting);
if (transMapGeneTbl != NULL)
gene = transMapGeneQuery(conn, transMapGeneTbl,
info->srcDb, transMapIdToSeqId(info->srcId));
+bag->srcSet = guessGeneSrcSet(tdb);
bag->srcDbIsActive = hDbIsActive(info->srcDb);
bag->meta = buildFakeBigTransMapRec(info, src, gene);
+// displaySource() passes meta->name to transMapIdToAcc(); the record built
+// from the tables carries no item name, so use the id that was queried,
+// matching the bigTransMap path where the record is looked up by name.
+if (bag->meta->name == NULL)
+    bag->meta->name = cloneString(mappedId);
transMapInfoFree(&info);
transMapSrcFree(&src);
transMapGeneFree(&gene);
hFreeConn(&conn);
return bag;
}
+static enum geneSrcSetType getGeneSrcSet(struct trackDb *tdb)
+/* Get the geneSrcSetType from the required "transMapSrcSet" trackDb
+ * setting.  Recognized values are "ensembl", "refseq", "rna" and "est";
+ * any other value yields UNKNOWN_SRC_SET. */
+{
+char *srcSet = trackDbRequiredSetting(tdb, "transMapSrcSet");
+if (sameString(srcSet, "ensembl"))
+ return ENSEMBL_SRC_SET;
+else if (sameString(srcSet, "refseq"))
+ return REFSEQ_SRC_SET;
+else if (sameString(srcSet, "rna"))
+ return RNA_SRC_SET;
+else if (sameString(srcSet, "est"))
+ return EST_SRC_SET;
+else
+ return UNKNOWN_SRC_SET;
+}
+
+static char *formatGeneSrcSet(enum geneSrcSetType srcSet)
+/* Get the display version of a source set.  Returns a constant string;
+ * values outside the enum fall through to "Unknown". */
+{
+switch (srcSet)
+ {
+ case ENSEMBL_SRC_SET:
+ return "Ensembl";
+ case REFSEQ_SRC_SET:
+ return "RefSeq RNA";
+ case RNA_SRC_SET:
+ return "GenBank RNA";
+ case EST_SRC_SET:
+ return "GenBank EST";
+ case UCSC_GENES_SRC_SET:
+ return "UCSC Genes";
+ case UNKNOWN_SRC_SET:
+ return "Unknown";
+ }
+return "Unknown";
+}
+
static struct transMapBag *transMapBagLoadBig(struct trackDb *tdb, char *mappedId)
/* Load information for mappedId from the track's bigTransMap file.
* The item is looked up through the file's "name" extra index; this aborts
* unless exactly one item matches and the row has BIGTRANSMAP_NUM_COLS
* fields.  The chromosome is taken from the cart variable "c".  Caller
* releases the result with transMapBagFree(). */
{
struct sqlConnection *conn = hAllocConn(database);
struct transMapBag *bag;
AllocVar(bag);
char *fileName = bbiNameFromSettingOrTable(tdb, conn, tdb->table);
char *chrom = cartString(cart, "c");
struct bbiFile *bbi = bigBedFileOpen(fileName);
struct lm *lm = lmInit(0);
int fieldIx;
struct bptFile *bpt = bigBedOpenExtraIndex(bbi, "name", &fieldIx);
struct bigBedInterval *bb = bigBedNameQuery(bbi, bpt, fieldIx, mappedId, lm);
if (slCount(bb) != 1)
errAbort("expected 1 item named \"%s\", got %d from %s", mappedId, slCount(bb), fileName);
char *fields[bbi->fieldCount];
char startBuf[32], endBuf[32];
int bbFieldCount = bigBedIntervalToRow(bb, chrom, startBuf, endBuf, fields,
bbi->fieldCount);
if (bbFieldCount != BIGTRANSMAP_NUM_COLS)
errAbort("expected %d fields in bigTransMap record, got %d in %s",
BIGTRANSMAP_NUM_COLS, bbFieldCount, fileName);
bag->psl = pslFromBigPsl(chrom, bb, 0, NULL, NULL);
bag->meta = bigTransMapLoad(fields);
+bag->srcSet = getGeneSrcSet(tdb);
bag->srcDbIsActive = hDbIsActive(bag->meta->srcDb);
bigBedFileClose(&bbi);
lmCleanup(&lm);
hFreeConn(&conn);
return bag;
}
static void transMapBagFree(struct transMapBag **bagPtr)
/* Free a bag along with the PSL and metadata record it holds, and
 * clear the caller's pointer.  Does nothing when *bagPtr is NULL. */
{
struct transMapBag *bag = *bagPtr;
if (bag == NULL)
    return;
pslFree(&bag->psl);
bigTransMapFree(&bag->meta);
freez(bagPtr);
}
-static void prOrgScientific(char *db)
-/* print organism and scientific name for a database. */
+static void prOrgScientificDb(char *db)
+/* Print "organism (scientific name)" for a database, or "n/a" when
+ * either name is not available.  The looked-up strings are freed here. */
{
char *org = hOrganism(db);
char *sciName = hScientificName(db);
if ((org != NULL) && (sciName != NULL))
printf("%s (%s)", org, sciName);
else
printf("n/a");
freeMem(org);
freeMem(sciName);
}
static void displayMapped(struct transMapBag *bag)
/* Display information about the mapping alignment: organism and genome of
* the current database, the target position from bag->psl, percent identity
* and percent aligned computed from the PSL, and the chain subset taken from
* bag->meta.
* NOTE(review): the string literals below look like they have lost their
* markup in this copy (empty format strings with arguments, literals split
* across lines) - verify against the repository before relying on them. */
{
printf("
\n");
printf("TransMap Alignment\n");
printf("\n");
// organism/assembly
printf("Organism | ");
-prOrgScientific(database);
+prOrgScientificDb(database);
printf(" |
\n");
printf("Genome | %s |
\n", database);
// position
printf("");
printf("Position | ");
printf("",
hgTracksPathAndSettings(),
bag->psl->tName, bag->psl->tStart, bag->psl->tEnd);
printf("%s:%d-%d", bag->psl->tName, bag->psl->tStart, bag->psl->tEnd);
printf(" |
\n");
// % identity and % aligned
printf("Identity | %0.1f%% |
\n",
100.0*pslIdent(bag->psl));
printf("Aligned | %0.1f%% |
\n",
100.0*pslQueryAligned(bag->psl));
// chain type used in mapping
printf("Chain subset | %s |
\n",
bag->meta->chainType);
printf("
\n");
}
static void displaySource(struct transMapBag *bag)
/* Display information about the source gene that was mapped: organism,
* genome, source set label, source position (with different output when the
* source database is active), identity, aligned, gene and transcript
* details, and CDS.
* commonName and scientificName are printed with %s and name is passed to
* transMapIdToAcc() without any NULL guard, so bag->meta must have those
* fields set.
* NOTE(review): the string literals below look like they have lost their
* markup in this copy - verify against the repository. */
{
printf("\n");
printf("Source Alignment\n");
+
printf("\n");
// organism/assembly
-printf("Organism | ");
-prOrgScientific(bag->meta->srcDb);
-printf(" |
\n");
+printf("Organism | %s (%s) |
\n",
+ bag->meta->commonName, bag->meta->scientificName);
printf("Genome | %s |
\n", bag->meta->srcDb);
+printf("Source | %s |
\n", formatGeneSrcSet(bag->srcSet));
// position
printf("Position\n");
printf(" | ");
if (bag->srcDbIsActive)
printf(""
"%s:%d-%d",
hgTracksName(), bag->meta->srcDb,
bag->meta->srcChrom, bag->meta->srcChromStart, bag->meta->srcChromEnd,
bag->meta->srcChrom, bag->meta->srcChromStart, bag->meta->srcChromEnd);
else
printf("%s:%d-%d", bag->meta->srcChrom, bag->meta->srcChromStart, bag->meta->srcChromEnd);
printf(" |
\n");
// % identity and % aligned, values stored as 1000*fraction ident
printf("Identity | %0.1f%% |
\n",
bag->meta->srcScore/10.0);
printf("Aligned | %0.1f%% |
\n",
bag->meta->srcAligned/10.0);
// gene and CDS
printf("Gene | %s |
\n",
strOrNbsp(bag->meta->geneName));
printf("Gene Id | %s |
\n",
strOrNbsp(bag->meta->geneId));
+printf("Gene Type | %s |
\n",
+ strOrNbsp(bag->meta->geneType));
+printf("Transcript Id | %s |
\n",
+ transMapIdToAcc(bag->meta->name));
+printf("Transcript Type | %s |
\n",
+ strOrNbsp(bag->meta->transcriptType));
printf("CDS | %s |
\n",
strOrNbsp(bag->meta->oCDS));
printf("
\n");
}
static void displayAligns(struct trackDb *tdb, struct transMapBag *bag)
/* Display cDNA alignments: prints a heading, then lists bag->psl via
* printAlignmentsSimple() using the "hgcTransMapCdnaAli" type name and the
* start position from the cart variable "o".
* NOTE(review): the heading literal looks like it has lost its markup in
* this copy - verify against the repository. */
{
int start = cartInt(cart, "o");
printf("mRNA/Genomic Alignments
");
printAlignmentsSimple(bag->psl, start, "hgcTransMapCdnaAli", tdb->table, bag->psl->qName);
}
void transMapClickHandler(struct trackDb *tdb, char *mappedId)
/* Handle a click on a transMap track item.  Loads the item from the tables
* when the track has no "bigDataUrl" setting, otherwise from the bigTransMap
* file.  Then prints the page header, the mapped and source sections, the
* alignment list and the track description, and frees the loaded data.
* NOTE(review): the layout string literals below look like they have lost
* their markup in this copy - verify against the repository. */
{
struct transMapBag *bag = (trackDbSetting(tdb, "bigDataUrl") == NULL)
? transMapBagLoadDb(tdb, mappedId)
: transMapBagLoadBig(tdb, mappedId);
genericHeader(tdb, mappedId);
printf("\n");
printf("\n");
displayMapped(bag);
printf(" | \n");
displaySource(bag);
printf(" | \n");
printf(" |
\n");
printf("\n");
displayAligns(tdb, bag);
printf(" |
\n");
printf("
\n");
printTrackHtml(tdb);
transMapBagFree(&bag);
}
static struct dnaSeq *getCdnaSeqDb(struct trackDb *tdb, char *name)
/* look up sequence name in seq and extFile tables specified
* for base coloring. */
{
/* FIXME: this is really a rip off of some of the code in
* hgTracks/cds.c; really need to centralize it somewhere */
char *spec = trackDbRequiredSetting(tdb, BASE_COLOR_USE_SEQUENCE);
char *specCopy = cloneString(spec);
// value is: extFile seqTbl extFileTbl
char *words[3];
int nwords = chopByWhite(specCopy, words, ArraySize(words));
if ((nwords != ArraySize(words)) || !sameString(words[0], "extFile"))
errAbort("invalid %s track setting: %s", BASE_COLOR_USE_SEQUENCE, spec);
struct dnaSeq *seq = hDnaSeqMustGet(NULL, name, words[1], words[2]);
freeMem(specCopy);
return seq;
}
void transMapShowCdnaAli(struct trackDb *tdb, char *mappedId)
/* Show alignment for mappedId, mostly ripped off from htcCdnaAli */
{
struct transMapBag *bag = NULL;
struct dnaSeq *seq = NULL;
if (trackDbSetting(tdb, "bigDataUrl") == NULL)
{
bag = transMapBagLoadDb(tdb, mappedId);
seq = getCdnaSeqDb(tdb, transMapIdToSeqId(mappedId));
}
else
{
bag = transMapBagLoadBig(tdb, mappedId);
seq = newDnaSeq(cloneString(bag->meta->oSequence), strlen(bag->meta->oSequence),
mappedId);
}
struct genbankCds cds;
if (isEmpty(bag->meta->oCDS) || !genbankCdsParse(bag->meta->oCDS, &cds))
ZeroVar(&cds); /* can't get or parse CDS, so zero it */
writeFramesetType();
puts("");
printf("\n%s vs Genomic\n\n\n", mappedId);
showSomeAlignment(bag->psl, seq, gftDna, 0, seq->size, NULL, cds.start, cds.end);
dnaSeqFree(&seq);
transMapBagFree(&bag);
}