314b81670d2d1446d4da81108ac0ce2a72fc569d
markd
Fri Jan 6 15:32:57 2017 -0800
bigTransMap hgc working
diff --git src/hg/hgc/transMapClick.c src/hg/hgc/transMapClick.c
index 827e019..92a70b3 100644
--- src/hg/hgc/transMapClick.c
+++ src/hg/hgc/transMapClick.c
@@ -1,265 +1,341 @@
/* transMapClick - transMap click handling */
/* Copyright (C) 2010 The Regents of the University of California
* See README in this or parent directory for licensing information. */
+/* FIXME: the two-version support should be deleted once support
+ * for the table version is fully dropped.
+ */
+
/*
+ * For the older table-based version of transMap, the following
+ * tables are used. These are mapped into the bigTransMap objects.
+ *
* Per-genome tables associated with transMap:
* transMapAliXxx - mapped PSLs alignments
* transMapInfoXxx - information for mapped alignments
* hgFixed tables associated with transMap:
* transMapSrcXxx - information about source alignments
* transMapGeneXxx - gene information
* transMapSeqXxx - seq table for accessing sequence
* transMapExtFileXxx - extFile table
*
* Xxx is:
* - UcscGene - UCSC genes
* - RefSeq - RefSeq mRNAs
* - MRna - GenBank mRNAs
* - SplicedEst - GenBank spliced ESTs
*/
#include "common.h"
#include "hgc.h"
#include "hui.h"
#include "hCommon.h"
#include "transMapClick.h"
+#include "bigTransMap.h"
+#include "bigBed.h"
+#include "bigPsl.h"
#include "transMapStuff.h"
#include "transMapInfo.h"
#include "transMapSrc.h"
#include "transMapGene.h"
#include "genbank.h"
struct transMapBag
-/* object contain collected information on a specific transMap mapping */
+/* Object containing the collected information on a specific transMap mapping.
+* This holds both the PSL alignment and a bigTransMap object for the metadata. */
{
struct psl *psl; // transMap alignment
- struct transMapInfo *info; // addition information about mapping
- struct transMapSrc *src; // source information
- struct transMapGene *gene; // gene information
+ struct bigTransMap *meta; // bigTransMap record for metadata
boolean srcDbIsActive; // source database is active
};
-static struct transMapBag *transMapBagLoad(struct sqlConnection *conn,
- struct trackDb *tdb, char *mappedId,
- boolean getSrcRec)
-/* load information from various tables */
+static char *guessTranscriptType(struct transMapGene *gene)
+/* Guess the transcript type when it is not known.  Returns a static string:
+ * "unknown" when there is no gene record, "non_coding" when the CDS is
+ * "n/a" or empty, and "protein_coding" otherwise. */
+{
+if (gene == NULL)
+    return "unknown";  // EST
+boolean lacksCds = sameString(gene->cds, "n/a") || sameString(gene->cds, "");
+return lacksCds ? "non_coding" : "protein_coding";
+}
+
+static char *chainSubsetToBigStr(enum transMapInfoChainSubset cs)
+/* Convert a chain subset type to the string used in bigTransMap.  The result
+ * is a string constant; callers that need to own it must clone it. */
{
+char *bigStr = NULL;
+switch (cs)
+    {
+    case transMapInfoUnknown:
+        assert(FALSE);  // not expected; yields "unknown" if asserts are compiled out
+        bigStr = "unknown";
+        break;
+    case transMapInfoAll:
+        bigStr = "all";
+        break;
+    case transMapInfoSyn:
+        bigStr = "syn";
+        break;
+    case transMapInfoRbest:
+        bigStr = "rbest";
+        break;
+    }
+if (bigStr == NULL)
+    assert(FALSE);  // value matched none of the cases above
+return bigStr;
+}
+
+static struct bigTransMap *buildFakeBigTransMapRec(struct transMapInfo *info,
+                                                   struct transMapSrc *src,
+                                                   struct transMapGene *gene)
+/* Build a partial bigTransMap record from the table-based transMap records;
+ * only metadata fields are filled in.  gene may be NULL, in which case the
+ * gene name, gene id and CDS are left NULL.  String fields are copies, so the
+ * caller remains free to release info, src and gene afterwards. */
+{
+struct bigTransMap *bigTransMap;
+AllocVar(bigTransMap);
+bigTransMap->seqType = 1;
+bigTransMap->srcDb = cloneString(src->db);
+bigTransMap->srcChrom = cloneString(src->chrom);
+bigTransMap->srcChromStart = src->chromStart;
+bigTransMap->srcChromEnd = src->chromEnd;
+// identity and aligned are fractions in the tables, stored here as 1000*fraction
+bigTransMap->srcScore = (int)(1000.0*src->ident);
+bigTransMap->srcAligned = (int)(1000.0*src->aligned);
+if (gene != NULL)
+    {
+    bigTransMap->geneName = cloneString(gene->geneName);
+    bigTransMap->geneId = cloneString(gene->geneId);
+    // The CDS is read back through meta->oCDS by the display and cDNA
+    // alignment code, so it must be carried over from the gene record too.
+    bigTransMap->oCDS = cloneString(gene->cds);
+    }
+bigTransMap->geneType = cloneString(guessTranscriptType(gene));  // set for ESTs
+bigTransMap->transcriptType = cloneString(guessTranscriptType(gene));
+bigTransMap->chainType = cloneString(chainSubsetToBigStr(info->chainSubset));
+bigTransMap->commonName = hOrganism(info->srcDb);
+if (bigTransMap->commonName == NULL)
+    bigTransMap->commonName = cloneString("");
+bigTransMap->orgAbbrev = cloneString(emptyForNull(hOrgShortForDb(info->srcDb)));
+return bigTransMap;
+}
+
+static struct transMapBag *transMapBagLoadDb(struct trackDb *tdb, char *mappedId)
+/* Load information from the various tables for the database (table-based)
+ * version of transMap.  The info, source and optional gene records are
+ * converted into a partial bigTransMap record and then freed, so the
+ * returned bag holds only the PSL and that record. */
+{
+struct sqlConnection *conn = hAllocConn(database);
struct transMapBag *bag;
AllocVar(bag);
bag->psl = getAlignments(conn, tdb->table, mappedId);
char *transMapInfoTbl = trackDbRequiredSetting(tdb, transMapInfoTblSetting);
-bag->info = transMapInfoQuery(conn, transMapInfoTbl, mappedId);
+struct transMapInfo *info = transMapInfoQuery(conn, transMapInfoTbl, mappedId);
-if (getSrcRec)
- {
char *transMapSrcTbl = trackDbRequiredSetting(tdb, transMapSrcTblSetting);
- bag->src = transMapSrcQuery(conn, transMapSrcTbl, bag->info->srcDb, bag->info->srcId);
- }
+struct transMapSrc *src = transMapSrcQuery(conn, transMapSrcTbl, info->srcDb, info->srcId);
+struct transMapGene *gene = NULL;
char *transMapGeneTbl = trackDbSetting(tdb, transMapGeneTblSetting);
if (transMapGeneTbl != NULL)
- bag->gene = transMapGeneQuery(conn, transMapGeneTbl,
- bag->info->srcDb, transMapIdToSeqId(bag->info->srcId));
-bag->srcDbIsActive = hDbIsActive(bag->info->srcDb);
+ gene = transMapGeneQuery(conn, transMapGeneTbl,
+ info->srcDb, transMapIdToSeqId(info->srcId));
+bag->srcDbIsActive = hDbIsActive(info->srcDb);
+bag->meta = buildFakeBigTransMapRec(info, src, gene);
+transMapInfoFree(&info);
+transMapSrcFree(&src);
+transMapGeneFree(&gene);
+hFreeConn(&conn);
+return bag;
+}
+
+static struct transMapBag *transMapBagLoadBig(struct trackDb *tdb, char *mappedId)
+/* Load information for mappedId from the track's bigTransMap file.  The
+ * chromosome is taken from the cart variable "c".  Aborts unless exactly one
+ * record named mappedId is found and it has BIGTRANSMAP_NUM_COLS fields. */
+{
+struct sqlConnection *conn = hAllocConn(database);
+struct transMapBag *bag;
+AllocVar(bag);
+
+char *fileName = bbiNameFromSettingOrTable(tdb, conn, tdb->table);
+char *chrom = cartString(cart, "c");
+struct bbiFile *bbi = bigBedFileOpen(fileName);
+struct lm *lm = lmInit(0);
+int fieldIx;
+struct bptFile *bpt = bigBedOpenExtraIndex(bbi, "name", &fieldIx);
+struct bigBedInterval *bb = bigBedNameQuery(bbi, bpt, fieldIx, mappedId, lm);
+if (slCount(bb) != 1)
+    errAbort("expected 1 item named \"%s\", got %d from %s", mappedId, slCount(bb), fileName);
+char *fields[bbi->fieldCount];
+char startBuf[32], endBuf[32];
+int bbFieldCount = bigBedIntervalToRow(bb, chrom, startBuf, endBuf, fields,
+                                       bbi->fieldCount);
+if (bbFieldCount != BIGTRANSMAP_NUM_COLS)
+    errAbort("expected %d fields in bigTransMap record, got %d in %s",
+             BIGTRANSMAP_NUM_COLS, bbFieldCount, fileName);
+// Build both objects before the cleanup below; the row presumably points
+// into the local buffers and the interval's memory -- TODO confirm.
+bag->psl = pslFromBigPsl(chrom, bb, 0, NULL, NULL);
+bag->meta = bigTransMapLoad(fields);
+// Set this as the table-based loader does; otherwise it stays FALSE from
+// AllocVar and the source position is never shown as a link.
+bag->srcDbIsActive = hDbIsActive(bag->meta->srcDb);
+
+bigBedFileClose(&bbi);
+lmCleanup(&lm);
+hFreeConn(&conn);
return bag;
}
static void transMapBagFree(struct transMapBag **bagPtr)
/* free the bag */
{
struct transMapBag *bag = *bagPtr;
if (bag != NULL)
{
pslFree(&bag->psl);
- transMapInfoFree(&bag->info);
- transMapSrcFree(&bag->src);
- transMapGeneFree(&bag->gene);
+ bigTransMapFree(&bag->meta);
freez(bagPtr);
}
}
static void prOrgScientific(char *db)
/* print organism and scientific name for a database. */
{
char *org = hOrganism(db);
char *sciName = hScientificName(db);
if ((org != NULL) && (sciName != NULL))
printf("%s (%s)", org, sciName);
else
printf("n/a");
freeMem(org);
freeMem(sciName);
}
-static char *chainSubsetDesc(enum transMapInfoChainSubset cs)
-/* get description for chain subset */
-{
-switch (cs)
- {
- case transMapInfoUnknown:
- return "unknown";
- case transMapInfoAll:
- return "all";
- case transMapInfoSyn:
- return "syntenic";
- case transMapInfoRbest:
- return "reciprocal best";
- }
-return NULL;
-}
-
static void displayMapped(struct transMapBag *bag)
/* display information about the mapping alignment */
{
printf("
\n");
printf("TransMap Alignment\n");
printf("\n");
// organism/assembly
printf("Organism | ");
prOrgScientific(database);
printf(" |
\n");
printf("Genome | %s |
\n", database);
// position
printf("");
printf("Position | ");
printf("",
hgTracksPathAndSettings(),
bag->psl->tName, bag->psl->tStart, bag->psl->tEnd);
printf("%s:%d-%d", bag->psl->tName, bag->psl->tStart, bag->psl->tEnd);
printf(" |
\n");
// % identity and % aligned
printf("Identity | %0.1f%% |
\n",
100.0*pslIdent(bag->psl));
printf("Aligned | %0.1f%% |
\n",
100.0*pslQueryAligned(bag->psl));
-// chain used in mapping
-printf("Chain ID | %s |
\n",
- bag->info->mappingId);
+// chain type used in mapping
printf("Chain subset | %s |
\n",
- chainSubsetDesc(bag->info->chainSubset));
+ bag->meta->chainType);
printf("
\n");
}
static void displaySource(struct transMapBag *bag)
/* display information about the source gene that was mapped */
{
printf("\n");
printf("Source Alignment\n");
printf("\n");
// organism/assembly
printf("Organism | ");
-prOrgScientific(bag->info->srcDb);
+prOrgScientific(bag->meta->srcDb);
printf(" |
\n");
-printf("Genome | %s |
\n", bag->info->srcDb);
+printf("Genome | %s |
\n", bag->meta->srcDb);
// position
printf("Position\n");
printf(" | ");
if (bag->srcDbIsActive)
printf(""
"%s:%d-%d",
- hgTracksName(), bag->src->db,
- bag->src->chrom, bag->src->chromStart, bag->src->chromEnd,
- bag->src->chrom, bag->src->chromStart, bag->src->chromEnd);
+ hgTracksName(), bag->meta->srcDb,
+ bag->meta->srcChrom, bag->meta->srcChromStart, bag->meta->srcChromEnd,
+ bag->meta->srcChrom, bag->meta->srcChromStart, bag->meta->srcChromEnd);
else
- printf("%s:%d-%d", bag->src->chrom, bag->src->chromStart, bag->src->chromEnd);
+ printf("%s:%d-%d", bag->meta->srcChrom, bag->meta->srcChromStart, bag->meta->srcChromEnd);
printf(" |
\n");
-// % identity and % aligned
+// % identity and % aligned, values stored as 1000*fraction ident
printf("Identity | %0.1f%% |
\n",
- 100.0*bag->src->ident);
+ bag->meta->srcScore/10.0);
printf("Aligned | %0.1f%% |
\n",
- 100.0*bag->src->aligned);
+ bag->meta->srcAligned/10.0);
// gene and CDS
printf("Gene | %s |
\n",
- (((bag->gene != NULL) && (strlen(bag->gene->geneName) > 0))
- ? bag->gene->geneName : " "));
+ strOrNbsp(bag->meta->geneName));
printf("Gene Id | %s |
\n",
- (((bag->gene != NULL) && (strlen(bag->gene->geneId) > 0))
- ? bag->gene->geneId : " "));
+ strOrNbsp(bag->meta->geneId));
printf("CDS | %s |
\n",
- (((bag->gene != NULL) && (strlen(bag->gene->cds) > 0))
- ? bag->gene->cds : " "));
+ strOrNbsp(bag->meta->oCDS));
printf("
\n");
}
static void displayAligns(struct trackDb *tdb, struct transMapBag *bag)
/* display cDNA alignments */
{
int start = cartInt(cart, "o");
printf("mRNA/Genomic Alignments
");
-printAlignmentsSimple(bag->psl, start, "hgcTransMapCdnaAli", tdb->table, bag->info->mappedId);
+printAlignmentsSimple(bag->psl, start, "hgcTransMapCdnaAli", tdb->table, bag->psl->qName);
}
void transMapClickHandler(struct trackDb *tdb, char *mappedId)
/* Handle click on a transMap tracks */
{
-struct sqlConnection *conn = hAllocConn(database);
-struct transMapBag *bag = transMapBagLoad(conn, tdb, mappedId, TRUE);
+struct transMapBag *bag = (trackDbSetting(tdb, "bigDataUrl") == NULL)
+ ? transMapBagLoadDb(tdb, mappedId)
+ : transMapBagLoadBig(tdb, mappedId);
genericHeader(tdb, mappedId);
printf("\n");
printf("\n");
displayMapped(bag);
printf(" | \n");
displaySource(bag);
printf(" | \n");
printf(" |
\n");
printf("\n");
displayAligns(tdb, bag);
printf(" |
\n");
printf("
\n");
printTrackHtml(tdb);
transMapBagFree(&bag);
-hFreeConn(&conn);
}
static struct dnaSeq *getCdnaSeq(struct trackDb *tdb, char *name)
/* look up sequence name in seq and extFile tables specified
* for base coloring. */
{
/* FIXME: this is really a rip off of some of the code in
* hgTracks/cds.c; really need to centralize it somewhere */
char *spec = trackDbRequiredSetting(tdb, BASE_COLOR_USE_SEQUENCE);
char *specCopy = cloneString(spec);
// value is: extFile seqTbl extFileTbl
char *words[3];
int nwords = chopByWhite(specCopy, words, ArraySize(words));
if ((nwords != ArraySize(words)) || !sameString(words[0], "extFile"))
errAbort("invalid %s track setting: %s", BASE_COLOR_USE_SEQUENCE, spec);
struct dnaSeq *seq = hDnaSeqMustGet(NULL, name, words[1], words[2]);
freeMem(specCopy);
return seq;
}
void transMapShowCdnaAli(struct trackDb *tdb, char *mappedId)
/* Show alignment for mappedId, mostly ripped off from htcCdnaAli */
{
-struct sqlConnection *conn = hAllocConn(database);
-struct transMapBag *bag = transMapBagLoad(conn, tdb, mappedId, FALSE);
+// NOTE(review): this always uses the table-based loader, whereas
+// transMapClickHandler() switches to the bigTransMap loader when the track
+// has a bigDataUrl setting -- confirm this is intended for big tracks.
+struct transMapBag *bag = transMapBagLoadDb(tdb, mappedId);
struct genbankCds cds;
-if ((bag->gene == NULL) || (strlen(bag->gene->cds) == 0)
- || !genbankCdsParse(bag->gene->cds, &cds))
- ZeroVar(&cds); /* can't get or parse cds */
+if (isEmpty(bag->meta->oCDS) || !genbankCdsParse(bag->meta->oCDS, &cds))
+ ZeroVar(&cds); /* can't get or parse CDS, so zero it */
struct dnaSeq *seq = getCdnaSeq(tdb, transMapIdToSeqId(mappedId));
writeFramesetType();
puts("");
printf("\n%s vs Genomic\n\n\n", mappedId);
showSomeAlignment(bag->psl, seq, gftDna, 0, seq->size, NULL, cds.start, cds.end);
dnaSeqFree(&seq);
transMapBagFree(&bag);
-hFreeConn(&conn);
}