e2d7638b2848fe00bbf9b66bca395ec68a9ee9e6 braney Thu Oct 15 16:22:06 2015 -0700 first cut at bigMaf support #15935 diff --git src/hg/hgTracks/wigMafTrack.c src/hg/hgTracks/wigMafTrack.c index 86d10aa..7bda215 100644 --- src/hg/hgTracks/wigMafTrack.c +++ src/hg/hgTracks/wigMafTrack.c @@ -10,30 +10,32 @@ #include "linefile.h" #include "jksql.h" #include "hdb.h" #include "hgTracks.h" #include "maf.h" #include "scoredRef.h" #include "wiggle.h" #include "hCommon.h" #include "hgMaf.h" #include "mafTrack.h" #include "customTrack.h" #include "mafSummary.h" #include "mafFrames.h" #include "phyloTree.h" #include "soTerm.h" +#include "bigBed.h" +#include "hubConnect.h" #define GAP_ITEM_LABEL "Gaps" #define MAX_SP_SIZE 2000 struct wigMafItem /* A maf track item -- * a line of bases (base level) or pairwise density gradient (zoomed out). */ { struct wigMafItem *next; char *name; /* Common name */ char *db; /* Database */ int group; /* number of species group/clade */ int ix; /* Position in list. */ int height; /* Pixel height of item. */ @@ -273,54 +275,79 @@ char *getTrackMafFile(struct track *track) /* look up MAF file name in track setting, return NULL if not found */ { return hashFindVal(track->tdb->settingsHash, "mafFile"); } char *getCustomMafFile(struct track *track) { char *fileName = getTrackMafFile(track); if (fileName == NULL) errAbort("cannot find custom maf setting"); return fileName; } +static struct mafAli *bigMafLoadInRegion( struct track *track, char *chrom, int start, int end) +/* Read in MAF blocks from bigBed. Fetches intervals with bigBedSelectRange(track, chrom, start, end, lm); for each one, passes its rest field through replaceChars to turn \001 back into newlines, opens a lineFile on that text with lineFileOnString and parses a single mafAli from it with mafNext. Returns the resulting mafAli list (NULL when no intervals come back). NOTE(review): blocks are pushed with slAddHead and the list is never reversed here, so it is presumably in reverse interval order -- confirm callers do not depend on order. NOTE(review): nothing in this function releases lm, mafText or mf.lf, only mf.lf is assigned on the local mafFile before mafNext is called, and the mafNext result is not checked for NULL before slAddHead -- verify against those helpers. 
*/ +{ +struct lm *lm = lmInit(0); +struct bigBedInterval *bb, *bbList = bigBedSelectRange(track, chrom, start, end, lm); +struct mafAli *mafList = NULL; +for (bb = bbList; bb != NULL; bb = bb->next) + { + // the MAF block in the record has \001 instead of newlines + char *mafText = replaceChars(bb->rest, "\001","\n"); + + struct mafFile mf; + mf.lf = lineFileOnString(NULL, TRUE, mafText); + + struct mafAli *maf = mafNext(&mf); + slAddHead(&mafList, maf); + } +return mafList; +} + static void loadMafsToTrack(struct track *track) /* load mafs in region to track custom pointer */ { struct sqlConnection *conn; struct sqlConnection *conn2; struct mafPriv *mp = getMafPriv(track); if (inSummaryMode(cart, track->tdb, winBaseCount)) return; int begin = winStart - 2; if (begin < 0) begin = 0; /* we open two connections to the database * that has the maf track in it. One is * for the scoredRefs, the other to access * the extFile database. We could get away * with just one connection, but then we'd * have to allocate more memory to hold * the scoredRefs (whereas now we just use * one statically loaded scoredRef). 
*/ -if (mp->ct) + +if (track->isBigBed) + { + mp->list = bigMafLoadInRegion(track, chromName, begin, winEnd+2); + } +else if (mp->ct) { char *fileName = getCustomMafFile(track); conn = hAllocConn(CUSTOM_TRASH); conn2 = hAllocConn(CUSTOM_TRASH); mp->list = wigMafLoadInRegion(conn, conn2, mp->ct->dbTableName, chromName, begin, winEnd + 2, fileName); hFreeConn(&conn); hFreeConn(&conn2); } else { char *fileName = getTrackMafFile(track); // optional conn = hAllocConn(database); conn2 = hAllocConn(database); @@ -495,31 +522,35 @@ struct track *wigTrack = track->subtracks; int scoreHeight = tl.fontHeight * 4; char *snpTable = trackDbSetting(track->tdb, "snpTable"); boolean doSnpTable = FALSE; if ( (track->limitedVis == tvPack) && (snpTable != NULL) && cartOrTdbBoolean(cart, track->tdb, MAF_SHOW_SNP,FALSE)) doSnpTable = TRUE; // the maf's only get loaded if we're not in summary or snpTable views if (!doSnpTable && !inSummaryMode(cart, track->tdb, winBaseCount)) { /* "close in" display uses actual alignments from file */ struct mafPriv *mp = getMafPriv(track); struct sqlConnection *conn, *conn2; - if (mp->ct) + if (track->isBigBed) + { + mp->list = bigMafLoadInRegion(track, chromName, winStart, winEnd); + } + else if (mp->ct) { char *fileName = getCustomMafFile(track); conn = hAllocConn(CUSTOM_TRASH); conn2 = hAllocConn(CUSTOM_TRASH); mp->list = wigMafLoadInRegion(conn, conn2, mp->ct->dbTableName, chromName, winStart, winEnd, fileName); hFreeConn(&conn); hFreeConn(&conn2); } else { char *fileName = getTrackMafFile(track); // optional conn = hAllocConn(database); conn2 = hAllocConn(database); mp->list = wigMafLoadInRegion(conn, conn2, track->table, @@ -724,31 +755,31 @@ static void wigMafFree(struct track *track) /* Free up maf items. 
*/ { struct mafPriv *mp = getMafPriv(track); if (mp->list != NULL && mp->list != (char *)-1) mafAliFreeList((struct mafAli **)&mp->list); if (track->items != NULL) wigMafItemFreeList((struct wigMafItem **)&track->items); } static char *wigMafItemName(struct track *track, void *item) /* Return name of maf level track. */ { struct wigMafItem *mi = item; -return mi->name; +return hubConnectSkipHubPrefix(mi->name); } static void processInserts(char *text, struct mafAli *maf, struct hash *itemHash, int insertCounts[], int baseCount) /* Make up insert line from sequence of reference species. It has a gap count at each displayed base position, and is generated by counting up '-' chars in the sequence, where */ { int i, baseIx = 0; struct mafComp *mc; char c; for (i=0; i < maf->textSize && baseIx < baseCount - 1; i++) { @@ -1138,30 +1169,50 @@ return FALSE; /* get summary table name from trackDb */ if ((summary = summarySetting(track)) == NULL) return FALSE; if (cartVarExistsAnyLevel(cart, track->tdb,FALSE,MAF_CHAIN_VAR)) useIrowChains = cartUsualBooleanClosestToHome(cart, track->tdb, FALSE, MAF_CHAIN_VAR,TRUE); else { char *irowString = trackDbSetting(track->tdb, "irows"); if (irowString && sameString(irowString, "off")) useIrowChains = FALSE; } +if (track->isBigBed) + { + struct lm *lm = lmInit(0); + struct bigBedInterval *bb, *bbList = bigBedSelectRangeExtra(track, chromName, seqStart, seqEnd, lm, "summary"); + printf("bbiList %p\n", bbList); + char *bedRow[7]; + char startBuf[16], endBuf[16]; + + for (bb = bbList; bb != NULL; bb = bb->next) + { + bigBedIntervalToRow(bb, chromName, startBuf, endBuf, bedRow, ArraySize(bedRow)); + ms = mafSummaryLoad(bedRow); + if ((hel = hashLookup(componentHash, ms->src)) == NULL) + hashAdd(componentHash, ms->src, ms); + else + slAddHead(&(hel->val), ms); + } + } +else + { /* Create SQL where clause that will load up just the * summaries for the species that we are including. 
*/ conn = hAllocConn(database); dyStringAppend(where, "src in ("); for (mi = miList; mi != NULL; mi = mi->next) { if (!isPairwiseItem(mi)) /* exclude non-species items (e.g. conservation wiggle */ continue; dyStringPrintf(where, "'%s'", mi->db); if (mi->next != NULL) dyStringAppend(where, ","); } dyStringAppend(where, ")"); /* check for empty where clause */ @@ -1180,35 +1231,37 @@ ms = mafSummaryLoad(row + rowOffset); else /* previous table schema didn't have status fields */ ms = mafSummaryMiniLoad(row + rowOffset); /* prune to fit in window bounds */ if (ms->chromStart < seqStart) ms->chromStart = seqStart; if (ms->chromEnd > seqEnd) ms->chromEnd = seqEnd; if ((hel = hashLookup(componentHash, ms->src)) == NULL) hashAdd(componentHash, ms->src, ms); else slAddHead(&(hel->val), ms); } sqlFreeResult(&sr); + } /* reverse summary lists */ cookie = hashFirst(componentHash); while ((hel = hashNext(&cookie)) != NULL) slReverse(&hel->val); +if (!track->isBigBed) hFreeConn(&conn); /* display pairwise items */ for (mi = miList; mi != NULL; mi = mi->next) { if (mi->ix < 0) /* ignore item for the score */ continue; summaryList = (struct mafSummary *)hashFindVal(componentHash, mi->db); if (summaryList == NULL) summaryList = (struct mafSummary *)hashFindVal(componentHash, mi->name); if (summaryList != NULL) { if (vis == tvFull) @@ -1391,31 +1444,31 @@ continue; //if (mcPair->srcSize != 0) // TODO: replace with a cloneMafComp() AllocVar(mcPair); mcPair->src = cloneString(mcThis->src); mcPair->srcSize = mcThis->srcSize; mcPair->strand = mcThis->strand; mcPair->start = mcThis->start; mcPair->size = mcThis->size; mcPair->text = cloneString(mcThis->text); mcPair->leftStatus = mcThis->leftStatus; mcPair->leftLen = mcThis->leftLen; mcPair->rightStatus = mcThis->rightStatus; mcPair->rightLen = mcThis->rightLen; - mcThis = mafFindCompSpecies(maf, database, '.'); + mcThis = mafFindCompSpecies(maf, hubConnectSkipHubPrefix(database), '.'); AllocVar(mcMaster); mcMaster->src = 
cloneString(mcThis->src); mcMaster->srcSize = mcThis->srcSize; mcMaster->strand = mcThis->strand; mcMaster->start = mcThis->start; mcMaster->size = mcThis->size; mcMaster->text = cloneString(mcThis->text); mcMaster->next = mcPair; AllocVar(pairMaf); pairMaf->components = mcMaster; pairMaf->textSize = maf->textSize; slAddHead(&mafList, pairMaf); } slReverse(&mafList); @@ -1820,67 +1873,52 @@ struct hash *srcHash = newHash(0); char dbChrom[64]; int alignLineLength = winBaseCount * 2; /* doubled to allow space for insert counts */ boolean complementBases = cartUsualBooleanDb(cart, database, COMPLEMENT_BASES_VAR, FALSE); bool dots = FALSE; /* configuration option */ /* this line must be longer than the longest base-level display */ char noAlignment[2000]; boolean useIrowChains = TRUE; int offset; char *framesTable = NULL; char *defaultCodonSpecies = cartUsualString(cart, SPECIES_CODON_DEFAULT, NULL); char *codonTransMode = NULL; boolean startSub2 = FALSE; -int mafOrig = 0; int mafOrigOffset = 0; -char query[256]; struct mafPriv *mp = getMafPriv(track); char *mafFile = NULL; struct sqlConnection *conn2 = NULL; struct sqlConnection *conn3 = NULL; char *tableName = NULL; if (mp->ct != NULL) { conn2 = hAllocConn(CUSTOM_TRASH); conn3 = hAllocConn(CUSTOM_TRASH); tableName = mp->ct->dbTableName; mafFile = getCustomMafFile(track); } -else +else if (!track->isBigBed) { conn2 = hAllocConn(database); conn3 = hAllocConn(database); tableName = track->table; mafFile = getTrackMafFile(track); // optional } -if (hIsGsidServer()) - { - /* decide the value of mafOrigOffset to be used to display xxAaMaf tracks. 
*/ - struct sqlConnection *conn = hAllocConn(database); - sqlSafef(query, sizeof(query), "select chromStart from %s", track->table); - mafOrig = atoi(sqlNeedQuickString(conn, query)); - mafOrigOffset = (mafOrig % 3) - 1; - /* offset has to be non-negative */ - if (mafOrigOffset < 0) mafOrigOffset = mafOrigOffset +3; - - hFreeConn(&conn); - } - if (defaultCodonSpecies == NULL) defaultCodonSpecies = trackDbSetting(track->tdb, "speciesCodonDefault"); if (defaultCodonSpecies == NULL) defaultCodonSpecies = database; if (seqStart > 2) { startSub2 = TRUE; seqStart -=2; } seqEnd +=2; if (seqEnd > seqBaseCount) seqEnd = seqBaseCount; @@ -1946,31 +1984,31 @@ memcpy(selfLine, seq->dna, winBaseCount + 4); //toUpperN(selfLine, winBaseCount); freeDnaSeq(&seq); /* Make hash of species items keyed by database. */ i = 0; for (mi = miList; mi != NULL; mi = mi->next) { mi->ix = i++; if (mi->db != NULL) hashAdd(miHash, mi->db, mi); } /* Go through the mafs saving relevant info in lines. */ mafList = mp->list; -safef(dbChrom, sizeof(dbChrom), "%s.%s", database, chromName); +safef(dbChrom, sizeof(dbChrom), "%s.%s", hubConnectSkipHubPrefix(database), chromName); for (maf = mafList; maf != NULL; maf = maf->next) { int mafStart; /* get info about sequences from full alignment, for use later, when determining if sequence is unaligned or missing */ for (mc = maf->components; mc != NULL; mc = mc->next) if (!hashFindVal(srcHash, mc->src)) hashAdd(srcHash, mc->src, maf); mcMaster = mafFindComponent(maf, dbChrom); mafStart = mcMaster->start; /* get portion of maf in this window */ if (startSub2) sub = mafSubset(maf, dbChrom, winStart - 2, winEnd + 2);