ebbd241c1fcafa6204bcab73dc66770a3202901d braney Wed Oct 21 15:55:42 2015 -0700 get frames to work with bigMaf #15935 diff --git src/hg/hgTracks/wigMafTrack.c src/hg/hgTracks/wigMafTrack.c index 4142e52..6a1f19e 100644 --- src/hg/hgTracks/wigMafTrack.c +++ src/hg/hgTracks/wigMafTrack.c @@ -89,31 +89,31 @@ /* Load mafs from region */ { return mafLoadInRegion2(conn, conn2, table, chrom, start, end, file); } static struct wigMafItem *newMafItem(char *s, int g, boolean lowerFirstChar) /* Allocate and initialize a maf item. Species param can be a db or name */ { struct wigMafItem *mi; char *val; AllocVar(mi); if ((val = hGenome(s)) != NULL) { /* it's a database name */ - mi->db = cloneString(s); + mi->db = cloneString(hubConnectSkipHubPrefix(s)); mi->name = val; } else { mi->db = cloneString(s); mi->name = cloneString(s); } mi->name = hgDirForOrg(mi->name); if (lowerFirstChar) *mi->name = tolower(*mi->name); mi->height = tl.fontHeight; mi->group = g; return mi; } @@ -1567,49 +1567,80 @@ #define ISGAP(x) (((x) == '=') || (((x) == '-'))) #define ISN(x) ((x) == 'N') #define ISSPACE(x) ((x) == ' ') #define ISGAPSPACEORN(x) (ISSPACE(x) || ISGAP(x) || ISN(x)) static AA lookupAndCheckCodon(char *codon) { AA retValue; if ((retValue = lookupCodon(codon)) == 0) return '*'; return retValue; } -static void translateCodons(struct sqlConnection *conn, - struct sqlConnection *conn2, char *tableName, char *compName, +struct sqlClosure +{ +char *mafFile; +struct sqlConnection *conn2; +struct sqlConnection *conn3; +char *tableName; +}; + +struct bbClosure +{ +struct bbiFile *bbi; +}; + +typedef struct mafAli * (*mafRetrieveFunc)(void *closure, char *chrom, int start, int end); + +static struct mafAli *mafLoadFromBb(void *closure, char *chrom, int start, int end) +/* Retrieve maf blocks from a bigBed file. */ +{ +struct bbClosure *bbClosure = closure; + +return bigMafLoadInRegion( bbClosure->bbi, chrom, start, end); +} + +static struct mafAli *mafLoadFromSql(void *closure, char *chrom, int start, int end) +/* Retrieve maf blocks from an SQL table. */ +{ +struct sqlClosure *sqlClosure = closure; + +return mafLoadInRegion2(sqlClosure->conn2, sqlClosure->conn3, sqlClosure->tableName, chrom, start, end, sqlClosure->mafFile ); +} + +static void translateCodons(mafRetrieveFunc func, void *closure, + char *compName, DNA *dna, int start, int length, int frame, char strand, int prevEnd, int nextStart, bool alreadyComplemented, - int x, int y, int width, int height, struct hvGfx *hvg, char *mafFile) + int x, int y, int width, int height, struct hvGfx *hvg) { int size = length; DNA *ptr; int color; int end = start + length; int x1; char masterChrom[128]; struct mafAli *ali, *sub = NULL; struct mafComp *comp = NULL; int mult = 1; char codon[4]; int fillBox = FALSE; -safef(masterChrom, sizeof(masterChrom), "%s.%s", database, chromName); +safef(masterChrom, sizeof(masterChrom), "%s.%s", hubConnectSkipHubPrefix(database), chromName); dna += start; reverseForStrand(dna, length, strand, alreadyComplemented); ptr = dna; color = shadesOfSea[0]; mult = 0; if (frame && (prevEnd == -1)) { switch(frame) { case 1: if (0)//!( ISGAPSPACEORN(ptr[0]) ||ISGAPSPACEORN(ptr[1]))) @@ -1637,64 +1668,62 @@ else { reverseForStrand(ptr, 1, strand, alreadyComplemented); ptr++; } length -=1; break; } } else if (frame && (prevEnd != -1)) { memset(codon, 0, sizeof(codon)); switch(frame) { case 1: - ali = mafLoadInRegion2(conn, conn2, tableName, chromName, prevEnd , prevEnd + 1, mafFile ); + ali = func(closure, chromName, prevEnd , prevEnd + 1); if (ali != NULL) { sub = mafSubset(ali, masterChrom, prevEnd , prevEnd + 1 ); comp = mafMayFindCompSpecies(sub, compName, '.'); } if (comp && comp->text && (!(ISGAPSPACEORN(comp->text[0]) ||ISGAPSPACEORN(ptr[0]) ||ISGAPSPACEORN(ptr[1])))) { if (strand == '-') complement(comp->text, 1); codon[0] = comp->text[0]; codon[1] = ptr[0]; codon[2] = ptr[1]; fillBox = TRUE; mult = 2; *ptr++ = ' '; *ptr++ = lookupAndCheckCodon(codon); } else ptr+=2; length -= 2; break; case 2: if (strand == '-') { - ali = mafLoadInRegion2(conn, conn2, tableName, chromName, - prevEnd, prevEnd + 2, mafFile); + ali = func(closure, chromName, prevEnd , prevEnd + 2); if (ali != NULL) sub = mafSubset(ali, masterChrom, prevEnd, prevEnd + 2 ); } else { - ali = mafLoadInRegion2(conn, conn2, tableName, chromName, - prevEnd - 1, prevEnd + 1, mafFile); + ali = func(closure, chromName, prevEnd - 1 , prevEnd + 1); if (ali != NULL) sub = mafSubset(ali, masterChrom, prevEnd - 1, prevEnd + 1); } if (sub != NULL) comp = mafMayFindCompSpecies(sub, compName, '.'); if (comp && comp->text && (!(ISGAPSPACEORN(comp->text[0])||ISGAPSPACEORN(comp->text[1]) ||ISGAPSPACEORN(*ptr)))) { if (strand == '-') reverseComplement(comp->text, 2); codon[0] = comp->text[0]; codon[1] = comp->text[1]; codon[2] = *ptr; fillBox = TRUE; mult = 1; *ptr++ = lookupAndCheckCodon(codon); @@ -1742,40 +1771,38 @@ hvGfxBox(hvg, x1, y, 3*width/winBaseCount + 1 , height, color); } } if (length && (nextStart != -1)) { char codon[4]; int mult = 1; boolean fillBox = FALSE; memset(codon, 0, sizeof(codon)); sub = NULL; if (strand == '-') { - ali = mafLoadInRegion2(conn, conn2, tableName, chromName, - nextStart - 2 + length, nextStart + 1, mafFile ); + ali = func(closure, chromName, nextStart - 2 + length, nextStart + 1); if (ali != NULL) sub = mafSubset(ali, masterChrom, nextStart - 2 + length, nextStart + 1); } else { - ali = mafLoadInRegion2(conn, conn2, tableName, chromName, - nextStart , nextStart + 2, mafFile ); + ali = func(closure, chromName, nextStart , nextStart + 2); if (ali != NULL) sub = mafSubset(ali, masterChrom, nextStart , nextStart + 2); } if (sub != NULL) comp = mafMayFindCompSpecies(sub, compName, '.'); if (sub && comp && comp->text) { switch(length) { case 2: if (strand == '-') complement(comp->text, 1); codon[0] = *ptr; codon[1] = *(1 + ptr); codon[2] = *comp->text; @@ -1825,30 +1852,74 @@ { if (!alreadyComplemented) complement(ptr, length); } else { if (alreadyComplemented) complement(ptr, length); } } if (strand == '-') reverseBytes(dna, size); } +static struct mafFrames *getFramesFromSql( char *framesTable, + char *chromName, int seqStart, int seqEnd, char *extra, boolean newTableType) +{ +struct mafFrames *mfList = NULL; +int rowOffset; +struct sqlResult *sr; +struct mafFrames *mf; +char **row; + +struct sqlConnection *conn = hAllocConn(database); +sr = hRangeQuery(conn, framesTable, chromName, seqStart, seqEnd, extra, &rowOffset); +while ((row = sqlNextRow(sr)) != NULL) + { + if (newTableType) + mf = mafFramesLoad(row + rowOffset); + else + mf = mafFramesLoadOld(row + rowOffset); + slAddHead(&mfList, mf); + } +sqlFreeResult(&sr); +hFreeConn(&conn); +return mfList; +} + +static struct mafFrames *getFramesFromBb( char *framesTable, char *chromName, int seqStart, int seqEnd) +{ +struct lm *lm = lmInit(0); +struct bbiFile *bbi = bigBedFileOpen(framesTable); +struct bigBedInterval *bb, *bbList = bigBedIntervalQuery(bbi, chromName, seqStart, seqEnd, 0, lm); +char *bedRow[11]; +char startBuf[16], endBuf[16]; +struct mafFrames *mfList = NULL, *mf; + +for (bb = bbList; bb != NULL; bb = bb->next) + { + bigBedIntervalToRow(bb, chromName, startBuf, endBuf, bedRow, ArraySize(bedRow)); + mf = mafFramesLoad( bedRow ); + slAddHead(&mfList, mf); + } + +slReverse(&mfList); +return mfList; +} + static int wigMafDrawBases(struct track *track, int seqStart, int seqEnd, struct hvGfx *hvg, int xOff, int yOff, int width, MgFont *font, Color color, enum trackVisibility vis, struct wigMafItem *miList) /* Draw base-by-base view, return new Y offset. */ { struct wigMafItem *mi; struct mafAli *mafList, *maf, *sub; struct mafComp *mc, *mcMaster; int lineCount = slCount(miList); char **lines = NULL, *selfLine; int *insertCounts; int i, x = xOff, y = yOff; struct dnaSeq *seq = NULL; struct hash *miHash = newHash(9); @@ -1918,31 +1989,36 @@ if (cartVarExistsAnyLevel(cart, track->tdb,FALSE,"frames")) framesTable = cartOptionalStringClosestToHome(cart, track->tdb,FALSE,"frames"); else framesTable = trackDbSetting(track->tdb, "frames"); if (framesTable) { codonTransMode = cartUsualStringClosestToHome(cart, track->tdb,FALSE,"codons", "codonDefault"); if (sameString("codonNone", codonTransMode)) framesTable = NULL; } boolean newTableType = FALSE; if (framesTable != NULL) + { + if (track->isBigBed) + newTableType = TRUE; + else newTableType = hHasField(database, framesTable, "isExonStart"); + } /* initialize "no alignment" string to o's */ for (i = 0; i < sizeof noAlignment - 1; i++) noAlignment[i] = UNALIGNED_SEQ; if (cartVarExistsAnyLevel(cart, track->tdb,FALSE,MAF_CHAIN_VAR)) useIrowChains = cartUsualBooleanClosestToHome(cart, track->tdb, FALSE, MAF_CHAIN_VAR,TRUE); else { char *irowString = trackDbSetting(track->tdb, "irows"); if (irowString && sameString(irowString, "off")) useIrowChains = FALSE; } @@ -2259,106 +2335,119 @@ /* TODO: leave lower case in to indicate masking ? * NOTE: want to make sure that all sequences are soft-masked * if we do this */ /* HAVE DONE: David doesn't like lower case by default * TODO: casing based on quality values ?? */ alignSeqToUpperN(line); if (complementBases) { complement(line, strlen(line)); } /* draw sequence letters for alignment */ hvGfxSetClip(hvg, x, y-1, width, mi->height); if (framesTable != NULL) { - int rowOffset; - char **row; - struct sqlConnection *conn = hAllocConn(database); - struct sqlResult *sr; + struct mafFrames *mfList = NULL, *mf; char extra[512]; boolean found = FALSE; if (sameString("codonDefault", codonTransMode)) { safef(extra, sizeof(extra), "src='%s'",defaultCodonSpecies); found = TRUE; } else if (sameString("codonFrameDef", codonTransMode)) { safef(extra, sizeof(extra), "src='%s'",mi->db); found = FALSE; } else if (sameString("codonFrameNone", codonTransMode)) { safef(extra, sizeof(extra), "src='%s'",mi->db); found = TRUE; } else errAbort("unknown codon translation mode %s",codonTransMode); tryagain: - sr = hRangeQuery(conn, framesTable, chromName, seqStart, seqEnd, extra, &rowOffset); - while ((row = sqlNextRow(sr)) != NULL) - { - struct mafFrames mf; - int start, end, w; - int frame; + if (track->isBigBed) + mfList = getFramesFromBb( framesTable, chromName, seqStart, seqEnd); + else + mfList = getFramesFromSql( framesTable, chromName, seqStart, seqEnd, extra, newTableType); + if (mfList != NULL) found = TRUE; - if (newTableType) - mafFramesStaticLoad(row + rowOffset, &mf); - else - mafFramesStaticLoadOld(row + rowOffset, &mf); - if (mf.chromStart < seqStart) + for(mf = mfList; mf; mf = mf->next) + { + int start, end, w; + int frame; + if (mf->chromStart < seqStart) start = 0; else - start = mf.chromStart-seqStart; - frame = mf.frame; - if (mf.strand[0] == '-') + start = mf->chromStart-seqStart; + frame = mf->frame; + if (mf->strand[0] == '-') { - if (mf.chromEnd > seqEnd) - frame = (frame + mf.chromEnd-seqEnd ) % 3; + if (mf->chromEnd > seqEnd) + frame = (frame + mf->chromEnd-seqEnd ) % 3; } else { - if (mf.chromStart < seqStart) - frame = (frame + seqStart-mf.chromStart ) % 3; + if (mf->chromStart < seqStart) + frame = (frame + seqStart-mf->chromStart ) % 3; } - end = mf.chromEnd > seqEnd ? seqEnd - seqStart : mf.chromEnd - seqStart; + end = mf->chromEnd > seqEnd ? seqEnd - seqStart : mf->chromEnd - seqStart; w= end - start; - translateCodons(conn2, conn3, tableName, mi->db, line, start, - w, frame, mf.strand[0],mf.prevFramePos,mf.nextFramePos, - complementBases, x, y, width, mi->height, hvg, mafFile); + void *closure; + mafRetrieveFunc func; + struct sqlClosure sqlClosure; + struct bbClosure bbClosure; + + if (track->isBigBed) + { + func = mafLoadFromBb; + closure = &bbClosure; + bbClosure.bbi = fetchBbiForTrack(track); + } + else + { + func = mafLoadFromSql; + closure = &sqlClosure; + sqlClosure.conn2 = conn2; + sqlClosure.conn3 = conn3; + sqlClosure.mafFile = mafFile; + sqlClosure.tableName = tableName; + } + translateCodons(func, closure, mi->db, line, start, + w, frame, mf->strand[0],mf->prevFramePos,mf->nextFramePos, + complementBases, x, y, width, mi->height, hvg); } - sqlFreeResult(&sr); if (!found) { /* try the default species */ safef(extra, sizeof(extra), "src='%s'",defaultCodonSpecies); found = TRUE; /* don't try again */ goto tryagain; } - - hFreeConn(&conn); } if (startSub2) { if (strstr(track->tdb->type, "wigMafProt")) { spreadAlignStringProt(hvg, x, y, width, mi->height-1, color, font, &line[2], &selfLine[2], winBaseCount, dots, FALSE, seqStart, mafOrigOffset); } else { /* make sure we have bases to display before printing them */ if (strlen(line) > 2) spreadAlignString(hvg, x, y, width, mi->height-1, color, font, &line[2], &selfLine[2], winBaseCount, dots, FALSE);