6aaa041c6e3cea383a6ca3275144f6998613cec6 braney Thu Nov 18 17:34:47 2021 -0800 be a lot smarter about how we parse bigMaf files. We were opening the file on every bed item, which for bigMaf is about one per base refs #28534 diff --git src/hg/hgTables/maf.c src/hg/hgTables/maf.c index 9fd8181..5eab26c 100644 --- src/hg/hgTables/maf.c +++ src/hg/hgTables/maf.c @@ -1,142 +1,150 @@ /* maf - stuff to process maf tracks. */ /* Copyright (C) 2011 The Regents of the University of California * See README in this or parent directory for licensing information. */ #include "common.h" #include "hash.h" #include "linefile.h" #include "dystring.h" #include "portable.h" #include "obscure.h" #include "jksql.h" #include "cheapcgi.h" #include "cart.h" #include "web.h" #include "trackDb.h" #include "maf.h" #include "hgMaf.h" #include "hgTables.h" #include "hubConnect.h" boolean isMafTable(char *database, struct trackDb *track, char *table) /* Return TRUE if table is maf. */ { if (track == NULL) return FALSE; if (isEmpty(track->type)) return FALSE; if (sameString(track->table, table)) { if (startsWithWord("maf",track->type) || startsWithWord("wigMaf",track->type) || startsWithWord("bigMaf",track->type)) return TRUE; } else { struct slRef *tdbRefList = trackDbListGetRefsToDescendantLeaves(track->subtracks); struct slRef *tdbRef; for (tdbRef = tdbRefList; tdbRef != NULL; tdbRef = tdbRef->next) { struct trackDb *childTdb = tdbRef->val; if(sameString(childTdb->table, table)) { if (startsWithWord("maf",childTdb->type) || startsWithWord("wigMaf",childTdb->type) || startsWithWord("bigMaf",childTdb->type)) return TRUE; break; } } slFreeList(&tdbRefList); } return FALSE; } void doOutMaf(struct trackDb *track, char *table, struct sqlConnection *conn) /* Output regions as MAF. maf tables look bed-like enough for * cookedBedsOnRegions to handle intersections. */ { struct region *region = NULL, *regionList = getRegions(); struct lm *lm = lmInit(64*1024); textOpen(); struct sqlConnection *ctConn = NULL; struct sqlConnection *ctConn2 = NULL; struct customTrack *ct = NULL; struct hash *settings = track->settingsHash; char *mafFile = hashFindVal(settings, "mafFile"); if (isCustomTrack(table)) { ctConn = hAllocConn(CUSTOM_TRASH); ctConn2 = hAllocConn(CUSTOM_TRASH); ct = ctLookupName(table); if (mafFile == NULL) { /* this shouldn't happen */ printf("cannot find custom track file %s\n", mafFile); return; } } mafWriteStart(stdout, NULL); + +// if this is a bigMaf file, open the source. +struct bbiFile *bigMafBbi = NULL; +if (isBigBed(database, table, curTrack, ctLookupName)) + { + struct trackDb *subTdb = hashFindVal(fullTableToTdbHash, table); + char *fileName = trackDbSetting(subTdb, "bigDataUrl"); + bigMafBbi = bigBedFileOpen(fileName); + } + for (region = regionList; region != NULL; region = region->next) { struct bed *bedList = cookedBedList(conn, table, region, lm, NULL); struct bed *bed = NULL; for (bed = bedList; bed != NULL; bed = bed->next) { struct mafAli *mafList = NULL, *maf = NULL; char dbChrom[64]; safef(dbChrom, sizeof(dbChrom), "%s.%s", hubConnectSkipHubPrefix(database), bed->chrom); /* For MAF, we clip to viewing window (region) instead of showing * entire items that happen to overlap the window/region, which is * what we get from cookedBedList. */ if (bed->chromStart < region->start) bed->chromStart = region->start; if (bed->chromEnd > region->end) bed->chromEnd = region->end; if (bed->chromStart >= bed->chromEnd) continue; if (ct == NULL) { - if (isBigBed(database, table, curTrack, ctLookupName)) - { - struct trackDb *subTdb = hashFindVal(fullTableToTdbHash, table); - char *fileName = trackDbSetting(subTdb, "bigDataUrl"); - struct bbiFile *bbi = bigBedFileOpen(fileName); - mafList = bigMafLoadInRegion(bbi, bed->chrom, bed->chromStart, bed->chromEnd); - } + if (bigMafBbi) + mafList = bigMafLoadInRegion(bigMafBbi, bed->chrom, bed->chromStart, bed->chromEnd); else if (mafFile != NULL) mafList = mafLoadInRegion2(conn, conn, table, bed->chrom, bed->chromStart, bed->chromEnd, mafFile); else mafList = mafLoadInRegion(conn, table, bed->chrom, bed->chromStart, bed->chromEnd); } else mafList = mafLoadInRegion2(ctConn, ctConn2, ct->dbTableName, bed->chrom, bed->chromStart, bed->chromEnd, mafFile); for (maf = mafList; maf != NULL; maf = maf->next) { struct mafAli *subset = mafSubset(maf, dbChrom, bed->chromStart, bed->chromEnd); if (subset != NULL) { subset->score = mafScoreMultiz(subset); mafWrite(stdout, subset); mafAliFree(&subset); } } mafAliFreeList(&mafList); } } mafWriteEnd(stdout); lmCleanup(&lm); +if (bigMafBbi) + bigBedFileClose(&bigMafBbi); + if (isCustomTrack(table)) { hFreeConn(&ctConn); hFreeConn(&ctConn2); } }