459b8273fa9eaafb16d8980567a7fcfa209fd4ef max Wed Nov 23 09:30:30 2016 -0800 Adding VAI/DAI support for bigDataIndex, refs #18420 diff --git src/hg/lib/hAnno.c src/hg/lib/hAnno.c index ca724be..63c6b76 100644 --- src/hg/lib/hAnno.c +++ src/hg/lib/hAnno.c @@ -126,30 +126,39 @@ if (asObj == NULL) asObj = asFromTableDescriptions(conn, table); hFreeConn(&conn); if (columnsMatch(asObj, fieldList)) return asObj; else { // Special case for pgSnp, which includes its bin column in autoSql... struct asObject *pgSnpAsO = pgSnpAsObj(); if (columnsMatch(pgSnpAsO, fieldList)) return pgSnpAsO; return asObjectFromFields(table, fieldList, skipBin); } } +static char *getBigDataIndexName(struct trackDb *tdb) +/* Get tbi/bai URL for a BAM/VCF from trackDb or custom track. */ +{ +char *bigIndexUrl = trackDbSetting(tdb, "bigDataIndex"); +if (isNotEmpty(bigIndexUrl)) + return bigIndexUrl; +return NULL; +} + static char *getBigDataFileName(char *db, struct trackDb *tdb, char *selTable, char *chrom) /* Get fileName from bigBed/bigWig/BAM/VCF database table, or bigDataUrl from custom track. */ { char *bigDataUrl = trackDbSetting(tdb, "bigDataUrl"); if (isNotEmpty(bigDataUrl)) { return bigDataUrl; } else { struct sqlConnection *conn = hAllocConn(db); char *fileOrUrl = bbiNameFromSettingOrTableChrom(tdb, conn, selTable, chrom); hFreeConn(&conn); return fileOrUrl; } @@ -157,55 +166,56 @@ static boolean dbTableMatchesAutoSql(char *db, char *table, struct asObject *asObj) /* Return true if table exists and its fields match the columns of asObj. */ { boolean matches = FALSE; struct sqlConnection *conn = hAllocConn(db); if (sqlTableExists(conn, table)) { struct sqlFieldInfo *fieldList = sqlFieldInfoGet(conn, table); matches = columnsMatch(asObj, fieldList); } hFreeConn(&conn); return matches; } -struct annoStreamer *hAnnoStreamerFromBigFileUrl(char *fileOrUrl, struct annoAssembly *assembly, +struct annoStreamer *hAnnoStreamerFromBigFileUrl(char *fileOrUrl, char *indexUrl, struct annoAssembly *assembly, int maxOutRows, char *type) /* Determine what kind of big data file/url we have and make streamer for it. - * If type is NULL, this will determine type using custom track type or file suffix. */ + * If type is NULL, this will determine type using custom track type or file suffix. + * indexUrl can be NULL, unless the type is VCF and the .tbi file is not alongside the .VCF */ { struct annoStreamer *streamer = NULL; if (isEmpty(type)) type = customTrackTypeFromBigFile(fileOrUrl); if (type == NULL) { if (endsWith(fileOrUrl, "pgSnp") || endsWith(fileOrUrl, "pgsnp") || endsWith(fileOrUrl, "pgSnp.gz") || endsWith(fileOrUrl, "pgsnp.gz") || endsWith(fileOrUrl, "bed") || endsWith(fileOrUrl, "bed.gz")) { type = "pgSnp"; } else errAbort("Unrecognized bigData type of file or url '%s'", fileOrUrl); } if (sameString(type, "bigBed") || sameString("bigGenePred", type)) streamer = annoStreamBigBedNew(fileOrUrl, assembly, maxOutRows); else if (sameString(type, "vcfTabix")) - streamer = annoStreamVcfNew(fileOrUrl, TRUE, assembly, maxOutRows); + streamer = annoStreamVcfNew(fileOrUrl, indexUrl, TRUE, assembly, maxOutRows); else if (sameString(type, "vcf")) - streamer = annoStreamVcfNew(fileOrUrl, FALSE, assembly, maxOutRows); + streamer = annoStreamVcfNew(fileOrUrl, NULL, FALSE, assembly, maxOutRows); else if (sameString(type, "bigWig")) streamer = annoStreamBigWigNew(fileOrUrl, assembly); else if (sameString(type, "pgSnp")) streamer = annoStreamTabNew(fileOrUrl, assembly, pgSnpFileAsObj()); else if (sameString(type, "bam")) errAbort("Sorry, BAM is not yet supported"); else errAbort("Unrecognized bigData type %s of file or url '%s'", type, fileOrUrl); return streamer; } struct annoStreamer *hAnnoStreamerFromTrackDb(struct annoAssembly *assembly, char *selTable, struct trackDb *tdb, char *chrom, int maxOutRows, struct jsonElement *config) /* Figure out the source and type of data and make an annoStreamer. */ @@ -219,36 +229,37 @@ dbTable = trackDbSetting(tdb, "dbTableName"); if (dbTable != NULL) // This is really a database table, not a bigDataUrl CT. dataDb = CUSTOM_TRASH; } if (startsWithWord("wig", tdb->type)) streamer = annoStreamWigDbNew(dataDb, dbTable, assembly, maxOutRows); else if (sameString("longTabix", tdb->type)) { char *fileOrUrl = getBigDataFileName(dataDb, tdb, selTable, chrom); streamer = annoStreamTabixNew(fileOrUrl, assembly, maxOutRows); } else if (sameString("vcfTabix", tdb->type)) { char *fileOrUrl = getBigDataFileName(dataDb, tdb, selTable, chrom); - streamer = annoStreamVcfNew(fileOrUrl, TRUE, assembly, maxOutRows); + char *indexUrl = getBigDataIndexName(tdb); + streamer = annoStreamVcfNew(fileOrUrl, indexUrl, TRUE, assembly, maxOutRows); } else if (sameString("vcf", tdb->type)) { char *fileOrUrl = getBigDataFileName(dataDb, tdb, dbTable, chrom); - streamer = annoStreamVcfNew(fileOrUrl, FALSE, assembly, maxOutRows); + streamer = annoStreamVcfNew(fileOrUrl, NULL, FALSE, assembly, maxOutRows); } else if (sameString("bam", tdb->type)) { warn("Sorry, BAM is not yet supported"); } else if (startsWith("bigBed", tdb->type) || sameString("bigGenePred", tdb->type)) { char *fileOrUrl = getBigDataFileName(dataDb, tdb, selTable, chrom); streamer = annoStreamBigBedNew(fileOrUrl, assembly, maxOutRows); } else if (startsWith("bigWig", tdb->type)) { char *fileOrUrl = getBigDataFileName(dataDb, tdb, selTable, chrom); streamer = annoStreamBigWigNew(fileOrUrl, assembly); //#*** no maxOutRows support } @@ -257,76 +268,77 @@ { char *sourceTable = trackDbSetting(tdb, "sourceTable"); char *inputsTable = trackDbSetting(tdb, "inputTrackTable"); streamer = annoStreamDbFactorSourceNew(dataDb, tdb->track, sourceTable, inputsTable, assembly, maxOutRows); } else if (trackHubDatabase(db)) errAbort("Unrecognized type '%s' for hub track '%s'", tdb->type, tdb->track); if (streamer == NULL) { streamer = annoStreamDbNew(dataDb, dbTable, assembly, maxOutRows, config); } return streamer; } -struct annoGrator *hAnnoGratorFromBigFileUrl(char *fileOrUrl, struct annoAssembly *assembly, +struct annoGrator *hAnnoGratorFromBigFileUrl(char *fileOrUrl, char *indexUrl, struct annoAssembly *assembly, int maxOutRows, enum annoGratorOverlap overlapRule) /* Determine what kind of big data file/url we have and make streamer & grator for it. */ { struct annoStreamer *streamer = NULL; struct annoGrator *grator = NULL; char *type = customTrackTypeFromBigFile(fileOrUrl); if (sameString(type, "bigBed") || sameString("bigGenePred", type)) streamer = annoStreamBigBedNew(fileOrUrl, assembly, maxOutRows); else if (sameString(type, "vcfTabix")) - streamer = annoStreamVcfNew(fileOrUrl, TRUE, assembly, maxOutRows); + streamer = annoStreamVcfNew(fileOrUrl, indexUrl, TRUE, assembly, maxOutRows); else if (sameString(type, "bigWig")) grator = annoGrateBigWigNew(fileOrUrl, assembly, agwmAverage); else if (sameString(type, "bam")) errAbort("Sorry, BAM is not yet supported"); else errAbort("Unrecognized bigData type %s of file or url '%s'", type, fileOrUrl); if (grator == NULL) grator = annoGratorNew(streamer); grator->setOverlapRule(grator, overlapRule); return grator; } struct annoGrator *hAnnoGratorFromTrackDb(struct annoAssembly *assembly, char *selTable, struct trackDb *tdb, char *chrom, int maxOutRows, struct asObject *primaryAsObj, enum annoGratorOverlap overlapRule, struct jsonElement *config) /* Figure out the source and type of data, make an annoStreamer & wrap in annoGrator. * If not NULL, primaryAsObj is used to determine whether we can make an annoGratorGpVar. */ { struct annoGrator *grator = NULL; boolean primaryIsVariants = (primaryAsObj != NULL && (asObjectsMatch(primaryAsObj, pgSnpAsObj()) || asObjectsMatch(primaryAsObj, pgSnpFileAsObj()) || asObjectsMatch(primaryAsObj, vcfAsObj()))); char *bigDataUrl = trackDbSetting(tdb, "bigDataUrl"); +char *indexUrl = getBigDataIndexName(tdb); if (bigDataUrl != NULL) { if (primaryIsVariants && sameString("bigGenePred", tdb->type)) { struct annoStreamer *streamer = annoStreamBigBedNew(bigDataUrl, assembly, maxOutRows); grator = annoGratorGpVarNew(streamer); } else - grator = hAnnoGratorFromBigFileUrl(bigDataUrl, assembly, maxOutRows, overlapRule); + grator = hAnnoGratorFromBigFileUrl(bigDataUrl, indexUrl, assembly, maxOutRows, overlapRule); } else if (startsWithWord("wig", tdb->type)) { char *dataDb = assembly->name; char *dbTable = selTable; if (isCustomTrack(selTable)) { dbTable = trackDbSetting(tdb, "dbTableName"); if (dbTable != NULL) // This is really a database table, not a bigDataUrl CT. dataDb = CUSTOM_TRASH; } grator = annoGrateWigDbNew(dataDb, dbTable, assembly, agwmAverage, maxOutRows); } else if (startsWithWord("bigWig", tdb->type))