cd8540f3042b909432e4257588c13d9249e583f2 angie Wed Apr 1 16:49:33 2015 -0700 hgTables fixes to support bigBed and VCF tracks that have no mysql table, only a bigDataUrl setting that may point to a local file. refs #15059 diff --git src/hg/hgTables/vcf.c src/hg/hgTables/vcf.c index e675028..a9286b6 100644 --- src/hg/hgTables/vcf.c +++ src/hg/hgTables/vcf.c @@ -164,45 +164,37 @@ row[5] = rec->qual; dyJoin(dyFilter, ";", rec->filters, rec->filterCount); row[6] = dyFilter->string; vcfInfoElsToString(dyInfo, vcff, rec); row[7] = dyInfo->string; if (vcff->genotypeCount > 0) { row[8] = rec->format; dyJoin(dyGt, "\t", rec->genotypeUnparsedStrings, vcff->genotypeCount); row[9] = dyGt->string; } else row[8] = row[9] = ""; // compatible with localmem usage } -static char *vcfFileName(struct trackDb *tdb, struct sqlConnection *conn, char *table, char *chrom) +static char *vcfFileName(struct sqlConnection *conn, char *table, char *chrom) // Look up the vcf or vcfTabix file name, using CUSTOM_TRASH if necessary. { -boolean isCt = isCustomTrack(table); -char *dbTable = table; -struct sqlConnection *dbConn = conn; -if (isCt) - { - dbConn = hAllocConn(CUSTOM_TRASH); - struct customTrack *ct = ctLookupName(table); - dbTable = ct->dbTableName; - } -char *fileName = bbiNameFromSettingOrTableChrom(tdb, dbConn, dbTable, chrom); -if (isCt) - hFreeConn(&dbConn); +char *fileName = bigFileNameFromCtOrHub(table, conn); +struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table); +if (fileName == NULL) + fileName = bbiNameFromSettingOrTableChrom(tdb, conn, table, chrom); return fileName; } void vcfTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f, boolean isTabix) /* Print out selected fields from VCF. If fields is NULL, then print out all fields. */ { struct hTableInfo *hti = NULL; hti = getHti(db, table, conn); struct hash *idHash = NULL; char *idField = getIdField(db, curTrack, table, hti); int idFieldNum = 0; /* if we know what field to use for the identifiers, get the hash of names */ if (idField != NULL) @@ -239,42 +231,41 @@ } // If we are outputting a subset of fields, invalidate the VCF header. boolean allFields = (fieldCount == VCFDATALINE_NUM_COLS); if (!allFields) fprintf(f, "# Only selected columns are included below; output is not valid VCF.\n"); struct asObject *as = vcfAsObj(); struct asFilter *filter = NULL; if (anyFilter()) filter = asFilterFromCart(cart, db, table, as); /* Loop through outputting each region */ struct region *region, *regionList = getRegions(); int maxOut = bigFileMaxOutput(); -struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table); // Include the header, absolutely necessary for VCF parsing. boolean printedHeader = FALSE; // Temporary storage for row-ification: struct dyString *dyAlt = newDyString(1024); struct dyString *dyFilter = newDyString(1024); struct dyString *dyInfo = newDyString(1024); struct dyString *dyGt = newDyString(1024); struct vcfRecord *rec; for (region = regionList; region != NULL && (maxOut > 0); region = region->next) { - char *fileName = vcfFileName(tdb, conn, table, region->chrom); + char *fileName = vcfFileName(conn, table, region->chrom); struct vcfFile *vcff; if (isTabix) vcff = vcfTabixFileMayOpen(fileName, region->chrom, region->start, region->end, 100, maxOut); else vcff = vcfFileMayOpen(fileName, region->chrom, region->start, region->end, 100, maxOut, TRUE); if (vcff == NULL) noWarnAbort(); // If we are outputting all fields, but this VCF has no genotype info, omit the // genotype columns from output: if (allFields && vcff->genotypeCount == 0) fieldCount = VCFDATALINE_NUM_COLS - 2; if (!printedHeader) { @@ -374,58 +365,56 @@ vcfFileFree(&vcff); } struct bed *vcfGetFilteredBedsOnRegions(struct sqlConnection *conn, char *db, char *table, struct region *regionList, struct lm *lm, int *retFieldCount, boolean isTabix) /* Get list of beds from VCF, in all regions, that pass filtering. */ { int maxOut = bigFileMaxOutput(); /* Figure out vcf file name get column info and filter. */ struct asObject *as = vcfAsObj(); struct asFilter *filter = asFilterFromCart(cart, db, table, as); struct hash *idHash = identifierHash(db, table); /* Get beds a region at a time. */ -struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table); struct bed *bedList = NULL; struct region *region; for (region = regionList; region != NULL; region = region->next) { - char *fileName = vcfFileName(tdb, conn, table, region->chrom); + char *fileName = vcfFileName(conn, table, region->chrom); if (fileName == NULL) continue; addFilteredBedsOnRegion(fileName, region, table, filter, lm, &bedList, idHash, &maxOut, isTabix); freeMem(fileName); if (maxOut <= 0) { warn("Reached output limit of %d data values, please make region smaller,\n" "\tor set a higher output line limit with the filter settings.", bigFileMaxOutput()); break; } } slReverse(&bedList); return bedList; } struct slName *randomVcfIds(char *table, struct sqlConnection *conn, int count, boolean isTabix) /* Return some semi-random IDs from a VCF file. */ { /* Read 10000 items from vcf file, or if they ask for a big list, then 4x what they ask for. */ -struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table); -char *fileName = vcfFileName(tdb, conn, table, hDefaultChrom(database)); +char *fileName = vcfFileName(conn, table, hDefaultChrom(database)); struct lineFile *lf = isTabix ? lineFileTabixMayOpen(fileName, TRUE) : lineFileMayOpen(fileName, TRUE); if (lf == NULL) noWarnAbort(); int orderedCount = count * 4; if (orderedCount < 100) orderedCount = 100; struct slName *idList = NULL; char *words[4]; int i; for (i = 0; i < orderedCount && lineFileChop(lf, words); i++) { // compress runs of identical ID, in case most are placeholder if (i == 0 || !sameString(words[2], idList->name)) slAddHead(&idList, slNameNew(words[2])); @@ -440,31 +429,31 @@ { slNameFreeList(&(sl->next)); break; } } freez(&fileName); return idList; } #define VCF_MAX_SCHEMA_COLS 20 void showSchemaVcf(char *table, struct trackDb *tdb, boolean isTabix) /* Show schema on vcf. */ { struct sqlConnection *conn = hAllocConn(database); -char *fileName = vcfFileName(tdb, conn, table, hDefaultChrom(database)); +char *fileName = vcfFileName(conn, table, hDefaultChrom(database)); struct asObject *as = vcfAsObj(); hPrintf("<B>Database:</B> %s", database); hPrintf(" <B>Primary Table:</B> %s<br>", table); hPrintf("<B>VCF File:</B> %s", fileName); hPrintf("<BR>\n"); hPrintf("<B>Format description:</B> %s<BR>", as->comment); hPrintf("See the <A HREF=\"%s\" target=_blank>Variant Call Format specification</A> for more details<BR>\n", "http://www.1000genomes.org/wiki/analysis/vcf4.0"); /* Put up table that describes fields. */ hTableStart(); hPrintf("<TR><TH>field</TH>"); hPrintf("<TH>description</TH> "); puts("</TR>\n");