a29e98e451251954d3c79928eda4ea4111f11151 angie Tue Jan 30 14:34:52 2018 -0800 hgTables vcfFileName wasn't handling plain VCF custom tracks correctly. diff --git src/hg/hgTables/vcf.c src/hg/hgTables/vcf.c index 1c68406..99ea471 100644 --- src/hg/hgTables/vcf.c +++ src/hg/hgTables/vcf.c @@ -164,35 +164,45 @@ row[5] = rec->qual; dyJoin(dyFilter, ";", rec->filters, rec->filterCount); row[6] = dyFilter->string; vcfInfoElsToString(dyInfo, vcff, rec); row[7] = dyInfo->string; if (vcff->genotypeCount > 0) { row[8] = rec->format; dyJoin(dyGt, "\t", rec->genotypeUnparsedStrings, vcff->genotypeCount); row[9] = dyGt->string; } else row[8] = row[9] = ""; // compatible with localmem usage } -static char *vcfFileName(struct sqlConnection *conn, char *table, char *chrom) +static char *vcfFileName(struct sqlConnection *conn, char *table, char *chrom, boolean isTabix) // Look up the vcf or vcfTabix file name, using CUSTOM_TRASH if necessary. { char *fileName = bigFileNameFromCtOrHub(table, conn); struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table); +if (isCustomTrack(table) && ! isTabix) + { + // fileName is stored in the customTrash table + struct customTrack *ct = ctLookupName(table); + struct sqlConnection *conn = hAllocConn(CUSTOM_TRASH); + char query[1024]; + sqlSafef(query, sizeof(query), "select fileName from %s", ct->dbTableName); + fileName = sqlQuickString(conn, query); + hFreeConn(&conn); + } if (fileName == NULL) fileName = bbiNameFromSettingOrTableChrom(tdb, conn, table, chrom); return fileName; } void vcfTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f, boolean isTabix) /* Print out selected fields from VCF. If fields is NULL, then print out all fields. */ { struct hTableInfo *hti = NULL; hti = getHti(db, table, conn); struct hash *idHash = NULL; char *idField = getIdField(db, curTrack, table, hti); int idFieldNum = 0; @@ -241,31 +251,31 @@ filter = asFilterFromCart(cart, db, table, as); /* Loop through outputting each region */ struct region *region, *regionList = getRegions(); int maxOut = bigFileMaxOutput(); // Include the header, absolutely necessary for VCF parsing. boolean printedHeader = FALSE; // Temporary storage for row-ification: struct dyString *dyAlt = newDyString(1024); struct dyString *dyFilter = newDyString(1024); struct dyString *dyInfo = newDyString(1024); struct dyString *dyGt = newDyString(1024); struct vcfRecord *rec; for (region = regionList; region != NULL && (maxOut > 0); region = region->next) { - char *fileName = vcfFileName(conn, table, region->chrom); + char *fileName = vcfFileName(conn, table, region->chrom, isTabix); struct vcfFile *vcff; if (isTabix) { char *indexUrl = bigDataIndexFromCtOrHub(table, conn); vcff = vcfTabixFileAndIndexMayOpen(fileName, indexUrl, region->chrom, region->start, region->end, 100, maxOut); } else vcff = vcfFileMayOpen(fileName, region->chrom, region->start, region->end, 100, maxOut, TRUE); if (vcff == NULL) noWarnAbort(); // If we are outputting all fields, but this VCF has no genotype info, omit the // genotype columns from output: if (allFields && vcff->genotypeCount == 0) @@ -372,52 +382,52 @@ char *db, char *table, struct region *regionList, struct lm *lm, int *retFieldCount, boolean isTabix) /* Get list of beds from VCF, in all regions, that pass filtering. */ { int maxOut = bigFileMaxOutput(); /* Figure out vcf file name get column info and filter. */ struct asObject *as = vcfAsObj(); struct asFilter *filter = asFilterFromCart(cart, db, table, as); struct hash *idHash = identifierHash(db, table); /* Get beds a region at a time. */ struct bed *bedList = NULL; struct region *region; for (region = regionList; region != NULL; region = region->next) { - char *fileName = vcfFileName(conn, table, region->chrom); + char *fileName = vcfFileName(conn, table, region->chrom, isTabix); if (fileName == NULL) continue; char *indexUrl = bigDataIndexFromCtOrHub(table, conn); addFilteredBedsOnRegion(fileName, indexUrl, region, table, filter, lm, &bedList, idHash, &maxOut, isTabix); freeMem(fileName); if (maxOut <= 0) { errAbort("Reached output limit of %d data values, please make region smaller,\n" "\tor set a higher output line limit with the filter settings.", bigFileMaxOutput()); } } slReverse(&bedList); return bedList; } struct slName *randomVcfIds(char *table, struct sqlConnection *conn, int count, boolean isTabix) /* Return some semi-random IDs from a VCF file. */ { /* Read 10000 items from vcf file, or if they ask for a big list, then 4x what they ask for. */ -char *fileName = vcfFileName(conn, table, hDefaultChrom(database)); +char *fileName = vcfFileName(conn, table, hDefaultChrom(database), isTabix); char *indexUrl = bigDataIndexFromCtOrHub(table, conn); struct lineFile *lf = isTabix ? lineFileTabixAndIndexMayOpen(fileName, indexUrl, TRUE) : lineFileMayOpen(fileName, TRUE); if (lf == NULL) noWarnAbort(); int orderedCount = count * 4; if (orderedCount < 100) orderedCount = 100; struct slName *idList = NULL; char *words[4]; int i; for (i = 0; i < orderedCount && lineFileChop(lf, words); i++) { // compress runs of identical ID, in case most are placeholder @@ -434,31 +444,31 @@ { slNameFreeList(&(sl->next)); break; } } freez(&fileName); return idList; } #define VCF_MAX_SCHEMA_COLS 20 void showSchemaVcf(char *table, struct trackDb *tdb, boolean isTabix) /* Show schema on vcf. */ { struct sqlConnection *conn = hAllocConn(database); -char *fileName = vcfFileName(conn, table, hDefaultChrom(database)); +char *fileName = vcfFileName(conn, table, hDefaultChrom(database), isTabix); struct asObject *as = vcfAsObj(); hPrintf("<B>Database:</B> %s", database); hPrintf(" <B>Primary Table:</B> %s<br>", table); hPrintf("<B>VCF File:</B> %s", fileName); hPrintf("<BR>\n"); hPrintf("<B>Format description:</B> %s<BR>", as->comment); hPrintf("See the <A HREF=\"%s\" target=_blank>Variant Call Format specification</A> for more details<BR>\n", "http://www.1000genomes.org/wiki/analysis/vcf4.0"); /* Put up table that describes fields. */ hTableStart(); hPrintf("<TR><TH>field</TH>"); hPrintf("<TH>description</TH> "); puts("</TR>\n");