5a73e86ce8424ee217b2b8de96d3a6b1ff6fe470 braney Thu Jan 18 12:02:46 2024 -0800 limit VCF track loads to 10,000 items diff --git src/lib/vcf.c src/lib/vcf.c index 85db6c6..39d2358 100644 --- src/lib/vcf.c +++ src/lib/vcf.c @@ -922,66 +922,77 @@ static boolean chromsMatch(char *chromA, char *chromB) // Return TRUE if chromA and chromB are non-NULL and identical, possibly ignoring // "chr" at the beginning of one but not the other. { // Allow SARS-CoV-2 VCF to use GenBank or RefSeq ID instead of our chromified RefSeq ID: static char *sarsCoV2Ids[] = {"NC_045512v2", "MN908947.3", "NC_045512.2"}; if (chromA == NULL || chromB == NULL) return FALSE; else if (stringIx(chromA, sarsCoV2Ids) >= 0 && stringIx(chromB, sarsCoV2Ids) >= 0) return TRUE; char *chromAPlus = startsWith("chr", chromA) ? chromA+3 : chromA; char *chromBPlus = startsWith("chr", chromB) ? chromB+3 : chromB; return sameString(chromAPlus, chromBPlus); } -static struct vcfRecord *vcfParseData(struct vcfFile *vcff, char *chrom, int start, int end, - int maxRecords) +static struct vcfRecord *vcfParseDataExt(struct vcfFile *vcff, char *chrom, int start, int end, + int maxRecords, char *abortMessage) // Given a vcfFile into which the header has been parsed, and whose // lineFile is positioned at the beginning of a data row, parse and // return all data rows (in region, if chrom is non-NULL) from lineFile, // up to maxRecords. { if (vcff == NULL) return NULL; int recCount = 0; struct vcfRecord *records = NULL; struct vcfRecord *record; while ((record = vcfNextRecord(vcff)) != NULL) { if (maxRecords >= 0 && recCount >= maxRecords) + { + if (abortMessage != NULL) + errAbort("%s",abortMessage); + else break; + } if (chrom == NULL) { slAddHead(&records, record); recCount++; } else if (chromsMatch(chrom, record->chrom)) { if (end > 0 && record->chromStart >= end) break; else if (record->chromEnd > start) { slAddHead(&records, record); recCount++; } } } +printf("records %d\n", recCount); slReverse(&records); return records; } +static struct vcfRecord *vcfParseData(struct vcfFile *vcff, char *chrom, int start, int end, int maxRecords) +{ +return vcfParseDataExt(vcff, chrom, start, end, maxRecords, NULL); +} + struct vcfFile *vcfFileMayOpen(char *fileOrUrl, char *chrom, int start, int end, int maxErr, int maxRecords, boolean parseAll) /* Open fileOrUrl and parse VCF header; return NULL if unable. * If chrom is non-NULL, scan past any variants that precede {chrom, chromStart}. * Note: this is very inefficient -- it's better to use vcfTabix if possible! * If parseAll, then read in all lines in region, parse and store in * vcff->records; if maxErr >= zero, then continue to parse until * there are maxErr+1 errors. A maxErr less than zero does not stop * and reports all errors. Set maxErr to VCF_IGNORE_ERRS for silence */ { struct lineFile *lf = NULL; if (startsWith("http://", fileOrUrl) || startsWith("ftp://", fileOrUrl) || startsWith("https://", fileOrUrl)) lf = netLineFileOpen(fileOrUrl); else @@ -1018,55 +1029,62 @@ return vcff; } struct vcfFile *vcfTabixFileMayOpen(char *fileOrUrl, char *chrom, int start, int end, int maxErr, int maxRecords) /* Open a VCF file that has been compressed and indexed by tabix and * parse VCF header, or return NULL if unable. If chrom is non-NULL, * seek to the position range and parse all lines in range into * vcff->records. If maxErr >= zero, then continue to parse until * there are maxErr+1 errors. A maxErr less than zero does not stop * and reports all errors. Set maxErr to VCF_IGNORE_ERRS for silence */ { return vcfTabixFileAndIndexMayOpen(fileOrUrl, NULL, chrom, start, end, maxErr, maxRecords); } -struct vcfFile *vcfTabixFileAndIndexMayOpen(char *fileOrUrl, char *tbiFileOrUrl, char *chrom, int start, int end, - int maxErr, int maxRecords) +struct vcfFile *vcfTabixFileAndIndexMayOpenExt(char *fileOrUrl, char *tbiFileOrUrl, char *chrom, int start, int end, + int maxErr, int maxRecords, char *abortMessage) + /* Open a VCF file that has been compressed and indexed by tabix and * parse VCF header, or return NULL if unable. tbiFileOrUrl can be NULL. * If chrom is non-NULL, seek to the position range and parse all lines in * range into vcff->records. If maxErr >= zero, then continue to parse until * there are maxErr+1 errors. A maxErr less than zero does not stop * and reports all errors. Set maxErr to VCF_IGNORE_ERRS for silence */ { struct lineFile *lf = lineFileTabixAndIndexMayOpen(fileOrUrl, tbiFileOrUrl, TRUE); if (lf == NULL) return NULL; struct vcfFile *vcff = vcfFileHeaderFromLineFile(lf, maxErr); if (vcff == NULL) return NULL; if (isNotEmpty(chrom) && start != end) { if (lineFileSetTabixRegion(lf, chrom, start, end)) - vcff->records = vcfParseData(vcff, NULL, 0, 0, maxRecords); + vcff->records = vcfParseDataExt(vcff, NULL, 0, 0, maxRecords, abortMessage); lineFileClose(&(vcff->lf)); // file is all read in so we close it } return vcff; } +struct vcfFile *vcfTabixFileAndIndexMayOpen(char *fileOrUrl, char *tbiFileOrUrl, char *chrom, int start, int end, + int maxErr, int maxRecords) +{ +return vcfTabixFileAndIndexMayOpenExt(fileOrUrl, tbiFileOrUrl, chrom, start, end, maxErr, maxRecords, NULL); +} + int vcfRecordCmp(const void *va, const void *vb) /* Compare to sort based on position. */ { const struct vcfRecord *a = *((struct vcfRecord **)va); const struct vcfRecord *b = *((struct vcfRecord **)vb); int dif; dif = strcmp(a->chrom, b->chrom); if (dif == 0) dif = a->chromStart - b->chromStart; if (dif == 0) dif = a->chromEnd - b->chromEnd; // shortest first if (dif == 0) dif = strcmp(a->name, b->name); // finally by name return dif; }