6dfdf2e6277b1815d40117b0f1ff1508e6f92058
angie
Fri Mar 15 13:41:09 2024 -0700
Support for M. tuberculosis: at 4M bases, the genome is too large for arrays on the stack. Allocate & free instead.

diff --git src/hg/hgPhyloPlace/vcfFromFasta.c src/hg/hgPhyloPlace/vcfFromFasta.c
index 5e1b00b..067a5a2 100644
--- src/hg/hgPhyloPlace/vcfFromFasta.c
+++ src/hg/hgPhyloPlace/vcfFromFasta.c
@@ -519,40 +519,41 @@
 for (i = 0; i < sampleCount; i++)
     fprintf(f, "\t%s", sampleNames[i]);
 fputc('\n', f);
 int chromStart;
 for (chromStart = 0; chromStart < ref->size; chromStart++)
     if (!maskSites[chromStart])
         writeVcfSnv(snvsByPos, sampleCount, ref->name, chromStart, f);
 carefulClose(&f);
 }
 
 static void pslSnvsToVcfFile(struct psl *psls, struct dnaSeq *ref, struct seqInfo *querySeqs,
                              char *vcfFileName, struct slName **maskSites)
 /* Find single-nucleotide differences between each query sequence and reference, and
  * write out a VCF file with genotype columns for the queries. */
 {
-struct snvInfo *snvsByPos[ref->size];
-memset(snvsByPos, 0, sizeof snvsByPos);
+struct snvInfo **snvsByPos = NULL;
+AllocArray(snvsByPos, ref->size);
 extractSnvs(psls, ref, querySeqs, snvsByPos, maskSites);
 int sampleCount = slCount(querySeqs);
 char *sampleNames[sampleCount];
 struct seqInfo *qSeq;
 int i;
 for (i = 0, qSeq = querySeqs; i < sampleCount; i++, qSeq = qSeq->next)
     sampleNames[i] = qSeq->seq->name;
 writeSnvsToVcfFile(snvsByPos, ref, sampleNames, sampleCount, maskSites, vcfFileName);
+freeMem(snvsByPos);
 }
 
 static void analyzeGaps(struct seqInfo *filteredSeqs, struct dnaSeq *refGenome)
 /* Tally up actual insertions and deletions in each psl; ignore skipped N bases. */
 {
 struct seqInfo *si;
 for (si = filteredSeqs; si != NULL; si = si->next)
     {
     struct psl *psl = si->psl;
     if (psl && (psl->qBaseInsert || psl->tBaseInsert))
         {
         struct dyString *dyIns = dyStringNew(0);
         struct dyString *dyDel = dyStringNew(0);
         int insBases = 0, delBases = 0;
         int ix;