41723d134f8b0c52c78705c2e5da97f8875e3cf6 angie Wed Feb 15 11:44:39 2017 -0800 Added HGVS terms as variant input option in hgVai. refs #11460 diff --git src/lib/vcf.c src/lib/vcf.c index 3eebe95..8598768 100644 --- src/lib/vcf.c +++ src/lib/vcf.c @@ -516,30 +516,40 @@ if (line[0] != '#') { lineFileReuse(lf); vcfFileWarn(vcff, "Expected to find # followed by column names (\"#CHROM POS ...\"), " "assuming default VCF 4.1 columns"); safef(headerLineBuf, sizeof(headerLineBuf), "%s", vcfDefaultHeader); line = headerLineBuf; } dyStringAppend(dyHeader, line); dyStringAppendC(dyHeader, '\n'); parseColumnHeaderRow(vcff, line); vcff->headerString = dyStringCannibalize(&dyHeader); return vcff; } +struct vcfFile *vcfFileFromHeader(char *name, char *headerString, int maxErr) +/* Parse the VCF header string into a vcfFile object with no rows. + * name is for error reporting. + * If maxErr is non-negative then continue to parse until maxErr+1 errors have been found. + * A maxErr less than zero does not stop and reports all errors. + * Set maxErr to VCF_IGNORE_ERRS for silence. */ +{ +struct lineFile *lf = lineFileOnString(name, TRUE, cloneString(headerString)); +return vcfFileHeaderFromLineFile(lf, maxErr); +} #define VCF_MAX_INFO (4*1024) static void parseRefAndAlt(struct vcfFile *vcff, struct vcfRecord *record, char *ref, char *alt) /* Make an array of alleles, ref first, from the REF and comma-sep'd ALT columns. * Use the length of the reference sequence to set record->chromEnd. * Note: this trashes the alt argument, since this is expected to be its last use. */ { char *altAlleles[VCF_MAX_INFO]; int altCount = chopCommas(alt, altAlleles); record->alleleCount = 1 + altCount; record->alleles = vcfFileAlloc(vcff, record->alleleCount * sizeof(record->alleles[0])); record->alleles[0] = vcfFilePooledStr(vcff, ref); int i; for (i = 0; i < altCount; i++) @@ -1463,15 +1473,95 @@ int trimmedBases = countIdenticalBasesRight(alleles, alCount); // Build a /-separated allele string, trimming bases on the right if necessary: dyStringClear(dy); for (i = 0; i < alCount; i++) { if (i > 0) dyStringAppendC(dy, '/'); char *allele = alleles[i]; if (allele[trimmedBases] == '\0') dyStringAppendC(dy, '-'); else dyStringAppendN(dy, allele, strlen(allele)-trimmedBases); } return dy->string; } + +static void vcfWriteWordArrayWithSep(FILE *f, int count, char **words, char sep) +/* Write words joined by sep to f (or, if count is zero, "."). */ +{ +if (count < 1) + fputc('.', f); +else + { + fputs(words[0], f); + int i; + for (i = 1; i < count; i++) + { + fputc(sep, f); + fputs(words[i], f); + } + } +} + +static void vcfWriteInfo(FILE *f, struct vcfRecord *rec) +/* Write rec->infoElements to f. */ +{ +if (rec->infoCount < 1) + fputc('.', f); +else + { + int i, j; + for (i = 0; i < rec->infoCount; i++) + { + struct vcfInfoElement *info = &(rec->infoElements[i]); + enum vcfInfoType type = typeForInfoKey(rec->file, info->key); + if (i > 0) + fputc(';', f); + fputs(info->key, f); + for (j = 0; j < info->count; j++) + { + union vcfDatum datum = info->values[j]; + switch (type) + { + case vcfInfoInteger: + fprintf(f, "=%d", datum.datInt); + break; + case vcfInfoFloat: + fprintf(f, "=%lf", datum.datFloat); + break; + case vcfInfoFlag: + // Flag key might have a value in older VCFs e.g. 3.2's DB=0, DB=1 + if (isNotEmpty(datum.datString)) + fprintf(f, "=%s", datum.datString); + break; + case vcfInfoCharacter: + fprintf(f, "%c", datum.datChar); + break; + case vcfInfoString: + fputc('=', f); + if (isNotEmpty(datum.datString)) + fputs(datum.datString, f); + break; + default: + vcfFileErr(rec->file, "invalid vcfInfoType (uninitialized?) %d", type); + break; + } + } + } + } +} + +void vcfRecordWriteNoGt(FILE *f, struct vcfRecord *rec) +/* Write the first 8 columns of VCF rec to f. Genotype data will be ignored if present. */ +{ +fprintf(f, "%s\t%d\t%s\t%s\t", rec->chrom, rec->chromStart+1, rec->name, rec->alleles[0]); +// Alternate alleles start at [1] +vcfWriteWordArrayWithSep(f, rec->alleleCount-1, &(rec->alleles[1]), ','); +fputc('\t', f); +fputs(rec->qual, f); +fputc('\t', f); +vcfWriteWordArrayWithSep(f, rec->filterCount, rec->filters, ';'); +fputc('\t', f); +vcfWriteInfo(f, rec); +fputc('\n', f); +}