06d7be056190c14b85e71bc12523f18ea6815b5e
markd
  Mon Dec 7 00:50:29 2020 -0800
BLAT mmap index support merge with master

diff --git src/hg/utils/vcfToBed/vcfToBed.c src/hg/utils/vcfToBed/vcfToBed.c
index dfae97e..b36c374 100644
--- src/hg/utils/vcfToBed/vcfToBed.c
+++ src/hg/utils/vcfToBed/vcfToBed.c
@@ -1,25 +1,26 @@
 /* vcfToBed - Convert VCF to BED9+ with optional extra fields.
  * Extra VCF tags get placed into a separate tab file for later indexing.. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "vcf.h"
 #include "dnautil.h"
 
-#define MAX_BED_EXTRA 50 // how many extra fields  can we put into the bigBed itself (for filtering, labels, etc)
+// how many extra fields  can we put into the bigBed itself (for filtering, labels, etc):
+#define MAX_BED_EXTRA 100
 #define bed9Header "#chrom\tchromStart\tchromEnd\tname\tscore\tstrand\tthickStart\tthickEnd\titemRgb\tref\talt\tFILTER"
 
 // hash up all the info tags and their vcfInfoDef structs for faster searching
 struct hash *infoHash = NULL;
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
     "vcfToBed - Convert VCF to BED9+ with optional extra fields.\n"
     "usage:\n"
     "    vcfToBed in.vcf outPrefix\n"
     "options:\n"
     "    -fields=comma-sep list of tags to include in the bed file, other fields will be placed into\n"
     "           out.extraFields.tab\n"
@@ -46,34 +47,34 @@
     char *tag = tempKeepFields[i];
     struct vcfInfoDef *def = NULL;
     if ( (def = vcfInfoDefForKey(vcff, tag)) == NULL)
         verbose(2, "Warning: skipping desired tag '%s', does not exist in INFO fields.\n", tag);
     else
         {
         keepFields[keepIx++] = cloneString(tag);
         }
     }
 return keepIx;
 }
 
 char *fixupChromName(char *chrom)
 /* Prepend "chr" if missing */
 {
-if (!startsWith(chrom, "chr"))
-    return catTwoStrings("chr", chrom);
-else
+if (startsWith("chr", chrom))
     return chrom;
+else
+    return catTwoStrings("chr", chrom);
 }
 
 #define VCF_MAX_INFO (4*1024)
 
 void fixupLeftPadding(int *start, char *ref, char *alt)
 /* If an indel is left padded fix up the start position */
 {
 char *altTmp = cloneString(alt);
 char *alleles[VCF_MAX_INFO+1];
 alleles[0] = cloneString(ref);
 char *altAlleles[VCF_MAX_INFO];
 int altCount = chopCommas(altTmp, altAlleles);
 int alleleCount = 1 + altCount;
 if (alleleCount > 1)
     {
@@ -156,32 +157,32 @@
 /* Turn a VCF line into a bed9+ line */
 {
 struct dyString *name = dyStringNew(0);
 struct dyString *shortenedRef = dyStringNew(0);
 struct dyString *shortenedAlt = dyStringNew(0);
 char *line = NULL;
 char *chopped[9]; // 9 fields in case VCF has genotype fields, keep them separate from info fields
 int infoCount = slCount(vcff->infoDefs);
 char *infoTags[infoCount];
 while (lineFileNext(vcff->lf, &line, NULL))
     {
     int fieldCount = chopTabs(line, chopped);
     if (fieldCount < 8)
         errAbort("ERROR: malformed VCF, missing fields at line: '%d'", vcff->lf->lineIx);
     char *chrom = fixupChromName(chopped[0]);
-    int start = atoi(chopped[1]);
-    int end = start + 1;
+    int start = atoi(chopped[1]) - 1;
+    int end = start;
     char *ref = cloneString(chopped[3]);
     char *alt = cloneString(chopped[4]);
     if (strlen(ref) == dnaFilteredSize(ref))
         end = start + strlen(ref);
     fixupLeftPadding(&start, ref, alt);
     if (!sameString(chopped[2], "."))
         dyStringPrintf(name, "%s", chopped[2]);
     else
         {
         shortenRefAltString(shortenedRef, shortenedAlt, ref, alt);
         dyStringPrintf(name, "%s_%d:%s/%s", chrom,start,shortenedRef->string,shortenedAlt->string);
         }
     fprintf(outBed, "%s\t%d\t%d\t%s\t0\t.\t%d\t%d\t0,0,0", chrom,start,end,name->string,start,end);
     fprintf(outBed, "\t%s\t%s\t%s", ref, alt, chopped[6]);
     if (extraFieldCount)