f78d6e76501646c8b8cc966103bafca896322509
angie
  Mon Oct 7 13:54:42 2013 -0700
Work in progress for #11460 (paste/upload variant input options...):adding an option for user to paste/upload variant identifiers which
will be translated into a sorted list of vcfRecords.

Currently we recognize only rs# IDs.  I was considering adding
dbVar IDs, but those could come from multiple sources (DGV, ClinVar,
ISCA) so I'm not sure.

Treating all symbolic/named alleles as deletions... non-ideal,
but fortunately those are a small minority in dbSNP.

Next: recognize HGVS IDs.

The grander vision of #11460 includes accepting VEP input format
and VCF, but I think those should be new SELECT options so we don't
get into quagmire of guessing format.

diff --git src/inc/vcf.h src/inc/vcf.h
index ca3d942..20f616a 100644
--- src/inc/vcf.h
+++ src/inc/vcf.h
@@ -174,30 +174,33 @@
 	break;
     case vcfInfoCharacter:
 	fprintf(f, "%c", datum.datChar);
 	break;
     case vcfInfoString:
 	fprintf(f, "%s", datum.datString);
 	break;
     default:
 	errAbort("vcfPrintDatum: Unrecognized type %d", type);
 	break;
     }
 }
 
 #define VCF_IGNORE_ERRS (INT_MAX - 1)
 
+struct vcfFile *vcfFileNew();
+/* Return a new, empty vcfFile object. */
+
 struct vcfFile *vcfFileMayOpen(char *fileOrUrl, int maxErr, int maxRecords, boolean parseAll);
 /* Open fileOrUrl and parse VCF header; return NULL if unable.
  * If parseAll, then read in all lines, parse and store in
  * vcff->records; if maxErr >= zero, then continue to parse until
  * there are maxErr+1 errors.  A maxErr less than zero does not stop
  * and reports all errors. Set maxErr to VCF_IGNORE_ERRS for silence. */
 
 struct vcfFile *vcfTabixFileMayOpen(char *fileOrUrl, char *chrom, int start, int end,
 				    int maxErr, int maxRecords);
 /* Open a VCF file that has been compressed and indexed by tabix and
  * parse VCF header, or return NULL if unable.  If chrom is non-NULL,
  * seek to the position range and parse all lines in range into
  * vcff->records.  If maxErr >= zero, then continue to parse until
  * there are maxErr+1 errors.  A maxErr less than zero does not stop
  * and reports all errors. Set maxErr to VCF_IGNORE_ERRS for silence. */
@@ -227,30 +230,33 @@
 /* Parse the words in the next line from vcff into a vcfRecord. Return NULL at end of file.
  * Note: this does not store record in vcff->records! */
 
 struct vcfRecord *vcfRecordFromRow(struct vcfFile *vcff, char **words);
 /* Parse words from a VCF data line into a VCF record structure. */
 
 unsigned int vcfRecordTrimIndelLeftBase(struct vcfRecord *rec);
 /* For indels, VCF includes the left neighboring base; for example, if the alleles are
  * AA/- following a G base, then the VCF record will start one base to the left and have
  * "GAA" and "G" as the alleles.  That is not nice for display for two reasons:
  * 1. Indels appear one base wider than their dbSNP entries.
  * 2. In pgSnp display mode, the two alleles are always the same color.
  * However, for hgTracks' mapBox we need the correct chromStart for identifying the
  * record in hgc -- so return the original chromStart. */
 
+int vcfRecordCmp(const void *va, const void *vb);
+/* Compare to sort based on position. */
+
 void vcfFileFree(struct vcfFile **vcffPtr);
 /* Free a vcfFile object. */
 
 const struct vcfRecord *vcfFileFindVariant(struct vcfFile *vcff, char *variantId);
 /* Return all records with name=variantId, or NULL if not found. */
 
 const struct vcfInfoElement *vcfRecordFindInfo(const struct vcfRecord *record, char *key);
 /* Find an INFO element, or NULL. */
 
 struct vcfInfoDef *vcfInfoDefForKey(struct vcfFile *vcff, const char *key);
 /* Return infoDef for key, or NULL if it wasn't specified in the header or VCF spec. */
 
 void vcfParseGenotypes(struct vcfRecord *record);
 /* Translate record->genotypesUnparsedStrings[] into proper struct vcfGenotype[].
  * This destroys genotypesUnparsedStrings. */