f53f70b9f131634b0eb32706a98f525fefa55cce
tdreszer
  Tue Feb 5 17:36:46 2013 -0800
Some changes requested by Angie in code review, along with a bug fix.  Next checkin will break out the vcfBits routines into separate .c/.h files
diff --git src/inc/vcf.h src/inc/vcf.h
index 2d7a826..28c14a6 100644
--- src/inc/vcf.h
+++ src/inc/vcf.h
@@ -194,48 +194,45 @@
  * vcff->records; if maxErr >= zero, then continue to parse until
  * there are maxErr+1 errors.  A maxErr less than zero does not stop
  * and reports all errors. Set maxErr to VCF_IGNORE_ERRS for silence. */
 
 struct vcfFile *vcfTabixFileMayOpen(char *fileOrUrl, char *chrom, int start, int end,
 				    int maxErr, int maxRecords);
 /* Open a VCF file that has been compressed and indexed by tabix and
  * parse VCF header, or return NULL if unable.  If chrom is non-NULL,
  * seek to the position range and parse all lines in range into
  * vcff->records.  If maxErr >= zero, then continue to parse until
  * there are maxErr+1 errors.  A maxErr less than zero does not stop
  * and reports all errors. Set maxErr to VCF_IGNORE_ERRS for silence. */
 
 int vcfTabixBatchRead(struct vcfFile *vcff, char *chrom, int start, int end,
                       int maxErr, int maxRecords);
-// Reads a batch of records from an opened and indexed VCF file, returning number
-// of records in batch.  Seeks to the start position and parses all lines in range,
-// adding them to vcff->records.  Note: vcff->records will continue to be sorted,
-// even if batches are loaded out of order.  If maxErr >= zero, then continue to
-// parse until there are maxErr+1 errors.  A maxErr less than zero does not stop
-// and reports all errors. Set maxErr to VCF_IGNORE_ERRS for silence.
+// Reads a batch of records from an opened and indexed VCF file, adding them to
+// vcff->records and returning the count of new records added in this batch.
+// Note: vcff->records will continue to be sorted, even if batches are loaded
+// out of order.  Additionally, resulting vcff->records will contain no duplicates
+// so returned count reflects only the new records added, as opposed to all records
+// in range.  If maxErr >= zero, then continue to parse until there are maxErr+1
+// errors.  A maxErr less than zero does not stop and reports all errors.  Set
+// maxErr to VCF_IGNORE_ERRS for silence.
 
 void vcfFileMakeReusePool(struct vcfFile *vcff, int initialSize);
 // Creates a separate memory pool for records.  Establishing this pool allows
 // using vcfFileFlushRecords to abandon previously read records and free
 // the associated memory. Very useful when reading an entire file in batches.
 #define vcfFileLm(vcff) ((vcff)->reusePool ? (vcff)->reusePool : (vcff)->pool->lm)
 
-void vcfFileAbandonReusePool(struct vcfFile *vcff);
-// Abandons all previously allocated data from the reuse pool and reverts to
-// common pool. The vcf->records set will also be abandoned as pointers are invalid.
-// USE WITH CAUTION.  All previously allocated pointers from this pool are now invalid.
-
 void vcfFileFlushRecords(struct vcfFile *vcff);
 // Abandons all previously read vcff->records and flushes the reuse pool (if it exists).
 // USE WITH CAUTION.  All previously allocated record pointers are now invalid.
 
 struct vcfRecord *vcfNextRecord(struct vcfFile *vcff);
 /* Parse the words in the next line from vcff into a vcfRecord. Return NULL at end of file.
  * Note: this does not store record in vcff->records! */
 
 struct vcfRecord *vcfRecordFromRow(struct vcfFile *vcff, char **words);
 /* Parse words from a VCF data line into a VCF record structure. */
 
 unsigned int vcfRecordTrimIndelLeftBase(struct vcfRecord *rec);
 /* For indels, VCF includes the left neighboring base; for example, if the alleles are
  * AA/- following a G base, then the VCF record will start one base to the left and have
  * "GAA" and "G" as the alleles.  That is not nice for display for two reasons:
@@ -359,29 +356,29 @@
 
 int vcfHaploBitsListCollapseIdentical(struct vcfFile *vcff, struct haploBits **phBitsList,
                                       int haploGenomeMin);
 // Collapses a list of haploBits based upon identical bit arrays.
 // If haploGenomeMin > 1, will drop all hBits structs covering less than N haploGenomes.
 // Returns count of hBit structs removed.
 
 INLINE struct variantBits *vcfHaploBitIxToVariantBits(struct haploBits *hBits, int bitIx,
                                                       struct variantBits *vBitsList)
 // Returns the vBitsList element at the record index vcfRecordIxFromBitIx() gives for bitIx in hBits.
 // Assumes vBitsList is in same order as hBits bit array.  Note vBits->record has full vcf details.
 {
 return slElementFromIx(vBitsList,vcfRecordIxFromBitIx(hBits,bitIx));
 }
 
-unsigned char vcfHaploBitsToVariantIx(struct haploBits *hBits,int bitIx);
+unsigned char vcfHaploBitsToVariantAlleleIx(struct haploBits *hBits,int bitIx);
 // Given a hBits struct and bitIx, what is the actual variant allele ix
 // to use when accessing the vcfRecord?
 
 enum elmNodeOverlap vcfHaploBitsCmp(const struct slList *elA, const struct slList *elB,
                                     int *matchWeight, void *extra);
 // HaploBits compare routine for building tree of relations using elmTreeGrow().
 
 struct slList *vcfHaploBitsMatching(const struct slList *elA, const struct slList *elB,
                                     void *extra);
 // Returns a HaploBits structure representing the common parts of elements A and B.
 // Used with elmTreeGrow() to create nodes that are the common parts between leaves/branches.
 
 #endif // vcf_h