11e45667d4e291b3038ccda729a1cdf5bcaf004a
braney
  Mon Jul 11 15:46:54 2016 -0700
incorporate htslib in kent src, remove USE_BAM, USE_SAMTABIX, USE_TABIX
defines, modify a bunch of makefiles to include kentSrc variable
pointing to top of the tree.

diff --git src/lib/bamFile.c src/lib/bamFile.c
index 6290ee9..3248c60 100644
--- src/lib/bamFile.c
+++ src/lib/bamFile.c
@@ -1,170 +1,102 @@
 /* bamFile -- interface to binary alignment format files using Heng Li's samtools lib. */
 
 /* Copyright (C) 2014 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 
 #include "common.h"
 #include "portable.h"
 #include "bamFile.h"
-#ifdef USE_BAM
 #include "htmshell.h"
 #include "udc.h"
 
-#ifdef KNETFILE_HOOKS
 // If KNETFILE_HOOKS is used (as recommended!), then we can simply call bam_index_load
 // without worrying about the samtools lib creating local cache files in cgi-bin:
 
-#ifdef USE_HTS
 static bam_index_t *bamOpenIdx(samfile_t *sam, char *fileOrUrl)
 /* If fileOrUrl has a valid accompanying .bai file, parse and return the index;
  * otherwise return NULL. */
 {
 if (sam->format.format == cram) 
     return sam_index_load(sam, fileOrUrl);
 
 // assume that index is a .bai file 
 char indexName[4096];
 safef(indexName, sizeof indexName, "%s.bai", fileOrUrl);
 return sam_index_load2(sam, fileOrUrl, indexName);
 }
-#else
-static bam_index_t *bamOpenIdx(char *fileOrUrl)
-/* If fileOrUrl has a valid accompanying .bai file, parse and return the index;
- * otherwise return NULL. */
-{
-bam_index_t *idx = bam_index_load(fileOrUrl);
-return idx;
-}
-#endif
 
-#else// no KNETFILE_HOOKS
-// Oh well.  The unmodified samtools lib downloads .bai files into the current
-// working directory, which is cgi-bin -- not good.  So we need to temporarily
-// change to a trash directory, let samtools download there, then pop back to
-// cgi-bin.
-
-static char *getSamDir()
-/* Return the name of a trash dir for samtools to run in (it creates files in current dir)
- * and make sure the directory exists. */
-{
-static char *samDir = NULL;
-char *dirName = "samtools";
-if (samDir == NULL)
-    {
-    mkdirTrashDirectory(dirName);
-    size_t len = strlen(trashDir()) + 1 + strlen(dirName) + 1;
-    samDir = needMem(len);
-    safef(samDir, len, "%s/%s", trashDir(), dirName);
-    }
-return samDir;
-}
-
-static bam_index_t *bamOpenIdx(char *fileOrUrl)
-/* If fileOrUrl has a valid accompanying .bai file, parse and return the index;
- * otherwise return NULL. */
-{
-// When file is an URL, this caches the index file in addition to validating:
-// Since samtools's url-handling code saves the .bai file to the current directory,
-// chdir to a trash directory before calling bam_index_load, then chdir back.
-char *runDir = getCurrentDir();
-char *samDir = getSamDir();
-boolean usingUrl = (strstr(fileOrUrl, "tp://") || strstr(fileOrUrl, "https://"));
-if (usingUrl)
-    setCurrentDir(samDir);
-bam_index_t *idx = bam_index_load(fileOrUrl);
-if (usingUrl)
-    setCurrentDir(runDir);
-return idx;
-}
-
-#endif//ndef KNETFILE_HOOKS
 
 static void bamCloseIdx(bam_index_t **pIdx)
 /* Free unless already NULL. */
 {
 if (pIdx != NULL && *pIdx != NULL)
     {
     free(*pIdx); // Not freeMem, freez etc -- sam just uses malloc/calloc.
     *pIdx = NULL;
     }
 }
 
 boolean bamFileExists(char *fileOrUrl)
 /* Return TRUE if we can successfully open the bam file and its index file.
  * NOTE: this doesn't give enough diagnostics */
 {
 char *bamFileName = fileOrUrl;
 samfile_t *fh = samopen(bamFileName, "rb", NULL);
-#ifdef USE_HTS
 if (fh != NULL)
     {
     bam_index_t *idx = bamOpenIdx(fh, bamFileName);
-#else
-// Check both fh and fh->header; non-NULL fh can have NULL header if header doesn't parse!
-if (fh != NULL && fh->header != NULL)
-    {
-    bam_index_t *idx = bamOpenIdx(bamFileName);
-#endif
     samclose(fh);
     if (idx == NULL)
 	{
 	warn("bamFileExists: failed to read index corresponding to %s", bamFileName);
 	return FALSE;
 	}
     bamCloseIdx(&idx);
     bamClose(&fh);
     return TRUE;
     }
 return FALSE;
 }
 
 samfile_t *bamOpen(char *fileOrUrl, char **retBamFileName)
 /* Return an open bam file or errAbort (should be named bamMustOpen).
  * Return parameter if NON-null will return the file name (vestigial; long ago
  * there was a plan to use udcFuse filenames instead of URLs) */
 {
 char *bamFileName = fileOrUrl;
 if (retBamFileName != NULL)
     *retBamFileName = bamFileName;
 
 #ifdef BAM_VERSION
 // suppress too verbose messages in samtools >= 0.1.18; see redmine #6491
 // This variable didn't exist in older versions of samtools (where BAM_VERSION wasn't defined).
 bam_verbose = 1;
 #endif
 
 samfile_t *fh = samopen(bamFileName, "rb", NULL);
 // Check both fh and fh->header; non-NULL fh can have NULL header if header doesn't parse!
-#ifdef USE_HTS
 if (fh == NULL)
-#else
-if (fh == NULL || fh->header == NULL)
-#endif
     {
     boolean usingUrl = (strstr(fileOrUrl, "tp://") || strstr(fileOrUrl, "https://"));
     struct dyString *urlWarning = dyStringNew(0);
     if (usingUrl && fh == NULL)
 	{
 	dyStringAppend(urlWarning,
 		       ". If you are able to access the URL with your web browser, "
 		       "please try reloading this page.");
 	}
-#ifndef USE_HTS
-    else if (fh != NULL && fh->header == NULL)
-	dyStringAppend(urlWarning, ": parser error while reading the file header.");
-#endif
     errAbort("Failed to open %s%s", fileOrUrl, urlWarning->string);
     }
 return fh;
 }
 
 samfile_t *bamMustOpenLocal(char *fileName, char *mode, void *extraHeader)
 /* Open up sam or bam file or die trying.  The mode parameter is 
  *    "r" - open SAM to read
  *    "rb" - open BAM to read
  *    "w" - open SAM to write
  *    "wb" - open BAM to write
  * The extraHeader is generally NULL in the read case, and the write case
  * contains a pointer to a bam_header_t with information about the header.
  * The implementation is just a wrapper around samopen from the samtools library
  * that aborts with error message if there's a problem with the open. */
@@ -178,53 +110,44 @@
 void bamClose(samfile_t **pSamFile)
 /* Close down a samfile_t */
 {
 if (pSamFile != NULL)
     {
     samclose(*pSamFile);
     *pSamFile = NULL;
     }
 }
 
 void bamFileAndIndexMustExist(char *fileOrUrl)
 /* Open both a bam file and its accompanying index or errAbort; this is what it
  * takes for diagnostic info to propagate up through errCatches in calling code. */
 {
 samfile_t *bamF = bamOpen(fileOrUrl, NULL);
-#ifdef USE_HTS
 bam_index_t *idx = bamOpenIdx(bamF, fileOrUrl);
-#else
-bam_index_t *idx = bamOpenIdx(fileOrUrl);
-#endif
 if (idx == NULL)
     errAbort("failed to read index file (.bai) corresponding to %s", fileOrUrl);
 bamCloseIdx(&idx);
 bamClose(&bamF);
 }
 
-#ifdef USE_HTS
 void bamFetchAlreadyOpen(samfile_t *samfile, bam_hdr_t *header,  bam_index_t *idx, char *bamFileName, 
-#else
-void bamFetchAlreadyOpen(samfile_t *samfile, bam_index_t *idx, char *bamFileName, 
-#endif
 			 char *position, bam_fetch_f callbackFunc, void *callbackData)
 /* With the open bam file, return items the same way with the callbacks as with bamFetch() */
 /* except in this case use an already-open bam file and index (use bam_index_load and free() for */
 /* the index). It seems a little strange to pass the filename in with the open bam, but */
 /* it's just used to report errors. */
 {
-#ifdef USE_HTS
 bam1_t *b;
 AllocVar(b);
 hts_itr_t *iter = sam_itr_querys(idx, header, position);
 if (iter == NULL && startsWith("chr", position))
     iter = sam_itr_querys(idx, header, position + strlen("chr"));
 
 if (iter == NULL)
     return;
 int result;
 while ((result = sam_itr_next(samfile, iter, b)) >= 0) 
     callbackFunc(b, callbackData, header);
 
 // if we're reading a CRAM file and the MD5 string has been set
 // we know there was an error finding the reference and we need
 // to request that it be loaded.
@@ -250,79 +173,57 @@
             {
             char errorBuf[4096];
             mustGetLine(downFile, errorBuf, sizeof errorBuf);
             errAbort("cannot find reference %s.  Error: %s\n", md5String,errorBuf);
             }
         else
             {
             if ((downFile = fopen(pendingFile, "w")) == NULL)
                 errAbort("cannot find reference %s.  Cannot create file %s.",md5String, pendingFile);  
             fprintf(downFile,  "%s\n", server);
             fclose(downFile);
             errAbort("Cannot find reference %s.  Downloading from %s. Please refresh screen to check download status.",md5String, server);
             }
         }
     }
-#else
-int chromId, start, end;
-int ret = bam_parse_region(samfile->header, position, &chromId, &start, &end);
-if (ret != 0 && startsWith("chr", position))
-    ret = bam_parse_region(samfile->header, position+strlen("chr"), &chromId, &start, &end);
-if (ret != 0)
-    // If the bam file does not cover the current chromosome, OK
-    return;
-ret = bam_fetch(samfile->x.bam, idx, chromId, start, end, callbackData, callbackFunc);
-if (ret != 0)
-    warn("bam_fetch(%s, %s (chromId=%d) failed (%d)", bamFileName, position, chromId, ret);
-#endif
 }
 
 void bamFetchPlus(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData,
 		 samfile_t **pSamFile, char *refUrl, char *cacheDir)
 /* Open the .bam file, fetch items in the seq:start-end position range,
  * and call callbackFunc on each bam item retrieved from the file plus callbackData.
  * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl. 
  * The pSamFile parameter is optional.  If non-NULL it will be filled in, just for
  * the benefit of the callback function, with the open samFile.  */
 {
 char *bamFileName = NULL;
 samfile_t *fh = bamOpen(fileOrUrl, &bamFileName);
-#ifdef USE_HTS
 if (fh->format.format == cram) 
     {
     if (cacheDir == NULL)
         errAbort("CRAM cache dir hg.conf variable (cramRef) must exist for CRAM support");
     cram_set_cache_url(fh, cacheDir, refUrl);  
     }
 bam_hdr_t *header = sam_hdr_read(fh);
 if (pSamFile != NULL)
     *pSamFile = fh;
 bam_index_t *idx = bamOpenIdx(fh, bamFileName);
-#else
-if (pSamFile != NULL)
-    *pSamFile = fh;
-bam_index_t *idx = bamOpenIdx(bamFileName);
-#endif
 if (idx == NULL)
     warn("bam_index_load(%s) failed.", bamFileName);
 else
     {
-#ifdef USE_HTS
     bamFetchAlreadyOpen(fh, header, idx, bamFileName, position, callbackFunc, callbackData);
-#else
-    bamFetchAlreadyOpen(fh, idx, bamFileName, position, callbackFunc, callbackData);
-#endif
     bamCloseIdx(&idx);
     }
 bamClose(&fh);
 }
 
 void bamFetch(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData,
 		 samfile_t **pSamFile)
 {
 bamFetchPlus(fileOrUrl, position, callbackFunc, callbackData, pSamFile,
     NULL, NULL);
 }
 
 boolean bamIsRc(const bam1_t *bam)
 /* Return TRUE if alignment is on - strand. */
 {
@@ -651,250 +552,31 @@
     else if (type == 'c') { dyStringPrintf(dy, "%d", *s); ++s; }
     else if (type == 'S') { dyStringPrintf(dy, "%u", *(uint16_t*)s); s += 2; }
     else if (type == 's') { dyStringPrintf(dy, "%d", *(int16_t*)s); s += 2; }
     else if (type == 'I') { dyStringPrintf(dy, "%u", *(uint32_t*)s); s += 4; }
     else if (type == 'i') { dyStringPrintf(dy, "%d", *(int32_t*)s); s += 4; }
     else if (type == 'f') { dyStringPrintf(dy, "%g", *(float*)s); s += 4; }
     else if (type == 'd') { dyStringPrintf(dy, "%lg", *(double*)s); s += 8; }
     else if (type == 'Z' || type == 'H')
 	{
 	dyStringAppend(dy, (char *)s);
 	s += strlen((char *)s) + 1;
 	}
     }
 }
 
-#ifndef USE_HTS
-struct bamChromInfo *bamChromList(samfile_t *fh)
-{
-/* Return list of chromosomes from bam header. We make no attempty to normalize chromosome names to UCSC format,
-   so list may contain things like "1" for "chr1", "I" for "chrI", "MT" for "chrM" etc. */
-int i;
-struct bamChromInfo *list = NULL;
-bam_header_t *bamHeader = fh->header;
-if(bamHeader == NULL)
-    return NULL;
-for(i = 0; i < bamHeader->n_targets; i++)
-    {
-    struct bamChromInfo *info = NULL;
-    AllocVar(info);
-    info->name = cloneString(bamHeader->target_name[i]);
-    info->size = bamHeader->target_len[i];
-    slAddHead(&list, info);
-    }
-slReverse(&list);
-return list;
-}
-
-void samToOpenBed(char *samIn, FILE *f)
-/* Like samToOpenBed, but the output is the already open file f. */
-{
-samfile_t *sf = samopen(samIn, "r", NULL);
-bam_header_t *bamHeader = sf->header;
-bam1_t one;
-ZeroVar(&one);
-int err;
-while ((err = samread(sf, &one)) >= 0)
-    {
-    int32_t tid = one.core.tid;
-    if (tid < 0)
-        continue;
-    char *chrom = bamHeader->target_name[tid];
-    // Approximate here... can do better if parse cigar.
-    int start = one.core.pos;
-    int size = one.core.l_qseq;
-    int end = start + size;	
-    boolean isRc = (one.core.flag & BAM_FREVERSE);
-    char strand = '+';
-    if (isRc)
-	{
-	strand = '-';
-	reverseIntRange(&start, &end, bamHeader->target_len[tid]);
-	}
-    fprintf(f, "%s\t%d\t%d\t.\t0\t%c\n", chrom, start, end, strand);
-    }
-if (err < 0 && err != -1)
-    errnoAbort("samread err %d", err);
-samclose(sf);
-}
-
-void samToBed(char *samIn, char *bedOut)
-/* samToBed - Convert SAM file to a pretty simple minded bed file.. */
-{
-FILE *f = mustOpen(bedOut, "w");
-samToOpenBed(samIn, f);
-carefulClose(&f);
-}
-#endif
-
-#else
-// If we're not compiling with samtools, make stub routines so compile won't fail:
-
-boolean bamFileExists(char *bamFileName)
-/* Return TRUE if we can successfully open the bam file and its index file. */
-{
-warn(COMPILE_WITH_SAMTOOLS, "bamFileExists");
-return FALSE;
-}
-
-void bamFileAndIndexMustExist(char *fileOrUrl)
-/* Open both a bam file and its accompanying index or errAbort; this is what it
- * takes for diagnostic info to propagate up through errCatches in calling code. */
-{
-errAbort(COMPILE_WITH_SAMTOOLS, "bamFileAndIndexMustExist");
-}
-
-samfile_t *bamOpen(char *fileOrUrl, char **retBamFileName)
-/* Return an open bam file */
-{
-warn(COMPILE_WITH_SAMTOOLS, "bamOpen");
-return FALSE;
-}
-
-samfile_t *bamMustOpenLocal(char *fileName, char *mode, void *extraHeader)
-/* Open up sam or bam file or die trying.  The mode parameter is 
- *    "r" - open SAM to read
- *    "rb" - open BAM to read
- *    "w" - open SAM to write
- *    "wb" - open BAM to write
- * The extraHeader is generally NULL in the read case, and the write case
- * contains a pointer to a bam_header_t with information about the header.
- * The implementation is just a wrapper around samopen from the samtools library
- * that aborts with error message if there's a problem with the open. */
-{
-warn(COMPILE_WITH_SAMTOOLS, "bamMustOpenLocal");
-return FALSE;
-}
-
-void bamClose(samfile_t **pSamFile)
-/* Close down a samefile_t */
-{
-errAbort(COMPILE_WITH_SAMTOOLS, "bamClose");
-}
-
-void bamFetch(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData,
-	      samfile_t **pSamFile)
-/* Open the .bam file, fetch items in the seq:start-end position range,
- * and call callbackFunc on each bam item retrieved from the file plus callbackData.
- * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl.
- * The pSamFile parameter is optional.  If non-NULL it will be filled in, just for
- * the benefit of the callback function, with the open samFile.  */
-{
-errAbort(COMPILE_WITH_SAMTOOLS, "bamFetch");
-}
-
-boolean bamIsRc(const bam1_t *bam)
-/* Return TRUE if alignment is on - strand. */
-{
-errAbort(COMPILE_WITH_SAMTOOLS, "bamIsRc");
-return FALSE;
-}
-
-void bamGetSoftClipping(const bam1_t *bam, int *retLow, int *retHigh, int *retClippedQLen)
-/* If retLow is non-NULL, set it to the number of "soft-clipped" (skipped) bases at
- * the beginning of the query sequence and quality; likewise for retHigh at end.
- * For convenience, retClippedQLen is the original query length minus soft clipping
- * (and the length of the query sequence that will be returned). */
-{
-errAbort(COMPILE_WITH_SAMTOOLS, "bamGetSoftClipping");
-}
-
-char *bamGetQuerySequence(const bam1_t *bam, boolean useStrand)
-/* Return the nucleotide sequence encoded in bam.  The BAM format
- * reverse-complements query sequence when the alignment is on the - strand,
- * so if useStrand is given we rev-comp it back to restore the original query
- * sequence. */
-{
-errAbort(COMPILE_WITH_SAMTOOLS, "bamGetQuerySequence");
-return NULL;
-}
-
-UBYTE *bamGetQueryQuals(const bam1_t *bam, boolean useStrand)
-/* Return the base quality scores encoded in bam as an array of ubytes. */
-{
-errAbort(COMPILE_WITH_SAMTOOLS, "bamGetQueryQuals");
-return NULL;
-}
-
-char *bamGetCigar(const bam1_t *bam)
-/* Return a BAM-enhanced CIGAR string, decoded from the packed encoding in bam. */
-{
-errAbort(COMPILE_WITH_SAMTOOLS, "bamGetCigar");
-return NULL;
-}
-
-void bamShowCigarEnglish(const bam1_t *bam)
-/* Print out cigar in English e.g. "20 (mis)Match, 1 Deletion, 3 (mis)Match" */
-{
-errAbort(COMPILE_WITH_SAMTOOLS, "bamShowCigarEnglish");
-}
-
-void bamShowFlagsEnglish(const bam1_t *bam)
-/* Print out flags in English, e.g. "Mate is on '-' strand; Properly paired". */
-{
-errAbort(COMPILE_WITH_SAMTOOLS, "bamShowFlagsEnglish");
-}
-
-int bamGetTargetLength(const bam1_t *bam)
-/* Tally up the alignment's length on the reference sequence from
- * bam's packed-int CIGAR representation. */
-{
-errAbort(COMPILE_WITH_SAMTOOLS, "bamGetTargetLength");
-return 0;
-}
-
-bam1_t *bamClone(const bam1_t *bam)
-/* Return a newly allocated copy of bam. */
-{
-errAbort(COMPILE_WITH_SAMTOOLS, "bamClone");
-return NULL;
-}
-
-void bamShowTags(const bam1_t *bam)
-/* Print out tags in HTML: bold key, no type indicator for brevity. */
-{
-errAbort(COMPILE_WITH_SAMTOOLS, "bamShowTags");
-}
-
-char *bamGetTagString(const bam1_t *bam, char *tag, char *buf, size_t bufSize)
-/* If bam's tags include the given 2-character tag, place the value into
- * buf (zero-terminated, trunc'd if nec) and return a pointer to buf,
- * or NULL if tag is not present. */
-{
-errAbort(COMPILE_WITH_SAMTOOLS, "bamGetTagString");
-return NULL;
-}
-
-struct bamChromInfo *bamChromList(samfile_t *fh)
-{
-errAbort(COMPILE_WITH_SAMTOOLS, "bamChromList");
-return NULL;
-}
-
-void samToBed(char *samIn, char *bedOut)
-/* samToBed - Convert SAM file to a pretty simple minded bed file.. */
-{
-errAbort(COMPILE_WITH_SAMTOOLS, "samToBed");
-}
-
-void samToOpenBed(char *samIn, FILE *bedOut)
-/* samToBed - Convert SAM file to a pretty simple minded bed file.. */
-{
-errAbort(COMPILE_WITH_SAMTOOLS, "samToOpenBed");
-}
 
-#endif//ndef USE_BAM
 
 static void bamChromInfoFree(struct bamChromInfo **pInfo)
 /* Free up one chromInfo */
 {
 struct bamChromInfo *info = *pInfo;
 if (info != NULL)
     {
     freeMem(info->name);
     freez(pInfo);
     }
 }
 
 void bamChromInfoFreeList(struct bamChromInfo **pList)
 /* Free a list of dynamically allocated bamChromInfo's */
 {