fdb4b5d215ff23b0f066a9b7d7a96ae31045a347
braney
  Fri Jan 15 12:27:29 2016 -0800
yet more work on support CRAM under htslib

diff --git src/lib/bamFile.c src/lib/bamFile.c
index c050b22..9ba386b 100644
--- src/lib/bamFile.c
+++ src/lib/bamFile.c
@@ -208,78 +208,127 @@
 /* it's just used to report errors. */
 {
 #ifdef USE_HTS
 bam1_t *b;
 AllocVar(b);
 hts_itr_t *iter = sam_itr_querys(idx, header, position);
 if (iter == NULL && startsWith("chr", position))
     iter = sam_itr_querys(idx, header, position + strlen("chr"));
 
 if (iter == NULL)
     return;
 int result;
 while ((result = sam_itr_next(samfile, iter, b)) >= 0) 
     callbackFunc(b, callbackData, header);
 
+// if we're reading a CRAM file and the MD5 string has been set
+// we know there was an error finding the reference and we need
+// to request that it be loaded.
+if (samfile->format.format == cram) 
+    {
+    char *md5String =  cram_get_Md5(samfile);
+    if (!isEmpty(md5String))
+        {
+        char server[4096];
+        char pendingFile[4096];
+        char errorFile[4096];
+
+        char *refPath = cram_get_ref_url(samfile);
+        char *cacheDir = cram_get_cache_dir(samfile);
+
+        sprintf(server, refPath, md5String);
+        sprintf(pendingFile, "%s/pending/%s",cacheDir,md5String);
+        sprintf(errorFile, "%s/error/%s",cacheDir,md5String);
+        makeDirsOnPath(pendingFile);
+        FILE *downFile;
+        if ((downFile = fopen(errorFile, "r")) != NULL)
+            {
+            char errorBuf[4096];
+            mustGetLine(downFile, errorBuf, sizeof errorBuf);
+            errAbort("cannot find reference %s.  Error: %s\n", md5String,errorBuf);
+            }
+        else
+            {
+            if ((downFile = fopen(pendingFile, "w")) == NULL)
+                errAbort("cannot find reference %s.  Cannot create file %s.",md5String, pendingFile);  
+            fprintf(downFile,  "%s\n", server);
+            fclose(downFile);
+            errAbort("cannot find reference %s.  Downloading from %s. Pending in %s",md5String, server, pendingFile);
+            }
+        }
+    }
 #else
 int chromId, start, end;
 int ret = bam_parse_region(samfile->header, position, &chromId, &start, &end);
 if (ret != 0 && startsWith("chr", position))
     ret = bam_parse_region(samfile->header, position+strlen("chr"), &chromId, &start, &end);
 if (ret != 0)
     // If the bam file does not cover the current chromosome, OK
     return;
 ret = bam_fetch(samfile->x.bam, idx, chromId, start, end, callbackData, callbackFunc);
 if (ret != 0)
     warn("bam_fetch(%s, %s (chromId=%d) failed (%d)", bamFileName, position, chromId, ret);
 #endif
 }
 
-void bamFetch(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData,
-		 samfile_t **pSamFile)
+void bamFetchPlus(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData,
+		 samfile_t **pSamFile, char *refUrl, char *cacheDir)
 /* Open the .bam file, fetch items in the seq:start-end position range,
  * and call callbackFunc on each bam item retrieved from the file plus callbackData.
  * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl. 
  * The pSamFile parameter is optional.  If non-NULL it will be filled in, just for
  * the benefit of the callback function, with the open samFile.  */
 {
 char *bamFileName = NULL;
 samfile_t *fh = bamOpen(fileOrUrl, &bamFileName);
 #ifdef USE_HTS
+if (fh->format.format == cram) 
+    {
+    if (cacheDir == NULL)
+        errAbort("CRAM cache dir hg.conf variable (cramRef) must exist for CRAM support");
+    cram_set_cache_url(fh, cacheDir, refUrl);  
+    }
 bam_hdr_t *header = sam_hdr_read(fh);
 if (pSamFile != NULL)
     *pSamFile = fh;
 bam_index_t *idx = bamOpenIdx(fh, bamFileName);
 #else
 if (pSamFile != NULL)
     *pSamFile = fh;
 bam_index_t *idx = bamOpenIdx(bamFileName);
 #endif
 if (idx == NULL)
     warn("bam_index_load(%s) failed.", bamFileName);
 else
     {
 #ifdef USE_HTS
     bamFetchAlreadyOpen(fh, header, idx, bamFileName, position, callbackFunc, callbackData);
 #else
     bamFetchAlreadyOpen(fh, idx, bamFileName, position, callbackFunc, callbackData);
 #endif
     bamCloseIdx(&idx);
     }
 bamClose(&fh);
 }
 
+void bamFetch(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData,
+		 samfile_t **pSamFile)
+{
+bamFetchPlus(fileOrUrl, position, callbackFunc, callbackData, pSamFile,
+    NULL, NULL);
+}
+
 boolean bamIsRc(const bam1_t *bam)
 /* Return TRUE if alignment is on - strand. */
 {
 const bam1_core_t *core = &bam->core;
 return (core->flag & BAM_FREVERSE);
 }
 
 void bamGetSoftClipping(const bam1_t *bam, int *retLow, int *retHigh, int *retClippedQLen)
 /* If retLow is non-NULL, set it to the number of "soft-clipped" (skipped) bases at
  * the beginning of the query sequence and quality; likewise for retHigh at end.
  * For convenience, retClippedQLen is the original query length minus soft clipping
  * (and the length of the query sequence that will be returned). */
 {
 unsigned int *cigarPacked = bam1_cigar(bam);
 const bam1_core_t *core = &bam->core;