d2c7e8be3ceae6027a5274faafdf6239d778cc63
aamp
  Sat Jul 16 08:31:16 2011 -0700
Changes related to change source filenames around: bamUdc.ch -> bamFile.ch and bamFile.ch -> hgBam.ch
diff --git src/hg/lib/bamFile.c src/hg/lib/bamFile.c
deleted file mode 100644
index 37bfedb..0000000
--- src/hg/lib/bamFile.c
+++ /dev/null
@@ -1,308 +0,0 @@
-/* bamFile -- interface to binary alignment format files using Heng Li's samtools lib. */
-
-#include "common.h"
-#include "hdb.h"
-#include "bamFile.h"
-
-#ifdef USE_BAM
-#include "htmshell.h"
-#include "hgConfig.h"
-#include "udc.h"
-#include "samAlignment.h"
-
-char *bamFileNameFromTable(struct sqlConnection *conn, char *table, char *bamSeqName)
-/* Return file name from table.  If table has a seqName column, then grab the 
- * row associated with bamSeqName (which can be e.g. '1' not 'chr1' if that is the
- * case in the bam file). */
-{
-boolean checkSeqName = (sqlFieldIndex(conn, table, "seqName") >= 0);
-if (checkSeqName && bamSeqName == NULL)
-    errAbort("bamFileNameFromTable: table %s has seqName column, but NULL seqName passed in",
-	     table);
-char query[512];
-if (checkSeqName)
-    safef(query, sizeof(query), "select fileName from %s where seqName = '%s'",
-	  table, bamSeqName);
-else
-    safef(query, sizeof(query), "select fileName from %s", table);
-char *fileName = sqlQuickString(conn, query);
-if (fileName == NULL && checkSeqName)
-    {
-    if (startsWith("chr", bamSeqName))
-	safef(query, sizeof(query), "select fileName from %s where seqName = '%s'",
-	      table, bamSeqName+strlen("chr"));
-    else
-	safef(query, sizeof(query), "select fileName from %s where seqName = 'chr%s'",
-	      table, bamSeqName);
-    fileName = sqlQuickString(conn, query);
-    }
-if (fileName == NULL)
-    {
-    if (checkSeqName)
-	errAbort("Missing fileName for seqName '%s' in %s table", bamSeqName, table);
-    else
-	errAbort("Missing fileName in %s table", table);
-    }
-return fileName;
-}
-
-boolean bamFileExists(char *fileOrUrl)
-/* Return TRUE if we can successfully open the bam file and its index file. */
-{
-char *udcFuseRoot = cfgOption("udcFuse.mountPoint");
-return bamFileExistsUdc(fileOrUrl, udcFuseRoot);
-}
-samfile_t *bamOpen(char *fileOrUrl, char **retBamFileName)
-/* Return an open bam file, dealing with FUSE caching if need be. 
- * Return parameter if NON-null will return the file name after FUSing */
-{
-char *udcFuseRoot = cfgOption("udcFuse.mountPoint");
-return bamOpenUdc(fileOrUrl, retBamFileName, udcFuseRoot);
-}
-
-void bamFetch(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData,
-		     samfile_t **pSamFile)
-/* Open the .bam file, fetch items in the seq:start-end position range,
- * and call callbackFunc on each bam item retrieved from the file plus callbackData.
- * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl. 
- * The pSamFile parameter is optional.  If non-NULL it will be filled in, just for
- * the benefit of the callback function, with the open samFile.  */
-{
-char *udcFuseRoot = cfgOption("udcFuse.mountPoint");
-bamFetchUdc(fileOrUrl, position, callbackFunc, callbackData, pSamFile, udcFuseRoot);
-}
-
-struct ffAli *bamToFfAli(const bam1_t *bam, struct dnaSeq *target, int targetOffset,
-			 boolean useStrand, char **retQSeq)
-/* Convert from bam to ffAli format.  If retQSeq is non-null, set it to the 
- * query sequence into which ffAli needle pointers point. (Adapted from psl.c's pslToFfAli.) */
-{
-struct ffAli *ffList = NULL, *ff;
-const bam1_core_t *core = &bam->core;
-boolean isRc = useStrand && bamIsRc(bam);
-DNA *needle = (DNA *)bamGetQuerySequence(bam, useStrand);
-if (retQSeq)
-    *retQSeq = needle;
-if (isRc)
-    reverseComplement(target->dna, target->size);
-DNA *haystack = target->dna;
-unsigned int *cigarPacked = bam1_cigar(bam);
-int tStart = targetOffset, qStart = 0, i;
-// If isRc, need to go through the CIGAR ops backwards, but sequence offsets still count up.
-int iStart = isRc ? (core->n_cigar - 1) : 0;
-int iIncr = isRc ? -1 : 1;
-for (i = iStart;  isRc ? (i >= 0) : (i < core->n_cigar);  i += iIncr)
-    {
-    char op;
-    int size = bamUnpackCigarElement(cigarPacked[i], &op);
-    switch (op)
-	{
-	case 'M': // match or mismatch (gapless aligned block)
-	    AllocVar(ff);
-	    ff->left = ffList;
-	    ffList = ff;
-	    ff->nStart = needle + qStart;
-	    ff->nEnd = ff->nStart + size;
-	    ff->hStart = haystack + tStart - targetOffset;
-	    ff->hEnd = ff->hStart + size;
-	    tStart += size;
-	    qStart += size;
-	    break;
-	case 'I': // inserted in query
-	case 'S': // skipped query bases at beginning or end ("soft clipping")
-	    qStart += size;
-	    break;
-	case 'D': // deleted from query
-	case 'N': // long deletion from query (intron as opposed to small del)
-	    tStart += size;
-	    break;
-	case 'H': // skipped query bases not stored in record's query sequence ("hard clipping")
-	case 'P': // P="silent deletion from padded reference sequence" -- ignore these.
-	    break;
-	default:
-	    errAbort("bamToFfAli: unrecognized CIGAR op %c -- update me", op);
-	}
-    }
-ffList = ffMakeRightLinks(ffList);
-ffCountGoodEnds(ffList);
-return ffList;
-}
-
-
-struct bamToSamHelper
-/* Helper structure to help get sam alignments out of a BAM file */
-    {
-    struct lm *lm;	/* Local memory pool to allocate into */
-    char *chrom;	/* Chromosome name */
-    struct dyString *dy;	/* Dynamic temporary string */
-    samfile_t *samFile;	/* Open sam/bam file header. */
-    struct samAlignment *samList;	/* List of alignments. */
-    };
-
-static void addToChars(char *s, int size, char amount)
-/* Add amount to each char in s of given size. */
-{
-int i;
-for (i=0; i<size; ++i)
-    s[i] += amount;
-}
-
-static boolean isAllSameChar(char *s, int size, char c)
-/* Return TRUE if first size chars of s are 0 */
-{
-int i;
-for (i=0; i<size; ++i)
-    if (s[i] != c)
-	return FALSE;
-return TRUE;
-}
-
-int bamAddOneSamAlignment(const bam1_t *bam, void *data)
-/* bam_fetch() calls this on each bam alignment retrieved.  Translate each bam
- * into a samAlignment. */
-{
-struct bamToSamHelper *helper = (struct bamToSamHelper *)data;
-struct lm *lm = helper->lm;
-struct samAlignment *sam;
-lmAllocVar(lm, sam);
-const bam1_core_t *core = &bam->core;
-struct dyString *dy = helper->dy;
-sam->qName = lmCloneString(lm, bam1_qname(bam));
-sam->flag = core->flag;
-if (helper->chrom != NULL)
-    sam->rName = helper->chrom;
-else
-    sam->rName = lmCloneString(lm, helper->samFile->header->target_name[core->tid]);
-sam->pos = core->pos + 1;
-sam->mapQ = core->qual;
-dyStringClear(dy);
-bamUnpackCigar(bam, dy);
-sam->cigar = lmCloneStringZ(lm, dy->string, dy->stringSize);
-if (core->mtid >= 0)
-    {
-    if (core->tid == core->mtid)
-	sam->rNext = "=";
-    else
-	sam->rNext = lmCloneString(lm, helper->samFile->header->target_name[core->mtid]);
-    }
-else
-    sam->rNext = "*";
-sam->pNext = core->mpos + 1;
-sam->tLen = core->isize;
-sam->seq = lmAlloc(lm, core->l_qseq + 1);
-bamUnpackQuerySequence(bam, FALSE, sam->seq);
-char *bamQual = (char *)bam1_qual(bam);
-if (isAllSameChar(bamQual, core->l_qseq, -1))
-    sam->qual = "*";
-else
-    {
-    sam->qual = lmCloneStringZ(lm, bamQual, core->l_qseq);
-    addToChars(sam->qual, core->l_qseq, 33);
-    }
-dyStringClear(dy);
-bamUnpackAux(bam, dy);
-sam->tagTypeVals = lmCloneStringZ(lm, dy->string, dy->stringSize);
-slAddHead(&helper->samList, sam);
-return 0;
-}
-
-struct samAlignment *bamFetchSamAlignment(char *fileOrUrl, char *chrom, int start, int end,
-	struct lm *lm)
-/* Fetch region as a list of samAlignments - which is more or less an unpacked
- * bam record.  Results is allocated out of lm, since it tends to be large... */
-{
-struct bamToSamHelper helper;
-helper.lm = lm;
-helper.chrom = chrom;
-helper.dy = dyStringNew(0);
-helper.samList = NULL;
-char posForBam[256];
-safef(posForBam, sizeof(posForBam), "%s:%d-%d", chrom, start, end);
-bamFetch(fileOrUrl, posForBam, bamAddOneSamAlignment, &helper, &helper.samFile);
-dyStringFree(&helper.dy);
-slReverse(&helper.samList);
-return helper.samList;
-}
-
-struct samAlignment *bamReadNextSamAlignments(samfile_t *fh, int count, struct lm *lm)
-/* Read next count alignments in SAM format, allocated in lm.  May return less than
- * count at end of file. */
-{
-/* Set up helper. */
-struct bamToSamHelper helper;
-helper.lm = lm;
-helper.chrom = NULL;
-helper.dy = dyStringNew(0);
-helper.samFile = fh;
-helper.samList = NULL;
-
-/* Loop through calling our own fetch function */
-int i;
-bam1_t *b = bam_init1();
-for (i=0; i<count; ++i)
-    {
-    if (samread(fh, b) < 0)
-       break;
-    bamAddOneSamAlignment(b, &helper);
-    }
-bam_destroy1(b);
-
-/* Clean up and go home. */
-dyStringFree(&helper.dy);
-slReverse(&helper.samList);
-return helper.samList;
-}
-
-#else
-// If we're not compiling with samtools, make stub routines so compile won't fail:
-
-char *bamFileNameFromTable(struct sqlConnection *conn, char *table, char *bamSeqName)
-/* Return file name from table.  If table has a seqName column, then grab the
- * row associated with bamSeqName (which is not nec. in chromInfo, e.g.
- * bam file might have '1' not 'chr1'). */
-{
-errAbort(COMPILE_WITH_SAMTOOLS, "bamFileNameFromTable");
-return NULL;
-}
-
-boolean bamFileExists(char *fileOrUrl)
-/* Return TRUE if we can successfully open the bam file and its index file. */
-{
-errAbort(COMPILE_WITH_SAMTOOLS, "bamFileExists");
-return FALSE;
-}
-
-samfile_t *bamOpen(char *fileOrUrl, char **retBamFileName)
-/* Return an open bam file, dealing with some FUSE caching if need be. */
-{
-errAbort(COMPILE_WITH_SAMTOOLS, "bamOpen");
-return FALSE;
-}
-
-struct samAlignment *bamFetchSamAlignment(char *fileOrUrl, char *chrom, int start, int end,
-	struct lm *lm)
-/* Fetch region as a list of samAlignments - which is more or less an unpacked
- * bam record.  Results is allocated out of lm, since it tends to be large... */
-{
-errAbort(COMPILE_WITH_SAMTOOLS, "bamFetchSamAlignment");
-return NULL;
-}
-
-struct samAlignment *bamReadNextSamAlignments(samfile_t *fh, int count, struct lm *lm)
-/* Read next count alignments in SAM format, allocated in lm.  May return less than
- * count at end of file. */
-{
-errAbort(COMPILE_WITH_SAMTOOLS, "bamReadNextSamAlignments");
-return NULL;
-}
-
-struct ffAli *bamToFfAli(const bam1_t *bam, struct dnaSeq *target, int targetOffset,
-			 boolean useStrand, char **retQSeq)
-/* Convert from bam to ffAli format.  If retQSeq is non-null, set it to the
- * query sequence into which ffAli needle pointers point. */
-{
-errAbort(COMPILE_WITH_SAMTOOLS, "bamToFfAli");
-return NULL;
-}
-
-#endif//ndef USE_BAM