3377b968b5f797ef07f454f5a5fb125668f6c2e4 braney Tue May 10 12:10:55 2016 -0700 fix a problem that shows up if neither samtabix or htslib are installed diff --git src/hg/lib/hgBam.c src/hg/lib/hgBam.c index 6565075..9d5ab91 100644 --- src/hg/lib/hgBam.c +++ src/hg/lib/hgBam.c @@ -1,302 +1,315 @@ /* bamFile -- interface to binary alignment format files using Heng Li's samtools lib. */ /* Copyright (C) 2014 The Regents of the University of California * See README in this or parent directory for licensing information. */ #include "common.h" #include "hdb.h" #include "hgBam.h" #ifdef USE_BAM #include "htmshell.h" #include "samAlignment.h" char *bamFileNameFromTable(struct sqlConnection *conn, char *table, char *bamSeqName) /* Return file name from table. If table has a seqName column, then grab the * row associated with bamSeqName (which can be e.g. '1' not 'chr1' if that is the * case in the bam file). */ { boolean checkSeqName = (sqlFieldIndex(conn, table, "seqName") >= 0); if (checkSeqName && bamSeqName == NULL) errAbort("bamFileNameFromTable: table %s has seqName column, but NULL seqName passed in", table); char query[512]; if (checkSeqName) sqlSafef(query, sizeof(query), "select fileName from %s where seqName = '%s'", table, bamSeqName); else sqlSafef(query, sizeof(query), "select fileName from %s", table); char *fileName = sqlQuickString(conn, query); if (fileName == NULL && checkSeqName) { if (startsWith("chr", bamSeqName)) sqlSafef(query, sizeof(query), "select fileName from %s where seqName = '%s'", table, bamSeqName+strlen("chr")); else sqlSafef(query, sizeof(query), "select fileName from %s where seqName = 'chr%s'", table, bamSeqName); fileName = sqlQuickString(conn, query); } if (fileName == NULL) { if (checkSeqName) errAbort("Missing fileName for seqName '%s' in %s table", bamSeqName, table); else errAbort("Missing fileName in %s table", table); } return fileName; } struct ffAli *bamToFfAli(const bam1_t *bam, struct dnaSeq *target, int targetOffset, boolean useStrand, char **retQSeq) /* Convert from bam to ffAli format. If retQSeq is non-null, set it to the * query sequence into which ffAli needle pointers point. (Adapted from psl.c's pslToFfAli.) */ { struct ffAli *ffList = NULL, *ff; const bam1_core_t *core = &bam->core; boolean isRc = useStrand && bamIsRc(bam); DNA *needle = (DNA *)bamGetQuerySequence(bam, useStrand); if (retQSeq) *retQSeq = needle; if (isRc) reverseComplement(target->dna, target->size); DNA *haystack = target->dna; unsigned int *cigarPacked = bam1_cigar(bam); int tStart = targetOffset, qStart = 0, i; // If isRc, need to go through the CIGAR ops backwards, but sequence offsets still count up. int iStart = isRc ? (core->n_cigar - 1) : 0; int iIncr = isRc ? -1 : 1; for (i = iStart; isRc ? (i >= 0) : (i < core->n_cigar); i += iIncr) { char op; int size = bamUnpackCigarElement(cigarPacked[i], &op); switch (op) { case 'M': // match or mismatch (gapless aligned block) case '=': // match case 'X': // mismatch AllocVar(ff); ff->left = ffList; ffList = ff; ff->nStart = needle + qStart; ff->nEnd = ff->nStart + size; ff->hStart = haystack + tStart - targetOffset; ff->hEnd = ff->hStart + size; tStart += size; qStart += size; break; case 'I': // inserted in query case 'S': // skipped query bases at beginning or end ("soft clipping") qStart += size; break; case 'D': // deleted from query case 'N': // long deletion from query (intron as opposed to small del) tStart += size; break; case 'H': // skipped query bases not stored in record's query sequence ("hard clipping") case 'P': // P="silent deletion from padded reference sequence" -- ignore these. break; default: errAbort("bamToFfAli: unrecognized CIGAR op %c -- update me", op); } } ffList = ffMakeRightLinks(ffList); ffCountGoodEnds(ffList); return ffList; } struct bamToSamHelper /* Helper structure to help get sam alignments out of a BAM file */ { struct lm *lm; /* Local memory pool to allocate into */ char *chrom; /* Chromosome name */ struct dyString *dy; /* Dynamic temporary string */ samfile_t *samFile; /* Open sam/bam file header. */ struct samAlignment *samList; /* List of alignments. */ }; static void addToChars(char *s, int size, char amount) /* Add amount to each char in s of given size. */ { int i; for (i=0; ilm; struct samAlignment *sam; lmAllocVar(lm, sam); const bam1_core_t *core = &bam->core; struct dyString *dy = helper->dy; sam->qName = lmCloneString(lm, bam1_qname(bam)); sam->flag = core->flag; if (helper->chrom != NULL) sam->rName = helper->chrom; else #ifdef USE_HTS sam->rName = lmCloneString(lm, header->target_name[core->tid]); #else sam->rName = lmCloneString(lm, helper->samFile->header->target_name[core->tid]); #endif sam->pos = core->pos + 1; sam->mapQ = core->qual; dyStringClear(dy); bamUnpackCigar(bam, dy); sam->cigar = lmCloneStringZ(lm, dy->string, dy->stringSize); if (core->mtid >= 0) { if (core->tid == core->mtid) sam->rNext = "="; else #ifdef USE_HTS sam->rNext = lmCloneString(lm, header->target_name[core->mtid]); #else sam->rNext = lmCloneString(lm, helper->samFile->header->target_name[core->mtid]); #endif } else sam->rNext = "*"; sam->pNext = core->mpos + 1; sam->tLen = core->isize; sam->seq = lmAlloc(lm, core->l_qseq + 1); bamUnpackQuerySequence(bam, FALSE, sam->seq); char *bamQual = (char *)bam1_qual(bam); if (isAllSameChar(bamQual, core->l_qseq, -1)) sam->qual = "*"; else { sam->qual = lmCloneStringZ(lm, bamQual, core->l_qseq); addToChars(sam->qual, core->l_qseq, 33); } dyStringClear(dy); bamUnpackAux(bam, dy); sam->tagTypeVals = lmCloneStringZ(lm, dy->string, dy->stringSize); slAddHead(&helper->samList, sam); return 0; } struct samAlignment *bamFetchSamAlignmentPlus(char *fileOrUrl, char *chrom, int start, int end, struct lm *lm, char *refUrl, char *cacheDir) /* Fetch region as a list of samAlignments - which is more or less an unpacked * bam record. Results is allocated out of lm, since it tends to be large... */ { struct bamToSamHelper helper; helper.lm = lm; helper.chrom = chrom; helper.dy = dyStringNew(0); helper.samList = NULL; char posForBam[256]; safef(posForBam, sizeof(posForBam), "%s:%d-%d", chrom, start+1, end); bamFetchPlus(fileOrUrl, posForBam, bamAddOneSamAlignment, &helper, &helper.samFile, refUrl, cacheDir); dyStringFree(&helper.dy); slReverse(&helper.samList); return helper.samList; } struct samAlignment *bamFetchSamAlignment(char *fileOrUrl, char *chrom, int start, int end, struct lm *lm) /* Fetch region as a list of samAlignments - which is more or less an unpacked * bam record. Results is allocated out of lm, since it tends to be large... */ { return bamFetchSamAlignmentPlus(fileOrUrl, chrom, start, end, lm, NULL, NULL); } #ifdef USE_HTS struct samAlignment *bamReadNextSamAlignments(samfile_t *fh, bam_hdr_t *header, int count, struct lm *lm) #else struct samAlignment *bamReadNextSamAlignments(samfile_t *fh, int count, struct lm *lm) #endif /* Read next count alignments in SAM format, allocated in lm. May return less than * count at end of file. */ { /* Set up helper. */ struct bamToSamHelper helper; helper.lm = lm; helper.chrom = NULL; helper.dy = dyStringNew(0); helper.samFile = fh; helper.samList = NULL; /* Loop through calling our own fetch function */ int i; bam1_t *b = bam_init1(); for (i=0; i