533112afe2a2005e80cdb1f82904ea65032d4302 braney Sat Oct 2 11:37:34 2021 -0700 split hg/lib into two separate libaries, one only used by the cgis diff --git src/hg/lib/hgBam.c src/hg/lib/hgBam.c deleted file mode 100644 index 5e0ac5d..0000000 --- src/hg/lib/hgBam.c +++ /dev/null @@ -1,247 +0,0 @@ -/* bamFile -- interface to binary alignment format files using Heng Li's samtools lib. */ - -/* Copyright (C) 2014 The Regents of the University of California - * See README in this or parent directory for licensing information. */ - -#include "common.h" -#include "hdb.h" -#include "hgBam.h" - -#include "htmshell.h" -#include "samAlignment.h" - -char *bamFileNameFromTable(struct sqlConnection *conn, char *table, char *bamSeqName) -/* Return file name from table. If table has a seqName column, then grab the - * row associated with bamSeqName (which can be e.g. '1' not 'chr1' if that is the - * case in the bam file). */ -{ -boolean checkSeqName = (sqlFieldIndex(conn, table, "seqName") >= 0); -if (checkSeqName && bamSeqName == NULL) - errAbort("bamFileNameFromTable: table %s has seqName column, but NULL seqName passed in", - table); -char query[512]; -if (checkSeqName) - sqlSafef(query, sizeof(query), "select fileName from %s where seqName = '%s'", - table, bamSeqName); -else - sqlSafef(query, sizeof(query), "select fileName from %s", table); -char *fileName = sqlQuickString(conn, query); -if (fileName == NULL && checkSeqName) - { - if (startsWith("chr", bamSeqName)) - sqlSafef(query, sizeof(query), "select fileName from %s where seqName = '%s'", - table, bamSeqName+strlen("chr")); - else - sqlSafef(query, sizeof(query), "select fileName from %s where seqName = 'chr%s'", - table, bamSeqName); - fileName = sqlQuickString(conn, query); - } -if (fileName == NULL) - { - if (checkSeqName) - errAbort("Missing fileName for seqName '%s' in %s table", bamSeqName, table); - else - errAbort("Missing fileName in %s table", table); - } -return fileName; -} - -struct ffAli *bamToFfAli(const bam1_t *bam, struct dnaSeq *target, int targetOffset, - boolean useStrand, char **retQSeq) -/* Convert from bam to ffAli format. If retQSeq is non-null, set it to the - * query sequence into which ffAli needle pointers point. (Adapted from psl.c's pslToFfAli.) */ -{ -struct ffAli *ffList = NULL, *ff; -const bam1_core_t *core = &bam->core; -boolean isRc = useStrand && bamIsRc(bam); -DNA *needle = (DNA *)bamGetQuerySequence(bam, useStrand); -if (retQSeq) - *retQSeq = needle; -if (isRc) - reverseComplement(target->dna, target->size); -DNA *haystack = target->dna; -unsigned int *cigarPacked = bam1_cigar(bam); -int tStart = targetOffset, qStart = 0, i; -// If isRc, need to go through the CIGAR ops backwards, but sequence offsets still count up. -int iStart = isRc ? (core->n_cigar - 1) : 0; -int iIncr = isRc ? -1 : 1; -for (i = iStart; isRc ? (i >= 0) : (i < core->n_cigar); i += iIncr) - { - char op; - int size = bamUnpackCigarElement(cigarPacked[i], &op); - switch (op) - { - case 'M': // match or mismatch (gapless aligned block) - case '=': // match - case 'X': // mismatch - AllocVar(ff); - ff->left = ffList; - ffList = ff; - ff->nStart = needle + qStart; - ff->nEnd = ff->nStart + size; - ff->hStart = haystack + tStart - targetOffset; - ff->hEnd = ff->hStart + size; - tStart += size; - qStart += size; - break; - case 'I': // inserted in query - case 'S': // skipped query bases at beginning or end ("soft clipping") - qStart += size; - break; - case 'D': // deleted from query - case 'N': // long deletion from query (intron as opposed to small del) - tStart += size; - break; - case 'H': // skipped query bases not stored in record's query sequence ("hard clipping") - case 'P': // P="silent deletion from padded reference sequence" -- ignore these. - break; - default: - errAbort("bamToFfAli: unrecognized CIGAR op %c -- update me", op); - } - } -ffList = ffMakeRightLinks(ffList); -ffCountGoodEnds(ffList); -return ffList; -} - - -struct bamToSamHelper -/* Helper structure to help get sam alignments out of a BAM file */ - { - struct lm *lm; /* Local memory pool to allocate into */ - char *chrom; /* Chromosome name */ - struct dyString *dy; /* Dynamic temporary string */ - samfile_t *samFile; /* Open sam/bam file header. */ - struct samAlignment *samList; /* List of alignments. */ - }; - -static void addToChars(char *s, int size, char amount) -/* Add amount to each char in s of given size. */ -{ -int i; -for (i=0; ilm; -struct samAlignment *sam; -lmAllocVar(lm, sam); -const bam1_core_t *core = &bam->core; -struct dyString *dy = helper->dy; -sam->qName = lmCloneString(lm, bam1_qname(bam)); -sam->flag = core->flag; -if (helper->chrom != NULL) - sam->rName = helper->chrom; -else - sam->rName = lmCloneString(lm, header->target_name[core->tid]); -sam->pos = core->pos + 1; -sam->mapQ = core->qual; -dyStringClear(dy); -bamUnpackCigar(bam, dy); -sam->cigar = lmCloneStringZ(lm, dy->string, dy->stringSize); -if (core->mtid >= 0) - { - if (core->tid == core->mtid) - sam->rNext = "="; - else - sam->rNext = lmCloneString(lm, header->target_name[core->mtid]); - } -else - sam->rNext = "*"; -sam->pNext = core->mpos + 1; -sam->tLen = core->isize; -sam->seq = lmAlloc(lm, core->l_qseq + 1); -bamUnpackQuerySequence(bam, FALSE, sam->seq); -char *bamQual = (char *)bam1_qual(bam); -if (isAllSameChar(bamQual, core->l_qseq, -1)) - sam->qual = "*"; -else - { - sam->qual = lmCloneStringZ(lm, bamQual, core->l_qseq); - addToChars(sam->qual, core->l_qseq, 33); - } -dyStringClear(dy); -bamUnpackAux(bam, dy); -sam->tagTypeVals = lmCloneStringZ(lm, dy->string, dy->stringSize); -slAddHead(&helper->samList, sam); -return 0; -} - -struct samAlignment *bamFetchSamAlignmentPlus(char *fileOrUrl, char *chrom, int start, int end, - struct lm *lm, char *refUrl, char *cacheDir) -/* Fetch region as a list of samAlignments - which is more or less an unpacked - * bam record. Results is allocated out of lm, since it tends to be large... */ -{ -return bamAndIndexFetchSamAlignmentPlus(fileOrUrl, NULL, chrom, start, end, lm, refUrl, cacheDir); -} - -struct samAlignment *bamAndIndexFetchSamAlignmentPlus(char *fileOrUrl, char *baiUrl, char *chrom, int start, int end, - struct lm *lm, char *refUrl, char *cacheDir) -/* Like bamFetchSamAlignmentPlus but can specify bai index file url in addition to the bam file */ -{ -struct bamToSamHelper helper; -helper.lm = lm; -helper.chrom = chrom; -helper.dy = dyStringNew(0); -helper.samList = NULL; -char posForBam[256]; -safef(posForBam, sizeof(posForBam), "%s:%d-%d", chrom, start+1, end); -bamAndIndexFetchPlus(fileOrUrl, baiUrl, posForBam, bamAddOneSamAlignment, &helper, &helper.samFile, - refUrl, cacheDir); -dyStringFree(&helper.dy); -slReverse(&helper.samList); -return helper.samList; -} - -struct samAlignment *bamFetchSamAlignment(char *fileOrUrl, char *chrom, int start, int end, - struct lm *lm) -/* Fetch region as a list of samAlignments - which is more or less an unpacked - * bam record. Results is allocated out of lm, since it tends to be large... */ -{ -return bamFetchSamAlignmentPlus(fileOrUrl, chrom, start, end, lm, NULL, NULL); -} - -struct samAlignment *bamReadNextSamAlignments(samfile_t *fh, bam_hdr_t *header, int count, struct lm *lm) -/* Read next count alignments in SAM format, allocated in lm. May return less than - * count at end of file. */ -{ -/* Set up helper. */ -struct bamToSamHelper helper; -helper.lm = lm; -helper.chrom = NULL; -helper.dy = dyStringNew(0); -helper.samFile = fh; -helper.samList = NULL; - -/* Loop through calling our own fetch function */ -int i; -bam1_t *b = bam_init1(); -for (i=0; i