0345dae65cc819f447fb0414829b2f6d32473c32 markd Sun Jun 2 13:24:16 2024 -0700 added number of start/end hard and soft based to BAM hgc page diff --git src/hg/hgc/bamClick.c src/hg/hgc/bamClick.c index 2b9cb39..2d3c9b8 100644 --- src/hg/hgc/bamClick.c +++ src/hg/hgc/bamClick.c @@ -13,54 +13,98 @@ #include "udc.h" #include "chromAlias.h" #include "hgBam.h" #include "hgConfig.h" struct bamTrackData { int itemStart; char *itemName; struct hash *pairHash; boolean foundIt; }; +#include <htslib/sam.h> + +static void getClippedBases(const bam1_t *bam, int clippedBases[4]) +/* Function to get the number of soft and hard clipped bases at the start and end of a read, + the array will contain [startHardCnt, startSoftCnt, endSoftCnt, endHardCnt] */ +{ +uint32_t *cigar = bam_get_cigar(bam); +int nCigar = bam->core.n_cigar; +for (int i = 0; i < 4; i++) + clippedBases[i] = 0; + +int iCigar = 0; + +// start +if ((iCigar < nCigar) && (bam_cigar_op(cigar[iCigar]) == BAM_CHARD_CLIP)) + { + clippedBases[0] += bam_cigar_oplen(cigar[iCigar]); + iCigar++; + } +if ((iCigar < nCigar) && (bam_cigar_op(cigar[iCigar]) == BAM_CSOFT_CLIP)) + { + clippedBases[1] += bam_cigar_oplen(cigar[iCigar]); + } + +// end +iCigar = nCigar - 1; +if ((iCigar >= 0) && (bam_cigar_op(cigar[iCigar]) == BAM_CHARD_CLIP)) + { + clippedBases[3] += bam_cigar_oplen(cigar[iCigar]); + iCigar--; + } +if ((iCigar >= 0) && (bam_cigar_op(cigar[iCigar]) == BAM_CSOFT_CLIP)) + { + clippedBases[2] += bam_cigar_oplen(cigar[iCigar]); + } +} + /* Maybe make this an option someday -- for now, I find it too confusing to deal with * CIGAR that is anchored to positive strand while showing rc'd sequence. I think * to do it right, we would need to reverse the CIGAR string for display. */ static boolean useStrand = FALSE; static boolean skipQualityScore = FALSE; + static void singleBamDetails(const bam1_t *bam) /* Print out the properties of this alignment. */ { const bam1_core_t *core = &bam->core; char *itemName = bam1_qname(bam); int tLength = bamGetTargetLength(bam); int tStart = core->pos, tEnd = tStart+tLength; boolean isRc = useStrand && bamIsRc(bam); printPosOnChrom(seqName, tStart, tEnd, NULL, FALSE, itemName); if (!skipQualityScore) printf("<B>Alignment Quality: </B>%d<BR>\n", core->qual); if (core->n_cigar > 50) printf("<B>CIGAR string: </B> Cannot show long CIGAR string, more than 50 operations. Contact us if you need to see the full CIGAR string here.<BR>\n"); else { printf("<B>CIGAR string: </B><tt>%s</tt>", bamGetCigar(bam)); //bamShowCigarEnglish(bam); printf("<BR>\n"); } + +int clippedBases[4]; +getClippedBases(bam, clippedBases); +printf("<B>Start clipping:</B> hard: %d soft: %d</BR>\n", clippedBases[0], clippedBases[1]); +printf("<B>End clipping:</B> hard: %d soft: %d</BR> \n", clippedBases[3], clippedBases[2]); + printf("<B>Tags:</B>"); bamShowTags(bam); puts("<BR>"); printf("<B>Flags: </B><tt>0x%02x:</tt><BR>\n ", core->flag); bamShowFlagsEnglish(bam); puts("<BR>"); if (bamIsRc(bam)) printf("<em>Note: although the read was mapped to the reverse strand of the genome, " "the sequence and CIGAR in BAM are relative to the forward strand.</em><BR>\n"); puts("<BR>"); struct dnaSeq *genoSeq = hChromSeq(database, seqName, tStart, tEnd); char *qSeq = bamGetQuerySequence(bam, FALSE); if (core->l_qseq > 5000) printf("<B>Alignment not shown, very long sequence</B><BR>\n"); else